diff --git a/.github/workflows/agentex-tutorials-test.yml b/.github/workflows/agentex-tutorials-test.yml
index f19c58d4d..41b495d71 100644
--- a/.github/workflows/agentex-tutorials-test.yml
+++ b/.github/workflows/agentex-tutorials-test.yml
@@ -49,6 +49,29 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           echo "$HOME/.local/bin" >> $GITHUB_PATH
 
+      # Subprocess-CLI harnesses: install the relevant CLI only for the
+      # claude-code / codex tutorials (no-op for every other tutorial). npm is
+      # preinstalled on ubuntu runners. Versions mirror the golden agent's
+      # sandbox image (teams/sgp/agents/golden_agent/sandbox/Dockerfile): claude-code
+      # is pinned to the same CLAUDE_CODE_VERSION; codex is left unpinned there,
+      # so it is left unpinned here too. Bump CLAUDE_CODE_VERSION in lockstep
+      # with the sandbox Dockerfile.
+      - name: Install harness CLI (claude-code / codex only)
+        if: ${{ contains(matrix.tutorial, 'claude_code') || contains(matrix.tutorial, 'codex') }}
+        env:
+          CLAUDE_CODE_VERSION: "2.1.142"
+        run: |
+          if [[ "${{ matrix.tutorial }}" == *claude_code* ]]; then
+            echo "📦 Installing Claude Code CLI (v${CLAUDE_CODE_VERSION})..."
+            npm install -g "@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}"
+            claude --version || true
+          fi
+          if [[ "${{ matrix.tutorial }}" == *codex* ]]; then
+            echo "📦 Installing Codex CLI..."
+            npm install -g @openai/codex
+            codex --version || true
+          fi
+
       - name: Pull latest AgentEx image
         run: |
           echo "🐳 Pulling latest Scale AgentEx Docker image..."
@@ -136,6 +159,11 @@ jobs:
         working-directory: ./examples/tutorials
         env:
           OPENAI_API_KEY: ${{ secrets.TUTORIAL_OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.TUTORIAL_ANTHROPIC_API_KEY }}
+          # Enable the gated live tests only for the matching subprocess-CLI
+          # harness tutorial (the CLI is installed for it in the step above).
+          CLAUDE_LIVE_TESTS: ${{ contains(matrix.tutorial, 'claude_code') && '1' || '' }}
+          CODEX_LIVE_TESTS: ${{ contains(matrix.tutorial, 'codex') && '1' || '' }}
           HEALTH_CHECK_PORT: 8080 # Use non-privileged port for temporal worker health checks
         run: |
           echo "Testing tutorial: ${{ matrix.tutorial }}"
diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml
new file mode 100644
index 000000000..075ee5cf3
--- /dev/null
+++ b/.github/workflows/harness-integration.yml
@@ -0,0 +1,61 @@
+name: Harness Integration
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    paths:
+      - "src/agentex/lib/core/harness/**"
+      - "src/agentex/lib/adk/_modules/**"
+      - "tests/lib/core/harness/test_harness_pydantic_ai_*.py"
+      - "tests/lib/core/harness/test_harness_langgraph_*.py"
+      - ".github/workflows/harness-integration.yml"
+
+jobs:
+  conformance:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+        with:
+          version: '0.10.2'
+
+      - name: Bootstrap
+        run: ./scripts/bootstrap
+
+      # Defer to scripts/test so the harness suite runs under the exact same
+      # invocation as the main CI test job: DEFER_PYDANTIC_BUILD=false and
+      # `uv run --isolated --all-packages --all-extras pytest`, across the
+      # min/max supported Python versions. Running `uv run pytest` directly
+      # would risk an all-extras-only dep passing locally but failing in CI.
+      - name: Conformance suite
+        run: ./scripts/test tests/lib/core/harness/ -v
+
+  # Offline harness integration tests (sync / async / temporal channels) for each
+  # migrated harness. These use fake streams / TestModel + fake streaming/tracing
+  # and require no live infrastructure. Future harness migration PRs (6-8) add
+  # their harness to the matrix below and their test paths to the triggers above.
+  live-matrix:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        harness: [pydantic_ai, langgraph]
+        channel: [sync, async, temporal]
+      fail-fast: false
+    name: ${{ matrix.harness }}-${{ matrix.channel }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+        with:
+          version: '0.10.2'
+
+      - name: Bootstrap
+        run: ./scripts/bootstrap
+
+      - name: ${{ matrix.harness }} ${{ matrix.channel }} integration tests (offline)
+        run: |
+          ./scripts/test tests/lib/core/harness/test_harness_${{ matrix.harness }}_${{ matrix.channel }}.py -v
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index be44cf037..9a40fa434 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,4 +1,4 @@
 {
-  ".": "0.14.0",
-  "adk": "0.13.2"
+  ".": "0.15.0",
+  "adk": "0.14.0"
 }
diff --git a/.stats.yml b/.stats.yml
index 5375d17e3..60af41b79 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 64
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sgp/agentex-sdk-ae2571b5ac5d337ba5ced527cec0ff6e3088296fa67c3c836ed5a06544b25cb8.yml
-openapi_spec_hash: 962a2f20444c7823fd3a34f95365146e
-config_hash: 138b7c0b394e7393133c8ff16a6d0eb3
+configured_endpoints: 65
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sgp/agentex-sdk-cd43ba4b554ca024dd7ee7b74e4f4700a743282c17def704a0967e6ff251c09b.yml
+openapi_spec_hash: 9369ccc9c0289e9d6f641a526d244d1c
+config_hash: 1ae003838971335aac550f3ad5872f54
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f81295a9..a9b0590c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,38 @@
 
 * **tracing:** emit OTel metrics for async span queue depth, batch drain, and SGP export success/failure (HTTP status labels). Disable SDK-side recording with ``AGENTEX_TRACING_METRICS=0``.
 
+## 0.15.0 (2026-06-23)
+
+Full Changelog: [agentex-client-v0.14.0...agentex-client-v0.15.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.14.0...agentex-client-v0.15.0)
+
+### Features
+
+* **api:** add webhook endpoint ([37c7d9d](https://github.com/scaleapi/scale-agentex-python/commit/37c7d9d465943184ab84922ba1079b939516d534))
+* **claude-code:** stream-json parser tap for the unified harness surface ([#420](https://github.com/scaleapi/scale-agentex-python/issues/420)) ([904339c](https://github.com/scaleapi/scale-agentex-python/commit/904339c21b8cd641a02d903c03d4a8730b4d7e84))
+* **codex:** event-stream parser tap for the unified harness surface ([#421](https://github.com/scaleapi/scale-agentex-python/issues/421)) ([9b2b031](https://github.com/scaleapi/scale-agentex-python/commit/9b2b03144cc67bb497e0a301686207aba2629758))
+* **harness:** public adk facade + docs for the unified harness surface (PR 9) ([#423](https://github.com/scaleapi/scale-agentex-python/issues/423)) ([fa60632](https://github.com/scaleapi/scale-agentex-python/commit/fa60632f9be84315a3fdc627745ae5b605994bd8))
+* **harness:** unified harness surface — foundation (span derivation, delivery adapters, emitter) ([#412](https://github.com/scaleapi/scale-agentex-python/issues/412)) ([a9cacf4](https://github.com/scaleapi/scale-agentex-python/commit/a9cacf4eb71697351ee658a570636f04bbf31ad5))
+* **langgraph:** migrate LangGraph harness onto unified surface ([#417](https://github.com/scaleapi/scale-agentex-python/issues/417)) ([d344228](https://github.com/scaleapi/scale-agentex-python/commit/d34422845de4b80ed69d2dccfdb0c680ef2fbca3))
+* **openai-agents:** migrate onto the unified harness surface ([#416](https://github.com/scaleapi/scale-agentex-python/issues/416)) ([d10e151](https://github.com/scaleapi/scale-agentex-python/commit/d10e1510bd5da44ad5acc5cac638750122083fce))
+* **pydantic-ai:** migrate onto unified harness surface (PR4) ([#415](https://github.com/scaleapi/scale-agentex-python/issues/415)) ([5ec62c2](https://github.com/scaleapi/scale-agentex-python/commit/5ec62c20781d24fc3e0b92734fcd444b1e791d70))
+* **sdk:** add webhook helper for forward-route handlers ([#419](https://github.com/scaleapi/scale-agentex-python/issues/419)) ([514075d](https://github.com/scaleapi/scale-agentex-python/commit/514075de2189f33be4ade0ac84368019e55ed7ea))
+* **streaming:** stream tool call argument deltas in TemporalStreamingModel ([#355](https://github.com/scaleapi/scale-agentex-python/issues/355)) ([c8de1d4](https://github.com/scaleapi/scale-agentex-python/commit/c8de1d4c9c3b5b3c16ad4aaf9644c1ba0d618757))
+* **tracing:** skip Agentex span-start write by default (end-only ingest) ([#438](https://github.com/scaleapi/scale-agentex-python/issues/438)) ([10d22a2](https://github.com/scaleapi/scale-agentex-python/commit/10d22a27091c9c410ae808dab9cfce5dab3816a8))
+
+
+### Bug Fixes
+
+* **harness:** assert cross-channel (yield vs auto-send) conformance equivalence [AGX1-373] ([#414](https://github.com/scaleapi/scale-agentex-python/issues/414)) ([694960f](https://github.com/scaleapi/scale-agentex-python/commit/694960f913b8ba521d9236e876e5e00f57a3a3ff))
+* **harness:** correct codex & openai reasoning stream envelopes ([#441](https://github.com/scaleapi/scale-agentex-python/issues/441)) ([1d86e8a](https://github.com/scaleapi/scale-agentex-python/commit/1d86e8a47a369814540b6e853cd20240c6098f27))
+* **tests:** use relative import for assert_matches_type in webhooks test ([#440](https://github.com/scaleapi/scale-agentex-python/issues/440)) ([5954a9f](https://github.com/scaleapi/scale-agentex-python/commit/5954a9fc8c7961ef5ceb41abf3ca32e6e78590c5))
+* **tracing:** fail open temporal span activities ([#437](https://github.com/scaleapi/scale-agentex-python/issues/437)) ([2d63eef](https://github.com/scaleapi/scale-agentex-python/commit/2d63eef53bdb919bb6568e04708e3b7abcb8075b))
+
+
+### Refactors
+
+* **cli:** migrate existing langgraph/pydantic-ai templates to unified surface ([#429](https://github.com/scaleapi/scale-agentex-python/issues/429)) ([ee41408](https://github.com/scaleapi/scale-agentex-python/commit/ee41408c420eba5c6b8fe8719c8ebd445dcd220c))
+* **tutorials:** migrate to the unified harness surface + renumber ([#428](https://github.com/scaleapi/scale-agentex-python/issues/428)) ([ebaf617](https://github.com/scaleapi/scale-agentex-python/commit/ebaf617256c7971dde12fd7e25f02b05f2f42fca))
+
 ## 0.14.0 (2026-06-22)
 
 Full Changelog: [agentex-client-v0.13.1...agentex-client-v0.14.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.13.1...agentex-client-v0.14.0)
diff --git a/adk/CHANGELOG.md b/adk/CHANGELOG.md
index 8c15355d9..ac7404e6b 100644
--- a/adk/CHANGELOG.md
+++ b/adk/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.14.0 (2026-06-23)
+
+Full Changelog: [agentex-sdk-v0.13.2...agentex-sdk-v0.14.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.13.2...agentex-sdk-v0.14.0)
+
+### Features
+
+* **harness:** public adk facade + docs for the unified harness surface (PR 9) ([#423](https://github.com/scaleapi/scale-agentex-python/issues/423)) ([fa60632](https://github.com/scaleapi/scale-agentex-python/commit/fa60632f9be84315a3fdc627745ae5b605994bd8))
+
 ## 0.13.2 (2026-06-22)
 
 Full Changelog: [agentex-sdk-v0.13.1...agentex-sdk-v0.13.2](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.13.1...agentex-sdk-v0.13.2)
diff --git a/adk/docs/harness.md b/adk/docs/harness.md
new file mode 100644
index 000000000..6a9d8947a
--- /dev/null
+++ b/adk/docs/harness.md
@@ -0,0 +1,196 @@
+# Unified Harness Surface
+
+The unified harness surface gives every agent harness (pydantic-ai, LangGraph, OpenAI Agents, and future parsers) a single, shared path to streaming, message persistence, and tracing. The Agentex `StreamTaskMessage*` event stream is the canonical wire format. A harness tap produces that stream once; the shared machinery delivers it and derives spans from it.
+
+All public names are re-exported from `agentex.lib.adk`:
+
+```python
+from agentex.lib.adk import (
+    UnifiedEmitter,
+    SpanTracer,
+    TurnUsage,
+    TurnResult,
+    HarnessTurn,
+    StreamTaskMessage,
+    OpenSpan,
+    CloseSpan,
+    SpanSignal,
+)
+```
+
+The implementation lives at `src/agentex/lib/core/harness/`.
+
+---
+
+## The canonical stream: `StreamTaskMessage`
+
+`StreamTaskMessage` is a union of the four wire-protocol update types:
+
+```
+StreamTaskMessageStart  - opens a content slot (text, reasoning, tool request, ...)
+StreamTaskMessageDelta  - appends a token/fragment to an open slot
+StreamTaskMessageFull   - posts a complete message in one shot (tool response, ...)
+StreamTaskMessageDone   - closes an open slot
+```
+
+Every harness tap produces a sequence of these. Everything downstream (delivery, tracing) reads the same sequence.
+
+---
+
+## Per-harness taps: `convert_<harness>_to_agentex_events`
+
+A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events. The currently shipped taps are:
+
+| Harness | Tap function | Exported from |
+|---|---|---|
+| pydantic-ai | `convert_pydantic_ai_to_agentex_events` | `agentex.lib.adk` |
+| LangGraph | `convert_langgraph_to_agentex_events` | `agentex.lib.adk` |
+
+Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421) and exported from `agentex.lib.adk` in the same way.
+
+---
+
+## `HarnessTurn` protocol
+
+`HarnessTurn` is the interface a harness turn object must satisfy to plug into `UnifiedEmitter`:
+
+```python
+@runtime_checkable
+class HarnessTurn(Protocol):
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]: ...
+
+    def usage(self) -> TurnUsage: ...
+```
+
+`events` is the canonical stream for this turn. `usage()` is valid only after `events` is exhausted (async generators cannot cleanly return a value to the consumer, so usage travels out-of-band).
+
+---
+
+## `TurnUsage`
+
+Token counts and cost for one turn, harness-independent:
+
+```python
+class TurnUsage(BaseModel):
+    model: str | None = None
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    cached_input_tokens: int | None = None
+    reasoning_tokens: int | None = None
+    total_tokens: int | None = None
+    cost_usd: float | None = None
+    duration_ms: int | None = None
+    num_llm_calls: int = 0
+    num_tool_calls: int = 0
+    num_reasoning_blocks: int = 0
+```
+
+Field names align with `agentex.lib.core.observability.llm_metrics` for easy conversion.
+
+---
+
+## `UnifiedEmitter`
+
+`UnifiedEmitter` ties a turn's canonical stream, tracing context, and delivery mode together. Construct one per turn with the task/trace context from the request:
+
+```python
+emitter = UnifiedEmitter(
+    task_id=params.task.id,
+    trace_id=params.task.id,   # or None to disable tracing
+    parent_span_id=turn_span.id if turn_span else None,
+)
+```
+
+**Tracing is on by default** when `trace_id` is provided. To disable it explicitly, pass `tracer=False`. To inject a custom `SpanTracer` (e.g. in tests), pass it as `tracer=<instance>`.
+
+### Delivery mode 1: `yield_turn` (sync HTTP ACP)
+
+For sync ACP agents that return events directly over the HTTP response:
+
+```python
+@acp.on_message_send
+async def handle(params):
+    turn = MyHarnessTurn(params)          # implements HarnessTurn
+    async for event in emitter.yield_turn(turn):
+        yield event
+```
+
+`yield_turn` forwards each event to the caller and traces spans as a side effect. It is a passthrough when `tracer` is `None`.
+
+### Delivery mode 2: `auto_send_turn` (async/Temporal)
+
+For async or Temporal agents that push to the task stream via Redis:
+
+```python
+result: TurnResult = await emitter.auto_send_turn(turn, created_at=workflow.now())
+```
+
+`auto_send_turn` drives `adk.streaming` contexts for every message in the stream, derives and records spans, and returns a `TurnResult` with the final text and usage. Pass `created_at` under Temporal to back-date message timestamps deterministically.
+
+---
+
+## `TurnResult`
+
+```python
+class TurnResult(BaseModel):
+    final_text: str = ""
+    usage: TurnUsage = TurnUsage()
+```
+
+Returned by `auto_send_turn`. `final_text` is the last text segment of the turn (multi-step runs return only the final segment, matching `stream_langgraph_events` / `stream_pydantic_ai_events` semantics).
+
+---
+
+## Tracing: span derivation
+
+Spans are derived from the canonical stream by `SpanDeriver` (pure, no `adk` dependency) and dispatched to `adk.tracing` by `SpanTracer`. The mapping:
+
+- `StreamTaskMessageStart(ToolRequestContent)` + `StreamTaskMessageDone` on that index -> tool span open (keyed by `tool_call_id`)
+- `StreamTaskMessageFull(ToolResponseContent)` whose `tool_call_id` was opened -> tool span close
+- `StreamTaskMessageFull(ToolRequestContent)` (harnesses that emit tool calls as Full) -> opens a tool span; matching `Full(ToolResponseContent)` closes it
+- `StreamTaskMessageStart(ReasoningContent)` + `StreamTaskMessageDone` -> reasoning span
+
+`SpanTracer` is `SpanDeriver`'s consumer. You can inject a custom `SpanTracer` via `UnifiedEmitter(tracer=<instance>)` for advanced use or testing.
+
+---
+
+## Usage examples by channel
+
+### Sync ACP (pydantic-ai tap)
+
+```python
+import agentex.lib.adk as adk
+from agentex.lib.adk import UnifiedEmitter, convert_pydantic_ai_to_agentex_events
+
+@acp.on_message_send
+async def handle(params):
+    task_id = params.task.id
+    async with adk.tracing.span(trace_id=task_id, name="message", ...) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        tap = convert_pydantic_ai_to_agentex_events(pydantic_stream)
+        # wrap tap in a HarnessTurn then yield_turn, or yield directly:
+        async for event in tap:
+            yield event
+```
+
+For the pre-unified sync path the tap is still yielded directly; `UnifiedEmitter.yield_turn` is the forward-looking integration point when a `HarnessTurn` wrapper is available.
+
+### Async Temporal (auto-send)
+
+```python
+from agentex.lib.adk import UnifiedEmitter
+
+emitter = UnifiedEmitter(
+    task_id=task_id,
+    trace_id=task_id,
+    parent_span_id=parent_span_id,
+)
+result = await emitter.auto_send_turn(turn, created_at=workflow.now())
+# result.final_text — last text segment
+# result.usage     — TurnUsage (tokens, cost, ...)
+```
diff --git a/adk/pyproject.toml b/adk/pyproject.toml
index 946367d7f..1d8c00a40 100644
--- a/adk/pyproject.toml
+++ b/adk/pyproject.toml
@@ -4,7 +4,7 @@
 # (agentex/{__init__.py, _*.py, types/, resources/}) ships from the slim
 # sibling package `agentex-client` which is pinned as a runtime dep.
 name = "agentex-sdk"
-version = "0.13.2"
+version = "0.14.0"
 description = "Agent Development Kit (ADK) overlay for the Agentex API — FastACP server, Temporal workflows, LLM provider integrations, observability"
 license = "Apache-2.0"
 authors = [
diff --git a/api.md b/api.md
index 4c0d9b9c1..7c1b4eb68 100644
--- a/api.md
+++ b/api.md
@@ -245,3 +245,15 @@ Methods:
 - <code title="post /checkpoints/get-tuple">client.checkpoints.<a href="./src/agentex/resources/checkpoints.py">get_tuple</a>(\*\*<a href="src/agentex/types/checkpoint_get_tuple_params.py">params</a>) -> <a href="./src/agentex/types/checkpoint_get_tuple_response.py">Optional[CheckpointGetTupleResponse]</a></code>
 - <code title="post /checkpoints/put">client.checkpoints.<a href="./src/agentex/resources/checkpoints.py">put</a>(\*\*<a href="src/agentex/types/checkpoint_put_params.py">params</a>) -> <a href="./src/agentex/types/checkpoint_put_response.py">CheckpointPutResponse</a></code>
 - <code title="post /checkpoints/put-writes">client.checkpoints.<a href="./src/agentex/resources/checkpoints.py">put_writes</a>(\*\*<a href="src/agentex/types/checkpoint_put_writes_params.py">params</a>) -> None</code>
+
+# Webhooks
+
+Types:
+
+```python
+from agentex.types import WebhookCreateWebhookTriggerResponse
+```
+
+Methods:
+
+- <code title="post /agent_api_keys/webhook-trigger">client.webhooks.<a href="./src/agentex/resources/webhooks.py">create_webhook_trigger</a>(\*\*<a href="src/agentex/types/webhook_create_webhook_trigger_params.py">params</a>) -> <a href="./src/agentex/types/webhook_create_webhook_trigger_response.py">WebhookCreateWebhookTriggerResponse</a></code>
diff --git a/examples/tutorials/00_sync/030_langgraph/README.md b/examples/tutorials/00_sync/030_langgraph/README.md
index e5b1db0f7..5a68792cc 100644
--- a/examples/tutorials/00_sync/030_langgraph/README.md
+++ b/examples/tutorials/00_sync/030_langgraph/README.md
@@ -1,43 +1,50 @@
-# Tutorial 030: Sync LangGraph Agent
+# Tutorial: Sync LangGraph Agent
 
-This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx with:
-- Tool calling (ReAct pattern)
-- Streaming token output
-- Multi-turn conversation memory via AgentEx checkpointer
-- Tracing integration
+This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx
+using the **unified harness surface**:
 
-## Graph Structure
+```python
+turn = LangGraphTurn(stream, model=None)
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...)
+async for event in emitter.yield_turn(turn):
+    yield event
+```
 
-![Graph](graph.png)
+The `LangGraphTurn` + `UnifiedEmitter` path replaces calling the lower-level
+``convert_langgraph_to_agentex_events`` helper directly.
 
 ## Key Concepts
 
-### Sync ACP
-The sync ACP model uses HTTP request/response for communication. The `@acp.on_message_send` handler receives a message and yields streaming events back to the client.
+### Unified Harness
+
+`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw
+LangGraph `astream()` generator and exposes `events` (an async generator of
+`TaskMessageUpdate`) and `usage()` (token counts captured from the final
+`AIMessage`).
+
+`UnifiedEmitter.yield_turn(turn)` iterates the turn's events and yields them
+to the sync ACP handler unchanged. The same `LangGraphTurn` object can also be
+passed to `UnifiedEmitter.auto_send_turn` in the async/temporal channels.
 
-### LangGraph Integration
-- **StateGraph**: Defines the agent's state machine with `AgentState` (message history)
-- **ToolNode**: Automatically executes tool calls from the LLM
-- **tools_condition**: Routes between tool execution and final response
-- **Checkpointer**: Uses AgentEx's HTTP checkpointer for cross-request memory
+### AGX1-377 Note
 
-### Streaming
-The agent streams tokens as they're generated using `convert_langgraph_to_agentex_events()`, which converts LangGraph's stream events into AgentEx `TaskMessageUpdate` events.
+LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates"
+node outputs). The `SpanDeriver` does not open tool spans from Full events
+today; that gap is tracked in AGX1-373.
 
 ## Files
 
 | File | Description |
 |------|-------------|
-| `project/acp.py` | ACP server and message handler |
-| `project/graph.py` | LangGraph state graph definition |
+| `project/acp.py` | ACP server using unified harness (LangGraphTurn + yield_turn) |
+| `project/graph.py` | LangGraph state graph (weather example) |
 | `project/tools.py` | Tool definitions (weather example) |
 | `tests/test_agent.py` | Integration tests |
-| `manifest.yaml` | Agent configuration |
+| `manifest.yaml` | Agent configuration (name: s030-langgraph) |
 
 ## Running Locally
 
 ```bash
-# From this directory
 agentex agents run
 ```
 
diff --git a/examples/tutorials/00_sync/030_langgraph/graph.png b/examples/tutorials/00_sync/030_langgraph/graph.png
deleted file mode 100644
index 16d22a1e7..000000000
Binary files a/examples/tutorials/00_sync/030_langgraph/graph.png and /dev/null differ
diff --git a/examples/tutorials/00_sync/030_langgraph/manifest.yaml b/examples/tutorials/00_sync/030_langgraph/manifest.yaml
index bfe005626..9a52a3dce 100644
--- a/examples/tutorials/00_sync/030_langgraph/manifest.yaml
+++ b/examples/tutorials/00_sync/030_langgraph/manifest.yaml
@@ -17,7 +17,7 @@ local_development:
 agent:
   acp_type: sync
   name: s030-langgraph
-  description: A sync LangGraph agent with tool calling and streaming
+  description: A sync LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn)
 
   temporal:
     enabled: false
@@ -47,7 +47,7 @@ deployment:
   global:
     agent:
       name: "s030-langgraph"
-      description: "A sync LangGraph agent with tool calling and streaming"
+      description: "A sync LangGraph agent using the unified harness surface"
     replicaCount: 1
     resources:
       requests:
diff --git a/examples/tutorials/00_sync/030_langgraph/project/acp.py b/examples/tutorials/00_sync/030_langgraph/project/acp.py
index 517a00322..e42b0f4ea 100644
--- a/examples/tutorials/00_sync/030_langgraph/project/acp.py
+++ b/examples/tutorials/00_sync/030_langgraph/project/acp.py
@@ -1,8 +1,20 @@
-"""
-ACP (Agent Communication Protocol) handler for Agentex.
-
-This is the API layer — it manages the graph lifecycle and streams
-tokens and tool calls from the LangGraph graph to the Agentex frontend.
+"""ACP handler for the sync LangGraph agent.
+
+Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph
+``astream()`` generator, and ``UnifiedEmitter.yield_turn`` converts it into
+the AgentEx ``TaskMessageUpdate`` event stream expected by the sync ACP.
+
+Properties of the unified surface:
+- Tracing is wired through the tracing manager (no bespoke handler boilerplate).
+- No manual text-delta accumulation for the span output.
+- Tool calls are emitted as ``StreamTaskMessageFull`` (not Start+Delta+Done)
+  via the same code path as the async/temporal channels.
+- Usage data (token counts) is captured on the ``LangGraphTurn`` object and
+  can be read after the turn completes.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull``
+events (from "updates"). The ``SpanDeriver`` does not open tool spans from
+Full events today; that gap is tracked in AGX1-373.
 """
 
 from __future__ import annotations
@@ -16,29 +28,29 @@
 
 import agentex.lib.adk as adk
 from project.graph import create_graph
-from agentex.lib.adk import create_langgraph_tracing_handler, convert_langgraph_to_agentex_events
 from agentex.lib.types.acp import SendMessageParams
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
 from agentex.types.task_message_delta import TextDelta
 from agentex.types.task_message_update import TaskMessageUpdate
 from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 
 logger = make_logger(__name__)
 
-# Register the Agentex tracing processor so spans are shipped to the backend
 add_tracing_processor_config(
     SGPTracingProcessorConfig(
         sgp_api_key=os.environ.get("SGP_API_KEY", ""),
         sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
         sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
-    ))
-# Create ACP server
+    )
+)
+
 acp = FastACP.create(acp_type="sync")
 
-# Compiled graph (lazy-initialized on first request)
 _graph = None
 
 
@@ -54,41 +66,42 @@ async def get_graph():
 async def handle_message_send(
     params: SendMessageParams,
 ) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
-    """Handle incoming messages from Agentex, streaming tokens and tool calls."""
+    """Handle incoming messages, streaming tokens and tool calls via unified harness."""
     graph = await get_graph()
 
-    thread_id = params.task.id
+    task_id = params.task.id
     user_message = params.content.content
 
-    logger.info(f"Processing message for thread {thread_id}")
+    logger.info(f"Processing message for task {task_id}")
 
     async with adk.tracing.span(
-        trace_id=thread_id,
+        trace_id=task_id,
+        task_id=task_id,
         name="message",
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        callback = create_langgraph_tracing_handler(
-            trace_id=thread_id,
-            parent_span_id=turn_span.id if turn_span else None,
-        )
-
         stream = graph.astream(
             {"messages": [{"role": "user", "content": user_message}]},
-            config={
-                "configurable": {"thread_id": thread_id},
-                "callbacks": [callback],
-            },
+            config={"configurable": {"thread_id": task_id}},
             stream_mode=["messages", "updates"],
         )
 
+        turn = LangGraphTurn(stream, model=None)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
         final_text = ""
-        async for event in convert_langgraph_to_agentex_events(stream):
-            # Accumulate text deltas for span output
+        async for event in emitter.yield_turn(turn):
+            # Accumulate text deltas so the span's final_output is the assistant
+            # text (matching the async tutorial), not the usage metrics.
             delta = getattr(event, "delta", None)
             if isinstance(delta, TextDelta) and delta.text_delta:
                 final_text += delta.text_delta
             yield event
 
         if turn_span:
-            turn_span.output = {"final_output": final_text}
+            turn_span.output = {"final_output": final_text, "usage": turn.usage().model_dump()}
diff --git a/examples/tutorials/00_sync/030_langgraph/project/graph.py b/examples/tutorials/00_sync/030_langgraph/project/graph.py
index 53728cd58..6709719e5 100644
--- a/examples/tutorials/00_sync/030_langgraph/project/graph.py
+++ b/examples/tutorials/00_sync/030_langgraph/project/graph.py
@@ -1,8 +1,7 @@
-"""
-LangGraph graph definition.
+"""LangGraph graph definition for the 030_langgraph sync agent.
 
-Defines the state, nodes, edges, and compiles the graph.
-The compiled graph is the boundary between this module and the API layer.
+Identical to ``030_langgraph/project/graph.py`` — the graph definition is not
+affected by the harness migration. Only ``acp.py`` changes.
 """
 
 from __future__ import annotations
@@ -35,15 +34,12 @@
 
 class AgentState(TypedDict):
     """State schema for the agent graph."""
+
     messages: Annotated[list[Any], add_messages]
 
 
 async def create_graph():
-    """Create and compile the agent graph with checkpointer.
-
-    Returns:
-        A compiled LangGraph StateGraph ready for invocation.
-    """
+    """Create and compile the agent graph with checkpointer."""
     llm = ChatOpenAI(
         model=MODEL_NAME,
         reasoning={"effort": "high", "summary": "auto"},
@@ -56,9 +52,7 @@ def agent_node(state: AgentState) -> dict[str, Any]:
         """Process the current state and generate a response."""
         messages = state["messages"]
         if not messages or not isinstance(messages[0], SystemMessage):
-            system_content = SYSTEM_PROMPT.format(
-                timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            )
+            system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
             messages = [SystemMessage(content=system_content)] + messages
         response = llm_with_tools.invoke(messages)
         return {"messages": [response]}
diff --git a/examples/tutorials/00_sync/030_langgraph/project/tools.py b/examples/tutorials/00_sync/030_langgraph/project/tools.py
index 1b402a906..b3e5dba34 100644
--- a/examples/tutorials/00_sync/030_langgraph/project/tools.py
+++ b/examples/tutorials/00_sync/030_langgraph/project/tools.py
@@ -1,9 +1,4 @@
-"""
-Tool definitions for the LangGraph agent.
-
-Add your custom tools here. Each tool should be a function decorated with @tool
-or created using the Tool class.
-"""
+"""Tool definitions for the 030_langgraph sync agent."""
 
 from langchain_core.tools import Tool
 
@@ -17,16 +12,13 @@ def get_weather(city: str) -> str:
     Returns:
         A string describing the weather conditions.
     """
-    # TODO: Replace with actual weather API call
     return f"The weather in {city} is sunny and 72°F"
 
 
-# Define tools
 weather_tool = Tool(
     name="get_weather",
     func=get_weather,
     description="Get the current weather for a city. Input should be a city name.",
 )
 
-# Export all tools as a list
 TOOLS = [weather_tool]
diff --git a/examples/tutorials/00_sync/030_langgraph/pyproject.toml b/examples/tutorials/00_sync/030_langgraph/pyproject.toml
index fc9f99971..33bea16b5 100644
--- a/examples/tutorials/00_sync/030_langgraph/pyproject.toml
+++ b/examples/tutorials/00_sync/030_langgraph/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "s030-langgraph"
 version = "0.1.0"
-description = "A sync LangGraph agent with tool calling and streaming"
+description = "A sync LangGraph agent using the unified harness surface"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
diff --git a/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py b/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py
index 36fcf418f..dabd83e76 100644
--- a/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py
+++ b/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py
@@ -1,14 +1,8 @@
 """
-Tests for the sync LangGraph agent.
+Tests for the sync harness LangGraph agent.
 
-This test suite validates:
-- Non-streaming message sending with tool-calling LangGraph agent
-- Streaming message sending with token-by-token output
-
-To run these tests:
-1. Make sure the agent is running (via docker-compose or `agentex agents run`)
-2. Set the AGENTEX_API_BASE_URL environment variable if not using default
-3. Run: pytest test_agent.py -v
+Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn)
+end-to-end against a live AgentEx server.
 
 Configuration:
 - AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
@@ -25,26 +19,22 @@
 from agentex.types.agent_rpc_params import ParamsCreateTaskRequest, ParamsSendMessageRequest
 from agentex.lib.sdk.fastacp.base.base_acp_server import uuid
 
-# Configuration from environment variables
 AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
 AGENT_NAME = os.environ.get("AGENT_NAME", "s030-langgraph")
 
 
 @pytest.fixture
 def client():
-    """Create an AgentEx client instance for testing."""
     return Agentex(base_url=AGENTEX_API_BASE_URL)
 
 
 @pytest.fixture
 def agent_name():
-    """Return the agent name for testing."""
     return AGENT_NAME
 
 
 @pytest.fixture
 def agent_id(client, agent_name):
-    """Retrieve the agent ID based on the agent name."""
     agents = client.agents.list()
     for agent in agents:
         if agent.name == agent_name:
@@ -53,10 +43,7 @@ def agent_id(client, agent_name):
 
 
 class TestNonStreamingMessages:
-    """Test non-streaming message sending with LangGraph agent."""
-
     def test_send_simple_message(self, client: Agentex, agent_name: str):
-        """Test sending a simple message and receiving a response."""
         response = client.agents.send_message(
             agent_name=agent_name,
             params=ParamsSendMessageRequest(
@@ -72,7 +59,6 @@ def test_send_simple_message(self, client: Agentex, agent_name: str):
         assert len(result) >= 1
 
     def test_tool_calling(self, client: Agentex, agent_name: str):
-        """Test that the agent can use tools (e.g., weather tool)."""
         response = client.agents.send_message(
             agent_name=agent_name,
             params=ParamsSendMessageRequest(
@@ -88,12 +74,10 @@ def test_tool_calling(self, client: Agentex, agent_name: str):
         assert len(result) >= 1
 
     def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id: str):
-        """Test multi-turn conversation with memory via LangGraph checkpointer."""
         task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
-        # First message
         response1 = client.agents.send_message(
             agent_name=agent_name,
             params=ParamsSendMessageRequest(
@@ -107,7 +91,6 @@ def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id
         )
         assert response1.result is not None
 
-        # Second message - agent should remember the name
         response2 = client.agents.send_message(
             agent_name=agent_name,
             params=ParamsSendMessageRequest(
@@ -126,10 +109,7 @@ def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id
 
 
 class TestStreamingMessages:
-    """Test streaming message sending with LangGraph agent."""
-
     def test_stream_simple_message(self, client: Agentex, agent_name: str):
-        """Test streaming a simple message response."""
         stream = client.agents.send_message_stream(
             agent_name=agent_name,
             params=ParamsSendMessageRequest(
@@ -140,14 +120,11 @@ def test_stream_simple_message(self, client: Agentex, agent_name: str):
                 )
             ),
         )
-
         aggregated_content, chunks = collect_streaming_response(stream)
-
         assert aggregated_content is not None
         assert len(chunks) > 1, "No chunks received in streaming response."
 
     def test_stream_tool_calling(self, client: Agentex, agent_name: str):
-        """Test streaming with tool calls."""
         stream = client.agents.send_message_stream(
             agent_name=agent_name,
             params=ParamsSendMessageRequest(
@@ -158,9 +135,7 @@ def test_stream_tool_calling(self, client: Agentex, agent_name: str):
                 )
             ),
         )
-
         aggregated_content, chunks = collect_streaming_response(stream)
-
         assert aggregated_content is not None
         assert len(chunks) > 0, "No chunks received in streaming response."
 
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/README.md b/examples/tutorials/00_sync/040_pydantic_ai/README.md
index 02c3b57c7..ef52c7c77 100644
--- a/examples/tutorials/00_sync/040_pydantic_ai/README.md
+++ b/examples/tutorials/00_sync/040_pydantic_ai/README.md
@@ -1,46 +1,52 @@
-# Tutorial 040: Sync Pydantic AI Agent
+# Sync Pydantic AI Agent
 
-This tutorial demonstrates how to build a **synchronous** Pydantic AI agent on AgentEx with:
-- Tool calling (Pydantic AI handles the tool loop internally)
-- Streaming token output (including token-by-token tool-call argument streaming)
+A minimal **synchronous** Pydantic AI agent that drives the **unified harness
+surface** (`UnifiedEmitter.yield_turn` + `PydanticAITurn`) on the sync
+(HTTP-yield) channel.
 
-## Key Concepts
+## Why this agent exists
 
-### Sync ACP
-The sync ACP model uses HTTP request/response for communication. The `@acp.on_message_send` handler receives a message and yields streaming events back to the client.
+This agent is the sync coverage for the unified surface: it shows an agent
+author wiring the sync channel through `UnifiedEmitter.yield_turn` and getting
+automatic span derivation (tool spans nested under the per-turn span) for free,
+exactly like the async/temporal channels.
 
-### Pydantic AI Integration
-- **Agent**: A single `pydantic_ai.Agent` that owns the model and tools. No graph required — Pydantic AI runs its own tool-call loop until the model is done.
-- **`@agent.tool_plain`**: Registers a Python function as a tool. Pydantic AI infers the schema from type hints and docstring.
-- **`agent.run_stream_events(...)`**: Yields `AgentStreamEvent`s (PartStartEvent / PartDeltaEvent / PartEndEvent / FunctionToolResultEvent) as the model produces them.
+## How it wires the unified surface
 
-### Streaming
-The agent streams tokens and tool-call arguments as they're generated using `convert_pydantic_ai_to_agentex_events()`, which adapts Pydantic AI's stream into AgentEx `TaskMessageUpdate` events. Notably, **tool-call arguments stream as `ToolRequestDelta` tokens** rather than arriving as a single complete payload — a richer experience than what OpenAI Agents SDK currently exposes.
+In `project/acp.py`:
 
-## Files
+```python
+emitter = UnifiedEmitter(
+    task_id=task_id,
+    trace_id=task_id,
+    parent_span_id=turn_span.id if turn_span else None,
+)
+async with agent.run_stream_events(user_message) as stream:
+    turn = PydanticAITurn(stream, model=MODEL_NAME)  # coalesce off: stream tool-call arg tokens
+    async for ev in emitter.yield_turn(turn):
+        yield ev
+```
 
-| File | Description |
-|------|-------------|
-| `project/acp.py` | ACP server and message handler |
-| `project/agent.py` | Pydantic AI agent + tool registration |
-| `project/tools.py` | Tool definitions (weather example) |
-| `tests/test_agent.py` | Integration tests |
-| `manifest.yaml` | Agent configuration |
+- `coalesce_tool_requests=False` (the default) preserves token-by-token
+  tool-call argument streaming on the sync channel.
+- The `UnifiedEmitter` is constructed from the ACP/streaming context
+  (`task_id` + `trace_id` + `parent_span_id`) so tool spans nest under the
+  per-turn `AGENT_WORKFLOW` span automatically.
 
-## Running Locally
+## Files
 
-```bash
-# From this directory
-agentex agents run
-```
+- `project/acp.py` — sync ACP handler using `emitter.yield_turn(...)`.
+- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool.
+- `project/tools.py` — `get_weather(city)` returning a constant.
+- `tests/test_agent.py` — live integration test (requires a running agent).
 
-## Running Tests
+## Tools
 
-```bash
-pytest tests/test_agent.py -v
-```
+- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string so a
+  run deterministically exercises text + a tool call + a tool response.
 
-## Notes
+## Offline coverage
 
-- Multi-turn conversation memory is not wired in this tutorial. Pydantic AI does not ship a checkpointer like LangGraph; to add memory, load prior messages via `adk.messages.list(task_id=...)` and pass them to `agent.run_stream_events(..., message_history=...)`.
-- Reasoning/thinking tokens are not exercised here because `gpt-4o-mini` does not emit `ThinkingPart`s. Swap to a reasoning-capable model (e.g. `openai:o1-mini` via Pydantic AI's appropriate provider) if you want to test that branch end-to-end.
+Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake
+streaming/tracing, no network) live in the SDK repo under
+`tests/lib/core/harness/` (the pydantic-ai sync suite).
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml b/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml
index 68d3b4a00..9563de39c 100644
--- a/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml
+++ b/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml
@@ -17,7 +17,7 @@ local_development:
 agent:
   acp_type: sync
   name: s040-pydantic-ai
-  description: A sync Pydantic AI agent with tool calling and streaming
+  description: A sync Pydantic AI harness test agent using the unified emitter surface
 
   temporal:
     enabled: false
@@ -47,7 +47,7 @@ deployment:
   global:
     agent:
       name: "s040-pydantic-ai"
-      description: "A sync Pydantic AI agent with tool calling and streaming"
+      description: "A sync Pydantic AI harness test agent using the unified emitter surface"
     replicaCount: 1
     resources:
       requests:
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py b/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py
index 0c096893f..f23cd7960 100644
--- a/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py
+++ b/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py
@@ -1,7 +1,17 @@
-"""ACP (Agent Communication Protocol) handler for Agentex.
-
-This is the API layer — it owns the agent lifecycle and streams tokens
-and tool calls from the Pydantic AI agent to the Agentex frontend.
+"""ACP handler for the sync harness Pydantic AI test agent.
+
+This agent exercises the UNIFIED HARNESS SURFACE on the sync (HTTP-yield)
+channel — ``UnifiedEmitter.yield_turn(PydanticAITurn(...))`` — rather than the
+bare ``convert_pydantic_ai_to_agentex_events`` converter used by the
+``040_pydantic_ai`` tutorial. The unified surface gives the sync channel the
+same tracing (span derivation) the async/temporal channels get for free.
+
+Flow:
+1. Open a per-turn AGENT_WORKFLOW span via ``adk.tracing.span``.
+2. Construct a ``UnifiedEmitter`` from the ACP/streaming context (task_id +
+   trace_id + parent_span_id) so tool spans nest under the turn span.
+3. Wrap ``agent.run_stream_events(...)`` in a ``PydanticAITurn`` and forward
+   events with ``emitter.yield_turn(turn)`` — yielding each to the client.
 """
 
 from __future__ import annotations
@@ -14,17 +24,15 @@
 load_dotenv()
 
 import agentex.lib.adk as adk
-from project.agent import create_agent
-from agentex.lib.adk import (
-    create_pydantic_ai_tracing_handler,
-    convert_pydantic_ai_to_agentex_events,
-)
+from project.agent import MODEL_NAME, create_agent
 from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.sdk.fastacp.fastacp import FastACP
 from agentex.types.task_message_update import TaskMessageUpdate
 from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 
 logger = make_logger(__name__)
@@ -54,7 +62,7 @@ def get_agent():
 async def handle_message_send(
     params: SendMessageParams,
 ) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
-    """Handle incoming messages from Agentex, streaming tokens and tool calls."""
+    """Handle incoming messages, streaming events through the unified surface."""
     agent = get_agent()
     task_id = params.task.id
 
@@ -68,11 +76,17 @@ async def handle_message_send(
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        tracing_handler = create_pydantic_ai_tracing_handler(
+        # Construct the UnifiedEmitter from the ACP/streaming context so tracing
+        # is automatic: tool spans nest under this turn's span.
+        emitter = UnifiedEmitter(
+            task_id=task_id,
             trace_id=task_id,
             parent_span_id=turn_span.id if turn_span else None,
-            task_id=task_id,
         )
+
         async with agent.run_stream_events(user_message) as stream:
-            async for event in convert_pydantic_ai_to_agentex_events(stream, tracing_handler=tracing_handler):
-                yield event
+            # PydanticAITurn preserves token-by-token tool-call argument
+            # streaming (Start+Delta+Done) on the sync/HTTP channel.
+            turn = PydanticAITurn(stream, model=MODEL_NAME)
+            async for ev in emitter.yield_turn(turn):
+                yield ev
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py b/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py
index 2c0f6f10c..72fd74173 100644
--- a/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py
+++ b/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py
@@ -1,4 +1,4 @@
-"""Pydantic AI agent definition.
+"""Pydantic AI agent definition for the sync harness test agent.
 
 The Agent is the boundary between this module and the API layer (acp.py).
 Pydantic AI handles its own tool-call loop internally — no graph required.
@@ -12,6 +12,8 @@
 
 from project.tools import get_weather
 
+__all__ = ["create_agent", "MODEL_NAME"]
+
 MODEL_NAME = "openai:gpt-4o-mini"
 SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
 
@@ -29,9 +31,7 @@ def create_agent() -> Agent:
     """Build and return the Pydantic AI agent with tools registered."""
     agent = Agent(
         MODEL_NAME,
-        system_prompt=SYSTEM_PROMPT.format(
-            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        ),
+        system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
     )
 
     agent.tool_plain(get_weather)
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py b/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py
index bab87942a..d649c75f1 100644
--- a/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py
+++ b/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py
@@ -1,8 +1,8 @@
-"""Tool definitions for the Pydantic AI agent.
+"""Tool definitions for the sync harness Pydantic AI agent.
 
 Pydantic AI tools are registered directly on the Agent via decorators
-(see project.agent). This module hosts the bare functions so they're
-easy to unit-test in isolation.
+(see project.agent). This module hosts the bare function so it is easy to
+unit-test in isolation.
 """
 
 from __future__ import annotations
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml b/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml
index 3e645fa15..748a9f3cb 100644
--- a/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml
+++ b/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "s040-pydantic-ai"
 version = "0.1.0"
-description = "A sync Pydantic AI agent with tool calling and streaming"
+description = "A sync Pydantic AI harness test agent using the unified emitter surface"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py b/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py
index d3deed1c7..4aad12a56 100644
--- a/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py
+++ b/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py
@@ -1,8 +1,10 @@
-"""Tests for the sync Pydantic AI agent.
+"""Live tests for the sync Pydantic AI agent.
 
-This test suite validates:
-- Non-streaming message sending with tool-calling Pydantic AI agent
-- Streaming message sending with token-by-token output
+These tests require a running agent (server + deployed agent) and exercise the
+unified-surface sync handler end-to-end over the wire.
+
+Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives
+in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai sync suite).
 
 To run these tests:
 1. Make sure the agent is running (via docker-compose or `agentex agents run`)
@@ -50,7 +52,7 @@ def agent_id(client, agent_name):
 
 
 class TestNonStreamingMessages:
-    """Test non-streaming message sending with Pydantic AI agent."""
+    """Test non-streaming message sending with the unified-surface sync agent."""
 
     def test_send_simple_message(self, client: Agentex, agent_name: str):
         """Test sending a simple message and receiving a response."""
@@ -86,7 +88,7 @@ def test_tool_calling(self, client: Agentex, agent_name: str):
 
 
 class TestStreamingMessages:
-    """Test streaming message sending with Pydantic AI agent."""
+    """Test streaming message sending through the unified yield_turn path."""
 
     def test_stream_simple_message(self, client: Agentex, agent_name: str):
         """Test streaming a simple message response."""
@@ -107,10 +109,10 @@ def test_stream_simple_message(self, client: Agentex, agent_name: str):
         assert len(chunks) > 1, "No chunks received in streaming response."
 
     def test_stream_tool_calling(self, client: Agentex, agent_name: str):
-        """Test streaming with tool calls.
+        """Test streaming with tool calls through the unified surface.
 
-        This exercises the headline Pydantic AI converter feature:
-        tool-call argument tokens streaming through as ToolRequestDelta.
+        Exercises token-by-token tool-call argument streaming (coalesce off),
+        which the unified yield_turn path preserves on the sync channel.
         """
         stream = client.agents.send_message_stream(
             agent_name=agent_name,
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/00_sync/050_openai_agents/.dockerignore
similarity index 100%
rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/.dockerignore
rename to examples/tutorials/00_sync/050_openai_agents/.dockerignore
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/00_sync/050_openai_agents/Dockerfile
similarity index 65%
rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile
rename to examples/tutorials/00_sync/050_openai_agents/Dockerfile
index 8e0ec22df..c9ccd6f54 100644
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile
+++ b/examples/tutorials/00_sync/050_openai_agents/Dockerfile
@@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel
 ENV UV_HTTP_TIMEOUT=1000
 
 # Copy pyproject.toml and README.md to install dependencies
-COPY 00_sync/050_openai_agents_local_sandbox/pyproject.toml /app/050_openai_agents_local_sandbox/pyproject.toml
-COPY 00_sync/050_openai_agents_local_sandbox/README.md /app/050_openai_agents_local_sandbox/README.md
+COPY 00_sync/050_openai_agents/pyproject.toml /app/050_openai_agents/pyproject.toml
+COPY 00_sync/050_openai_agents/README.md /app/050_openai_agents/README.md
 
-WORKDIR /app/050_openai_agents_local_sandbox
+WORKDIR /app/050_openai_agents
 
 # Copy the project code
-COPY 00_sync/050_openai_agents_local_sandbox/project /app/050_openai_agents_local_sandbox/project
+COPY 00_sync/050_openai_agents/project /app/050_openai_agents/project
 
 # Copy the test files
-COPY 00_sync/050_openai_agents_local_sandbox/tests /app/050_openai_agents_local_sandbox/tests
+COPY 00_sync/050_openai_agents/tests /app/050_openai_agents/tests
 
 # Copy shared test utilities
 COPY test_utils /app/test_utils
@@ -44,7 +44,7 @@ RUN uv pip install --system .[dev]
 ENV PYTHONPATH=/app
 
 # Set test environment variables
-ENV AGENT_NAME=s050-openai-agents-local-sandbox
+ENV AGENT_NAME=s050-openai-agents
 
 # Run the agent using uvicorn
 CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/050_openai_agents/README.md b/examples/tutorials/00_sync/050_openai_agents/README.md
new file mode 100644
index 000000000..98cec3f9a
--- /dev/null
+++ b/examples/tutorials/00_sync/050_openai_agents/README.md
@@ -0,0 +1,35 @@
+# Sync OpenAI Agents on the unified harness surface
+
+A sync (HTTP) Agentex agent that runs the OpenAI Agents SDK and delivers its
+output through the **unified harness surface**.
+
+## What this demonstrates
+
+The OpenAI Agents SDK produces native streaming events. This tutorial wraps a
+`Runner.run_streamed` result in an `OpenAITurn` — the provider -> canonical
+`StreamTaskMessage*` adapter — and forwards the canonical stream to the frontend
+via `UnifiedEmitter.yield_turn`. The same `OpenAITurn` flows unchanged through
+`auto_send_turn` in the async (`10_async/00_base/120_openai_agents`) and temporal
+(`10_async/10_temporal/120_openai_agents`) variants; only the delivery method differs.
+
+```python
+result = Runner.run_streamed(starting_agent=agent, input=user_message)
+turn = OpenAITurn(result=result, model="gpt-4o")
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, parent_span_id=parent_span_id)
+async for event in emitter.yield_turn(turn):
+    yield event
+```
+
+## Run it
+
+```bash
+agentex agents run --manifest manifest.yaml
+```
+
+## Test it
+
+The offline test exercises the harness wiring without a server or API key:
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/00_sync/050_openai_agents/manifest.yaml
similarity index 66%
rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml
rename to examples/tutorials/00_sync/050_openai_agents/manifest.yaml
index 8ae5b98a1..bdb47e8d8 100644
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml
+++ b/examples/tutorials/00_sync/050_openai_agents/manifest.yaml
@@ -2,10 +2,10 @@ build:
   context:
     root: ../../
     include_paths:
-      - 00_sync/050_openai_agents_local_sandbox
+      - 00_sync/050_openai_agents
       - test_utils
-    dockerfile: 00_sync/050_openai_agents_local_sandbox/Dockerfile
-    dockerignore: 00_sync/050_openai_agents_local_sandbox/.dockerignore
+    dockerfile: 00_sync/050_openai_agents/Dockerfile
+    dockerignore: 00_sync/050_openai_agents/.dockerignore
 
 local_development:
   agent:
@@ -16,8 +16,8 @@ local_development:
 
 agent:
   acp_type: sync
-  name: s050-openai-agents-local-sandbox
-  description: A sync OpenAI Agents SDK agent using a local (unix_local) sandbox
+  name: s050-openai-agents
+  description: A sync OpenAI Agents SDK agent on the unified harness surface
 
   temporal:
     enabled: false
@@ -39,9 +39,6 @@ agent:
       secret_name: sgp-client-base-url
       secret_key: url
 
-  env:
-    OPENAI_AGENTS_DISABLE_TRACING: "1"
-
 deployment:
   image:
     repository: ""
@@ -49,8 +46,8 @@ deployment:
 
   global:
     agent:
-      name: "s050-openai-agents-local-sandbox"
-      description: "A sync OpenAI Agents SDK agent using a local (unix_local) sandbox"
+      name: "s050-openai-agents"
+      description: "A sync OpenAI Agents SDK agent on the unified harness surface"
     replicaCount: 1
     resources:
       requests:
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/00_sync/050_openai_agents/project/__init__.py
similarity index 100%
rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/__init__.py
rename to examples/tutorials/00_sync/050_openai_agents/project/__init__.py
diff --git a/examples/tutorials/00_sync/050_openai_agents/project/acp.py b/examples/tutorials/00_sync/050_openai_agents/project/acp.py
new file mode 100644
index 000000000..caaa0b132
--- /dev/null
+++ b/examples/tutorials/00_sync/050_openai_agents/project/acp.py
@@ -0,0 +1,87 @@
+"""ACP handler for the sync OpenAI Agents harness tutorial.
+
+This is the API layer. It runs the OpenAI Agents SDK via ``Runner.run_streamed``,
+wraps the streamed run in an ``OpenAITurn`` (the provider -> canonical
+``StreamTaskMessage*`` adapter), and forwards the canonical stream to the
+Agentex frontend via ``UnifiedEmitter.yield_turn`` — the same harness surface
+used by the async and temporal variants of this tutorial.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import AsyncGenerator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agents import Runner
+
+from agentex.lib import adk
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client
+# compatibility, so the same example works behind the Scale LiteLLM gateway.
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = _litellm_key
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+_agent = None
+
+
+def get_agent():
+    """Get or create the OpenAI Agents SDK agent instance."""
+    global _agent
+    if _agent is None:
+        _agent = create_agent()
+    return _agent
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle incoming messages, streaming tokens and tool calls via the harness."""
+    agent = get_agent()
+    task_id = params.task.id
+    user_message = params.content.content
+    logger.info(f"Processing message for task {task_id}")
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        result = Runner.run_streamed(starting_agent=agent, input=user_message)
+        turn = OpenAITurn(result=result, model=MODEL_NAME)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        async for event in emitter.yield_turn(turn):
+            yield event
diff --git a/examples/tutorials/00_sync/050_openai_agents/project/agent.py b/examples/tutorials/00_sync/050_openai_agents/project/agent.py
new file mode 100644
index 000000000..3611012fe
--- /dev/null
+++ b/examples/tutorials/00_sync/050_openai_agents/project/agent.py
@@ -0,0 +1,47 @@
+"""OpenAI Agents SDK agent definition for the harness tutorial.
+
+The agent is the boundary between this module and the API layer (acp.py).
+The OpenAI Agents SDK runs its own tool-call loop internally; acp.py wraps a
+``Runner.run_streamed`` result with ``OpenAITurn`` so it flows through the
+unified harness surface.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from agents import Agent, function_tool, set_tracing_disabled
+
+from project.tools import get_weather
+
+# Disable the openai-agents SDK's native tracer so it doesn't ship traces to
+# api.openai.com (the key may be a gateway/proxy key). Agentex tracing still
+# runs via the harness + tracing manager configured in acp.py.
+set_tracing_disabled(True)
+
+MODEL_NAME = "gpt-4o"
+INSTRUCTIONS = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use the weather tool when the user asks about the weather
+- Always report the real tool output back to the user
+"""
+
+
+@function_tool
+def weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return get_weather(city)
+
+
+def create_agent() -> Agent:
+    """Build and return the OpenAI Agents SDK agent with the weather tool."""
+    return Agent(
+        name="Harness OpenAI Assistant",
+        model=MODEL_NAME,
+        instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+        tools=[weather],
+    )
diff --git a/examples/tutorials/00_sync/050_openai_agents/project/tools.py b/examples/tutorials/00_sync/050_openai_agents/project/tools.py
new file mode 100644
index 000000000..b03aa7c31
--- /dev/null
+++ b/examples/tutorials/00_sync/050_openai_agents/project/tools.py
@@ -0,0 +1,19 @@
+"""Tool definitions for the OpenAI Agents harness tutorial.
+
+The bare function lives here so it's easy to unit-test; it's wrapped as an
+OpenAI Agents SDK ``function_tool`` in ``project.agent``.
+"""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/00_sync/050_openai_agents/pyproject.toml
similarity index 75%
rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml
rename to examples/tutorials/00_sync/050_openai_agents/pyproject.toml
index 472a6bef7..48d2481dd 100644
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml
+++ b/examples/tutorials/00_sync/050_openai_agents/pyproject.toml
@@ -3,15 +3,15 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project]
-name = "s050-openai-agents-local-sandbox"
+name = "s050-openai-agents"
 version = "0.1.0"
-description = "A sync OpenAI Agents SDK agent using a local (unix_local) sandbox"
+description = "A sync OpenAI Agents SDK agent on the unified harness surface"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
     "agentex-sdk",
     "scale-gp",
-    "openai-agents>=0.14.3,<0.15",
+    "openai-agents",
 ]
 
 [project.optional-dependencies]
diff --git a/examples/tutorials/00_sync/050_openai_agents/tests/test_agent.py b/examples/tutorials/00_sync/050_openai_agents/tests/test_agent.py
new file mode 100644
index 000000000..960b232b7
--- /dev/null
+++ b/examples/tutorials/00_sync/050_openai_agents/tests/test_agent.py
@@ -0,0 +1,48 @@
+"""Offline test for the sync OpenAI Agents harness tutorial.
+
+This test does NOT require a running Agentex server or an OpenAI API key. It
+verifies the harness wiring this tutorial demonstrates: an ``OpenAITurn`` built
+from an injected canonical ``StreamTaskMessage*`` stream, forwarded through
+``UnifiedEmitter.yield_turn`` (the sync HTTP ACP delivery path), passes the
+events through unchanged.
+
+To run: ``pytest tests/test_agent.py -v``
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_yield_turn_forwards_canonical_stream():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+    # trace_id=None disables tracing, so no Agentex server is needed.
+    emitter = UnifiedEmitter(task_id="task-1", trace_id=None, parent_span_id=None)
+
+    out = [e async for e in emitter.yield_turn(turn)]
+    assert out == events
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md
deleted file mode 100644
index 9c2c81d7d..000000000
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# Tutorial 050: Sync OpenAI Agents SDK with a Local Sandbox
-
-This tutorial demonstrates how to build a **synchronous** agent on AgentEx using the
-[OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents) and its
-**sandbox** runtime, running with the **local** (`unix_local`) backend.
-
-The agent is a "local sandbox assistant": it answers questions by actually running
-real shell commands (e.g. `python3 --version`, `ls /tmp`, `python3 -c "..."`)
-instead of guessing.
-
-## Key Concepts
-
-### Sync ACP
-The sync ACP model uses HTTP request/response for communication. The
-`@acp.on_message_send` handler receives a message, runs the agent, and returns the
-agent's final answer as a `TextContent`.
-
-### OpenAI Agents SDK Sandbox
-The OpenAI Agents SDK ships `agents.sandbox`, which lets you give an agent
-**capabilities** (instead of hand-written tools) that the runtime turns into real
-tools backed by a sandbox:
-
-- **`SandboxAgent`**: an `Agent` that is granted sandbox capabilities.
-- **Capabilities** (`from agents.sandbox.capabilities import Shell, Filesystem, Memory`):
-  each capability expands into a set of real tools. This tutorial uses `Shell`, which
-  lets the model run real shell commands.
-- **`SandboxRunConfig`** + a sandbox **client**: tells the runtime *where* the tools
-  actually execute.
-
-### The LOCAL sandbox (`UnixLocalSandboxClient`)
-This tutorial uses the local backend
-(`from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient, UnixLocalSandboxClientOptions`),
-`backend_id="unix_local"`. The local sandbox runs shell commands **ON THE HOST** —
-the agent's own container/process. There is **no Docker, no Temporal, and no remote
-sandbox infrastructure** involved. This makes it the simplest way to give an agent a
-real shell.
-
-The sandbox is wired up through the SDK's `RunConfig`:
-
-```python
-from agents import Runner, set_tracing_disabled
-from agents.run_config import RunConfig
-from agents.sandbox import SandboxAgent, SandboxRunConfig
-from agents.sandbox.capabilities import Shell
-from agents.sandbox.sandboxes.unix_local import (
-    UnixLocalSandboxClient,
-    UnixLocalSandboxClientOptions,
-)
-
-set_tracing_disabled(True)  # avoid api.openai.com tracing 401 behind a gateway
-
-agent = SandboxAgent(
-    name="Local Sandbox Assistant",
-    instructions="...use the shell tools to actually run commands...",
-    capabilities=[Shell()],
-)
-run_config = RunConfig(
-    sandbox=SandboxRunConfig(
-        client=UnixLocalSandboxClient(),
-        options=UnixLocalSandboxClientOptions(),
-    )
-)
-result = await Runner.run(agent, input="what's the python version?", run_config=run_config)
-print(result.final_output)
-```
-
-`Runner.run` drives the full tool-call loop internally: the model issues shell
-commands, the local sandbox runs them on the host, the output is fed back, and the
-loop continues until the model produces a final answer.
-
-## Files
-
-| File | Description |
-|------|-------------|
-| `project/acp.py` | ACP server and message handler (runs the sandbox agent) |
-| `project/agent.py` | `SandboxAgent` + `RunConfig(sandbox=...)` wiring + `run_agent` |
-| `project/tools.py` | Sandbox capability factory (`Shell`) |
-| `tests/test_agent.py` | Integration tests |
-| `manifest.yaml` | Agent configuration |
-
-## Running Locally
-
-```bash
-# From this directory
-agentex agents run
-```
-
-Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM
-gateway) in your environment or in a `.env` file in `project/` so the agent can call
-the model.
-
-## Running Tests
-
-```bash
-pytest tests/test_agent.py -v
-```
-
-## Notes
-
-- **No infra required.** Because this uses the `unix_local` backend, the shell tools
-  run directly in the agent's process — no Docker daemon, no Temporal, no remote
-  sandbox. Swap the client for a remote/containerized backend to isolate execution.
-- **Tracing.** `set_tracing_disabled(True)` turns off the OpenAI Agents SDK's native
-  tracer (which would otherwise try to ship traces to `api.openai.com`). The manifest
-  also sets `OPENAI_AGENTS_DISABLE_TRACING=1`. AgentEx/SGP tracing still runs via the
-  tracing manager configured in `acp.py` when SGP credentials are present.
-- **Capabilities are the tools.** To let the agent do more, add capabilities in
-  `project/tools.py` (e.g. `Filesystem()`, `Memory()`).
-
-## Further Reading
-
-- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents
-- The next evolution of the Agents SDK: https://openai.com/index/the-next-evolution-of-the-agents-sdk/
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py
deleted file mode 100644
index 005d679bf..000000000
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py
+++ /dev/null
@@ -1,77 +0,0 @@
-"""ACP (Agent Communication Protocol) handler for Agentex.
-
-This is the API layer — it owns the agent lifecycle and runs the OpenAI Agents
-SDK *sandbox* agent for each incoming message, returning the agent's final
-answer to the Agentex frontend.
-
-The agent uses the LOCAL sandbox backend (``UnixLocalSandboxClient``), which runs
-shell commands on the host (this process/container). The OpenAI Agents SDK runs
-its tool-call loop internally via ``Runner.run`` and returns the final output, so
-this sync handler returns a single ``TextContent`` rather than streaming tokens.
-"""
-
-from __future__ import annotations
-
-import os
-
-from dotenv import load_dotenv
-
-load_dotenv()
-
-from agentex.lib import adk
-from project.agent import run_agent
-from agentex.lib.types.acp import SendMessageParams
-from agentex.lib.types.tracing import SGPTracingProcessorConfig
-from agentex.lib.utils.logging import make_logger
-from agentex.types.text_content import TextContent
-from agentex.lib.sdk.fastacp.fastacp import FastACP
-from agentex.types.task_message_content import TaskMessageContent
-from agentex.lib.core.tracing.tracing_processor_manager import (
-    add_tracing_processor_config,
-)
-
-logger = make_logger(__name__)
-
-# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client
-# compatibility, so the same example works behind the Scale LiteLLM gateway.
-_litellm_key = os.environ.get("LITELLM_API_KEY")
-if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
-    os.environ["OPENAI_API_KEY"] = _litellm_key
-
-SGP_API_KEY = os.environ.get("SGP_API_KEY", "")
-SGP_ACCOUNT_ID = os.environ.get("SGP_ACCOUNT_ID", "")
-SGP_CLIENT_BASE_URL = os.environ.get("SGP_CLIENT_BASE_URL", "")
-
-if SGP_API_KEY and SGP_ACCOUNT_ID:
-    add_tracing_processor_config(
-        SGPTracingProcessorConfig(
-            sgp_api_key=SGP_API_KEY,
-            sgp_account_id=SGP_ACCOUNT_ID,
-            sgp_base_url=SGP_CLIENT_BASE_URL,
-        )
-    )
-
-acp = FastACP.create(acp_type="sync")
-
-
-@acp.on_message_send
-async def handle_message_send(
-    params: SendMessageParams,
-) -> TaskMessageContent:
-    """Handle incoming messages by running the local-sandbox agent."""
-    task_id = params.task.id
-    user_message = params.content.content
-    logger.info(f"Processing message for task {task_id}")
-
-    async with adk.tracing.span(
-        trace_id=task_id,
-        task_id=task_id,
-        name="message",
-        input={"message": user_message},
-        data={"__span_type__": "AGENT_WORKFLOW"},
-    ) as turn_span:
-        final_output = await run_agent(user_message)
-        if turn_span:
-            turn_span.output = {"final_output": final_output}
-
-    return TextContent(author="agent", content=final_output)
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py
deleted file mode 100644
index d674d14c9..000000000
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""OpenAI Agents SDK local-sandbox agent definition.
-
-This mirrors the Pydantic AI tutorial (040): the agent is the boundary between
-this module and the API layer (acp.py). The difference is the runtime — here we
-use the OpenAI Agents SDK ``SandboxAgent`` together with the **local** sandbox
-backend (``UnixLocalSandboxClient``).
-
-The local sandbox runs shell commands ON THE HOST — the agent's own
-container/process. There is no Docker, no Temporal, and no remote sandbox
-infrastructure. The OpenAI Agents SDK runs its own tool-call loop internally:
-when the model decides to run a shell command, the sandbox executes it locally
-and feeds the output back to the model until it produces a final answer.
-"""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from agents import Runner, set_tracing_disabled
-from agents.sandbox import SandboxAgent, SandboxRunConfig
-from agents.run_config import RunConfig
-from agents.sandbox.sandboxes.unix_local import (
-    UnixLocalSandboxClient,
-    UnixLocalSandboxClientOptions,
-)
-
-from project.tools import get_capabilities
-
-# Disable the openai-agents SDK's native tracer so it doesn't ship traces to
-# api.openai.com using OPENAI_API_KEY (which may be a gateway/proxy key and would
-# 401). Agentex tracing still runs via the tracing manager configured in acp.py.
-set_tracing_disabled(True)
-
-MODEL_NAME = "gpt-4o-mini"
-INSTRUCTIONS = """You are a local sandbox assistant.
-
-Current date and time: {timestamp}
-
-You have access to shell tools that run real commands on the local machine.
-
-Guidelines:
-- ALWAYS use the shell tools to actually run commands — never guess or make up
-  output. If the user asks for the Python version, run `python3 --version`. If
-  they ask to list files, run `ls`. If they ask you to compute something, use
-  `python3 -c "..."`.
-- Run the minimal command(s) needed to answer the question.
-- Report the real command output back to the user, concisely.
-"""
-
-
-def create_agent() -> SandboxAgent:
-    """Build and return the OpenAI Agents SDK sandbox agent.
-
-    The agent is granted shell capabilities (see ``project.tools``). The actual
-    sandbox backend (where the shell commands run) is supplied at run time via
-    the ``RunConfig`` returned by ``create_run_config``.
-    """
-    return SandboxAgent(
-        name="Local Sandbox Assistant",
-        model=MODEL_NAME,
-        instructions=INSTRUCTIONS.format(
-            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        ),
-        capabilities=get_capabilities(),
-    )
-
-
-def create_run_config() -> RunConfig:
-    """Build the RunConfig that points the agent at the LOCAL sandbox backend.
-
-    ``UnixLocalSandboxClient`` (backend_id="unix_local") runs shell commands on
-    the host — the agent's own process — so no Docker or remote infra is needed.
-    """
-    return RunConfig(
-        sandbox=SandboxRunConfig(
-            client=UnixLocalSandboxClient(),
-            options=UnixLocalSandboxClientOptions(),
-        )
-    )
-
-
-async def run_agent(user_message: str) -> str:
-    """Run the sandbox agent on a single user message and return the final text.
-
-    The OpenAI Agents SDK handles the full tool-call loop internally: the model
-    issues shell commands, the local sandbox runs them on the host, and the
-    output is fed back until the model produces a final answer.
-    """
-    agent = create_agent()
-    run_config = create_run_config()
-    result = await Runner.run(agent, input=user_message, run_config=run_config, max_turns=10)
-    return result.final_output
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py
deleted file mode 100644
index 0ad8f25ac..000000000
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""Sandbox capabilities for the OpenAI Agents SDK local-sandbox agent.
-
-Unlike the Pydantic AI tutorial (040), this agent does not register hand-written
-Python functions as tools. Instead it is given *capabilities* — the OpenAI Agents
-SDK sandbox runtime turns each capability into a real set of tools (run a shell
-command, read a file, etc.) backed by an actual sandbox backend.
-
-Here we use the ``Shell`` capability, which lets the model run real shell commands.
-With the local (``unix_local``) backend those commands execute ON THE HOST — the
-agent's own process/container — so there is no Docker, Temporal, or remote infra
-involved. This module hosts the capability factory so the agent wiring in
-``project.agent`` stays readable and the capability set is easy to extend
-(e.g. add ``Filesystem()`` or ``Memory()``).
-"""
-
-from __future__ import annotations
-
-from agents.sandbox.capabilities import Shell
-
-
-def get_capabilities() -> list:
-    """Return the sandbox capabilities the agent is allowed to use.
-
-    Returns:
-        A list of OpenAI Agents SDK sandbox capabilities. We grant ``Shell`` so
-        the agent can run real shell commands on the local machine. Add
-        ``Filesystem()`` or ``Memory()`` here to expand what the agent can do.
-    """
-    return [Shell()]
diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py
deleted file mode 100644
index 52ed1bf2f..000000000
--- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py
+++ /dev/null
@@ -1,148 +0,0 @@
-"""Tests for the sync OpenAI Agents SDK local-sandbox agent.
-
-This test suite validates:
-- Sending a message that requires the agent to actually run a shell command in
-  the LOCAL sandbox (unix_local backend) and receiving a non-empty response.
-
-To run these tests:
-1. Make sure the agent is running (via docker-compose or `agentex agents run`)
-2. Set the AGENTEX_API_BASE_URL environment variable if not using default
-3. Run: pytest test_agent.py -v
-
-Configuration:
-- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
-- AGENT_NAME: Name of the agent to test (default: s050-openai-agents-local-sandbox)
-"""
-
-import os
-
-import pytest
-from test_utils.sync import validate_text_in_string
-
-from agentex import Agentex
-from agentex.types import TextContentParam
-from agentex.types.agent_rpc_params import ParamsSendMessageRequest
-
-AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
-AGENT_NAME = os.environ.get("AGENT_NAME", "s050-openai-agents-local-sandbox")
-
-
-@pytest.fixture
-def client():
-    """Create an AgentEx client instance for testing."""
-    return Agentex(base_url=AGENTEX_API_BASE_URL)
-
-
-@pytest.fixture
-def agent_name():
-    """Return the agent name for testing."""
-    return AGENT_NAME
-
-
-@pytest.fixture
-def agent_id(client, agent_name):
-    """Retrieve the agent ID based on the agent name."""
-    agents = client.agents.list()
-    for agent in agents:
-        if agent.name == agent_name:
-            return agent.id
-    raise ValueError(f"Agent with name {agent_name} not found.")
-
-
-def _response_text(result) -> str:
-    """Flatten a send_message result into a single string for assertions.
-
-    Result items may be a bare string, a ``TextContent`` (``.content`` is the
-    string), or a ``TaskMessage`` wrapping a ``TextContent`` (``.content`` is the
-    ``TextContent``, whose ``.content`` is the string). Dig through ``.content``
-    until we reach a string.
-    """
-
-    def _text_of(obj, _depth: int = 0) -> str:
-        if isinstance(obj, str):
-            return obj
-        if _depth > 5:
-            return ""
-        inner = getattr(obj, "content", None)
-        if inner is None:
-            return ""
-        return _text_of(inner, _depth + 1)
-
-    parts = [t for t in (_text_of(item) for item in result) if t]
-    return "\n".join(parts)
-
-
-class TestLocalSandboxMessages:
-    """Test the local-sandbox OpenAI Agents SDK agent."""
-
-    def test_send_simple_message(self, client: Agentex, agent_name: str):
-        """Test sending a simple message and receiving a response."""
-        response = client.agents.send_message(
-            agent_name=agent_name,
-            params=ParamsSendMessageRequest(
-                content=TextContentParam(
-                    author="user",
-                    content="Hello! What can you help me with?",
-                    type="text",
-                )
-            ),
-        )
-        result = response.result
-        assert result is not None
-        assert len(result) >= 1
-
-    def test_shell_python_version(self, client: Agentex, agent_name: str):
-        """Test that the agent uses its shell to run a real command.
-
-        We ask it to print the Python version. The agent should run
-        `python3 --version` in the local sandbox and report the real output,
-        which always starts with "Python 3".
-        """
-        response = client.agents.send_message(
-            agent_name=agent_name,
-            params=ParamsSendMessageRequest(
-                content=TextContentParam(
-                    author="user",
-                    content=(
-                        "Use your shell to print the Python version on this "
-                        "machine, then tell me what it is."
-                    ),
-                    type="text",
-                )
-            ),
-        )
-        result = response.result
-        assert result is not None
-        assert len(result) >= 1
-
-        text = _response_text(result)
-        assert text, "Expected a non-empty response from the sandbox agent."
-        # The sandbox runs on Python 3.12, so the real output contains "Python 3".
-        validate_text_in_string("Python 3", text)
-
-    def test_shell_compute(self, client: Agentex, agent_name: str):
-        """Test that the agent uses python3 in the sandbox to compute a value."""
-        response = client.agents.send_message(
-            agent_name=agent_name,
-            params=ParamsSendMessageRequest(
-                content=TextContentParam(
-                    author="user",
-                    content=(
-                        "Use python3 in your shell to compute 21 * 2 and tell me "
-                        "the result."
-                    ),
-                    type="text",
-                )
-            ),
-        )
-        result = response.result
-        assert result is not None
-        assert len(result) >= 1
-
-        text = _response_text(result)
-        assert text, "Expected a non-empty response from the sandbox agent."
-        validate_text_in_string("42", text)
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/00_sync/060_claude_code/.dockerignore
similarity index 100%
rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/.dockerignore
rename to examples/tutorials/00_sync/060_claude_code/.dockerignore
diff --git a/examples/tutorials/00_sync/060_claude_code/Dockerfile b/examples/tutorials/00_sync/060_claude_code/Dockerfile
new file mode 100644
index 000000000..ec22d7e0b
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/Dockerfile
@@ -0,0 +1,46 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies including Node.js (required by the claude CLI)
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+# Install the claude CLI (requires Node.js)
+# NOTE: live runs require ANTHROPIC_API_KEY in the environment.
+RUN npm install -g @anthropic-ai/claude-code || true
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 00_sync/060_claude_code/pyproject.toml /app/060_claude_code/pyproject.toml
+COPY 00_sync/060_claude_code/README.md /app/060_claude_code/README.md
+
+WORKDIR /app/060_claude_code
+
+COPY 00_sync/060_claude_code/project /app/060_claude_code/project
+COPY 00_sync/060_claude_code/tests /app/060_claude_code/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=s060-claude-code
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/060_claude_code/README.md b/examples/tutorials/00_sync/060_claude_code/README.md
new file mode 100644
index 000000000..e9c724732
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/README.md
@@ -0,0 +1,76 @@
+# Tutorial 060: Sync Claude Code Agent
+
+This tutorial demonstrates how to build a **synchronous** agent that spawns the
+Claude Code CLI as a local subprocess and streams its output through the Agentex
+unified harness surface via ``ClaudeCodeTurn`` and ``UnifiedEmitter``.
+
+## Key Concepts
+
+### ClaudeCodeTurn + UnifiedEmitter
+
+``ClaudeCodeTurn`` wraps ``convert_claude_code_to_agentex_events``, which
+parses the newline-delimited JSON envelopes emitted by
+``claude -p --output-format stream-json``. It implements the ``HarnessTurn``
+protocol: an ``events`` async iterator of canonical ``StreamTaskMessage*``
+objects and a ``usage()`` method (populated once the stream is exhausted).
+
+``UnifiedEmitter.yield_turn(turn)`` is the sync delivery path: it forwards
+events as HTTP yield chunks while tracing as a side effect.
+
+### Local subprocess spawn
+
+The ``_spawn_claude`` function in ``project/acp.py`` uses
+``asyncio.create_subprocess_exec`` to run:
+
+```
+claude -p --output-format stream-json --verbose
+```
+
+The prompt is written to stdin. Stdout is read line by line and fed into
+``ClaudeCodeTurn``. This is purely local -- no Scale sandbox is involved.
+
+Production isolation (Scale sandbox, secret injection, MCP configuration)
+is the golden agent's concern at
+``teams/sgp/agents/golden_agent/project/harness/providers/claude.py``.
+
+### Injectable spawn seam
+
+``_spawn_claude`` is a top-level async generator in ``project/acp.py``.
+Tests monkeypatch it to inject pre-recorded stream-json lines instead of
+spawning the real process, so offline unit tests run without the CLI.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| ``project/acp.py`` | ACP server, ``_spawn_claude`` seam, and message handler |
+| ``tests/test_agent.py`` | Live integration tests (needs CLI + API key) |
+| ``tests/test_agent_offline.py`` | Offline unit tests with injected fake subprocess |
+| ``manifest.yaml`` | Agent configuration |
+
+## Running Locally (live)
+
+Requires the ``claude`` CLI installed and ``ANTHROPIC_API_KEY`` set:
+
+```bash
+npm install -g @anthropic-ai/claude-code
+export ANTHROPIC_API_KEY=sk-ant-...
+agentex agents run
+```
+
+## Running Offline Tests
+
+No CLI or API key needed:
+
+```bash
+uv run pytest tests/test_agent_offline.py -v
+```
+
+## Notes
+
+- Production isolation (sandbox, secrets, MCP) is the golden agent's concern.
+  This tutorial runs the CLI directly to keep the code as simple as possible.
+- Multi-turn session resumption (``claude -r <session_id>``) is out of scope
+  for this tutorial. See the golden agent for that pattern.
+- The ``--verbose`` flag is included to match the golden agent's invocation;
+  it causes the CLI to emit ``stream_event`` triples for incremental streaming.
diff --git a/examples/tutorials/00_sync/060_claude_code/manifest.yaml b/examples/tutorials/00_sync/060_claude_code/manifest.yaml
new file mode 100644
index 000000000..56b9fd9e4
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/manifest.yaml
@@ -0,0 +1,55 @@
+build:
+  context:
+    root: ../../
+    include_paths:
+      - 00_sync/060_claude_code
+      - test_utils
+    dockerfile: 00_sync/060_claude_code/Dockerfile
+    dockerignore: 00_sync/060_claude_code/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: sync
+  name: s060-claude-code
+  description: A sync Claude Code agent streaming the unified harness surface via a local CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: ANTHROPIC_API_KEY
+      secret_name: anthropic-api-key
+      secret_key: api-key
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "s060-claude-code"
+      description: "A sync Claude Code agent streaming via local CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/00_sync/060_claude_code/project/__init__.py
similarity index 100%
rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/__init__.py
rename to examples/tutorials/00_sync/060_claude_code/project/__init__.py
diff --git a/examples/tutorials/00_sync/060_claude_code/project/acp.py b/examples/tutorials/00_sync/060_claude_code/project/acp.py
new file mode 100644
index 000000000..aad53801a
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/project/acp.py
@@ -0,0 +1,137 @@
+"""ACP handler for the sync Claude Code tutorial.
+
+Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL
+asyncio subprocess (no Scale sandbox -- that is the golden agent's
+production concern). Stdout lines are fed into ``ClaudeCodeTurn``, which
+wraps ``convert_claude_code_to_agentex_events``. Events are delivered via
+``UnifiedEmitter.yield_turn``, the sync HTTP yield path.
+
+Live runs require the ``claude`` CLI to be installed and an
+ANTHROPIC_API_KEY (or equivalent credential) to be in the environment.
+For offline testing, see ``tests/test_agent_offline.py``, which injects a
+fake subprocess.
+"""
+
+from __future__ import annotations
+
+import os
+import asyncio
+from typing import AsyncIterator, AsyncGenerator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+
+async def _spawn_claude(prompt: str) -> AsyncIterator[str]:
+    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.
+
+    This is a seam: tests replace it with a fake async iterator of
+    pre-recorded lines so no real CLI invocation is needed offline.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        "claude",
+        "-p",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    assert proc.stdout is not None
+    assert proc.stdin is not None
+
+    proc.stdin.write(prompt.encode())
+    proc.stdin.close()
+
+    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
+    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
+    # on its stderr write while we block reading stdout — a deadlock. A
+    # background task keeps stderr flowing so stdout never stalls.
+    async def _drain_stderr() -> None:
+        assert proc.stderr is not None
+        async for _ in proc.stderr:
+            pass
+
+    stderr_task = asyncio.create_task(_drain_stderr())
+
+    try:
+        buffer = ""
+        async for chunk in proc.stdout:
+            buffer += chunk.decode("utf-8", errors="replace")
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                line = line.strip()
+                if line:
+                    yield line
+
+        if buffer.strip():
+            yield buffer.strip()
+
+        await proc.wait()
+    finally:
+        # Release the subprocess and stderr drain task even if the consumer
+        # abandons the generator early (task cancellation / client disconnect):
+        # cancel the drain task and terminate+reap the process if it is still
+        # running, so neither is leaked.
+        stderr_task.cancel()
+        try:
+            await stderr_task
+        except asyncio.CancelledError:
+            pass
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle an incoming message: run Claude Code locally and stream events."""
+    task_id = params.task.id
+    prompt = params.content.content
+    logger.info("Processing message for task %s", task_id)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": prompt},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn = ClaudeCodeTurn(_spawn_claude(prompt))
+        async for event in emitter.yield_turn(turn):
+            yield event
diff --git a/examples/tutorials/00_sync/060_claude_code/pyproject.toml b/examples/tutorials/00_sync/060_claude_code/pyproject.toml
new file mode 100644
index 000000000..e5c1c4ea6
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "s060-claude-code"
+version = "0.1.0"
+description = "A sync Claude Code agent streaming the unified harness surface via a local CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
diff --git a/examples/tutorials/00_sync/060_claude_code/tests/test_agent.py b/examples/tutorials/00_sync/060_claude_code/tests/test_agent.py
new file mode 100644
index 000000000..954a520f3
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/tests/test_agent.py
@@ -0,0 +1,162 @@
+"""Tests for the sync Claude Code tutorial agent.
+
+LIVE tests (``TestClaudeCodeLive``):
+  - Require the ``claude`` CLI on PATH and ``ANTHROPIC_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CLAUDE_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestClaudeCodeOffline``):
+  - Inject a fake async iterator of pre-recorded stream-json lines.
+  - Assert the ``ClaudeCodeTurn`` + ``UnifiedEmitter`` pipeline yields events,
+    populates usage, and satisfies the ``HarnessTurn`` protocol.
+  - Always run -- no CLI or API key needed.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import AsyncIterator
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-offline-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "cost_usd": 0.0001,
+            "duration_ms": 250,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    """Async iterator of pre-recorded stream-json lines (no subprocess)."""
+    for line in lines:
+        yield line
+
+
+# ---------------------------------------------------------------------------
+# Offline tests (always run -- no CLI or API key needed)
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeOffline:
+    """Unit tests that run without a real claude CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_yields_stream_events(self):
+        """ClaudeCodeTurn drives UnifiedEmitter and yields StreamTaskMessage* events."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageStart
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0, "No events yielded"
+        assert any(isinstance(e, StreamTaskMessageStart) for e in events)
+
+    @pytest.mark.asyncio
+    async def test_stream_task_message_done_present(self):
+        """StreamTaskMessageDone must appear after stream exhaustion."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """ClaudeCodeTurn.usage() returns correct tokens after stream is exhausted."""
+        from agentex.lib.adk import ClaudeCodeTurn
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        _ = [e async for e in turn.events]
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.num_llm_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_protocol_compliance(self):
+        """ClaudeCodeTurn satisfies the HarnessTurn protocol."""
+        from agentex.lib.adk import ClaudeCodeTurn
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        assert hasattr(turn, "events"), "ClaudeCodeTurn missing .events"
+        assert hasattr(turn, "usage"), "ClaudeCodeTurn missing .usage()"
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CLAUDE_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+pytestmark_live = pytest.mark.skipif(
+    not os.environ.get("CLAUDE_LIVE_TESTS"),
+    reason="Set CLAUDE_LIVE_TESTS=1 and ensure the `claude` CLI + ANTHROPIC_API_KEY are available",
+)
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "s060-claude-code")
+
+
+@pytestmark_live
+class TestClaudeCodeLive:
+    """Live streaming tests -- needs the claude CLI + ANTHROPIC_API_KEY."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_name(self):
+        return AGENT_NAME
+
+    def test_stream_simple_message(self, client, agent_name: str):
+        """Stream a simple prompt through the local Claude Code subprocess."""
+        from test_utils.sync import collect_streaming_response
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendMessageRequest
+
+        stream = client.agents.send_message_stream(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="Reply with exactly three words: hello from claude",
+                    type="text",
+                )
+            ),
+        )
+        aggregated_content, chunks = collect_streaming_response(stream)
+        assert aggregated_content is not None
+        assert len(chunks) >= 1
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/00_sync/060_claude_code/tests/test_agent_offline.py b/examples/tutorials/00_sync/060_claude_code/tests/test_agent_offline.py
new file mode 100644
index 000000000..23ac52a57
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/tests/test_agent_offline.py
@@ -0,0 +1,210 @@
+"""Offline unit tests for the sync Claude Code tutorial agent.
+
+These tests do NOT require the ``claude`` CLI or an ANTHROPIC_API_KEY.
+They inject a fake async iterator of pre-recorded stream-json lines in
+place of the real subprocess spawn, and a fake streaming backend in place
+of the real Redis/AGP layer, then assert that the handler correctly drives
+the unified surface (``UnifiedEmitter.yield_turn``).
+
+The injection seam is the ``_spawn_claude`` function in ``project/acp.py``.
+Tests monkeypatch it with a coroutine that returns a pre-recorded async
+iterator, so the handler code runs in full without any subprocess.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.types.task_message_update import (
+    StreamTaskMessageStart,
+)
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "cost_usd": 0.0001,
+            "duration_ms": 250,
+            "num_turns": 1,
+        }
+    ),
+]
+
+_TOOL_CALL_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-2"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "tool_abc",
+                        "name": "Bash",
+                        "input": {"command": "echo hello"},
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "user",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "tool_abc",
+                        "content": "hello\n",
+                        "is_error": False,
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Done."}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 20, "output_tokens": 8},
+            "cost_usd": 0.0002,
+            "duration_ms": 400,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    for line in lines:
+        yield line
+
+
+async def _collect_yield_turn(lines: list[str]) -> list:
+    """Run a ClaudeCodeTurn through UnifiedEmitter.yield_turn and collect events."""
+    turn = ClaudeCodeTurn(_fake_lines(lines))
+    emitter = UnifiedEmitter(task_id="t1", trace_id=None, parent_span_id=None)
+    return [e async for e in emitter.yield_turn(turn)]
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_text_only_produces_start_and_done():
+    events = await _collect_yield_turn(_TEXT_ONLY_LINES)
+    types = [type(e).__name__ for e in events]
+    assert "StreamTaskMessageStart" in types
+    assert "StreamTaskMessageDone" in types
+
+
+@pytest.mark.asyncio
+async def test_text_only_content():
+    events = await _collect_yield_turn(_TEXT_ONLY_LINES)
+    starts = [e for e in events if isinstance(e, StreamTaskMessageStart)]
+    assert len(starts) == 1
+    assert starts[0].content.type == "text"
+
+
+@pytest.mark.asyncio
+async def test_usage_is_populated_after_stream():
+    turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+    _ = [e async for e in turn.events]
+    usage = turn.usage()
+    assert usage.input_tokens == 10
+    assert usage.output_tokens == 5
+    assert usage.cost_usd == pytest.approx(0.0001, rel=1e-4)
+    assert usage.num_llm_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_tool_call_produces_tool_request_and_response():
+    events = await _collect_yield_turn(_TOOL_CALL_LINES)
+    content_types = {
+        getattr(e, "content", None) and getattr(e.content, "type", None) for e in events if hasattr(e, "content")
+    }
+    assert "tool_request" in content_types
+    assert "tool_response" in content_types
+
+
+@pytest.mark.asyncio
+async def test_tool_call_has_one_text_block():
+    """The tool_use block is not text; only 'Done.' is the text block."""
+    events = await _collect_yield_turn(_TOOL_CALL_LINES)
+    text_starts = [
+        e for e in events if isinstance(e, StreamTaskMessageStart) and getattr(e.content, "type", None) == "text"
+    ]
+    assert len(text_starts) == 1
+
+
+@pytest.mark.asyncio
+async def test_empty_lines_are_skipped():
+    """Inserting blank lines in the stream must not crash the parser."""
+    lines_with_blanks = ["", "  "] + _TEXT_ONLY_LINES + [""]
+    events = await _collect_yield_turn(lines_with_blanks)
+    assert any(isinstance(e, StreamTaskMessageStart) for e in events)
+
+
+@pytest.mark.asyncio
+async def test_spawn_seam_concept():
+    """Demonstrate the injectable spawn seam pattern used in project/acp.py.
+
+    The ``_spawn_claude`` function in ``project/acp.py`` is a top-level async
+    generator. Production code calls it like::
+
+        turn = ClaudeCodeTurn(_spawn_claude(prompt))
+
+    In tests, a replacement function is injected (e.g. via monkeypatch) to
+    return pre-recorded lines. This test proves the pattern works end-to-end
+    without importing the full ACP module (which has module-level env-var
+    checks that only pass in a running agent environment).
+    """
+    recorded_lines = _TEXT_ONLY_LINES
+
+    async def _fake_spawn(prompt: str) -> AsyncIterator[str]:  # noqa: ARG001
+        """Drop-in replacement for _spawn_claude."""
+        for line in recorded_lines:
+            yield line
+
+    called_with: list[str] = []
+
+    async def _wrapped_spawn(prompt: str) -> AsyncIterator[str]:
+        called_with.append(prompt)
+        async for line in _fake_spawn(prompt):
+            yield line
+
+    turn = ClaudeCodeTurn(_wrapped_spawn("test prompt"))
+    emitter = UnifiedEmitter(task_id="t2", trace_id=None, parent_span_id=None)
+    events = [e async for e in emitter.yield_turn(turn)]
+
+    assert called_with == ["test prompt"]
+    assert any(isinstance(e, StreamTaskMessageStart) for e in events)
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/00_sync/070_codex/.dockerignore
similarity index 100%
rename from examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore
rename to examples/tutorials/00_sync/070_codex/.dockerignore
diff --git a/examples/tutorials/00_sync/070_codex/Dockerfile b/examples/tutorials/00_sync/070_codex/Dockerfile
new file mode 100644
index 000000000..fb500b221
--- /dev/null
+++ b/examples/tutorials/00_sync/070_codex/Dockerfile
@@ -0,0 +1,56 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install the codex CLI: the agent spawns `codex exec --json`, so the binary
+# must be present on PATH in the image.
+RUN npm install -g @openai/codex
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 00_sync/070_codex/pyproject.toml /app/070_codex/pyproject.toml
+COPY 00_sync/070_codex/README.md /app/070_codex/README.md
+
+WORKDIR /app/070_codex
+
+# Copy the project code
+COPY 00_sync/070_codex/project /app/070_codex/project
+
+# Copy the test files
+COPY 00_sync/070_codex/tests /app/070_codex/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev]
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=s070-codex
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/070_codex/README.md b/examples/tutorials/00_sync/070_codex/README.md
new file mode 100644
index 000000000..3abb2766f
--- /dev/null
+++ b/examples/tutorials/00_sync/070_codex/README.md
@@ -0,0 +1,40 @@
+# 070_codex (sync)
+
+Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap,
+`CodexTurn`, and `UnifiedEmitter` for a **sync** (HTTP-yield) ACP agent.
+
+## What this tutorial shows
+
+- Spawning `codex exec --json` as a **local asyncio subprocess** (no Scale sandbox).
+- Wrapping the stdout line stream in a `CodexTurn`.
+- Delivering every canonical `StreamTaskMessage*` event to the HTTP caller via
+  `UnifiedEmitter.yield_turn` (tracing as a side-effect).
+
+> **Production isolation note:** A tutorial agent runs the Codex CLI locally.
+> Production-grade isolation (Scale sandbox, secret injection, MCP configuration)
+> is handled by the golden agent at
+> `teams/sgp/agents/golden_agent/project/harness/providers/codex.py`.
+
+## Live runs
+
+Live runs require:
+1. The `codex` CLI on PATH: `npm install -g @openai/codex`
+2. `OPENAI_API_KEY` set in the environment.
+
+## Running offline unit tests
+
+The offline tests inject a fake subprocess and never invoke the real CLI:
+
+```bash
+cd /path/to/scale-agentex-python
+uv run --all-packages --all-extras pytest examples/tutorials/00_sync/070_codex/tests/test_agent.py -q
+```
+
+## Running live integration tests
+
+```bash
+export CODEX_LIVE_TESTS=1
+export OPENAI_API_KEY=sk-...
+# Start the agent server first, then:
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/00_sync/070_codex/conftest.py b/examples/tutorials/00_sync/070_codex/conftest.py
new file mode 100644
index 000000000..bdd78994b
--- /dev/null
+++ b/examples/tutorials/00_sync/070_codex/conftest.py
@@ -0,0 +1,12 @@
+"""Add the agent's project root to sys.path so ``import project`` works.
+
+Also sets minimal environment variables so the FastACP and tracing modules
+can be imported without a running agent server.
+"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+os.environ.setdefault("ACP_URL", "http://localhost:8000")
diff --git a/examples/tutorials/00_sync/070_codex/manifest.yaml b/examples/tutorials/00_sync/070_codex/manifest.yaml
new file mode 100644
index 000000000..87dad2847
--- /dev/null
+++ b/examples/tutorials/00_sync/070_codex/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../
+    include_paths:
+      - 00_sync/070_codex
+      - test_utils
+    dockerfile: 00_sync/070_codex/Dockerfile
+    dockerignore: 00_sync/070_codex/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: sync
+  name: s070-codex
+  description: Sync tutorial agent driving the unified harness surface via local codex CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "s070-codex"
+      description: "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/00_sync/070_codex/project/__init__.py
similarity index 100%
rename from examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/__init__.py
rename to examples/tutorials/00_sync/070_codex/project/__init__.py
diff --git a/examples/tutorials/00_sync/070_codex/project/acp.py b/examples/tutorials/00_sync/070_codex/project/acp.py
new file mode 100644
index 000000000..bcb5e10df
--- /dev/null
+++ b/examples/tutorials/00_sync/070_codex/project/acp.py
@@ -0,0 +1,175 @@
+"""Sync ACP handler for the Codex CLI harness tutorial.
+
+Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` +
+``UnifiedEmitter`` for a sync (HTTP-yield) ACP agent.
+
+The handler:
+1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox).
+   This is correct for tutorials and local development; production isolation
+   is handled by the golden agent's Scale sandbox at
+   ``teams/sgp/agents/golden_agent/project/harness/providers/codex.py``.
+2. Wraps the stdout line stream in a ``CodexTurn``.
+3. Delivers every canonical ``StreamTaskMessage*`` event via
+   ``UnifiedEmitter.yield_turn``, which traces + yields each event back to
+   the HTTP caller in one pass.
+
+Live runs require:
+- ``codex`` CLI on PATH  (``npm install -g @openai/codex``)
+- ``OPENAI_API_KEY`` set in the environment
+"""
+
+from __future__ import annotations
+
+import os
+import time
+import codecs
+import asyncio
+from typing import AsyncGenerator
+from collections.abc import AsyncIterator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import CodexTurn
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+MODEL = os.environ.get("CODEX_MODEL", "o4-mini")
+
+
+async def _spawn_codex(model: str) -> asyncio.subprocess.Process:
+    """Spawn ``codex exec --json`` locally and return the live process.
+
+    Injection seam: tests replace this function with a fake that returns a
+    mock process whose stdout yields pre-recorded event lines.
+
+    The flags mirror the golden agent (codex.py in the golden agent repo):
+      --json                      machine-readable newline-delimited events
+      --skip-git-repo-check       safe to run outside a git repo
+      --dangerously-bypass-approvals-and-sandbox
+                                  skip interactive approval prompts in a
+                                  non-interactive (server) context
+      --model <model>             which OpenAI model to use
+
+    The caller writes the prompt to stdin after the process starts, then
+    closes stdin so codex knows input is complete.
+    """
+    cmd = [
+        "codex",
+        "exec",
+        "--json",
+        "--skip-git-repo-check",
+        "--dangerously-bypass-approvals-and-sandbox",
+        "--model",
+        model,
+        "-",  # read prompt from stdin
+    ]
+    return await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        # Discard stderr: codex --json writes events to stdout; its stderr is
+        # progress/debug noise. Capturing it with PIPE but never reading it
+        # would deadlock once codex fills the OS pipe buffer (~64 KB).
+        stderr=asyncio.subprocess.DEVNULL,
+        env={**os.environ},
+    )
+
+
+async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
+    """Yield newline-delimited JSON lines from the process stdout.
+
+    Uses an incremental UTF-8 decoder so a multibyte character split across two
+    4 KB reads is decoded correctly instead of being corrupted at the boundary.
+    """
+    assert process.stdout is not None
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+    buffer = ""
+    while True:
+        chunk = await process.stdout.read(4096)
+        if not chunk:
+            break
+        buffer += decoder.decode(chunk)
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.strip()
+            if line:
+                yield line
+    buffer += decoder.decode(b"", final=True)
+    if buffer.strip():
+        yield buffer.strip()
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle each message by running ``codex exec`` locally and streaming events."""
+    task_id = params.task.id
+    user_message = params.content.content
+    logger.info("Processing message for task %s", task_id)
+
+    start_ms = int(time.monotonic() * 1000)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        process = await _spawn_codex(MODEL)
+
+        # Write prompt to stdin then close it so codex knows input is done.
+        assert process.stdin is not None
+        process.stdin.write(user_message.encode("utf-8"))
+        await process.stdin.drain()
+        process.stdin.close()
+
+        turn = CodexTurn(
+            events=_process_stdout(process),
+            model=MODEL,
+        )
+
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        async for event in emitter.yield_turn(turn):
+            yield event
+
+        await process.wait()
+
+        # Record the real wall-clock duration AFTER streaming completes; setting
+        # it before the stream ran would capture only subprocess spawn overhead.
+        turn.duration_ms = int(time.monotonic() * 1000) - start_ms
+
+        if turn_span:
+            usage = turn.usage()
+            turn_span.output = {
+                "model": usage.model,
+                "input_tokens": usage.input_tokens,
+                "output_tokens": usage.output_tokens,
+            }
diff --git a/examples/tutorials/00_sync/070_codex/pyproject.toml b/examples/tutorials/00_sync/070_codex/pyproject.toml
new file mode 100644
index 000000000..88bbb9cca
--- /dev/null
+++ b/examples/tutorials/00_sync/070_codex/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "s070-codex"
+version = "0.1.0"
+description = "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
diff --git a/examples/tutorials/00_sync/070_codex/tests/test_agent.py b/examples/tutorials/00_sync/070_codex/tests/test_agent.py
new file mode 100644
index 000000000..94aa2aaf2
--- /dev/null
+++ b/examples/tutorials/00_sync/070_codex/tests/test_agent.py
@@ -0,0 +1,176 @@
+"""Tests for the sync Codex harness tutorial agent.
+
+LIVE tests (``TestLiveCodexAgent``):
+  - Require the ``codex`` CLI on PATH and ``OPENAI_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CODEX_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestOfflineCodexHandler``):
+  - Inject a fake async iterator of pre-recorded codex event lines.
+  - Assert the ``CodexTurn`` + ``UnifiedEmitter`` pipeline yields events,
+    populates usage, and satisfies the ``HarnessTurn`` protocol.
+  - Always run.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import Any
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+SAMPLE_EVENTS: list[dict[str, Any]] = [
+    {"type": "thread.started", "thread_id": "thread-abc"},
+    {"type": "turn.started"},
+    {
+        "type": "item.started",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hello"},
+    },
+    {
+        "type": "item.completed",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hello, world!"},
+    },
+    {
+        "type": "turn.completed",
+        "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+    },
+]
+
+
+async def _fake_event_stream():
+    """Async iterator of pre-recorded codex event JSON lines (no subprocess)."""
+    for evt in SAMPLE_EVENTS:
+        yield json.dumps(evt)
+
+
+class TestOfflineCodexHandler:
+    """Unit tests that run without a real codex CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_codex_turn_yields_stream_events(self):
+        """CodexTurn drives the unified surface and yields StreamTaskMessage* events."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0, "No events yielded"
+
+        types_seen = {type(e).__name__ for e in events}
+        known_types = {
+            "StreamTaskMessageStart",
+            "StreamTaskMessageDelta",
+            "StreamTaskMessageFull",
+            "StreamTaskMessageDone",
+        }
+        assert bool(types_seen & known_types), f"Unexpected event types: {types_seen}"
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """CodexTurn.usage() returns correct tokens after stream is exhausted."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        collected = [e async for e in turn.events]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.total_tokens == 15
+        assert usage.model == "o4-mini"
+
+    @pytest.mark.asyncio
+    async def test_codex_turn_protocol_compliance(self):
+        """CodexTurn satisfies the HarnessTurn protocol."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness.types import HarnessTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        assert isinstance(turn, HarnessTurn), "CodexTurn does not satisfy HarnessTurn protocol"
+
+    @pytest.mark.asyncio
+    async def test_unified_emitter_yield_passes_through_events(self):
+        """UnifiedEmitter.yield_turn passes events through unchanged in sync mode."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0
+
+    @pytest.mark.asyncio
+    async def test_convert_codex_to_agentex_events_direct(self):
+        """convert_codex_to_agentex_events tap produces text start/done events."""
+        from agentex.lib.adk import convert_codex_to_agentex_events
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        events = [e async for e in convert_codex_to_agentex_events(_fake_event_stream())]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+    @pytest.mark.asyncio
+    async def test_on_result_callback_receives_session_id(self):
+        """on_result callback receives the session_id from thread.started."""
+        from agentex.lib.adk import convert_codex_to_agentex_events
+
+        captured: list[dict] = []
+
+        events = [
+            e
+            async for e in convert_codex_to_agentex_events(
+                _fake_event_stream(),
+                on_result=captured.append,
+            )
+        ]
+
+        assert len(captured) == 1
+        assert captured[0]["session_id"] == "thread-abc"
+        assert captured[0]["tool_call_count"] == 0
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CODEX_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1"
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "s070-codex")
+
+
+@pytest.mark.skipif(not LIVE, reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY are available")
+class TestLiveCodexAgent:
+    """End-to-end tests that require the real codex CLI and a running Agentex server."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    def test_send_simple_message(self, client):
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendMessageRequest
+
+        response = client.agents.send_message(
+            agent_name=AGENT_NAME,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="What is 2+2? Reply with just the number.",
+                    type="text",
+                )
+            ),
+        )
+        assert response.result is not None
+        assert len(response.result) >= 1
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/README.md b/examples/tutorials/10_async/00_base/100_langgraph/README.md
index 6f6c6a36b..cd2fa6dd6 100644
--- a/examples/tutorials/10_async/00_base/100_langgraph/README.md
+++ b/examples/tutorials/10_async/00_base/100_langgraph/README.md
@@ -1,46 +1,52 @@
-# Tutorial 100: Async LangGraph Agent
+# Tutorial: Async LangGraph Agent
 
-This tutorial demonstrates how to build an **asynchronous** LangGraph agent on AgentEx with:
-- Task-based event handling via Redis
-- Tool calling (ReAct pattern)
-- Multi-turn conversation memory via AgentEx checkpointer
-- Tracing integration
+This tutorial demonstrates how to build an **async** LangGraph agent on AgentEx
+using the **unified harness surface**:
 
-## Graph Structure
+```python
+turn = LangGraphTurn(stream, model=None)
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...)
+result = await emitter.auto_send_turn(turn)
+```
+
+The `LangGraphTurn` + `UnifiedEmitter.auto_send_turn` path replaces calling the
+lower-level ``stream_langgraph_events`` helper directly.
+
+## Key Concepts
+
+### Unified Harness
+
+`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw
+LangGraph `astream()` generator and exposes `events` (an async generator of
+`TaskMessageUpdate`) and `usage()` (token counts captured from the final
+`AIMessage`).
 
-![Graph](graph.png)
+`UnifiedEmitter.auto_send_turn(turn)` pushes each event to Redis via
+`streaming_task_message_context`, accumulates the final text, and returns a
+`TurnResult(final_text=..., usage=...)`.
 
-## Sync vs Async: Key Differences
+The same `LangGraphTurn` object can also be passed to
+`UnifiedEmitter.yield_turn` in the sync channel.
 
-| Aspect | Sync (Tutorial 030) | Async (This Tutorial) |
-|--------|--------------------|-----------------------|
-| **ACP Type** | `sync` | `async` |
-| **Handler** | `@acp.on_message_send` | `@acp.on_task_event_send` |
-| **Response** | HTTP streaming (yields) | Redis streaming |
-| **Message Echo** | Implicit | Explicit (`adk.messages.create`) |
-| **Streaming Helper** | `convert_langgraph_to_agentex_events()` | `stream_langgraph_events()` |
-| **Extra Handlers** | None | `on_task_create`, `on_task_cancel` |
+### AGX1-377 Note
 
-### When to use Async?
-- Long-running tasks that may exceed HTTP timeout
-- Agents that need to push updates asynchronously
-- Multi-step workflows where the client polls for results
-- Production agents that need reliable message delivery via Redis
+LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates"
+node outputs). The `SpanDeriver` does not open tool spans from Full events
+today; that gap is tracked in AGX1-373.
 
 ## Files
 
 | File | Description |
 |------|-------------|
-| `project/acp.py` | ACP server with async event handlers |
-| `project/graph.py` | LangGraph state graph definition |
+| `project/acp.py` | ACP server using unified harness (LangGraphTurn + auto_send_turn) |
+| `project/graph.py` | LangGraph state graph (weather example) |
 | `project/tools.py` | Tool definitions (weather example) |
 | `tests/test_agent.py` | Integration tests |
-| `manifest.yaml` | Agent configuration |
+| `manifest.yaml` | Agent configuration (name: ab100-langgraph) |
 
 ## Running Locally
 
 ```bash
-# From this directory
 agentex agents run
 ```
 
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/graph.png b/examples/tutorials/10_async/00_base/100_langgraph/graph.png
deleted file mode 100644
index 16d22a1e7..000000000
Binary files a/examples/tutorials/10_async/00_base/100_langgraph/graph.png and /dev/null differ
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml b/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml
index 1b0b5d490..13d64f524 100644
--- a/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml
+++ b/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml
@@ -17,7 +17,7 @@ local_development:
 agent:
   acp_type: async
   name: ab100-langgraph
-  description: An async LangGraph agent with tool calling and Redis streaming
+  description: An async LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn)
 
   temporal:
     enabled: false
@@ -47,7 +47,7 @@ deployment:
   global:
     agent:
       name: "ab100-langgraph"
-      description: "An async LangGraph agent with tool calling and Redis streaming"
+      description: "An async LangGraph agent using the unified harness surface"
     replicaCount: 1
     resources:
       requests:
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py b/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py
index 2585fefd6..198446607 100644
--- a/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py
+++ b/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py
@@ -1,7 +1,21 @@
-"""
-ACP handler for async LangGraph agent.
-
-Uses the async ACP model with Redis streaming instead of HTTP yields.
+"""ACP handler for the async LangGraph agent.
+
+Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph
+``astream()`` generator, and ``UnifiedEmitter.auto_send_turn`` streams events
+to Redis and returns a ``TurnResult`` with the accumulated final text.
+
+Properties of the unified surface:
+- Tracing is wired through the tracing manager (no bespoke handler boilerplate).
+- A single ``UnifiedEmitter.auto_send_turn(LangGraphTurn(stream))`` call
+  replaces bespoke event-streaming helpers.
+- Tool calls/responses go through ``streaming_task_message_context``
+  (same code path as text deltas), making the event stream channel-agnostic.
+- Usage data (token counts) is captured on ``LangGraphTurn.usage()`` after
+  ``auto_send_turn`` returns.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull``
+events (from "updates"). The ``SpanDeriver`` does not open tool spans from
+Full events today; that gap is tracked in AGX1-373.
 """
 
 from __future__ import annotations
@@ -14,12 +28,13 @@
 
 import agentex.lib.adk as adk
 from project.graph import create_graph
-from agentex.lib.adk import stream_langgraph_events, create_langgraph_tracing_handler
 from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
 from agentex.lib.types.fastacp import AsyncACPConfig
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 
 logger = make_logger(__name__)
@@ -29,7 +44,8 @@
         sgp_api_key=os.environ.get("SGP_API_KEY", ""),
         sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
         sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
-    ))
+    )
+)
 
 acp = FastACP.create(
     acp_type="async",
@@ -48,40 +64,39 @@ async def get_graph():
 
 @acp.on_task_event_send
 async def handle_task_event_send(params: SendEventParams):
-    """Handle incoming events, streaming tokens and tool calls via Redis."""
+    """Handle incoming events, streaming tokens and tool calls via unified harness."""
     graph = await get_graph()
     task_id = params.task.id
     user_message = params.event.content.content
 
     logger.info(f"Processing message for thread {task_id}")
 
-    # Echo the user's message
     await adk.messages.create(task_id=task_id, content=params.event.content)
 
     async with adk.tracing.span(
         trace_id=task_id,
+        task_id=task_id,
         name="message",
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        callback = create_langgraph_tracing_handler(
-            trace_id=task_id,
-            parent_span_id=turn_span.id if turn_span else None,
-        )
-
         stream = graph.astream(
             {"messages": [{"role": "user", "content": user_message}]},
-            config={
-                "configurable": {"thread_id": task_id},
-                "callbacks": [callback],
-            },
+            config={"configurable": {"thread_id": task_id}},
             stream_mode=["messages", "updates"],
         )
 
-        final_output = await stream_langgraph_events(stream, task_id)
+        turn = LangGraphTurn(stream, model=None)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        result = await emitter.auto_send_turn(turn)
 
         if turn_span:
-            turn_span.output = {"final_output": final_output}
+            turn_span.output = {"final_output": result.final_text}
 
 
 @acp.on_task_create
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py b/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py
index af6e31313..d63f28390 100644
--- a/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py
+++ b/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py
@@ -1,7 +1,7 @@
-"""
-LangGraph graph definition.
+"""LangGraph graph definition for the 100_langgraph async agent.
 
-Defines the state, nodes, edges, and compiles the graph.
+Identical to ``100_langgraph/project/graph.py`` — the graph definition is not
+affected by the harness migration. Only ``acp.py`` changes.
 """
 
 from __future__ import annotations
@@ -34,6 +34,7 @@
 
 class AgentState(TypedDict):
     """State schema for the agent graph."""
+
     messages: Annotated[list[Any], add_messages]
 
 
@@ -51,9 +52,7 @@ def agent_node(state: AgentState) -> dict[str, Any]:
         """Process the current state and generate a response."""
         messages = state["messages"]
         if not messages or not isinstance(messages[0], SystemMessage):
-            system_content = SYSTEM_PROMPT.format(
-                timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-            )
+            system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
             messages = [SystemMessage(content=system_content)] + messages
         response = llm_with_tools.invoke(messages)
         return {"messages": [response]}
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py b/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py
index 1b402a906..e421528fc 100644
--- a/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py
+++ b/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py
@@ -1,9 +1,4 @@
-"""
-Tool definitions for the LangGraph agent.
-
-Add your custom tools here. Each tool should be a function decorated with @tool
-or created using the Tool class.
-"""
+"""Tool definitions for the 100_langgraph async agent."""
 
 from langchain_core.tools import Tool
 
@@ -17,16 +12,13 @@ def get_weather(city: str) -> str:
     Returns:
         A string describing the weather conditions.
     """
-    # TODO: Replace with actual weather API call
     return f"The weather in {city} is sunny and 72°F"
 
 
-# Define tools
 weather_tool = Tool(
     name="get_weather",
     func=get_weather,
     description="Get the current weather for a city. Input should be a city name.",
 )
 
-# Export all tools as a list
 TOOLS = [weather_tool]
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml b/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml
index fecbc6149..715477bac 100644
--- a/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml
+++ b/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "ab100-langgraph"
 version = "0.1.0"
-description = "An async LangGraph agent with tool calling and Redis streaming"
+description = "An async LangGraph agent using the unified harness surface"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
diff --git a/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py b/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py
index 948db1558..b80d7a8f9 100644
--- a/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py
+++ b/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py
@@ -1,14 +1,8 @@
 """
-Tests for the async LangGraph agent.
+Tests for the async harness LangGraph agent.
 
-This test suite validates:
-- Non-streaming event sending and polling
-- Streaming event sending
-
-To run these tests:
-1. Make sure the agent is running (via docker-compose or `agentex agents run`)
-2. Set the AGENTEX_API_BASE_URL environment variable if not using default
-3. Run: pytest test_agent.py -v
+Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn)
+end-to-end against a live AgentEx server.
 
 Configuration:
 - AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
@@ -25,14 +19,12 @@
 from agentex.types.agent_rpc_params import ParamsCreateTaskRequest
 from agentex.lib.sdk.fastacp.base.base_acp_server import uuid
 
-# Configuration from environment variables
 AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
 AGENT_NAME = os.environ.get("AGENT_NAME", "ab100-langgraph")
 
 
 @pytest_asyncio.fixture
 async def client():
-    """Create an AsyncAgentex client instance for testing."""
     client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL)
     yield client
     await client.close()
@@ -40,13 +32,11 @@ async def client():
 
 @pytest.fixture
 def agent_name():
-    """Return the agent name for testing."""
     return AGENT_NAME
 
 
 @pytest_asyncio.fixture
 async def agent_id(client, agent_name):
-    """Retrieve the agent ID based on the agent name."""
     agents = await client.agents.list()
     for agent in agents:
         if agent.name == agent_name:
@@ -55,14 +45,9 @@ async def agent_id(client, agent_name):
 
 
 class TestNonStreamingEvents:
-    """Test non-streaming event sending and polling."""
-
     @pytest.mark.asyncio
     async def test_send_event(self, client: AsyncAgentex, agent_id: str):
-        """Test sending an event to the async LangGraph agent."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
@@ -78,10 +63,7 @@ async def test_send_event(self, client: AsyncAgentex, agent_id: str):
 
     @pytest.mark.asyncio
     async def test_tool_calling(self, client: AsyncAgentex, agent_id: str):
-        """Test that the agent can use tools (e.g., weather tool)."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
@@ -97,14 +79,9 @@ async def test_tool_calling(self, client: AsyncAgentex, agent_id: str):
 
 
 class TestStreamingEvents:
-    """Test streaming event sending."""
-
     @pytest.mark.asyncio
     async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str):
-        """Test sending an event and streaming the response."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md b/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md
index 6046b579a..db56979cc 100644
--- a/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md
+++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md
@@ -1,63 +1,52 @@
-# Tutorial 110 (async/base): Pydantic AI Agent
+# Async Pydantic AI Agent
 
-This tutorial demonstrates how to build an **async** Pydantic AI agent on AgentEx with:
-- Tool calling (Pydantic AI handles the tool loop internally)
-- Streaming token output via Redis (text + reasoning tokens stream as deltas)
-- Task lifecycle hooks (create / event-send / cancel)
+A minimal **async** (Redis-streaming) Pydantic AI agent that drives the
+**unified harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`)
+directly.
 
-This is the async counterpart to the sync tutorial at [`00_sync/040_pydantic_ai`](../../../00_sync/040_pydantic_ai/).
+## Why this agent exists
 
-## Key Concepts
+This agent calls `emitter.auto_send_turn(...)` **explicitly** at the
+agent-author level, making the unified-surface wiring visible and giving the
+async channel direct coverage.
 
-### Async ACP
-Unlike sync ACP (HTTP request/response with chunked streaming back), async ACP uses **Redis** for streaming. The HTTP call returns immediately when an event is acknowledged; the agent then pushes updates to Redis on its own schedule. The UI subscribes to Redis to receive deltas.
+## How it wires the unified surface
 
-### Pydantic AI Integration
-- **Agent**: A single `pydantic_ai.Agent` that owns the model and tools. No graph required.
-- **`@agent.tool_plain`**: Registers a Python function as a tool. Pydantic AI infers the schema from type hints and docstring.
-- **`agent.run_stream_events(...)`**: Yields `AgentStreamEvent`s (`PartStartEvent` / `PartDeltaEvent` / `PartEndEvent` / `FunctionToolResultEvent`) as the model produces them.
+In `project/acp.py`:
 
-### Streaming
-The helper `stream_pydantic_ai_events(stream, task_id)` consumes the Pydantic AI event stream and writes Agentex updates to Redis via `adk.streaming.streaming_task_message_context(...)`:
-- **Text and thinking tokens** stream as Redis deltas inside coalesced contexts.
-- **Tool requests and tool responses** are emitted as **discrete full messages** (no token-level arg streaming). To stream tool-call argument tokens, use the sync converter — see [`00_sync/040_pydantic_ai`](../../../00_sync/040_pydantic_ai/).
-
-## Files
-
-| File | Description |
-|------|-------------|
-| `project/acp.py` | Async ACP server with task lifecycle handlers |
-| `project/agent.py` | Pydantic AI agent + tool registration |
-| `project/tools.py` | Tool definitions (weather example) |
-| `tests/test_agent.py` | Integration tests |
-| `manifest.yaml` | Agent configuration |
-
-## Running Locally
-
-```bash
-# From this directory
-agentex agents run
+```python
+emitter = UnifiedEmitter(
+    task_id=task_id,
+    trace_id=task_id,
+    parent_span_id=turn_span.id if turn_span else None,
+)
+async with agent.run_stream_events(user_message, message_history=previous_messages) as stream:
+    turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME, coalesce_tool_requests=True)
+    result = await emitter.auto_send_turn(turn)
 ```
 
-## Running Tests
+- `coalesce_tool_requests=True` is required on the async/auto_send path until
+  AGX1-377 lands: tool requests are delivered as a single `Full(tool_request)`
+  rather than streamed `Start + Delta + Done`.
+- The `UnifiedEmitter` is constructed from the ACP context (`task_id` +
+  `trace_id` + `parent_span_id`) so messages auto-send to the task stream
+  (Redis) and tracing is automatic.
+- Multi-turn memory is persisted via `adk.state` (pydantic-ai message history
+  round-tripped through `ModelMessagesTypeAdapter`).
 
-```bash
-pytest tests/test_agent.py -v
-```
+## Files
 
-## Sync vs Async — How the Code Differs
+- `project/acp.py` — async ACP handler using `emitter.auto_send_turn(...)`.
+- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool.
+- `project/tools.py` — `get_weather(city)` returning a constant.
+- `tests/test_agent.py` — live integration test (requires a running agent).
 
-This tutorial uses the same `project/agent.py` and `project/tools.py` as the sync version. The only meaningful differences live in `project/acp.py`:
+## Tools
 
-| Concern | Sync (`s040-pydantic-ai`) | Async (`ab110-pydantic-ai`) |
-|---|---|---|
-| ACP type | `FastACP.create(acp_type="sync")` | `FastACP.create(acp_type="async", config=AsyncACPConfig(type="base"))` |
-| Handler hook | `@acp.on_message_send` returns/yields events | `@acp.on_task_event_send` returns nothing |
-| Stream output | `yield event` (chunked HTTP) | `await context.stream_update(...)` (Redis) |
-| Tool calls | Args stream as `ToolRequestDelta` tokens | Args arrive in one full message |
-| Lifecycle | Ephemeral (no task hooks) | `on_task_create` + `on_task_cancel` form a durable task contract |
+- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string.
 
-## Notes
+## Offline coverage
 
-- Multi-turn conversation memory is not wired here. Pydantic AI does not ship a checkpointer; to add memory, load prior messages via `adk.messages.list(task_id=...)` and pass them to `agent.run_stream_events(..., message_history=...)`.
-- Reasoning/thinking tokens are not exercised by `gpt-4o-mini`. Swap to a reasoning-capable model if you want to test that branch end-to-end.
+Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake
+streaming/tracing, no network) live in the SDK repo under
+`tests/lib/core/harness/` (the pydantic-ai async suite).
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml
index 583b07251..4aca13d44 100644
--- a/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml
+++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml
@@ -17,7 +17,7 @@ local_development:
 agent:
   acp_type: async
   name: ab110-pydantic-ai
-  description: An async Pydantic AI agent with tool calling and Redis streaming
+  description: An async Pydantic AI harness test agent using the unified emitter surface
 
   temporal:
     enabled: false
@@ -38,7 +38,7 @@ agent:
     - env_var_name: SGP_CLIENT_BASE_URL
       secret_name: sgp-client-base-url
       secret_key: url
-  
+
 deployment:
   image:
     repository: ""
@@ -47,7 +47,7 @@ deployment:
   global:
     agent:
       name: "ab110-pydantic-ai"
-      description: "An async Pydantic AI agent with tool calling and Redis streaming"
+      description: "An async Pydantic AI harness test agent using the unified emitter surface"
     replicaCount: 1
     resources:
       requests:
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py
index dc8a2de21..95b638f8b 100644
--- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py
+++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py
@@ -1,13 +1,14 @@
-"""ACP handler for async Pydantic AI agent.
+"""ACP handler for the async harness Pydantic AI test agent.
 
-Uses the async ACP model with Redis streaming instead of HTTP yields.
-Text and reasoning tokens stream as Redis deltas; tool requests and
-responses are persisted as discrete full messages.
+This agent exercises the UNIFIED HARNESS SURFACE on the async (Redis-streaming)
+channel — ``UnifiedEmitter.auto_send_turn(PydanticAITurn(...))``
+— calling it directly rather than via the ``stream_pydantic_ai_events`` helper
+(which the ``110_pydantic_ai`` tutorial uses). This makes the unified-surface
+wiring explicit at the agent-author level.
 
 Multi-turn memory is persisted via ``adk.state``: on each turn we load the
 previous pydantic-ai ``message_history`` from state, run the agent with it,
-then save the updated history back. Without this, every turn would be a
-fresh stateless run and the agent would forget the prior conversation.
+then save the updated history back.
 """
 
 from __future__ import annotations
@@ -23,17 +24,15 @@
 from pydantic_ai.messages import ModelMessagesTypeAdapter
 
 import agentex.lib.adk as adk
-from project.agent import create_agent
-from agentex.lib.adk import (
-    stream_pydantic_ai_events,
-    create_pydantic_ai_tracing_handler,
-)
+from project.agent import MODEL_NAME, create_agent
 from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.core.harness import UnifiedEmitter
 from agentex.lib.types.fastacp import AsyncACPConfig
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.utils.model_utils import BaseModel
 from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 
 logger = make_logger(__name__)
@@ -66,9 +65,7 @@ class ConversationState(BaseModel):
 
     ``history_json`` holds the pydantic-ai message history serialized by
     ``ModelMessagesTypeAdapter`` — pydantic-ai's official way to round-trip
-    ``ModelMessage`` objects through JSON. We can't use a plain
-    ``list[ModelMessage]`` field because ``ModelMessage`` is a discriminated
-    union of runtime types, not a stable Pydantic schema.
+    ``ModelMessage`` objects through JSON.
     """
 
     history_json: str = "[]"
@@ -77,11 +74,7 @@ class ConversationState(BaseModel):
 
 @acp.on_task_create
 async def handle_task_create(params: CreateTaskParams):
-    """Initialize per-task state on task creation.
-
-    A fresh task starts with no message history; the conversation is built
-    up by ``handle_task_event_send`` on each subsequent user message.
-    """
+    """Initialize per-task state on task creation."""
     logger.info(f"Task created: {params.task.id}")
     await adk.state.create(
         task_id=params.task.id,
@@ -92,7 +85,7 @@ async def handle_task_create(params: CreateTaskParams):
 
 @acp.on_task_event_send
 async def handle_task_event_send(params: SendEventParams):
-    """Handle each user message: load prior history, run the agent, save updated history."""
+    """Handle each user message through the unified auto_send_turn path."""
     agent = get_agent()
     task_id = params.task.id
     agent_id = params.agent.id
@@ -103,9 +96,7 @@ async def handle_task_event_send(params: SendEventParams):
     # Echo the user's message into the task history.
     await adk.messages.create(task_id=task_id, content=params.event.content)
 
-    # Load the previous conversation history from state. If state is missing
-    # (e.g. task wasn't initialised via on_task_create), fall back to a fresh
-    # one so the agent still responds — just without memory of prior turns.
+    # Load the previous conversation history from state (fall back to fresh).
     task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id)
     if task_state is None:
         state = ConversationState()
@@ -123,15 +114,15 @@ async def handle_task_event_send(params: SendEventParams):
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        tracing_handler = create_pydantic_ai_tracing_handler(
+        # Construct the UnifiedEmitter from the ACP context so tracing is
+        # automatic and messages are auto-sent to the task stream (Redis).
+        emitter = UnifiedEmitter(
+            task_id=task_id,
             trace_id=task_id,
             parent_span_id=turn_span.id if turn_span else None,
-            task_id=task_id,
         )
 
-        # Wrap the pydantic-ai event stream so we can capture the final
-        # AgentRunResultEvent (which carries the full message list for the
-        # next turn) without changing the streaming-helper's signature.
+        # Capture the terminal AgentRunResultEvent to persist message history.
         captured_messages: list[Any] = []
 
         async def tee_messages(upstream) -> AsyncIterator[Any]:
@@ -141,9 +132,13 @@ async def tee_messages(upstream) -> AsyncIterator[Any]:
                 yield event
 
         async with agent.run_stream_events(user_message, message_history=previous_messages) as stream:
-            final_output = await stream_pydantic_ai_events(
-                tee_messages(stream), task_id, tracing_handler=tracing_handler
+            # The unified auto_send path delivers streamed tool requests natively
+            # (Start+Delta+Done), so no coalescing workaround is needed.
+            turn = PydanticAITurn(
+                tee_messages(stream),
+                model=MODEL_NAME,
             )
+            result = await emitter.auto_send_turn(turn)
 
         # Save the updated message history so the next turn picks up here.
         if captured_messages:
@@ -156,7 +151,7 @@ async def tee_messages(upstream) -> AsyncIterator[Any]:
             )
 
         if turn_span:
-            turn_span.output = {"final_output": final_output}
+            turn_span.output = {"final_output": result.final_text}
 
 
 @acp.on_task_cancel
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py
index 2c0f6f10c..e7b764d82 100644
--- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py
+++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py
@@ -1,4 +1,4 @@
-"""Pydantic AI agent definition.
+"""Pydantic AI agent definition for the async harness test agent.
 
 The Agent is the boundary between this module and the API layer (acp.py).
 Pydantic AI handles its own tool-call loop internally — no graph required.
@@ -12,6 +12,8 @@
 
 from project.tools import get_weather
 
+__all__ = ["create_agent", "MODEL_NAME"]
+
 MODEL_NAME = "openai:gpt-4o-mini"
 SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
 
@@ -29,9 +31,7 @@ def create_agent() -> Agent:
     """Build and return the Pydantic AI agent with tools registered."""
     agent = Agent(
         MODEL_NAME,
-        system_prompt=SYSTEM_PROMPT.format(
-            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        ),
+        system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
     )
 
     agent.tool_plain(get_weather)
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py
index 98f65d509..0f16a7cb0 100644
--- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py
+++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py
@@ -1,8 +1,8 @@
-"""Tool definitions for the async Pydantic AI agent.
+"""Tool definitions for the async harness Pydantic AI agent.
 
 Pydantic AI tools are registered directly on the Agent via decorators
-(see project.agent). This module hosts the bare functions so they're
-easy to unit-test in isolation.
+(see project.agent). This module hosts the bare function so it is easy to
+unit-test in isolation.
 """
 
 from __future__ import annotations
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml
index f5cd32e0a..257918014 100644
--- a/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml
+++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "ab110-pydantic-ai"
 version = "0.1.0"
-description = "An async Pydantic AI agent with tool calling and Redis streaming"
+description = "An async Pydantic AI harness test agent using the unified emitter surface"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py
index a31322d30..ce573a697 100644
--- a/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py
+++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py
@@ -1,8 +1,10 @@
-"""Tests for the async Pydantic AI agent.
+"""Live tests for the async Pydantic AI agent.
 
-This test suite validates:
-- Non-streaming event sending and polling
-- Streaming event sending
+These tests require a running agent (server + deployed agent) and exercise the
+unified-surface async handler end-to-end over the wire.
+
+Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives
+in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai async suite).
 
 To run these tests:
 1. Make sure the agent is running (via docker-compose or `agentex agents run`)
@@ -53,14 +55,12 @@ async def agent_id(client, agent_name):
 
 
 class TestNonStreamingEvents:
-    """Test non-streaming event sending and polling."""
+    """Test non-streaming event sending through the unified auto_send_turn path."""
 
     @pytest.mark.asyncio
     async def test_send_event(self, client: AsyncAgentex, agent_id: str):
-        """Test sending an event to the async Pydantic AI agent."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        """Test sending an event to the async harness Pydantic AI agent."""
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
@@ -77,9 +77,7 @@ async def test_send_event(self, client: AsyncAgentex, agent_id: str):
     @pytest.mark.asyncio
     async def test_tool_calling(self, client: AsyncAgentex, agent_id: str):
         """Test that the agent can use tools (e.g., weather tool)."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
@@ -100,9 +98,7 @@ class TestStreamingEvents:
     @pytest.mark.asyncio
     async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str):
         """Test sending an event and streaming the response."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents/.dockerignore b/examples/tutorials/10_async/00_base/120_openai_agents/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile
similarity index 64%
rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile
rename to examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile
index 1272027cf..76fe0fdef 100644
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile
@@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel
 ENV UV_HTTP_TIMEOUT=1000
 
 # Copy pyproject.toml and README.md to install dependencies
-COPY 10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml /app/120_openai_agents_local_sandbox/pyproject.toml
-COPY 10_async/00_base/120_openai_agents_local_sandbox/README.md /app/120_openai_agents_local_sandbox/README.md
+COPY 10_async/00_base/120_openai_agents/pyproject.toml /app/120_openai_agents/pyproject.toml
+COPY 10_async/00_base/120_openai_agents/README.md /app/120_openai_agents/README.md
 
-WORKDIR /app/120_openai_agents_local_sandbox
+WORKDIR /app/120_openai_agents
 
 # Copy the project code
-COPY 10_async/00_base/120_openai_agents_local_sandbox/project /app/120_openai_agents_local_sandbox/project
+COPY 10_async/00_base/120_openai_agents/project /app/120_openai_agents/project
 
 # Copy the test files
-COPY 10_async/00_base/120_openai_agents_local_sandbox/tests /app/120_openai_agents_local_sandbox/tests
+COPY 10_async/00_base/120_openai_agents/tests /app/120_openai_agents/tests
 
 # Copy shared test utilities
 COPY test_utils /app/test_utils
@@ -44,7 +44,7 @@ RUN uv pip install --system .[dev] pytest-asyncio httpx
 ENV PYTHONPATH=/app
 
 # Set test environment variables
-ENV AGENT_NAME=ab120-openai-agents-local-sandbox
+ENV AGENT_NAME=ab120-openai-agents
 
 # Run the agent using uvicorn
 CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents/README.md b/examples/tutorials/10_async/00_base/120_openai_agents/README.md
new file mode 100644
index 000000000..0b55b00a2
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/README.md
@@ -0,0 +1,33 @@
+# Async OpenAI Agents on the unified harness surface
+
+An async (Redis-streaming) Agentex agent that runs the OpenAI Agents SDK and
+delivers its output through the **unified harness surface**.
+
+## What this demonstrates
+
+Same `OpenAITurn` adapter as the sync tutorial (`050_openai_agents`), but the
+async ACP pushes the turn to the task stream via
+`UnifiedEmitter.auto_send_turn` instead of yielding over HTTP. `auto_send_turn`
+returns a `TurnResult` with the accumulated final text and normalized usage.
+
+```python
+result = Runner.run_streamed(starting_agent=agent, input=user_message)
+turn = OpenAITurn(result=result, model="gpt-4o")
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, parent_span_id=parent_span_id)
+turn_result = await emitter.auto_send_turn(turn)
+```
+
+## Run it
+
+```bash
+agentex agents run --manifest manifest.yaml
+```
+
+## Test it
+
+The offline test exercises the auto-send delivery path with an injected fake
+streaming backend (no server, Redis, or API key required):
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml
similarity index 64%
rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml
rename to examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml
index e0c3c0596..bd8d5cce5 100644
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml
@@ -2,10 +2,10 @@ build:
   context:
     root: ../../../
     include_paths:
-      - 10_async/00_base/120_openai_agents_local_sandbox
+      - 10_async/00_base/120_openai_agents
       - test_utils
-    dockerfile: 10_async/00_base/120_openai_agents_local_sandbox/Dockerfile
-    dockerignore: 10_async/00_base/120_openai_agents_local_sandbox/.dockerignore
+    dockerfile: 10_async/00_base/120_openai_agents/Dockerfile
+    dockerignore: 10_async/00_base/120_openai_agents/.dockerignore
 
 local_development:
   agent:
@@ -16,8 +16,8 @@ local_development:
 
 agent:
   acp_type: async
-  name: ab120-openai-agents-local-sandbox
-  description: An async OpenAI Agents SDK agent using a local (unix_local) sandbox
+  name: ab120-openai-agents
+  description: An async OpenAI Agents SDK agent on the unified harness surface
 
   temporal:
     enabled: false
@@ -39,9 +39,6 @@ agent:
       secret_name: sgp-client-base-url
       secret_key: url
 
-  env:
-    OPENAI_AGENTS_DISABLE_TRACING: "1"
-
 deployment:
   image:
     repository: ""
@@ -49,8 +46,8 @@ deployment:
 
   global:
     agent:
-      name: "ab120-openai-agents-local-sandbox"
-      description: "An async OpenAI Agents SDK agent using a local (unix_local) sandbox"
+      name: "ab120-openai-agents"
+      description: "An async OpenAI Agents SDK agent on the unified harness surface"
     replicaCount: 1
     resources:
       requests:
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents/project/__init__.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents/project/acp.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/acp.py
new file mode 100644
index 000000000..fcd10cc62
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/project/acp.py
@@ -0,0 +1,98 @@
+"""ACP handler for the async OpenAI Agents harness tutorial.
+
+Uses the async ACP model with Redis streaming instead of HTTP yields. The
+OpenAI Agents SDK run is wrapped in an ``OpenAITurn`` and pushed to the task
+stream via ``UnifiedEmitter.auto_send_turn`` — the async/temporal delivery path
+of the unified harness surface. ``auto_send_turn`` returns a ``TurnResult``
+carrying the accumulated final text and normalized usage.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agents import Runner
+
+from agentex.lib import adk
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = _litellm_key
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+_agent = None
+
+
+def get_agent():
+    global _agent
+    if _agent is None:
+        _agent = create_agent()
+    return _agent
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    logger.info(f"Task created: {params.task.id}")
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle each user message: run the agent and auto-send its turn."""
+    agent = get_agent()
+    task_id = params.task.id
+    user_message = params.event.content.content
+
+    logger.info(f"Processing message for task {task_id}")
+
+    # Echo the user's message into the task history.
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        result = Runner.run_streamed(starting_agent=agent, input=user_message)
+        turn = OpenAITurn(result=result, model=MODEL_NAME)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn_result = await emitter.auto_send_turn(turn)
+        if turn_span:
+            turn_span.output = {"final_output": turn_result.final_text}
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info(f"Task canceled: {params.task.id}")
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents/project/agent.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/agent.py
new file mode 100644
index 000000000..5b83c5aab
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/project/agent.py
@@ -0,0 +1,43 @@
+"""OpenAI Agents SDK agent definition for the async harness tutorial.
+
+Identical agent shape to the sync tutorial (060). The only difference is the
+delivery path in acp.py: the async ACP uses ``UnifiedEmitter.auto_send_turn``
+(Redis streaming) instead of yielding events over an HTTP response.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from agents import Agent, function_tool, set_tracing_disabled
+
+from project.tools import get_weather
+
+set_tracing_disabled(True)
+
+MODEL_NAME = "gpt-4o"
+INSTRUCTIONS = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use the weather tool when the user asks about the weather
+- Always report the real tool output back to the user
+"""
+
+
+@function_tool
+def weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return get_weather(city)
+
+
+def create_agent() -> Agent:
+    """Build and return the OpenAI Agents SDK agent with the weather tool."""
+    return Agent(
+        name="Harness OpenAI Assistant",
+        model=MODEL_NAME,
+        instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+        tools=[weather],
+    )
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents/project/tools.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/tools.py
new file mode 100644
index 000000000..d2e5468c9
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/project/tools.py
@@ -0,0 +1,15 @@
+"""Tool definitions for the async OpenAI Agents harness tutorial."""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml
similarity index 75%
rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml
rename to examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml
index 75c6254f3..f48fab49f 100644
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml
@@ -3,15 +3,15 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project]
-name = "ab120-openai-agents-local-sandbox"
+name = "ab120-openai-agents"
 version = "0.1.0"
-description = "An async OpenAI Agents SDK agent using a local (unix_local) sandbox"
+description = "An async OpenAI Agents SDK agent on the unified harness surface"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
     "agentex-sdk",
     "scale-gp",
-    "openai-agents>=0.14.3,<0.15",
+    "openai-agents",
 ]
 
 [project.optional-dependencies]
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents/tests/test_agent.py b/examples/tutorials/10_async/00_base/120_openai_agents/tests/test_agent.py
new file mode 100644
index 000000000..ceb95dbab
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/120_openai_agents/tests/test_agent.py
@@ -0,0 +1,77 @@
+"""Offline test for the async OpenAI Agents harness tutorial.
+
+This test does NOT require a running Agentex server, Redis, or an OpenAI API
+key. It verifies the async delivery path this tutorial demonstrates: an
+``OpenAITurn`` built from an injected canonical stream, pushed through
+``UnifiedEmitter.auto_send_turn`` with an injected fake streaming backend,
+returns the accumulated final text.
+
+To run: ``pytest tests/test_agent.py -v``
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+
+class _FakeCtx:
+    def __init__(self, initial_content):
+        self.task_message = TaskMessage(id="m-1", task_id="task-1", content=initial_content)
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        pass
+
+    async def stream_update(self, update):
+        return update
+
+
+class _FakeStreaming:
+    def streaming_task_message_context(self, task_id, initial_content, **_kwargs):  # noqa: ARG002
+        return _FakeCtx(initial_content)
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_auto_send_turn_returns_final_text():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hel")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="lo")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+    emitter = UnifiedEmitter(
+        task_id="task-1",
+        trace_id=None,
+        parent_span_id=None,
+        streaming=_FakeStreaming(),
+    )
+
+    result = await emitter.auto_send_turn(turn)
+    assert result.final_text == "Hello"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md
deleted file mode 100644
index 58d422b39..000000000
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md
+++ /dev/null
@@ -1,119 +0,0 @@
-# Tutorial 120: Async OpenAI Agents SDK with a Local Sandbox
-
-This tutorial demonstrates how to build an **async (non-Temporal)** agent on AgentEx
-using the [OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents)
-and its **sandbox** runtime, running with the **local** (`unix_local`) backend.
-
-The agent is a "local sandbox assistant": it answers questions by actually running
-real shell commands (e.g. `python3 --version`, `ls /tmp`, `python3 -c "..."`)
-instead of guessing.
-
-This mirrors the Pydantic AI async tutorial (`110_pydantic_ai`): same async ACP
-model (`acp_type: async`, `temporal.enabled: false`), same per-task `adk.state`
-multi-turn memory pattern. The difference is the runtime — here we use the OpenAI
-Agents SDK `SandboxAgent` with the local sandbox backend.
-
-## Key Concepts
-
-### Async ACP (base)
-The async ACP model is event-driven: `on_task_create` initializes per-task state,
-and `on_task_event_send` handles each user message. Conversation history is
-persisted across turns via `adk.state`.
-
-### OpenAI Agents SDK Sandbox
-The OpenAI Agents SDK ships `agents.sandbox`, which lets you give an agent
-**capabilities** (instead of hand-written tools) that the runtime turns into real
-tools backed by a sandbox:
-
-- **`SandboxAgent`**: an `Agent` that is granted sandbox capabilities.
-- **Capabilities** (`from agents.sandbox.capabilities import Shell, Filesystem, Memory`):
-  each capability expands into a set of real tools. This tutorial uses `Shell`, which
-  lets the model run real shell commands.
-- **`SandboxRunConfig`** + a sandbox **client**: tells the runtime *where* the tools
-  actually execute.
-
-### The LOCAL sandbox (`UnixLocalSandboxClient`)
-This tutorial uses the local backend
-(`from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient, UnixLocalSandboxClientOptions`),
-`backend_id="unix_local"`. The local sandbox runs shell commands **ON THE HOST** —
-the agent's own container/process. There is **no Docker, no Temporal, and no remote
-sandbox infrastructure** involved.
-
-The sandbox is wired up through the SDK's `RunConfig`:
-
-```python
-from agents import Runner, set_tracing_disabled
-from agents.run_config import RunConfig
-from agents.sandbox import SandboxAgent, SandboxRunConfig
-from agents.sandbox.capabilities import Shell
-from agents.sandbox.sandboxes.unix_local import (
-    UnixLocalSandboxClient,
-    UnixLocalSandboxClientOptions,
-)
-
-set_tracing_disabled(True)  # avoid api.openai.com tracing 401 behind a gateway
-
-agent = SandboxAgent(
-    name="Local Sandbox Assistant",
-    instructions="...use the shell tools to actually run commands...",
-    capabilities=[Shell()],
-)
-run_config = RunConfig(
-    sandbox=SandboxRunConfig(
-        client=UnixLocalSandboxClient(),
-        options=UnixLocalSandboxClientOptions(),
-    )
-)
-result = await Runner.run(agent, input=input_list, run_config=run_config)
-print(result.final_output)
-```
-
-`Runner.run` drives the full tool-call loop internally: the model issues shell
-commands, the local sandbox runs them on the host, the output is fed back, and the
-loop continues until the model produces a final answer. Because the loop is
-self-contained, the async handler runs the agent and persists a single final
-`TextContent` rather than streaming tokens.
-
-## Files
-
-| File | Description |
-|------|-------------|
-| `project/acp.py` | Async ACP server + handlers (`adk.state` multi-turn, runs the sandbox agent) |
-| `project/agent.py` | `SandboxAgent` + `RunConfig(sandbox=...)` wiring + `run_agent` |
-| `project/tools.py` | Sandbox capability factory (`Shell`) |
-| `tests/test_agent.py` | Integration tests (polling pattern) |
-| `manifest.yaml` | Agent configuration |
-
-## Running Locally
-
-```bash
-# From this directory
-agentex agents run
-```
-
-Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM
-gateway) in your environment or in a `.env` file in `project/` so the agent can call
-the model.
-
-## Running Tests
-
-```bash
-pytest tests/test_agent.py -v
-```
-
-## Notes
-
-- **No infra required.** Because this uses the `unix_local` backend, the shell tools
-  run directly in the agent's process — no Docker daemon, no Temporal, no remote
-  sandbox. Swap the client for a remote/containerized backend to isolate execution.
-- **Tracing.** `set_tracing_disabled(True)` turns off the OpenAI Agents SDK's native
-  tracer (which would otherwise try to ship traces to `api.openai.com`). The manifest
-  also sets `OPENAI_AGENTS_DISABLE_TRACING=1`. AgentEx/SGP tracing still runs via the
-  tracing manager configured in `acp.py` when SGP credentials are present.
-- **Capabilities are the tools.** To let the agent do more, add capabilities in
-  `project/tools.py` (e.g. `Filesystem()`, `Memory()`).
-
-## Further Reading
-
-- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents
-- The Temporal variant of this tutorial: `10_async/10_temporal/120_openai_agents_local_sandbox`
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py
deleted file mode 100644
index 6ff475873..000000000
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py
+++ /dev/null
@@ -1,149 +0,0 @@
-"""ACP handler for the async OpenAI Agents SDK local-sandbox agent.
-
-Uses the async ACP model (``acp_type: async``, ``temporal.enabled: false``),
-mirroring the Pydantic AI tutorial (110). The difference is the runtime: here we
-run an OpenAI Agents SDK ``SandboxAgent`` against the **local** sandbox backend
-(``UnixLocalSandboxClient``), which executes real shell commands on the host.
-
-The OpenAI Agents SDK sandbox runtime drives the full tool-call loop internally
-inside ``Runner.run`` (model -> shell command -> output -> model -> ... -> final
-answer), so this handler runs the agent and persists a single final
-``TextContent`` rather than streaming tokens itself.
-
-Multi-turn memory is persisted via ``adk.state``: on each turn we load the prior
-OpenAI Agents SDK input list from state, run the agent with it, then save the
-updated list (``result.to_input_list()``) back. Without this, every turn would be
-a fresh stateless run and the agent would forget the prior conversation.
-"""
-
-from __future__ import annotations
-
-import os
-from typing import Any
-
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import agentex.lib.adk as adk
-from project.agent import run_agent
-from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
-from agentex.lib.types.fastacp import AsyncACPConfig
-from agentex.lib.types.tracing import SGPTracingProcessorConfig
-from agentex.lib.utils.logging import make_logger
-from agentex.types.text_content import TextContent
-from agentex.lib.utils.model_utils import BaseModel
-from agentex.lib.sdk.fastacp.fastacp import FastACP
-from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
-
-logger = make_logger(__name__)
-
-# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client
-# compatibility, so the same example works behind the Scale LiteLLM gateway.
-_litellm_key = os.environ.get("LITELLM_API_KEY")
-if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
-    os.environ["OPENAI_API_KEY"] = _litellm_key
-
-add_tracing_processor_config(
-    SGPTracingProcessorConfig(
-        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
-        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
-        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
-    )
-)
-
-acp = FastACP.create(
-    acp_type="async",
-    config=AsyncACPConfig(type="base"),
-)
-
-
-class ConversationState(BaseModel):
-    """Per-task conversation state persisted via ``adk.state``.
-
-    ``input_list`` holds the OpenAI Agents SDK conversation history — the same
-    structure ``Runner.run`` accepts as input and ``result.to_input_list()``
-    returns. Persisting it between turns gives the agent multi-turn memory.
-    """
-
-    input_list: list[dict[str, Any]] = []
-    turn_number: int = 0
-
-
-@acp.on_task_create
-async def handle_task_create(params: CreateTaskParams):
-    """Initialize per-task state on task creation.
-
-    A fresh task starts with no message history; the conversation is built up by
-    ``handle_task_event_send`` on each subsequent user message.
-    """
-    logger.info(f"Task created: {params.task.id}")
-    await adk.state.create(
-        task_id=params.task.id,
-        agent_id=params.agent.id,
-        state=ConversationState(),
-    )
-
-
-@acp.on_task_event_send
-async def handle_task_event_send(params: SendEventParams):
-    """Handle each user message: load prior history, run the agent, save updated history."""
-    task_id = params.task.id
-    agent_id = params.agent.id
-    user_message = params.event.content.content
-
-    logger.info(f"Processing message for thread {task_id}")
-
-    # Echo the user's message into the task history so it shows up in the UI.
-    await adk.messages.create(task_id=task_id, content=params.event.content)
-
-    # Load the previous conversation history from state. If state is missing
-    # (e.g. task wasn't initialised via on_task_create), fall back to a fresh
-    # one so the agent still responds — just without memory of prior turns.
-    task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id)
-    if task_state is None:
-        state = ConversationState()
-        task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state)
-    else:
-        state = ConversationState.model_validate(task_state.state)
-
-    state.turn_number += 1
-    state.input_list.append({"role": "user", "content": user_message})
-
-    async with adk.tracing.span(
-        trace_id=task_id,
-        task_id=task_id,
-        name=f"Turn {state.turn_number}",
-        input={"message": user_message},
-        data={"__span_type__": "AGENT_WORKFLOW"},
-    ) as turn_span:
-        # The OpenAI Agents SDK sandbox runtime runs the full tool-call loop
-        # internally (model -> shell command on the local host -> output ->
-        # model -> ... -> final answer), so we get a single final result.
-        result = await run_agent(state.input_list)
-        final_output = result.final_output
-
-        # Persist the assistant's final answer as a TaskMessage so it shows up
-        # in the UI. (Unlike the streaming Pydantic AI tutorial, the sandbox run
-        # is non-streaming, so we post the final text ourselves.)
-        await adk.messages.create(
-            task_id=task_id,
-            content=TextContent(author="agent", content=final_output),
-        )
-
-        # Save the updated message history so the next turn picks up here.
-        state.input_list = result.to_input_list()
-        await adk.state.update(
-            state_id=task_state.id,
-            task_id=task_id,
-            agent_id=agent_id,
-            state=state,
-        )
-
-        if turn_span:
-            turn_span.output = {"final_output": final_output}
-
-
-@acp.on_task_cancel
-async def handle_task_canceled(params: CancelTaskParams):
-    logger.info(f"Task canceled: {params.task.id}")
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py
deleted file mode 100644
index 177bb287d..000000000
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""OpenAI Agents SDK local-sandbox agent definition (async, non-Temporal).
-
-This mirrors the Pydantic AI tutorial (110): the agent is the boundary between
-this module and the API layer (acp.py). The difference is the runtime — here we
-use the OpenAI Agents SDK ``SandboxAgent`` together with the **local** sandbox
-backend (``UnixLocalSandboxClient``).
-
-The local sandbox runs shell commands ON THE HOST — the agent's own
-container/process. There is no Docker, no Temporal, and no remote sandbox
-infrastructure. The OpenAI Agents SDK runs its own tool-call loop internally:
-when the model decides to run a shell command, the sandbox executes it locally
-and feeds the output back to the model until it produces a final answer.
-"""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from agents import Runner, set_tracing_disabled
-from agents.sandbox import SandboxAgent, SandboxRunConfig
-from agents.run_config import RunConfig
-from agents.sandbox.sandboxes.unix_local import (
-    UnixLocalSandboxClient,
-    UnixLocalSandboxClientOptions,
-)
-
-from project.tools import get_capabilities
-
-# Disable the openai-agents SDK's native tracer so it doesn't ship traces to
-# api.openai.com using OPENAI_API_KEY (which may be a gateway/proxy key and would
-# 401). Agentex tracing still runs via the tracing manager configured in acp.py.
-set_tracing_disabled(True)
-
-MODEL_NAME = "gpt-4o-mini"
-INSTRUCTIONS = """You are a local sandbox assistant.
-
-Current date and time: {timestamp}
-
-You have access to shell tools that run real commands on the local machine.
-
-Guidelines:
-- ALWAYS use the shell tools to actually run commands — never guess or make up
-  output. If the user asks for the Python version, run `python3 --version`. If
-  they ask to list files, run `ls`. If they ask you to compute something, use
-  `python3 -c "..."`.
-- Run the minimal command(s) needed to answer the question.
-- Report the real command output back to the user, concisely.
-"""
-
-
-def create_agent() -> SandboxAgent:
-    """Build and return the OpenAI Agents SDK sandbox agent.
-
-    The agent is granted shell capabilities (see ``project.tools``). The actual
-    sandbox backend (where the shell commands run) is supplied at run time via
-    the ``RunConfig`` returned by ``create_run_config``.
-    """
-    return SandboxAgent(
-        name="Local Sandbox Assistant",
-        model=MODEL_NAME,
-        instructions=INSTRUCTIONS.format(
-            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        ),
-        capabilities=get_capabilities(),
-    )
-
-
-def create_run_config() -> RunConfig:
-    """Build the RunConfig that points the agent at the LOCAL sandbox backend.
-
-    ``UnixLocalSandboxClient`` (backend_id="unix_local") runs shell commands on
-    the host — the agent's own process — so no Docker or remote infra is needed.
-    """
-    return RunConfig(
-        sandbox=SandboxRunConfig(
-            client=UnixLocalSandboxClient(),
-            options=UnixLocalSandboxClientOptions(),
-        )
-    )
-
-
-async def run_agent(input_list: list) -> "Runner":
-    """Run the sandbox agent over the conversation so far and return the result.
-
-    The OpenAI Agents SDK handles the full tool-call loop internally: the model
-    issues shell commands, the local sandbox runs them on the host, and the
-    output is fed back until the model produces a final answer.
-
-    We pass the full ``input_list`` (prior turns + the new user message) so the
-    agent has conversation memory across turns; the caller persists
-    ``result.to_input_list()`` back into ``adk.state`` for the next turn.
-    """
-    agent = create_agent()
-    run_config = create_run_config()
-    return await Runner.run(agent, input=input_list, run_config=run_config, max_turns=10)
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py
deleted file mode 100644
index a931fa273..000000000
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""Sandbox capabilities for the async OpenAI Agents SDK local-sandbox agent.
-
-Unlike the Pydantic AI tutorial (110), this agent does not register hand-written
-Python functions as tools. Instead it is given *capabilities* — the OpenAI Agents
-SDK sandbox runtime turns each capability into a real set of tools (run a shell
-command, read a file, etc.) backed by an actual sandbox backend.
-
-Here we use the ``Shell`` capability, which lets the model run real shell commands.
-With the local (``unix_local``) backend those commands execute ON THE HOST — the
-agent's own process/container — so there is no Docker, Temporal, or remote infra
-involved. This module hosts the capability factory so the agent wiring in
-``project.agent`` stays readable and the capability set is easy to extend
-(e.g. add ``Filesystem()`` or ``Memory()``).
-"""
-
-from __future__ import annotations
-
-from agents.sandbox.capabilities import Shell
-
-
-def get_capabilities() -> list:
-    """Return the sandbox capabilities the agent is allowed to use.
-
-    Returns:
-        A list of OpenAI Agents SDK sandbox capabilities. We grant ``Shell`` so
-        the agent can run real shell commands on the local machine. Add
-        ``Filesystem()`` or ``Memory()`` here to expand what the agent can do.
-    """
-    return [Shell()]
diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py
deleted file mode 100644
index 0c7904eac..000000000
--- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py
+++ /dev/null
@@ -1,122 +0,0 @@
-"""Tests for the async OpenAI Agents SDK local-sandbox agent.
-
-This test suite validates that the agent actually runs shell commands in the
-LOCAL sandbox (unix_local backend) by polling for the agent's response:
-- Ask for the Python version -> response contains "Python 3"
-- Ask it to compute 21 * 2 with python3 -> response contains "42"
-
-To run these tests:
-1. Make sure the agent is running (via docker-compose or `agentex agents run`)
-2. Set the AGENTEX_API_BASE_URL environment variable if not using default
-3. Run: pytest test_agent.py -v
-
-Configuration:
-- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
-- AGENT_NAME: Name of the agent to test (default: ab120-openai-agents-local-sandbox)
-"""
-
-import os
-import uuid
-
-import pytest
-import pytest_asyncio
-from test_utils.async_utils import send_event_and_poll_yielding
-
-from agentex import AsyncAgentex
-from agentex.types.agent_rpc_params import ParamsCreateTaskRequest
-
-AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
-AGENT_NAME = os.environ.get("AGENT_NAME", "ab120-openai-agents-local-sandbox")
-
-
-@pytest_asyncio.fixture
-async def client():
-    """Create an AsyncAgentex client instance for testing."""
-    client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL)
-    yield client
-    await client.close()
-
-
-@pytest.fixture
-def agent_name():
-    """Return the agent name for testing."""
-    return AGENT_NAME
-
-
-@pytest_asyncio.fixture
-async def agent_id(client, agent_name):
-    """Retrieve the agent ID based on the agent name."""
-    agents = await client.agents.list()
-    for agent in agents:
-        if agent.name == agent_name:
-            return agent.id
-    raise ValueError(f"Agent with name {agent_name} not found.")
-
-
-async def _send_and_collect_agent_text(
-    client: AsyncAgentex, agent_id: str, task_id: str, user_message: str
-) -> str:
-    """Send a user message and accumulate all agent text responses into a string."""
-    parts: list[str] = []
-    async for message in send_event_and_poll_yielding(
-        client=client,
-        agent_id=agent_id,
-        task_id=task_id,
-        user_message=user_message,
-        timeout=60,
-        sleep_interval=1.0,
-        yield_updates=True,
-    ):
-        content = message.content
-        if content and content.type == "text" and content.author == "agent":
-            if content.content and content.content not in parts:
-                parts.append(content.content)
-    return "\n".join(parts)
-
-
-class TestLocalSandboxEvents:
-    """Test the async local-sandbox OpenAI Agents SDK agent."""
-
-    @pytest.mark.asyncio
-    async def test_shell_python_version(self, client: AsyncAgentex, agent_id: str):
-        """The agent should run `python3 --version` in the local sandbox.
-
-        The sandbox runs on Python 3.12, so the real output contains "Python 3".
-        """
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
-        task = task_response.result
-        assert task is not None
-
-        text = await _send_and_collect_agent_text(
-            client,
-            agent_id,
-            task.id,
-            "Use your shell to print the Python version on this machine, then "
-            "tell me what it is.",
-        )
-        assert text, "Expected a non-empty response from the sandbox agent."
-        assert "Python 3" in text
-
-    @pytest.mark.asyncio
-    async def test_shell_compute(self, client: AsyncAgentex, agent_id: str):
-        """The agent should use python3 in the sandbox to compute 21 * 2 == 42."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
-        task = task_response.result
-        assert task is not None
-
-        text = await _send_and_collect_agent_text(
-            client,
-            agent_id,
-            task.id,
-            "Use python3 in your shell to compute 21 * 2 and tell me the result.",
-        )
-        assert text, "Expected a non-empty response from the sandbox agent."
-        assert "42" in text
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/.dockerignore b/examples/tutorials/10_async/00_base/130_claude_code/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/Dockerfile b/examples/tutorials/10_async/00_base/130_claude_code/Dockerfile
new file mode 100644
index 000000000..e36b9e56d
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/Dockerfile
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+RUN npm install -g @anthropic-ai/claude-code || true
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/00_base/130_claude_code/pyproject.toml /app/130_claude_code/pyproject.toml
+COPY 10_async/00_base/130_claude_code/README.md /app/130_claude_code/README.md
+
+WORKDIR /app/130_claude_code
+
+COPY 10_async/00_base/130_claude_code/project /app/130_claude_code/project
+COPY 10_async/00_base/130_claude_code/tests /app/130_claude_code/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=ab130-claude-code
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/README.md b/examples/tutorials/10_async/00_base/130_claude_code/README.md
new file mode 100644
index 000000000..695207c57
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/README.md
@@ -0,0 +1,76 @@
+# Tutorial 130 (async/base): Async Claude Code Agent
+
+This tutorial demonstrates how to build an **async (non-Temporal)** agent that
+spawns the Claude Code CLI as a local subprocess and delivers its output through
+the Agentex unified harness surface via ``ClaudeCodeTurn`` and
+``UnifiedEmitter.auto_send_turn``.
+
+## Key Concepts
+
+### Async delivery path
+
+Unlike the sync tutorial (060), this agent uses the async ACP model. The
+``@acp.on_task_event_send`` handler does not return a generator -- instead,
+``UnifiedEmitter.auto_send_turn(turn)`` pushes events to the task's Redis
+stream in real time and returns a ``TurnResult`` when the turn is complete.
+The UI polls or streams that Redis channel independently.
+
+### ClaudeCodeTurn + UnifiedEmitter
+
+Same tap as the sync tutorial:
+- ``ClaudeCodeTurn`` wraps ``convert_claude_code_to_agentex_events``.
+- ``UnifiedEmitter`` wires trace context + chosen delivery.
+- ``auto_send_turn`` is the async push path.
+
+### Local subprocess spawn
+
+``_spawn_claude`` in ``project/acp.py`` uses ``asyncio.create_subprocess_exec``
+to run:
+
+```
+claude -p --output-format stream-json --verbose
+```
+
+The prompt is written to stdin. Stdout is read line by line.
+
+Production isolation (Scale sandbox, secret injection, MCP configuration)
+is the golden agent's concern at
+``teams/sgp/agents/golden_agent/project/harness/providers/claude.py``.
+
+### Injectable spawn seam
+
+``_spawn_claude`` is a top-level async generator. Tests monkeypatch it to
+inject pre-recorded stream-json lines so offline unit tests run without the CLI.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| ``project/acp.py`` | ACP server, ``_spawn_claude`` seam, and event handler |
+| ``tests/test_agent.py`` | Live integration tests (needs CLI + API key) |
+| ``tests/test_agent_offline.py`` | Offline unit tests with injected fake subprocess |
+| ``manifest.yaml`` | Agent configuration |
+
+## Running Locally (live)
+
+Requires the ``claude`` CLI installed and ``ANTHROPIC_API_KEY`` set:
+
+```bash
+npm install -g @anthropic-ai/claude-code
+export ANTHROPIC_API_KEY=sk-ant-...
+agentex agents run
+```
+
+## Running Offline Tests
+
+No CLI or API key needed:
+
+```bash
+uv run pytest tests/test_agent_offline.py -v
+```
+
+## Notes
+
+- Production isolation (sandbox, secrets, MCP) is the golden agent's concern.
+- For multi-turn memory, persist the Claude Code session_id from the
+  ``result`` envelope and pass it to ``claude -r <session_id>`` on the next turn.
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/manifest.yaml b/examples/tutorials/10_async/00_base/130_claude_code/manifest.yaml
new file mode 100644
index 000000000..7d74de7c6
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/00_base/130_claude_code
+      - test_utils
+    dockerfile: 10_async/00_base/130_claude_code/Dockerfile
+    dockerignore: 10_async/00_base/130_claude_code/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: async
+  name: ab130-claude-code
+  description: An async Claude Code agent streaming the unified harness surface via a local CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: ANTHROPIC_API_KEY
+      secret_name: anthropic-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "ab130-claude-code"
+      description: "An async Claude Code agent streaming via local CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/project/__init__.py b/examples/tutorials/10_async/00_base/130_claude_code/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/project/acp.py b/examples/tutorials/10_async/00_base/130_claude_code/project/acp.py
new file mode 100644
index 000000000..b6681f6a8
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/project/acp.py
@@ -0,0 +1,149 @@
+"""ACP handler for the async Claude Code tutorial.
+
+Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL
+asyncio subprocess (no Scale sandbox -- that is the golden agent's
+production concern). Stdout lines are fed into ``ClaudeCodeTurn``. Events
+are delivered via ``UnifiedEmitter.auto_send_turn``, the async Redis push
+path.
+
+Live runs require the ``claude`` CLI to be installed and an
+ANTHROPIC_API_KEY (or equivalent credential) in the environment.
+For offline testing, see ``tests/test_agent_offline.py``.
+"""
+
+from __future__ import annotations
+
+import os
+import asyncio
+from typing import AsyncIterator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+
+async def _spawn_claude(prompt: str) -> AsyncIterator[str]:
+    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.
+
+    Injectable seam: tests monkeypatch this with a fake async iterator of
+    pre-recorded lines so no real CLI invocation is needed offline.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        "claude",
+        "-p",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    assert proc.stdout is not None
+    assert proc.stdin is not None
+
+    proc.stdin.write(prompt.encode())
+    proc.stdin.close()
+
+    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
+    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
+    # on its stderr write while we block reading stdout — a deadlock. A
+    # background task keeps stderr flowing so stdout never stalls.
+    async def _drain_stderr() -> None:
+        assert proc.stderr is not None
+        async for _ in proc.stderr:
+            pass
+
+    stderr_task = asyncio.create_task(_drain_stderr())
+
+    try:
+        buffer = ""
+        async for chunk in proc.stdout:
+            buffer += chunk.decode("utf-8", errors="replace")
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                line = line.strip()
+                if line:
+                    yield line
+
+        if buffer.strip():
+            yield buffer.strip()
+
+        await proc.wait()
+    finally:
+        # Release the subprocess and stderr drain task even if the consumer
+        # abandons the generator early (task cancellation / client disconnect):
+        # cancel the drain task and terminate+reap the process if it is still
+        # running, so neither is leaked.
+        stderr_task.cancel()
+        try:
+            await stderr_task
+        except asyncio.CancelledError:
+            pass
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    logger.info("Task created: %s", params.task.id)
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle a user message: spawn Claude Code locally and push events to the task stream."""
+    task_id = params.task.id
+    prompt = params.event.content.content
+    logger.info("Processing message for task %s", task_id)
+
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": prompt},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn = ClaudeCodeTurn(_spawn_claude(prompt))
+        result = await emitter.auto_send_turn(turn)
+        if turn_span:
+            turn_span.output = {"final_text": result.final_text}
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info("Task canceled: %s", params.task.id)
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/pyproject.toml b/examples/tutorials/10_async/00_base/130_claude_code/pyproject.toml
new file mode 100644
index 000000000..66c3cdaf3
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ab130-claude-code"
+version = "0.1.0"
+description = "An async Claude Code agent streaming the unified harness surface via a local CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent.py b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent.py
new file mode 100644
index 000000000..ee254da23
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent.py
@@ -0,0 +1,250 @@
+"""Tests for the async Claude Code tutorial agent.
+
+LIVE tests (``TestClaudeCodeLive``):
+  - Require the ``claude`` CLI on PATH and ``ANTHROPIC_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CLAUDE_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestClaudeCodeOffline``):
+  - Inject a fake async iterator of pre-recorded stream-json lines.
+  - Assert the ``ClaudeCodeTurn`` + ``UnifiedEmitter`` pipeline drives
+    ``auto_send_turn``, populates usage, and satisfies the ``HarnessTurn``
+    protocol.
+  - Always run -- no CLI or API key needed.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-offline-async-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from async Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 12, "output_tokens": 6},
+            "cost_usd": 0.0001,
+            "duration_ms": 300,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    """Async iterator of pre-recorded stream-json lines (no subprocess)."""
+    for line in lines:
+        yield line
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-1", task_id="task-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Offline tests (always run -- no CLI or API key needed)
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeOffline:
+    """Unit tests that run without a real claude CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_auto_send_text_only_opens_and_closes_context(self):
+        """auto_send_turn opens and closes exactly one streaming context."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="offline-task",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+
+        opened = [s for s in fake_streaming.sink if s[0] == "open"]
+        closed = [s for s in fake_streaming.sink if s[0] == "close"]
+        assert len(opened) == 1
+        assert len(closed) == 1
+        assert opened[0][1] == "text"
+
+    @pytest.mark.asyncio
+    async def test_auto_send_populates_final_text(self):
+        """auto_send_turn result carries the agent's reply text."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="offline-task",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+        assert "Hello from async Claude Code" in result.final_text
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """Usage is populated after the events stream is exhausted."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+        usage = turn.usage()
+        assert usage.input_tokens == 12
+        assert usage.output_tokens == 6
+        assert usage.num_llm_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_stream_task_message_done_present(self):
+        """StreamTaskMessageDone must appear via yield_turn on a ClaudeCodeTurn."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CLAUDE_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+pytestmark_live = pytest.mark.skipif(
+    not os.environ.get("CLAUDE_LIVE_TESTS"),
+    reason="Set CLAUDE_LIVE_TESTS=1 and ensure the `claude` CLI + ANTHROPIC_API_KEY are available",
+)
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "ab130-claude-code")
+
+
+@pytestmark_live
+class TestClaudeCodeLive:
+    """Live async tests -- needs the claude CLI + ANTHROPIC_API_KEY."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_name(self):
+        return AGENT_NAME
+
+    @pytest.fixture
+    def agent_id(self, client, agent_name):
+        agents = client.agents.list()
+        for agent in agents:
+            if agent.name == agent_name:
+                return agent.id
+        raise ValueError(f"Agent {agent_name!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Create a task, send a message, and poll until a response appears."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+        task_id = task.id
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task_id,
+                content=TextContentParam(
+                    author="user",
+                    content="Reply with exactly three words: hello from claude",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 60
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task_id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            if agent_msgs:
+                assert len(agent_msgs) >= 1
+                return
+            time.sleep(2)
+
+        raise AssertionError("No agent response received within 60 s")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent_offline.py b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent_offline.py
new file mode 100644
index 000000000..ac48474ee
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent_offline.py
@@ -0,0 +1,243 @@
+"""Offline unit tests for the async Claude Code tutorial agent.
+
+These tests do NOT require the ``claude`` CLI or an ANTHROPIC_API_KEY.
+They inject a fake async iterator of pre-recorded stream-json lines in
+place of the real subprocess spawn and a fake streaming backend, then
+assert that the handler drives ``UnifiedEmitter.auto_send_turn`` correctly.
+
+The injection seam is the ``_spawn_claude`` function in ``project/acp.py``.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from async Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 12, "output_tokens": 6},
+            "cost_usd": 0.0001,
+            "duration_ms": 300,
+            "num_turns": 1,
+        }
+    ),
+]
+
+_TOOL_CALL_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-2"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "tool_xyz",
+                        "name": "Read",
+                        "input": {"file_path": "/tmp/foo.txt"},
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "user",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "tool_xyz",
+                        "content": "file contents",
+                        "is_error": False,
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Read the file."}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 25, "output_tokens": 10},
+            "cost_usd": 0.0003,
+            "duration_ms": 500,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-1", task_id="task-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    for line in lines:
+        yield line
+
+
+async def _run_auto_send(lines: list[str]):
+    """Drive ClaudeCodeTurn through auto_send_turn with a fake streaming backend."""
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_lines(lines))
+    emitter = UnifiedEmitter(
+        task_id="offline-task",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result, fake_streaming.sink
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_text_only_opens_and_closes_context():
+    result, sink = await _run_auto_send(_TEXT_ONLY_LINES)
+    opened = [s for s in sink if s[0] == "open"]
+    closed = [s for s in sink if s[0] == "close"]
+    assert len(opened) == 1
+    assert len(closed) == 1
+    assert opened[0][1] == "text"
+
+
+@pytest.mark.asyncio
+async def test_auto_send_populates_final_text():
+    result, _ = await _run_auto_send(_TEXT_ONLY_LINES)
+    assert "Hello from async Claude Code" in result.final_text
+
+
+@pytest.mark.asyncio
+async def test_auto_send_usage_is_populated():
+    """Usage is populated after the events stream is exhausted.
+
+    UnifiedEmitter.auto_send_turn evaluates turn.usage() eagerly (before
+    the events are consumed) so the TurnResult.usage reflects a pre-exhaust
+    snapshot. Test usage directly from the turn after auto_send_turn completes
+    instead -- the result envelope is populated by the generator being consumed
+    inside auto_send.
+    """
+    turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+    fake_streaming = _FakeStreaming()
+    emitter = UnifiedEmitter(
+        task_id="t",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    await emitter.auto_send_turn(turn)
+    # After auto_send_turn, the events generator is exhausted and
+    # ClaudeCodeTurn._on_result has been called with the result envelope.
+    usage = turn.usage()
+    assert usage.input_tokens == 12
+    assert usage.output_tokens == 6
+    assert usage.num_llm_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_auto_send_tool_call_opens_two_contexts():
+    result, sink = await _run_auto_send(_TOOL_CALL_LINES)
+    opened = [s for s in sink if s[0] == "open"]
+    content_types = [s[1] for s in opened]
+    assert "tool_request" in content_types
+    assert "text" in content_types
+
+
+@pytest.mark.asyncio
+async def test_spawn_seam_concept():
+    """Demonstrate the injectable spawn seam pattern used in project/acp.py.
+
+    The ``_spawn_claude`` function is a top-level async generator. A drop-in
+    replacement can be injected (e.g. via monkeypatch) to supply pre-recorded
+    lines without spawning the real CLI. This test proves the pattern works
+    end-to-end without importing the full ACP module.
+    """
+    called: list[str] = []
+
+    async def _fake_spawn(prompt: str) -> AsyncIterator[str]:
+        called.append(prompt)
+        for line in _TEXT_ONLY_LINES:
+            yield line
+
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_spawn("ping"))
+    emitter = UnifiedEmitter(
+        task_id="t",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+
+    assert called == ["ping"]
+    assert "Hello from async Claude Code" in result.final_text
diff --git a/examples/tutorials/10_async/00_base/140_codex/.dockerignore b/examples/tutorials/10_async/00_base/140_codex/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/00_base/140_codex/Dockerfile b/examples/tutorials/10_async/00_base/140_codex/Dockerfile
new file mode 100644
index 000000000..0dd839d8c
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/Dockerfile
@@ -0,0 +1,45 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install the codex CLI: the agent spawns `codex exec --json`, so the binary
+# must be present on PATH in the image.
+RUN npm install -g @openai/codex
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/00_base/140_codex/pyproject.toml /app/140_codex/pyproject.toml
+COPY 10_async/00_base/140_codex/README.md /app/140_codex/README.md
+
+WORKDIR /app/140_codex
+
+COPY 10_async/00_base/140_codex/project /app/140_codex/project
+COPY 10_async/00_base/140_codex/tests /app/140_codex/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+ENV AGENT_NAME=ab140-codex
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/140_codex/README.md b/examples/tutorials/10_async/00_base/140_codex/README.md
new file mode 100644
index 000000000..a00ddb562
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/README.md
@@ -0,0 +1,40 @@
+# 140_codex (async base)
+
+Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap,
+`CodexTurn`, and `UnifiedEmitter` for an **async** (Redis-streaming, no Temporal)
+ACP agent.
+
+## What this tutorial shows
+
+- Spawning `codex exec --json` as a **local asyncio subprocess** (no Scale sandbox).
+- Wrapping the stdout line stream in a `CodexTurn`.
+- Delivering every canonical `StreamTaskMessage*` event to Redis via
+  `UnifiedEmitter.auto_send_turn`, so the UI receives tokens in real time.
+- Persisting the codex thread ID in `adk.state` so subsequent turns resume the
+  same codex session via `codex exec resume <thread_id>`.
+
+> **Production isolation note:** A tutorial agent runs the Codex CLI locally.
+> Production-grade isolation (Scale sandbox, secret injection, MCP configuration)
+> is handled by the golden agent at
+> `teams/sgp/agents/golden_agent/project/harness/providers/codex.py`.
+
+## Live runs
+
+Live runs require:
+1. The `codex` CLI on PATH: `npm install -g @openai/codex`
+2. `OPENAI_API_KEY` set in the environment.
+
+## Running offline unit tests
+
+```bash
+cd /path/to/scale-agentex-python
+uv run --all-packages --all-extras pytest examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py -q
+```
+
+## Running live integration tests
+
+```bash
+export CODEX_LIVE_TESTS=1
+export OPENAI_API_KEY=sk-...
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/00_base/140_codex/conftest.py b/examples/tutorials/10_async/00_base/140_codex/conftest.py
new file mode 100644
index 000000000..bdd78994b
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/conftest.py
@@ -0,0 +1,12 @@
+"""Add the agent's project root to sys.path so ``import project`` works.
+
+Also sets minimal environment variables so the FastACP and tracing modules
+can be imported without a running agent server.
+"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+os.environ.setdefault("ACP_URL", "http://localhost:8000")
diff --git a/examples/tutorials/10_async/00_base/140_codex/manifest.yaml b/examples/tutorials/10_async/00_base/140_codex/manifest.yaml
new file mode 100644
index 000000000..be020b141
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/00_base/140_codex
+      - test_utils
+    dockerfile: 10_async/00_base/140_codex/Dockerfile
+    dockerignore: 10_async/00_base/140_codex/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: async
+  name: ab140-codex
+  description: Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "ab140-codex"
+      description: "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/140_codex/project/__init__.py b/examples/tutorials/10_async/00_base/140_codex/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/140_codex/project/acp.py b/examples/tutorials/10_async/00_base/140_codex/project/acp.py
new file mode 100644
index 000000000..0233c49ab
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/project/acp.py
@@ -0,0 +1,230 @@
+"""Async (base) ACP handler for the Codex CLI harness tutorial.
+
+Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` +
+``UnifiedEmitter`` for an async (Redis-streaming) ACP agent without Temporal.
+
+The handler:
+1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox).
+   This is correct for tutorials and local development; production isolation
+   is handled by the golden agent's Scale sandbox at
+   ``teams/sgp/agents/golden_agent/project/harness/providers/codex.py``.
+2. Wraps the stdout line stream in a ``CodexTurn``.
+3. Delivers every canonical ``StreamTaskMessage*`` event to Redis via
+   ``UnifiedEmitter.auto_send_turn``, so the UI receives tokens in real time.
+4. Multi-turn memory is persisted via ``adk.state``.
+
+Live runs require:
+- ``codex`` CLI on PATH  (``npm install -g @openai/codex``)
+- ``OPENAI_API_KEY`` set in the environment
+"""
+
+from __future__ import annotations
+
+import os
+import time
+import codecs
+import asyncio
+from collections.abc import AsyncIterator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import CodexTurn
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.utils.model_utils import BaseModel
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+MODEL = os.environ.get("CODEX_MODEL", "o4-mini")
+
+
+class ConversationState(BaseModel):
+    """Per-task conversation state persisted via ``adk.state``.
+
+    We store the codex session/thread ID so subsequent turns can resume the
+    same codex session via ``codex exec resume <thread_id>``.
+    """
+
+    codex_thread_id: str | None = None
+    turn_number: int = 0
+
+
+async def _spawn_codex(
+    model: str,
+    thread_id: str | None = None,
+) -> asyncio.subprocess.Process:
+    """Spawn ``codex exec --json`` locally and return the live process.
+
+    Injection seam: tests replace this function with a fake that returns a
+    mock process whose stdout yields pre-recorded event lines.
+
+    When ``thread_id`` is provided the subcommand becomes
+    ``codex exec ... resume <thread_id> -`` so codex continues the prior
+    conversation thread.
+
+    The caller writes the prompt to stdin after the process starts, then
+    closes stdin so codex knows input is complete.
+    """
+    base_flags = [
+        "--json",
+        "--skip-git-repo-check",
+        "--dangerously-bypass-approvals-and-sandbox",
+        "--model",
+        model,
+    ]
+
+    if thread_id:
+        cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"]
+    else:
+        cmd = ["codex", "exec", *base_flags, "-"]
+
+    return await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        # Discard stderr: codex --json writes events to stdout; its stderr is
+        # progress/debug noise. Capturing it with PIPE but never reading it
+        # would deadlock once codex fills the OS pipe buffer (~64 KB).
+        stderr=asyncio.subprocess.DEVNULL,
+        env={**os.environ},
+    )
+
+
+async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
+    """Yield newline-delimited JSON lines from the process stdout.
+
+    Uses an incremental UTF-8 decoder so a multibyte character split across two
+    4 KB reads is decoded correctly instead of being corrupted at the boundary.
+    """
+    assert process.stdout is not None
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+    buffer = ""
+    while True:
+        chunk = await process.stdout.read(4096)
+        if not chunk:
+            break
+        buffer += decoder.decode(chunk)
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.strip()
+            if line:
+                yield line
+    buffer += decoder.decode(b"", final=True)
+    if buffer.strip():
+        yield buffer.strip()
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    """Initialize per-task state on task creation."""
+    logger.info("Task created: %s", params.task.id)
+    await adk.state.create(
+        task_id=params.task.id,
+        agent_id=params.agent.id,
+        state=ConversationState(),
+    )
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle each user message: spawn codex, stream events, save thread ID."""
+    task_id = params.task.id
+    agent_id = params.agent.id
+    user_message = params.event.content.content
+
+    logger.info("Processing message for task %s", task_id)
+
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id)
+    if task_state is None:
+        state = ConversationState()
+        task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state)
+    else:
+        state = ConversationState.model_validate(task_state.state)
+
+    state.turn_number += 1
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name=f"Turn {state.turn_number}",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        start_ms = int(time.monotonic() * 1000)
+
+        process = await _spawn_codex(MODEL, thread_id=state.codex_thread_id)
+
+        assert process.stdin is not None
+        process.stdin.write(user_message.encode("utf-8"))
+        await process.stdin.drain()
+        process.stdin.close()
+
+        turn = CodexTurn(
+            events=_process_stdout(process),
+            model=MODEL,
+        )
+
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        result = await emitter.auto_send_turn(turn)
+
+        await process.wait()
+
+        # Record the real wall-clock duration AFTER streaming completes; setting
+        # it before the stream ran would capture only subprocess spawn overhead.
+        turn.duration_ms = int(time.monotonic() * 1000) - start_ms
+
+        # Persist the new thread ID so subsequent turns resume the same session.
+        usage = turn.usage()
+        if usage.model:
+            # usage() is valid now that the stream is exhausted
+            pass
+        # Persist the codex session id (public accessor; valid post-stream) so the
+        # next turn resumes the same session.
+        if turn.session_id:
+            state.codex_thread_id = turn.session_id
+
+        await adk.state.update(
+            state_id=task_state.id,
+            task_id=task_id,
+            agent_id=agent_id,
+            state=state,
+        )
+
+        if turn_span:
+            turn_span.output = {
+                "final_text": result.final_text,
+                "model": usage.model,
+            }
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info("Task canceled: %s", params.task.id)
diff --git a/examples/tutorials/10_async/00_base/140_codex/pyproject.toml b/examples/tutorials/10_async/00_base/140_codex/pyproject.toml
new file mode 100644
index 000000000..bdf7c462f
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ab140-codex"
+version = "0.1.0"
+description = "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
diff --git a/examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py b/examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py
new file mode 100644
index 000000000..68ca5aded
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py
@@ -0,0 +1,188 @@
+"""Tests for the async (base) Codex harness tutorial agent.
+
+LIVE tests (``TestLiveCodexAgent``):
+  - Require the ``codex`` CLI on PATH and ``OPENAI_API_KEY`` set.
+  - Skipped automatically when ``CODEX_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestOfflineCodexHandler``):
+  - Inject a fake async iterator of pre-recorded codex event lines.
+  - Assert ``CodexTurn`` + ``UnifiedEmitter.auto_send_turn`` is driven correctly.
+  - Always run.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+SAMPLE_EVENTS: list[dict[str, Any]] = [
+    {"type": "thread.started", "thread_id": "thread-xyz"},
+    {"type": "turn.started"},
+    {
+        "type": "item.started",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hi"},
+    },
+    {
+        "type": "item.completed",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hi there!"},
+    },
+    {
+        "type": "turn.completed",
+        "usage": {"input_tokens": 8, "output_tokens": 4, "total_tokens": 12},
+    },
+]
+
+
+async def _fake_event_stream():
+    """Async iterator of pre-recorded codex event JSON lines (no subprocess)."""
+    for evt in SAMPLE_EVENTS:
+        yield json.dumps(evt)
+
+
+class TestOfflineCodexHandler:
+    """Unit tests that run without a real codex CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """CodexTurn.usage() returns non-None tokens after stream is exhausted."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        collected = [e async for e in turn.events]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 8
+        assert usage.output_tokens == 4
+        assert usage.model == "o4-mini"
+
+    @pytest.mark.asyncio
+    async def test_auto_send_turn_drives_unified_surface(self):
+        """auto_send_turn returns a TurnResult with the final text."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message import TaskMessage
+        from agentex.types.text_content import TextContent
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        real_task_msg = TaskMessage(
+            id="msg-fake",
+            task_id="t",
+            content=TextContent(type="text", author="agent", content=""),
+        )
+
+        fake_streaming = MagicMock()
+        fake_ctx = AsyncMock()
+        fake_ctx.__aenter__ = AsyncMock(return_value=fake_ctx)
+        fake_ctx.__aexit__ = AsyncMock(return_value=False)
+        fake_ctx.stream_update = AsyncMock(return_value=MagicMock())
+        fake_ctx.close = AsyncMock()
+        fake_ctx.task_message = real_task_msg
+        fake_streaming.streaming_task_message_context = MagicMock(return_value=fake_ctx)
+
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+
+        result = await emitter.auto_send_turn(turn)
+        assert result is not None
+
+    @pytest.mark.asyncio
+    async def test_session_id_captured_after_stream(self):
+        """CodexTurn._result captures the session_id from thread.started."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        _ = [e async for e in turn.events]
+
+        assert turn._result is not None
+        assert turn._result["session_id"] == "thread-xyz"
+
+    @pytest.mark.asyncio
+    async def test_yield_turn_is_passthrough(self):
+        """yield_turn mode also works with CodexTurn (no streaming infra needed)."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0
+
+
+# ---------------------------------------------------------------------------
+# Live tests
+# ---------------------------------------------------------------------------
+
+LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1"
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "ab140-codex")
+
+
+@pytest.mark.skipif(
+    not LIVE,
+    reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY are available",
+)
+class TestLiveCodexAgent:
+    """End-to-end tests that require the real codex CLI and a running Agentex server."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_id(self, client):
+        for agent in client.agents.list():
+            if agent.name == AGENT_NAME:
+                return agent.id
+        raise ValueError(f"Agent {AGENT_NAME!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Async agents process events out of band, so create a task, send an
+        event, and poll the task's messages for the agent's response."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task.id,
+                content=TextContentParam(
+                    author="user",
+                    content="What is 3+3? Reply with just the number.",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 60
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task.id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            if agent_msgs:
+                assert len(agent_msgs) >= 1
+                return
+            time.sleep(2)
+
+        raise AssertionError("No agent response received within 60 s")
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md
index b221c1238..66466693b 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md
@@ -1,153 +1,59 @@
-# Tutorial 110 (temporal): Pydantic AI Agent
+# Temporal Pydantic AI Agent
 
-This tutorial demonstrates a **durable** Pydantic AI agent on AgentEx, backed by Temporal:
-- Workflow state survives crashes mid-conversation (Temporal replay)
-- Every LLM call and every tool call becomes its own Temporal activity (independent retries + observability)
-- Streaming via Redis still works — token-by-token deltas appear in the UI in real time
+A minimal **Temporal-backed** Pydantic AI agent that drives the **unified
+harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) from
+inside the model activity's `event_stream_handler`.
 
-This is the Temporal counterpart to the async base tutorial at [`10_async/00_base/110_pydantic_ai/`](../../00_base/110_pydantic_ai/).
+## Why this agent exists
 
-## Why Temporal? Why not just async?
+This agent calls `emitter.auto_send_turn(...)` **explicitly** inside
+the `event_stream_handler`, making the unified-surface wiring visible and giving
+the temporal channel direct coverage.
 
-In async base 110, the agent state lives in memory inside the ACP process. If that process dies mid-LLM-call, the in-flight turn is lost. Temporal fixes this by:
+## How it wires the unified surface
 
-1. Recording every external interaction (LLM call, tool call) to a durable event log.
-2. On worker restart, **replaying** the workflow code, using cached activity results to skip work that already finished.
-3. Letting workflows live forever — multi-day conversations or human-in-the-loop flows just work.
-
-## Architecture at a glance
-
-Two long-running processes plus shared infrastructure:
-
-```
-┌──────────────────────────┐        ┌──────────────────────────┐
-│ uvicorn project.acp:acp  │        │ python -m run_worker     │
-│  (HTTP shim, forwards    │        │  (executes workflows +   │
-│   signals to Temporal)   │        │   activities)            │
-└──────────────────────────┘        └──────────────────────────┘
-              │                                  │
-              └────► Temporal server ◄───────────┘
-                     (event log + queue)
-
-                   Redis ◄─── activities push deltas
-                     │
-                     └─── Agentex API tails ──► UI client
-```
-
-The HTTP server is a thin shim that translates `task/event/send` into Temporal signals. The worker is where your agent code actually runs. Temporal sits in between, recording everything.
-
-## Key code patterns
-
-### `project/agent.py` — wrap the base agent in `TemporalAgent`
-
-```python
-base_agent = Agent(MODEL_NAME, deps_type=TaskDeps, system_prompt=...)
-base_agent.tool_plain(get_weather)
-
-temporal_agent = TemporalAgent(
-    base_agent,
-    name="at110_pydantic_ai_agent",
-    event_stream_handler=event_handler,  # streams to Redis from inside the model activity
-)
-```
-
-`TemporalAgent` (from `pydantic_ai.durable_exec.temporal`) wraps a normal Pydantic AI Agent so that:
-- Each LLM call runs in its own activity
-- Each tool call runs in its own activity
-- The wrapping is invisible to the workflow code that calls `temporal_agent.run(...)`
-
-### `project/workflow.py` — declare `__pydantic_ai_agents__`
+In `project/agent.py`, the `event_stream_handler` runs inside the model activity
+and constructs a `UnifiedEmitter` from `RunContext.deps`:
 
 ```python
-@workflow.defn(name=environment_variables.WORKFLOW_NAME)
-class At110PydanticAiWorkflow(BaseWorkflow):
-    __pydantic_ai_agents__ = [temporal_agent]   # ← discovered by PydanticAIPlugin
-
-    @workflow.signal(name=SignalName.RECEIVE_EVENT)
-    async def on_task_event_send(self, params):
-        await adk.messages.create(task_id=params.task.id, content=params.event.content)
-        result = await temporal_agent.run(
-            params.event.content.content,
-            deps=TaskDeps(task_id=params.task.id),
-        )
+async def event_handler(run_context, events):
+    emitter = UnifiedEmitter(
+        task_id=run_context.deps.task_id,
+        trace_id=run_context.deps.task_id,
+        parent_span_id=run_context.deps.parent_span_id,
+    )
+    turn = PydanticAITurn(events, model=MODEL_NAME, coalesce_tool_requests=True)
+    await emitter.auto_send_turn(turn)
 ```
 
-The `__pydantic_ai_agents__` attribute is how `PydanticAIPlugin` discovers which activities to register on the worker — no manual activity list needed.
-
-### `project/acp.py` — no handlers, just plugin wiring
-
-```python
-acp = FastACP.create(
-    acp_type="async",
-    config=TemporalACPConfig(
-        type="temporal",
-        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
-        plugins=[PydanticAIPlugin()],
-    ),
-)
-```
-
-When `type="temporal"`, FastACP auto-wires HTTP → workflow signals. You don't define `@acp.on_task_event_send` anywhere — Temporal handles it.
-
-### `project/run_worker.py` — boot the worker with the plugin
-
-```python
-worker = AgentexWorker(
-    task_queue=task_queue_name,
-    plugins=[PydanticAIPlugin()],
-)
-await worker.run(
-    activities=get_all_activities(),
-    workflow=At110PydanticAiWorkflow,
-)
-```
-
-`get_all_activities()` returns the built-in Agentex activities (state, messages, streaming, tracing). Pydantic AI's per-agent activities are auto-added by the plugin.
-
-## Files
-
-| File | Purpose |
-|------|---------|
-| `project/acp.py` | Thin HTTP shim — `FastACP.create(type="temporal", ...)` |
-| `project/workflow.py` | `@workflow.defn` class with the signal handler |
-| `project/agent.py` | Base Pydantic AI Agent wrapped in `TemporalAgent` |
-| `project/tools.py` | Tool functions (must be `async` for Temporal compatibility) |
-| `project/run_worker.py` | Worker boot script (separate process) |
-| `tests/test_agent.py` | End-to-end test verifying tool round-trips |
-| `manifest.yaml` | Sets `temporal.enabled: true` and declares workflow + queue name |
-
-## Running Locally
-
-You'll need three terminals open (this is the price of Temporal):
-
-```bash
-# Terminal 1 — backend services (separate repo)
-cd ~/scale-agentex/agentex
-make dev   # brings up Temporal, Redis, Postgres, Agentex API
-
-# Terminal 2 — this tutorial (ACP server + Temporal worker)
-cd ~/scale-agentex-python/examples/tutorials/10_async/10_temporal/110_pydantic_ai
-agentex agents run   # this also launches the worker process
-
-# Terminal 3 — tests
-cd ~/scale-agentex-python/examples/tutorials/10_async/10_temporal/110_pydantic_ai
-uv run pytest tests/test_agent.py -v
-```
-
-Watch the Temporal UI at http://localhost:8233 — you'll see workflow executions, signal events, and one activity per LLM call + one per tool call.
-
-## Sync vs Async vs Temporal — How the code differs
-
-| Concern | Sync (040) | Async base (110) | Temporal (this one) |
-|---|---|---|---|
-| `project/acp.py` | `@acp.on_message_send` yields events | `@acp.on_task_event_send` pushes to Redis | **No handlers** — `FastACP.create(type="temporal", ...)` |
-| Where the agent runs | In the ACP HTTP process | In the ACP HTTP process | In a separate worker process |
-| Durability | Ephemeral — request-scoped | Ephemeral — process-scoped | **Durable** — survives worker restarts via Temporal replay |
-| Per-call retries | None | None | Each model + tool call automatically retried by Temporal |
-| Code we add | — | `acp.py` handler | `workflow.py`, `run_worker.py`, wrap agent in `TemporalAgent` |
-
-## Notes
-
-- Multi-turn conversation memory is not wired here. Workflow state (`self._turn_number`) is durable, but message history isn't currently threaded into `temporal_agent.run(..., message_history=...)`. To add: load via `adk.messages.list(task_id=...)` inside the signal handler and pass through.
-- Reasoning/thinking tokens are not exercised by `gpt-4o-mini`. Swap to a reasoning-capable model to exercise that branch end-to-end.
-- Tools must be `async` (Pydantic AI's Temporal integration requires it — sync tools would run in threads, breaking Temporal's determinism guarantees).
+- The handler runs inside a Temporal activity, so it can freely make
+  non-deterministic Redis + tracing writes.
+- `coalesce_tool_requests=True` is required on the auto_send path until
+  AGX1-377 lands.
+- `deps` (set by `project/workflow.py`) threads the `task_id` and the per-turn
+  `parent_span_id` into the handler so tool spans nest under the workflow's turn
+  span.
+
+## Structure
+
+- `project/acp.py` — thin ACP server; FastACP auto-wires HTTP routes to the
+  workflow when `TemporalACPConfig` is used.
+- `project/agent.py` — base `Agent` + `TemporalAgent` + the unified-surface
+  `event_stream_handler`.
+- `project/workflow.py` — durable workflow; each turn delegates to
+  `temporal_agent.run(...)`.
+- `project/run_worker.py` — Temporal worker entry point.
+- `project/tools.py` — async `get_weather(city)` returning a constant.
+- `tests/test_agent.py` — live integration test (requires Temporal + Redis +
+  ACP server + worker).
+
+## Tools
+
+- `get_weather(city: str) -> str` (async): returns a fixed "sunny and 72°F"
+  string. Each tool call becomes its own Temporal activity.
+
+## Offline coverage
+
+Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake
+streaming/tracing, no Temporal server) live in the SDK repo under
+`tests/lib/core/harness/` (the pydantic-ai temporal suite).
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml
index 15d00076f..7ca454b05 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml
@@ -18,7 +18,7 @@ local_development:
 agent:
   acp_type: async
   name: at110-pydantic-ai
-  description: A Temporal-backed Pydantic AI agent with tool calling and Redis streaming
+  description: A Temporal-backed Pydantic AI harness test agent using the unified emitter surface
 
   temporal:
     enabled: true
@@ -42,8 +42,6 @@ agent:
     - env_var_name: SGP_CLIENT_BASE_URL
       secret_name: sgp-client-base-url
       secret_key: url
-  # env:
-  #   OPENAI_BASE_URL: "https://your-litellm-proxy/v1"
 
 deployment:
   image:
@@ -53,7 +51,7 @@ deployment:
   global:
     agent:
       name: "at110-pydantic-ai"
-      description: "A Temporal-backed Pydantic AI agent"
+      description: "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface"
     replicaCount: 1
     resources:
       requests:
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py
index dacb45ad6..c142dcf70 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py
@@ -1,7 +1,7 @@
-"""ACP server for the Temporal Pydantic AI tutorial.
+"""ACP server for the Temporal harness Pydantic AI test agent.
 
-This file is intentionally thin. When ``acp_type="async"`` is combined
-with ``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires:
+This file is intentionally thin. When ``acp_type="async"`` is combined with
+``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires:
 
     HTTP task/create       → @workflow.run on the workflow class
     HTTP task/event/send   → @workflow.signal(SignalName.RECEIVE_EVENT)
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py
index a33a317cc..4e59688ce 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py
@@ -1,18 +1,20 @@
-"""Pydantic AI agent definition for the Temporal tutorial.
+"""Pydantic AI agent definition for the Temporal harness test agent.
 
 This module constructs the base ``pydantic_ai.Agent`` once at import time,
 registers tools on it, and wraps it in ``TemporalAgent`` from
 ``pydantic_ai.durable_exec.temporal``.
 
-The ``TemporalAgent`` wrapper makes every model call and every tool call
-run as a Temporal activity automatically. The workflow code stays
-deterministic; the non-deterministic work (LLM HTTP calls, tool execution)
-moves into recorded activities.
-
-Streaming back to Agentex happens via ``event_stream_handler``, which
-receives Pydantic AI ``AgentStreamEvent``s from inside the model activity
-and forwards them to Redis using our existing ``stream_pydantic_ai_events``
-helper. The ``task_id`` is threaded into the handler via ``deps``.
+The ``TemporalAgent`` wrapper makes every model call and every tool call run as
+a Temporal activity automatically. The workflow stays deterministic; the
+non-deterministic work (LLM HTTP calls, tool execution) moves into recorded
+activities.
+
+Streaming back to Agentex happens via ``event_stream_handler``, which receives
+Pydantic AI ``AgentStreamEvent``s from inside the model activity and forwards
+them through the UNIFIED HARNESS SURFACE (``UnifiedEmitter.auto_send_turn`` +
+``PydanticAITurn``) — called directly rather than via ``stream_pydantic_ai_events``.
+The ``task_id`` and per-turn ``parent_span_id`` are threaded into the handler
+via ``deps``.
 """
 
 from __future__ import annotations
@@ -26,10 +28,10 @@
 from pydantic_ai.durable_exec.temporal import TemporalAgent
 
 from project.tools import get_weather
-from agentex.lib.adk import (
-    stream_pydantic_ai_events,
-    create_pydantic_ai_tracing_handler,
-)
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+__all__ = ["TaskDeps", "temporal_agent", "base_agent", "MODEL_NAME"]
 
 MODEL_NAME = "openai:gpt-4o-mini"
 SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
@@ -48,13 +50,13 @@ class TaskDeps(BaseModel):
     """Per-run dependencies passed into the agent via ``deps=``.
 
     Pydantic AI's ``RunContext.deps`` is the canonical place to thread
-    request-scoped data (like the Agentex task_id) into tools and
-    event handlers — including code that runs inside Temporal activities.
+    request-scoped data (like the Agentex task_id) into tools and event
+    handlers — including code that runs inside Temporal activities.
     """
 
     task_id: str
-    # When set, the event handler nests per-tool-call spans under this
-    # span. Typically the ID of the per-turn span opened by the workflow.
+    # When set, the event handler nests per-tool-call spans under this span.
+    # Typically the ID of the per-turn span opened by the workflow.
     parent_span_id: str | None = None
 
 
@@ -77,32 +79,33 @@ async def event_handler(
     run_context: RunContext[TaskDeps],
     events: AsyncIterable[AgentStreamEvent],
 ) -> None:
-    """Stream Pydantic AI events to Agentex via Redis from inside the model activity.
+    """Stream Pydantic AI events to Agentex via the unified surface.
 
     Pydantic AI calls this with the live event stream as soon as the model
-    activity begins emitting parts. Because the handler runs inside the
-    activity (not the workflow), it can freely make non-deterministic
-    Redis writes — including the tracing HTTP calls that record per-tool-call
-    spans under the workflow's per-turn span (when ``parent_span_id`` is set).
+    activity begins emitting parts. Because the handler runs inside the activity
+    (not the workflow), it can freely make non-deterministic Redis + tracing
+    writes.
+
+    The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id),
+    so tool spans nest under the workflow's per-turn span and messages auto-send
+    to the task stream. The auto_send path delivers streamed tool requests
+    natively, so no coalescing workaround is needed.
     """
-    tracing_handler = create_pydantic_ai_tracing_handler(
+    emitter = UnifiedEmitter(
+        task_id=run_context.deps.task_id,
         trace_id=run_context.deps.task_id,
         parent_span_id=run_context.deps.parent_span_id,
-        task_id=run_context.deps.task_id,
-    )
-    await stream_pydantic_ai_events(
-        events,
-        run_context.deps.task_id,
-        tracing_handler=tracing_handler,
     )
+    turn = PydanticAITurn(events, model=MODEL_NAME)
+    await emitter.auto_send_turn(turn)
 
 
-# Construct the durable agent at module load time so that the
-# PydanticAIPlugin can auto-discover its activities via the workflow's
-# ``__pydantic_ai_agents__`` attribute.
+# Construct the durable agent at module load time so that the PydanticAIPlugin
+# can auto-discover its activities via the workflow's ``__pydantic_ai_agents__``
+# attribute.
 base_agent = _build_base_agent()
 temporal_agent: TemporalAgent[TaskDeps, str] = TemporalAgent(
     base_agent,
-    name="at110_pydantic_ai_agent",
+    name="pydantic_ai_agent",
     event_stream_handler=event_handler,
 )
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py
index e54c9d1dc..4b4d43d19 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py
@@ -1,18 +1,18 @@
-"""Temporal worker for the Pydantic AI tutorial.
+"""Temporal worker for the harness Pydantic AI test agent.
 
-Run as a separate long-lived process alongside the ACP HTTP server. The
-worker polls Temporal for workflow + activity tasks and executes them.
+Run as a separate long-lived process alongside the ACP HTTP server. The worker
+polls Temporal for workflow + activity tasks and executes them.
 
-The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow
-class and registers every model/tool activity the TemporalAgent needs —
-so we don't have to enumerate activities by hand here.
+The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow class
+and registers every model/tool activity the TemporalAgent needs — so we don't
+have to enumerate activities by hand here.
 """
 
 import asyncio
 
 from pydantic_ai.durable_exec.temporal import PydanticAIPlugin
 
-from project.workflow import At110PydanticAiWorkflow
+from project.workflow import HarnessPydanticAiWorkflow
 from agentex.lib.utils.debug import setup_debug_if_enabled
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.environment_variables import EnvironmentVariables
@@ -31,8 +31,8 @@ async def main():
         raise ValueError("WORKFLOW_TASK_QUEUE is not set")
 
     # get_all_activities() returns the built-in Agentex activities (state,
-    # messages, streaming, tracing). Pydantic AI's TemporalAgent activities
-    # are auto-registered by PydanticAIPlugin via __pydantic_ai_agents__.
+    # messages, streaming, tracing). Pydantic AI's TemporalAgent activities are
+    # auto-registered by PydanticAIPlugin via __pydantic_ai_agents__.
     worker = AgentexWorker(
         task_queue=task_queue_name,
         plugins=[PydanticAIPlugin()],
@@ -40,7 +40,7 @@ async def main():
 
     await worker.run(
         activities=get_all_activities(),
-        workflow=At110PydanticAiWorkflow,
+        workflow=HarnessPydanticAiWorkflow,
     )
 
 
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py
index 75640fcb7..bbd6c5200 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py
@@ -1,9 +1,8 @@
-"""Tool definitions for the Temporal Pydantic AI agent.
+"""Tool definitions for the Temporal harness Pydantic AI agent.
 
 These functions are registered on the base Pydantic AI agent. When the agent
 is wrapped in ``TemporalAgent``, each tool call becomes its own Temporal
-activity automatically — independently retryable and observable in the
-Temporal UI.
+activity automatically — independently retryable and observable.
 
 Tools must be ``async`` because Pydantic AI's Temporal integration requires
 it: non-async tools would run in threads, which is non-deterministic and
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py
index bb07ac818..9a01be7de 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py
@@ -1,16 +1,16 @@
-"""Temporal workflow for the Pydantic AI tutorial.
+"""Temporal workflow for the harness Pydantic AI test agent.
 
 The workflow holds task state durably across crashes. Its signal handler
-delegates the actual agent run to ``temporal_agent.run(...)`` — which
-internally schedules model and tool activities, each independently
-durable. The ``event_stream_handler`` registered on ``temporal_agent``
-pushes streaming deltas to Redis while the model activity runs.
+delegates the actual agent run to ``temporal_agent.run(...)`` — which internally
+schedules model and tool activities, each independently durable. The
+``event_stream_handler`` registered on ``temporal_agent`` (see project.agent)
+pushes streaming deltas through the unified harness surface while the model
+activity runs.
 
 Multi-turn memory is kept on the workflow instance itself
-(``self._message_history``). Temporal's workflow state is already durable
-and replay-safe, so unlike the async-base tutorial we don't need an
-external ``adk.state`` round-trip — the message list survives crashes
-because Temporal replays activity results that produced it.
+(``self._message_history``). Temporal's workflow state is already durable and
+replay-safe, so unlike the async-base agent we don't need an external
+``adk.state`` round-trip.
 """
 
 from __future__ import annotations
@@ -56,14 +56,14 @@
 
 
 @workflow.defn(name=environment_variables.WORKFLOW_NAME)
-class At110PydanticAiWorkflow(BaseWorkflow):
+class HarnessPydanticAiWorkflow(BaseWorkflow):
     """Long-running Temporal workflow that delegates each turn to a Pydantic AI TemporalAgent.
 
     The ``__pydantic_ai_agents__`` attribute is the marker the
     ``PydanticAIPlugin`` looks for at worker startup: it pulls
-    ``temporal_agent.temporal_activities`` off this list and registers them
-    on the worker automatically — so we don't have to list activities by
-    hand in ``run_worker.py``.
+    ``temporal_agent.temporal_activities`` off this list and registers them on
+    the worker automatically — so we don't have to list activities by hand in
+    ``run_worker.py``.
     """
 
     __pydantic_ai_agents__ = [temporal_agent]
@@ -74,8 +74,8 @@ def __init__(self):
         self._turn_number = 0
         # Conversation history accumulated across turns. Each entry is a
         # pydantic-ai ``ModelMessage``. Temporal replays the activity that
-        # produced these messages, so the list is rebuilt deterministically
-        # if the workflow ever recovers from a crash.
+        # produced these messages, so the list is rebuilt deterministically if
+        # the workflow ever recovers from a crash.
         self._message_history: list["ModelMessage"] = []
 
     @workflow.signal(name=SignalName.RECEIVE_EVENT)
@@ -93,17 +93,10 @@ async def on_task_event_send(self, params: SendEventParams) -> None:
             name=f"Turn {self._turn_number}",
             input={"message": params.event.content.content},
         ) as span:
-            # temporal_agent.run() is the magic line. From the outside it
-            # looks like a regular async call. Internally it schedules:
-            #   1. A model activity (LLM HTTP call recorded by Temporal)
-            #   2. For each tool the model invokes, a tool activity
-            #   3. Each activity is retried, observable, and durable
-            # While the model activity runs, the event_stream_handler on
-            # temporal_agent pushes deltas to Redis so the UI sees tokens.
-            #
-            # Passing ``message_history`` makes the run remember prior turns:
-            # without it the agent would respond to each user message as if
-            # it had never seen the conversation before.
+            # temporal_agent.run() schedules a model activity, per-tool
+            # activities, and the event_stream_handler activity (which pushes
+            # deltas through the unified surface). Passing ``message_history``
+            # makes the run remember prior turns.
             result = await temporal_agent.run(
                 params.event.content.content,
                 message_history=self._message_history,
@@ -112,8 +105,8 @@ async def on_task_event_send(self, params: SendEventParams) -> None:
                     parent_span_id=span.id if span else None,
                 ),
             )
-            # Persist the new full history (user + assistant + any tool
-            # rounds) so the next turn picks up from here.
+            # Persist the new full history (user + assistant + any tool rounds)
+            # so the next turn picks up from here.
             self._message_history = list(result.all_messages())
             if span:
                 span.output = {"final_output": result.output}
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml
index 9f47733c0..2f308f2a1 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "at110-pydantic-ai"
 version = "0.1.0"
-description = "A Temporal-backed Pydantic AI agent with tool calling and Redis streaming"
+description = "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py
index d01276ab8..974cddcc0 100644
--- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py
+++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py
@@ -1,9 +1,10 @@
-"""Tests for the Temporal Pydantic AI agent.
+"""Live tests for the Temporal Pydantic AI agent.
 
-This test suite validates:
-- The agent responds to a basic message
-- Tool calls are visible in the message history (proving each tool call
-  ran as its own Temporal activity)
+These tests require a running agent (Temporal + Redis + ACP server + worker) and
+exercise the unified-surface event_stream_handler end-to-end over the wire.
+
+Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives
+in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai temporal suite).
 
 To run these tests:
 1. Make sure the agent is running (worker + ACP server)
@@ -16,10 +17,7 @@
 
 import pytest
 import pytest_asyncio
-from test_utils.async_utils import (
-    poll_messages,
-    send_event_and_poll_yielding,
-)
+from test_utils.async_utils import poll_messages, send_event_and_poll_yielding
 
 from agentex import AsyncAgentex
 from agentex.types.task_message import TaskMessage
@@ -51,14 +49,12 @@ async def agent_id(client, agent_name):
 
 
 class TestNonStreamingEvents:
-    """Test that the Temporal-backed Pydantic AI agent responds and uses tools."""
+    """Test that the Temporal-backed harness agent responds and uses tools."""
 
     @pytest.mark.asyncio
     async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
         """Drive a full turn: create task, send a weather question, verify tool round-trip."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
@@ -71,11 +67,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
             sleep_interval=1.0,
         ):
             assert isinstance(message, TaskMessage)
-            if (
-                message.content
-                and message.content.type == "text"
-                and message.content.author == "agent"
-            ):
+            if message.content and message.content.type == "text" and message.content.author == "agent":
                 task_creation_found = True
                 break
         assert task_creation_found, "Task creation welcome message not found"
@@ -101,11 +93,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
                 if final_message and getattr(final_message, "streaming_status", None) == "DONE":
                     break
 
-            if (
-                message.content
-                and message.content.type == "text"
-                and message.content.author == "agent"
-            ):
+            if message.content and message.content.type == "text" and message.content.author == "agent":
                 final_message = message
                 content_length = len(getattr(message.content, "content", "") or "")
                 if message.streaming_status == "DONE" and content_length > 0:
@@ -115,9 +103,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
         assert seen_tool_request, "Expected a tool_request (agent calling get_weather)"
         assert seen_tool_response, "Expected a tool_response (get_weather result)"
         assert final_message is not None, "Expected a final agent text message"
-        final_text = (
-            getattr(final_message.content, "content", None) if final_message.content else None
-        )
+        final_text = getattr(final_message.content, "content", None) if final_message.content else None
         assert isinstance(final_text, str) and len(final_text) > 0
         # The get_weather tool always returns "72°F" — the response should mention it.
         assert "72" in final_text, "Expected weather response to mention 72°F"
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/.dockerignore b/examples/tutorials/10_async/10_temporal/120_openai_agents/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile b/examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile
new file mode 100644
index 000000000..700f56cea
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/120_openai_agents/pyproject.toml /app/120_openai_agents/pyproject.toml
+COPY 10_async/10_temporal/120_openai_agents/README.md /app/120_openai_agents/README.md
+
+WORKDIR /app/120_openai_agents
+
+COPY 10_async/10_temporal/120_openai_agents/project /app/120_openai_agents/project
+COPY 10_async/10_temporal/120_openai_agents/tests /app/120_openai_agents/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=at120-openai-agents
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When we deploy the worker, we will replace the CMD with the following
+# CMD ["python", "-m", "run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/README.md b/examples/tutorials/10_async/10_temporal/120_openai_agents/README.md
new file mode 100644
index 000000000..4db26d0a1
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/README.md
@@ -0,0 +1,41 @@
+# Temporal OpenAI Agents on the unified harness surface
+
+A Temporal-backed Agentex agent that runs the OpenAI Agents SDK and delivers its
+output through the **unified harness surface**.
+
+## What this demonstrates
+
+LLM calls are non-deterministic, so they can't run directly in a Temporal
+workflow. This tutorial keeps the workflow (`project/workflow.py`)
+deterministic and delegates each turn to a custom activity
+(`project/activities.py`). The activity uses the SAME `OpenAITurn` adapter as
+the sync (`050_openai_agents`) and async (`120_openai_agents`) variants, and
+delivers via `UnifiedEmitter.auto_send_turn` — which is designed to run inside
+an activity (it writes streaming side effects to Redis and returns the final
+text + usage).
+
+```python
+# inside the activity:
+result = Runner.run_streamed(starting_agent=agent, input=user_message)
+turn = OpenAITurn(result=result, model="gpt-4o")
+emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id)
+turn_result = await emitter.auto_send_turn(turn)
+return turn_result.final_text
+```
+
+## Run it
+
+```bash
+agentex agents run --manifest manifest.yaml
+```
+
+This starts both the ACP HTTP server and the Temporal worker.
+
+## Test it
+
+The offline test exercises the activity's delivery path with an injected fake
+streaming backend (no server, Temporal, Redis, or API key required):
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/environments.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents/environments.yaml
similarity index 100%
rename from examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/environments.yaml
rename to examples/tutorials/10_async/10_temporal/120_openai_agents/environments.yaml
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml
new file mode 100644
index 000000000..4b59db442
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml
@@ -0,0 +1,62 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/120_openai_agents
+      - test_utils
+    dockerfile: 10_async/10_temporal/120_openai_agents/Dockerfile
+    dockerignore: 10_async/10_temporal/120_openai_agents/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at120-openai-agents
+  description: A Temporal-backed OpenAI Agents SDK agent on the unified harness surface
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at120-openai-agents
+        queue_name: at120_openai_agents_queue
+
+  credentials:
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "at120-openai-agents"
+      description: "A Temporal-backed OpenAI Agents SDK agent on the unified harness surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/project/__init__.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/project/acp.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/acp.py
new file mode 100644
index 000000000..6076835ba
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/acp.py
@@ -0,0 +1,33 @@
+"""ACP server for the Temporal OpenAI Agents harness tutorial.
+
+Thin by design: with ``acp_type="async"`` + ``TemporalACPConfig``, FastACP
+auto-wires task/create, task/event/send, and task/cancel onto the workflow.
+The agent logic lives in ``project/workflow.py`` (deterministic) and
+``project/activities.py`` (the harness-backed LLM run), executed by the worker
+in ``project/run_worker.py``.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client
+# compatibility, so the same example works behind the Scale LiteLLM gateway.
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = _litellm_key
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py
new file mode 100644
index 000000000..72c92d617
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py
@@ -0,0 +1,80 @@
+"""Custom Temporal activity that runs the OpenAI agent on the harness surface.
+
+LLM calls are non-deterministic, so they must run inside a Temporal activity
+rather than directly in the workflow. This activity runs the OpenAI Agents SDK
+via ``Runner.run_streamed``, wraps the result in an ``OpenAITurn``, and pushes
+the canonical stream to the task stream via ``UnifiedEmitter.auto_send_turn``.
+
+``auto_send`` (which backs ``auto_send_turn``) is explicitly designed to be
+called from inside an activity: it writes streaming side effects to Redis and
+returns the accumulated final text + normalized usage.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from datetime import datetime
+
+from agents import Runner
+from pydantic import BaseModel
+from temporalio import activity
+
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+logger = make_logger(__name__)
+
+RUN_AGENT_ACTIVITY = "run_openai_agent"
+
+
+class RunHarnessAgentParams(BaseModel):
+    """Parameters for the harness agent activity."""
+
+    task_id: str
+    user_message: str
+    # Prior conversation as OpenAI Agents SDK input items, so the agent sees the
+    # full history (not just the latest message) on every turn.
+    input_list: list[Any] = []
+    trace_id: str | None = None
+    parent_span_id: str | None = None
+    # Deterministic turn timestamp from workflow.now(); forwarded to
+    # auto_send_turn so retried activities re-emit messages with stable
+    # timestamps instead of new server-side ones (which could reorder turns).
+    created_at: datetime | None = None
+
+
+class RunHarnessAgentResult(BaseModel):
+    """Result of one harness turn."""
+
+    final_text: str
+    # Updated conversation (prior history + this turn) to carry into the next turn.
+    input_list: list[Any]
+
+
+class HarnessActivities:
+    """Hosts the harness-backed OpenAI agent activity."""
+
+    @activity.defn(name=RUN_AGENT_ACTIVITY)
+    async def run_openai_agent(self, params: RunHarnessAgentParams) -> RunHarnessAgentResult:
+        """Run the agent for one turn and auto-send its output.
+
+        Threads the running conversation through ``input_list`` so multi-turn
+        chats retain memory: prior history + the new user message go in, and the
+        updated conversation comes back out via ``result.to_input_list()``.
+        """
+        logger.info(f"Running harness OpenAI agent for task {params.task_id}")
+
+        agent = create_agent()
+        input_list: list[Any] = [*params.input_list, {"role": "user", "content": params.user_message}]
+        result = Runner.run_streamed(starting_agent=agent, input=input_list)
+        turn = OpenAITurn(result=result, model=MODEL_NAME)
+        emitter = UnifiedEmitter(
+            task_id=params.task_id,
+            trace_id=params.trace_id,
+            parent_span_id=params.parent_span_id,
+        )
+        turn_result = await emitter.auto_send_turn(turn, created_at=params.created_at)
+        # to_input_list() is valid now: auto_send_turn has exhausted the stream.
+        return RunHarnessAgentResult(final_text=turn_result.final_text, input_list=result.to_input_list())
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/project/agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/agent.py
new file mode 100644
index 000000000..385a80b69
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/agent.py
@@ -0,0 +1,44 @@
+"""OpenAI Agents SDK agent definition for the Temporal harness tutorial.
+
+Same agent shape as the sync (060) and async (130) variants. Here the agent is
+built and run inside a Temporal activity (see ``project.activities``); the
+workflow stays deterministic and delegates the non-deterministic LLM run to that
+activity, which delivers the turn via the unified harness surface.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from agents import Agent, function_tool, set_tracing_disabled
+
+from project.tools import get_weather
+
+set_tracing_disabled(True)
+
+MODEL_NAME = "gpt-4o"
+INSTRUCTIONS = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use the weather tool when the user asks about the weather
+- Always report the real tool output back to the user
+"""
+
+
+@function_tool
+def weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return get_weather(city)
+
+
+def create_agent() -> Agent:
+    """Build and return the OpenAI Agents SDK agent with the weather tool."""
+    return Agent(
+        name="Harness OpenAI Assistant",
+        model=MODEL_NAME,
+        instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+        tools=[weather],
+    )
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py
new file mode 100644
index 000000000..b82ee0f50
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py
@@ -0,0 +1,44 @@
+"""Temporal worker for the OpenAI Agents harness tutorial.
+
+Runs as a separate long-lived process alongside the ACP HTTP server. Registers
+the built-in Agentex activities plus the custom harness agent activity
+(``HarnessActivities.run_openai_agent``), and the workflow.
+"""
+
+import asyncio
+
+from project.workflow import At140HarnessOpenaiWorkflow
+from project.activities import HarnessActivities
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    harness_activities = HarnessActivities()
+    all_activities = [
+        harness_activities.run_openai_agent,
+        *get_all_activities(),
+    ]
+
+    worker = AgentexWorker(task_queue=task_queue_name)
+
+    await worker.run(
+        activities=all_activities,
+        workflow=At140HarnessOpenaiWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/project/tools.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/tools.py
new file mode 100644
index 000000000..d26f9b097
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/tools.py
@@ -0,0 +1,15 @@
+"""Tool definitions for the Temporal OpenAI Agents harness tutorial."""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py
new file mode 100644
index 000000000..5cb8fb38b
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py
@@ -0,0 +1,124 @@
+"""Temporal workflow for the OpenAI Agents harness tutorial.
+
+The workflow stays deterministic: it echoes the user message and delegates the
+non-deterministic LLM run to ``run_openai_agent`` (see
+``project.activities``). That activity runs the OpenAI Agents SDK and delivers
+the turn through the unified harness surface (``OpenAITurn`` +
+``UnifiedEmitter.auto_send_turn``).
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from datetime import timedelta
+
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+
+from agentex.lib import adk
+from project.activities import (
+    RUN_AGENT_ACTIVITY,
+    RunHarnessAgentParams,
+    RunHarnessAgentResult,
+)
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class At140HarnessOpenaiWorkflow(BaseWorkflow):
+    """Long-running workflow that runs each turn through the harness activity."""
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._turn_number = 0
+        # Running conversation (OpenAI Agents SDK input items) so each turn sees
+        # the full history, not just the latest user message.
+        self._messages: list = []
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Handle a user message: echo it, then run the harness activity durably."""
+        logger.info(f"Received task event: {params.task.id}")
+        self._turn_number += 1
+
+        # Echo the user's message so it shows up in the UI as a chat bubble.
+        await adk.messages.create(task_id=params.task.id, content=params.event.content)
+
+        async with adk.tracing.span(
+            trace_id=params.task.id,
+            task_id=params.task.id,
+            name=f"Turn {self._turn_number}",
+            input={"message": params.event.content.content},
+        ) as span:
+            turn_result = await workflow.execute_activity(
+                RUN_AGENT_ACTIVITY,
+                RunHarnessAgentParams(
+                    task_id=params.task.id,
+                    user_message=params.event.content.content,
+                    input_list=self._messages,
+                    trace_id=params.task.id,
+                    parent_span_id=span.id if span else None,
+                    # Deterministic timestamp under replay so a retried activity
+                    # re-emits this turn's messages with stable ordering.
+                    created_at=workflow.now(),
+                ),
+                start_to_close_timeout=timedelta(minutes=5),
+                retry_policy=RetryPolicy(maximum_attempts=3),
+                result_type=RunHarnessAgentResult,
+            )
+            # Carry the updated conversation into the next turn.
+            self._messages = turn_result.input_list
+            if span:
+                span.output = {"final_output": turn_result.final_text}
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        """Workflow entry point — keep the conversation alive for incoming signals."""
+        logger.info(f"Task created: {params.task.id}")
+
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n"
+                    f"Send me a message and I'll respond using an OpenAI Agents SDK agent "
+                    f"delivered through the unified harness surface."
+                ),
+            ),
+        )
+
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        """Graceful workflow shutdown signal."""
+        logger.info("Received complete_task signal")
+        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml
similarity index 72%
rename from examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml
rename to examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml
index 696894e32..e6c77fae3 100644
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml
@@ -3,21 +3,23 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project]
-name = "at120_openai_agents_local_sandbox"
+name = "at120-openai-agents"
 version = "0.1.0"
-description = "A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox"
+description = "A Temporal-backed OpenAI Agents SDK agent on the unified harness surface"
+readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
-    "agentex-sdk>=0.6.0",
-    "openai-agents>=0.14.3,<0.15",
-    "temporalio>=1.18.2",
+    "agentex-sdk",
     "scale-gp",
+    "temporalio>=1.18.2",
+    "openai-agents",
 ]
 
 [project.optional-dependencies]
 dev = [
     "pytest",
     "pytest-asyncio",
+    "httpx",
     "black",
     "isort",
     "flake8",
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/tests/test_agent.py
new file mode 100644
index 000000000..dd043c44c
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/tests/test_agent.py
@@ -0,0 +1,77 @@
+"""Offline test for the Temporal OpenAI Agents harness tutorial.
+
+This test does NOT require a running Agentex server, Temporal, Redis, or an
+OpenAI API key. It verifies the delivery path the harness activity uses: an
+``OpenAITurn`` built from an injected canonical stream, pushed through
+``UnifiedEmitter.auto_send_turn`` with an injected fake streaming backend,
+returns the accumulated final text (which the activity returns to the workflow).
+
+To run: ``pytest tests/test_agent.py -v``
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+
+class _FakeCtx:
+    def __init__(self, initial_content):
+        self.task_message = TaskMessage(id="m-1", task_id="task-1", content=initial_content)
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        pass
+
+    async def stream_update(self, update):
+        return update
+
+
+class _FakeStreaming:
+    def streaming_task_message_context(self, task_id, initial_content, **_kwargs):  # noqa: ARG002
+        return _FakeCtx(initial_content)
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_activity_delivery_returns_final_text():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="72")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="F")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+    emitter = UnifiedEmitter(
+        task_id="task-1",
+        trace_id=None,
+        parent_span_id=None,
+        streaming=_FakeStreaming(),
+    )
+
+    result = await emitter.auto_send_turn(turn)
+    assert result.final_text == "72F"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile
deleted file mode 100644
index d4927d0ce..000000000
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile
+++ /dev/null
@@ -1,62 +0,0 @@
-# syntax=docker/dockerfile:1.3
-FROM python:3.12-slim
-COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
-    htop \
-    vim \
-    curl \
-    tar \
-    python3-dev \
-    postgresql-client \
-    build-essential \
-    libpq-dev \
-    gcc \
-    cmake \
-    netcat-openbsd \
-    nodejs \
-    npm \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/**
-
-# Install tctl (Temporal CLI)
-RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \
-    tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \
-    chmod +x /usr/local/bin/tctl && \
-    rm /tmp/tctl.tar.gz
-
-RUN uv pip install --system --upgrade pip setuptools wheel
-
-ENV UV_HTTP_TIMEOUT=1000
-
-# Copy pyproject.toml and README.md to install dependencies
-COPY 10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml /app/120_openai_agents_local_sandbox/pyproject.toml
-COPY 10_async/10_temporal/120_openai_agents_local_sandbox/README.md /app/120_openai_agents_local_sandbox/README.md
-
-WORKDIR /app/120_openai_agents_local_sandbox
-
-# Copy the project code
-COPY 10_async/10_temporal/120_openai_agents_local_sandbox/project /app/120_openai_agents_local_sandbox/project
-
-# Copy the test files
-COPY 10_async/10_temporal/120_openai_agents_local_sandbox/tests /app/120_openai_agents_local_sandbox/tests
-
-# Copy shared test utilities
-COPY test_utils /app/test_utils
-
-# Install the required Python packages with dev dependencies
-RUN uv pip install --system .[dev]
-
-WORKDIR /app/120_openai_agents_local_sandbox
-
-ENV PYTHONPATH=/app
-
-# Set test environment variables
-ENV AGENT_NAME=at120-openai-agents-local-sandbox
-
-# Run the ACP server using uvicorn
-CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
-
-# When we deploy the worker, we will replace the CMD with the following
-# CMD ["python", "-m", "run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md
deleted file mode 100644
index 161bc43da..000000000
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Tutorial 120: Temporal OpenAI Agents SDK with a Local Sandbox
-
-This tutorial demonstrates running an [OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents)
-`SandboxAgent` inside a **Temporal** workflow, backed by the **local**
-(`unix_local`) sandbox.
-
-The agent is a "local sandbox assistant": it answers questions by actually running
-real shell commands (e.g. `python3 --version`, `ls`, `python3 -c "..."`) instead of
-guessing. Because it runs inside Temporal, the sandbox tool calls become durable,
-retried, and observable activities.
-
-This mirrors the canonical OpenAI Agents SDK Temporal example
-(`060_open_ai_agents_sdk_hello_world`) and the tools example
-(`070_open_ai_agents_sdk_tools`). The new piece is the **Temporal sandbox bridge**.
-
-## Key Concepts
-
-### Temporal ACP
-The Temporal ACP model (`acp_type: async`, `temporal.enabled: true`) maps task
-lifecycle to a Temporal workflow:
-- `@workflow.run` (`on_task_create`) keeps the conversation alive.
-- `@workflow.signal(name=SignalName.RECEIVE_EVENT)` (`on_task_event_send`) handles
-  each user message.
-
-No ACP handlers are registered by hand — the `TemporalACPConfig` wires them to the
-workflow automatically.
-
-### Streaming (Interceptor + Model Provider + Hooks)
-Real-time streaming uses STANDARD Temporal components — no forked plugin:
-- **`ContextInterceptor`** threads `task_id` through activity headers. The workflow
-  sets `self._task_id` so the interceptor can read it.
-- **`TemporalStreamingModelProvider`** returns a model that streams tokens to Redis
-  in real time while still returning the complete response to Temporal for
-  determinism / replay safety.
-- **`TemporalStreamingHooks`** creates the lifecycle messages (tool request /
-  response, etc.) in the database.
-
-The `stream_lifecycle_content` activity must be registered on the worker alongside
-`get_all_activities()`.
-
-### The Temporal sandbox bridge (`UnixLocalSandboxClient`)
-The sandbox client is registered ON THE WORKER (and the ACP) via the standard
-plugin:
-
-```python
-from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient
-from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, SandboxClientProvider
-
-OpenAIAgentsPlugin(
-    model_provider=TemporalStreamingModelProvider(),
-    sandbox_clients=[SandboxClientProvider("local", UnixLocalSandboxClient())],
-)
-```
-
-Inside the workflow, the run is pointed at that backend by name:
-
-```python
-from temporalio.contrib.openai_agents.workflow import temporal_sandbox_client
-from agents.sandbox import SandboxAgent, SandboxRunConfig
-from agents.run_config import RunConfig
-from agents.sandbox.snapshot import NoopSnapshotSpec
-from agents.sandbox.capabilities import Shell
-from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClientOptions
-
-agent = SandboxAgent(
-    name="Local Sandbox Assistant",
-    model="gpt-4o-mini",
-    instructions="...use the shell tools to actually run commands...",
-    capabilities=[Shell()],
-)
-run_config = RunConfig(
-    sandbox=SandboxRunConfig(
-        client=temporal_sandbox_client("local"),
-        options=UnixLocalSandboxClientOptions(),
-        snapshot=NoopSnapshotSpec(),  # skip the per-turn workspace snapshot
-    )
-)
-result = await Runner.run(
-    agent, self._state.input_list, run_config=run_config,
-    hooks=TemporalStreamingHooks(task_id=params.task.id),
-)
-```
-
-`temporal_sandbox_client("local")` resolves the worker-registered client, so the
-sandbox shell tool calls run as Temporal activities (durable + observable in the
-Temporal UI).
-
-## Two important lessons
-
-1. **Don't double-post the assistant message.** The `TemporalStreamingModelProvider`
-   already streams AND persists the assistant's response. If you also call
-   `adk.messages.create(...)` after `Runner.run`, the answer shows up twice. We only
-   persist conversation state for the next turn via `result.to_input_list()`.
-2. **Use `NoopSnapshotSpec()`.** Without it, the sandbox tries to take a per-turn
-   workspace snapshot, and stopping the sandbox can raise
-   `WorkspaceArchiveReadError`. `NoopSnapshotSpec()` skips that snapshot.
-
-## Files
-
-| File | Description |
-|------|-------------|
-| `project/acp.py` | Temporal ACP server (plugin + sandbox client + interceptor) |
-| `project/run_worker.py` | Temporal worker (registers workflow, activities, plugin, sandbox client) |
-| `project/workflow.py` | `BaseWorkflow` that runs the `SandboxAgent` against the local sandbox |
-| `tests/test_agent.py` | Integration tests (polling pattern) |
-| `manifest.yaml` | Agent configuration (temporal enabled) |
-| `environments.yaml` | Per-environment deployment overrides |
-
-## Running Locally
-
-```bash
-# From this directory
-agentex agents run
-```
-
-Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM
-gateway) in your environment or in a `.env` file in `project/` so the agent can call
-the model.
-
-## Running Tests
-
-```bash
-pytest tests/test_agent.py -v
-```
-
-## Further Reading
-
-- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents
-- The async (non-Temporal) variant: `10_async/00_base/120_openai_agents_local_sandbox`
-- The canonical OpenAI Agents SDK Temporal example: `10_async/10_temporal/060_open_ai_agents_sdk_hello_world`
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml
deleted file mode 100644
index 86ac89288..000000000
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml
+++ /dev/null
@@ -1,111 +0,0 @@
-# Agent Manifest Configuration
-# ---------------------------
-# This file defines how your agent should be built and deployed.
-
-# Build Configuration
-# ------------------
-build:
-  context:
-    # Root directory for the build context
-    root: ../../../  # Up to tutorials level to include test_utils
-
-    # Paths to include in the Docker build context
-    include_paths:
-      - 10_async/10_temporal/120_openai_agents_local_sandbox
-      - test_utils
-
-    # Path to your agent's Dockerfile (relative to the root directory)
-    dockerfile: 10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile
-
-    # Path to your agent's .dockerignore
-    dockerignore: 10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore
-
-
-# Local Development Configuration
-# -----------------------------
-local_development:
-  agent:
-    port: 8000  # Port where your local ACP server is running
-    host_address: host.docker.internal  # Host address for Docker networking
-
-  # File paths for local development (relative to this manifest.yaml)
-  paths:
-    # Path to ACP server file
-    acp: project/acp.py
-    # Path to temporal worker file
-    worker: project/run_worker.py
-
-
-# Agent Configuration
-# -----------------
-agent:
-  # Type of agent - either sync or async
-  acp_type: async
-
-  # Unique name for your agent
-  name: at120-openai-agents-local-sandbox
-
-  # Description of what your agent does
-  description: A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox
-
-  # Temporal workflow configuration
-  temporal:
-    enabled: true
-    workflows:
-      # Name of the workflow class (must match the @workflow.defn name in workflow.py)
-      - name: at120-openai-agents-local-sandbox
-
-        # Queue name for task distribution
-        queue_name: at120_openai_agents_local_sandbox_queue
-
-  # Credentials mapping (maps Kubernetes secrets to environment variables)
-  credentials:
-    - env_var_name: OPENAI_API_KEY
-      secret_name: openai-api-key
-      secret_key: api-key
-    - env_var_name: REDIS_URL
-      secret_name: redis-url-secret
-      secret_key: url
-    - env_var_name: SGP_API_KEY
-      secret_name: sgp-api-key
-      secret_key: api-key
-    - env_var_name: SGP_ACCOUNT_ID
-      secret_name: sgp-account-id
-      secret_key: account-id
-    - env_var_name: SGP_CLIENT_BASE_URL
-      secret_name: sgp-client-base-url
-      secret_key: url
-
-  # Environment variables for running locally and for deployment
-  env:
-    OPENAI_AGENTS_DISABLE_TRACING: "1"
-
-
-# Deployment Configuration
-# -----------------------
-deployment:
-  # Container image configuration
-  image:
-    repository: "" # Update with your container registry
-    tag: "latest"  # Default tag, should be versioned in production
-
-  imagePullSecrets:
-    - name: my-registry-secret  # Update with your image pull secret name
-
-  # Global deployment settings that apply to all clusters
-  global:
-    agent:
-      name: "at120-openai-agents-local-sandbox"
-      description: "A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox"
-
-    # Default replica count
-    replicaCount: 1
-
-    # Default resource requirements
-    resources:
-      requests:
-        cpu: "500m"
-        memory: "1Gi"
-      limits:
-        cpu: "1000m"
-        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py
deleted file mode 100644
index 196e1e7cd..000000000
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import os
-import sys
-
-from temporalio.contrib.openai_agents import (
-    OpenAIAgentsPlugin,
-    SandboxClientProvider,
-)
-from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient
-
-# === DEBUG SETUP (AgentEx CLI Debug Support) ===
-if os.getenv("AGENTEX_DEBUG_ENABLED") == "true":
-    try:
-        import debugpy
-        debug_port = int(os.getenv("AGENTEX_DEBUG_PORT", "5679"))
-        debug_type = os.getenv("AGENTEX_DEBUG_TYPE", "acp")
-        wait_for_attach = os.getenv("AGENTEX_DEBUG_WAIT_FOR_ATTACH", "false").lower() == "true"
-
-        # Configure debugpy
-        debugpy.configure(subProcess=False)
-        debugpy.listen(debug_port)
-
-        print(f"🐛 [{debug_type.upper()}] Debug server listening on port {debug_port}")
-
-        if wait_for_attach:
-            print(f"⏳ [{debug_type.upper()}] Waiting for debugger to attach...")
-            debugpy.wait_for_client()
-            print(f"✅ [{debug_type.upper()}] Debugger attached!")
-        else:
-            print(f"📡 [{debug_type.upper()}] Ready for debugger attachment")
-
-    except ImportError:
-        print("❌ debugpy not available. Install with: pip install debugpy")
-        sys.exit(1)
-    except Exception as e:
-        print(f"❌ Debug setup failed: {e}")
-        sys.exit(1)
-# === END DEBUG SETUP ===
-
-from agentex.lib.types.fastacp import TemporalACPConfig
-from agentex.lib.sdk.fastacp.fastacp import FastACP
-from agentex.lib.core.temporal.plugins.openai_agents.models.temporal_streaming_model import (
-    TemporalStreamingModelProvider,
-)
-from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import (
-    ContextInterceptor,
-)
-
-context_interceptor = ContextInterceptor()
-temporal_streaming_model_provider = TemporalStreamingModelProvider()
-
-# Create the ACP server. We register the STANDARD OpenAIAgentsPlugin with:
-#   - the streaming model provider (real-time token streaming + persistence)
-#   - the LOCAL sandbox backend, registered under the name "local" so the
-#     workflow can resolve it via ``temporal_sandbox_client("local")``
-# plus the ContextInterceptor that threads task_id through activity headers.
-acp = FastACP.create(
-    acp_type="async",
-    config=TemporalACPConfig(
-        # When deployed to the cluster, the Temporal address is set automatically.
-        # For local development, we set the address manually to talk to the local
-        # Temporal service set up via docker compose.
-        type="temporal",
-        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
-        plugins=[
-            OpenAIAgentsPlugin(
-                model_provider=temporal_streaming_model_provider,
-                sandbox_clients=[
-                    SandboxClientProvider("local", UnixLocalSandboxClient()),
-                ],
-            )
-        ],
-        interceptors=[context_interceptor],
-    ),
-)
-
-
-# Notice that we don't need to register any handlers when we use type="temporal".
-# These handlers are automatically registered when the ACP is created:
-#
-# @acp.on_task_create        -> the workflow method decorated with @workflow.run
-# @acp.on_task_event_send    -> the workflow method decorated with
-#                               @workflow.signal(name=SignalName.RECEIVE_EVENT)
-# @acp.on_task_cancel        -> handled by the temporal client (cancels the workflow)
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py
deleted file mode 100644
index a2b7bdf6b..000000000
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import asyncio
-
-from temporalio.contrib.openai_agents import (
-    OpenAIAgentsPlugin,
-    SandboxClientProvider,
-)
-from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient
-
-from project.workflow import At120OpenaiAgentsLocalSandboxWorkflow
-from agentex.lib.utils.debug import setup_debug_if_enabled
-from agentex.lib.utils.logging import make_logger
-from agentex.lib.environment_variables import EnvironmentVariables
-from agentex.lib.core.temporal.activities import get_all_activities
-from agentex.lib.core.temporal.workers.worker import AgentexWorker
-from agentex.lib.core.temporal.plugins.openai_agents.hooks.activities import (
-    stream_lifecycle_content,
-)
-from agentex.lib.core.temporal.plugins.openai_agents.models.temporal_streaming_model import (
-    TemporalStreamingModelProvider,
-)
-from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import (
-    ContextInterceptor,
-)
-
-environment_variables = EnvironmentVariables.refresh()
-
-logger = make_logger(__name__)
-
-
-async def main():
-    # Setup debug mode if enabled
-    setup_debug_if_enabled()
-
-    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
-    if task_queue_name is None:
-        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
-
-    # Register activities. ``stream_lifecycle_content`` powers the streaming
-    # lifecycle hooks; the rest are the standard AgentEx activities.
-    all_activities = get_all_activities() + [stream_lifecycle_content]
-
-    # ============================================================================
-    # STREAMING + SANDBOX SETUP
-    # ============================================================================
-    # 1. ContextInterceptor threads task_id through activity headers so the
-    #    streaming model + hooks know which task to stream/persist to.
-    # 2. TemporalStreamingModelProvider returns a model that streams tokens to
-    #    Redis in real time while still returning the complete response to
-    #    Temporal for determinism / replay safety.
-    # 3. SandboxClientProvider registers the LOCAL sandbox backend
-    #    (UnixLocalSandboxClient) under the name "local". The workflow resolves
-    #    it at run time via ``temporal_sandbox_client("local")``, so the sandbox
-    #    tool calls run as durable Temporal activities.
-    #
-    # We use the STANDARD temporalio.contrib.openai_agents.OpenAIAgentsPlugin —
-    # no forked plugin needed.
-    context_interceptor = ContextInterceptor()
-    temporal_streaming_model_provider = TemporalStreamingModelProvider()
-
-    worker = AgentexWorker(
-        task_queue=task_queue_name,
-        plugins=[
-            OpenAIAgentsPlugin(
-                model_provider=temporal_streaming_model_provider,
-                sandbox_clients=[
-                    SandboxClientProvider("local", UnixLocalSandboxClient()),
-                ],
-            )
-        ],
-        interceptors=[context_interceptor],
-    )
-
-    await worker.run(
-        activities=all_activities,
-        workflow=At120OpenaiAgentsLocalSandboxWorkflow,
-    )
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py
deleted file mode 100644
index 45b61b04e..000000000
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py
+++ /dev/null
@@ -1,213 +0,0 @@
-"""OpenAI Agents SDK + Temporal: Local Sandbox Tutorial
-
-This tutorial demonstrates running an OpenAI Agents SDK ``SandboxAgent`` inside a
-Temporal workflow, backed by the **local** (``unix_local``) sandbox. The agent is
-a "local sandbox assistant": it answers questions by actually running real shell
-commands (e.g. ``python3 --version``, ``ls``, ``python3 -c "..."``) instead of
-guessing.
-
-KEY CONCEPTS DEMONSTRATED:
-- A ``SandboxAgent`` granted the ``Shell`` capability inside a durable Temporal
-  workflow.
-- The Temporal sandbox bridge: ``temporal_sandbox_client("local")`` resolves to
-  the ``UnixLocalSandboxClient`` registered on the worker via
-  ``SandboxClientProvider`` (see ``run_worker.py`` / ``acp.py``). The sandbox tool
-  calls run as Temporal activities, so they are durable, retried, and observable.
-- Real-time streaming + persistence via ``TemporalStreamingModelProvider`` +
-  ``ContextInterceptor`` (configured on the worker) and ``TemporalStreamingHooks``.
-
-IMPORTANT LESSONS (applied below):
-  (a) Do NOT post the assistant message yourself with ``adk.messages.create``
-      after ``Runner.run``. The ``TemporalStreamingModelProvider`` already streams
-      and persists the assistant's response — posting it again would duplicate the
-      answer in the UI. We only persist conversation state for the next turn via
-      ``result.to_input_list()``.
-  (b) Use ``NoopSnapshotSpec()`` so the per-turn workspace snapshot is skipped.
-      Without it, stopping the sandbox can raise ``WorkspaceArchiveReadError``.
-"""
-
-from __future__ import annotations
-
-import os
-import json
-
-from agents import Runner
-from temporalio import workflow
-
-from agentex.lib import adk
-from agentex.lib.types.acp import SendEventParams, CreateTaskParams
-from agentex.lib.types.tracing import SGPTracingProcessorConfig
-from agentex.lib.utils.logging import make_logger
-from agentex.types.text_content import TextContent
-from agentex.lib.utils.model_utils import BaseModel
-from agentex.lib.environment_variables import EnvironmentVariables
-from agentex.lib.core.temporal.types.workflow import SignalName
-from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
-from agentex.lib.core.tracing.tracing_processor_manager import (
-    add_tracing_processor_config,
-)
-from agentex.lib.core.temporal.plugins.openai_agents.hooks.hooks import (
-    TemporalStreamingHooks,
-)
-
-# OpenAI Agents SDK sandbox imports. These are safe to import at workflow module
-# load time; the actual sandbox client is resolved at run time via
-# ``temporal_sandbox_client`` (which maps to the worker-registered backend).
-with workflow.unsafe.imports_passed_through():
-    from agents.sandbox import SandboxAgent, SandboxRunConfig
-    from agents.run_config import RunConfig
-    from agents.sandbox.snapshot import NoopSnapshotSpec
-    from agents.sandbox.capabilities import Shell
-    from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClientOptions
-    from temporalio.contrib.openai_agents.workflow import temporal_sandbox_client
-
-# Configure tracing processor (optional - only if you have SGP credentials)
-add_tracing_processor_config(
-    SGPTracingProcessorConfig(
-        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
-        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
-    )
-)
-
-environment_variables = EnvironmentVariables.refresh()
-
-if environment_variables.WORKFLOW_NAME is None:
-    raise ValueError("Environment variable WORKFLOW_NAME is not set")
-
-if environment_variables.AGENT_NAME is None:
-    raise ValueError("Environment variable AGENT_NAME is not set")
-
-logger = make_logger(__name__)
-
-MODEL_NAME = "gpt-4o-mini"
-INSTRUCTIONS = """You are a local sandbox assistant.
-
-You have access to shell tools that run real commands on the local machine.
-
-Guidelines:
-- ALWAYS use the shell tools to actually run commands — never guess or make up
-  output. If the user asks for the Python version, run `python3 --version`. If
-  they ask to list files, run `ls`. If they ask you to compute something, use
-  `python3 -c "..."`.
-- Run the minimal command(s) needed to answer the question.
-- Report the real command output back to the user, concisely.
-"""
-
-
-class StateModel(BaseModel):
-    """State model for preserving conversation history across turns."""
-
-    input_list: list = []
-    turn_number: int = 0
-
-
-@workflow.defn(name=environment_variables.WORKFLOW_NAME)
-class At120OpenaiAgentsLocalSandboxWorkflow(BaseWorkflow):
-    """Long-running Temporal workflow that runs a SandboxAgent against the local sandbox."""
-
-    def __init__(self):
-        super().__init__(display_name=environment_variables.AGENT_NAME)
-        self._complete_task = False
-        self._state: StateModel | None = None
-        self._task_id = None
-        self._trace_id = None
-        self._parent_span_id = None
-
-    @workflow.signal(name=SignalName.RECEIVE_EVENT)
-    async def on_task_event_send(self, params: SendEventParams) -> None:
-        logger.info(f"Received task event: {params.task.id}")
-
-        if self._state is None:
-            raise ValueError("State is not initialized")
-
-        self._state.turn_number += 1
-
-        # The ContextInterceptor reads ``self._task_id`` off the workflow
-        # instance and threads it through activity headers so the streaming
-        # model + hooks know which task to stream/persist to.
-        self._task_id = params.task.id
-        self._trace_id = params.task.id
-
-        # Add the user message to conversation history.
-        self._state.input_list.append({"role": "user", "content": params.event.content.content})
-
-        # Echo back the client's message so it shows up in the UI.
-        await adk.messages.create(task_id=params.task.id, content=params.event.content)
-
-        async with adk.tracing.span(
-            trace_id=params.task.id,
-            name=f"Turn {self._state.turn_number}",
-            input=self._state.model_dump(),
-        ) as span:
-            self._parent_span_id = span.id if span else None
-
-            # Build the sandbox agent. The Shell capability becomes real shell
-            # tools backed by the sandbox client resolved at run time.
-            agent = SandboxAgent(
-                name="Local Sandbox Assistant",
-                model=MODEL_NAME,
-                instructions=INSTRUCTIONS,
-                capabilities=[Shell()],
-            )
-
-            # Point the run at the LOCAL sandbox backend registered on the worker
-            # under the name "local". ``temporal_sandbox_client`` resolves that
-            # registration so the sandbox tool calls execute as Temporal
-            # activities (durable + observable).
-            #
-            # IMPORTANT: ``NoopSnapshotSpec()`` skips the per-turn workspace
-            # snapshot — otherwise stopping the sandbox can raise
-            # ``WorkspaceArchiveReadError``.
-            run_config = RunConfig(
-                sandbox=SandboxRunConfig(
-                    client=temporal_sandbox_client("local"),
-                    options=UnixLocalSandboxClientOptions(),
-                    snapshot=NoopSnapshotSpec(),
-                )
-            )
-
-            # TemporalStreamingHooks creates the lifecycle messages (tool
-            # request/response, etc.) and works with the streaming model
-            # provider to stream tokens to the UI in real time.
-            result = await Runner.run(
-                agent,
-                self._state.input_list,
-                run_config=run_config,
-                hooks=TemporalStreamingHooks(task_id=params.task.id),
-                max_turns=10,
-            )
-
-            # IMPORTANT: We do NOT post the assistant message ourselves here.
-            # The TemporalStreamingModelProvider already streamed and persisted
-            # the assistant's response. We only persist conversation state for
-            # the next turn.
-            self._state.input_list = result.to_input_list()
-
-            if span:
-                span.output = self._state.model_dump()
-
-    @workflow.run
-    async def on_task_create(self, params: CreateTaskParams) -> str:
-        logger.info(f"Task created: {params.task.id}")
-
-        self._state = StateModel(input_list=[], turn_number=0)
-
-        await adk.messages.create(
-            task_id=params.task.id,
-            content=TextContent(
-                author="agent",
-                content=(
-                    f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n"
-                    f"Send me a message and I'll run real shell commands in a local "
-                    f"sandbox (backed by Temporal) to answer."
-                ),
-            ),
-        )
-
-        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
-        return "Task completed"
-
-    @workflow.signal
-    async def complete_task_signal(self) -> None:
-        logger.info("Received complete_task signal")
-        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py
deleted file mode 100644
index 5e161c061..000000000
--- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""Tests for the Temporal OpenAI Agents SDK local-sandbox agent.
-
-This test suite validates that the agent actually runs shell commands in the
-LOCAL sandbox (unix_local backend) via the Temporal sandbox bridge, by polling
-for the agent's response:
-- Ask for the Python version -> response contains "Python 3"
-- Ask it to compute 21 * 2 with python3 -> response contains "42"
-
-To run these tests:
-1. Make sure the agent is running (via docker-compose or `agentex agents run`)
-2. Set the AGENTEX_API_BASE_URL environment variable if not using default
-3. Run: pytest test_agent.py -v
-
-Configuration:
-- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
-- AGENT_NAME: Name of the agent to test (default: at120-openai-agents-local-sandbox)
-"""
-
-import os
-import uuid
-
-import pytest
-import pytest_asyncio
-from test_utils.async_utils import (
-    poll_messages,
-    send_event_and_poll_yielding,
-)
-
-from agentex import AsyncAgentex
-from agentex.types.task_message import TaskMessage
-from agentex.types.agent_rpc_params import ParamsCreateTaskRequest
-
-# Configuration from environment variables
-AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
-AGENT_NAME = os.environ.get("AGENT_NAME", "at120-openai-agents-local-sandbox")
-
-
-@pytest_asyncio.fixture
-async def client():
-    """Create an AsyncAgentex client instance for testing."""
-    client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL)
-    yield client
-    await client.close()
-
-
-@pytest.fixture
-def agent_name():
-    """Return the agent name for testing."""
-    return AGENT_NAME
-
-
-@pytest_asyncio.fixture
-async def agent_id(client, agent_name):
-    """Retrieve the agent ID based on the agent name."""
-    agents = await client.agents.list()
-    for agent in agents:
-        if agent.name == agent_name:
-            return agent.id
-    raise ValueError(f"Agent with name {agent_name} not found.")
-
-
-async def _create_task_and_await_welcome(client: AsyncAgentex, agent_id: str) -> str:
-    """Create a task and wait for the workflow's welcome message; return the task id."""
-    task_response = await client.agents.create_task(
-        agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-    )
-    task = task_response.result
-    assert task is not None
-
-    welcome_found = False
-    async for message in poll_messages(
-        client=client,
-        task_id=task.id,
-        timeout=30,
-        sleep_interval=1.0,
-    ):
-        assert isinstance(message, TaskMessage)
-        if message.content and message.content.type == "text" and message.content.author == "agent":
-            welcome_found = True
-            break
-    assert welcome_found, "Task creation (welcome) message not found"
-    return task.id
-
-
-async def _send_and_collect_agent_text(
-    client: AsyncAgentex, agent_id: str, task_id: str, user_message: str
-) -> str:
-    """Send a user message and accumulate the streamed agent text into a string."""
-    final_message = None
-    async for message in send_event_and_poll_yielding(
-        client=client,
-        agent_id=agent_id,
-        task_id=task_id,
-        user_message=user_message,
-        timeout=60,
-        sleep_interval=1.0,
-        yield_updates=True,  # Get updates as streaming writes chunks
-    ):
-        if message.content and message.content.type == "text" and message.content.author == "agent":
-            final_message = message
-            if message.streaming_status == "DONE":
-                break
-
-    assert final_message is not None, "Should have received an agent text message"
-    assert final_message.content is not None, "Final message should have content"
-    return final_message.content.content or ""
-
-
-class TestLocalSandboxEvents:
-    """Test the Temporal local-sandbox OpenAI Agents SDK agent."""
-
-    @pytest.mark.asyncio
-    async def test_shell_python_version(self, client: AsyncAgentex, agent_id: str):
-        """The agent should run `python3 --version` in the local sandbox.
-
-        The sandbox runs on Python 3.12, so the real output contains "Python 3".
-        """
-        task_id = await _create_task_and_await_welcome(client, agent_id)
-        text = await _send_and_collect_agent_text(
-            client,
-            agent_id,
-            task_id,
-            "Use your shell to print the Python version on this machine, then "
-            "tell me what it is.",
-        )
-        assert text, "Expected a non-empty response from the sandbox agent."
-        assert "Python 3" in text
-
-    @pytest.mark.asyncio
-    async def test_shell_compute(self, client: AsyncAgentex, agent_id: str):
-        """The agent should use python3 in the sandbox to compute 21 * 2 == 42."""
-        task_id = await _create_task_and_await_welcome(client, agent_id)
-        text = await _send_and_collect_agent_text(
-            client,
-            agent_id,
-            task_id,
-            "Use python3 in your shell to compute 21 * 2 and tell me the result.",
-        )
-        assert text, "Expected a non-empty response from the sandbox agent."
-        assert "42" in text
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore b/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore
index c4f7a8b4b..c49489471 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore
@@ -40,4 +40,4 @@ venv.bak/
 .gitignore
 
 # Misc
-.DS_Store 
\ No newline at end of file
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example b/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example
deleted file mode 100644
index ab1a5790f..000000000
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example
+++ /dev/null
@@ -1,13 +0,0 @@
-# at130-langgraph - Environment Variables
-# Copy this file to .env and fill in the values
-
-# API key for your LLM provider
-LITELLM_API_KEY=
-
-# LLM base URL (optional - override to use a different provider)
-# OPENAI_BASE_URL=
-
-# SGP Configuration (optional - for tracing)
-# SGP_API_KEY=
-# SGP_ACCOUNT_ID=
-# SGP_CLIENT_BASE_URL=
\ No newline at end of file
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/README.md b/examples/tutorials/10_async/10_temporal/130_langgraph/README.md
index 61ccaf66a..0820f56ab 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/README.md
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/README.md
@@ -1,58 +1,49 @@
-# at130-langgraph — AgentEx Temporal + LangGraph
+# Tutorial: Temporal LangGraph Agent
 
-A minimal Temporal-backed [LangGraph](https://langchain-ai.github.io/langgraph/)
-agent. It uses the official [`temporalio.contrib.langgraph`](https://docs.temporal.io/develop/python/integrations/langgraph)
-plugin so each LangGraph node runs as a durable **Temporal activity** (the LLM
-`agent` node) or inline in the **workflow** (the `tools` node) — set per node
-with `execute_in`. *Temporal is the runtime; LangGraph is the agent framework.*
+This tutorial demonstrates how to build a **Temporal-backed** LangGraph agent on
+AgentEx using the **unified harness surface**. The agent's LLM node runs as a
+durable Temporal activity; the tools node runs inline in the workflow.
 
-> The Temporal LangGraph plugin is currently **experimental**.
+## Key Concepts
 
-## The graph
+### Temporal + LangGraph
 
-```
-START → agent → (tool calls?) → tools → agent
-             → (no tool calls?) → END
-```
-
-- `agent` (`execute_in="activity"`): the LLM call — a retried, observable Temporal activity.
-- `tools` (`execute_in="workflow"`): runs the tool calls inline in the workflow.
+The ``LangGraphPlugin`` from ``temporalio.contrib.langgraph`` turns annotated graph
+nodes into Temporal activities or inline workflow callables:
 
-The router and tools are `async` so LangGraph awaits them directly (a sync
-callable is offloaded via `run_in_executor`, which Temporal workflows forbid).
+- `agent` node: `execute_in="activity"` (durable, retryable LLM call)
+- `tools` node: `execute_in="workflow"` (inline, fast tool execution)
 
-## Project structure
-
-```
-130_langgraph/
-├── project/
-│   ├── acp.py          # Thin async ACP server; registers the LangGraphPlugin
-│   ├── workflow.py     # Runs the graph each turn; keeps multi-turn memory
-│   ├── graph.py        # LangGraph graph; nodes tagged execute_in activity/workflow
-│   └── tools.py        # Async tool(s)
-└── run_worker.py is project/run_worker.py
-```
+### Message surfacing
 
-## Running
+After each turn, ``emit_langgraph_messages`` converts the new LangGraph messages
+(tool requests, tool responses, final text) into AgentEx ``TaskMessage`` objects
+and posts them to the task's message stream.
 
-```bash
-agentex agents run --manifest manifest.yaml
-```
+This is the Temporal-specific path. The non-Temporal async/sync channels use
+``UnifiedEmitter.auto_send_turn`` / ``UnifiedEmitter.yield_turn`` with
+``LangGraphTurn`` instead.
 
-Open the Temporal UI at http://localhost:8080 to watch the workflow and the
-`agent` activity execute. Use `dev.ipynb` to create a task and send messages.
+## Files
 
-## Adding tools
+| File | Description |
+|------|-------------|
+| `project/acp.py` | ACP server (Temporal config, LangGraphPlugin) |
+| `project/graph.py` | LangGraph graph (agent + tools nodes) |
+| `project/workflow.py` | Temporal workflow (signal handlers, emit_langgraph_messages) |
+| `project/run_worker.py` | Temporal worker runner |
+| `project/tools.py` | Tool definitions (weather example) |
+| `tests/test_agent.py` | Integration tests |
+| `manifest.yaml` | Agent configuration (name: at130-langgraph) |
 
-Define an **async** `@tool` in `project/tools.py` and add it to `TOOLS`. The
-model is bound with `TOOLS` and the tool node runs them by name.
+## Running Locally
 
-For a fuller version with human-in-the-loop approval and graph-introspection
-queries, scaffold the `temporal-langgraph` template via `agentex init`.
+```bash
+agentex agents run
+```
 
-## Tests
+## Running Tests
 
-- `tests/test_graph_temporal.py` — hermetic ReAct-loop test with a stub model,
-  plus a live end-to-end run through the real Temporal plugin (skipped unless
-  `LITELLM_API_KEY` is set).
-- `tests/test_agent.py` — live integration against a running agent.
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb b/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb
deleted file mode 100644
index 5320daac7..000000000
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb
+++ /dev/null
@@ -1,126 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "36834357",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from agentex import Agentex\n",
-    "\n",
-    "client = Agentex(base_url=\"http://localhost:5003\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d1c309d6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "AGENT_NAME = \"at130-langgraph\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9f6e6ef0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# (REQUIRED) Create a new task. For Async agents, you must create a task for messages to be associated with.\n",
-    "import uuid\n",
-    "\n",
-    "rpc_response = client.agents.create_task(\n",
-    "    agent_name=AGENT_NAME,\n",
-    "    params={\n",
-    "        \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n",
-    "        \"params\": {}\n",
-    "    }\n",
-    ")\n",
-    "\n",
-    "task = rpc_response.result\n",
-    "print(task)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b03b0d37",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Send an event to the agent\n",
-    "\n",
-    "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n",
-    "# - TextContent: A message with just text content   \n",
-    "# - DataContent: A message with JSON-serializable data content\n",
-    "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n",
-    "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n",
-    "\n",
-    "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n",
-    "\n",
-    "rpc_response = client.agents.send_event(\n",
-    "    agent_name=AGENT_NAME,\n",
-    "    params={\n",
-    "        \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n",
-    "        \"task_id\": task.id,\n",
-    "    }\n",
-    ")\n",
-    "\n",
-    "event = rpc_response.result\n",
-    "print(event)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a6927cc0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Subscribe to the async task messages produced by the agent\n",
-    "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n",
-    "\n",
-    "task_messages = subscribe_to_async_task_messages(\n",
-    "    client=client,\n",
-    "    task=task, \n",
-    "    only_after_timestamp=event.created_at, \n",
-    "    print_messages=True,\n",
-    "    rich_print=True,\n",
-    "    timeout=5,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4864e354",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
\ No newline at end of file
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml b/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml
deleted file mode 100644
index d54d8e5ff..000000000
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-# Agent Environment Configuration
-# ------------------------------
-# This file defines environment-specific settings for your agent.
-# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment.
-
-# ********** EXAMPLE **********
-# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI
-# environments:
-#   dev:
-#     auth:
-#       principal:
-#         user_id: "1234567890"
-#         user_name: "John Doe"
-#         user_email: "john.doe@example.com"
-#         user_role: "admin"
-#       user_permissions: "read, write, delete"
-#     helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts
-#       replicas: 3
-#       resources:
-#         requests:
-#           cpu: "1000m"
-#           memory: "2Gi"
-#         limits:
-#           cpu: "2000m"
-#           memory: "4Gi"
-#       env:
-#         - name: LOG_LEVEL
-#           value: "DEBUG"
-#         - name: ENVIRONMENT
-#           value: "staging"
-#
-#     kubernetes: 
-#       # OPTIONAL - Otherwise it will be derived from separately. However, this can be used to override the derived
-#       #   namespace and deploy it with in the same namespace that already exists for a separate agent.
-#       namespace: "team-at130-langgraph"
-# ********** END EXAMPLE **********
-
-schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI
-environments:
-  dev:
-    auth:
-      principal:
-        user_id: # TODO: Fill in
-        account_id: # TODO: Fill in
-    helm_overrides: 
-      # This is used to override the global helm values.yaml file in the agentex-agent helm charts
-      replicaCount: 2
-      resources:
-        requests:
-          cpu: "500m"
-          memory: "1Gi"
-        limits:
-          cpu: "1000m"
-          memory: "2Gi"
-      temporal-worker:
-        enabled: true
-        replicaCount: 2
-        resources:
-          requests:
-            cpu: "500m"
-            memory: "1Gi"
-          limits:
-            cpu: "1000m"
-            memory: "2Gi"
\ No newline at end of file
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml b/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml
index d1f5960b1..534c8dd58 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml
@@ -1,20 +1,5 @@
-# Agent Manifest Configuration
-# ---------------------------
-# This file defines how your agent should be built and deployed.
-
-# Build Configuration
-# ------------------
-# The build config defines what gets packaged into your agent's Docker image.
-# This same configuration is used whether building locally or remotely.
-#
-# When building:
-# 1. All files from include_paths are collected into a build context
-# 2. The context is filtered by dockerignore rules
-# 3. The Dockerfile uses this context to build your agent's image
-# 4. The image is pushed to a registry and used to run your agent
 build:
   context:
-    # Build from the tutorials root so shared test_utils are available.
     root: ../../../
     include_paths:
       - 10_async/10_temporal/130_langgraph
@@ -22,107 +7,53 @@ build:
     dockerfile: 10_async/10_temporal/130_langgraph/Dockerfile
     dockerignore: 10_async/10_temporal/130_langgraph/.dockerignore
 
-
-# Local Development Configuration
-# -----------------------------
-# Only used when running the agent locally
 local_development:
   agent:
-    port: 8000  # Port where your local ACP server is running
-    host_address: host.docker.internal  # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) 
-
-  # File paths for local development (relative to this manifest.yaml)
+    port: 8000
+    host_address: host.docker.internal
   paths:
-    # Path to ACP server file
-    # Examples:
-    #   project/acp.py          (standard)
-    #   src/server.py           (custom structure)
-    #   ../shared/acp.py        (shared across projects)
-    #   /absolute/path/acp.py   (absolute path)
     acp: project/acp.py
-    
-    # Path to temporal worker file
-    # Examples:
-    #   project/run_worker.py   (standard)
-    #   workers/temporal.py     (custom structure)
-    #   ../shared/worker.py     (shared across projects)
     worker: project/run_worker.py
 
-
-# Agent Configuration
-# -----------------
 agent:
-  # Type of agent - either sync or async
   acp_type: async
-
-  # Unique name for your agent
-  # Used for task routing and monitoring
   name: at130-langgraph
+  description: "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities"
 
-  # Description of what your agent does
-  # Helps with documentation and discovery
-  description: "A Temporal-backed LangGraph agent whose nodes run as Temporal activities"
-
-  # Temporal workflow configuration
-  # This enables your agent to run as a Temporal workflow for long-running tasks
   temporal:
     enabled: true
     workflows:
-      # Name of the workflow class
-      # Must match the @workflow.defn name in your workflow.py
       - name: at130-langgraph
-
-        # Queue name for task distribution
-        # Used by Temporal to route tasks to your agent
-        # Convention: <agent_name>_task_queue
         queue_name: at130_langgraph_queue
 
-    # Optional: Health check port for temporal worker
-    # Defaults to 80 if not specified
-    # health_check_port: 80
-
-  # Optional: Credentials mapping
-  # Maps Kubernetes secrets to environment variables
-  # Common credentials include:
   credentials:
     - env_var_name: REDIS_URL
       secret_name: redis-url-secret
       secret_key: url
-  #   - env_var_name: LITELLM_API_KEY
-  #     secret_name: litellm-api-key
-  #     secret_key: api-key
-  
-  # Optional: Set Environment variables for running your agent locally as well 
-  # as for deployment later on
-  env: {}
-  #   LITELLM_API_KEY: "<YOUR_LITELLM_API_KEY_HERE>"
-  #   OPENAI_BASE_URL: "<YOUR_OPENAI_BASE_URL_HERE>"
-  #   OPENAI_ORG_ID: "<YOUR_OPENAI_ORG_ID_HERE>"
+    # graph.py builds ChatOpenAI(model=MODEL_NAME); a deployed worker needs the
+    # model credential or the first activity call fails.
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
 
+  env: {}
 
-# Deployment Configuration
-# -----------------------
-# Configuration for deploying your agent to Kubernetes clusters
 deployment:
-  # Container image configuration
   image:
-    repository: "" # Update with your container registry
-    tag: "latest"  # Default tag, should be versioned in production
+    repository: ""
+    tag: "latest"
 
-  imagePullSecrets: [] # Update with your image pull secret name
-    # - name: my-registry-secret
+  imagePullSecrets: []
 
-  # Global deployment settings that apply to all clusters
-  # These can be overridden in cluster-specific environments (environments.yaml)
   global:
-    # Default replica count
+    agent:
+      name: "at130-langgraph"
+      description: "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities"
     replicaCount: 1
-    
-    # Default resource requirements
     resources:
       requests:
         cpu: "500m"
         memory: "1Gi"
       limits:
         cpu: "1000m"
-        memory: "2Gi" 
\ No newline at end of file
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py
index c01f8831c..7af9c5e68 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py
@@ -1,19 +1,13 @@
-"""ACP server for the Temporal LangGraph agent.
+"""ACP server for the Temporal harness LangGraph agent.
 
-This file is intentionally thin. When ``acp_type="async"`` is combined with
-``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires:
+Follows the ``130_langgraph`` pattern: the Temporal ``LangGraphPlugin`` runs
+graph nodes as Temporal activities. The agent logic lives in ``workflow.py``
+(the runtime) and ``graph.py`` (the LangGraph graph), executed by the Temporal
+worker (``run_worker.py``), not by this HTTP process.
 
-    HTTP task/create       → @workflow.run on the workflow class
-    HTTP task/event/send   → @workflow.signal(SignalName.RECEIVE_EVENT)
-    HTTP task/cancel       → workflow cancellation via the Temporal client
-
-so we don't define any handlers here. The agent logic lives in
-``project/workflow.py`` (the runtime) and ``project/graph.py`` (the LangGraph
-graph whose nodes run as Temporal activities), executed by the Temporal worker
-(``project/run_worker.py``), not by this HTTP process.
-
-The ``LangGraphPlugin`` is registered here too so the Temporal client started
-by FastACP shares the same graph registry as the worker.
+The workflow uses ``emit_langgraph_messages`` to surface turn messages to
+AgentEx. That helper is Temporal-specific and is not replaced by the unified
+harness here (``UnifiedEmitter`` targets the non-Temporal async/sync channels).
 """
 
 from __future__ import annotations
@@ -33,10 +27,8 @@
 acp = FastACP.create(
     acp_type="async",
     config=TemporalACPConfig(
-        # When deployed to the cluster, the Temporal address is set automatically.
-        # Locally we point at the Temporal service from docker compose.
         type="temporal",
         temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
         plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})],
     ),
-)
\ No newline at end of file
+)
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py
index 0589aa9ba..7adba3ae4 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py
@@ -1,24 +1,9 @@
 """LangGraph graph for at130-langgraph — nodes run as Temporal activities.
 
-The ``temporalio.contrib.langgraph`` plugin runs each node where its
-``execute_in`` metadata says: the LLM ``agent`` node as a durable Temporal
-**activity**, the ``tools`` node inline in the **workflow**.
-
-    START → agent → (tool calls?) → tools → agent
-                 → (no tool calls?) → END
-
-The router and tools are ``async`` so LangGraph awaits them directly — a sync
-callable would be offloaded via ``run_in_executor``, which Temporal's workflow
-event loop does not support.
-
-The in-workflow ``tools`` node is a plain ``async`` function rather than
-LangGraph's ``ToolNode`` prebuilt on purpose. The plugin wraps an in-workflow
-node in ``wrap_workflow``, whose closure captures the wrapped object. When that
-object is itself a LangChain ``Runnable`` (as ``ToolNode`` is), LangGraph's
-``compile()`` subgraph detection (``find_subgraph_pregel`` →
-``get_function_nonlocals``) recurses through that wrapper without cycle
-detection and never terminates, tripping Temporal's deadlock detector. A plain
-function isn't a ``Runnable``, so compile stays trivial.
+Identical in structure to ``130_langgraph/project/graph.py``. The graph
+definition is not affected by the harness migration; only the agent naming
+changes. The LLM ``agent`` node runs as a durable Temporal activity;
+the ``tools`` node runs inline in the workflow.
 """
 
 from __future__ import annotations
@@ -40,10 +25,8 @@
 
 from project.tools import TOOLS
 
-# Look up tools by name for the in-workflow tools node.
 _TOOLS_BY_NAME = {tool.name: tool for tool in TOOLS}
 
-# Name this graph is registered under in the LangGraphPlugin (acp.py / run_worker.py).
 GRAPH_NAME = "at130-langgraph"
 MODEL_NAME = "gpt-4o"
 SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
@@ -62,37 +45,27 @@ async def agent_node(state: AgentState) -> dict[str, Any]:
     llm = ChatOpenAI(model=MODEL_NAME).bind_tools(TOOLS)
     messages = state["messages"]
     if not messages or not isinstance(messages[0], SystemMessage):
-        system = SystemMessage(
-            content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
-        )
+        system = SystemMessage(content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
         messages = [system, *messages]
     return {"messages": [await llm.ainvoke(messages)]}
 
 
 async def tools_node(state: AgentState) -> dict[str, Any]:
-    """Run the tool calls the model requested. Runs inline in the workflow.
-
-    A plain ``async`` function (not LangGraph's ``ToolNode``) — see the module
-    docstring for why a ``Runnable`` tools node can't be compiled here.
-    """
+    """Run the tool calls the model requested. Runs inline in the workflow."""
     last = state["messages"][-1]
     results: list[Any] = []
     for call in getattr(last, "tool_calls", None) or []:
         tool = _TOOLS_BY_NAME.get(call["name"])
-        # Mirror ToolNode: surface an unknown/hallucinated tool name as an error
-        # ToolMessage so the graph keeps running instead of crashing the node.
         if tool is None:
             output = f"Error: unknown tool {call['name']!r}. Available: {list(_TOOLS_BY_NAME)}"
         else:
             output = await tool.ainvoke(call["args"])
-        results.append(
-            ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"])
-        )
+        results.append(ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"]))
     return {"messages": results}
 
 
 async def route_after_agent(state: AgentState) -> str:
-    """Go to the tools node if the model requested tools, else finish (async router)."""
+    """Go to the tools node if the model requested tools, else finish."""
     last = state["messages"][-1]
     return "tools" if getattr(last, "tool_calls", None) else END
 
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py
index 7040f560b..4b31bf396 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py
@@ -5,8 +5,7 @@
 
 The ``LangGraphPlugin`` is given the graph registry (``{ GRAPH_NAME: graph }``).
 At runtime it turns the graph's ``execute_in="activity"`` nodes into Temporal
-activities and registers them on the worker automatically — so we don't have
-to enumerate node activities by hand.
+activities and registers them on the worker automatically.
 """
 
 import asyncio
@@ -14,7 +13,7 @@
 from temporalio.contrib.langgraph import LangGraphPlugin
 
 from project.graph import GRAPH_NAME, build_graph
-from project.workflow import At130LanggraphWorkflow
+from project.workflow import AtHarnessLanggraphWorkflow
 from agentex.lib.utils.debug import setup_debug_if_enabled
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.environment_variables import EnvironmentVariables
@@ -32,9 +31,6 @@ async def main():
     if task_queue_name is None:
         raise ValueError("WORKFLOW_TASK_QUEUE is not set")
 
-    # AgentexWorker runs workflows with an unsandboxed runner, so importing
-    # langchain/langgraph inside the workflow + nodes is fine. The LangGraph
-    # plugin registers the graph's activity-nodes for us.
     worker = AgentexWorker(
         task_queue=task_queue_name,
         plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})],
@@ -42,9 +38,9 @@ async def main():
 
     await worker.run(
         activities=get_all_activities(),
-        workflow=At130LanggraphWorkflow,
+        workflow=AtHarnessLanggraphWorkflow,
     )
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
\ No newline at end of file
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py
index 20b7185ee..e7220016e 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py
@@ -1,20 +1,37 @@
-"""Tools for the LangGraph agent.
+"""Tool definitions for the 130_langgraph temporal agent."""
 
-Tools are ``async`` so the in-workflow tool node can await them directly
-(a sync tool would be offloaded via ``run_in_executor``, which Temporal's
-workflow event loop does not allow).
-"""
+from langchain_core.tools import Tool
 
-from __future__ import annotations
 
-from langchain_core.tools import tool
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
 
+    Args:
+        city: The name of the city to get weather for.
 
-@tool
-async def get_weather(city: str) -> str:
-    """Get the current weather for a city."""
-    # TODO: replace with a real weather API call.
+    Returns:
+        A string describing the weather conditions.
+    """
     return f"The weather in {city} is sunny and 72°F"
 
 
-TOOLS = [get_weather]
+async def aget_weather(city: str) -> str:
+    """Native async tool entrypoint.
+
+    ``tools_node`` runs inline in the Temporal workflow and invokes tools via
+    ``tool.ainvoke``. A sync-only tool forces LangChain to bridge through
+    ``run_in_executor`` (a thread pool), which the deterministic Temporal
+    workflow event loop forbids (``NotImplementedError``). Providing a real
+    coroutine keeps tool execution on the workflow loop.
+    """
+    return get_weather(city)
+
+
+weather_tool = Tool(
+    name="get_weather",
+    func=get_weather,
+    coroutine=aget_weather,
+    description="Get the current weather for a city. Input should be a city name.",
+)
+
+TOOLS = [weather_tool]
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py
index a50670251..b9224ca00 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py
@@ -1,4 +1,4 @@
-"""Temporal workflow for at130-langgraph — Temporal as the LangGraph runtime.
+"""Temporal workflow for at130-langgraph.
 
 Each turn the workflow runs the LangGraph graph (``project/graph.py``) via the
 ``temporalio.contrib.langgraph`` plugin. The plugin runs the LLM ``agent`` node
@@ -37,7 +37,7 @@
 
 
 @workflow.defn(name=environment_variables.WORKFLOW_NAME)
-class At130LanggraphWorkflow(BaseWorkflow):
+class AtHarnessLanggraphWorkflow(BaseWorkflow):
     """Runs the LangGraph agent each turn; its nodes run as Temporal activities."""
 
     def __init__(self) -> None:
@@ -56,10 +56,7 @@ async def on_task_event_send(self, params: SendEventParams) -> None:
         result = await compiled.ainvoke({"messages": self._messages})
         self._messages = result["messages"]
 
-        # Surface the messages this turn produced (tool calls, results, final
-        # text) to the AgentEx UI. The SDK helper does the LangGraph→AgentEx
-        # message conversion.
-        await emit_langgraph_messages(self._messages[self._emitted:], params.task.id)
+        await emit_langgraph_messages(self._messages[self._emitted :], params.task.id)
         self._emitted = len(self._messages)
 
     @workflow.signal
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml b/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml
index e22905de4..6d2262761 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml
@@ -5,13 +5,11 @@ build-backend = "hatchling.build"
 [project]
 name = "at130-langgraph"
 version = "0.1.0"
-description = "A Temporal-backed LangGraph agent whose nodes run as Temporal activities"
+description = "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities"
 requires-python = ">=3.12"
 dependencies = [
     "agentex-sdk",
     "scale-gp",
-    # Temporal with the LangGraph plugin (temporalio.contrib.langgraph),
-    # which runs LangGraph nodes as Temporal activities. Needs >=1.27.0.
     "temporalio[langgraph]>=1.27.0",
     "langchain-openai",
     "langchain-core",
@@ -39,4 +37,4 @@ target-version = ['py312']
 
 [tool.isort]
 profile = "black"
-line_length = 88
\ No newline at end of file
+line_length = 88
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py
index b798f568f..f2292389f 100644
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py
+++ b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py
@@ -1,4 +1,4 @@
-"""Integration tests for the Temporal + LangGraph agent (live agent required).
+"""Integration tests for the Temporal harness LangGraph agent (live agent required).
 
 These drive a *running* agent over the AgentEx API and verify that:
 - the agent sends a welcome message on task creation,
@@ -6,9 +6,6 @@
   (proving the LLM node ran as a Temporal activity and the tool node ran),
 - the final answer reflects the tool output.
 
-For fast, network-free coverage of the graph + human-in-the-loop logic, see
-``test_graph_temporal.py``.
-
 To run:
 1. Start the agent (worker + ACP server): ``agentex agents run --manifest manifest.yaml``
 2. Set AGENTEX_API_BASE_URL if not using the default
@@ -60,29 +57,18 @@ class TestNonStreamingEvents:
     @pytest.mark.asyncio
     async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
         """Create a task, ask about weather, verify the tool round-trip."""
-        task_response = await client.agents.create_task(
-            agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)
-        )
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
         assert task is not None
 
-        # Wait for the welcome message from on_task_create
         task_creation_found = False
-        async for message in poll_messages(
-            client=client, task_id=task.id, timeout=30, sleep_interval=1.0
-        ):
+        async for message in poll_messages(client=client, task_id=task.id, timeout=30, sleep_interval=1.0):
             assert isinstance(message, TaskMessage)
-            if (
-                message.content
-                and message.content.type == "text"
-                and message.content.author == "agent"
-            ):
+            if message.content and message.content.type == "text" and message.content.author == "agent":
                 task_creation_found = True
                 break
         assert task_creation_found, "Task creation welcome message not found"
 
-        # Ask about weather — the agent (LangGraph node, as a Temporal activity)
-        # should call get_weather.
         seen_tool_request = False
         seen_tool_response = False
         final_message = None
@@ -101,11 +87,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
             if message.content and message.content.type == "tool_response":
                 seen_tool_response = True
 
-            if (
-                message.content
-                and message.content.type == "text"
-                and message.content.author == "agent"
-            ):
+            if message.content and message.content.type == "text" and message.content.author == "agent":
                 final_message = message
                 content_length = len(getattr(message.content, "content", "") or "")
                 if getattr(message, "streaming_status", None) in (None, "DONE") and content_length > 0:
@@ -115,11 +97,8 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
         assert seen_tool_request, "Expected a tool_request (agent calling get_weather)"
         assert seen_tool_response, "Expected a tool_response (get_weather result)"
         assert final_message is not None, "Expected a final agent text message"
-        final_text = (
-            getattr(final_message.content, "content", None) if final_message.content else None
-        )
+        final_text = getattr(final_message.content, "content", None) if final_message.content else None
         assert isinstance(final_text, str) and len(final_text) > 0
-        # get_weather always returns "72°F" — the response should mention it.
         assert "72" in final_text, "Expected weather response to mention 72°F"
 
 
diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py
deleted file mode 100644
index 485b896f6..000000000
--- a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""Tests for the Temporal + LangGraph agent's graph.
-
-Two layers:
-
-1. ``TestGraphLogic`` — hermetic, no network. Compiles the actual shipped
-   graph (``project/graph.py``) with a deterministic stub model and runs the
-   ReAct loop (agent → tools → agent) to completion.
-
-2. ``TestTemporalPlugin`` — end-to-end through the real Temporal LangGraph
-   plugin on a local Temporal server, proving the LLM node runs as an activity
-   and the tool node in the workflow. Needs a real model, so it is skipped
-   unless ``LITELLM_API_KEY`` (or ``OPENAI_API_KEY``) is set.
-
-Run from the agent's own (uv) environment:  pytest tests/test_graph_temporal.py -v
-"""
-
-from __future__ import annotations
-
-import os
-import uuid
-
-import pytest
-
-pytest.importorskip("langgraph")
-pytest.importorskip("temporalio.contrib.langgraph")
-
-import project.graph as graph_module
-from temporalio import workflow
-from project.graph import GRAPH_NAME, build_graph
-from langchain_core.messages import AIMessage, ToolMessage
-from temporalio.contrib.langgraph import graph as lg_graph
-
-
-@workflow.defn
-class _DriverWorkflow:
-    """Module-level driver workflow (Temporal forbids local workflow classes)."""
-
-    @workflow.run
-    async def run(self, message: str) -> str:
-        compiled = lg_graph(GRAPH_NAME).compile()
-        result = await compiled.ainvoke({"messages": [{"role": "user", "content": message}]})
-        return result["messages"][-1].content
-
-
-class _StubModel:
-    """Deterministic stand-in for ``ChatOpenAI(...).bind_tools(...)``.
-
-    First call → emit a tool call for ``get_weather``; once a ToolMessage is in
-    the history → emit a plain text answer. Drives the full ReAct loop offline.
-    """
-
-    def bind_tools(self, _tools):
-        return self
-
-    async def ainvoke(self, messages):
-        if any(isinstance(m, ToolMessage) for m in messages):
-            return AIMessage(content="All done — the tool has run.")
-        return AIMessage(
-            content="",
-            tool_calls=[{"id": "call_1", "name": "get_weather", "args": {"city": "Denver"}}],
-        )
-
-
-class TestGraphLogic:
-    """Hermetic test of the ReAct loop, no network."""
-
-    @pytest.mark.asyncio
-    async def test_react_loop_runs_tool(self, monkeypatch):
-        monkeypatch.setattr(graph_module, "ChatOpenAI", lambda *_a, **_k: _StubModel())
-        compiled = build_graph().compile()
-        result = await compiled.ainvoke({"messages": [{"role": "user", "content": "go"}]})
-
-        tool_outputs = [m.content for m in result["messages"] if isinstance(m, ToolMessage)]
-        assert any("sunny" in o for o in tool_outputs)
-        assert "done" in result["messages"][-1].content.lower()
-
-
-@pytest.mark.skipif(
-    not (os.environ.get("LITELLM_API_KEY") or os.environ.get("OPENAI_API_KEY")),
-    reason="needs a real model (set LITELLM_API_KEY) for the live Temporal run",
-)
-class TestTemporalPlugin:
-    """End-to-end through the real Temporal LangGraph plugin on a local server."""
-
-    @pytest.mark.asyncio
-    async def test_nodes_run_as_activities_via_plugin(self):
-        from temporalio.worker import Worker, UnsandboxedWorkflowRunner
-        from temporalio.testing import WorkflowEnvironment
-        from temporalio.contrib.langgraph import LangGraphPlugin
-
-        plugin = LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})
-        async with await WorkflowEnvironment.start_local(plugins=[plugin]) as env:
-            async with Worker(
-                env.client,
-                task_queue="tq",
-                workflows=[_DriverWorkflow],
-                workflow_runner=UnsandboxedWorkflowRunner(),
-            ):
-                out = await env.client.execute_workflow(
-                    _DriverWorkflow.run,
-                    "What's the weather in Denver? Use the get_weather tool.",
-                    id=f"wf-{uuid.uuid4()}",
-                    task_queue="tq",
-                )
-        assert "denver" in out.lower()
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/.dockerignore b/examples/tutorials/10_async/10_temporal/140_claude_code/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/Dockerfile b/examples/tutorials/10_async/10_temporal/140_claude_code/Dockerfile
new file mode 100644
index 000000000..c909ee6c7
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/Dockerfile
@@ -0,0 +1,46 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+RUN npm install -g @anthropic-ai/claude-code || true
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/140_claude_code/pyproject.toml /app/140_claude_code/pyproject.toml
+COPY 10_async/10_temporal/140_claude_code/README.md /app/140_claude_code/README.md
+
+WORKDIR /app/140_claude_code
+
+COPY 10_async/10_temporal/140_claude_code/project /app/140_claude_code/project
+COPY 10_async/10_temporal/140_claude_code/tests /app/140_claude_code/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=at140-claude-code
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When deploying the worker, replace the CMD with:
+# CMD ["python", "project/run_worker.py"]
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/README.md b/examples/tutorials/10_async/10_temporal/140_claude_code/README.md
new file mode 100644
index 000000000..61cc94183
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/README.md
@@ -0,0 +1,76 @@
+# Tutorial 140 (async/temporal): Temporal Claude Code Agent
+
+This tutorial demonstrates how to build a **Temporal-backed** agent that
+spawns the Claude Code CLI as a local subprocess and delivers its output
+through the Agentex unified harness surface via ``ClaudeCodeTurn`` and
+``UnifiedEmitter.auto_send_turn``, with Temporal providing durable execution
+and crash recovery.
+
+## Key Concepts
+
+### Temporal + ClaudeCodeTurn
+
+The Temporal workflow (``project/workflow.py``) holds state durably. Each user
+message arrives as a signal (``on_task_event_send``), spawns the Claude Code
+CLI locally, wraps the stdout line stream in ``ClaudeCodeTurn``, and pushes
+events to the task's Redis stream via ``UnifiedEmitter.auto_send_turn``.
+
+``workflow.now()`` is passed as ``created_at`` so message timestamps are
+deterministic under Temporal replay.
+
+### Multi-turn session resume
+
+The workflow persists the Claude Code ``session_id`` from the ``result``
+envelope. On the next turn, ``-r <session_id>`` is passed to the CLI to
+resume the conversation. Temporal's durable state ensures the session_id
+survives worker crashes.
+
+### Note on subprocess in workflow code
+
+For simplicity, this tutorial spawns the subprocess directly inside the
+workflow signal handler. For production use, move the spawn into a custom
+Temporal activity so each subprocess invocation gets independent retry and
+timeout guarantees. See
+``examples/tutorials/10_async/10_temporal/030_custom_activities/`` for
+that pattern.
+
+### Injectable spawn seam
+
+``_spawn_claude`` in ``project/workflow.py`` is a top-level async generator.
+Tests monkeypatch it to inject pre-recorded stream-json lines so offline
+unit tests run without the CLI.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| ``project/acp.py`` | Thin ACP server; wires Temporal (no handlers) |
+| ``project/workflow.py`` | Temporal workflow + ``_spawn_claude`` seam |
+| ``project/run_worker.py`` | Temporal worker entry point |
+| ``tests/test_agent.py`` | Live integration tests (needs CLI + Temporal + API key) |
+| ``tests/test_agent_offline.py`` | Offline unit tests with injected fake subprocess |
+| ``manifest.yaml`` | Agent configuration |
+
+## Running Locally (live)
+
+Requires Temporal server, the ``claude`` CLI, and ``ANTHROPIC_API_KEY``:
+
+```bash
+npm install -g @anthropic-ai/claude-code
+export ANTHROPIC_API_KEY=sk-ant-...
+agentex agents run
+```
+
+## Running Offline Tests
+
+No CLI, Temporal, or API key needed:
+
+```bash
+uv run pytest tests/test_agent_offline.py -v
+```
+
+## Notes
+
+- Production isolation (sandbox, secrets, MCP) is the golden agent's concern.
+- The subprocess spawn should be moved to a custom activity in production.
+- The ``--verbose`` flag is included to match the golden agent's invocation.
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/manifest.yaml b/examples/tutorials/10_async/10_temporal/140_claude_code/manifest.yaml
new file mode 100644
index 000000000..9328b1713
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/manifest.yaml
@@ -0,0 +1,62 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/140_claude_code
+      - test_utils
+    dockerfile: 10_async/10_temporal/140_claude_code/Dockerfile
+    dockerignore: 10_async/10_temporal/140_claude_code/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at140-claude-code
+  description: A Temporal-backed Claude Code agent streaming the unified harness surface via a local CLI subprocess
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at140-claude-code
+        queue_name: at140_claude_code_queue
+
+  credentials:
+    - env_var_name: ANTHROPIC_API_KEY
+      secret_name: anthropic-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "at140-claude-code"
+      description: "A Temporal-backed Claude Code agent streaming via local CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/__init__.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/acp.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/acp.py
new file mode 100644
index 000000000..07258f6d8
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/acp.py
@@ -0,0 +1,31 @@
+"""ACP server for the Temporal Claude Code tutorial.
+
+This file is intentionally thin. When ``acp_type="async"`` is combined
+with ``TemporalACPConfig``, FastACP auto-wires:
+
+    HTTP task/create       -> @workflow.run on the workflow class
+    HTTP task/event/send   -> @workflow.signal(SignalName.RECEIVE_EVENT)
+    HTTP task/cancel       -> workflow cancellation via the Temporal client
+
+The actual agent code lives in ``project/workflow.py`` and is executed by
+the Temporal worker (``project/run_worker.py``), not by this HTTP process.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/activities.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/activities.py
new file mode 100644
index 000000000..dcba0f9a7
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/activities.py
@@ -0,0 +1,139 @@
+"""Temporal activity for the Claude Code tutorial.
+
+Subprocess spawning (and any other I/O) must run inside a Temporal *activity*,
+not in workflow code. Temporal runs workflow + signal-handler bodies on a
+deterministic sandbox event loop that does not implement ``subprocess_exec``
+(or threads / sockets), so spawning the CLI directly in the signal handler
+raises ``NotImplementedError``. This activity runs the Claude Code CLI, drives
+the ``ClaudeCodeTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async
+Redis push path), and returns the turn result to the workflow.
+
+The ``_spawn_claude`` async generator is an injectable seam: offline tests
+provide a fake that yields pre-recorded stdout lines so no real CLI runs.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any, AsyncIterator
+from datetime import datetime
+
+from temporalio import activity
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.utils.model_utils import BaseModel
+
+logger = make_logger(__name__)
+
+RUN_CLAUDE_CODE_TURN_ACTIVITY = "run_claude_code_turn"
+
+
+class RunClaudeCodeTurnParams(BaseModel):
+    """Arguments for one Claude Code turn run inside an activity."""
+
+    task_id: str
+    prompt: str
+    trace_id: str | None = None
+    parent_span_id: str | None = None
+    session_id: str | None = None
+    created_at: datetime | None = None
+
+
+class RunClaudeCodeTurnResult(BaseModel):
+    """Result returned from the activity to the workflow."""
+
+    final_text: str
+    session_id: str | None = None
+
+
+async def _spawn_claude(prompt: str, session_id: str | None = None) -> AsyncIterator[str]:
+    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.
+
+    Pass ``session_id`` to resume a previous Claude Code session (multi-turn
+    memory via ``-r <session_id>``).
+
+    Injectable seam: tests monkeypatch this with a fake async iterator so no
+    real CLI invocation is needed offline.
+    """
+    cmd = [
+        "claude",
+        "-p",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+    ]
+    if session_id:
+        cmd.extend(["-r", session_id])
+
+    proc = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    assert proc.stdout is not None
+    assert proc.stdin is not None
+
+    proc.stdin.write(prompt.encode())
+    proc.stdin.close()
+
+    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
+    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
+    # on its stderr write while we block reading stdout — a deadlock. A
+    # background task keeps stderr flowing so stdout never stalls.
+    async def _drain_stderr() -> None:
+        assert proc.stderr is not None
+        async for _ in proc.stderr:
+            pass
+
+    stderr_task = asyncio.create_task(_drain_stderr())
+
+    try:
+        buffer = ""
+        async for chunk in proc.stdout:
+            buffer += chunk.decode("utf-8", errors="replace")
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                line = line.strip()
+                if line:
+                    yield line
+
+        if buffer.strip():
+            yield buffer.strip()
+
+        await proc.wait()
+    finally:
+        # Release the subprocess and stderr drain task even if the consumer
+        # abandons the generator early (task cancellation / client disconnect):
+        # cancel the drain task and terminate+reap the process if it is still
+        # running, so neither is leaked.
+        stderr_task.cancel()
+        try:
+            await stderr_task
+        except asyncio.CancelledError:
+            pass
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+
+
+@activity.defn(name=RUN_CLAUDE_CODE_TURN_ACTIVITY)
+async def run_claude_code_turn(params: RunClaudeCodeTurnParams) -> dict[str, Any]:
+    """Run one Claude Code turn end-to-end and stream events to the task.
+
+    Runs in an activity (real asyncio loop) so subprocess I/O is permitted.
+    """
+    emitter = UnifiedEmitter(
+        task_id=params.task_id,
+        trace_id=params.trace_id,
+        parent_span_id=params.parent_span_id,
+    )
+    turn = ClaudeCodeTurn(_spawn_claude(params.prompt, session_id=params.session_id))
+    result = await emitter.auto_send_turn(turn, created_at=params.created_at)
+
+    return RunClaudeCodeTurnResult(final_text=result.final_text, session_id=turn.session_id).model_dump()
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/run_worker.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/run_worker.py
new file mode 100644
index 000000000..58802737e
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/run_worker.py
@@ -0,0 +1,41 @@
+"""Temporal worker for the Claude Code tutorial.
+
+Run as a separate long-lived process alongside the ACP HTTP server. The
+worker polls Temporal for workflow + activity tasks and executes them.
+
+The Claude Code CLI subprocess runs in the ``run_claude_code_turn`` activity
+(registered below alongside the built-in Agentex activities), because
+subprocess I/O is not permitted on the Temporal workflow event loop.
+"""
+
+import asyncio
+
+from project.workflow import At140ClaudeCodeWorkflow
+from project.activities import run_claude_code_turn
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    worker = AgentexWorker(task_queue=task_queue_name)
+
+    await worker.run(
+        activities=[run_claude_code_turn, *get_all_activities()],
+        workflow=At140ClaudeCodeWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/workflow.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/workflow.py
new file mode 100644
index 000000000..7f50ba8d5
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/workflow.py
@@ -0,0 +1,137 @@
+"""Temporal workflow for the Claude Code tutorial.
+
+Holds conversation state (session_id for multi-turn resume) durably across
+crashes. Each user message triggers ``on_task_event_send``, which delegates the
+turn to the ``run_claude_code_turn`` activity. The activity spawns the Claude
+Code CLI, wraps its stdout in ``ClaudeCodeTurn``, and delivers the turn via
+``UnifiedEmitter.auto_send_turn`` (the async Redis push path).
+
+Note on subprocess inside Temporal
+------------------------------------
+Subprocess (and all other) I/O must run in a Temporal *activity*, never in
+workflow code. Temporal runs workflow + signal-handler bodies on a
+deterministic sandbox event loop that does not implement ``subprocess_exec``
+(spawning the CLI there raises ``NotImplementedError``). The activity also gets
+Temporal's retry + timeout guarantees. See
+``examples/tutorials/10_async/10_temporal/030_custom_activities/`` for the
+activity pattern.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from datetime import timedelta
+
+from temporalio import workflow
+
+from agentex.lib import adk
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+with workflow.unsafe.imports_passed_through():
+    from project.activities import RunClaudeCodeTurnParams, run_claude_code_turn
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class At140ClaudeCodeWorkflow(BaseWorkflow):
+    """Temporal workflow that runs Claude Code locally for each user message.
+
+    Persists the Claude Code session_id across turns so the CLI can resume
+    the conversation (``-r <session_id>``). Temporal's durable state ensures
+    the session_id survives worker crashes.
+    """
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._turn_number = 0
+        # Claude Code session_id for multi-turn resume.
+        self._session_id: str | None = None
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Handle a user message: spawn Claude Code and push events to the task stream."""
+        self._turn_number += 1
+        task_id = params.task.id
+        prompt = params.event.content.content
+        logger.info("Turn %d for task %s", self._turn_number, task_id)
+
+        await adk.messages.create(task_id=task_id, content=params.event.content)
+
+        async with adk.tracing.span(
+            trace_id=task_id,
+            task_id=task_id,
+            name=f"Turn {self._turn_number}",
+            input={"message": prompt},
+        ) as span:
+            # Delegate the subprocess turn to an activity: subprocess I/O is not
+            # permitted on the Temporal workflow event loop. The activity streams
+            # events to the task and returns the final text + session_id.
+            # workflow.now() gives a deterministic timestamp under replay.
+            result = await workflow.execute_activity(
+                run_claude_code_turn,
+                RunClaudeCodeTurnParams(
+                    task_id=task_id,
+                    prompt=prompt,
+                    trace_id=task_id,
+                    parent_span_id=span.id if span else None,
+                    session_id=self._session_id,
+                    created_at=workflow.now(),
+                ),
+                start_to_close_timeout=timedelta(minutes=5),
+            )
+
+            # Capture session_id to enable Claude Code resume on the next turn.
+            sid = result.get("session_id")
+            if sid:
+                self._session_id = sid
+
+            if span:
+                span.output = {"final_text": result.get("final_text")}
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        logger.info("Task created: %s", params.task.id)
+
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n"
+                    "Send me a message and I'll run it through Claude Code locally."
+                ),
+            ),
+        )
+
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        logger.info("Received complete_task signal")
+        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/pyproject.toml b/examples/tutorials/10_async/10_temporal/140_claude_code/pyproject.toml
new file mode 100644
index 000000000..b9d517267
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/pyproject.toml
@@ -0,0 +1,27 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "at140-claude-code"
+version = "0.1.0"
+description = "A Temporal-backed Claude Code agent streaming the unified harness surface via a local CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "temporalio>=1.18.2",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "debugpy>=1.8.15",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent.py
new file mode 100644
index 000000000..767c707b9
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent.py
@@ -0,0 +1,249 @@
+"""Tests for the Temporal Claude Code tutorial agent.
+
+LIVE tests (``TestClaudeCodeLive``):
+  - Require Temporal server, the ACP server, the Temporal worker, the ``claude``
+    CLI on PATH, and ``ANTHROPIC_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CLAUDE_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestClaudeCodeOffline``):
+  - Inject a fake async iterator of pre-recorded stream-json lines.
+  - Assert the ``ClaudeCodeTurn`` + ``UnifiedEmitter`` pipeline drives
+    ``auto_send_turn``, populates usage, and satisfies the ``HarnessTurn``
+    protocol.
+  - Always run -- no CLI or API key needed.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-temporal-offline-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Temporal Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "session_id": "sess-temporal-offline-1",
+            "usage": {"input_tokens": 15, "output_tokens": 7},
+            "cost_usd": 0.00015,
+            "duration_ms": 350,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    """Async iterator of pre-recorded stream-json lines (no subprocess)."""
+    for line in lines:
+        yield line
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-t1", task_id="task-temporal-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Offline tests (always run -- no CLI or API key needed)
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeOffline:
+    """Unit tests that run without a real claude CLI, Temporal, or network."""
+
+    @pytest.mark.asyncio
+    async def test_auto_send_text_only_produces_output(self):
+        """auto_send_turn result carries the agent's reply text."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="offline-temporal",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+        assert "Hello from Temporal Claude Code" in result.final_text
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """Usage is populated after the events stream is exhausted."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+        usage = turn.usage()
+        assert usage.input_tokens == 15
+        assert usage.output_tokens == 7
+        assert usage.num_llm_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_stream_task_message_done_present(self):
+        """StreamTaskMessageDone must appear via yield_turn on a ClaudeCodeTurn."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+    @pytest.mark.asyncio
+    async def test_session_id_captured_in_result_envelope(self):
+        """The result envelope carries session_id (multi-turn resume support)."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+        assert turn._result_envelope is not None
+        assert turn._result_envelope.get("session_id") == "sess-temporal-offline-1"
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CLAUDE_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+pytestmark_live = pytest.mark.skipif(
+    not os.environ.get("CLAUDE_LIVE_TESTS"),
+    reason="Set CLAUDE_LIVE_TESTS=1 and ensure the `claude` CLI + ANTHROPIC_API_KEY are available",
+)
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "at140-claude-code")
+
+
+@pytestmark_live
+class TestClaudeCodeLive:
+    """Live Temporal tests -- needs Temporal server + the claude CLI + ANTHROPIC_API_KEY."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_name(self):
+        return AGENT_NAME
+
+    @pytest.fixture
+    def agent_id(self, client, agent_name):
+        agents = client.agents.list()
+        for agent in agents:
+            if agent.name == agent_name:
+                return agent.id
+        raise ValueError(f"Agent {agent_name!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Create a task, send a message, and poll until a response appears."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+        task_id = task.id
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task_id,
+                content=TextContentParam(
+                    author="user",
+                    content="Reply with exactly three words: hello from claude",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 90
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task_id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            response_msgs = [m for m in agent_msgs if "Task initialized" not in str(getattr(m.content, "content", ""))]
+            if response_msgs:
+                assert len(response_msgs) >= 1
+                return
+            time.sleep(3)
+
+        raise AssertionError("No agent response received within 90 s")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent_offline.py b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent_offline.py
new file mode 100644
index 000000000..1adc553f1
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent_offline.py
@@ -0,0 +1,230 @@
+"""Offline unit tests for the Temporal Claude Code tutorial agent.
+
+These tests do NOT require the ``claude`` CLI, Temporal, or ANTHROPIC_API_KEY.
+They inject a fake async iterator of pre-recorded stream-json lines in place of
+the real subprocess spawn and a fake streaming backend, then assert that the
+workflow's turn logic correctly drives ``UnifiedEmitter.auto_send_turn``.
+
+The injection seam is the ``_spawn_claude`` function in ``project/workflow.py``.
+Tests monkeypatch it with a coroutine returning a pre-recorded async iterator.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-temporal-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Temporal Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "session_id": "sess-temporal-1",
+            "usage": {"input_tokens": 15, "output_tokens": 7},
+            "cost_usd": 0.00015,
+            "duration_ms": 350,
+            "num_turns": 1,
+        }
+    ),
+]
+
+_TOOL_CALL_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-temporal-2"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "tool_temporal",
+                        "name": "Bash",
+                        "input": {"command": "ls /tmp"},
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "user",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "tool_temporal",
+                        "content": "file1\nfile2\n",
+                        "is_error": False,
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Listed files."}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "session_id": "sess-temporal-2",
+            "usage": {"input_tokens": 30, "output_tokens": 12},
+            "cost_usd": 0.0004,
+            "duration_ms": 600,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-t1", task_id="task-temporal-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    for line in lines:
+        yield line
+
+
+async def _run_turn(lines: list[str]):
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_lines(lines))
+    emitter = UnifiedEmitter(
+        task_id="offline-temporal",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result, fake_streaming.sink, turn
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_text_only_produces_agent_output():
+    result, sink, _ = await _run_turn(_TEXT_ONLY_LINES)
+    assert "Hello from Temporal Claude Code" in result.final_text
+
+
+@pytest.mark.asyncio
+async def test_usage_from_result_envelope():
+    """Usage is available from turn.usage() after the events are exhausted.
+
+    UnifiedEmitter.auto_send_turn evaluates turn.usage() eagerly before the
+    async generator is consumed, so result.usage is a pre-exhaust snapshot.
+    Read usage directly from the turn after _run_turn completes instead.
+    """
+    result, _, turn = await _run_turn(_TEXT_ONLY_LINES)
+    usage = turn.usage()
+    assert usage.input_tokens == 15
+    assert usage.output_tokens == 7
+    assert usage.num_llm_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_session_id_captured_in_result_envelope():
+    """Verify the result envelope carries session_id (multi-turn resume support)."""
+    _, _, turn = await _run_turn(_TEXT_ONLY_LINES)
+    assert turn._result_envelope is not None
+    assert turn._result_envelope.get("session_id") == "sess-temporal-1"
+
+
+@pytest.mark.asyncio
+async def test_tool_call_context_types():
+    result, sink, _ = await _run_turn(_TOOL_CALL_LINES)
+    opened = [s for s in sink if s[0] == "open"]
+    content_types = [s[1] for s in opened]
+    assert "tool_request" in content_types
+    assert "text" in content_types
+
+
+@pytest.mark.asyncio
+async def test_spawn_seam_concept():
+    """Demonstrate the injectable spawn seam pattern used in project/workflow.py.
+
+    ``_spawn_claude(prompt, session_id=None)`` is a top-level async generator.
+    A drop-in replacement (e.g. via monkeypatch) supplies pre-recorded lines
+    and captures call arguments. The session_id parameter enables multi-turn
+    resume (``claude -r <session_id>``).
+    """
+    called: list[tuple] = []
+
+    async def _fake_spawn(prompt: str, session_id=None) -> AsyncIterator[str]:
+        called.append((prompt, session_id))
+        for line in _TEXT_ONLY_LINES:
+            yield line
+
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_spawn("temporal prompt", session_id="old-sid"))
+    emitter = UnifiedEmitter(
+        task_id="t",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+
+    assert called == [("temporal prompt", "old-sid")]
+    assert "Hello from Temporal Claude Code" in result.final_text
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/.dockerignore b/examples/tutorials/10_async/10_temporal/150_codex/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/Dockerfile b/examples/tutorials/10_async/10_temporal/150_codex/Dockerfile
new file mode 100644
index 000000000..e861c7f33
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/Dockerfile
@@ -0,0 +1,48 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install the codex CLI: the worker spawns `codex exec --json`, so the binary
+# must be present on PATH in the image.
+RUN npm install -g @openai/codex
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/150_codex/pyproject.toml /app/150_codex/pyproject.toml
+COPY 10_async/10_temporal/150_codex/README.md /app/150_codex/README.md
+
+WORKDIR /app/150_codex
+
+COPY 10_async/10_temporal/150_codex/project /app/150_codex/project
+COPY 10_async/10_temporal/150_codex/tests /app/150_codex/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+ENV AGENT_NAME=at150-codex
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When deploying the worker, replace CMD with:
+# CMD ["python", "-m", "project.run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/README.md b/examples/tutorials/10_async/10_temporal/150_codex/README.md
new file mode 100644
index 000000000..498b81374
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/README.md
@@ -0,0 +1,48 @@
+# 150_codex (Temporal)
+
+Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap,
+`CodexTurn`, and `UnifiedEmitter` for a **Temporal-durable** async ACP agent.
+
+## What this tutorial shows
+
+- Spawning `codex exec --json` as a **local asyncio subprocess** (no Scale sandbox)
+  inside a Temporal workflow signal handler.
+- Wrapping the stdout line stream in a `CodexTurn`.
+- Delivering every canonical `StreamTaskMessage*` event to Redis via
+  `UnifiedEmitter.auto_send_turn`, passing `created_at=workflow.now()` for
+  deterministic Temporal replay timestamps.
+- Keeping the codex thread ID on the workflow instance (durable across crashes
+  without an external `adk.state` round-trip).
+
+> **Production isolation note:** A tutorial agent runs the Codex CLI locally.
+> Production-grade isolation (Scale sandbox, secret injection, MCP configuration)
+> is handled by the golden agent at
+> `teams/sgp/agents/golden_agent/project/harness/providers/codex.py`.
+
+> **Temporal determinism note:** Subprocess spawning happens inside
+> `@workflow.signal` handler bodies. Temporal does NOT replay signal handler
+> bodies (only `@workflow.run` is subject to replay constraints), so this is
+> safe. A production agent would wrap the subprocess in a Temporal activity for
+> full durability and retry semantics.
+
+## Live runs
+
+Live runs require:
+1. The `codex` CLI on PATH: `npm install -g @openai/codex`
+2. `OPENAI_API_KEY` set in the environment.
+3. A running Temporal server.
+
+## Running offline unit tests
+
+```bash
+cd /path/to/scale-agentex-python
+uv run --all-packages --all-extras pytest examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py -q
+```
+
+## Running live integration tests
+
+```bash
+export CODEX_LIVE_TESTS=1
+export OPENAI_API_KEY=sk-...
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/conftest.py b/examples/tutorials/10_async/10_temporal/150_codex/conftest.py
new file mode 100644
index 000000000..6370f278d
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/conftest.py
@@ -0,0 +1,17 @@
+"""Add the agent's project root to sys.path so ``import project`` works.
+
+Also sets minimal environment variables so FastACP, tracing, and the
+Temporal workflow module can be imported without a running server.
+"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+# AGENT_NAME must match the manifest's agent name: the live test queries the
+# server by this name, and project.workflow reads it at import time.
+os.environ.setdefault("AGENT_NAME", "at150-codex")
+os.environ.setdefault("ACP_URL", "http://localhost:8000")
+os.environ.setdefault("WORKFLOW_NAME", "at150-codex")
+os.environ.setdefault("WORKFLOW_TASK_QUEUE", "at150_codex_queue")
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml b/examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml
new file mode 100644
index 000000000..d64bdfad0
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml
@@ -0,0 +1,62 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/150_codex
+      - test_utils
+    dockerfile: 10_async/10_temporal/150_codex/Dockerfile
+    dockerignore: 10_async/10_temporal/150_codex/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at150-codex
+  description: Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at150-codex
+        queue_name: at150_codex_queue
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "at150-codex"
+      description: "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/project/__init__.py b/examples/tutorials/10_async/10_temporal/150_codex/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/project/acp.py b/examples/tutorials/10_async/10_temporal/150_codex/project/acp.py
new file mode 100644
index 000000000..39a81dde9
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/project/acp.py
@@ -0,0 +1,32 @@
+"""ACP server for the Temporal Codex harness tutorial.
+
+This file is intentionally thin. When ``acp_type="async"`` is combined with
+``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires:
+
+    HTTP task/create       -> @workflow.run on the workflow class
+    HTTP task/event/send   -> @workflow.signal(SignalName.RECEIVE_EVENT)
+    HTTP task/cancel       -> workflow cancellation via the Temporal client
+
+so we don't define any handlers here. The actual agent code lives in
+``project/workflow.py`` and is executed by the Temporal worker
+(``project/run_worker.py``), not by this HTTP process.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/project/activities.py b/examples/tutorials/10_async/10_temporal/150_codex/project/activities.py
new file mode 100644
index 000000000..363347635
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/project/activities.py
@@ -0,0 +1,145 @@
+"""Temporal activity for the Codex harness tutorial.
+
+Subprocess spawning (and any other I/O) must run inside a Temporal *activity*,
+not in workflow code. Temporal runs workflow + signal-handler bodies on a
+deterministic sandbox event loop that does not implement ``subprocess_exec``
+(or threads / sockets), so spawning ``codex exec`` directly in the signal
+handler raises ``NotImplementedError``. This activity runs codex, drives the
+``CodexTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async Redis push
+path), and returns the turn result to the workflow.
+
+The ``_spawn_codex`` / ``_process_stdout`` seams are injectable: offline tests
+replace them with fakes that yield pre-recorded event lines so no real CLI
+runs.
+"""
+
+from __future__ import annotations
+
+import os
+import codecs
+import asyncio
+from typing import Any
+from datetime import datetime
+from collections.abc import AsyncIterator
+
+from temporalio import activity
+
+from agentex.lib.adk import CodexTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.utils.model_utils import BaseModel
+
+logger = make_logger(__name__)
+
+RUN_CODEX_TURN_ACTIVITY = "run_codex_turn"
+
+
+class RunCodexTurnParams(BaseModel):
+    """Arguments for one codex turn run inside an activity."""
+
+    task_id: str
+    prompt: str
+    model: str
+    trace_id: str | None = None
+    parent_span_id: str | None = None
+    thread_id: str | None = None
+    created_at: datetime | None = None
+
+
+class RunCodexTurnResult(BaseModel):
+    """Result returned from the activity to the workflow."""
+
+    final_text: str
+    session_id: str | None = None
+    model: str | None = None
+
+
+async def _spawn_codex(
+    model: str,
+    thread_id: str | None = None,
+) -> asyncio.subprocess.Process:
+    """Spawn ``codex exec --json`` locally and return the live process.
+
+    Injection seam: tests replace this function with a fake that returns a
+    mock process whose stdout yields pre-recorded event lines.
+
+    The caller writes the prompt to stdin after the process starts, then
+    closes stdin so codex knows input is complete.
+    """
+    base_flags = [
+        "--json",
+        "--skip-git-repo-check",
+        "--dangerously-bypass-approvals-and-sandbox",
+        "--model",
+        model,
+    ]
+
+    if thread_id:
+        cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"]
+    else:
+        cmd = ["codex", "exec", *base_flags, "-"]
+
+    return await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        # Discard stderr: codex --json writes events to stdout; its stderr is
+        # progress/debug noise. Capturing it with PIPE but never reading it
+        # would deadlock once codex fills the OS pipe buffer (~64 KB).
+        stderr=asyncio.subprocess.DEVNULL,
+        env={**os.environ},
+    )
+
+
+async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
+    """Yield newline-delimited JSON lines from the process stdout.
+
+    Uses an incremental UTF-8 decoder so a multibyte character split across two
+    4 KB reads is decoded correctly instead of being corrupted at the boundary.
+    """
+    assert process.stdout is not None
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+    buffer = ""
+    while True:
+        chunk = await process.stdout.read(4096)
+        if not chunk:
+            break
+        buffer += decoder.decode(chunk)
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.strip()
+            if line:
+                yield line
+    buffer += decoder.decode(b"", final=True)
+    if buffer.strip():
+        yield buffer.strip()
+
+
+@activity.defn(name=RUN_CODEX_TURN_ACTIVITY)
+async def run_codex_turn(params: RunCodexTurnParams) -> dict[str, Any]:
+    """Run one codex turn end-to-end and stream events to the task.
+
+    Runs in an activity (real asyncio loop) so subprocess I/O is permitted.
+    """
+    process = await _spawn_codex(params.model, thread_id=params.thread_id)
+
+    assert process.stdin is not None
+    process.stdin.write(params.prompt.encode("utf-8"))
+    await process.stdin.drain()
+    process.stdin.close()
+
+    turn = CodexTurn(events=_process_stdout(process), model=params.model)
+    emitter = UnifiedEmitter(
+        task_id=params.task_id,
+        trace_id=params.trace_id,
+        parent_span_id=params.parent_span_id,
+    )
+    result = await emitter.auto_send_turn(turn, created_at=params.created_at)
+
+    await process.wait()
+
+    return RunCodexTurnResult(
+        final_text=result.final_text,
+        session_id=turn.session_id,
+        model=turn.usage().model,
+    ).model_dump()
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/project/run_worker.py b/examples/tutorials/10_async/10_temporal/150_codex/project/run_worker.py
new file mode 100644
index 000000000..b8972806b
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/project/run_worker.py
@@ -0,0 +1,41 @@
+"""Temporal worker for the Codex harness tutorial.
+
+Run as a separate long-lived process alongside the ACP HTTP server. The
+worker polls Temporal for workflow + activity tasks and executes them.
+
+The codex CLI subprocess runs in the ``run_codex_turn`` activity (registered
+below alongside the built-in Agentex activities), because subprocess I/O is not
+permitted on the Temporal workflow event loop.
+"""
+
+import asyncio
+
+from project.workflow import AtHarnessCodexWorkflow
+from project.activities import run_codex_turn
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    worker = AgentexWorker(task_queue=task_queue_name)
+
+    await worker.run(
+        activities=[run_codex_turn, *get_all_activities()],
+        workflow=AtHarnessCodexWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/project/workflow.py b/examples/tutorials/10_async/10_temporal/150_codex/project/workflow.py
new file mode 100644
index 000000000..1970b478f
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/project/workflow.py
@@ -0,0 +1,145 @@
+"""Temporal workflow for the Codex harness tutorial.
+
+Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` +
+``UnifiedEmitter`` for a Temporal-durable ACP agent.
+
+KEY CONCEPTS DEMONSTRATED:
+- Running ``codex exec --json`` in the ``run_codex_turn`` activity. Subprocess
+  I/O is not permitted on the Temporal workflow event loop (the deterministic
+  sandbox loop does not implement ``subprocess_exec``), so the signal handler
+  delegates the turn to an activity, which also gets Temporal's retry + timeout
+  guarantees.
+- Wrapping the stdout line stream in a ``CodexTurn`` (inside the activity).
+- Delivering events via ``UnifiedEmitter.auto_send_turn``, which pushes
+  ``StreamTaskMessage*`` events to Redis so the UI sees tokens in real time.
+- Passing ``created_at=workflow.now()`` for deterministic timestamps under
+  Temporal replay (required for Temporal-safe delivery).
+- Persisting the codex thread ID on the workflow instance itself — Temporal's
+  workflow state is durable, so no external ``adk.state`` round-trip is needed.
+"""
+
+from __future__ import annotations
+
+import os
+from datetime import timedelta
+
+from temporalio import workflow
+
+from agentex.lib import adk
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+with workflow.unsafe.imports_passed_through():
+    from project.activities import RunCodexTurnParams, run_codex_turn
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+MODEL = os.environ.get("CODEX_MODEL", "o4-mini")
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class AtHarnessCodexWorkflow(BaseWorkflow):
+    """Long-running Temporal workflow that runs codex exec for each turn.
+
+    Conversation state (codex thread ID + turn counter) is kept on the
+    workflow instance. Temporal's durable replay reconstructs this state if
+    the worker crashes, so no external ``adk.state`` round-trip is needed.
+    """
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._turn_number = 0
+        self._codex_thread_id: str | None = None
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Handle a new user message: spawn codex, stream events via UnifiedEmitter."""
+        logger.info("Received task event: %s", params.task.id)
+        self._turn_number += 1
+
+        await adk.messages.create(task_id=params.task.id, content=params.event.content)
+
+        user_message = params.event.content.content
+
+        async with adk.tracing.span(
+            trace_id=params.task.id,
+            task_id=params.task.id,
+            name=f"Turn {self._turn_number}",
+            input={"message": user_message},
+        ) as span:
+            # Delegate the subprocess turn to an activity: subprocess I/O is not
+            # permitted on the Temporal workflow event loop. The activity streams
+            # events to the task and returns the final text + codex thread id.
+            # workflow.now() gives a deterministic timestamp under replay.
+            result = await workflow.execute_activity(
+                run_codex_turn,
+                RunCodexTurnParams(
+                    task_id=params.task.id,
+                    prompt=user_message,
+                    model=MODEL,
+                    trace_id=params.task.id,
+                    parent_span_id=span.id if span else None,
+                    thread_id=self._codex_thread_id,
+                    created_at=workflow.now(),
+                ),
+                start_to_close_timeout=timedelta(minutes=5),
+            )
+
+            # Persist the codex thread id so the next turn resumes the session.
+            session_id = result.get("session_id")
+            if session_id:
+                self._codex_thread_id = session_id
+
+            if span:
+                span.output = {
+                    "final_text": result.get("final_text"),
+                    "model": result.get("model"),
+                }
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        """Workflow entry point — keep the conversation alive for incoming signals."""
+        logger.info("Task created: %s", params.task.id)
+
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized.\n"
+                    f"Send me a message and I'll run codex (local subprocess) "
+                    f"to answer, streaming events via the unified harness surface."
+                ),
+            ),
+        )
+
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        """Graceful workflow shutdown signal."""
+        logger.info("Received complete_task signal")
+        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml b/examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml
new file mode 100644
index 000000000..7e1d6250f
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml
@@ -0,0 +1,40 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "at150-codex"
+version = "0.1.0"
+description = "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "temporalio>=1.18.2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+    "debugpy>=1.8.15",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
diff --git a/examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py
new file mode 100644
index 000000000..fa6c66083
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py
@@ -0,0 +1,275 @@
+"""Tests for the Temporal Codex harness tutorial agent.
+
+LIVE tests (``TestLiveCodexAgent``):
+  - Require the ``codex`` CLI on PATH, ``OPENAI_API_KEY``, and a running
+    Temporal + Agentex server.
+  - Skipped automatically when ``CODEX_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestOfflineCodexWorkflow``):
+  - Inject a fake async iterator of pre-recorded codex event lines.
+  - Assert the signal handler drives ``UnifiedEmitter.auto_send_turn`` and
+    captures the codex thread ID on the workflow instance.
+  - Always run.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+SAMPLE_EVENTS: list[dict[str, Any]] = [
+    {"type": "thread.started", "thread_id": "thread-temporal-1"},
+    {"type": "turn.started"},
+    {
+        "type": "item.started",
+        "item": {"id": "msg-t1", "type": "agent_message", "text": "Hello"},
+    },
+    {
+        "type": "item.completed",
+        "item": {"id": "msg-t1", "type": "agent_message", "text": "Hello from Temporal!"},
+    },
+    {
+        "type": "turn.completed",
+        "usage": {"input_tokens": 6, "output_tokens": 3, "total_tokens": 9},
+    },
+]
+
+
+async def _fake_event_stream():
+    """Async iterator of pre-recorded codex event JSON lines (no subprocess)."""
+    for evt in SAMPLE_EVENTS:
+        yield json.dumps(evt)
+
+
+class _FakeSpan:
+    id = "span-temporal-1"
+    output: Any = None
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *a):
+        pass
+
+
+class TestOfflineCodexWorkflow:
+    """Unit tests that run without a real codex CLI, Temporal, or network."""
+
+    @pytest.mark.asyncio
+    async def test_codex_turn_usage_with_temporal_events(self):
+        """CodexTurn.usage() is correct after exhausting the temporal sample events."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        _ = [e async for e in turn.events]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 6
+        assert usage.output_tokens == 3
+        assert usage.model == "o4-mini"
+
+    @pytest.mark.asyncio
+    async def test_unified_emitter_auto_send_with_created_at(self):
+        """UnifiedEmitter.auto_send_turn accepts created_at=None without error."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message import TaskMessage
+        from agentex.types.text_content import TextContent
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        real_task_msg = TaskMessage(
+            id="msg-fake",
+            task_id="t",
+            content=TextContent(type="text", author="agent", content=""),
+        )
+
+        fake_streaming = MagicMock()
+        fake_ctx = AsyncMock()
+        fake_ctx.__aenter__ = AsyncMock(return_value=fake_ctx)
+        fake_ctx.__aexit__ = AsyncMock(return_value=False)
+        fake_ctx.stream_update = AsyncMock(return_value=MagicMock())
+        fake_ctx.close = AsyncMock()
+        fake_ctx.task_message = real_task_msg
+        fake_streaming.streaming_task_message_context = MagicMock(return_value=fake_ctx)
+
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+
+        result = await emitter.auto_send_turn(turn, created_at=None)
+        assert result is not None
+
+    @pytest.mark.asyncio
+    async def test_thread_id_captured_after_exhausted_stream(self):
+        """CodexTurn._result captures the thread_id from thread.started."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        _ = [e async for e in turn.events]
+
+        assert turn._result is not None
+        assert turn._result["session_id"] == "thread-temporal-1"
+
+    @pytest.mark.asyncio
+    async def test_signal_handler_delegates_to_activity_and_captures_thread_id(self):
+        """Signal handler runs the turn via execute_activity, increments the turn
+        counter, and captures the codex thread ID returned by the activity."""
+        captured: dict[str, Any] = {}
+
+        async def _fake_execute_activity(_activity, params, **_kw):
+            captured["params"] = params
+            return {
+                "session_id": "thread-temporal-1",
+                "final_text": "Hello from Temporal!",
+                "model": "o4-mini",
+            }
+
+        with patch("project.workflow.adk.messages.create", new=AsyncMock()), patch(
+            "project.workflow.adk.tracing.span"
+        ) as mock_span, patch(
+            "project.workflow.workflow.execute_activity", new=_fake_execute_activity
+        ), patch("project.workflow.workflow.now", return_value=None):
+            mock_span.return_value = _FakeSpan()
+
+            from project.workflow import AtHarnessCodexWorkflow
+
+            wf = AtHarnessCodexWorkflow.__new__(AtHarnessCodexWorkflow)
+            wf._turn_number = 0
+            wf._codex_thread_id = None
+            wf._complete_task = False
+            wf._display_name = "test"
+
+            params = MagicMock()
+            params.task.id = "task-temporal-offline-1"
+            params.event.content.content = "say hello temporal"
+
+            await wf.on_task_event_send(params)
+
+        assert wf._turn_number == 1
+        assert wf._codex_thread_id == "thread-temporal-1"
+        assert captured["params"].prompt == "say hello temporal"
+        assert captured["params"].thread_id is None
+
+    @pytest.mark.asyncio
+    async def test_run_codex_turn_activity_streams_and_returns_thread_id(self):
+        """The run_codex_turn activity drives the turn and returns the thread id."""
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        async def _fake_spawn(model, thread_id=None):  # noqa: ARG001
+            fake_stdin = MagicMock()
+            fake_stdin.write = MagicMock()
+            fake_stdin.drain = AsyncMock()
+            fake_stdin.close = MagicMock()
+            proc = MagicMock()
+            proc.stdin = fake_stdin
+            proc.wait = AsyncMock(return_value=0)
+            return proc
+
+        async def _fake_process_stdout(_process):  # noqa: ARG001
+            for evt in SAMPLE_EVENTS:
+                yield json.dumps(evt)
+
+        class _FakeTurnResult:
+            final_text = "Hello from Temporal!"
+
+        async def _auto_send(_self, turn, *_a, **_kw):
+            async for _ in turn.events:
+                pass
+            return _FakeTurnResult()
+
+        with patch("project.activities._spawn_codex", new=_fake_spawn), patch(
+            "project.activities._process_stdout", new=_fake_process_stdout
+        ), patch.object(UnifiedEmitter, "auto_send_turn", new=_auto_send):
+            from project.activities import RunCodexTurnParams, run_codex_turn
+
+            result = await run_codex_turn(
+                RunCodexTurnParams(
+                    task_id="task-temporal-offline-1",
+                    prompt="say hello temporal",
+                    model="o4-mini",
+                )
+            )
+
+        assert result["session_id"] == "thread-temporal-1"
+        assert result["final_text"] == "Hello from Temporal!"
+
+
+# ---------------------------------------------------------------------------
+# Live tests
+# ---------------------------------------------------------------------------
+
+LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1"
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "at150-codex")
+
+
+@pytest.mark.skipif(
+    not LIVE,
+    reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY + Temporal are available",
+)
+class TestLiveCodexAgent:
+    """End-to-end tests that require the real codex CLI, Temporal, and Agentex server."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_id(self, client):
+        for agent in client.agents.list():
+            if agent.name == AGENT_NAME:
+                return agent.id
+        raise ValueError(f"Agent {AGENT_NAME!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Temporal agents process events out of band, so create a task, send an
+        event, and poll the task's messages for the agent's response."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task.id,
+                content=TextContentParam(
+                    author="user",
+                    content="What is 5+5? Reply with just the number.",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 90
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task.id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            response_msgs = [
+                m for m in agent_msgs if "Task initialized" not in str(getattr(m.content, "content", ""))
+            ]
+            if response_msgs:
+                assert len(response_msgs) >= 1
+                return
+            time.sleep(3)
+
+        raise AssertionError("No agent response received within 90 s")
diff --git a/pyproject.toml b/pyproject.toml
index 98134d993..7ee0cf56b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@
 # overlay (formerly `src/agentex/lib/*`) now lives in `adk/` and ships
 # as the sibling `agentex-sdk` package — see `adk/pyproject.toml`.
 name = "agentex-client"
-version = "0.14.0"
+version = "0.15.0"
 description = "The official Python REST client for the Agentex API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/agentex/_client.py b/src/agentex/_client.py
index 1be05b767..b52ae6b78 100644
--- a/src/agentex/_client.py
+++ b/src/agentex/_client.py
@@ -35,12 +35,24 @@
 )
 
 if TYPE_CHECKING:
-    from .resources import spans, tasks, agents, events, states, tracker, messages, checkpoints, deployment_history
+    from .resources import (
+        spans,
+        tasks,
+        agents,
+        events,
+        states,
+        tracker,
+        messages,
+        webhooks,
+        checkpoints,
+        deployment_history,
+    )
     from .resources.spans import SpansResource, AsyncSpansResource
     from .resources.tasks import TasksResource, AsyncTasksResource
     from .resources.events import EventsResource, AsyncEventsResource
     from .resources.states import StatesResource, AsyncStatesResource
     from .resources.tracker import TrackerResource, AsyncTrackerResource
+    from .resources.webhooks import WebhooksResource, AsyncWebhooksResource
     from .resources.checkpoints import CheckpointsResource, AsyncCheckpointsResource
     from .resources.agents.agents import AgentsResource, AsyncAgentsResource
     from .resources.messages.messages import MessagesResource, AsyncMessagesResource
@@ -202,6 +214,12 @@ def checkpoints(self) -> CheckpointsResource:
 
         return CheckpointsResource(self)
 
+    @cached_property
+    def webhooks(self) -> WebhooksResource:
+        from .resources.webhooks import WebhooksResource
+
+        return WebhooksResource(self)
+
     @cached_property
     def with_raw_response(self) -> AgentexWithRawResponse:
         return AgentexWithRawResponse(self)
@@ -457,6 +475,12 @@ def checkpoints(self) -> AsyncCheckpointsResource:
 
         return AsyncCheckpointsResource(self)
 
+    @cached_property
+    def webhooks(self) -> AsyncWebhooksResource:
+        from .resources.webhooks import AsyncWebhooksResource
+
+        return AsyncWebhooksResource(self)
+
     @cached_property
     def with_raw_response(self) -> AsyncAgentexWithRawResponse:
         return AsyncAgentexWithRawResponse(self)
@@ -634,6 +658,12 @@ def checkpoints(self) -> checkpoints.CheckpointsResourceWithRawResponse:
 
         return CheckpointsResourceWithRawResponse(self._client.checkpoints)
 
+    @cached_property
+    def webhooks(self) -> webhooks.WebhooksResourceWithRawResponse:
+        from .resources.webhooks import WebhooksResourceWithRawResponse
+
+        return WebhooksResourceWithRawResponse(self._client.webhooks)
+
 
 class AsyncAgentexWithRawResponse:
     _client: AsyncAgentex
@@ -695,6 +725,12 @@ def checkpoints(self) -> checkpoints.AsyncCheckpointsResourceWithRawResponse:
 
         return AsyncCheckpointsResourceWithRawResponse(self._client.checkpoints)
 
+    @cached_property
+    def webhooks(self) -> webhooks.AsyncWebhooksResourceWithRawResponse:
+        from .resources.webhooks import AsyncWebhooksResourceWithRawResponse
+
+        return AsyncWebhooksResourceWithRawResponse(self._client.webhooks)
+
 
 class AgentexWithStreamedResponse:
     _client: Agentex
@@ -756,6 +792,12 @@ def checkpoints(self) -> checkpoints.CheckpointsResourceWithStreamingResponse:
 
         return CheckpointsResourceWithStreamingResponse(self._client.checkpoints)
 
+    @cached_property
+    def webhooks(self) -> webhooks.WebhooksResourceWithStreamingResponse:
+        from .resources.webhooks import WebhooksResourceWithStreamingResponse
+
+        return WebhooksResourceWithStreamingResponse(self._client.webhooks)
+
 
 class AsyncAgentexWithStreamedResponse:
     _client: AsyncAgentex
@@ -817,6 +859,12 @@ def checkpoints(self) -> checkpoints.AsyncCheckpointsResourceWithStreamingRespon
 
         return AsyncCheckpointsResourceWithStreamingResponse(self._client.checkpoints)
 
+    @cached_property
+    def webhooks(self) -> webhooks.AsyncWebhooksResourceWithStreamingResponse:
+        from .resources.webhooks import AsyncWebhooksResourceWithStreamingResponse
+
+        return AsyncWebhooksResourceWithStreamingResponse(self._client.webhooks)
+
 
 Client = Agentex
 
diff --git a/src/agentex/_version.py b/src/agentex/_version.py
index 551c0dbac..c567e168b 100644
--- a/src/agentex/_version.py
+++ b/src/agentex/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "agentex"
-__version__ = "0.14.0"  # x-release-please-version
+__version__ = "0.15.0"  # x-release-please-version
diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py
index a08131260..e618a20d3 100644
--- a/src/agentex/lib/adk/__init__.py
+++ b/src/agentex/lib/adk/__init__.py
@@ -10,9 +10,18 @@
 from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events
 from agentex.lib.adk._modules._langgraph_messages import emit_langgraph_messages
 from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
 from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events
 from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
 from agentex.lib.adk._modules._pydantic_ai_tracing import create_pydantic_ai_tracing_handler
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+from agentex.lib.adk._modules._claude_code_turn import (
+    ClaudeCodeTurn,
+    claude_code_usage_to_turn_usage,
+)
+from agentex.lib.adk._modules._codex_sync import convert_codex_to_agentex_events
+from agentex.lib.adk._modules._codex_turn import CodexTurn, codex_usage_to_turn_usage
 from agentex.lib.adk._modules.events import EventsModule
 from agentex.lib.adk._modules.messages import MessagesModule
 from agentex.lib.adk._modules.state import StateModule
@@ -20,6 +29,19 @@
 from agentex.lib.adk._modules.tasks import TasksModule
 from agentex.lib.adk._modules.tracing import TracingModule
 
+# Unified harness surface (AGX1-375)
+from agentex.lib.core.harness import (
+    UnifiedEmitter,
+    SpanTracer,
+    OpenSpan,
+    CloseSpan,
+    SpanSignal,
+    StreamTaskMessage,
+    TurnUsage,
+    TurnResult,
+    HarnessTurn,
+)
+
 from agentex.lib.adk import providers
 from agentex.lib.adk import utils
 
@@ -50,10 +72,30 @@
     "stream_langgraph_events",
     "emit_langgraph_messages",
     "convert_langgraph_to_agentex_events",
+    "LangGraphTurn",
     # Pydantic AI
     "stream_pydantic_ai_events",
     "convert_pydantic_ai_to_agentex_events",
     "create_pydantic_ai_tracing_handler",
+    "PydanticAITurn",
+    # Claude Code
+    "convert_claude_code_to_agentex_events",
+    "ClaudeCodeTurn",
+    "claude_code_usage_to_turn_usage",
+    # Codex
+    "convert_codex_to_agentex_events",
+    "CodexTurn",
+    "codex_usage_to_turn_usage",
+    # Unified harness surface (AGX1-375)
+    "UnifiedEmitter",
+    "SpanTracer",
+    "OpenSpan",
+    "CloseSpan",
+    "SpanSignal",
+    "StreamTaskMessage",
+    "TurnUsage",
+    "TurnResult",
+    "HarnessTurn",
     # Providers
     "providers",
     # Utils
diff --git a/src/agentex/lib/adk/_modules/_claude_code_sync.py b/src/agentex/lib/adk/_modules/_claude_code_sync.py
new file mode 100644
index 000000000..4e25503cf
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_claude_code_sync.py
@@ -0,0 +1,378 @@
+"""Claude Code stream-json parser tap for the unified harness surface.
+
+Converts the newline-delimited JSON envelopes emitted by
+``claude -p --output-format stream-json`` into the canonical
+``StreamTaskMessage*`` stream consumed by the Agentex harness.
+
+Envelope → canonical mapping
+-----------------------------
+system/init
+    Ignored at this layer (session_id tracking is a provider concern).
+
+assistant / user  (content blocks)
+    text block           → Start(TextContent) + Delta(TextDelta)* + Done
+    thinking block       → Start(ReasoningContent) + Delta(ReasoningContentDelta)* + Done
+    tool_use block       → Start(ToolRequestContent) + Done   (Full args in Start content)
+    tool_result block    → Full(ToolResponseContent)
+
+stream_event / content_block_start
+    type=text            → Start(TextContent, empty)
+    type=thinking        → Start(ReasoningContent, empty)
+
+stream_event / content_block_delta
+    type=text_delta      → Delta(TextDelta)
+    type=thinking_delta  → Delta(ReasoningContentDelta)
+
+stream_event / content_block_stop
+    (text open)          → Done
+    (thinking open)      → Done  (full text known here; update Full via Full event first)
+
+result
+    Fires ``on_result`` with the raw envelope so the caller can capture
+    usage and cost. No StreamTaskMessage is emitted for the result itself.
+
+Out of scope
+------------
+No deployable test agent is provided. claude-code requires the golden
+agent's sandbox/subprocess/secret/MCP orchestration to produce the stream.
+Live coverage is the golden agent, which will adopt this tap. Do NOT add an
+examples/ agent or CI live-matrix row for claude-code.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Callable, Awaitable, AsyncIterator
+
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+logger = make_logger(__name__)
+
+_MAX_RESULT_LENGTH = 4000
+
+
+def _truncate(text: str) -> str:
+    return str(text)[:_MAX_RESULT_LENGTH]
+
+
+def _extract_summary(text: str, max_len: int = 300) -> str:
+    return text.strip().split("\n", 1)[0][:max_len]
+
+
+async def convert_claude_code_to_agentex_events(
+    lines: AsyncIterator[str | dict[str, Any]],
+    on_result: Callable[[dict[str, Any]], Awaitable[None]] | None = None,
+) -> AsyncIterator[StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone]:
+    """Convert a claude-code ``stream-json`` line stream into Agentex ``StreamTaskMessage*`` events.
+
+    Each item in ``lines`` is either a raw JSON string (as read from the CLI's
+    stdout) or an already-parsed dict. Empty strings are skipped; unparseable
+    JSON is logged and skipped.
+
+    ``on_result`` is called with the ``result`` envelope when it arrives so the
+    caller can capture usage and cost. It is awaited before the generator
+    continues. When ``None``, the result envelope is silently dropped.
+
+    Envelope → canonical mapping is documented in this module's docstring.
+    """
+    next_index = 0
+    tool_call_count = 0
+
+    # Streaming state for content_block_start / content_block_delta /
+    # content_block_stop triples.
+    _thinking_open = False
+    _thinking_buf = ""
+    _thinking_index: int | None = None
+    _text_open = False
+    _text_buf = ""
+    _text_index: int | None = None
+    # Track which assistant-message block indices were already streamed via
+    # stream_event triples. Those blocks must not be re-emitted when the full
+    # assistant message arrives. Reset at each message boundary (see below) so a
+    # later turn's block indices don't collide with an earlier turn's.
+    _streamed_block_indexes: set[int] = set()
+    # Once-guard so a thinking block's pending index is claimed on its first
+    # thinking_delta only. Reset per turn alongside _streamed_block_indexes.
+    _saw_thinking_stream = False
+    # For deferred ReasoningStarted: if a content_block_start(thinking) arrives
+    # but no thinking_delta ever follows, the final assistant block's thinking
+    # field fills the reasoning content instead.
+    _pending_thinking_block_index: int | None = None
+
+    async for raw in lines:
+        if not raw:
+            continue
+
+        if isinstance(raw, dict):
+            evt = raw
+        else:
+            line = raw.strip()
+            if not line:
+                continue
+            try:
+                evt = json.loads(line)
+            except json.JSONDecodeError:
+                logger.debug("claude-code: skipping non-JSON line: %r", line[:120])
+                continue
+
+        evt_type = evt.get("type", "")
+
+        # -----------------------------------------------------------------------
+        # assistant / user — materialised content blocks
+        # -----------------------------------------------------------------------
+        if evt_type in ("assistant", "user"):
+            msg = evt.get("message", {})
+            blocks = msg.get("content", [])
+            if not isinstance(blocks, list):
+                blocks = [blocks]
+
+            for idx, block in enumerate(blocks):
+                if not isinstance(block, dict):
+                    continue
+                block_type = block.get("type", "")
+
+                if block_type == "text":
+                    # Skip only the specific blocks already delivered via
+                    # stream_event deltas (per-block, not a turn-wide latch).
+                    if idx in _streamed_block_indexes:
+                        continue
+                    text = block.get("text", "")
+                    if text:
+                        msg_index = next_index
+                        next_index += 1
+                        yield StreamTaskMessageStart(
+                            type="start",
+                            index=msg_index,
+                            content=TextContent(
+                                type="text",
+                                author="agent",
+                                content="",
+                            ),
+                        )
+                        yield StreamTaskMessageDelta(
+                            type="delta",
+                            index=msg_index,
+                            delta=TextDelta(type="text", text_delta=text),
+                        )
+                        yield StreamTaskMessageDone(type="done", index=msg_index)
+
+                elif block_type == "thinking":
+                    # Skip only the specific blocks already delivered via
+                    # stream_event deltas (per-block, not a turn-wide latch).
+                    if idx in _streamed_block_indexes:
+                        continue
+                    thinking_text = block.get("thinking", "")
+                    if thinking_text:
+                        summary = _extract_summary(thinking_text)
+                        msg_index = next_index
+                        next_index += 1
+                        yield StreamTaskMessageStart(
+                            type="start",
+                            index=msg_index,
+                            content=ReasoningContent(
+                                type="reasoning",
+                                author="agent",
+                                summary=[summary],
+                                content=[],
+                                style="active",
+                            ),
+                        )
+                        yield StreamTaskMessageDelta(
+                            type="delta",
+                            index=msg_index,
+                            delta=ReasoningContentDelta(
+                                type="reasoning_content",
+                                content_index=0,
+                                content_delta=thinking_text,
+                            ),
+                        )
+                        yield StreamTaskMessageDone(type="done", index=msg_index)
+
+                elif block_type == "tool_use":
+                    tool_call_count += 1
+                    tool_id = block.get("id", f"tool_{tool_call_count}")
+                    name = block.get("name", "unknown")
+                    arguments = block.get("input", {})
+                    if not isinstance(arguments, dict):
+                        arguments = {}
+                    msg_index = next_index
+                    next_index += 1
+                    yield StreamTaskMessageStart(
+                        type="start",
+                        index=msg_index,
+                        content=ToolRequestContent(
+                            type="tool_request",
+                            author="agent",
+                            tool_call_id=tool_id,
+                            name=name,
+                            arguments=arguments,
+                        ),
+                    )
+                    yield StreamTaskMessageDone(type="done", index=msg_index)
+
+                elif block_type == "tool_result":
+                    tool_id = block.get("tool_use_id", "")
+                    content = block.get("content", "")
+                    is_error = block.get("is_error", False)
+                    if isinstance(content, list):
+                        content = "\n".join(b.get("text", str(b)) if isinstance(b, dict) else str(b) for b in content)
+                    result_str = _truncate(str(content))
+                    msg_index = next_index
+                    next_index += 1
+                    yield StreamTaskMessageFull(
+                        type="full",
+                        index=msg_index,
+                        content=ToolResponseContent(
+                            type="tool_response",
+                            author="agent",
+                            tool_call_id=tool_id,
+                            name="",
+                            content={"result": result_str, **({"is_error": True} if is_error else {})},
+                        ),
+                    )
+
+            # End of a materialised message: reset per-turn streaming dedup state
+            # so the next turn's stream_event indices start clean. Without this,
+            # a block index streamed in an earlier turn would linger in the set
+            # and silently drop a later turn's non-streamed block at that index.
+            _streamed_block_indexes = set()
+            _saw_thinking_stream = False
+
+        # -----------------------------------------------------------------------
+        # stream_event — incremental streaming deltas
+        # -----------------------------------------------------------------------
+        elif evt_type == "stream_event":
+            se = evt.get("event") or {}
+            se_type = se.get("type", "")
+            block_index = se.get("index")
+
+            if se_type == "content_block_start":
+                block = se.get("content_block") or {}
+                btype = block.get("type")
+
+                if btype == "thinking":
+                    _thinking_open = True
+                    _thinking_buf = ""
+                    # Defer marking the block as streamed until we actually
+                    # receive a thinking_delta. Some configurations emit a
+                    # thinking block_start but no deltas — in that case we want
+                    # the final assistant-message handler to fill the text.
+                    _pending_thinking_block_index = block_index if isinstance(block_index, int) else None
+                    msg_index = next_index
+                    next_index += 1
+                    _thinking_index = msg_index
+                    yield StreamTaskMessageStart(
+                        type="start",
+                        index=msg_index,
+                        content=ReasoningContent(
+                            type="reasoning",
+                            author="agent",
+                            summary=[],
+                            content=[],
+                            style="active",
+                        ),
+                    )
+
+                elif btype == "text":
+                    _text_open = True
+                    _text_buf = ""
+                    if isinstance(block_index, int):
+                        _streamed_block_indexes.add(block_index)
+                    msg_index = next_index
+                    next_index += 1
+                    _text_index = msg_index
+                    yield StreamTaskMessageStart(
+                        type="start",
+                        index=msg_index,
+                        content=TextContent(
+                            type="text",
+                            author="agent",
+                            content="",
+                        ),
+                    )
+
+            elif se_type == "content_block_delta":
+                delta = se.get("delta") or {}
+                dtype = delta.get("type")
+
+                if dtype == "thinking_delta":
+                    chunk = delta.get("thinking", "")
+                    if chunk and _thinking_open:
+                        if not _saw_thinking_stream:
+                            _saw_thinking_stream = True
+                            # Now mark the block as claimed so the assistant
+                            # message handler won't re-emit it.
+                            if _pending_thinking_block_index is not None:
+                                _streamed_block_indexes.add(_pending_thinking_block_index)
+                        _thinking_buf += chunk
+                        if _thinking_index is not None:
+                            yield StreamTaskMessageDelta(
+                                type="delta",
+                                index=_thinking_index,
+                                delta=ReasoningContentDelta(
+                                    type="reasoning_content",
+                                    content_index=0,
+                                    content_delta=chunk,
+                                ),
+                            )
+
+                elif dtype == "text_delta":
+                    chunk = delta.get("text", "")
+                    if chunk and _text_open:
+                        _text_buf += chunk
+                        if _text_index is not None:
+                            yield StreamTaskMessageDelta(
+                                type="delta",
+                                index=_text_index,
+                                delta=TextDelta(type="text", text_delta=chunk),
+                            )
+
+            elif se_type == "content_block_stop":
+                if _thinking_open:
+                    _thinking_open = False
+                    _thinking_buf = ""
+                    _pending_thinking_block_index = None
+                    # Reset the once-guard per thinking block: a turn can stream a
+                    # second thinking block, and without this the guard stays True,
+                    # the second block's index is never claimed, and the final
+                    # assistant envelope re-emits it (duplicate Start/Delta/Done).
+                    _saw_thinking_stream = False
+                    if _thinking_index is not None:
+                        yield StreamTaskMessageDone(type="done", index=_thinking_index)
+                    _thinking_index = None
+                elif _text_open:
+                    _text_open = False
+                    _text_buf = ""
+                    if _text_index is not None:
+                        yield StreamTaskMessageDone(type="done", index=_text_index)
+                    _text_index = None
+
+        # -----------------------------------------------------------------------
+        # system / init — session metadata (ignored at this layer)
+        # -----------------------------------------------------------------------
+        elif evt_type == "system":
+            # Session ID tracking and MCP status logging are provider concerns.
+            # This pure parser layer intentionally emits nothing for system events.
+            pass
+
+        # -----------------------------------------------------------------------
+        # result — carries usage + cost; fired to on_result, not emitted as msgs
+        # -----------------------------------------------------------------------
+        elif evt_type == "result":
+            if on_result is not None:
+                await on_result(evt)
+
+        else:
+            logger.debug("claude-code: unhandled envelope type %r", evt_type)
diff --git a/src/agentex/lib/adk/_modules/_claude_code_turn.py b/src/agentex/lib/adk/_modules/_claude_code_turn.py
new file mode 100644
index 000000000..6c052976a
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_claude_code_turn.py
@@ -0,0 +1,161 @@
+"""ClaudeCodeTurn — HarnessTurn implementation for the claude-code tap.
+
+Wraps ``convert_claude_code_to_agentex_events`` to implement the
+``HarnessTurn`` protocol: exposes ``events`` (the canonical
+``StreamTaskMessage*`` stream) and ``usage()`` (the normalised
+``TurnUsage``, populated after the stream is exhausted).
+
+Usage normalization
+-------------------
+Claude Code's ``result`` envelope carries usage under several key shapes
+depending on the CLI version. We defensive-map all known shapes:
+
+    result.usage.input_tokens        -> input_tokens
+    result.usage.output_tokens       -> output_tokens
+    result.usage.cache_read_input_tokens
+    result.usage.cache_creation_input_tokens  -> cached_input_tokens (sum)
+    result.cost_usd / result.total_cost_usd   -> cost_usd
+    result.duration_ms               -> duration_ms
+    result.num_turns                 -> num_llm_calls
+
+Real zeros are preserved; missing keys default to ``None`` (not zero) so
+downstream consumers can distinguish "not reported" from "zero".
+
+Out of scope: no deployable test agent is provided — see module docstring
+in ``_claude_code_sync.py``.
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from agentex.lib.core.harness.types import TurnUsage, HarnessTurn, StreamTaskMessage
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+
+
+def claude_code_usage_to_turn_usage(result_envelope: dict[str, Any]) -> TurnUsage:
+    """Map a claude-code ``result`` envelope to a canonical ``TurnUsage``.
+
+    Defensively handles missing / None values. Real zeros are preserved.
+    ``cost_usd`` checks both ``cost_usd`` and ``total_cost_usd`` keys (the
+    CLI has used both across versions).
+    ``cached_input_tokens`` accumulates cache_read and cache_creation counts
+    since both represent tokens served from the prompt cache.
+    """
+    usage_raw: dict[str, Any] = result_envelope.get("usage") or {}
+
+    def _int(d: dict[str, Any], key: str) -> int | None:
+        v = d.get(key)
+        if v is None:
+            return None
+        try:
+            return int(v)
+        except (TypeError, ValueError):
+            return None
+
+    def _float(d: dict[str, Any], *keys: str) -> float | None:
+        for key in keys:
+            v = d.get(key)
+            if v is not None:
+                try:
+                    return float(v)
+                except (TypeError, ValueError):
+                    continue
+        return None
+
+    input_tokens = _int(usage_raw, "input_tokens")
+    output_tokens = _int(usage_raw, "output_tokens")
+
+    # Aggregate both cache_read and cache_creation into cached_input_tokens
+    cache_read = _int(usage_raw, "cache_read_input_tokens")
+    cache_creation = _int(usage_raw, "cache_creation_input_tokens")
+    if cache_read is not None or cache_creation is not None:
+        cached_input_tokens = (cache_read or 0) + (cache_creation or 0)
+    else:
+        cached_input_tokens = None
+
+    total_tokens: int | None = None
+    if input_tokens is not None and output_tokens is not None:
+        total_tokens = input_tokens + output_tokens
+
+    cost_usd = _float(result_envelope, "cost_usd", "total_cost_usd")
+    duration_ms = _int(result_envelope, "duration_ms")
+
+    # num_llm_calls is provider-reported (from num_turns): default None ("not
+    # reported") rather than 0 so callers can distinguish it from a real zero,
+    # matching the None convention used for the token fields above.
+    num_turns = result_envelope.get("num_turns")
+    num_llm_calls: int | None = None
+    if num_turns is not None:
+        try:
+            num_llm_calls = int(num_turns)
+        except (TypeError, ValueError):
+            pass
+
+    return TurnUsage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_input_tokens=cached_input_tokens,
+        total_tokens=total_tokens,
+        cost_usd=cost_usd,
+        duration_ms=duration_ms,
+        num_llm_calls=num_llm_calls,
+    )
+
+
+class ClaudeCodeTurn:
+    """HarnessTurn for a claude-code ``stream-json`` line stream.
+
+    Satisfies the ``HarnessTurn`` protocol:
+    - ``events`` yields the canonical ``StreamTaskMessage*`` stream.
+    - ``usage()`` returns the normalised ``TurnUsage`` (only valid after
+      ``events`` is fully consumed).
+
+    ``lines`` is an async iterator of raw JSON strings or pre-parsed dicts, as
+    produced by reading the claude-code CLI's stdout line by line.
+    """
+
+    def __init__(self, lines: AsyncIterator[str | dict[str, Any]]) -> None:
+        self._lines = lines
+        self._result_envelope: dict[str, Any] | None = None
+        self._events_stream: AsyncIterator[StreamTaskMessage] | None = None
+
+    async def _on_result(self, envelope: dict[str, Any]) -> None:
+        self._result_envelope = envelope
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        if self._events_stream is None:
+            self._events_stream = convert_claude_code_to_agentex_events(
+                self._lines,
+                on_result=self._on_result,
+            )
+        return self._events_stream
+
+    @property
+    def session_id(self) -> str | None:
+        """The Claude Code session id, for resuming a multi-turn session.
+
+        Valid only after ``events`` has been fully consumed (populated by the
+        ``result`` envelope). Returns ``None`` if the stream was truncated or
+        Claude Code reported no session id.
+        """
+        if not self._result_envelope:
+            return None
+        return self._result_envelope.get("session_id")
+
+    def usage(self) -> TurnUsage:
+        """Return normalised usage for this turn.
+
+        Call only after ``events`` is exhausted. Returns an empty ``TurnUsage``
+        if the ``result`` envelope was not received (e.g. stream was truncated).
+        """
+        if self._result_envelope is None:
+            return TurnUsage()
+        return claude_code_usage_to_turn_usage(self._result_envelope)
+
+
+# Runtime assert that ClaudeCodeTurn satisfies HarnessTurn protocol
+assert isinstance(ClaudeCodeTurn.__new__(ClaudeCodeTurn), HarnessTurn), (
+    "ClaudeCodeTurn must satisfy the HarnessTurn protocol"
+)
diff --git a/src/agentex/lib/adk/_modules/_codex_sync.py b/src/agentex/lib/adk/_modules/_codex_sync.py
new file mode 100644
index 000000000..5a951d57e
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_codex_sync.py
@@ -0,0 +1,601 @@
+"""Codex event-stream parser tap for the unified harness surface.
+
+Converts a ``codex exec --json`` newline-delimited event stream (already
+produced by the golden agent's sandbox/subprocess orchestration) into the
+Agentex canonical ``StreamTaskMessage*`` events.
+
+SCOPE
+-----
+This module is a **pure parser**. It receives pre-produced codex events
+(``str`` lines or already-decoded ``dict`` objects) and yields canonical
+``StreamTaskMessage*`` events. All subprocess management, sandbox
+provisioning, secret injection, and MCP orchestration remain in the golden
+agent at
+``teams/sgp/agents/golden_agent/project/harness/providers/codex.py``.
+
+No deployable test agent is included here: running codex requires the
+golden agent's sandbox environment and is out of scope for this library tap.
+
+OUT OF SCOPE (document here so future callers are not surprised):
+- Subprocess / sandbox management
+- OPENAI_API_KEY / secret injection
+- MCP server configuration (--config /tmp/codex_config.toml)
+- ``codex exec resume`` session tracking
+- ``scale_sandbox`` imports
+
+CANONICAL MAPPING
+-----------------
+The table below lists every ``type`` field the codex exec JSON stream can
+emit (from ``codex-rs/exec/src/exec_events.rs``) and its mapping.
+
+Top-level event types
+~~~~~~~~~~~~~~~~~~~~~
+  thread.started          -> (no StreamTaskMessage; session_id captured
+                              internally; surfaced via ``on_result`` callback)
+  turn.started            -> (no StreamTaskMessage; turn was started before
+                              codex launched; nothing to emit here)
+  turn.completed          -> on_result(usage_dict, tool_count, reasoning_count)
+                             yields no StreamTaskMessage (turn lifecycle is
+                             managed by the activity layer)
+  turn.failed             -> StreamTaskMessageFull(TextContent, error text)
+  error                   -> StreamTaskMessageFull(TextContent, error text)
+
+Item sub-types (item.started / item.updated / item.completed)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  agent_message           -> text deltas:
+                               item.started / item.updated  -> StreamTaskMessageDelta(TextDelta)
+                               item.completed               -> StreamTaskMessageDone
+  reasoning               -> reasoning:
+                               item.started                 -> StreamTaskMessageStart(ReasoningContent)
+                               item.updated                 -> (no-op; final text arrives on completed)
+                               item.completed               -> StreamTaskMessageDelta(ReasoningSummaryDelta)
+                                                              + StreamTaskMessageDelta(ReasoningContentDelta)
+                                                              + StreamTaskMessageDone
+  command_execution       -> tool request + response:
+                               item.started                 -> StreamTaskMessageStart(ToolRequestContent)
+                                                              + StreamTaskMessageDone
+                               item.completed               -> StreamTaskMessageFull(ToolResponseContent)
+  file_change             -> same as command_execution
+                             NOTE: file_change may only emit item.completed (no started);
+                             a synthetic ToolRequestContent Full is emitted before the response.
+  mcp_tool_call           -> same as command_execution
+  web_search              -> same as command_execution
+  todo_list               -> same as command_execution
+  collab_tool_call        -> same as command_execution
+  error (item type)       -> StreamTaskMessageFull(TextContent, error text) on completed only
+
+UNMAPPED / PARTIALLY MAPPED EVENTS
+-----------------------------------
+  thread.started:         session_id is extracted but not forwarded as a
+                          StreamTaskMessage (no canonical content type for
+                          session-lifecycle signals; captured in on_result).
+  turn.started:           no-op; intentional (the caller owns turn lifecycle).
+  turn.completed:         no StreamTaskMessage; usage is forwarded via
+                          on_result so the caller can record it in a span
+                          without this module needing to know about spans.
+  item.updated (reasoning): the intermediate cumulative text is discarded;
+                            only item.completed carries the final text.
+  item.updated (tool):    tool item types other than agent_message do not
+                          emit updates; item.started opens the request and
+                          item.completed closes it.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Callable, AsyncIterator
+
+from agentex.lib.utils.logging import make_logger
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.task_message_content import TextContent
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta
+
+logger = make_logger(__name__)
+
+# Canonical type alias matching the unified harness surface.
+StreamTaskMessage = StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone
+
+_MAX_RESULT_LENGTH = 4000
+
+
+def _truncate(text: str, max_len: int = _MAX_RESULT_LENGTH) -> str:
+    return str(text)[:max_len]
+
+
+def _tool_name_for(item_type: str, payload: dict[str, Any]) -> str:
+    """Derive a canonical tool name from a codex item type."""
+    if item_type == "command_execution":
+        return "bash"
+    if item_type == "file_change":
+        return "file_change"
+    if item_type == "mcp_tool_call":
+        server = payload.get("server", "")
+        tool = payload.get("tool", "")
+        return f"{server}.{tool}" if (server or tool) else "mcp_tool_call"
+    if item_type == "web_search":
+        return "web_search"
+    if item_type == "todo_list":
+        return "todo_list"
+    if item_type == "collab_tool_call":
+        return "collab_tool_call"
+    return item_type or "unknown"
+
+
+def _tool_args_for(item_type: str, payload: dict[str, Any]) -> dict[str, Any]:
+    """Extract canonical arguments dict from a codex item payload."""
+    if item_type == "command_execution":
+        return {"command": payload.get("command", "")}
+    if item_type == "file_change":
+        return {"changes": payload.get("changes") or []}
+    if item_type == "mcp_tool_call":
+        args = payload.get("arguments")
+        return args if isinstance(args, dict) else {"value": args}
+    if item_type == "web_search":
+        return {"query": payload.get("query", "")}
+    if item_type == "todo_list":
+        return {"items": payload.get("items") or []}
+    if item_type == "collab_tool_call":
+        # Surface an arguments dict if the payload carries one (mirrors
+        # mcp_tool_call); otherwise no args rather than fabricating a shape.
+        args = payload.get("arguments")
+        return args if isinstance(args, dict) else {}
+    return {}
+
+
+def _tool_output_for(item_type: str, payload: dict[str, Any]) -> tuple[str, bool]:
+    """Extract (result_text, is_error) from a completed codex tool item."""
+    if item_type == "command_execution":
+        out = payload.get("aggregated_output") or ""
+        exit_code = payload.get("exit_code")
+        is_error = exit_code is not None and exit_code != 0
+        return _truncate(out), is_error
+    if item_type in ("mcp_tool_call", "collab_tool_call"):
+        # collab_tool_call mirrors mcp_tool_call's error/result convention
+        # (see _tool_args_for); without this branch a failed collab call would
+        # fall through to the generic path and be reported as a success.
+        err = payload.get("error")
+        if err:
+            msg = err.get("message", "") if isinstance(err, dict) else str(err)
+            return _truncate(f"Error: {msg}"), True
+        result = payload.get("result")
+        if result is None:
+            return "", False
+        try:
+            return _truncate(json.dumps(result)), False
+        except (TypeError, ValueError):
+            return _truncate(str(result)), False
+    if item_type == "file_change":
+        changes = payload.get("changes") or []
+        status = payload.get("status", "")
+        return f"status={status}, {len(changes)} changes", status == "failed"
+    try:
+        return _truncate(json.dumps(payload, default=str)), False
+    except (TypeError, ValueError):
+        return _truncate(str(payload)), False
+
+
+def _error_full(message: str, next_index: int) -> StreamTaskMessageFull:
+    """Emit a one-shot TextContent full message for an error."""
+    return StreamTaskMessageFull(
+        type="full",
+        index=next_index,
+        content=TextContent(
+            type="text",
+            author="agent",
+            content=f"Error: {message}",
+            format="plain",
+        ),
+    )
+
+
+class _CodexStreamProcessor:
+    """Stateful parser: consumes codex exec events, yields StreamTaskMessage*.
+
+    Ported from the golden agent's ``_CodexEventProcessor`` in
+    ``project/harness/providers/codex.py``, adapted to yield
+    ``StreamTaskMessage*`` directly instead of ``HarnessEvent`` objects.
+
+    State tracked:
+    - ``_next_index``: monotonically increasing message index.
+    - ``_text_index``: message index of the current open agent_message block.
+    - ``_text_accumulated``: cumulative text per agent_message item_id.
+    - ``_reasoning_index``: message index of the current open reasoning block.
+    - ``_reasoning_text``: latest cumulative reasoning text per item_id.
+    - ``_tool_open``: item_ids for which a ToolRequestContent Start was emitted
+       but no ToolResponseContent Full yet.
+    - ``_tool_item_types``: item_id -> item_type for open tool calls.
+    """
+
+    def __init__(self) -> None:
+        self._next_index: int = 0
+
+        # agent_message tracking
+        self._text_index: dict[str, int] = {}
+        self._text_accumulated: dict[str, str] = {}
+
+        # reasoning tracking
+        self._reasoning_index: dict[str, int] = {}
+        self._reasoning_text: dict[str, str] = {}
+
+        # tool tracking
+        self._tool_open: set[str] = set()
+        self._tool_item_types: dict[str, str] = {}
+        # Remember the tool_call_id assigned per item so the request and response
+        # halves agree even when item_id is empty (a recomputed fallback would
+        # drift as tool_call_count advances between started and completed).
+        self._tool_call_ids: dict[str, str] = {}
+
+        # counters for on_result callback
+        self.tool_call_count: int = 0
+        self.reasoning_count: int = 0
+        self.session_id: str | None = None
+
+    def _alloc(self) -> int:
+        idx = self._next_index
+        self._next_index += 1
+        return idx
+
+    def process(self, evt: dict[str, Any]) -> list[StreamTaskMessage]:
+        evt_type = evt.get("type", "")
+
+        if evt_type == "thread.started":
+            sid = evt.get("thread_id") or ""
+            if sid:
+                self.session_id = sid
+            return []
+
+        if evt_type == "turn.started":
+            # The activity layer owns turn lifecycle; nothing to emit.
+            return []
+
+        if evt_type == "turn.completed":
+            # Usage forwarded via on_result callback (not a StreamTaskMessage).
+            return []
+
+        if evt_type == "turn.failed":
+            err = evt.get("error") or {}
+            msg = err.get("message", "codex turn failed") if isinstance(err, dict) else str(err)
+            return [_error_full(f"Codex turn failed: {msg}", self._alloc())]
+
+        if evt_type == "error":
+            return [_error_full(evt.get("message", "codex error"), self._alloc())]
+
+        if evt_type in ("item.started", "item.updated", "item.completed"):
+            item = evt.get("item") or {}
+            return self._handle_item(evt_type, item)
+
+        logger.debug("[codex] unhandled event type=%s", evt_type)
+        return []
+
+    def _handle_item(self, evt_type: str, item: dict[str, Any]) -> list[StreamTaskMessage]:
+        item_id = item.get("id") or ""
+        item_type = item.get("type") or ""
+        out: list[StreamTaskMessage] = []
+
+        if item_type == "agent_message":
+            current = item.get("text") or ""
+            previous = self._text_accumulated.get(item_id, "")
+
+            if evt_type in ("item.started", "item.updated"):
+                if item_id not in self._text_index:
+                    idx = self._alloc()
+                    self._text_index[item_id] = idx
+                    out.append(
+                        StreamTaskMessageStart(
+                            type="start",
+                            index=idx,
+                            content=TextContent(
+                                type="text",
+                                author="agent",
+                                content="",
+                            ),
+                        )
+                    )
+                idx = self._text_index[item_id]
+                delta = ""
+                if current.startswith(previous) and len(current) > len(previous):
+                    delta = current[len(previous) :]
+                elif current and current != previous:
+                    delta = current
+                if delta:
+                    out.append(
+                        StreamTaskMessageDelta(
+                            type="delta",
+                            index=idx,
+                            delta=TextDelta(type="text", text_delta=delta),
+                        )
+                    )
+                self._text_accumulated[item_id] = current
+
+            elif evt_type == "item.completed":
+                if item_id not in self._text_index:
+                    idx = self._alloc()
+                    self._text_index[item_id] = idx
+                    out.append(
+                        StreamTaskMessageStart(
+                            type="start",
+                            index=idx,
+                            content=TextContent(
+                                type="text",
+                                author="agent",
+                                content="",
+                            ),
+                        )
+                    )
+                idx = self._text_index[item_id]
+                delta = ""
+                if current.startswith(previous) and len(current) > len(previous):
+                    delta = current[len(previous) :]
+                elif current and current != previous:
+                    delta = current
+                if delta:
+                    out.append(
+                        StreamTaskMessageDelta(
+                            type="delta",
+                            index=idx,
+                            delta=TextDelta(type="text", text_delta=delta),
+                        )
+                    )
+                out.append(StreamTaskMessageDone(type="done", index=idx))
+                self._text_accumulated[item_id] = current
+
+        elif item_type == "reasoning":
+            current = item.get("text") or ""
+
+            if evt_type == "item.started":
+                idx = self._alloc()
+                self._reasoning_index[item_id] = idx
+                self._reasoning_text[item_id] = current
+                out.append(
+                    StreamTaskMessageStart(
+                        type="start",
+                        index=idx,
+                        content=ReasoningContent(
+                            type="reasoning",
+                            author="agent",
+                            summary=[],
+                            content=[],
+                            style="active",
+                        ),
+                    )
+                )
+
+            elif evt_type == "item.updated":
+                # Accumulate silently; final text arrives on item.completed.
+                self._reasoning_text[item_id] = current
+
+            elif evt_type == "item.completed":
+                text = current or self._reasoning_text.get(item_id, "")
+                idx = self._reasoning_index.get(item_id)
+                if text:
+                    self.reasoning_count += 1
+                    summary = text.strip().split("\n", 1)[0][:300]
+                    if idx is None:
+                        # No started event was seen; open the message now.
+                        idx = self._alloc()
+                        out.append(
+                            StreamTaskMessageStart(
+                                type="start",
+                                index=idx,
+                                content=ReasoningContent(
+                                    type="reasoning",
+                                    author="agent",
+                                    summary=[],
+                                    content=[],
+                                    style="active",
+                                ),
+                            )
+                        )
+                    # Deliver the reasoning as deltas, then close with a Done.
+                    # Emitting a Full here instead would leave the open Start
+                    # context dangling: auto_send routes Full into its own
+                    # throwaway streaming context (ignoring the index), so the
+                    # Start context survives until end-of-turn teardown and
+                    # persists a second, near-empty reasoning message. Streaming
+                    # the content as deltas lets the open context accumulate the
+                    # final ReasoningContent and close cleanly as one message.
+                    out.append(
+                        StreamTaskMessageDelta(
+                            type="delta",
+                            index=idx,
+                            delta=ReasoningSummaryDelta(
+                                type="reasoning_summary",
+                                summary_index=0,
+                                summary_delta=summary,
+                            ),
+                        )
+                    )
+                    out.append(
+                        StreamTaskMessageDelta(
+                            type="delta",
+                            index=idx,
+                            delta=ReasoningContentDelta(
+                                type="reasoning_content",
+                                content_index=0,
+                                content_delta=text,
+                            ),
+                        )
+                    )
+                    out.append(StreamTaskMessageDone(type="done", index=idx))
+                elif idx is not None:
+                    # Empty reasoning block — still need to close with a Done.
+                    out.append(StreamTaskMessageDone(type="done", index=idx))
+
+        elif item_type in (
+            "command_execution",
+            "file_change",
+            "mcp_tool_call",
+            "web_search",
+            "todo_list",
+            "collab_tool_call",
+        ):
+            # Resolve a stable id once per item; reuse it for both halves.
+            tool_call_id = self._tool_call_ids.get(item_id)
+            if tool_call_id is None:
+                tool_call_id = item_id or f"codex_tool_{self.tool_call_count + 1}"
+                self._tool_call_ids[item_id] = tool_call_id
+
+            if evt_type == "item.started":
+                self.tool_call_count += 1
+                self._tool_open.add(item_id)
+                self._tool_item_types[item_id] = item_type
+                name = _tool_name_for(item_type, item)
+                args = _tool_args_for(item_type, item)
+                req_idx = self._alloc()
+                out.append(
+                    StreamTaskMessageStart(
+                        type="start",
+                        index=req_idx,
+                        content=ToolRequestContent(
+                            type="tool_request",
+                            author="agent",
+                            tool_call_id=tool_call_id,
+                            name=name,
+                            arguments=args,
+                        ),
+                    )
+                )
+                out.append(StreamTaskMessageDone(type="done", index=req_idx))
+
+            elif evt_type == "item.completed":
+                # file_change items may only emit item.completed (no started).
+                if item_id not in self._tool_open:
+                    self.tool_call_count += 1
+                    self._tool_open.add(item_id)
+                    self._tool_item_types[item_id] = item_type
+                    name = _tool_name_for(item_type, item)
+                    args = _tool_args_for(item_type, item)
+                    req_idx = self._alloc()
+                    out.append(
+                        StreamTaskMessageFull(
+                            type="full",
+                            index=req_idx,
+                            content=ToolRequestContent(
+                                type="tool_request",
+                                author="agent",
+                                tool_call_id=tool_call_id,
+                                name=name,
+                                arguments=args,
+                            ),
+                        )
+                    )
+
+                actual_type = self._tool_item_types.get(item_id, item_type)
+                result_text, is_error = _tool_output_for(actual_type, item)
+                name = _tool_name_for(actual_type, item)
+                resp_content: dict[str, Any] = {"result": result_text}
+                if is_error:
+                    resp_content["is_error"] = True
+                out.append(
+                    StreamTaskMessageFull(
+                        type="full",
+                        index=self._alloc(),
+                        content=ToolResponseContent(
+                            type="tool_response",
+                            author="agent",
+                            tool_call_id=tool_call_id,
+                            name=name,
+                            content=resp_content,
+                        ),
+                    )
+                )
+                self._tool_open.discard(item_id)
+                # Free the id mapping so a later item reusing an empty id gets a
+                # fresh fallback rather than colliding with this one.
+                self._tool_call_ids.pop(item_id, None)
+
+        elif item_type == "error":
+            if evt_type == "item.completed":
+                out.append(_error_full(item.get("message", "codex item error"), self._alloc()))
+
+        else:
+            logger.debug("[codex] unhandled item type=%s evt=%s", item_type, evt_type)
+
+        return out
+
+
+async def convert_codex_to_agentex_events(
+    events: AsyncIterator[str | dict[str, Any]],
+    on_result: Callable[[dict[str, Any]], None] | None = None,
+) -> AsyncIterator[StreamTaskMessage]:
+    """Convert a ``codex exec --json`` event stream into Agentex stream events.
+
+    This is a pure parser tap. The caller must supply ``events`` as an async
+    iterator of either raw newline-delimited JSON strings or pre-decoded dicts.
+    No subprocess or sandbox management is done here.
+
+    Args:
+        events: Async iterator of ``str`` (newline-delimited JSON lines) or
+            ``dict`` (pre-decoded event objects) as produced by the codex CLI's
+            ``--json`` flag via sandbox stdout.
+        on_result: Optional callback invoked once when a ``turn.completed``
+            event is seen. Receives a dict with keys:
+                ``usage``           — the raw codex usage dict (or None)
+                ``session_id``      — the codex thread_id (or None)
+                ``tool_call_count`` — int
+                ``reasoning_count`` — int
+            Use this to record turn-level metrics / usage in the caller's span
+            without coupling this module to span/tracing APIs.
+
+    Yields:
+        Canonical ``StreamTaskMessage*`` events (Start/Delta/Full/Done) with
+        ``TextContent``, ``ReasoningContent``, ``ToolRequestContent``, or
+        ``ToolResponseContent`` payloads.
+
+    MAPPING (abbreviated — see module docstring for the full table)
+        thread.started          -> no event; session_id captured for on_result
+        turn.started            -> no event
+        turn.completed          -> no event; triggers on_result callback
+        turn.failed / error     -> StreamTaskMessageFull(TextContent, error)
+        agent_message           -> Start + Deltas + Done
+        reasoning               -> Start + Full(ReasoningContent)
+        command_execution       -> Start(ToolRequest)+Done + Full(ToolResponse)
+        file_change             -> Full(ToolRequest) + Full(ToolResponse)
+        mcp_tool_call           -> Start(ToolRequest)+Done + Full(ToolResponse)
+        web_search / todo_list  -> Start(ToolRequest)+Done + Full(ToolResponse)
+        collab_tool_call        -> Start(ToolRequest)+Done + Full(ToolResponse)
+    """
+    processor = _CodexStreamProcessor()
+    _pending_usage: dict[str, Any] | None = None
+
+    async for raw in events:
+        if isinstance(raw, dict):
+            evt = raw
+        else:
+            line = raw.strip() if isinstance(raw, str) else ""
+            if not line:
+                continue
+            try:
+                evt = json.loads(line)
+            except json.JSONDecodeError:
+                logger.debug("[codex] non-JSON line: %s", line[:100])
+                continue
+
+        # Capture usage before processing so on_result can fire after flush.
+        if evt.get("type") == "turn.completed":
+            usage = evt.get("usage")
+            _pending_usage = usage if isinstance(usage, dict) else None
+
+        messages = processor.process(evt)
+        for msg in messages:
+            yield msg
+
+    if on_result is not None:
+        on_result(
+            {
+                "usage": _pending_usage,
+                "session_id": processor.session_id,
+                "tool_call_count": processor.tool_call_count,
+                "reasoning_count": processor.reasoning_count,
+            }
+        )
diff --git a/src/agentex/lib/adk/_modules/_codex_turn.py b/src/agentex/lib/adk/_modules/_codex_turn.py
new file mode 100644
index 000000000..e7fa1d929
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_codex_turn.py
@@ -0,0 +1,214 @@
+"""CodexTurn: HarnessTurn implementation for the codex event-stream tap.
+
+Wraps ``convert_codex_to_agentex_events`` so callers can pass a ``CodexTurn``
+directly to ``UnifiedEmitter.yield_turn`` or ``UnifiedEmitter.auto_send_turn``.
+
+Usage::
+
+    from agentex.lib.adk import convert_codex_to_agentex_events
+    from agentex.lib.adk._modules._codex_turn import CodexTurn, codex_usage_to_turn_usage
+
+    turn = CodexTurn(events=codex_event_stream, model="o4-mini")
+    async for msg in emitter.yield_turn(turn):
+        yield msg
+    turn_usage = turn.usage()
+
+OUT OF SCOPE
+------------
+Like ``_codex_sync``, this module is a pure library tap. Subprocess
+provisioning, sandbox setup, secret injection, and MCP configuration remain
+in the golden agent (``teams/sgp/agents/golden_agent/project/harness/``).
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.lib.adk._modules._codex_sync import (
+    StreamTaskMessage,
+    convert_codex_to_agentex_events,
+)
+
+
+def codex_usage_to_turn_usage(
+    raw: dict[str, Any] | None,
+    *,
+    model: str | None = None,
+    tool_call_count: int = 0,
+    reasoning_count: int = 0,
+    duration_ms: int | None = None,
+    cost_usd: float | None = None,
+) -> TurnUsage:
+    """Map a raw codex ``turn.completed`` usage dict to a canonical ``TurnUsage``.
+
+    Codex reports token usage under the ``usage`` key of the
+    ``turn.completed`` event. The shape follows the OpenAI completion_tokens
+    convention because codex is built on OpenAI models:
+
+    .. code-block:: json
+
+        {
+            "input_tokens": 1234,
+            "output_tokens": 456,
+            "total_tokens": 1690
+        }
+
+    Additionally, codex may report ``reasoning_tokens`` for o-series models:
+
+    .. code-block:: json
+
+        {
+            "input_tokens": 1234,
+            "output_tokens": 456,
+            "reasoning_tokens": 200,
+            "total_tokens": 1690
+        }
+
+    Defensive rules:
+    - Missing ``raw`` or missing sub-keys default to ``None`` (not zero) so
+      downstream callers can distinguish "not reported" from "reported as 0".
+    - Real zeros (``0`` explicitly present in ``raw``) are preserved as ``0``.
+    - ``total_tokens`` is accepted from the payload or left as ``None``;
+      callers should not recompute it because codex may use cached tokens.
+    - ``cost_usd`` is passed through when codex reports it (not yet common);
+      defaults to ``None`` if absent.
+
+    Args:
+        raw: The raw codex usage dict from ``turn.completed``, or ``None``.
+        model: Model string (e.g. "o4-mini") to attach to the usage record.
+        tool_call_count: Number of tool calls in the turn (from processor).
+        reasoning_count: Number of reasoning blocks (from processor).
+        duration_ms: Wall-clock duration of the turn in milliseconds.
+        cost_usd: Cost in USD if the caller can derive it; ``None`` otherwise.
+
+    Returns:
+        A populated ``TurnUsage`` instance.
+    """
+    if not isinstance(raw, dict):
+        raw = {}
+
+    def _int_or_none(key: str) -> int | None:
+        val = raw.get(key)
+        if val is None:
+            return None
+        try:
+            return int(val)
+        except (TypeError, ValueError):
+            return None
+
+    def _float_or_none(key: str) -> float | None:
+        val = raw.get(key)
+        if val is None:
+            return None
+        try:
+            return float(val)
+        except (TypeError, ValueError):
+            return None
+
+    # cost_usd: prefer explicitly passed value, then fall back to raw payload.
+    effective_cost = cost_usd if cost_usd is not None else _float_or_none("cost_usd")
+
+    return TurnUsage(
+        model=model or None,
+        input_tokens=_int_or_none("input_tokens"),
+        output_tokens=_int_or_none("output_tokens"),
+        cached_input_tokens=_int_or_none("cached_input_tokens"),
+        reasoning_tokens=_int_or_none("reasoning_tokens"),
+        total_tokens=_int_or_none("total_tokens"),
+        cost_usd=effective_cost,
+        duration_ms=duration_ms,
+        num_llm_calls=1,
+        num_tool_calls=tool_call_count,
+        num_reasoning_blocks=reasoning_count,
+    )
+
+
+class CodexTurn:
+    """A single codex turn as a ``HarnessTurn``.
+
+    Implements the ``HarnessTurn`` protocol so it can be passed to
+    ``UnifiedEmitter.yield_turn`` and ``UnifiedEmitter.auto_send_turn``.
+
+    ``usage()`` is valid only after ``events`` has been fully consumed (i.e.
+    the async generator has been exhausted). Calling ``usage()`` before
+    exhaustion returns a zero-value ``TurnUsage`` with only ``model`` set.
+
+    Args:
+        events: An async iterator of ``str | dict`` codex events, as
+            produced by reading ``codex exec --json`` stdout line by line.
+        model: Model string to attach to the ``TurnUsage``.
+        duration_ms: Optional turn wall-clock duration in milliseconds.
+        cost_usd: Optional cost in USD; ``None`` if not known.
+    """
+
+    def __init__(
+        self,
+        events: AsyncIterator[str | dict[str, Any]],
+        *,
+        model: str | None = None,
+        duration_ms: int | None = None,
+        cost_usd: float | None = None,
+    ) -> None:
+        self._raw_events = events
+        self._model = model
+        # Public + mutable: the true wall-clock duration (and cost) is usually
+        # only known after the stream is consumed, so callers may set these
+        # after construction and before calling usage().
+        self.duration_ms = duration_ms
+        self.cost_usd = cost_usd
+
+        # Populated by the on_result callback once the stream is exhausted.
+        self._result: dict[str, Any] | None = None
+        # The events generator is created at most once: ``_raw_events`` is a
+        # single-consumption AsyncIterator, so re-wrapping it would yield an
+        # already-exhausted stream that fires on_result with zeros and clobbers
+        # ``_result``. Cache the generator and hand back the same instance.
+        self._events_gen: AsyncIterator[StreamTaskMessage] | None = None
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        """Async iterator of canonical ``StreamTaskMessage*`` events.
+
+        The ``on_result`` callback populates ``_result`` when the underlying
+        codex stream ends, so ``usage()`` returns meaningful data after
+        exhaustion. Returns the same generator on every access so the underlying
+        stream is consumed (and ``on_result`` fires) exactly once.
+        """
+        if self._events_gen is None:
+            self._events_gen = convert_codex_to_agentex_events(
+                self._raw_events,
+                on_result=self._on_result,
+            )
+        return self._events_gen
+
+    def _on_result(self, result: dict[str, Any]) -> None:
+        self._result = result
+
+    @property
+    def session_id(self) -> str | None:
+        """The codex session id, for resuming a multi-turn session.
+
+        Valid only after ``events`` has been fully consumed (populated by the
+        ``on_result`` callback). Returns ``None`` if the stream is not yet
+        exhausted or codex reported no session id.
+        """
+        return self._result.get("session_id") if self._result else None
+
+    def usage(self) -> TurnUsage:
+        """Return normalized ``TurnUsage`` for this turn.
+
+        Valid only after ``events`` has been fully consumed. Returns a
+        zero-value ``TurnUsage`` (model set, counts zero, tokens None) if
+        called before the stream ends.
+        """
+        if self._result is None:
+            return TurnUsage(model=self._model)
+        return codex_usage_to_turn_usage(
+            self._result.get("usage"),
+            model=self._model,
+            tool_call_count=self._result.get("tool_call_count", 0),
+            reasoning_count=self._result.get("reasoning_count", 0),
+            duration_ms=self.duration_ms,
+            cost_usd=self.cost_usd,
+        )
diff --git a/src/agentex/lib/adk/_modules/_langgraph_async.py b/src/agentex/lib/adk/_modules/_langgraph_async.py
index 3e61c42f9..02ef059eb 100644
--- a/src/agentex/lib/adk/_modules/_langgraph_async.py
+++ b/src/agentex/lib/adk/_modules/_langgraph_async.py
@@ -3,8 +3,21 @@
 Converts LangGraph graph.astream() events into Agentex streaming updates
 and pushes them to Redis via adk.streaming contexts. For use with async
 ACP agents that stream via Redis rather than HTTP yields.
+
+Unified surface
+---------------
+This module is now implemented on top of ``LangGraphTurn`` and
+``UnifiedEmitter.auto_send_turn``, the same surface used by every other
+harness adapter (pydantic-ai, openai-agents, etc.). The public signature
+and return type are preserved identically.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events
+(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send``
+handles Full events correctly; no coalescing wrapper is needed.
 """
 
+from agentex.lib.utils.temporal import workflow_now_if_in_workflow
+
 
 async def stream_langgraph_events(stream, task_id: str) -> str:
     """Stream LangGraph events to Agentex via Redis.
@@ -18,6 +31,19 @@ async def stream_langgraph_events(stream, task_id: str) -> str:
     models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks
     in the Responses API responses/v1 format).
 
+    Reimplemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for
+    cross-harness consistency. Behavior is identical to the previous bespoke
+    implementation (verified by characterization tests in test_langgraph_async.py).
+
+    AGX1-377 note: LangGraph emits tool requests as ``Full`` events (from "updates"),
+    NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events
+    correctly; no coalescing wrapper is needed.
+
+    AGX1-378 note: ``created_at`` is set from ``workflow.now()`` when called inside a
+    Temporal workflow, matching the pattern used by the openai/litellm providers.
+    Outside a workflow (plain async activities, sync agents) it is ``None`` and the
+    server's wall clock is used.
+
     Args:
         stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"])
         task_id: The Agentex task ID to stream messages to.
@@ -25,178 +51,15 @@ async def stream_langgraph_events(stream, task_id: str) -> str:
     Returns:
         The accumulated final text output from the agent.
     """
-    # Lazy imports so langgraph/langchain aren't required at module load time
-    from langchain_core.messages import ToolMessage, AIMessageChunk
-
-    from agentex.lib import adk
-    from agentex.types.text_content import TextContent
-    from agentex.types.reasoning_content import ReasoningContent
-    from agentex.types.task_message_delta import TextDelta
-    from agentex.types.task_message_update import StreamTaskMessageDelta
-    from agentex.types.tool_request_content import ToolRequestContent
-    from agentex.types.tool_response_content import ToolResponseContent
-    from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta
-
-    text_context = None
-    reasoning_context = None
-    final_text = ""
-
-    try:
-        async for event_type, event_data in stream:
-            if event_type == "messages":
-                chunk, metadata = event_data
-
-                if not isinstance(chunk, AIMessageChunk) or not chunk.content:
-                    continue
-
-                # ----------------------------------------------------------
-                # Case 1: content is a plain string (regular models)
-                # ----------------------------------------------------------
-                if isinstance(chunk.content, str):
-                    if reasoning_context:
-                        await reasoning_context.close()
-                        reasoning_context = None
-
-                    if not text_context:
-                        final_text = ""
-                        text_context = await adk.streaming.streaming_task_message_context(
-                            task_id=task_id,
-                            initial_content=TextContent(
-                                author="agent",
-                                content="",
-                                format="markdown",
-                            ),
-                        ).__aenter__()
-
-                    final_text += chunk.content
-                    await text_context.stream_update(
-                        StreamTaskMessageDelta(
-                            parent_task_message=text_context.task_message,
-                            delta=TextDelta(type="text", text_delta=chunk.content),
-                            type="delta",
-                        )
-                    )
-
-                # ----------------------------------------------------------
-                # Case 2: content is a list of typed blocks (reasoning models)
-                # Responses API (responses/v1) format:
-                #   {"type": "reasoning", "summary": [{"type": "summary_text", "text": "..."}]}
-                #   {"type": "text", "text": "..."}
-                # ----------------------------------------------------------
-                elif isinstance(chunk.content, list):
-                    for block in chunk.content:
-                        if not isinstance(block, dict):
-                            continue
-
-                        block_type = block.get("type")
-
-                        if block_type == "reasoning":
-                            reasoning_text = ""
-                            for s in block.get("summary", []):
-                                if isinstance(s, dict) and s.get("type") == "summary_text":
-                                    reasoning_text += s.get("text", "")
-                            if not reasoning_text:
-                                continue
-
-                            if text_context:
-                                await text_context.close()
-                                text_context = None
-
-                            if not reasoning_context:
-                                reasoning_context = await adk.streaming.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=ReasoningContent(
-                                        author="agent",
-                                        summary=[],
-                                        content=[],
-                                        type="reasoning",
-                                        style="active",
-                                    ),
-                                ).__aenter__()
-
-                            await reasoning_context.stream_update(
-                                StreamTaskMessageDelta(
-                                    parent_task_message=reasoning_context.task_message,
-                                    delta=ReasoningSummaryDelta(
-                                        type="reasoning_summary",
-                                        summary_index=0,
-                                        summary_delta=reasoning_text,
-                                    ),
-                                    type="delta",
-                                )
-                            )
-
-                        elif block_type == "text":
-                            text_delta = block.get("text", "")
-                            if not text_delta:
-                                continue
-
-                            if reasoning_context:
-                                await reasoning_context.close()
-                                reasoning_context = None
-
-                            if not text_context:
-                                final_text = ""
-                                text_context = await adk.streaming.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=TextContent(
-                                        author="agent",
-                                        content="",
-                                        format="markdown",
-                                    ),
-                                ).__aenter__()
-
-                            final_text += text_delta
-                            await text_context.stream_update(
-                                StreamTaskMessageDelta(
-                                    parent_task_message=text_context.task_message,
-                                    delta=TextDelta(type="text", text_delta=text_delta),
-                                    type="delta",
-                                )
-                            )
-
-            elif event_type == "updates":
-                for node_name, state_update in event_data.items():
-                    if node_name == "agent":
-                        messages = state_update.get("messages", [])
-                        for msg in messages:
-                            if text_context:
-                                await text_context.close()
-                                text_context = None
-                            if reasoning_context:
-                                await reasoning_context.close()
-                                reasoning_context = None
-
-                            if hasattr(msg, "tool_calls") and msg.tool_calls:
-                                for tc in msg.tool_calls:
-                                    await adk.messages.create(
-                                        task_id=task_id,
-                                        content=ToolRequestContent(
-                                            tool_call_id=tc["id"],
-                                            name=tc["name"],
-                                            arguments=tc["args"],
-                                            author="agent",
-                                        ),
-                                    )
-
-                    elif node_name == "tools":
-                        messages = state_update.get("messages", [])
-                        for msg in messages:
-                            if isinstance(msg, ToolMessage):
-                                await adk.messages.create(
-                                    task_id=task_id,
-                                    content=ToolResponseContent(
-                                        tool_call_id=msg.tool_call_id,
-                                        name=msg.name or "unknown",
-                                        content=msg.content if isinstance(msg.content, str) else str(msg.content),
-                                        author="agent",
-                                    ),
-                                )
-    finally:
-        # Always close open contexts
-        if text_context:
-            await text_context.close()
-        if reasoning_context:
-            await reasoning_context.close()
-
-    return final_text
+    from agentex.lib.core.harness.emitter import UnifiedEmitter
+    from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+    # AGX1-377 note: LangGraph emits tool requests as Full events (from "updates"),
+    # NOT Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly;
+    # no coalescing wrapper is needed.
+    # AGX1-378: stamp messages with workflow.now() inside Temporal for deterministic
+    # created_at ordering; falls back to None (server wall clock) outside a workflow.
+    turn = LangGraphTurn(stream, model=None)
+    emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None)
+    result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow())
+    return result.final_text
diff --git a/src/agentex/lib/adk/_modules/_langgraph_sync.py b/src/agentex/lib/adk/_modules/_langgraph_sync.py
index 6d4ce715f..48231a87d 100644
--- a/src/agentex/lib/adk/_modules/_langgraph_sync.py
+++ b/src/agentex/lib/adk/_modules/_langgraph_sync.py
@@ -3,10 +3,36 @@
 Converts LangGraph graph.astream() events into Agentex TaskMessageUpdate
 events that are yielded back over the HTTP response. For use with sync ACP
 agents that stream via HTTP yields rather than Redis.
+
+Unified sync path
+-----------------
+Prefer using ``LangGraphTurn`` with ``UnifiedEmitter.yield_turn`` for new
+agents, which adds usage capture and optional tracing via the shared harness
+surface::
+
+    from agentex.lib.core.harness.emitter import UnifiedEmitter
+    from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+    turn = LangGraphTurn(stream)
+    emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=span_id)
+    async for event in emitter.yield_turn(turn):
+        yield event
+
+``convert_langgraph_to_agentex_events`` remains available as a lower-level
+primitive (e.g. for callers that need the raw event stream without the
+harness envelope).
 """
 
+from __future__ import annotations
+
+from typing import Any, Callable, Optional
+from collections.abc import AsyncGenerator
+
 
-async def convert_langgraph_to_agentex_events(stream):
+async def convert_langgraph_to_agentex_events(
+    stream: Any,
+    on_final_ai_message: Optional[Callable[..., None]] = None,
+) -> AsyncGenerator[Any, None]:
     """Convert LangGraph streaming events to Agentex TaskMessageUpdate events.
 
     Expects the stream from graph.astream() called with
@@ -22,8 +48,17 @@ async def convert_langgraph_to_agentex_events(stream):
     Supports both regular models (chunk.content is a str) and reasoning models
     like gpt-5/o1/o3 (chunk.content is a list of typed content blocks).
 
+    AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` (from
+    "updates" events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests
+    option is needed for LangGraph.
+
     Args:
         stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"])
+        on_final_ai_message: Optional callback ``(msg: AIMessage) -> None`` called for
+            each ``AIMessage`` in an "agent" node update. Use this to capture
+            ``usage_metadata`` for token accounting without re-traversing the stream.
+            The callback fires *after* all events for that message are yielded.
+            No-op when ``None`` (default).
 
     Yields:
         TaskMessageUpdate events (Start, Delta, Done, Full)
@@ -32,6 +67,7 @@ async def convert_langgraph_to_agentex_events(stream):
     from langchain_core.messages import ToolMessage, AIMessageChunk
 
     from agentex.types.text_content import TextContent
+    from agentex.types.reasoning_content import ReasoningContent
     from agentex.types.task_message_delta import TextDelta
     from agentex.types.task_message_update import (
         StreamTaskMessageDone,
@@ -113,7 +149,9 @@ async def convert_langgraph_to_agentex_events(stream):
                             yield StreamTaskMessageStart(
                                 type="start",
                                 index=message_index,
-                                content=TextContent(type="text", author="agent", content=""),
+                                content=ReasoningContent(
+                                    type="reasoning", author="agent", summary=[], content=[], style="active"
+                                ),
                             )
                             reasoning_streaming = True
                             reasoning_content_index = 0
@@ -205,6 +243,13 @@ async def convert_langgraph_to_agentex_events(stream):
                                 )
                                 message_index += 1
 
+                        # Notify caller of the final AIMessage (e.g. for usage capture)
+                        if on_final_ai_message is not None:
+                            from langchain_core.messages import AIMessage as _AIMessage
+
+                            if isinstance(msg, _AIMessage):
+                                on_final_ai_message(msg)
+
                 elif node_name == "tools":
                     messages = state_update.get("messages", [])
                     for msg in messages:
diff --git a/src/agentex/lib/adk/_modules/_langgraph_tracing.py b/src/agentex/lib/adk/_modules/_langgraph_tracing.py
index 74b8dcb57..2162201e1 100644
--- a/src/agentex/lib/adk/_modules/_langgraph_tracing.py
+++ b/src/agentex/lib/adk/_modules/_langgraph_tracing.py
@@ -1,4 +1,14 @@
-"""LangChain callback handler that creates Agentex spans for LLM calls and tool executions."""
+"""LangChain callback handler that creates Agentex spans for LLM calls and tool executions.
+
+.. deprecated::
+    ``AgentexLangGraphTracingHandler`` and ``create_langgraph_tracing_handler`` are
+    superseded by the unified harness surface (``LangGraphTurn`` +
+    ``UnifiedEmitter``), which derives spans automatically from the canonical
+    event stream without requiring a LangChain callback handler.
+
+    They remain importable and functional for backward compatibility, but new
+    agents should use the unified path instead.
+"""
 # ruff: noqa: ARG002
 # Callback methods must accept all arguments defined by LangChain's AsyncCallbackHandler interface.
 
@@ -31,6 +41,11 @@ class AgentexLangGraphTracingHandler(AsyncCallbackHandler):
           ├── llm:<model>       (LLM call)
           ├── tool:<tool_name>  (tool execution)
           └── llm:<model>       (LLM call)
+
+    .. deprecated::
+        Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified
+        harness derives equivalent spans from the canonical event stream,
+        removing the need for a LangChain callback handler entirely.
     """
 
     def __init__(
@@ -237,6 +252,20 @@ def create_langgraph_tracing_handler(
 
     Returns:
         An ``AgentexLangGraphTracingHandler`` instance ready to use as a LangChain callback.
+
+    .. deprecated::
+        Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified harness
+        derives equivalent spans from the canonical event stream automatically, with
+        no LangChain callback required::
+
+            from agentex.lib.core.harness.emitter import UnifiedEmitter
+            from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+            turn = LangGraphTurn(stream)
+            emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=span_id)
+            result = await emitter.auto_send_turn(turn)
+
+        This function remains available for backward compatibility.
     """
     return AgentexLangGraphTracingHandler(
         trace_id=trace_id,
diff --git a/src/agentex/lib/adk/_modules/_langgraph_turn.py b/src/agentex/lib/adk/_modules/_langgraph_turn.py
new file mode 100644
index 000000000..da8ff0e7c
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_langgraph_turn.py
@@ -0,0 +1,152 @@
+"""HarnessTurn adapter for LangGraph astream() event streams.
+
+Provides ``LangGraphTurn`` (a ``HarnessTurn`` implementation) and the
+``langgraph_usage_to_turn_usage`` helper that maps LangGraph's
+``AIMessage.usage_metadata`` onto the framework-agnostic ``TurnUsage`` model.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events
+(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send``
+handles Full events correctly; no coalescing wrapper is needed.
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+from collections.abc import AsyncGenerator
+
+from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage
+from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events
+
+
+def langgraph_usage_to_turn_usage(usage_metadata: Any, model: str | None) -> TurnUsage:
+    """Map LangGraph ``AIMessage.usage_metadata`` onto ``TurnUsage``.
+
+    ``usage_metadata`` may be ``None`` (model doesn't report usage).
+    Real zero token counts (e.g. 0 output tokens) are preserved as 0, NOT
+    coerced to ``None``.
+
+    Mapping::
+
+        input_tokens                       -> input_tokens
+        output_tokens                      -> output_tokens
+        total_tokens                       -> total_tokens
+        input_token_details.cache_read     -> cached_input_tokens
+        output_token_details.reasoning     -> reasoning_tokens
+
+    Args:
+        usage_metadata: The ``usage_metadata`` dict from an ``AIMessage``,
+            or ``None`` if the model did not report usage.
+        model: The model name string to attach to the ``TurnUsage``, or ``None``.
+
+    Returns:
+        A populated ``TurnUsage`` instance.
+    """
+    if usage_metadata is None:
+        return TurnUsage(model=model)
+
+    raw_input = (usage_metadata or {}).get("input_tokens")
+    raw_output = (usage_metadata or {}).get("output_tokens")
+    raw_total = (usage_metadata or {}).get("total_tokens")
+    input_details = (usage_metadata or {}).get("input_token_details") or {}
+    output_details = (usage_metadata or {}).get("output_token_details") or {}
+    raw_cache_read = input_details.get("cache_read")
+    raw_reasoning = output_details.get("reasoning")
+
+    return TurnUsage(
+        model=model,
+        input_tokens=raw_input,
+        output_tokens=raw_output,
+        total_tokens=raw_total,
+        cached_input_tokens=raw_cache_read,
+        reasoning_tokens=raw_reasoning,
+    )
+
+
+def _add_optional(a: int | None, b: int | None) -> int | None:
+    """Sum two optional token counts; ``None`` means 'not reported' on that side.
+
+    ``None + None`` stays ``None`` (model never reported usage), while a real 0
+    contributes 0 (preserving zero counts rather than coercing them away).
+    """
+    if a is None and b is None:
+        return None
+    return (a or 0) + (b or 0)
+
+
+def _accumulate_turn_usage(acc: TurnUsage, call: TurnUsage, model: str | None) -> TurnUsage:
+    """Add a single LLM call's usage into the running per-turn total.
+
+    A LangGraph turn can make multiple LLM calls (e.g. text -> tool decision ->
+    final text); summing them avoids silently dropping all but the last call.
+    """
+    return TurnUsage(
+        model=model,
+        input_tokens=_add_optional(acc.input_tokens, call.input_tokens),
+        output_tokens=_add_optional(acc.output_tokens, call.output_tokens),
+        total_tokens=_add_optional(acc.total_tokens, call.total_tokens),
+        cached_input_tokens=_add_optional(acc.cached_input_tokens, call.cached_input_tokens),
+        reasoning_tokens=_add_optional(acc.reasoning_tokens, call.reasoning_tokens),
+    )
+
+
+class LangGraphTurn:
+    """HarnessTurn wrapping a LangGraph ``astream()`` event stream.
+
+    Implements the ``HarnessTurn`` Protocol so it can be passed to either
+    ``UnifiedEmitter.yield_turn`` (sync HTTP ACP) or
+    ``UnifiedEmitter.auto_send_turn`` (async / temporal).
+
+    Usage::
+
+        stream = graph.astream(
+            {"messages": [{"role": "user", "content": user_message}]},
+            stream_mode=["messages", "updates"],
+        )
+        turn = LangGraphTurn(stream, model=model_name)
+
+        # Sync HTTP ACP
+        async for event in emitter.yield_turn(turn):
+            yield event
+
+        # Async / temporal
+        result = await emitter.auto_send_turn(turn)
+
+    AGX1-377 note: LangGraph tool requests are ``StreamTaskMessageFull`` (from
+    "updates"), NOT Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests``
+    option is needed.
+
+    Usage data is captured lazily via the ``on_final_ai_message`` callback and
+    is only valid after ``events`` has been fully consumed. Multi-step turns
+    (more than one LLM call) accumulate usage additively across calls.
+    """
+
+    def __init__(self, stream: Any, model: str | None = None) -> None:
+        self._stream = stream
+        self._model = model
+        self._usage: TurnUsage = TurnUsage(model=model)
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        return self._generate_events()
+
+    async def _generate_events(self) -> AsyncGenerator[StreamTaskMessage, None]:
+        def _capture(ai_msg: Any) -> None:
+            usage_metadata = getattr(ai_msg, "usage_metadata", None)
+            if usage_metadata is not None:
+                call_usage = langgraph_usage_to_turn_usage(usage_metadata, self._model)
+                # Accumulate across LLM calls — the callback fires once per agent
+                # node invocation, so a multi-step turn reports usage more than
+                # once; overwriting would drop all but the last call.
+                self._usage = _accumulate_turn_usage(self._usage, call_usage, self._model)
+
+        async for ev in convert_langgraph_to_agentex_events(self._stream, on_final_ai_message=_capture):
+            yield ev
+
+    def usage(self) -> TurnUsage:
+        """Return the usage accumulated across all AIMessages in the stream.
+
+        Multi-step turns sum each LLM call's usage. Valid only after ``events``
+        has been fully consumed. Returns a zero-usage ``TurnUsage`` if the model
+        did not report usage.
+        """
+        return self._usage
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py
index 0bbb5b19d..85abfb845 100644
--- a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py
@@ -6,11 +6,10 @@
 HTTP yields.
 
 Text and thinking tokens stream as deltas inside coalesced streaming
-contexts. Tool requests and tool results are emitted as full
-``adk.messages.create(...)`` calls (Option A — matches the async LangGraph
-helper's convention). To stream tool-call argument tokens, see the sync
-converter at ``agentex.lib.adk._modules._pydantic_ai_sync`` which yields
-``ToolRequestDelta`` events.
+contexts. Tool requests and tool results are posted as open+close pairs
+on a streaming context (the unified surface persists ``initial_content``
+when a context is closed without deltas). This matches the ``auto_send``
+convention used by all other async/Temporal harnesses.
 
 Tracing is opt-in via a ``tracing_handler`` parameter — see
 ``create_pydantic_ai_tracing_handler`` in
@@ -19,7 +18,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from agentex.lib.adk._modules._pydantic_ai_tracing import (
@@ -49,230 +48,18 @@ async def stream_pydantic_ai_events(
         more text) return only the final text segment, matching the
         ``stream_langgraph_events`` convention.
     """
-    # Lazy imports so pydantic-ai isn't required at module load time.
-    import json
+    from agentex.lib.core.harness.emitter import UnifiedEmitter
+    from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
 
-    from pydantic_ai.messages import (
-        TextPart,
-        PartEndEvent,
-        ThinkingPart,
-        ToolCallPart,
-        TextPartDelta,
-        PartDeltaEvent,
-        PartStartEvent,
-        ThinkingPartDelta,
-        FunctionToolResultEvent,
+    turn = PydanticAITurn(
+        stream,
+        model=None,
+        tracing_handler=tracing_handler,
     )
-
-    from agentex.lib import adk
-    from agentex.types.text_content import TextContent
-    from agentex.types.reasoning_content import ReasoningContent
-    from agentex.types.task_message_delta import TextDelta
-    from agentex.types.task_message_update import StreamTaskMessageDelta
-    from agentex.types.tool_request_content import ToolRequestContent
-    from agentex.types.tool_response_content import ToolResponseContent
-    from agentex.types.reasoning_content_delta import ReasoningContentDelta
-
-    text_context = None
-    reasoning_context = None
-    final_text = ""
-
-    # Per Pydantic-AI part-index bookkeeping. Part indices restart at 0 on
-    # each new model response, so we overwrite on PartStartEvent.
-    part_kind: dict[int, str] = {}
-    tool_call_info: dict[int, tuple[str, str]] = {}
-
-    async def _close_text():
-        nonlocal text_context
-        if text_context:
-            await text_context.close()
-            text_context = None
-
-    async def _close_reasoning():
-        nonlocal reasoning_context
-        if reasoning_context:
-            await reasoning_context.close()
-            reasoning_context = None
-
-    try:
-        async for event in stream:
-            if isinstance(event, PartStartEvent):
-                if isinstance(event.part, TextPart):
-                    await _close_reasoning()
-                    await _close_text()
-
-                    final_text = ""
-                    text_context = await adk.streaming.streaming_task_message_context(
-                        task_id=task_id,
-                        initial_content=TextContent(
-                            author="agent",
-                            content="",
-                            format="markdown",
-                        ),
-                    ).__aenter__()
-                    part_kind[event.index] = "text"
-
-                    # Pydantic AI puts the first streaming chunk in
-                    # PartStartEvent.part.content; surface it as a Delta so it
-                    # actually renders (Start.content is initialization, not body).
-                    if event.part.content:
-                        final_text += event.part.content
-                        await text_context.stream_update(
-                            StreamTaskMessageDelta(
-                                parent_task_message=text_context.task_message,
-                                delta=TextDelta(type="text", text_delta=event.part.content),
-                                type="delta",
-                            )
-                        )
-
-                elif isinstance(event.part, ThinkingPart):
-                    await _close_text()
-                    await _close_reasoning()
-
-                    reasoning_context = await adk.streaming.streaming_task_message_context(
-                        task_id=task_id,
-                        initial_content=ReasoningContent(
-                            author="agent",
-                            summary=[],
-                            content=[],
-                            type="reasoning",
-                            style="active",
-                        ),
-                    ).__aenter__()
-                    part_kind[event.index] = "reasoning"
-
-                    if event.part.content:
-                        await reasoning_context.stream_update(
-                            StreamTaskMessageDelta(
-                                parent_task_message=reasoning_context.task_message,
-                                delta=ReasoningContentDelta(
-                                    type="reasoning_content",
-                                    content_index=0,
-                                    content_delta=event.part.content,
-                                ),
-                                type="delta",
-                            )
-                        )
-
-                elif isinstance(event.part, ToolCallPart):
-                    await _close_text()
-                    await _close_reasoning()
-                    tool_call_info[event.index] = (
-                        event.part.tool_call_id,
-                        event.part.tool_name,
-                    )
-                    part_kind[event.index] = "tool_call"
-
-            elif isinstance(event, PartDeltaEvent):
-                kind = part_kind.get(event.index)
-                if kind == "text" and isinstance(event.delta, TextPartDelta) and text_context:
-                    final_text += event.delta.content_delta
-                    await text_context.stream_update(
-                        StreamTaskMessageDelta(
-                            parent_task_message=text_context.task_message,
-                            delta=TextDelta(type="text", text_delta=event.delta.content_delta),
-                            type="delta",
-                        )
-                    )
-                elif (
-                    kind == "reasoning"
-                    and isinstance(event.delta, ThinkingPartDelta)
-                    and reasoning_context
-                    and event.delta.content_delta
-                ):
-                    await reasoning_context.stream_update(
-                        StreamTaskMessageDelta(
-                            parent_task_message=reasoning_context.task_message,
-                            delta=ReasoningContentDelta(
-                                type="reasoning_content",
-                                content_index=0,
-                                content_delta=event.delta.content_delta,
-                            ),
-                            type="delta",
-                        )
-                    )
-                # Tool-call arg deltas: Pydantic AI accumulates them; we
-                # surface the final args on PartEndEvent below (Option A).
-
-            elif isinstance(event, PartEndEvent):
-                kind = part_kind.get(event.index)
-                if kind == "text":
-                    await _close_text()
-                elif kind == "reasoning":
-                    await _close_reasoning()
-                elif kind == "tool_call" and isinstance(event.part, ToolCallPart):
-                    tool_call_id, tool_name = tool_call_info.get(event.index, ("", ""))
-                    args = event.part.args
-                    if isinstance(args, str):
-                        try:
-                            args = json.loads(args) if args else {}
-                        except json.JSONDecodeError:
-                            args = {"_raw": args}
-                    elif args is None:
-                        args = {}
-                    await adk.messages.create(
-                        task_id=task_id,
-                        content=ToolRequestContent(
-                            tool_call_id=tool_call_id,
-                            name=tool_name,
-                            arguments=args,
-                            author="agent",
-                        ),
-                    )
-                    if tracing_handler is not None and tool_call_id:
-                        await tracing_handler.on_tool_start(
-                            tool_call_id=tool_call_id,
-                            tool_name=tool_name,
-                            arguments=args,
-                        )
-
-            elif isinstance(event, FunctionToolResultEvent):
-                await _close_text()
-                await _close_reasoning()
-
-                result = event.part
-                tool_call_id = result.tool_call_id
-                tool_name = getattr(result, "tool_name", "") or ""
-                # Preserve structure for dicts / lists / Pydantic models so the
-                # UI can render them as JSON, not as Python repr. Matches the
-                # sync converter's ``_tool_return_content`` helper exactly —
-                # ``str(content)`` on a dict produces ``"{'k': 'v'}"`` which is
-                # invalid JSON and unreadable in the UI.
-                content = getattr(result, "content", None)
-                content_payload: Any
-                if content is None:
-                    content_payload = str(result)
-                elif isinstance(content, (str, int, float, bool, list, dict)):
-                    content_payload = content
-                elif hasattr(content, "model_dump"):
-                    try:
-                        content_payload = content.model_dump()
-                    except Exception:
-                        content_payload = str(content)
-                else:
-                    content_payload = str(content)
-                await adk.messages.create(
-                    task_id=task_id,
-                    content=ToolResponseContent(
-                        tool_call_id=tool_call_id,
-                        name=tool_name,
-                        content=content_payload,
-                        author="agent",
-                    ),
-                )
-                if tracing_handler is not None and tool_call_id:
-                    await tracing_handler.on_tool_end(
-                        tool_call_id=tool_call_id,
-                        result=content_payload,
-                    )
-
-            # FunctionToolCallEvent / FinalResultEvent / AgentRunResultEvent
-            # are intentionally ignored — same as the sync converter.
-
-    finally:
-        if text_context:
-            await text_context.close()
-        if reasoning_context:
-            await reasoning_context.close()
-
-    return final_text
+    emitter = UnifiedEmitter(
+        task_id=task_id,
+        trace_id=None,
+        parent_span_id=None,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result.final_text
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py
index d94c0ae12..e4ac31e7e 100644
--- a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py
@@ -16,12 +16,32 @@ async def handle_message_send(params):
         async with agent.run_stream_events(params.content.content) as stream:
             async for event in convert_pydantic_ai_to_agentex_events(stream):
                 yield event
+
+Recommended: unified surface
+-----------------------------
+For new handlers, prefer ``UnifiedEmitter`` + ``PydanticAITurn`` over the
+bare converter. The unified surface wires tracing automatically when a
+``trace_id`` is provided, so tool and reasoning spans are derived from the
+same event stream with no extra setup:
+
+    from agentex.lib.core.harness import UnifiedEmitter
+    from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+    emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id)
+    turn = PydanticAITurn(agent.run_stream_events(prompt), model="openai:gpt-4o")
+    async for event in emitter.yield_turn(turn):
+        yield event   # forwarded over the ACP streaming response; spans derived automatically
+
+``convert_pydantic_ai_to_agentex_events`` remains the low-level tap for
+callers that manage their own tracing or need direct access to the raw
+converted stream.
 """
 
 from __future__ import annotations
 
 import json
-from typing import TYPE_CHECKING, Any, AsyncIterator
+import inspect
+from typing import TYPE_CHECKING, Any, Callable, AsyncIterator
 
 from pydantic_ai.run import AgentRunResultEvent
 
@@ -105,6 +125,7 @@ def _tool_return_content(result: ToolReturnPart | Any) -> Any:
 async def convert_pydantic_ai_to_agentex_events(
     stream_response: AsyncIterator[Any],
     tracing_handler: "AgentexPydanticAITracingHandler | None" = None,
+    on_result: Callable[[AgentRunResultEvent], Any] | None = None,
 ) -> AsyncIterator[StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone]:
     """Convert a Pydantic AI agent event stream into Agentex stream events.
 
@@ -132,6 +153,12 @@ async def convert_pydantic_ai_to_agentex_events(
             tool call in the run is also recorded as an Agentex child span
             beneath the handler's configured ``parent_span_id``. Streaming
             behavior is unchanged when omitted.
+        on_result: Optional callback invoked with the terminal
+            ``AgentRunResultEvent`` when the run completes. Both sync and
+            async callables are accepted. No ``StreamTaskMessage*`` events are
+            yielded for this terminal event; the callback is the only side
+            effect. Useful for capturing run-level usage without altering the
+            streaming output.
 
     Yields:
         Agentex ``StreamTaskMessage*`` events suitable for forwarding back over
@@ -328,6 +355,10 @@ async def convert_pydantic_ai_to_agentex_events(
             # Already covered by PartStart/PartDelta/PartEnd events above, or
             # informational only (FinalResultEvent / AgentRunResultEvent signal
             # run-level state, not new content to surface).
+            if isinstance(event, AgentRunResultEvent) and on_result is not None:
+                ret = on_result(event)
+                if inspect.iscoroutine(ret):
+                    await ret
             continue
 
         else:
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py b/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py
index aa9d906eb..e199d0a8c 100644
--- a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py
@@ -1,5 +1,29 @@
 """Tracing handler that records Agentex spans for tool calls in a pydantic-ai agent run.
 
+.. deprecated::
+    ``AgentexPydanticAITracingHandler`` and ``create_pydantic_ai_tracing_handler``
+    are superseded by the unified harness surface (``UnifiedEmitter`` in
+    ``agentex.lib.core.harness``). The unified surface derives tool and
+    reasoning spans directly from the canonical ``StreamTaskMessage*`` stream,
+    so no separate handler is required. Both symbols remain fully importable
+    and functional; they will be removed in a future release. New code should
+    construct a ``UnifiedEmitter`` with a ``trace_id`` instead:
+
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+        emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id)
+        turn = PydanticAITurn(agent.run_stream_events(prompt), model="openai:gpt-4o")
+        async for event in emitter.yield_turn(turn):
+            yield event
+
+# NOTE: A runtime ``warnings.warn(..., DeprecationWarning)`` is intentionally
+# omitted here. The repo's pyproject ``filterwarnings = ["error"]`` would turn
+# it into a test/caller failure, and the async helper (``stream_pydantic_ai_events``)
+# still threads this handler through for existing callers that lack a ``trace_id``
+# on the async path. The runtime warning and caller migration are deferred until
+# ``trace_id`` threading lands on the async helper in a future API-versioning change.
+
 Mirrors the LangGraph tracing handler pattern: the caller creates a handler
 bound to a ``trace_id`` and a ``parent_span_id``, then hands it to
 ``stream_pydantic_ai_events(..., tracing_handler=handler)``. The streamer
@@ -63,6 +87,14 @@ def _tool_span_id(trace_id: str, tool_call_id: str) -> str:
 class AgentexPydanticAITracingHandler:
     """Records Agentex tracing spans for tool calls observed in a pydantic-ai event stream.
 
+    .. deprecated::
+        Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which
+        derives tool and reasoning spans from the canonical ``StreamTaskMessage*``
+        stream automatically when ``trace_id`` is provided. This class remains
+        fully functional but will be removed in a future release. New code should
+        use ``UnifiedEmitter`` with a trace context instead of constructing this
+        handler directly.
+
     Pass an instance to ``stream_pydantic_ai_events(..., tracing_handler=...)``
     or call ``on_tool_start`` / ``on_tool_end`` yourself if you're consuming
     the event stream by hand.
@@ -165,6 +197,13 @@ def create_pydantic_ai_tracing_handler(
 ) -> AgentexPydanticAITracingHandler:
     """Create a tracing handler that records Agentex spans for pydantic-ai tool calls.
 
+    .. deprecated::
+        Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which
+        derives tool and reasoning spans from the canonical ``StreamTaskMessage*``
+        stream automatically when ``trace_id`` is provided. This function remains
+        fully functional but will be removed in a future release. New code should
+        construct a ``UnifiedEmitter`` with a trace context instead.
+
     Args:
         trace_id: The trace ID. Typically the Agentex task ID.
         parent_span_id: Optional parent span ID to nest tool spans under. If
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py
new file mode 100644
index 000000000..b06172e7f
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py
@@ -0,0 +1,134 @@
+"""PydanticAITurn: a HarnessTurn wrapping a pydantic-ai event stream.
+
+Adapts a pydantic-ai ``AgentStreamEvent`` stream into the canonical
+``StreamTaskMessage*`` stream while capturing run-level usage from the
+terminal ``AgentRunResultEvent``.
+
+Typical usage::
+
+    async with agent.run_stream_events(user_msg) as stream:
+        turn = PydanticAITurn(stream, model="openai:gpt-4o")
+        async for event in turn.events:
+            yield event
+        span.set_attributes(turn.usage().model_dump())
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, AsyncIterator
+
+from pydantic_ai.run import AgentRunResultEvent
+
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
+
+if TYPE_CHECKING:
+    from agentex.lib.adk._modules._pydantic_ai_tracing import AgentexPydanticAITracingHandler
+
+StreamTaskMessage = StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone
+
+
+def pydantic_ai_usage_to_turn_usage(usage: Any, model: str | None) -> TurnUsage:
+    """Map a pydantic-ai ``RunUsage`` onto ``TurnUsage``.
+
+    Uses defensive ``getattr(..., None)`` so a future field rename in
+    pydantic-ai degrades to ``None`` rather than raising ``AttributeError``.
+
+    RunUsage fields (verified against pydantic-ai in this repo):
+        input_tokens, cache_write_tokens, cache_read_tokens, output_tokens,
+        input_audio_tokens, cache_audio_read_tokens, output_audio_tokens,
+        details, requests, tool_calls.
+    ``total_tokens`` is a computed property.
+
+    Mapping:
+        requests           -> num_llm_calls
+        input_tokens       -> input_tokens
+        output_tokens      -> output_tokens
+        cache_read_tokens  -> cached_input_tokens
+        total_tokens       -> total_tokens
+
+    getattr results pass straight through: a MISSING attribute degrades to
+    None (defensive), while a real 0 stays 0 (a cache-hit with 0 output
+    tokens is a genuine zero, not "unknown") and a real N stays N.
+    """
+    raw_input = getattr(usage, "input_tokens", None)
+    raw_output = getattr(usage, "output_tokens", None)
+    raw_cache_read = getattr(usage, "cache_read_tokens", None)
+    raw_total = getattr(usage, "total_tokens", None)
+    raw_requests = getattr(usage, "requests", None)
+
+    return TurnUsage(
+        model=model,
+        input_tokens=raw_input,
+        output_tokens=raw_output,
+        cached_input_tokens=raw_cache_read,
+        total_tokens=raw_total,
+        num_llm_calls=raw_requests if raw_requests is not None else 0,
+    )
+
+
+class PydanticAITurn:
+    """A single harness turn backed by a pydantic-ai event stream.
+
+    Satisfies the ``HarnessTurn`` protocol: ``events`` async-generates the
+    canonical ``StreamTaskMessage*`` stream; ``usage()`` returns a normalized
+    ``TurnUsage`` (valid only after ``events`` is exhausted).
+
+    ``events`` is identical to the bare ``convert_pydantic_ai_to_agentex_events``
+    output (tool calls stream as ``Start + ToolRequestDelta + Done``, preserving
+    argument-token streaming on the sync/yield channel). The foundation
+    ``auto_send`` delivers the streamed tool-request shape natively (AGX1-377),
+    so no coalescing is needed on either channel.
+    """
+
+    def __init__(
+        self,
+        stream: AsyncIterator[Any],
+        model: str | None = None,
+        tracing_handler: "AgentexPydanticAITracingHandler | None" = None,
+    ) -> None:
+        self._stream = stream
+        self._model = model
+        self._tracing_handler = tracing_handler
+        self._usage = TurnUsage(model=model)
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        return self._generate_events()
+
+    async def _generate_events(self) -> AsyncIterator[StreamTaskMessage]:
+        def _capture(result_event: AgentRunResultEvent) -> None:
+            run_result = getattr(result_event, "result", None)
+            if run_result is None:
+                return
+            usage_attr = getattr(run_result, "usage", None)
+            if usage_attr is None:
+                return
+            # In newer pydantic-ai, .usage is a DeprecatedCallableRunUsage —
+            # it's both a property value and callable (emitting a deprecation
+            # warning when called). Access it as a plain attribute to avoid the
+            # warning; it already IS the RunUsage instance.
+            usage_obj = usage_attr
+            self._usage = pydantic_ai_usage_to_turn_usage(usage_obj, self._model)
+
+        raw_stream = convert_pydantic_ai_to_agentex_events(
+            self._stream,
+            tracing_handler=self._tracing_handler,
+            on_result=_capture,
+        )
+        async for ev in raw_stream:
+            yield ev
+
+    def usage(self) -> TurnUsage:
+        """Return the normalized usage for this turn.
+
+        Valid only after ``events`` is exhausted (single-pass contract).
+        Before exhaustion the model field is set but token fields are None.
+        """
+        return self._usage
diff --git a/src/agentex/lib/adk/_modules/tracing.py b/src/agentex/lib/adk/_modules/tracing.py
index 8694c2078..94bf741e4 100644
--- a/src/agentex/lib/adk/_modules/tracing.py
+++ b/src/agentex/lib/adk/_modules/tracing.py
@@ -6,7 +6,9 @@
 from datetime import timedelta
 from typing import Any
 
+from temporalio import workflow
 from temporalio.common import RetryPolicy
+from temporalio.exceptions import ActivityError, TimeoutError as TemporalTimeoutError, is_cancelled_exception
 
 from agentex import AsyncAgentex  # noqa: F401
 from agentex.lib.adk.utils._modules.client import create_async_agentex_client
@@ -26,6 +28,18 @@
 logger = make_logger(__name__)
 
 DEFAULT_RETRY_POLICY = RetryPolicy(maximum_attempts=1)
+TEMPORAL_SPAN_ACTIVITY_DROPPED_METRIC = "agentex.tracing.temporal_span_activity.dropped"
+
+
+def _record_temporal_span_activity_dropped(event_type: str) -> None:
+    try:
+        workflow.metric_meter().create_counter(
+            TEMPORAL_SPAN_ACTIVITY_DROPPED_METRIC,
+            description="Temporal tracing span activities dropped after fail-open",
+            unit="1",
+        ).add(1, {"event_type": event_type})
+    except Exception:
+        pass
 
 
 class TracingModule:
@@ -180,14 +194,26 @@ async def start_span(
             task_id=task_id,
         )
         if in_temporal_workflow():
-            return await ActivityHelpers.execute_activity(
-                activity_name=TracingActivityName.START_SPAN,
-                request=params,
-                response_type=Span,
-                start_to_close_timeout=start_to_close_timeout,
-                retry_policy=retry_policy,
-                heartbeat_timeout=heartbeat_timeout,
-            )
+            try:
+                return await ActivityHelpers.execute_activity(
+                    activity_name=TracingActivityName.START_SPAN,
+                    request=params,
+                    response_type=Span,
+                    start_to_close_timeout=start_to_close_timeout,
+                    retry_policy=retry_policy,
+                    heartbeat_timeout=heartbeat_timeout,
+                )
+            except (ActivityError, TemporalTimeoutError) as err:
+                if is_cancelled_exception(err):
+                    raise
+                workflow.logger.warning(
+                    "Failed to start tracing span %r for trace_id=%r; continuing without tracing",
+                    name,
+                    trace_id,
+                    exc_info=True,
+                )
+                _record_temporal_span_activity_dropped("start")
+                return None
         else:
             return await self._tracing_service.start_span(
                 trace_id=trace_id,
@@ -224,14 +250,26 @@ async def end_span(
             span=span,
         )
         if in_temporal_workflow():
-            return await ActivityHelpers.execute_activity(
-                activity_name=TracingActivityName.END_SPAN,
-                request=params,
-                response_type=Span,
-                start_to_close_timeout=start_to_close_timeout,
-                retry_policy=retry_policy,
-                heartbeat_timeout=heartbeat_timeout,
-            )
+            try:
+                return await ActivityHelpers.execute_activity(
+                    activity_name=TracingActivityName.END_SPAN,
+                    request=params,
+                    response_type=Span,
+                    start_to_close_timeout=start_to_close_timeout,
+                    retry_policy=retry_policy,
+                    heartbeat_timeout=heartbeat_timeout,
+                )
+            except (ActivityError, TemporalTimeoutError) as err:
+                if is_cancelled_exception(err):
+                    raise
+                workflow.logger.warning(
+                    "Failed to end tracing span %r for trace_id=%r; continuing without closing trace",
+                    span.id,
+                    trace_id,
+                    exc_info=True,
+                )
+                _record_temporal_span_activity_dropped("end")
+                return span
         else:
             return await self._tracing_service.end_span(
                 trace_id=trace_id,
diff --git a/src/agentex/lib/adk/providers/_modules/openai_turn.py b/src/agentex/lib/adk/providers/_modules/openai_turn.py
new file mode 100644
index 000000000..17a6518ee
--- /dev/null
+++ b/src/agentex/lib/adk/providers/_modules/openai_turn.py
@@ -0,0 +1,134 @@
+"""OpenAITurn: adapt an OpenAI Agents SDK streamed run onto the harness surface.
+
+A ``HarnessTurn`` exposes a single canonical ``StreamTaskMessage*`` stream plus
+normalized usage. ``OpenAITurn`` wraps a ``RunResultStreaming`` (from
+``Runner.run_streamed``), converts its native OpenAI events into the canonical
+stream via ``convert_openai_to_agentex_events``, and after exhaustion reads the
+run's ``raw_responses`` to aggregate usage into a provider-independent
+``TurnUsage``.
+
+Delivery (yield vs auto-send) and tracing are owned by ``UnifiedEmitter``; this
+module is purely the provider->canonical adapter.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, AsyncIterator
+
+from agents.usage import Usage
+
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage
+from agentex.lib.adk.providers._modules.sync_provider import (
+    convert_openai_to_agentex_events,
+)
+
+if TYPE_CHECKING:
+    from agents import ModelResponse, RunResultStreaming
+
+logger = make_logger(__name__)
+
+
+def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage:
+    """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``.
+
+    All field access is defensive (``getattr(..., None)``): different model
+    backends populate different subsets of the usage object, and real zeros are
+    valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce
+    a present-but-zero value into ``None``.
+    """
+    if usage is None:
+        return TurnUsage(model=model)
+
+    input_details = getattr(usage, "input_tokens_details", None)
+    output_details = getattr(usage, "output_tokens_details", None)
+
+    return TurnUsage(
+        model=model,
+        num_llm_calls=getattr(usage, "requests", None) or 0,
+        input_tokens=getattr(usage, "input_tokens", None),
+        cached_input_tokens=getattr(input_details, "cached_tokens", None),
+        output_tokens=getattr(usage, "output_tokens", None),
+        reasoning_tokens=getattr(output_details, "reasoning_tokens", None),
+        total_tokens=getattr(usage, "total_tokens", None),
+    )
+
+
+def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None:
+    """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list.
+
+    Returns ``None`` when no response carries usage so the caller can emit a
+    usage object with only the model name set. ``Usage.add`` accumulates
+    requests/tokens (including cached/reasoning detail fields).
+    """
+    total: Usage | None = None
+    for response in raw_responses:
+        resp_usage = getattr(response, "usage", None)
+        if resp_usage is None:
+            continue
+        if total is None:
+            total = Usage()
+        total.add(resp_usage)
+    return total
+
+
+class OpenAITurn:
+    """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol.
+
+    Construct with exactly one of:
+    - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its
+      ``stream_events()`` is converted to the canonical stream, and after the
+      stream is exhausted ``raw_responses`` is read to compute usage.
+    - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage``
+      events (bypasses ``convert_openai_to_agentex_events``). Useful for tests
+      and for callers that have already produced canonical events. Usage stays
+      at ``TurnUsage(model=...)`` because there is no run to read usage from.
+
+    ``coalesce_tool_requests`` is accepted for API parity with other provider
+    turns but is a no-op for OpenAI: the OpenAI converter already emits a single
+    ``Full(ToolRequestContent)`` per tool call rather than streamed argument
+    deltas, so there is nothing to coalesce.
+    """
+
+    def __init__(
+        self,
+        result: RunResultStreaming | None = None,
+        model: str | None = None,
+        stream: AsyncIterator[StreamTaskMessage] | None = None,
+        coalesce_tool_requests: bool = False,  # noqa: ARG002 - API parity, no-op for OpenAI
+    ) -> None:
+        if result is None and stream is None:
+            raise ValueError("OpenAITurn requires either `result` or `stream`")
+        self._result = result
+        self._model = model
+        self._stream = stream
+        self._usage: TurnUsage = TurnUsage(model=model)
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        return self._iter_events()
+
+    async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]:
+        if self._stream is not None:
+            async for event in self._stream:
+                yield event
+            return
+
+        result = self._result
+        assert result is not None  # guaranteed by __init__
+        async for event in convert_openai_to_agentex_events(result.stream_events()):
+            yield event
+
+        # Stream is exhausted: the run has finished and raw_responses is now
+        # populated, so usage can be aggregated and normalized.
+        try:
+            raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or [])
+            aggregated = _aggregate_usage(raw_responses)
+            self._usage = openai_usage_to_turn_usage(aggregated, self._model)
+        except Exception as exc:  # pragma: no cover - defensive: never break delivery on usage
+            logger.warning(f"Failed to aggregate OpenAI usage: {exc}")
+            self._usage = TurnUsage(model=self._model)
+
+    def usage(self) -> TurnUsage:
+        """Normalized turn usage. Valid only after ``events`` is exhausted."""
+        return self._usage
diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py
index a34cfcda1..d1d5e1c09 100644
--- a/src/agentex/lib/adk/providers/_modules/sync_provider.py
+++ b/src/agentex/lib/adk/providers/_modules/sync_provider.py
@@ -32,6 +32,7 @@
 from agentex import AsyncAgentex
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.core.tracing.tracer import AsyncTracer
+from agentex.types.reasoning_content import ReasoningContent
 from agentex.types.task_message_delta import TextDelta
 from agentex.types.task_message_update import (
     StreamTaskMessageDone,
@@ -55,24 +56,28 @@ def _serialize_item(item: Any) -> dict[str, Any]:
     Uses model_dump() for Pydantic models, otherwise extracts attributes manually.
     Filters out internal Pydantic fields that can't be serialized.
     """
-    if hasattr(item, 'model_dump'):
+    if hasattr(item, "model_dump"):
         # Pydantic model - use model_dump for proper serialization
         try:
-            return item.model_dump(mode='json', exclude_unset=True)
+            return item.model_dump(mode="json", exclude_unset=True)
         except Exception:
             # Fallback to dict conversion
-            return dict(item) if hasattr(item, '__iter__') else {}
+            return dict(item) if hasattr(item, "__iter__") else {}
     else:
         # Not a Pydantic model - extract attributes manually
         item_dict = {}
         for attr_name in dir(item):
-            if not attr_name.startswith('_') and attr_name not in ('model_fields', 'model_config', 'model_computed_fields'):
+            if not attr_name.startswith("_") and attr_name not in (
+                "model_fields",
+                "model_config",
+                "model_computed_fields",
+            ):
                 try:
                     attr_value = getattr(item, attr_name, None)
                     # Skip methods and None values
                     if attr_value is not None and not callable(attr_value):
                         # Convert to JSON-serializable format
-                        if hasattr(attr_value, 'model_dump'):
+                        if hasattr(attr_value, "model_dump"):
                             item_dict[attr_name] = attr_value.model_dump()
                         elif isinstance(attr_value, (str, int, float, bool, list, dict)):
                             item_dict[attr_name] = attr_value
@@ -85,9 +90,26 @@ def _serialize_item(item: Any) -> dict[str, Any]:
 
 
 class SyncStreamingModel(Model):
-    """Simple model wrapper that adds logging to stream_response and supports tracing."""
+    """Simple model wrapper that adds logging to stream_response and supports tracing.
+
+    .. deprecated::
+        Prefer the unified harness surface for new OpenAI Agents integrations:
+        wrap a ``Runner.run_streamed`` result in
+        ``agentex.lib.adk.providers._modules.openai_turn.OpenAITurn`` and drive
+        delivery + tracing through ``UnifiedEmitter`` (see the
+        ``060_harness_openai`` / ``130_harness_openai`` / ``140_harness_openai``
+        tutorials). This per-model tracing wrapper predates the harness and is
+        retained only for backwards compatibility; it will be removed in a
+        future release. No runtime warning is emitted.
+    """
 
-    def __init__(self, original_model: Model, trace_id: str | None = None, parent_span_id: str | None = None, tracer: AsyncTracer | None = None):
+    def __init__(
+        self,
+        original_model: Model,
+        trace_id: str | None = None,
+        parent_span_id: str | None = None,
+        tracer: AsyncTracer | None = None,
+    ):
         """Initialize with the original OpenAI model to wrap.
         Args:
             original_model: The OpenAI model instance to wrap
@@ -147,7 +169,7 @@ async def get_response(
                 }
 
                 # Only add conversation_id if the model supports it
-                if hasattr(self.original_model, 'supports_conversation_id'):
+                if hasattr(self.original_model, "supports_conversation_id"):
                     kwargs["conversation_id"] = conversation_id
 
                 response = await self.original_model.get_response(**kwargs)
@@ -158,12 +180,12 @@ async def get_response(
                     final_output = None
 
                     # Extract final output text from response
-                    response_final_output = getattr(response, 'final_output', None)
+                    response_final_output = getattr(response, "final_output", None)
                     if response_final_output:
                         final_output = response_final_output
 
                     # Extract items from the response output
-                    response_output = getattr(response, 'output', None)
+                    response_output = getattr(response, "output", None)
                     if response_output:
                         output_items = response_output if isinstance(response_output, list) else [response_output]
 
@@ -174,12 +196,12 @@ async def get_response(
                                     new_items.append(item_dict)
 
                                     # Extract final_output from message type if available
-                                    if item_dict.get('type') == 'message' and not final_output:
-                                        content = item_dict.get('content', [])
+                                    if item_dict.get("type") == "message" and not final_output:
+                                        content = item_dict.get("content", [])
                                         if content and isinstance(content, list):
                                             for content_part in content:
-                                                if isinstance(content_part, dict) and 'text' in content_part:
-                                                    final_output = content_part['text']
+                                                if isinstance(content_part, dict) and "text" in content_part:
+                                                    final_output = content_part["text"]
                                                     break
                             except Exception as e:
                                 logger.warning(f"Failed to serialize item in get_response: {e}")
@@ -207,7 +229,7 @@ async def get_response(
             }
 
             # Only add conversation_id if the model supports it
-            if hasattr(self.original_model, 'supports_conversation_id'):
+            if hasattr(self.original_model, "supports_conversation_id"):
                 kwargs["conversation_id"] = conversation_id
 
             return await self.original_model.get_response(**kwargs)
@@ -266,7 +288,7 @@ async def stream_response(
                 }
 
                 # Only add conversation_id if the model supports it
-                if hasattr(self.original_model, 'supports_conversation_id'):
+                if hasattr(self.original_model, "supports_conversation_id"):
                     stream_kwargs["conversation_id"] = conversation_id
 
                 # Get the stream response from the original model and yield each event
@@ -277,11 +299,11 @@ async def stream_response(
                 final_response_text = ""
 
                 async for event in stream_response:
-                    event_type = getattr(event, 'type', 'no-type')
+                    event_type = getattr(event, "type", "no-type")
 
                     # Handle response.output_item.done events which contain completed items
-                    if event_type == 'response.output_item.done':
-                        item = getattr(event, 'item', None)
+                    if event_type == "response.output_item.done":
+                        item = getattr(event, "item", None)
                         if item is not None:
                             try:
                                 item_dict = _serialize_item(item)
@@ -289,12 +311,12 @@ async def stream_response(
                                     new_items.append(item_dict)
 
                                     # Update final_response_text from message type if available
-                                    if item_dict.get('type') == 'message':
-                                        content = item_dict.get('content', [])
+                                    if item_dict.get("type") == "message":
+                                        content = item_dict.get("content", [])
                                         if content and isinstance(content, list):
                                             for content_part in content:
-                                                if isinstance(content_part, dict) and 'text' in content_part:
-                                                    final_response_text = content_part['text']
+                                                if isinstance(content_part, dict) and "text" in content_part:
+                                                    final_response_text = content_part["text"]
                                                     break
                             except Exception as e:
                                 logger.warning(f"Failed to serialize item in stream_response: {e}")
@@ -326,7 +348,7 @@ async def stream_response(
             }
 
             # Only add conversation_id if the model supports it
-            if hasattr(self.original_model, 'supports_conversation_id'):
+            if hasattr(self.original_model, "supports_conversation_id"):
                 stream_kwargs["conversation_id"] = conversation_id
 
             # Get the stream response from the original model and yield each event
@@ -336,8 +358,17 @@ async def stream_response(
             async for event in stream_response:
                 yield event
 
+
 class SyncStreamingProvider(OpenAIProvider):
-    """Simple OpenAI provider wrapper that adds logging to streaming and supports tracing."""
+    """Simple OpenAI provider wrapper that adds logging to streaming and supports tracing.
+
+    .. deprecated::
+        Prefer the unified harness surface for new OpenAI Agents integrations
+        (see :class:`SyncStreamingModel` and the ``OpenAITurn`` +
+        ``UnifiedEmitter`` pattern). This provider wrapper predates the harness
+        and is retained only for backwards compatibility; it will be removed in
+        a future release. No runtime warning is emitted.
+    """
 
     def __init__(self, trace_id: str | None = None, parent_span_id: str | None = None, *args, **kwargs):
         """Initialize the provider with tracing support.
@@ -405,6 +436,7 @@ def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, An
         if tool_call_item.arguments:
             if isinstance(tool_call_item.arguments, str):
                 import json
+
                 tool_arguments = json.loads(tool_call_item.arguments) if tool_call_item.arguments else {}
             else:
                 tool_arguments = tool_call_item.arguments
@@ -418,6 +450,7 @@ def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, An
             arguments = tool_call_item.arguments
             if isinstance(arguments, str):
                 import json
+
                 tool_arguments = json.loads(arguments) if arguments else {}
             elif arguments is None:
                 tool_arguments = {}
@@ -466,11 +499,11 @@ def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any)
 
 async def convert_openai_to_agentex_events(stream_response):
     """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support.
-    
+
     This is an enhanced version of the base converter that includes support for:
     - Reasoning content deltas (for o1 models)
     - Reasoning summary deltas (for o1 models)
-    
+
     Args:
         stream_response: An async iterator of OpenAI streaming events
     Yields:
@@ -488,8 +521,8 @@ async def convert_openai_to_agentex_events(stream_response):
         event_count += 1
 
         # Check for raw response events which contain the actual OpenAI streaming events
-        if hasattr(event, 'type') and event.type == 'raw_response_event':
-            if hasattr(event, 'data'):
+        if hasattr(event, "type") and event.type == "raw_response_event":
+            if hasattr(event, "data"):
                 raw_event = event.data
 
                 # Check for ResponseOutputItemAddedEvent which signals a new message starting
@@ -504,7 +537,7 @@ async def convert_openai_to_agentex_events(stream_response):
                     if item_id in item_id_to_index:
                         # Get the message type to decide whether to send done event
                         message_type = item_id_to_type.get(item_id, "text")
-                        
+
                         # Don't send done events for reasoning content/summary
                         # They just end with their last delta
                         if message_type not in ("reasoning_content", "reasoning_summary"):
@@ -528,14 +561,20 @@ async def convert_openai_to_agentex_events(stream_response):
                         item_id_to_index[item_id] = message_index
                         item_id_to_type[item_id] = "reasoning_summary"
 
-                        # Send a start event for this new reasoning summary message
+                        # Send a start event for this new reasoning summary message.
+                        # The start content must be ReasoningContent (not TextContent)
+                        # so consumers that branch on the start event's content type
+                        # render a reasoning/thinking indicator; the final persisted
+                        # content is rebuilt from the reasoning deltas regardless.
                         yield StreamTaskMessageStart(
                             type="start",
                             index=item_id_to_index[item_id],
-                            content=TextContent(
-                                type="text",
+                            content=ReasoningContent(
+                                type="reasoning",
                                 author="agent",
-                                content="",  # Start with empty content
+                                summary=[],
+                                content=[],
+                                style="active",
                             ),
                         )
 
@@ -572,14 +611,20 @@ async def convert_openai_to_agentex_events(stream_response):
                         item_id_to_index[item_id] = message_index
                         item_id_to_type[item_id] = "reasoning_content"
 
-                        # Send a start event for this new reasoning content message
+                        # Send a start event for this new reasoning content message.
+                        # The start content must be ReasoningContent (not TextContent)
+                        # so consumers that branch on the start event's content type
+                        # render a reasoning/thinking indicator; the final persisted
+                        # content is rebuilt from the reasoning deltas regardless.
                         yield StreamTaskMessageStart(
                             type="start",
                             index=item_id_to_index[item_id],
-                            content=TextContent(
-                                type="text",
+                            content=ReasoningContent(
+                                type="reasoning",
                                 author="agent",
-                                content="",  # Start with empty content
+                                summary=[],
+                                content=[],
+                                style="active",
                             ),
                         )
 
@@ -608,7 +653,7 @@ async def convert_openai_to_agentex_events(stream_response):
                 # Check if this is a text delta event from OpenAI
                 elif isinstance(raw_event, ResponseTextDeltaEvent):
                     # Check if this event has an item_id
-                    item_id = getattr(raw_event, 'item_id', None)
+                    item_id = getattr(raw_event, "item_id", None)
 
                     # If this is a new item_id we haven't seen, it's a new message
                     if item_id and item_id not in item_id_to_index:
@@ -647,13 +692,13 @@ async def convert_openai_to_agentex_events(stream_response):
                     )
                     yield delta_message
 
-        elif hasattr(event, 'type') and event.type == 'run_item_stream_event':
+        elif hasattr(event, "type") and event.type == "run_item_stream_event":
             # Skip reasoning_item events - they're handled via raw_response_event above
-            if hasattr(event, 'item') and event.item.type == 'reasoning_item':
+            if hasattr(event, "item") and event.item.type == "reasoning_item":
                 continue
 
             # Check for tool_call_item type (this is when a tool is being called)
-            elif hasattr(event, 'item') and event.item.type == 'tool_call_item':
+            elif hasattr(event, "item") and event.item.type == "tool_call_item":
                 # Extract tool call information using the helper method
                 call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item)
                 tool_map[call_id] = tool_name
@@ -671,7 +716,7 @@ async def convert_openai_to_agentex_events(stream_response):
                 )
 
             # Check for tool_call_output_item type (this is when a tool returns output)
-            elif hasattr(event, 'item') and event.item.type == 'tool_call_output_item':
+            elif hasattr(event, "item") and event.item.type == "tool_call_output_item":
                 # Extract tool response information using the helper method
                 call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item)
                 tool_response_content = ToolResponseContent(
@@ -687,4 +732,3 @@ async def convert_openai_to_agentex_events(stream_response):
                     index=message_index,
                     content=tool_response_content,
                 )
-
diff --git a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2
index 3309dc07e..750a271ad 100644
--- a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2
+++ b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2
@@ -15,13 +15,14 @@ if _litellm_key:
     os.environ["OPENAI_API_KEY"] = _litellm_key
 
 import agentex.lib.adk as adk
-from agentex.lib.adk import create_langgraph_tracing_handler, stream_langgraph_events
+from agentex.lib.core.harness import UnifiedEmitter
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 from agentex.lib.sdk.fastacp.fastacp import FastACP
 from agentex.protocol.acp import SendEventParams, CancelTaskParams, CreateTaskParams
 from agentex.lib.types.fastacp import AsyncACPConfig
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
+from agentex.lib.adk import LangGraphTurn
 
 from project.graph import create_graph
 
@@ -67,24 +68,23 @@ async def handle_task_event_send(params: SendEventParams):
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        callback = create_langgraph_tracing_handler(
-            trace_id=task_id,
-            parent_span_id=turn_span.id if turn_span else None,
-        )
-
         stream = graph.astream(
             {"messages": [{"role": "user", "content": user_message}]},
-            config={
-                "configurable": {"thread_id": task_id},
-                "callbacks": [callback],
-            },
+            config={"configurable": {"thread_id": task_id}},
             stream_mode=["messages", "updates"],
         )
 
-        final_output = await stream_langgraph_events(stream, task_id)
+        turn = LangGraphTurn(stream, model=None)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        result = await emitter.auto_send_turn(turn)
 
         if turn_span:
-            turn_span.output = {"final_output": final_output}
+            turn_span.output = {"final_output": result.final_text}
 
 
 @acp.on_task_create
diff --git a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2
index 5692396b2..11d3ab476 100644
--- a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2
+++ b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2
@@ -19,21 +19,19 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
-from project.agent import create_agent
+from project.agent import MODEL_NAME, create_agent
 from pydantic_ai.run import AgentRunResultEvent
 from pydantic_ai.messages import ModelMessagesTypeAdapter
 
 import agentex.lib.adk as adk
-from agentex.lib.adk import (
-    stream_pydantic_ai_events,
-    create_pydantic_ai_tracing_handler,
-)
 from agentex.protocol.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.core.harness import UnifiedEmitter
 from agentex.lib.types.fastacp import AsyncACPConfig
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.utils.model_utils import BaseModel
 from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.adk import PydanticAITurn
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 
 logger = make_logger(__name__)
@@ -125,15 +123,17 @@ async def handle_task_event_send(params: SendEventParams):
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        tracing_handler = create_pydantic_ai_tracing_handler(
+        # Construct the UnifiedEmitter from the ACP context so tracing is
+        # automatic and messages are auto-sent to the task stream (Redis).
+        emitter = UnifiedEmitter(
+            task_id=task_id,
             trace_id=task_id,
             parent_span_id=turn_span.id if turn_span else None,
-            task_id=task_id,
         )
 
         # Wrap the pydantic-ai event stream so we can capture the final
         # AgentRunResultEvent (which carries the full message list for the
-        # next turn) without changing the streaming-helper's signature.
+        # next turn) before forwarding events to the emitter.
         captured_messages: list[Any] = []
 
         async def tee_messages(upstream) -> AsyncIterator[Any]:
@@ -143,9 +143,8 @@ async def handle_task_event_send(params: SendEventParams):
                 yield event
 
         async with agent.run_stream_events(user_message, message_history=previous_messages) as stream:
-            final_output = await stream_pydantic_ai_events(
-                tee_messages(stream), task_id, tracing_handler=tracing_handler
-            )
+            turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME)
+            result = await emitter.auto_send_turn(turn)
 
         # Save the updated message history so the next turn picks up here.
         if captured_messages:
@@ -158,7 +157,7 @@ async def handle_task_event_send(params: SendEventParams):
             )
 
         if turn_span:
-            turn_span.output = {"final_output": final_output}
+            turn_span.output = {"final_output": result.final_text}
 
 
 @acp.on_task_cancel
diff --git a/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2
index 54538d0c9..c6814b9c4 100644
--- a/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2
+++ b/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2
@@ -8,12 +8,13 @@ tokens and tool calls from the LangGraph graph to the Agentex frontend.
 from typing import AsyncGenerator
 
 import agentex.lib.adk as adk
-from agentex.lib.adk import create_langgraph_tracing_handler, convert_langgraph_to_agentex_events
+from agentex.lib.core.harness import UnifiedEmitter
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 from agentex.lib.sdk.fastacp.fastacp import FastACP
 from agentex.protocol.acp import SendMessageParams
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
+from agentex.lib.adk import LangGraphTurn
 from agentex.types.task_message_content import TaskMessageContent
 from agentex.types.task_message_delta import TextDelta
 from agentex.types.task_message_update import TaskMessageUpdate
@@ -72,22 +73,21 @@ async def handle_message_send(
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        callback = create_langgraph_tracing_handler(
-            trace_id=thread_id,
-            parent_span_id=turn_span.id if turn_span else None,
-        )
-
         stream = graph.astream(
             {"messages": [{"role": "user", "content": user_message}]},
-            config={
-                "configurable": {"thread_id": thread_id},
-                "callbacks": [callback],
-            },
+            config={"configurable": {"thread_id": thread_id}},
             stream_mode=["messages", "updates"],
         )
 
+        turn = LangGraphTurn(stream, model=None)
+        emitter = UnifiedEmitter(
+            task_id=thread_id,
+            trace_id=thread_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
         final_text = ""
-        async for event in convert_langgraph_to_agentex_events(stream):
+        async for event in emitter.yield_turn(turn):
             # Accumulate text deltas for span output
             delta = getattr(event, "delta", None)
             if isinstance(delta, TextDelta) and delta.text_delta:
diff --git a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2
index 4925e847f..061ae0e08 100644
--- a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2
+++ b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2
@@ -15,19 +15,17 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
-from project.agent import create_agent
+from project.agent import MODEL_NAME, create_agent
 
 import agentex.lib.adk as adk
-from agentex.lib.adk import (
-    create_pydantic_ai_tracing_handler,
-    convert_pydantic_ai_to_agentex_events,
-)
 from agentex.protocol.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
 from agentex.lib.types.tracing import SGPTracingProcessorConfig
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.sdk.fastacp.fastacp import FastACP
 from agentex.types.task_message_update import TaskMessageUpdate
 from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.adk import PydanticAITurn
 from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
 
 logger = make_logger(__name__)
@@ -73,7 +71,7 @@ async def handle_message_send(
     logger.info(f"Processing message for task {task_id}")
 
     # Open a per-message turn span. Tool calls below nest underneath this
-    # span via the tracing handler's parent_span_id wiring.
+    # span via the emitter's parent_span_id wiring.
     async with adk.tracing.span(
         trace_id=task_id,
         task_id=task_id,
@@ -81,13 +79,14 @@ async def handle_message_send(
         input={"message": user_message},
         data={"__span_type__": "AGENT_WORKFLOW"},
     ) as turn_span:
-        tracing_handler = create_pydantic_ai_tracing_handler(
+        # Construct the UnifiedEmitter from the ACP/streaming context so tracing
+        # is automatic: tool spans nest under this turn's span.
+        emitter = UnifiedEmitter(
+            task_id=task_id,
             trace_id=task_id,
             parent_span_id=turn_span.id if turn_span else None,
-            task_id=task_id,
         )
         async with agent.run_stream_events(user_message) as stream:
-            async for event in convert_pydantic_ai_to_agentex_events(
-                stream, tracing_handler=tracing_handler
-            ):
-                yield event
+            turn = PydanticAITurn(stream, model=MODEL_NAME)
+            async for ev in emitter.yield_turn(turn):
+                yield ev
diff --git a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2
index 0aa958118..da97856ea 100644
--- a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2
+++ b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2
@@ -11,9 +11,9 @@ moves into recorded activities.
 
 Streaming back to Agentex happens via ``event_stream_handler``, which
 receives Pydantic AI ``AgentStreamEvent``s from inside the model activity
-and forwards them to Redis using the ``stream_pydantic_ai_events`` helper.
-The ``task_id`` and tracing parent span ID are threaded into the handler
-via ``deps``.
+and forwards them through the unified harness surface
+(``UnifiedEmitter.auto_send_turn`` + ``PydanticAITurn``). The ``task_id`` and
+tracing parent span ID are threaded into the handler via ``deps``.
 """
 
 from __future__ import annotations
@@ -27,10 +27,8 @@ from project.tools import get_weather
 from pydantic_ai.messages import AgentStreamEvent
 from pydantic_ai.durable_exec.temporal import TemporalAgent
 
-from agentex.lib.adk import (
-    stream_pydantic_ai_events,
-    create_pydantic_ai_tracing_handler,
-)
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.adk import PydanticAITurn
 
 # Swap this for any Pydantic AI-supported model identifier
 # (e.g. "anthropic:claude-3-5-sonnet-latest", "openai:gpt-4o").
@@ -92,17 +90,18 @@ async def event_handler(
     activity (not the workflow), it can freely make non-deterministic Redis
     writes — including the tracing HTTP calls that record per-tool-call
     spans under the workflow's per-turn span (when ``parent_span_id`` is set).
+
+    The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id),
+    so tool spans nest under the workflow's per-turn span and messages auto-send
+    to the task stream.
     """
-    tracing_handler = create_pydantic_ai_tracing_handler(
+    emitter = UnifiedEmitter(
+        task_id=run_context.deps.task_id,
         trace_id=run_context.deps.task_id,
         parent_span_id=run_context.deps.parent_span_id,
-        task_id=run_context.deps.task_id,
-    )
-    await stream_pydantic_ai_events(
-        events,
-        run_context.deps.task_id,
-        tracing_handler=tracing_handler,
     )
+    turn = PydanticAITurn(events, model=MODEL_NAME)
+    await emitter.auto_send_turn(turn)
 
 
 # Construct the durable agent at module load time so that the
diff --git a/src/agentex/lib/core/harness/__init__.py b/src/agentex/lib/core/harness/__init__.py
new file mode 100644
index 000000000..067751d63
--- /dev/null
+++ b/src/agentex/lib/core/harness/__init__.py
@@ -0,0 +1,30 @@
+"""Shared, harness-independent machinery for the unified harness surface.
+
+The Agentex StreamTaskMessage* stream is the single source of truth; this
+package derives spans from it and delivers it (yield or auto-send), so every
+harness tap gets streaming + tracing + turn usage uniformly.
+"""
+
+from agentex.lib.core.harness.types import (
+    OpenSpan,
+    CloseSpan,
+    TurnUsage,
+    SpanSignal,
+    TurnResult,
+    HarnessTurn,
+    StreamTaskMessage,
+)
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+
+__all__ = [
+    "UnifiedEmitter",
+    "SpanTracer",
+    "OpenSpan",
+    "CloseSpan",
+    "SpanSignal",
+    "StreamTaskMessage",
+    "TurnUsage",
+    "TurnResult",
+    "HarnessTurn",
+]
diff --git a/src/agentex/lib/core/harness/auto_send.py b/src/agentex/lib/core/harness/auto_send.py
new file mode 100644
index 000000000..2ecd6b583
--- /dev/null
+++ b/src/agentex/lib/core/harness/auto_send.py
@@ -0,0 +1,156 @@
+"""Auto-send delivery: canonical stream -> adk.streaming side effects + tracing."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+from datetime import datetime
+
+from agentex.types.text_delta import TextDelta
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import TurnUsage, TurnResult, StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+try:
+    from agentex.lib.utils.logging import make_logger
+
+    logger = make_logger(__name__)
+except Exception:  # ddtrace may be absent in some envs; fall back to stdlib
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+
+async def auto_send(
+    events: AsyncIterator[StreamTaskMessage],
+    task_id: str,
+    tracer: SpanTracer | None = None,
+    streaming: Any = None,
+    usage: TurnUsage | None = None,
+    created_at: datetime | None = None,
+) -> TurnResult:
+    """Push the canonical stream to the task stream via adk.streaming.
+
+    Opens a streaming context per message (keyed by index), streams deltas via
+    ctx.stream_update, and closes via ctx.close() on Done. Posts tool
+    request/response full messages by opening a context with the content and
+    closing it immediately (no deltas). Derives and traces spans from the same
+    stream. Returns the last text segment's text + usage.
+
+    Index-keyed routing: each Start(index=i) opens a context stored in
+    ctx_map[i]; Delta(index=i) routes to ctx_map.get(i); Done(index=i) closes
+    and removes ctx_map[i]. Events with index is None are skipped. The finally
+    block closes all remaining open contexts.
+
+    final_text last-segment semantics: a new Start(TextContent) resets
+    final_text_parts so that multi-step turns return the LAST text segment.
+    Full(TextContent) also overwrites final_text_parts (same semantics).
+
+    AGX1-378: created_at is forwarded to every streaming_task_message_context
+    call so callers can back-date message timestamps.
+
+    Mirrors the open/close/stream_update pattern from
+    src/agentex/lib/adk/_modules/_langgraph_async.py:
+      - context opened via streaming_task_message_context(...).__aenter__()
+      - context closed via ctx.close() (not __aexit__)
+      - deltas pushed as StreamTaskMessageDelta with parent_task_message set
+        from ctx.task_message
+
+    For async + temporal agents (call from inside an activity).
+    """
+    if streaming is None:
+        from agentex.lib import adk
+
+        streaming = adk.streaming
+
+    deriver = SpanDeriver() if tracer is not None else None
+    final_text_parts: list[str] = []
+    ctx_map: dict[int, Any] = {}
+
+    async def _close_all() -> None:
+        # Guard each close independently: a failure on one context (e.g. a
+        # backend hiccup during teardown) must not abandon the remaining open
+        # contexts, otherwise their task messages would never be finalized.
+        for ctx in list(ctx_map.values()):
+            try:
+                await ctx.close()
+            except Exception as exc:
+                logger.warning("[harness.auto_send] context close failed during teardown: %s", exc)
+        ctx_map.clear()
+
+    try:
+        async for event in events:
+            if deriver is not None and tracer is not None:
+                for signal in deriver.observe(event):
+                    await tracer.handle(signal)
+
+            if isinstance(event, StreamTaskMessageStart):
+                if event.index is None:
+                    continue
+                i = event.index
+                # Reset final_text_parts when a new text segment starts
+                if isinstance(event.content, TextContent):
+                    final_text_parts = []
+                ctx = streaming.streaming_task_message_context(
+                    task_id=task_id,
+                    initial_content=event.content,
+                    created_at=created_at,
+                )
+                ctx_map[i] = await ctx.__aenter__()
+
+            elif isinstance(event, StreamTaskMessageDelta):
+                if event.index is None:
+                    continue
+                ctx = ctx_map.get(event.index)
+                if ctx is not None and event.delta is not None:
+                    # Reconstruct the delta with parent_task_message set from
+                    # the context's task_message (mirrors _langgraph_async.py
+                    # lines 72-78 and 117-127).
+                    delta_with_parent = StreamTaskMessageDelta(
+                        parent_task_message=ctx.task_message,
+                        delta=event.delta,
+                        type="delta",
+                        index=event.index,
+                    )
+                    await ctx.stream_update(delta_with_parent)
+                    if isinstance(event.delta, TextDelta) and event.delta.text_delta:
+                        final_text_parts.append(event.delta.text_delta)
+
+            elif isinstance(event, StreamTaskMessageDone):
+                if event.index is None:
+                    continue
+                ctx = ctx_map.pop(event.index, None)
+                if ctx is not None:
+                    await ctx.close()
+
+            elif isinstance(event, StreamTaskMessageFull):
+                # Full messages: post the full message by opening a context
+                # with the content and closing it immediately (no deltas;
+                # StreamingTaskMessageContext.close() persists initial_content
+                # when the accumulator is empty). Use async with so the context
+                # is closed even if close() raises (__aexit__ delegates to
+                # close()).
+                # Full(TextContent) also resets final_text_parts for
+                # last-segment semantics.
+                if isinstance(event.content, TextContent):
+                    final_text_parts = [event.content.content]
+                async with streaming.streaming_task_message_context(
+                    task_id=task_id,
+                    initial_content=event.content,
+                    created_at=created_at,
+                ):
+                    pass
+
+    finally:
+        await _close_all()
+        if deriver is not None and tracer is not None:
+            for signal in deriver.flush():
+                await tracer.handle(signal)
+
+    return TurnResult(final_text="".join(final_text_parts), usage=usage or TurnUsage())
diff --git a/src/agentex/lib/core/harness/emitter.py b/src/agentex/lib/core/harness/emitter.py
new file mode 100644
index 000000000..5b56793bf
--- /dev/null
+++ b/src/agentex/lib/core/harness/emitter.py
@@ -0,0 +1,80 @@
+"""UnifiedEmitter: the single facade agent authors use for either delivery mode."""
+
+from __future__ import annotations
+
+from typing import AsyncGenerator
+from datetime import datetime
+
+from agentex.lib.core.harness.types import TurnResult, HarnessTurn, StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.auto_send import auto_send
+from agentex.lib.core.harness.yield_delivery import yield_events
+
+
+class UnifiedEmitter:
+    """Ties trace context + chosen delivery together.
+
+    Tracing modes (the `tracer` arg):
+    - tracer=None (default): auto-construct a SpanTracer if `trace_id` is present.
+    - tracer=False: disable tracing entirely, regardless of `trace_id`.
+    - tracer=<SpanTracer>: use the supplied instance.
+
+    `tracing` and `streaming` are injection escape-hatches for tests/advanced
+    use; leave them None in production so the real adk modules are used.
+    """
+
+    tracer: SpanTracer | None
+
+    def __init__(
+        self,
+        task_id: str,
+        trace_id: str | None,
+        parent_span_id: str | None,
+        tracer: SpanTracer | bool | None = None,
+        tracing: object | None = None,
+        streaming: object | None = None,
+    ):
+        self.task_id = task_id
+        self.trace_id = trace_id
+        self.parent_span_id = parent_span_id
+        self._streaming = streaming
+        if tracer is False:
+            self.tracer = None
+        elif isinstance(tracer, SpanTracer):
+            self.tracer = tracer
+        elif trace_id:
+            self.tracer = SpanTracer(
+                trace_id=trace_id,
+                parent_span_id=parent_span_id,
+                task_id=task_id,
+                tracing=tracing,
+            )
+        else:
+            self.tracer = None
+
+    async def yield_turn(self, turn: HarnessTurn) -> AsyncGenerator[StreamTaskMessage, None]:
+        """Sync HTTP ACP delivery: forward events, trace as side effect."""
+        async for event in yield_events(turn.events, tracer=self.tracer):
+            yield event
+
+    async def auto_send_turn(self, turn: HarnessTurn, created_at: datetime | None = None) -> TurnResult:
+        """Async/temporal delivery: push to the task stream, return TurnResult.
+
+        Pass `created_at` (e.g. `workflow.now()` under Temporal) to stamp the
+        turn's messages with a deterministic timestamp; it is forwarded to the
+        streaming contexts. Default None preserves server-side timestamps.
+        """
+        # `turn.usage()` is only valid AFTER `turn.events` is exhausted (the
+        # HarnessTurn single-pass contract: real turns populate usage while the
+        # stream is consumed). So drive delivery first, then read usage — do NOT
+        # pass `usage=turn.usage()` eagerly here (that would capture the empty
+        # default before the stream runs).
+        result = await auto_send(
+            turn.events,
+            task_id=self.task_id,
+            tracer=self.tracer,
+            streaming=self._streaming,
+            created_at=created_at,
+        )
+        result.usage = turn.usage()
+        return result
diff --git a/src/agentex/lib/core/harness/span_derivation.py b/src/agentex/lib/core/harness/span_derivation.py
new file mode 100644
index 000000000..cecb24bcc
--- /dev/null
+++ b/src/agentex/lib/core/harness/span_derivation.py
@@ -0,0 +1,154 @@
+"""Pure reducer: canonical StreamTaskMessage* stream -> span open/close signals.
+
+Has no dependency on adk; unit-testable in isolation. Delivery adapters feed it
+every event and act on the returned signals.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan, SpanSignal, StreamTaskMessage
+from agentex.types.tool_request_delta import ToolRequestDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+
+
+@dataclass
+class _ToolReqMeta:
+    tool_call_id: str
+    name: str
+    arguments: dict[str, object]
+    args_buf: str = ""  # accumulated streamed argument fragments
+
+
+class SpanDeriver:
+    """Stateful reducer over the canonical stream.
+
+    Tool span: open on Done of a ToolRequestContent index; close on matching
+    ToolResponseContent by tool_call_id. Reasoning span: open on
+    Start(ReasoningContent); close on that index's Done.
+
+    Deliberate contracts:
+      - A `Full(ToolResponseContent)` whose tool_call_id was never opened is
+        ignored (no CloseSpan emitted).
+      - A `Done` for an index that was never a tool_request/reasoning Start is
+        ignored (no signal emitted).
+      - Events with `index is None` are skipped entirely; without a stable index
+        they cannot be reliably paired, and aliasing them to a sentinel would
+        let unrelated None-indexed events cross-match.
+      - `flush()` closes anything still open as incomplete; unclosed tool spans
+        are emitted in the order they were opened.
+    """
+
+    def __init__(self) -> None:
+        self._tool_by_index: dict[int, _ToolReqMeta] = {}
+        self._reasoning_index_open: set[int] = set()
+        # insertion-ordered set of open tool_call_ids (dict keys preserve order)
+        self._open_tool_ids: dict[str, None] = {}
+
+    def observe(self, event: StreamTaskMessage) -> list[SpanSignal]:
+        if isinstance(event, StreamTaskMessageStart):
+            return self._on_start(event)
+        if isinstance(event, StreamTaskMessageDelta):
+            return self._on_delta(event)
+        if isinstance(event, StreamTaskMessageFull):
+            return self._on_full(event)
+        if isinstance(event, StreamTaskMessageDone):
+            return self._on_done(event)
+        return []
+
+    def flush(self) -> list[SpanSignal]:
+        """Close anything still open at end of stream, marked incomplete."""
+        signals: list[SpanSignal] = []
+        for tcid in list(self._open_tool_ids):
+            signals.append(CloseSpan(key=tcid, output=None, is_complete=False))
+        self._open_tool_ids.clear()
+        for idx in sorted(self._reasoning_index_open):
+            signals.append(CloseSpan(key=f"reasoning:{idx}", output=None, is_complete=False))
+        self._reasoning_index_open.clear()
+        return signals
+
+    def _on_start(self, event: StreamTaskMessageStart) -> list[SpanSignal]:
+        if event.index is None:
+            return []
+        idx = event.index
+        content = event.content
+        if isinstance(content, ToolRequestContent):
+            self._tool_by_index[idx] = _ToolReqMeta(
+                tool_call_id=content.tool_call_id,
+                name=content.name,
+                arguments=dict(content.arguments or {}),
+            )
+            return []
+        if content.type == "reasoning":
+            self._reasoning_index_open.add(idx)
+            return [OpenSpan(key=f"reasoning:{idx}", kind="reasoning", name="reasoning", input={})]
+        return []
+
+    def _on_delta(self, event: StreamTaskMessageDelta) -> list[SpanSignal]:
+        if event.index is None:
+            return []
+        idx = event.index
+        delta = event.delta
+        if isinstance(delta, ToolRequestDelta):
+            meta = self._tool_by_index.get(idx)
+            if meta is not None and delta.arguments_delta:
+                meta.args_buf += delta.arguments_delta
+        return []
+
+    def _on_full(self, event: StreamTaskMessageFull) -> list[SpanSignal]:
+        """Handle a Full event.
+
+        A `Full(ToolRequestContent)` opens a tool span (keyed by tool_call_id)
+        if it is not already open; the matching `Full(ToolResponseContent)`
+        closes it. This handles harnesses (e.g. LangGraph) that emit tool calls
+        as a single Full rather than Start+Done.
+        """
+        content = event.content
+        if isinstance(content, ToolRequestContent):
+            tcid = content.tool_call_id
+            if tcid not in self._open_tool_ids:
+                self._open_tool_ids[tcid] = None
+                args = dict(content.arguments or {})
+                return [OpenSpan(key=tcid, kind="tool", name=content.name, input=args)]
+            return []
+        if isinstance(content, ToolResponseContent):
+            tcid = content.tool_call_id
+            if tcid in self._open_tool_ids:
+                self._open_tool_ids.pop(tcid, None)
+                return [
+                    CloseSpan(
+                        key=tcid,
+                        output=content.content,
+                        is_complete=True,
+                        is_error=content.is_error,
+                    )
+                ]
+        return []
+
+    def _on_done(self, event: StreamTaskMessageDone) -> list[SpanSignal]:
+        if event.index is None:
+            return []
+        idx = event.index
+        meta = self._tool_by_index.pop(idx, None)
+        if meta is not None:
+            args = meta.arguments
+            if meta.args_buf:
+                try:
+                    args = json.loads(meta.args_buf)
+                except json.JSONDecodeError:
+                    args = {"_raw": meta.args_buf}
+            self._open_tool_ids[meta.tool_call_id] = None
+            return [OpenSpan(key=meta.tool_call_id, kind="tool", name=meta.name, input=args)]
+        if idx in self._reasoning_index_open:
+            self._reasoning_index_open.discard(idx)
+            return [CloseSpan(key=f"reasoning:{idx}", output=None, is_complete=True)]
+        return []
diff --git a/src/agentex/lib/core/harness/tracer.py b/src/agentex/lib/core/harness/tracer.py
new file mode 100644
index 000000000..4ca4d628b
--- /dev/null
+++ b/src/agentex/lib/core/harness/tracer.py
@@ -0,0 +1,88 @@
+"""Adapter from SpanSignals to adk.tracing spans (best-effort, overridable)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan, SpanSignal
+
+try:
+    from agentex.lib.utils.logging import make_logger
+
+    logger = make_logger(__name__)
+except Exception:  # ddtrace may be absent in some envs; fall back to stdlib
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+
+class SpanTracer:
+    """Opens/closes adk.tracing child spans in response to span signals.
+
+    `tracing` defaults to the real `adk.tracing` module; inject a fake in tests
+    or a custom tracer to override. No-op when `trace_id` is falsy. Never raises.
+
+    The real TracingModule.end_span does NOT accept an output kwarg — output is
+    recorded by mutating span.output before calling end_span, matching the pattern
+    used throughout the codebase (see _langgraph_tracing.py on_tool_end etc.).
+
+    Span-lifecycle contract: the `_open` dict (span key -> span object) is scoped
+    to a single turn. Pairing is by `key`:
+    - A duplicate OpenSpan for a key already in `_open` silently replaces the
+      earlier span; the earlier span is then orphaned (never closed / leaked).
+    - A CloseSpan for an unknown key is a no-op.
+    - Unpaired opens accumulate in `_open` for the lifetime of the tracer; since
+      a tracer is expected to live for one turn, this is bounded and acceptable.
+    """
+
+    def __init__(
+        self,
+        trace_id: str | None,
+        parent_span_id: str | None,
+        tracing: Any = None,
+        task_id: str | None = None,
+    ):
+        self.trace_id = trace_id
+        self.parent_span_id = parent_span_id
+        self.task_id = task_id
+        if tracing is None:
+            from agentex.lib import adk
+
+            tracing = adk.tracing
+        self._tracing = tracing
+        self._open: dict[str, Any] = {}  # span key -> span object
+
+    async def handle(self, signal: SpanSignal) -> None:
+        if not self.trace_id:
+            return
+        try:
+            if isinstance(signal, OpenSpan):
+                span = await self._tracing.start_span(
+                    trace_id=self.trace_id,
+                    name=signal.name,
+                    input=signal.input,
+                    parent_id=self.parent_span_id,
+                    task_id=self.task_id,
+                )
+                if span is not None:
+                    self._open[signal.key] = span
+            elif isinstance(signal, CloseSpan):
+                span = self._open.pop(signal.key, None)
+                if span is not None:
+                    # Output is recorded by mutating span.output before end_span.
+                    # The real TracingModule.end_span signature is:
+                    #   end_span(trace_id, span, start_to_close_timeout, heartbeat_timeout, retry_policy)
+                    # It does not accept an output= kwarg.
+                    span.output = signal.output
+                    # Tool failure status (ToolResponseContent.is_error) is recorded
+                    # on span.data when the harness reports one; Span has no dedicated
+                    # error field. None means no status was reported, so leave data alone.
+                    if signal.is_error is not None:
+                        data = span.data if isinstance(span.data, dict) else {}
+                        span.data = {**data, "is_error": signal.is_error}
+                    await self._tracing.end_span(
+                        trace_id=self.trace_id,
+                        span=span,
+                    )
+        except Exception as exc:  # best-effort: tracing never breaks delivery
+            logger.warning("[harness.tracer] span signal failed: %s", exc)
diff --git a/src/agentex/lib/core/harness/types.py b/src/agentex/lib/core/harness/types.py
new file mode 100644
index 000000000..74e0dc314
--- /dev/null
+++ b/src/agentex/lib/core/harness/types.py
@@ -0,0 +1,96 @@
+"""Types for the unified harness surface."""
+
+from __future__ import annotations
+
+from typing import Any, Union, Literal, Protocol, AsyncIterator, runtime_checkable
+from dataclasses import field, dataclass
+
+from pydantic import BaseModel, ConfigDict
+
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+
+# The canonical stream element. Taps yield these; delivery adapters consume them.
+StreamTaskMessage = Union[
+    StreamTaskMessageStart,
+    StreamTaskMessageDelta,
+    StreamTaskMessageFull,
+    StreamTaskMessageDone,
+]
+
+SpanKind = Literal["tool", "reasoning", "subagent"]
+
+
+@dataclass
+class OpenSpan:
+    """Signal to open a child span. `key` pairs an open with its close."""
+
+    key: str
+    kind: SpanKind
+    name: str
+    input: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class CloseSpan:
+    """Signal to close the span previously opened with the same `key`."""
+
+    key: str
+    output: Any = None
+    is_complete: bool = True  # False when closed by flush() without a result
+    is_error: bool | None = None  # tool failure status; None when the harness reports no status
+
+
+SpanSignal = Union[OpenSpan, CloseSpan]
+
+
+class TurnUsage(BaseModel):
+    """Harness-independent turn usage/cost, attached to the turn span.
+
+    Token field names align with agentex.lib.core.observability.llm_metrics.
+    """
+
+    model_config = ConfigDict(from_attributes=True, populate_by_name=True)
+
+    model: str | None = None
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    cached_input_tokens: int | None = None
+    reasoning_tokens: int | None = None
+    total_tokens: int | None = None
+    cost_usd: float | None = None
+    duration_ms: int | None = None
+    # num_llm_calls is provider-reported and may be absent (None = "not
+    # reported"). num_tool_calls / num_reasoning_blocks are counted locally from
+    # the observed stream, so 0 is always a real count.
+    num_llm_calls: int | None = None
+    num_tool_calls: int = 0
+    num_reasoning_blocks: int = 0
+
+
+class TurnResult(BaseModel):
+    """Returned to the caller after a turn is delivered."""
+
+    model_config = ConfigDict(from_attributes=True, populate_by_name=True)
+
+    final_text: str = ""
+    usage: TurnUsage = TurnUsage()
+
+
+@runtime_checkable
+class HarnessTurn(Protocol):
+    """A single harness turn: a canonical stream plus its normalized usage.
+
+    Python async generators cannot cleanly return a value to their consumer, so
+    a tap exposes usage via `usage()` (valid only after `events` is exhausted)
+    rather than via StopAsyncIteration.
+    """
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]: ...
+
+    def usage(self) -> TurnUsage: ...
diff --git a/src/agentex/lib/core/harness/yield_delivery.py b/src/agentex/lib/core/harness/yield_delivery.py
new file mode 100644
index 000000000..69b39f152
--- /dev/null
+++ b/src/agentex/lib/core/harness/yield_delivery.py
@@ -0,0 +1,31 @@
+"""Yield delivery: pass the canonical stream through, tracing as a side effect."""
+
+from __future__ import annotations
+
+from typing import AsyncIterator, AsyncGenerator
+
+from agentex.lib.core.harness.types import StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+
+async def yield_events(
+    events: AsyncIterator[StreamTaskMessage],
+    tracer: SpanTracer | None = None,
+) -> AsyncGenerator[StreamTaskMessage, None]:
+    """Forward each event to the caller; derive + trace spans as a side effect.
+
+    For sync HTTP ACP agents that yield events back over the response. When
+    `tracer` is None, this is a pure passthrough.
+    """
+    deriver = SpanDeriver() if tracer is not None else None
+    try:
+        async for event in events:
+            if deriver is not None and tracer is not None:
+                for signal in deriver.observe(event):
+                    await tracer.handle(signal)
+            yield event
+    finally:
+        if deriver is not None and tracer is not None:
+            for signal in deriver.flush():
+                await tracer.handle(signal)
diff --git a/src/agentex/lib/core/services/adk/providers/openai.py b/src/agentex/lib/core/services/adk/providers/openai.py
index 75e507d8a..1ae29589d 100644
--- a/src/agentex/lib/core/services/adk/providers/openai.py
+++ b/src/agentex/lib/core/services/adk/providers/openai.py
@@ -14,15 +14,8 @@
 from agents.guardrail import InputGuardrail, OutputGuardrail
 from agents.exceptions import InputGuardrailTripwireTriggered, OutputGuardrailTripwireTriggered
 from openai.types.responses import (
-    ResponseCompletedEvent,
-    ResponseTextDeltaEvent,
-    ResponseFunctionToolCall,
     ResponseFunctionWebSearch,
-    ResponseOutputItemDoneEvent,
     ResponseCodeInterpreterToolCall,
-    ResponseReasoningSummaryPartDoneEvent,
-    ResponseReasoningSummaryPartAddedEvent,
-    ResponseReasoningSummaryTextDeltaEvent,
 )
 
 # Local imports
@@ -31,24 +24,14 @@
 from agentex.lib.utils.mcp import redact_mcp_server_params
 from agentex.lib.utils.temporal import heartbeat_if_in_workflow
 from agentex.lib.core.tracing.tracer import AsyncTracer
-from agentex.types.task_message_delta import (
-    TextDelta,
-    ReasoningSummaryDelta,
-)
-from agentex.types.task_message_update import (
-    StreamTaskMessageFull,
-    StreamTaskMessageDelta,
-)
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import StreamTaskMessageFull
 from agentex.types.task_message_content import (
     TextContent,
-    ReasoningContent,
     ToolRequestContent,
     ToolResponseContent,
 )
-from agentex.lib.core.services.adk.streaming import (
-    StreamingService,
-    StreamingTaskMessageContext,
-)
+from agentex.lib.core.services.adk.streaming import StreamingService
 
 logger = logging.make_logger(__name__)
 
@@ -695,7 +678,7 @@ async def run_agent_streamed_auto_send(
         input_guardrails: list[InputGuardrail] | None = None,
         output_guardrails: list[OutputGuardrail] | None = None,
         max_turns: int | None = None,
-        previous_response_id: str | None = None,  # noqa: ARG002
+        previous_response_id: str | None = None,
         created_at: datetime | None = None,
     ) -> RunResultStreaming:
         """
@@ -733,8 +716,6 @@ async def run_agent_streamed_auto_send(
         if self.agentex_client is None:
             raise ValueError("Agentex client must be provided for auto_send methods")
 
-        tool_call_map: dict[str, ResponseFunctionToolCall] = {}
-
         if self.tracer is None:
             raise RuntimeError("Tracer not initialized - ensure tracer is provided to OpenAIService")
         trace = self.tracer.trace(trace_id)
@@ -761,12 +742,13 @@ async def run_agent_streamed_auto_send(
         ) as span:
             heartbeat_if_in_workflow("run agent streamed auto send")
 
-            # Consume the workflow-supplied created_at on the FIRST message
-            # opened by this activity (whichever streaming context opens first
-            # for this turn). That's the message that races the workflow's
-            # user-echo at the server. Subsequent messages in the same turn are
-            # separated by network/processing latency and rely on the server's
-            # wall clock.
+            # AGX1-378 restored: created_at is now threaded through
+            # UnifiedEmitter.auto_send_turn -> auto_send -> every
+            # streaming_task_message_context call, so the first agent message of
+            # the turn is stamped with the workflow-supplied timestamp (e.g.
+            # workflow.now()) just as the original inline loop did.
+            # The dispenser is still used below for guardrail-rejection messages,
+            # which open their own streaming contexts directly.
             _take_created_at = _make_created_at_dispenser(created_at)
 
             async with mcp_server_context(mcp_server_params, mcp_timeout_seconds) as servers:
@@ -803,204 +785,48 @@ async def run_agent_streamed_auto_send(
 
                 agent = Agent(**agent_kwargs)
 
-                # Run with streaming
-                if max_turns is not None:
+                # Run with streaming. Forward previous_response_id so callers that
+                # continue a Responses-API conversation resume the prior response
+                # instead of silently starting a fresh one (mirrors the non-auto-send
+                # run_agent_streamed path).
+                if max_turns is not None and previous_response_id is not None:
+                    result = Runner.run_streamed(
+                        starting_agent=agent,
+                        input=input_list,
+                        max_turns=max_turns,
+                        previous_response_id=previous_response_id,
+                    )
+                elif max_turns is not None:
                     result = Runner.run_streamed(starting_agent=agent, input=input_list, max_turns=max_turns)
+                elif previous_response_id is not None:
+                    result = Runner.run_streamed(
+                        starting_agent=agent, input=input_list, previous_response_id=previous_response_id
+                    )
                 else:
                     result = Runner.run_streamed(starting_agent=agent, input=input_list)
 
-                item_id_to_streaming_context: dict[str, StreamingTaskMessageContext] = {}
-                unclosed_item_ids: set[str] = set()
-                # Simple string to accumulate reasoning summary
-                current_reasoning_summary: str = ""
+                # Migrate onto the unified harness surface: wrap the streamed run
+                # as an OpenAITurn (provider -> canonical StreamTaskMessage*
+                # adapter) and let UnifiedEmitter.auto_send_turn drive delivery +
+                # tracing + usage. The previous ~270-line inline loop that hand-
+                # rolled per-item streaming contexts, reasoning handling, and
+                # span derivation now lives in the shared harness modules.
+                # Imported lazily: openai_turn pulls in agentex.lib.adk, which
+                # imports this service module, so an eager import would create a
+                # circular import at package init.
+                from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+                turn = OpenAITurn(result=result, model=model)
+                emitter = UnifiedEmitter(
+                    task_id=task_id,
+                    trace_id=trace_id,
+                    parent_span_id=parent_span_id,
+                    tracer=self.tracer,
+                    streaming=self.streaming_service,
+                )
 
                 try:
-                    # Process streaming events with TaskMessage creation
-                    async for event in result.stream_events():
-                        heartbeat_if_in_workflow("processing stream event with auto send")
-
-                        if event.type == "run_item_stream_event":
-                            if event.item.type == "tool_call_item":
-                                tool_call_item = event.item.raw_item
-
-                                # Extract tool call information using the helper method
-                                call_id, tool_name, tool_arguments = self._extract_tool_call_info(tool_call_item)
-                                tool_call_map[call_id] = tool_call_item
-
-                                tool_request_content = ToolRequestContent(
-                                    author="agent",
-                                    tool_call_id=call_id,
-                                    name=tool_name,
-                                    arguments=tool_arguments,
-                                )
-
-                                # Create tool request using streaming context (immediate completion)
-                                async with self.streaming_service.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=tool_request_content,
-                                    created_at=_take_created_at(),
-                                ) as streaming_context:
-                                    # The message has already been persisted, but we still need to send an upda
-                                    await streaming_context.stream_update(
-                                        update=StreamTaskMessageFull(
-                                            parent_task_message=streaming_context.task_message,
-                                            content=tool_request_content,
-                                            type="full",
-                                        ),
-                                    )
-
-                            elif event.item.type == "tool_call_output_item":
-                                tool_output_item = event.item.raw_item
-
-                                # Extract tool response information using the helper method
-                                call_id, tool_name, content = self._extract_tool_response_info(
-                                    tool_call_map, tool_output_item
-                                )
-
-                                tool_response_content = ToolResponseContent(
-                                    author="agent",
-                                    tool_call_id=call_id,
-                                    name=tool_name,
-                                    content=content,
-                                )
-
-                                # Create tool response using streaming context (immediate completion)
-                                async with self.streaming_service.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=tool_response_content,
-                                    created_at=_take_created_at(),
-                                ) as streaming_context:
-                                    # The message has already been persisted, but we still need to send an update
-                                    await streaming_context.stream_update(
-                                        update=StreamTaskMessageFull(
-                                            parent_task_message=streaming_context.task_message,
-                                            content=tool_response_content,
-                                            type="full",
-                                        ),
-                                    )
-
-                        elif event.type == "raw_response_event":
-                            if isinstance(event.data, ResponseTextDeltaEvent):
-                                # Handle text delta
-                                item_id = event.data.item_id
-
-                                # Check if we already have a streaming context for this item
-                                if item_id not in item_id_to_streaming_context:
-                                    # Create a new streaming context for this item
-                                    streaming_context = self.streaming_service.streaming_task_message_context(
-                                        task_id=task_id,
-                                        initial_content=TextContent(
-                                            author="agent",
-                                            content="",
-                                        ),
-                                        created_at=_take_created_at(),
-                                    )
-                                    # Open the streaming context
-                                    item_id_to_streaming_context[item_id] = await streaming_context.open()
-                                    unclosed_item_ids.add(item_id)
-                                else:
-                                    streaming_context = item_id_to_streaming_context[item_id]
-
-                                # Stream the delta through the streaming service
-                                await streaming_context.stream_update(
-                                    update=StreamTaskMessageDelta(
-                                        parent_task_message=streaming_context.task_message,
-                                        delta=TextDelta(text_delta=event.data.delta, type="text"),
-                                        type="delta",
-                                    ),
-                                )
-                            # Reasoning step one: new summary part added
-                            elif isinstance(event.data, ResponseReasoningSummaryPartAddedEvent):
-                                # We need to create a new streaming context for this reasoning item
-                                item_id = event.data.item_id
-
-                                # Reset the reasoning summary string
-                                current_reasoning_summary = ""
-
-                                streaming_context = self.streaming_service.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=ReasoningContent(
-                                        author="agent",
-                                        summary=[],
-                                        content=[],
-                                        type="reasoning",
-                                        style="active",
-                                    ),
-                                    created_at=_take_created_at(),
-                                )
-
-                                # Replace the existing streaming context (if it exists)
-                                # Why do we replace? Cause all the reasoning parts use the same item_id!
-                                item_id_to_streaming_context[item_id] = await streaming_context.open()
-                                unclosed_item_ids.add(item_id)
-
-                            # Reasoning step two: handling summary text delta
-                            elif isinstance(event.data, ResponseReasoningSummaryTextDeltaEvent):
-                                # Accumulate the delta into the string
-                                current_reasoning_summary += event.data.delta
-                                streaming_context = item_id_to_streaming_context[item_id]
-
-                                # Stream the summary delta through the streaming service
-                                await streaming_context.stream_update(
-                                    update=StreamTaskMessageDelta(
-                                        parent_task_message=streaming_context.task_message,
-                                        delta=ReasoningSummaryDelta(
-                                            summary_index=event.data.summary_index,
-                                            summary_delta=event.data.delta,
-                                            type="reasoning_summary",
-                                        ),
-                                        type="delta",
-                                    ),
-                                )
-
-                            # Reasoning step three: handling summary text done, closing the streaming context
-                            elif isinstance(event.data, ResponseReasoningSummaryPartDoneEvent):
-                                # Handle reasoning summary text completion
-                                streaming_context = item_id_to_streaming_context[item_id]
-
-                                # Create the complete reasoning content with the accumulated summary
-                                complete_reasoning_content = ReasoningContent(
-                                    author="agent",
-                                    summary=[current_reasoning_summary],
-                                    content=[],
-                                    type="reasoning",
-                                    style="static",
-                                )
-
-                                # Send a full message update with the complete reasoning content
-                                await streaming_context.stream_update(
-                                    update=StreamTaskMessageFull(
-                                        parent_task_message=streaming_context.task_message,
-                                        content=complete_reasoning_content,
-                                        type="full",
-                                    ),
-                                )
-
-                                await streaming_context.close()
-                                unclosed_item_ids.discard(item_id)
-
-                            elif isinstance(event.data, ResponseOutputItemDoneEvent):
-                                # Handle item completion
-                                item_id = event.data.item.id
-
-                                # Finish the streaming context (sends DONE event and updates message)
-                                if item_id in item_id_to_streaming_context:
-                                    streaming_context = item_id_to_streaming_context[item_id]
-                                    await streaming_context.close()
-                                    if item_id in unclosed_item_ids:
-                                        unclosed_item_ids.remove(item_id)
-
-                            elif isinstance(event.data, ResponseCompletedEvent):
-                                # All items complete, finish all remaining streaming contexts for this session
-                                # Create a copy to avoid modifying set during iteration
-                                remaining_items = list(unclosed_item_ids)
-                                for item_id in remaining_items:
-                                    if (
-                                        item_id in unclosed_item_ids and item_id in item_id_to_streaming_context
-                                    ):  # Check if still unclosed
-                                        streaming_context = item_id_to_streaming_context[item_id]
-                                        await streaming_context.close()
-                                        unclosed_item_ids.discard(item_id)
+                    await emitter.auto_send_turn(turn, created_at=created_at)
 
                 except InputGuardrailTripwireTriggered as e:
                     # Handle guardrail trigger by sending a rejection message
@@ -1080,18 +906,6 @@ async def run_agent_streamed_auto_send(
                     # Re-raise to let the activity handle it
                     raise
 
-                finally:
-                    # Cleanup: ensure all streaming contexts for this session are properly finished
-                    # Create a copy to avoid modifying set during iteration
-                    remaining_items = list(unclosed_item_ids)
-                    for item_id in remaining_items:
-                        if (
-                            item_id in unclosed_item_ids and item_id in item_id_to_streaming_context
-                        ):  # Check if still unclosed
-                            streaming_context = item_id_to_streaming_context[item_id]
-                            await streaming_context.close()
-                            unclosed_item_ids.discard(item_id)
-
                 if span:
                     span.output = {
                         "new_items": [
diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py b/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py
index 7ccc6627a..75dc0f053 100644
--- a/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py
+++ b/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py
@@ -1,6 +1,7 @@
 """Custom Temporal Model Provider with streaming support for OpenAI agents."""
 from __future__ import annotations
 
+import json
 import time
 import uuid
 from typing import Any, List, Union, Optional, override
@@ -63,9 +64,9 @@
 from agentex.lib import adk
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.core.tracing.tracer import AsyncTracer
-from agentex.types.task_message_delta import TextDelta, ReasoningContentDelta, ReasoningSummaryDelta
+from agentex.types.task_message_delta import TextDelta, ToolRequestDelta, ReasoningContentDelta, ReasoningSummaryDelta
 from agentex.types.task_message_update import StreamTaskMessageFull, StreamTaskMessageDelta
-from agentex.types.task_message_content import TextContent, ReasoningContent
+from agentex.types.task_message_content import TextContent, ReasoningContent, ToolRequestContent
 from agentex.lib.adk.utils._modules.client import create_async_agentex_client
 from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import (
     streaming_task_id,
@@ -722,12 +723,27 @@ async def get_response(
                                     streaming_mode=self.streaming_mode,
                                 ).__aenter__()
                         elif item and getattr(item, 'type', None) == 'function_call':
-                            # Track the function call being streamed
+                            # Open a streaming context per function call so argument
+                            # deltas can be published incrementally. Coalescing and
+                            # mode dispatch are handled by the streaming layer.
+                            call_id = getattr(item, 'call_id', '')
+                            tool_name = getattr(item, 'name', '')
+                            call_context = await adk.streaming.streaming_task_message_context(
+                                task_id=task_id,
+                                initial_content=ToolRequestContent(
+                                    author="agent",
+                                    tool_call_id=call_id,
+                                    name=tool_name,
+                                    arguments={},
+                                ),
+                                streaming_mode=self.streaming_mode,
+                            ).__aenter__()
                             function_calls_in_progress[output_index] = {
                                 'id': getattr(item, 'id', ''),
-                                'call_id': getattr(item, 'call_id', ''),
-                                'name': getattr(item, 'name', ''),
+                                'call_id': call_id,
+                                'name': tool_name,
                                 'arguments': getattr(item, 'arguments', ''),
+                                'context': call_context,
                             }
                             logger.debug(f"[TemporalStreamingModel] Starting function call: {item.name}")
 
@@ -748,8 +764,24 @@ async def get_response(
                         output_index = getattr(event, 'output_index', 0)
                         delta = getattr(event, 'delta', '')
 
-                        if output_index in function_calls_in_progress:
-                            function_calls_in_progress[output_index]['arguments'] += delta
+                        call_data = function_calls_in_progress.get(output_index)
+                        if call_data is not None:
+                            call_data['arguments'] += delta
+                            call_context = call_data.get('context')
+                            if call_context is not None:
+                                try:
+                                    await call_context.stream_update(StreamTaskMessageDelta(
+                                        parent_task_message=call_context.task_message,
+                                        delta=ToolRequestDelta(
+                                            tool_call_id=call_data['call_id'],
+                                            name=call_data['name'],
+                                            arguments_delta=delta,
+                                            type="tool_request",
+                                        ),
+                                        type="delta",
+                                    ))
+                                except Exception as e:
+                                    logger.warning(f"Failed to send tool request delta: {e}")
                             logger.debug(f"[TemporalStreamingModel] Function call args delta: {delta[:50]}...")
 
                     elif isinstance(event, ResponseFunctionCallArgumentsDoneEvent):
@@ -874,6 +906,42 @@ async def get_response(
                                 )
                                 output_items.append(tool_call)
 
+                                # Emit the final ToolRequestContent and close the
+                                # per-call streaming context. If the model produced
+                                # invalid JSON args (truncation, hallucination), fall
+                                # back to an empty dict so the streaming layer can
+                                # still persist a message.
+                                call_context = call_data.get('context')
+                                if call_context is not None:
+                                    raw_args = call_data['arguments'] or ''
+                                    try:
+                                        parsed_args = json.loads(raw_args) if raw_args else {}
+                                    except json.JSONDecodeError:
+                                        logger.warning(
+                                            f"Failed to parse tool call arguments for {call_data['name']} "
+                                            f"(raw_args_bytes={len(raw_args)})"
+                                        )
+                                        parsed_args = {}
+                                    try:
+                                        await call_context.stream_update(StreamTaskMessageFull(
+                                            parent_task_message=call_context.task_message,
+                                            content=ToolRequestContent(
+                                                author="agent",
+                                                tool_call_id=call_data['call_id'],
+                                                name=call_data['name'],
+                                                arguments=parsed_args,
+                                            ),
+                                            type="full",
+                                        ))
+                                    except Exception as e:
+                                        logger.warning(f"Failed to send tool request full update: {e}")
+                                    try:
+                                        await call_context.close()
+                                    except Exception as e:
+                                        logger.warning(f"Failed to close tool request context: {e}")
+                                    finally:
+                                        call_data['context'] = None
+
                     elif isinstance(event, ResponseReasoningSummaryPartAddedEvent):
                         # New reasoning part/summary started - reset accumulator
                         part = getattr(event, 'part', None)
@@ -907,6 +975,17 @@ async def get_response(
                     await streaming_context.close()
                     streaming_context = None
 
+                # Defensive: close any function call contexts that didn't see a
+                # ResponseOutputItemDoneEvent (truncated stream, error mid-call).
+                for call_data in function_calls_in_progress.values():
+                    call_context = call_data.get('context')
+                    if call_context is not None:
+                        try:
+                            await call_context.close()
+                        except Exception as e:
+                            logger.warning(f"Failed to close orphaned tool request context: {e}")
+                        call_data['context'] = None
+
                 # Build the response from output items collected during streaming
                 # Create output from the items we collected
                 response_output = []
diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py
index 97dda0e61..26c0b7c4b 100644
--- a/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py
+++ b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py
@@ -12,8 +12,11 @@
 from openai.types.responses import (
     ResponseCompletedEvent,
     ResponseTextDeltaEvent,
+    ResponseOutputItemDoneEvent,
     ResponseOutputItemAddedEvent,
+    ResponseFunctionCallArgumentsDoneEvent,
     ResponseReasoningSummaryTextDeltaEvent,
+    ResponseFunctionCallArgumentsDeltaEvent,
 )
 
 
@@ -851,6 +854,197 @@ async def test_missing_task_id_error(self, streaming_model):
             )
 
 
+class TestStreamingModelFunctionCallArgsStreaming:
+    """Verify ``ResponseFunctionCallArgumentsDeltaEvent``s are surfaced as
+    ``ToolRequestDelta`` updates and that a final ``ToolRequestContent`` Full is
+    emitted on ``ResponseOutputItemDoneEvent``.
+
+    Without this, write-heavy tools (``write_file``, ``apply_patch``) buffer their
+    entire argument body inside ``invoke_model_activity`` and the UI sees a
+    multi-second freeze while the model is actively producing tokens.
+    """
+
+    @staticmethod
+    def _build_function_call_stream(arguments_text: str):
+        """Construct a streaming event sequence for a single function_call.
+
+        Mirrors the production order: Added → N × ArgumentsDelta → ArgumentsDone
+        → OutputItemDone → ResponseCompleted. ``spec=...`` makes ``isinstance``
+        dispatch in production work without triggering pydantic validation.
+        """
+        call_item = MagicMock()
+        call_item.type = "function_call"
+        call_item.id = "fc_abc"
+        call_item.call_id = "call_abc"
+        call_item.name = "write_file"
+        call_item.arguments = ""
+
+        item_added = MagicMock(spec=ResponseOutputItemAddedEvent)
+        item_added.item = call_item
+        item_added.output_index = 0
+
+        # Split the argument text into a few chunks to exercise the per-delta loop
+        chunk_size = max(1, len(arguments_text) // 3) if arguments_text else 1
+        chunks = [arguments_text[i:i + chunk_size] for i in range(0, len(arguments_text), chunk_size)] or [""]
+        delta_events = []
+        for chunk in chunks:
+            ev = MagicMock(spec=ResponseFunctionCallArgumentsDeltaEvent)
+            ev.delta = chunk
+            ev.output_index = 0
+            delta_events.append(ev)
+
+        args_done = MagicMock(spec=ResponseFunctionCallArgumentsDoneEvent)
+        args_done.arguments = arguments_text
+        args_done.output_index = 0
+
+        item_done = MagicMock(spec=ResponseOutputItemDoneEvent)
+        item_done.item = call_item
+        item_done.output_index = 0
+
+        completed = MagicMock(spec=ResponseCompletedEvent)
+        completed.response = MagicMock(output=[], usage=MagicMock(), id=None)
+
+        return [item_added, *delta_events, args_done, item_done, completed], chunks
+
+    @staticmethod
+    def _install_real_task_message(mock_adk_streaming, task_id: str):
+        """Replace the autouse fixture's MagicMock ``task_message`` with a real
+        ``TaskMessage`` so production's ``StreamTaskMessageDelta(parent_task_message=...)``
+        construction passes pydantic validation. The default mock works for tests
+        that only assert on the context's ``__aenter__`` call but breaks tests
+        that exercise ``stream_update`` end-to-end.
+        """
+        from agentex.types.task_message import TaskMessage
+        from agentex.types.task_message_content import ToolRequestContent
+
+        ctx = mock_adk_streaming.streaming_task_message_context.return_value
+        ctx.task_message = TaskMessage(
+            id="msg_test",
+            task_id=task_id,
+            content=ToolRequestContent(
+                author="agent",
+                tool_call_id="call_abc",
+                name="write_file",
+                arguments={},
+            ),
+            streaming_status="IN_PROGRESS",
+        )
+        return ctx
+
+    @pytest.mark.asyncio
+    async def test_function_call_emits_argument_deltas_and_final_full(
+        self, streaming_model, mock_adk_streaming, _streaming_context_vars, sample_task_id
+    ):
+        """A function_call with well-formed JSON args should produce:
+        (1) one streaming context opened with ``ToolRequestContent`` initial_content,
+        (2) one ``StreamTaskMessageDelta`` per ``ArgumentsDelta`` event carrying a
+            ``ToolRequestDelta`` with the right ``tool_call_id`` and ``arguments_delta``,
+        (3) one final ``StreamTaskMessageFull`` with ``ToolRequestContent`` whose
+            ``arguments`` is the parsed JSON dict.
+        """
+        from agentex.types.task_message_delta import ToolRequestDelta
+        from agentex.types.task_message_update import StreamTaskMessageFull, StreamTaskMessageDelta
+        from agentex.types.task_message_content import ToolRequestContent
+
+        ctx = self._install_real_task_message(mock_adk_streaming, sample_task_id)
+
+        args_text = '{"path": "/tmp/foo.txt", "contents": "hello world"}'
+        events, chunks = self._build_function_call_stream(args_text)
+
+        mock_stream = AsyncMock()
+        mock_stream.__aiter__.return_value = iter(events)
+        streaming_model.client.responses.create = AsyncMock(return_value=mock_stream)
+
+        await streaming_model.get_response(
+            system_instructions=None,
+            input="please write foo",
+            model_settings=ModelSettings(),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=None,
+        )
+
+        # 1. A streaming context was opened with ToolRequestContent.
+        opens = [
+            c for c in mock_adk_streaming.streaming_task_message_context.call_args_list
+            if isinstance(c.kwargs.get("initial_content"), ToolRequestContent)
+        ]
+        assert len(opens) == 1, f"expected one ToolRequest context, got {len(opens)}"
+        initial = opens[0].kwargs["initial_content"]
+        assert initial.tool_call_id == "call_abc"
+        assert initial.name == "write_file"
+
+        # 2. One StreamTaskMessageDelta(ToolRequestDelta) was streamed per
+        #    ArgumentsDelta event, preserving the delta text exactly.
+        delta_updates = [
+            call.args[0] if call.args else call.kwargs.get("update")
+            for call in ctx.stream_update.call_args_list
+            if (call.args and isinstance(call.args[0], StreamTaskMessageDelta)
+                and isinstance(call.args[0].delta, ToolRequestDelta))
+        ]
+        assert len(delta_updates) == len(chunks)
+        for update, expected_chunk in zip(delta_updates, chunks):
+            assert update.delta.tool_call_id == "call_abc"
+            assert update.delta.name == "write_file"
+            assert update.delta.arguments_delta == expected_chunk
+
+        # 3. A final StreamTaskMessageFull(ToolRequestContent) was streamed with
+        #    parsed args.
+        full_updates = [
+            call.args[0] if call.args else call.kwargs.get("update")
+            for call in ctx.stream_update.call_args_list
+            if (call.args and isinstance(call.args[0], StreamTaskMessageFull)
+                and isinstance(call.args[0].content, ToolRequestContent))
+        ]
+        assert len(full_updates) == 1
+        final = full_updates[0].content
+        assert final.tool_call_id == "call_abc"
+        assert final.name == "write_file"
+        assert final.arguments == {"path": "/tmp/foo.txt", "contents": "hello world"}
+
+    @pytest.mark.asyncio
+    async def test_function_call_malformed_args_fall_back_to_empty_dict(
+        self, streaming_model, mock_adk_streaming, _streaming_context_vars, sample_task_id, caplog
+    ):
+        """If the model produces invalid JSON for the args, the final
+        ``ToolRequestContent`` should carry ``arguments={}`` and a warning should
+        be logged. The raw delta stream is preserved either way.
+        """
+        from agentex.types.task_message_update import StreamTaskMessageFull
+        from agentex.types.task_message_content import ToolRequestContent
+
+        ctx = self._install_real_task_message(mock_adk_streaming, sample_task_id)
+
+        # Missing closing brace — invalid JSON.
+        events, _ = self._build_function_call_stream('{"path": "/tmp/foo.txt", "contents":')
+
+        mock_stream = AsyncMock()
+        mock_stream.__aiter__.return_value = iter(events)
+        streaming_model.client.responses.create = AsyncMock(return_value=mock_stream)
+
+        with caplog.at_level("WARNING"):
+            await streaming_model.get_response(
+                system_instructions=None,
+                input="please write foo",
+                model_settings=ModelSettings(),
+                tools=[],
+                output_schema=None,
+                handoffs=[],
+                tracing=None,
+            )
+
+        full_updates = [
+            call.args[0] if call.args else call.kwargs.get("update")
+            for call in ctx.stream_update.call_args_list
+            if (call.args and isinstance(call.args[0], StreamTaskMessageFull)
+                and isinstance(call.args[0].content, ToolRequestContent))
+        ]
+        assert len(full_updates) == 1
+        assert full_updates[0].content.arguments == {}
+        assert any("Failed to parse tool call arguments" in r.getMessage() for r in caplog.records)
+
+
 class TestStreamingModelUsageResponseIdAndCacheKey:
     """Cover real-Usage capture, real response_id, span emission, and opt-in prompt_cache_key."""
 
diff --git a/src/agentex/lib/core/tracing/processors/agentex_tracing_processor.py b/src/agentex/lib/core/tracing/processors/agentex_tracing_processor.py
index 98d50546b..448d013e9 100644
--- a/src/agentex/lib/core/tracing/processors/agentex_tracing_processor.py
+++ b/src/agentex/lib/core/tracing/processors/agentex_tracing_processor.py
@@ -1,3 +1,4 @@
+import os
 import asyncio
 import weakref
 from typing import TYPE_CHECKING, Any, Dict, override
@@ -5,6 +6,7 @@
 from agentex import Agentex
 from agentex.types.span import Span
 from agentex.lib.types.tracing import AgentexTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
 from agentex.lib.adk.utils._modules.client import create_async_agentex_client
 from agentex.lib.core.tracing.processors.tracing_processor_interface import (
     SyncTracingProcessor,
@@ -14,28 +16,88 @@
 if TYPE_CHECKING:
     from agentex import AsyncAgentex
 
+logger = make_logger(__name__)
+
+
+# NOTE: This is the Agentex-backend toggle (writes to the agentex `spans`
+# table via the Agentex API). It is intentionally SEPARATE from the SGP/EGP
+# processor's ``AGENTEX_TRACING_SKIP_SPAN_START`` so the two backends can be
+# controlled independently.
+_SKIP_SPAN_START_ENV = "AGENTEX_TRACING_SKIP_AGENTEX_SPAN_START"
+
+
+def _skip_span_start_enabled() -> bool:
+    """Whether to skip the Agentex span-start write and persist each span only on end.
+
+    The Agentex processor otherwise writes every span twice: a ``spans.create``
+    on start (no ``end_time``/``output`` yet) and a ``spans.update`` on end.
+    The start row is overwritten by the end write moments later, so persisting
+    it doubles the per-span HTTP/DB write volume against the Agentex control
+    plane — the load that timed out span-start activities and pressured the
+    Agentex Postgres connection pool under load.
+
+    When enabled (the default), the start write is skipped and the END write
+    becomes a single ``spans.create`` carrying the complete span — one INSERT
+    per span instead of an INSERT + UPDATE. (A plain ``spans.update`` on end
+    would 404 because the row was never created.)
+
+    Default ON. Set ``AGENTEX_TRACING_SKIP_AGENTEX_SPAN_START`` to
+    ``0``/``false``/``no``/``off`` to restore the start write — e.g. if you
+    need in-flight spans visible before they complete, or spans that never end
+    (process crash) to still be persisted.
+    """
+    raw = os.environ.get(_SKIP_SPAN_START_ENV, "1").strip().lower()
+    return raw not in ("0", "false", "no", "off")
+
+
+def _create_kwargs(span: Span) -> Dict[str, Any]:
+    """Full-span kwargs for ``spans.create`` — used on start (skip disabled) and
+    on end (skip enabled, single-INSERT path)."""
+    return {
+        "name": span.name,
+        "start_time": span.start_time,
+        "end_time": span.end_time,
+        "id": span.id,
+        "trace_id": span.trace_id,
+        "parent_id": span.parent_id,
+        "input": span.input,
+        "output": span.output,
+        "data": span.data,
+        "task_id": span.task_id,
+    }
+
 
 class AgentexSyncTracingProcessor(SyncTracingProcessor):
     def __init__(self, config: AgentexTracingProcessorConfig):  # noqa: ARG002
         self.client = Agentex()
+        # Capture the skip decision once at init: both halves of a span's
+        # lifecycle MUST agree, otherwise a start-skip + end-update lands on a
+        # non-existent row (404) — or the reverse double-creates. Re-reading the
+        # env per event would let a mid-span toggle (tests, config reload) split
+        # the decision. Deploy-time flag, so a single read is correct.
+        self._skip_span_start = _skip_span_start_enabled()
+        logger.info(
+            "Agentex tracing span-start write %s (%s)",
+            "disabled — end-only ingest" if self._skip_span_start else "enabled",
+            _SKIP_SPAN_START_ENV,
+        )
 
     @override
     def on_span_start(self, span: Span) -> None:
-        self.client.spans.create(
-            name=span.name,
-            start_time=span.start_time,
-            end_time=span.end_time,
-            trace_id=span.trace_id,
-            id=span.id,
-            data=span.data,
-            input=span.input,
-            output=span.output,
-            parent_id=span.parent_id,
-            task_id=span.task_id,
-        )
+        # End-only ingest: by default the start write is skipped (see
+        # _skip_span_start_enabled) so each span is persisted once, on end.
+        if self._skip_span_start:
+            return
+        self.client.spans.create(**_create_kwargs(span))
 
     @override
     def on_span_end(self, span: Span) -> None:
+        # End-only ingest: the start create was skipped, so persist the complete
+        # span as a single INSERT here (a bare spans.update would 404 — no row).
+        if self._skip_span_start:
+            self.client.spans.create(**_create_kwargs(span))
+            return
+
         update: Dict[str, Any] = {}
         if span.trace_id:
             update["trace_id"] = span.trace_id
@@ -82,6 +144,17 @@ def __init__(self, config: AgentexTracingProcessorConfig):  # noqa: ARG002
         self._clients_by_loop: weakref.WeakKeyDictionary[
             asyncio.AbstractEventLoop, "AsyncAgentex"
         ] = weakref.WeakKeyDictionary()
+        # Capture the skip decision once at init: both halves of a span's
+        # lifecycle MUST agree, otherwise a start-skip + end-update lands on a
+        # non-existent row (404) — or the reverse double-creates. Re-reading the
+        # env per event would let a mid-span toggle (tests, config reload) split
+        # the decision. Deploy-time flag, so a single read is correct.
+        self._skip_span_start = _skip_span_start_enabled()
+        logger.info(
+            "Agentex tracing span-start write %s (%s)",
+            "disabled — end-only ingest" if self._skip_span_start else "enabled",
+            _SKIP_SPAN_START_ENV,
+        )
 
     def _build_client(self) -> "AsyncAgentex":
         import httpx
@@ -111,21 +184,20 @@ def client(self) -> "AsyncAgentex":
     # https://linear.app/scale-epd/issue/AGX1-199/add-agentex-batch-endpoint-for-traces
     @override
     async def on_span_start(self, span: Span) -> None:
-        await self.client.spans.create(
-            name=span.name,
-            start_time=span.start_time,
-            end_time=span.end_time,
-            id=span.id,
-            trace_id=span.trace_id,
-            parent_id=span.parent_id,
-            input=span.input,
-            output=span.output,
-            data=span.data,
-            task_id=span.task_id,
-        )
+        # End-only ingest: by default the start write is skipped (see
+        # _skip_span_start_enabled) so each span is persisted once, on end.
+        if self._skip_span_start:
+            return
+        await self.client.spans.create(**_create_kwargs(span))
 
     @override
     async def on_span_end(self, span: Span) -> None:
+        # End-only ingest: the start create was skipped, so persist the complete
+        # span as a single INSERT here (a bare spans.update would 404 — no row).
+        if self._skip_span_start:
+            await self.client.spans.create(**_create_kwargs(span))
+            return
+
         update: Dict[str, Any] = {}
         if span.trace_id:
             update["trace_id"] = span.trace_id
diff --git a/src/agentex/lib/sdk/state_machine/state_machine.py b/src/agentex/lib/sdk/state_machine/state_machine.py
index f1e5c4239..5679a6bd8 100644
--- a/src/agentex/lib/sdk/state_machine/state_machine.py
+++ b/src/agentex/lib/sdk/state_machine/state_machine.py
@@ -113,6 +113,7 @@ async def reset_to_initial_state(self):
         """
         Reset the state machine to its initial state.
         """
+        span = None
         if self._trace_transitions:
             if self._task_id is None:
                 raise ValueError(
@@ -126,7 +127,7 @@ async def reset_to_initial_state(self):
 
         await self.transition(self._initial_state)
 
-        if self._trace_transitions:
+        if self._trace_transitions and span is not None:
             span.output = {"output_state": self._initial_state}  # type: ignore[assignment,union-attr]
             await adk.tracing.end_span(trace_id=self._task_id, span=span)
 
diff --git a/src/agentex/lib/sdk/utils/webhooks.py b/src/agentex/lib/sdk/utils/webhooks.py
new file mode 100644
index 000000000..d4b7b43e1
--- /dev/null
+++ b/src/agentex/lib/sdk/utils/webhooks.py
@@ -0,0 +1,389 @@
+"""Drive an agent turn from an inbound webhook, inside a forward-route handler.
+
+The Agentex server already exposes a webhook ingress: a request to
+``/agents/forward/name/{agent}/{path}`` is signature-verified (GitHub ``sha256=`` /
+Slack ``v0:`` HMAC via the agent's registered keys) and proxied to the agent's own
+HTTP route. This helper is what that route handler calls to turn the inbound payload
+into an agent turn — without each agent re-implementing payload shaping, config
+resolution, session continuity, and reply handling.
+
+Typical use inside an agent::
+
+    from fastapi import Request
+    from agentex.lib.sdk.utils.webhooks import handle_webhook
+
+
+    @acp.post("/github-pr")
+    async def github_pr(request: Request):
+        body = await request.json()
+        result = await handle_webhook(
+            agent_name="my-agent",
+            payload=body,
+            acp_type="sync",
+            shaper="github_pr",
+            params_source="https://<host>/public/v5/agent_configs/<id>/resolve",
+            params_source_headers={"x-api-key": ..., "x-selected-account-id": ...},
+            wait=True,
+        )
+        return {"task_id": result.task_id, "reply": result.reply}
+
+Config-by-id: pass ``params_source`` pointing at the platform's config-resolve
+endpoint; the resolved params (e.g. system_prompt / harness / model / tools) are
+forwarded opaquely to ``task/create``. Or pass inline ``params`` for a one-off.
+"""
+
+from __future__ import annotations
+
+import json
+import hashlib
+from typing import Any, Literal
+from dataclasses import field, dataclass
+from collections.abc import Mapping, Callable, Awaitable
+
+from agentex.lib import adk
+from agentex.lib.utils.logging import make_logger
+from agentex.types.task_message_content import TextContent
+
+logger = make_logger(__name__)
+
+# Injectable params fetcher (url -> JSON). Default uses httpx; tests inject a fake.
+ParamsFetcher = Callable[[str], Awaitable[dict[str, Any]]]
+
+MAX_BODY_CHARS = 4000
+MAX_DIFF_CHARS = 30000
+
+
+class WebhookError(RuntimeError):
+    """Raised when a webhook turn cannot be driven (e.g. params resolution failed)."""
+
+
+@dataclass
+class WebhookResult:
+    task_id: str
+    # Sync agents reply inline. For async agents, ``reply`` is None unless ``wait`` was
+    # set, in which case it is the polled reply (or None if it didn't settle in time).
+    reply: str | None = None
+    task_metadata: dict[str, str] = field(default_factory=dict)
+
+
+# --------------------------------------------------------------------------- shaping
+
+
+def session_key(agent_name: str, channel: str, peer_id: str) -> str:
+    """Stable per-conversation task name → reused for get-or-create on task/create, so
+    repeat events from the same source fold into one task instead of spawning new ones."""
+    basis = peer_id or "main"
+    digest = hashlib.sha1(f"{agent_name}:{channel}:{basis}".encode()).hexdigest()[:16]
+    return f"wh-{channel}-{digest}"
+
+
+# Top-level fields a generic webhook payload might carry its prompt in, in priority
+# order. Matched case-insensitively against the payload's keys.
+GENERIC_PROMPT_KEYS = (
+    "text",
+    "message",
+    "prompt",
+    "goal",
+    "content",
+    "body",
+    "description",
+    "title",
+)
+
+
+def render_generic(body: dict[str, Any]) -> str:
+    """Generic payload → prompt text: first non-empty string among GENERIC_PROMPT_KEYS
+    (case-insensitive), else raw JSON."""
+    lowered = {key.lower(): value for key, value in body.items() if isinstance(key, str)}
+    for key in GENERIC_PROMPT_KEYS:
+        value = lowered.get(key)
+        if isinstance(value, str) and value.strip():
+            return value.strip()
+    return json.dumps(body, indent=2)[:8000]
+
+
+def shape_github_pr(body: dict[str, Any]) -> tuple[str, str | None, str]:
+    """Shape a GitHub/Gitea pull-request webhook into (prompt, peer_id, sender).
+
+    ``peer_id`` is ``repo#number`` so repeated events for the same PR (opened,
+    synchronize, ...) fold into one task. Falls back to generic rendering for non-PR
+    payloads (ping, issue, ...).
+    """
+    pull_request = body.get("pull_request")
+    if not isinstance(pull_request, dict):
+        return render_generic(body), None, _github_actor(body)
+
+    repo = _repo_full_name(body)
+    number = pull_request.get("number")
+    title = (pull_request.get("title") or "").strip()
+    action = (body.get("action") or "").strip()
+    description = (pull_request.get("body") or "").strip()
+    html_url = pull_request.get("html_url") or pull_request.get("url")
+
+    header = "Pull request"
+    if repo and number is not None:
+        header = f"Pull request {repo}#{number}"
+    elif number is not None:
+        header = f"Pull request #{number}"
+
+    lines = [f"{header}: {title}" if title else header]
+    if action:
+        lines.append(f"Action: {action}")
+    if html_url:
+        lines.append(f"URL: {html_url}")
+    if description:
+        lines.extend(["", "Description:", description[:MAX_BODY_CHARS]])
+
+    diff = _inline_diff(body, pull_request)
+    if diff:
+        lines.extend(["", "Diff:", diff[:MAX_DIFF_CHARS]])
+    else:
+        # Standard GitHub/Gitea payloads carry a diff/patch URL, not the patch body.
+        # Surface it so a tool-enabled agent (or the caller) can fetch the diff; inline
+        # `diff` wins. Gitea sends patch_url alongside diff_url, so accept either.
+        diff_url = pull_request.get("diff_url") or pull_request.get("patch_url")
+        if diff_url:
+            lines.extend(["", f"Diff URL: {diff_url}"])
+
+    peer_id = None
+    if repo and number is not None:
+        peer_id = f"{repo}#{number}"
+    elif number is not None:
+        peer_id = f"pr#{number}"
+    return "\n".join(lines), peer_id, _github_actor(body)
+
+
+def _repo_full_name(body: dict[str, Any]) -> str | None:
+    repo = body.get("repository")
+    if isinstance(repo, dict) and isinstance(repo.get("full_name"), str):
+        return repo["full_name"] or None
+    return None
+
+
+def _github_actor(body: dict[str, Any]) -> str:
+    sender = body.get("sender")
+    if isinstance(sender, dict) and isinstance(sender.get("login"), str) and sender["login"]:
+        return sender["login"]
+    return "webhook"
+
+
+def _inline_diff(body: dict[str, Any], pull_request: dict[str, Any]) -> str | None:
+    for source in (body, pull_request):
+        diff = source.get("diff")
+        if isinstance(diff, str) and diff.strip():
+            return diff.strip()
+    return None
+
+
+# ------------------------------------------------------------------- params resolution
+
+
+async def _default_fetch(url: str, headers: dict[str, str]) -> dict[str, Any]:
+    """GET a params source over HTTP. Imported lazily so callers that only pass inline
+    params carry no httpx dependency."""
+    import httpx
+
+    request_headers = {"accept": "application/json", **headers}
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.get(url, headers=request_headers)
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as exc:
+        raise WebhookError(f"params source request failed: {exc}") from exc
+    except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
+        raise WebhookError(f"params source returned invalid JSON: {exc}") from exc
+
+
+async def resolve_remote_params(
+    url: str,
+    headers: dict[str, str] | None = None,
+    *,
+    fetch: ParamsFetcher | None = None,
+) -> tuple[dict[str, Any], dict[str, str]]:
+    """Fetch params (+ optional task_metadata) from a config-resolve URL.
+
+    Response shape (lenient)::
+
+        {"params": {...}, "task_metadata": {...}}
+
+    A bare object with no ``params`` key is treated as the params dict itself (minus a
+    top-level ``task_metadata``, which is returned separately for stamping).
+    """
+    do_fetch = fetch or (lambda u: _default_fetch(u, headers or {}))
+    payload = await do_fetch(url)
+    if not isinstance(payload, dict):
+        raise WebhookError("params source returned a non-object response")
+
+    metadata_raw = payload.get("task_metadata")
+    task_metadata = {str(k): str(v) for k, v in metadata_raw.items()} if isinstance(metadata_raw, dict) else {}
+    params = payload.get("params")
+    if not isinstance(params, dict):
+        params = {k: v for k, v in payload.items() if k != "task_metadata"}
+    return params, task_metadata
+
+
+# ------------------------------------------------------------------------- dispatch
+
+
+def _agent_reply_text(messages: object) -> str | None:
+    """Join agent-authored text from a message list (sync result or polled stream)."""
+    if not isinstance(messages, list):
+        return None
+    parts = []
+    for message in messages:
+        content = getattr(message, "content", None)
+        if (
+            content is not None
+            and getattr(content, "author", None) == "agent"
+            and getattr(content, "type", None) == "text"
+        ):
+            text = (getattr(content, "content", "") or "").strip()
+            if text:
+                parts.append(text)
+    return "\n\n".join(parts) if parts else None
+
+
+async def handle_webhook(
+    *,
+    agent_name: str,
+    payload: dict[str, Any],
+    acp_type: Literal["sync", "async"] = "sync",
+    shaper: Literal["generic", "github_pr"] = "generic",
+    channel: str | None = None,
+    params: dict[str, Any] | None = None,
+    params_source: str | None = None,
+    params_source_headers: dict[str, str] | None = None,
+    peer_id: str | None = None,
+    extra_task_metadata: dict[str, str] | None = None,
+    wait: bool = False,
+    fetch: ParamsFetcher | None = None,
+) -> WebhookResult:
+    """Drive an agent turn from a webhook payload, agent-side, via the ADK client.
+
+    - Shapes the payload (generic or GitHub PR) into a prompt + conversation scope.
+    - Resolves task params: inline ``params``, or fetched from ``params_source``
+      (config-by-id). The platform never interprets params — they're forwarded to the
+      agent as ``task/create`` params.
+    - Get-or-creates a task keyed on a stable session key, so repeat events fold in.
+    - Sends the turn (sync → message/send returns the reply inline; async → event/send,
+      with optional ``wait`` to poll for the reply).
+    """
+    channel = channel or shaper
+    if shaper == "github_pr":
+        text, derived_peer, sender = shape_github_pr(payload)
+        peer_id = peer_id or derived_peer
+    else:
+        text, sender = render_generic(payload), "webhook"
+
+    task_metadata: dict[str, str] = {"channel": channel, "sender_id": sender}
+    if peer_id:
+        task_metadata["peer_id"] = peer_id
+
+    resolved_params = dict(params) if params else {}
+    if params_source:
+        resolved_params, source_metadata = await resolve_remote_params(
+            params_source, params_source_headers, fetch=fetch
+        )
+        # Source metadata + caller extras never override the canonical fields above.
+        for key, value in {**source_metadata, **(extra_task_metadata or {})}.items():
+            task_metadata.setdefault(key, str(value))
+    elif extra_task_metadata:
+        for key, value in extra_task_metadata.items():
+            task_metadata.setdefault(key, str(value))
+
+    name = session_key(agent_name, channel, peer_id or "")
+    # task/create carries only name/params (CreateTaskParams has no task_metadata field),
+    # so we create first, then stamp task_metadata via a follow-up update below.
+    task = await adk.acp.create_task(
+        name=name,
+        agent_name=agent_name,
+        params=resolved_params or None,
+    )
+
+    # Best-effort: stamp the resolved task_metadata (channel/sender/peer_id, plus the
+    # display_name etc. from params_source) onto the task so it's labeled in the UI.
+    # Failure must never break the run — the metadata is also returned on the result.
+    if task_metadata:
+        try:
+            merged_task_metadata = {
+                **_task_metadata_dict(getattr(task, "task_metadata", None)),
+                **task_metadata,
+            }
+            await adk.tasks.update(task_id=task.id, task_metadata=merged_task_metadata)
+        except Exception:
+            logger.warning("Failed to stamp task_metadata on task %s", task.id, exc_info=True)
+
+    content = TextContent(author="user", content=text, format="markdown")
+
+    if acp_type == "sync":
+        messages = await adk.acp.send_message(task_id=task.id, agent_name=agent_name, content=content)
+        return WebhookResult(task_id=task.id, reply=_agent_reply_text(messages), task_metadata=task_metadata)
+
+    # Async: when we'll wait for the reply, snapshot existing message ids BEFORE the
+    # event so a reused task's prior reply (session continuity) isn't mistaken for it.
+    if wait:
+        seen_ids, seen_count = await _message_snapshot(task.id)
+        await adk.acp.send_event(task_id=task.id, agent_name=agent_name, content=content)
+        reply = await _await_reply(task.id, seen_ids, seen_count=seen_count)
+    else:
+        await adk.acp.send_event(task_id=task.id, agent_name=agent_name, content=content)
+        reply = None
+    return WebhookResult(task_id=task.id, reply=reply, task_metadata=task_metadata)
+
+
+def _task_metadata_dict(value: object) -> dict[str, Any]:
+    if isinstance(value, Mapping):
+        return dict(value)
+    return {}
+
+
+async def _message_snapshot(task_id: str) -> tuple[set[str], int]:
+    messages = await adk.messages.list(task_id=task_id)
+    messages = messages or []
+    return {mid for m in messages if (mid := getattr(m, "id", None)) is not None}, len(messages)
+
+
+async def _message_ids(task_id: str) -> set[str]:
+    # Only track real ids. Keeping None in the set would let a later id-less message
+    # collide with it and be wrongly treated as already-seen (dropping a fresh reply).
+    seen_ids, _ = await _message_snapshot(task_id)
+    return seen_ids
+
+
+async def _await_reply(
+    task_id: str,
+    seen_ids: set[str | None],
+    *,
+    seen_count: int | None = None,
+    timeout_s: float = 120.0,
+    interval_s: float = 2.0,
+    quiescence_s: float = 6.0,
+) -> str | None:
+    """Poll for THIS turn's reply — agent text in messages that weren't present before
+    the event — until it settles (unchanged for ``quiescence_s``) or times out. Filtering
+    on new message ids avoids returning a stale prior reply on a reused task."""
+    import asyncio
+
+    waited = 0.0
+    last: str | None = None
+    stable_for = 0.0
+    while waited < timeout_s:
+        await asyncio.sleep(interval_s)
+        waited += interval_s
+        messages = await adk.messages.list(task_id=task_id)
+        new = []
+        for index, message in enumerate(messages or []):
+            mid = getattr(message, "id", None)
+            if mid is not None and mid not in seen_ids:
+                new.append(message)
+            elif mid is None and seen_count is not None and index >= seen_count:
+                new.append(message)
+        text = _agent_reply_text(new)
+        if text and text == last:
+            stable_for += interval_s
+            if stable_for >= quiescence_s:
+                return text
+        elif text:
+            last, stable_for = text, 0.0
+    return last
diff --git a/src/agentex/resources/__init__.py b/src/agentex/resources/__init__.py
index 00e0bfea8..43dbdbdb4 100644
--- a/src/agentex/resources/__init__.py
+++ b/src/agentex/resources/__init__.py
@@ -56,6 +56,14 @@
     MessagesResourceWithStreamingResponse,
     AsyncMessagesResourceWithStreamingResponse,
 )
+from .webhooks import (
+    WebhooksResource,
+    AsyncWebhooksResource,
+    WebhooksResourceWithRawResponse,
+    AsyncWebhooksResourceWithRawResponse,
+    WebhooksResourceWithStreamingResponse,
+    AsyncWebhooksResourceWithStreamingResponse,
+)
 from .checkpoints import (
     CheckpointsResource,
     AsyncCheckpointsResource,
@@ -128,4 +136,10 @@
     "AsyncCheckpointsResourceWithRawResponse",
     "CheckpointsResourceWithStreamingResponse",
     "AsyncCheckpointsResourceWithStreamingResponse",
+    "WebhooksResource",
+    "AsyncWebhooksResource",
+    "WebhooksResourceWithRawResponse",
+    "AsyncWebhooksResourceWithRawResponse",
+    "WebhooksResourceWithStreamingResponse",
+    "AsyncWebhooksResourceWithStreamingResponse",
 ]
diff --git a/src/agentex/resources/tasks.py b/src/agentex/resources/tasks.py
index 67704e36b..56ad84d2c 100644
--- a/src/agentex/resources/tasks.py
+++ b/src/agentex/resources/tasks.py
@@ -554,6 +554,7 @@ def update_by_id(
         self,
         task_id: str,
         *,
+        merge_params: Optional[Dict[str, object]] | Omit = omit,
         task_metadata: Optional[Dict[str, object]] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -578,7 +579,13 @@ def update_by_id(
             raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
         return self._put(
             path_template("/tasks/{task_id}", task_id=task_id),
-            body=maybe_transform({"task_metadata": task_metadata}, task_update_by_id_params.TaskUpdateByIDParams),
+            body=maybe_transform(
+                {
+                    "merge_params": merge_params,
+                    "task_metadata": task_metadata,
+                },
+                task_update_by_id_params.TaskUpdateByIDParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
@@ -589,6 +596,7 @@ def update_by_name(
         self,
         task_name: str,
         *,
+        merge_params: Optional[Dict[str, object]] | Omit = omit,
         task_metadata: Optional[Dict[str, object]] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -613,7 +621,13 @@ def update_by_name(
             raise ValueError(f"Expected a non-empty value for `task_name` but received {task_name!r}")
         return self._put(
             path_template("/tasks/name/{task_name}", task_name=task_name),
-            body=maybe_transform({"task_metadata": task_metadata}, task_update_by_name_params.TaskUpdateByNameParams),
+            body=maybe_transform(
+                {
+                    "merge_params": merge_params,
+                    "task_metadata": task_metadata,
+                },
+                task_update_by_name_params.TaskUpdateByNameParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
@@ -1136,6 +1150,7 @@ async def update_by_id(
         self,
         task_id: str,
         *,
+        merge_params: Optional[Dict[str, object]] | Omit = omit,
         task_metadata: Optional[Dict[str, object]] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1161,7 +1176,11 @@ async def update_by_id(
         return await self._put(
             path_template("/tasks/{task_id}", task_id=task_id),
             body=await async_maybe_transform(
-                {"task_metadata": task_metadata}, task_update_by_id_params.TaskUpdateByIDParams
+                {
+                    "merge_params": merge_params,
+                    "task_metadata": task_metadata,
+                },
+                task_update_by_id_params.TaskUpdateByIDParams,
             ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -1173,6 +1192,7 @@ async def update_by_name(
         self,
         task_name: str,
         *,
+        merge_params: Optional[Dict[str, object]] | Omit = omit,
         task_metadata: Optional[Dict[str, object]] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
@@ -1198,7 +1218,11 @@ async def update_by_name(
         return await self._put(
             path_template("/tasks/name/{task_name}", task_name=task_name),
             body=await async_maybe_transform(
-                {"task_metadata": task_metadata}, task_update_by_name_params.TaskUpdateByNameParams
+                {
+                    "merge_params": merge_params,
+                    "task_metadata": task_metadata,
+                },
+                task_update_by_name_params.TaskUpdateByNameParams,
             ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
diff --git a/src/agentex/resources/webhooks.py b/src/agentex/resources/webhooks.py
new file mode 100644
index 000000000..f565a7870
--- /dev/null
+++ b/src/agentex/resources/webhooks.py
@@ -0,0 +1,242 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ..types import webhook_create_webhook_trigger_params
+from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.webhook_create_webhook_trigger_response import WebhookCreateWebhookTriggerResponse
+
+__all__ = ["WebhooksResource", "AsyncWebhooksResource"]
+
+
+class WebhooksResource(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> WebhooksResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/scaleapi/scale-agentex-python#accessing-raw-response-data-eg-headers
+        """
+        return WebhooksResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> WebhooksResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/scaleapi/scale-agentex-python#with_streaming_response
+        """
+        return WebhooksResourceWithStreamingResponse(self)
+
+    def create_webhook_trigger(
+        self,
+        *,
+        agent_name: str,
+        forward_path: str,
+        name: str,
+        base_url: Optional[str] | Omit = omit,
+        secret: Optional[str] | Omit = omit,
+        source: Literal["internal", "external", "github", "slack"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> WebhookCreateWebhookTriggerResponse:
+        """
+        Wire a webhook trigger in one call.
+
+        Registers the source's signature-verification key (github/slack) for the agent
+        and returns the ready-to-paste forward webhook URL plus the signing secret
+        (shown once). The webhook then flows through the existing /agents/forward
+        ingress, which verifies the signature against this key. Bundles the existing
+        key-create + URL composition so a UI (or a curl) can set up a trigger without
+        two steps.
+
+        Args:
+          agent_name: The agent the webhook drives.
+
+          forward_path: Subpath the agent's own route handles, e.g. 'github-pr/<config-id>'. Appended to
+              /agents/forward/name/{agent_name}/ to form the webhook URL.
+
+          name: Signature-lookup key: the repo full_name (github) or api_app_id (slack) that the
+              forward ingress matches the incoming webhook against.
+
+          base_url: Optional public agentex base URL for the returned webhook_url; defaults to the
+              AGENTEX_PUBLIC_URL env var.
+
+          secret: Signing secret. For GitHub, omit to generate one, or provide an existing webhook
+              secret. For Slack, this is required and must be the Slack app's Signing Secret.
+
+          source: Webhook source whose signature is verified (github or slack).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/agent_api_keys/webhook-trigger",
+            body=maybe_transform(
+                {
+                    "agent_name": agent_name,
+                    "forward_path": forward_path,
+                    "name": name,
+                    "base_url": base_url,
+                    "secret": secret,
+                    "source": source,
+                },
+                webhook_create_webhook_trigger_params.WebhookCreateWebhookTriggerParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=WebhookCreateWebhookTriggerResponse,
+        )
+
+
+class AsyncWebhooksResource(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncWebhooksResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/scaleapi/scale-agentex-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncWebhooksResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncWebhooksResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/scaleapi/scale-agentex-python#with_streaming_response
+        """
+        return AsyncWebhooksResourceWithStreamingResponse(self)
+
+    async def create_webhook_trigger(
+        self,
+        *,
+        agent_name: str,
+        forward_path: str,
+        name: str,
+        base_url: Optional[str] | Omit = omit,
+        secret: Optional[str] | Omit = omit,
+        source: Literal["internal", "external", "github", "slack"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> WebhookCreateWebhookTriggerResponse:
+        """
+        Wire a webhook trigger in one call.
+
+        Registers the source's signature-verification key (github/slack) for the agent
+        and returns the ready-to-paste forward webhook URL plus the signing secret
+        (shown once). The webhook then flows through the existing /agents/forward
+        ingress, which verifies the signature against this key. Bundles the existing
+        key-create + URL composition so a UI (or a curl) can set up a trigger without
+        two steps.
+
+        Args:
+          agent_name: The agent the webhook drives.
+
+          forward_path: Subpath the agent's own route handles, e.g. 'github-pr/<config-id>'. Appended to
+              /agents/forward/name/{agent_name}/ to form the webhook URL.
+
+          name: Signature-lookup key: the repo full_name (github) or api_app_id (slack) that the
+              forward ingress matches the incoming webhook against.
+
+          base_url: Optional public agentex base URL for the returned webhook_url; defaults to the
+              AGENTEX_PUBLIC_URL env var.
+
+          secret: Signing secret. For GitHub, omit to generate one, or provide an existing webhook
+              secret. For Slack, this is required and must be the Slack app's Signing Secret.
+
+          source: Webhook source whose signature is verified (github or slack).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/agent_api_keys/webhook-trigger",
+            body=await async_maybe_transform(
+                {
+                    "agent_name": agent_name,
+                    "forward_path": forward_path,
+                    "name": name,
+                    "base_url": base_url,
+                    "secret": secret,
+                    "source": source,
+                },
+                webhook_create_webhook_trigger_params.WebhookCreateWebhookTriggerParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=WebhookCreateWebhookTriggerResponse,
+        )
+
+
+class WebhooksResourceWithRawResponse:
+    def __init__(self, webhooks: WebhooksResource) -> None:
+        self._webhooks = webhooks
+
+        self.create_webhook_trigger = to_raw_response_wrapper(
+            webhooks.create_webhook_trigger,
+        )
+
+
+class AsyncWebhooksResourceWithRawResponse:
+    def __init__(self, webhooks: AsyncWebhooksResource) -> None:
+        self._webhooks = webhooks
+
+        self.create_webhook_trigger = async_to_raw_response_wrapper(
+            webhooks.create_webhook_trigger,
+        )
+
+
+class WebhooksResourceWithStreamingResponse:
+    def __init__(self, webhooks: WebhooksResource) -> None:
+        self._webhooks = webhooks
+
+        self.create_webhook_trigger = to_streamed_response_wrapper(
+            webhooks.create_webhook_trigger,
+        )
+
+
+class AsyncWebhooksResourceWithStreamingResponse:
+    def __init__(self, webhooks: AsyncWebhooksResource) -> None:
+        self._webhooks = webhooks
+
+        self.create_webhook_trigger = async_to_streamed_response_wrapper(
+            webhooks.create_webhook_trigger,
+        )
diff --git a/src/agentex/types/__init__.py b/src/agentex/types/__init__.py
index f04daeb3b..8bb76fa04 100644
--- a/src/agentex/types/__init__.py
+++ b/src/agentex/types/__init__.py
@@ -85,3 +85,9 @@
 from .checkpoint_delete_thread_params import CheckpointDeleteThreadParams as CheckpointDeleteThreadParams
 from .message_list_paginated_response import MessageListPaginatedResponse as MessageListPaginatedResponse
 from .deployment_history_list_response import DeploymentHistoryListResponse as DeploymentHistoryListResponse
+from .webhook_create_webhook_trigger_params import (
+    WebhookCreateWebhookTriggerParams as WebhookCreateWebhookTriggerParams,
+)
+from .webhook_create_webhook_trigger_response import (
+    WebhookCreateWebhookTriggerResponse as WebhookCreateWebhookTriggerResponse,
+)
diff --git a/src/agentex/types/task_update_by_id_params.py b/src/agentex/types/task_update_by_id_params.py
index 8b0f04f11..8d6aa6516 100644
--- a/src/agentex/types/task_update_by_id_params.py
+++ b/src/agentex/types/task_update_by_id_params.py
@@ -9,4 +9,6 @@
 
 
 class TaskUpdateByIDParams(TypedDict, total=False):
+    merge_params: Optional[Dict[str, object]]
+
     task_metadata: Optional[Dict[str, object]]
diff --git a/src/agentex/types/task_update_by_name_params.py b/src/agentex/types/task_update_by_name_params.py
index 07d48df9b..20e1a624c 100644
--- a/src/agentex/types/task_update_by_name_params.py
+++ b/src/agentex/types/task_update_by_name_params.py
@@ -9,4 +9,6 @@
 
 
 class TaskUpdateByNameParams(TypedDict, total=False):
+    merge_params: Optional[Dict[str, object]]
+
     task_metadata: Optional[Dict[str, object]]
diff --git a/src/agentex/types/webhook_create_webhook_trigger_params.py b/src/agentex/types/webhook_create_webhook_trigger_params.py
new file mode 100644
index 000000000..f6a1358bc
--- /dev/null
+++ b/src/agentex/types/webhook_create_webhook_trigger_params.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["WebhookCreateWebhookTriggerParams"]
+
+
+class WebhookCreateWebhookTriggerParams(TypedDict, total=False):
+    agent_name: Required[str]
+    """The agent the webhook drives."""
+
+    forward_path: Required[str]
+    """Subpath the agent's own route handles, e.g.
+
+    'github-pr/<config-id>'. Appended to /agents/forward/name/{agent_name}/ to form
+    the webhook URL.
+    """
+
+    name: Required[str]
+    """
+    Signature-lookup key: the repo full_name (github) or api_app_id (slack) that the
+    forward ingress matches the incoming webhook against.
+    """
+
+    base_url: Optional[str]
+    """
+    Optional public agentex base URL for the returned webhook_url; defaults to the
+    AGENTEX_PUBLIC_URL env var.
+    """
+
+    secret: Optional[str]
+    """Signing secret.
+
+    For GitHub, omit to generate one, or provide an existing webhook secret. For
+    Slack, this is required and must be the Slack app's Signing Secret.
+    """
+
+    source: Literal["internal", "external", "github", "slack"]
+    """Webhook source whose signature is verified (github or slack)."""
diff --git a/src/agentex/types/webhook_create_webhook_trigger_response.py b/src/agentex/types/webhook_create_webhook_trigger_response.py
new file mode 100644
index 000000000..745ce68a1
--- /dev/null
+++ b/src/agentex/types/webhook_create_webhook_trigger_response.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["WebhookCreateWebhookTriggerResponse"]
+
+
+class WebhookCreateWebhookTriggerResponse(BaseModel):
+    agent_name: str
+    """The agent the webhook drives."""
+
+    key_id: str
+    """The created agent API key id."""
+
+    name: str
+    """Signature-lookup key (repo full_name / api_app_id)."""
+
+    secret: str
+    """The signing secret — shown once; paste into the source's webhook config."""
+
+    source: Literal["internal", "external", "github", "slack"]
+    """Webhook source (github or slack)."""
+
+    webhook_path: str
+    """The forward path to POST webhooks to."""
+
+    webhook_url: Optional[str] = None
+    """Full webhook URL to paste into the source (None if no base URL configured)."""
diff --git a/tests/api_resources/test_tasks.py b/tests/api_resources/test_tasks.py
index 0e70529dd..766175fa4 100644
--- a/tests/api_resources/test_tasks.py
+++ b/tests/api_resources/test_tasks.py
@@ -657,6 +657,7 @@ def test_method_update_by_id(self, client: Agentex) -> None:
     def test_method_update_by_id_with_all_params(self, client: Agentex) -> None:
         task = client.tasks.update_by_id(
             task_id="task_id",
+            merge_params={"foo": "bar"},
             task_metadata={"foo": "bar"},
         )
         assert_matches_type(Task, task, path=["response"])
@@ -708,6 +709,7 @@ def test_method_update_by_name(self, client: Agentex) -> None:
     def test_method_update_by_name_with_all_params(self, client: Agentex) -> None:
         task = client.tasks.update_by_name(
             task_name="task_name",
+            merge_params={"foo": "bar"},
             task_metadata={"foo": "bar"},
         )
         assert_matches_type(Task, task, path=["response"])
@@ -1384,6 +1386,7 @@ async def test_method_update_by_id(self, async_client: AsyncAgentex) -> None:
     async def test_method_update_by_id_with_all_params(self, async_client: AsyncAgentex) -> None:
         task = await async_client.tasks.update_by_id(
             task_id="task_id",
+            merge_params={"foo": "bar"},
             task_metadata={"foo": "bar"},
         )
         assert_matches_type(Task, task, path=["response"])
@@ -1435,6 +1438,7 @@ async def test_method_update_by_name(self, async_client: AsyncAgentex) -> None:
     async def test_method_update_by_name_with_all_params(self, async_client: AsyncAgentex) -> None:
         task = await async_client.tasks.update_by_name(
             task_name="task_name",
+            merge_params={"foo": "bar"},
             task_metadata={"foo": "bar"},
         )
         assert_matches_type(Task, task, path=["response"])
diff --git a/tests/api_resources/test_webhooks.py b/tests/api_resources/test_webhooks.py
new file mode 100644
index 000000000..ff32dd719
--- /dev/null
+++ b/tests/api_resources/test_webhooks.py
@@ -0,0 +1,131 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from agentex import Agentex, AsyncAgentex
+from agentex.types import WebhookCreateWebhookTriggerResponse
+
+from ..utils import assert_matches_type
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestWebhooks:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_create_webhook_trigger(self, client: Agentex) -> None:
+        webhook = client.webhooks.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+        )
+        assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_create_webhook_trigger_with_all_params(self, client: Agentex) -> None:
+        webhook = client.webhooks.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+            base_url="base_url",
+            secret="secret",
+            source="internal",
+        )
+        assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_raw_response_create_webhook_trigger(self, client: Agentex) -> None:
+        response = client.webhooks.with_raw_response.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        webhook = response.parse()
+        assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_streaming_response_create_webhook_trigger(self, client: Agentex) -> None:
+        with client.webhooks.with_streaming_response.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            webhook = response.parse()
+            assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncWebhooks:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_create_webhook_trigger(self, async_client: AsyncAgentex) -> None:
+        webhook = await async_client.webhooks.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+        )
+        assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_create_webhook_trigger_with_all_params(self, async_client: AsyncAgentex) -> None:
+        webhook = await async_client.webhooks.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+            base_url="base_url",
+            secret="secret",
+            source="internal",
+        )
+        assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_raw_response_create_webhook_trigger(self, async_client: AsyncAgentex) -> None:
+        response = await async_client.webhooks.with_raw_response.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        webhook = await response.parse()
+        assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_streaming_response_create_webhook_trigger(self, async_client: AsyncAgentex) -> None:
+        async with async_client.webhooks.with_streaming_response.create_webhook_trigger(
+            agent_name="agent_name",
+            forward_path="forward_path",
+            name="name",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            webhook = await response.parse()
+            assert_matches_type(WebhookCreateWebhookTriggerResponse, webhook, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
diff --git a/tests/lib/adk/providers/test_openai_activities.py b/tests/lib/adk/providers/test_openai_activities.py
index c933b6ce4..2f89308a9 100644
--- a/tests/lib/adk/providers/test_openai_activities.py
+++ b/tests/lib/adk/providers/test_openai_activities.py
@@ -335,23 +335,61 @@ async def mock_stream_events():
         expected_params.tools = [CodeInterpreterTool(tool_config={"type": "code_interpreter"})]
         self._assert_starting_agent_params(starting_agent, expected_params)
 
-        # Verify streaming context received tool request and response updates
-        # Should have been called twice - once for tool request, once for response
-        assert mock_streaming_context.stream_update.call_count == 2
+        # Under the unified harness, the OpenAI events are converted to canonical
+        # StreamTaskMessageFull events and auto_send posts each full tool message
+        # by opening a streaming context with the content as initial_content and
+        # closing it (no stream_update). So assert on the opened contents.
+        opened = mock_streaming_context.opened_contents
+        tool_contents = [c for c in opened if getattr(c, "type", None) in ("tool_request", "tool_response")]
+        assert len(tool_contents) == 2
+
+        # First opened context is the tool request.
+        first = tool_contents[0]
+        assert first.type == "tool_request"
+        assert first.name == "code_interpreter"
+        assert first.tool_call_id == "code_interpreter_call_123"
+
+        # Second opened context is the tool response.
+        second = tool_contents[1]
+        assert second.type == "tool_response"
+        assert second.tool_call_id == "code_interpreter_call_123"
 
-        # First call should be tool request
-        first_call = mock_streaming_context.stream_update.call_args_list[0]
-        first_update = first_call[1]["update"]  # keyword argument
-        assert hasattr(first_update, "content")
-        assert first_update.content.name == "code_interpreter"
-        assert first_update.content.tool_call_id == "code_interpreter_call_123"
+    @patch("agents.Runner.run_streamed")
+    async def test_run_agent_streamed_auto_send_forwards_previous_response_id(self, mock_runner_run_streamed):
+        """previous_response_id must reach Runner.run_streamed so a Responses-API
+        conversation continues instead of silently starting fresh."""
+        from agentex.lib.core.temporal.activities.adk.providers.openai_activities import (
+            RunAgentStreamedAutoSendParams,
+        )
 
-        # Second call should be tool response
-        second_call = mock_streaming_context.stream_update.call_args_list[1]
-        second_update = second_call[1]["update"]  # keyword argument
-        assert hasattr(second_update, "content")
-        assert second_update.content.name == "code_interpreter_call"
-        assert second_update.content.tool_call_id == "code_interpreter_call_123"
+        mock_streaming_result = self._create_streaming_result_mock()
+
+        async def _no_events():
+            return
+            yield
+
+        mock_streaming_result.stream_events = _no_events
+        mock_runner_run_streamed.return_value = mock_streaming_result
+
+        mock_tracer = self._create_mock_tracer()
+        openai_service, openai_activities, env = self._create_test_setup(mock_tracer)
+        self._setup_streaming_service_mocks(openai_service)
+
+        params = RunAgentStreamedAutoSendParams(
+            input_list=[{"role": "user", "content": "continue"}],
+            mcp_server_params=[],
+            agent_name="test_agent",
+            agent_instructions="You are a helpful assistant",
+            trace_id="test-trace-id",
+            parent_span_id="test-span-id",
+            task_id="test-task-id",
+            previous_response_id="response_123",
+        )
+
+        await env.run(openai_activities.run_agent_streamed_auto_send, params)
+
+        mock_runner_run_streamed.assert_called_once()
+        assert mock_runner_run_streamed.call_args.kwargs.get("previous_response_id") == "response_123"
 
     def _create_mock_tracer(self):
         """Helper method to create a properly mocked tracer with async context manager support."""
@@ -613,6 +651,60 @@ def _assert_tools_conversion(self, starting_agent, tools_case, _original_tools):
         else:
             raise ValueError(f"Unknown tools_case: {tools_case}")
 
+    @patch("agents.Runner.run_streamed")
+    async def test_run_agent_streamed_auto_send_forwards_created_at(self, mock_runner_run_streamed):
+        """created_at is forwarded to every streaming context opened by auto_send_turn (AGX1-378)."""
+        from datetime import datetime, timezone
+
+        from agentex.lib.core.temporal.activities.adk.providers.openai_activities import (
+            RunAgentStreamedAutoSendParams,
+        )
+
+        deterministic_ts = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
+
+        mock_streaming_result = self._create_streaming_result_mock()
+
+        # Emit a tool call + tool response so auto_send actually opens streaming
+        # contexts; an empty stream opens none, making the assertion below
+        # vacuously true and unable to catch a created_at regression.
+        async def mock_stream_events():
+            tool_call_event = Mock()
+            tool_call_event.type = "run_item_stream_event"
+            tool_call_event.item = self._create_tool_call_item_mock(self._create_code_interpreter_tool_call_mock())
+            yield tool_call_event
+
+            tool_response_event = Mock()
+            tool_response_event.type = "run_item_stream_event"
+            tool_response_event.item = self._create_tool_output_item_mock()
+            yield tool_response_event
+
+        mock_streaming_result.stream_events = mock_stream_events
+        mock_runner_run_streamed.return_value = mock_streaming_result
+
+        mock_tracer = self._create_mock_tracer()
+        openai_service, openai_activities, env = self._create_test_setup(mock_tracer)
+        mock_ctx, recorded_created_ats = self._setup_streaming_service_mocks_with_created_at(openai_service)
+
+        params = RunAgentStreamedAutoSendParams(
+            input_list=[{"role": "user", "content": "hello"}],
+            mcp_server_params=[],
+            agent_name="test_agent",
+            agent_instructions="You are a helpful assistant",
+            trace_id="test-trace-id",
+            parent_span_id="test-span-id",
+            task_id="test-task-id",
+            created_at=deterministic_ts,
+        )
+
+        await env.run(openai_activities.run_agent_streamed_auto_send, params)
+
+        # Guard against a vacuous pass: at least one streaming context must have
+        # been opened so the per-context created_at assertion is meaningful.
+        assert recorded_created_ats, "expected at least one streaming context to be opened"
+        assert all(ts == deterministic_ts for ts in recorded_created_ats), (
+            f"Expected all streaming contexts to receive created_at={deterministic_ts!r}, got: {recorded_created_ats!r}"
+        )
+
     def _setup_streaming_service_mocks(self, openai_service):
         """Helper method to setup streaming service mocks for run_agent_auto_send."""
         from unittest.mock import AsyncMock
@@ -635,21 +727,64 @@ def _setup_streaming_service_mocks(self, openai_service):
         mock_streaming_context.task_message = mock_task_message
         mock_streaming_context.stream_update = AsyncMock()
 
+        # Record the initial_content passed to each opened streaming context.
+        # The unified harness auto_send path posts full tool messages by opening
+        # a context with initial_content and closing it (no stream_update), so
+        # assertions inspect the opened contents rather than stream_update calls.
+        opened_contents: list = []
+
         # Create a proper async context manager mock
         from contextlib import asynccontextmanager
         from unittest.mock import AsyncMock
 
         @asynccontextmanager
-        async def mock_streaming_context_manager(*_args, **_kwargs):
+        async def mock_streaming_context_manager(*_args, **kwargs):
+            if "initial_content" in kwargs:
+                opened_contents.append(kwargs["initial_content"])
             yield mock_streaming_context
 
         mock_streaming_service.streaming_task_message_context = mock_streaming_context_manager
+        # Expose the recorded contents on the returned context mock for assertions.
+        mock_streaming_context.opened_contents = opened_contents
 
         openai_service.streaming_service = mock_streaming_service
         openai_service.agentex_client = mock_agentex_client
 
         return mock_streaming_context
 
+    def _setup_streaming_service_mocks_with_created_at(self, openai_service):
+        """Like _setup_streaming_service_mocks but also records every created_at kwarg."""
+        from contextlib import asynccontextmanager
+        from unittest.mock import AsyncMock
+
+        from agentex.types.task_message import TaskMessage
+
+        mock_streaming_service = AsyncMock()
+        mock_agentex_client = AsyncMock()
+
+        mock_streaming_context = AsyncMock()
+        mock_task_message = Mock(spec=TaskMessage)
+        mock_task_message.id = "test-task-message-id"
+        mock_task_message.task_id = "test-task-id"
+        mock_task_message.content = {"type": "text", "content": "test"}
+        mock_streaming_context.task_message = mock_task_message
+        mock_streaming_context.stream_update = AsyncMock()
+
+        recorded_created_ats: list = []
+
+        @asynccontextmanager
+        async def mock_ctx_manager(*_args, **kwargs):
+            recorded_created_ats.append(kwargs.get("created_at"))
+            yield mock_streaming_context
+
+        mock_streaming_service.streaming_task_message_context = mock_ctx_manager
+        mock_streaming_context.opened_contents = []
+
+        openai_service.streaming_service = mock_streaming_service
+        openai_service.agentex_client = mock_agentex_client
+
+        return mock_streaming_context, recorded_created_ats
+
     def _create_code_interpreter_tool_call_mock(self, call_id="code_interpreter_call_123"):
         """Helper to create ResponseCodeInterpreterToolCall mock objects."""
         return ResponseCodeInterpreterToolCall(
@@ -680,6 +815,9 @@ def _create_streaming_result_mock(self, final_output="Code executed successfully
         mock_streaming_result = Mock(spec=RunResultStreaming)
         mock_streaming_result.final_output = final_output
         mock_streaming_result.new_items = []
+        # OpenAITurn reads raw_responses after stream exhaustion to aggregate
+        # usage; provide an empty list so usage normalizes to model-only.
+        mock_streaming_result.raw_responses = []
         mock_streaming_result.final_input_list = [
             {"role": "user", "content": "Run some Python code"},
             {"role": "assistant", "content": final_output},
diff --git a/tests/lib/adk/providers/test_openai_turn.py b/tests/lib/adk/providers/test_openai_turn.py
new file mode 100644
index 000000000..47a9ba9fe
--- /dev/null
+++ b/tests/lib/adk/providers/test_openai_turn.py
@@ -0,0 +1,248 @@
+"""Tests for OpenAITurn and its usage mapping.
+
+OpenAITurn adapts an OpenAI Agents SDK streamed run onto the harness
+``HarnessTurn`` protocol. These tests cover:
+- ``openai_usage_to_turn_usage`` (full usage, None, real zeros)
+- ``_aggregate_usage`` (empty, single, multiple ModelResponses)
+- ``OpenAITurn.events`` driven by an injected canonical stream (bypassing the
+  OpenAI->canonical converter), plus ``usage()`` before/after exhaustion
+- the ``ValueError`` guard when neither ``result`` nor ``stream`` is supplied
+"""
+
+import types as _types
+
+import pytest
+from agents.usage import Usage
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
+
+from agentex.types.text_content import TextContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+
+
+def _import_target():
+    from agentex.lib.adk.providers._modules.openai_turn import (
+        OpenAITurn,
+        _aggregate_usage,
+        openai_usage_to_turn_usage,
+    )
+
+    return OpenAITurn, _aggregate_usage, openai_usage_to_turn_usage
+
+
+# ---------------------------------------------------------------------------
+# openai_usage_to_turn_usage
+# ---------------------------------------------------------------------------
+
+
+def test_usage_mapping_full():
+    _, _, openai_usage_to_turn_usage = _import_target()
+    usage = Usage(
+        requests=3,
+        input_tokens=100,
+        input_tokens_details=InputTokensDetails(cached_tokens=20),
+        output_tokens=50,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=10),
+        total_tokens=150,
+    )
+    turn_usage = openai_usage_to_turn_usage(usage, model="gpt-4o")
+
+    assert turn_usage.model == "gpt-4o"
+    assert turn_usage.num_llm_calls == 3
+    assert turn_usage.input_tokens == 100
+    assert turn_usage.cached_input_tokens == 20
+    assert turn_usage.output_tokens == 50
+    assert turn_usage.reasoning_tokens == 10
+    assert turn_usage.total_tokens == 150
+
+
+def test_usage_mapping_none_usage():
+    _, _, openai_usage_to_turn_usage = _import_target()
+    turn_usage = openai_usage_to_turn_usage(None, model="gpt-4o")
+
+    assert turn_usage.model == "gpt-4o"
+    # num_llm_calls is None ("not reported") when no usage is present, matching
+    # the token fields below; a real 0 is only reported when the provider says so.
+    assert turn_usage.num_llm_calls is None
+    assert turn_usage.input_tokens is None
+    assert turn_usage.output_tokens is None
+    assert turn_usage.total_tokens is None
+
+
+def test_usage_mapping_real_zeros_are_preserved():
+    # A cache hit can legitimately produce 0 output tokens; a present-but-zero
+    # value must survive as 0, not be coerced to None.
+    _, _, openai_usage_to_turn_usage = _import_target()
+    usage = Usage(
+        requests=1,
+        input_tokens=0,
+        input_tokens_details=InputTokensDetails(cached_tokens=0),
+        output_tokens=0,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
+        total_tokens=0,
+    )
+    turn_usage = openai_usage_to_turn_usage(usage, model="m")
+
+    assert turn_usage.input_tokens == 0
+    assert turn_usage.cached_input_tokens == 0
+    assert turn_usage.output_tokens == 0
+    assert turn_usage.reasoning_tokens == 0
+    assert turn_usage.total_tokens == 0
+    assert turn_usage.num_llm_calls == 1
+
+
+# ---------------------------------------------------------------------------
+# _aggregate_usage
+# ---------------------------------------------------------------------------
+
+
+def _resp(usage):
+    return _types.SimpleNamespace(usage=usage)
+
+
+def test_aggregate_usage_empty():
+    _, _aggregate_usage, _ = _import_target()
+    assert _aggregate_usage([]) is None
+
+
+def test_aggregate_usage_single():
+    _, _aggregate_usage, _ = _import_target()
+    usage = Usage(requests=1, input_tokens=10, output_tokens=5, total_tokens=15)
+    total = _aggregate_usage([_resp(usage)])
+
+    assert total is not None
+    assert total.requests == 1
+    assert total.input_tokens == 10
+    assert total.output_tokens == 5
+    assert total.total_tokens == 15
+
+
+def test_aggregate_usage_multiple():
+    _, _aggregate_usage, _ = _import_target()
+    u1 = Usage(
+        requests=1,
+        input_tokens=10,
+        input_tokens_details=InputTokensDetails(cached_tokens=2),
+        output_tokens=5,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=1),
+        total_tokens=15,
+    )
+    u2 = Usage(
+        requests=2,
+        input_tokens=20,
+        input_tokens_details=InputTokensDetails(cached_tokens=3),
+        output_tokens=7,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=4),
+        total_tokens=27,
+    )
+    # A response without usage must be skipped, not crash the aggregation.
+    total = _aggregate_usage([_resp(u1), _resp(None), _resp(u2)])
+
+    assert total is not None
+    assert total.requests == 3
+    assert total.input_tokens == 30
+    assert total.output_tokens == 12
+    assert total.total_tokens == 42
+    assert total.input_tokens_details.cached_tokens == 5
+    assert total.output_tokens_details.reasoning_tokens == 5
+
+
+# ---------------------------------------------------------------------------
+# OpenAITurn.events / usage / construction
+# ---------------------------------------------------------------------------
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_turn_events_forwards_injected_stream():
+    OpenAITurn, _, _ = _import_target()
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+
+    out = [e async for e in turn.events]
+    assert out == events
+
+
+@pytest.mark.asyncio
+async def test_turn_usage_before_and_after_exhaustion_with_injected_stream():
+    OpenAITurn, _, _ = _import_target()
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+
+    # Before exhaustion: usage carries only the model name.
+    before = turn.usage()
+    assert before.model == "gpt-4o"
+    assert before.input_tokens is None
+
+    async for _ in turn.events:
+        pass
+
+    # With an injected stream there is no run to read usage from, so usage
+    # stays model-only after exhaustion.
+    after = turn.usage()
+    assert after.model == "gpt-4o"
+    assert after.input_tokens is None
+
+
+@pytest.mark.asyncio
+async def test_turn_usage_populated_from_result_after_exhaustion():
+    OpenAITurn, _, _ = _import_target()
+
+    canonical = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+
+    class _FakeResult:
+        def __init__(self):
+            self.raw_responses = [
+                _resp(Usage(requests=1, input_tokens=8, output_tokens=4, total_tokens=12)),
+            ]
+
+        def stream_events(self):
+            # OpenAITurn passes this to convert_openai_to_agentex_events; we
+            # monkeypatch that converter below so this can yield canonical events.
+            return _canonical_stream(canonical)
+
+    import agentex.lib.adk.providers._modules.openai_turn as mod
+
+    async def _passthrough(stream):
+        async for e in stream:
+            yield e
+
+    original = mod.convert_openai_to_agentex_events
+    mod.convert_openai_to_agentex_events = _passthrough
+    try:
+        turn = OpenAITurn(result=_FakeResult(), model="gpt-4o")
+        out = [e async for e in turn.events]
+    finally:
+        mod.convert_openai_to_agentex_events = original
+
+    assert out == canonical
+    usage = turn.usage()
+    assert usage.model == "gpt-4o"
+    assert usage.num_llm_calls == 1
+    assert usage.input_tokens == 8
+    assert usage.output_tokens == 4
+    assert usage.total_tokens == 12
+
+
+def test_turn_requires_result_or_stream():
+    OpenAITurn, _, _ = _import_target()
+    with pytest.raises(ValueError, match="either"):
+        OpenAITurn()
diff --git a/tests/lib/adk/test_claude_code_sync.py b/tests/lib/adk/test_claude_code_sync.py
new file mode 100644
index 000000000..6dd36d973
--- /dev/null
+++ b/tests/lib/adk/test_claude_code_sync.py
@@ -0,0 +1,637 @@
+"""Tests for the claude-code stream-json -> Agentex StreamTaskMessage* converter."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+# ---------------------------------------------------------------------------
+# Text content
+# ---------------------------------------------------------------------------
+
+
+class TestTextContent:
+    async def test_text_block_in_assistant_message_emits_start_delta_done(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Hello world"}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 3
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[0].content, TextContent)
+        assert out[0].content.content == ""
+        assert isinstance(out[1], StreamTaskMessageDelta)
+        assert isinstance(out[1].delta, TextDelta)
+        assert out[1].delta.text_delta == "Hello world"
+        assert isinstance(out[2], StreamTaskMessageDone)
+        assert out[0].index == out[1].index == out[2].index
+
+    async def test_empty_text_block_is_skipped(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": ""}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+    async def test_streamed_text_via_stream_event_emits_start_deltas_done(self):
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_start", "index": 0, "content_block": {"type": "text"}},
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "text_delta", "text": "Hello"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "text_delta", "text": " world"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, TextContent)
+        assert len(deltas) == 2
+        assert isinstance(deltas[0].delta, TextDelta)
+        assert deltas[0].delta.text_delta == "Hello"
+        assert isinstance(deltas[1].delta, TextDelta)
+        assert deltas[1].delta.text_delta == " world"
+        assert len(dones) == 1
+
+    async def test_streamed_text_not_re_emitted_by_assistant_block(self):
+        """After stream_event triple, the final assistant block must not re-emit the text."""
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": {"type": "text"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "text_delta", "text": "streamed"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+            # Final assistant message with same text — must NOT be re-emitted
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "streamed"}]},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        text_starts = [e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, TextContent)]
+        assert len(text_starts) == 1, "Text block must not be emitted twice"
+
+    async def test_later_turn_non_streamed_text_not_dropped(self):
+        """A non-streamed text block in a later turn must not be dropped because an
+        earlier turn streamed a block at the same index."""
+        envelopes = [
+            # Turn 1: streamed text at index 0 (dedup'd against the materialised msg).
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_start", "index": 0, "content_block": {"type": "text"}},
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "first"}},
+            },
+            {"type": "stream_event", "event": {"type": "content_block_stop", "index": 0}},
+            {"type": "assistant", "message": {"content": [{"type": "text", "text": "first"}]}},
+            # Turn 2: a NON-streamed text block, also at index 0.
+            {"type": "assistant", "message": {"content": [{"type": "text", "text": "second"}]}},
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        deltas = [
+            e.delta.text_delta for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, TextDelta)
+        ]
+        assert deltas == ["first", "second"], "Later turn's non-streamed text must still be delivered"
+
+
+# ---------------------------------------------------------------------------
+# Thinking / reasoning content
+# ---------------------------------------------------------------------------
+
+
+class TestThinkingContent:
+    async def test_thinking_block_emits_reasoning_start_delta_done(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": "Let me reason..."}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 3
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[0].content, ReasoningContent)
+        # Summary must be populated from the thinking text
+        assert out[0].content.summary == ["Let me reason..."]
+        assert isinstance(out[1], StreamTaskMessageDelta)
+        assert isinstance(out[1].delta, ReasoningContentDelta)
+        assert out[1].delta.content_delta == "Let me reason..."
+        assert out[1].delta.content_index == 0
+        assert isinstance(out[2], StreamTaskMessageDone)
+
+    async def test_empty_thinking_block_is_skipped(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": ""}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+    async def test_streamed_thinking_emits_reasoning_start_deltas_done(self):
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": {"type": "thinking"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "thinking_delta", "thinking": "step one"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "thinking_delta", "thinking": " step two"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ReasoningContent)
+        assert len(deltas) == 2
+        assert isinstance(deltas[0].delta, ReasoningContentDelta)
+        assert deltas[0].delta.content_delta == "step one"
+        assert isinstance(deltas[1].delta, ReasoningContentDelta)
+        assert deltas[1].delta.content_delta == " step two"
+        assert len(dones) == 1
+
+    async def test_two_streamed_thinking_blocks_not_re_emitted(self):
+        """A turn that streams two thinking blocks must claim both indices, so the
+        final assistant envelope does not re-emit the second one."""
+
+        def _thinking_block(idx: int, text: str) -> list:
+            return [
+                {
+                    "type": "stream_event",
+                    "event": {"type": "content_block_start", "index": idx, "content_block": {"type": "thinking"}},
+                },
+                {
+                    "type": "stream_event",
+                    "event": {
+                        "type": "content_block_delta",
+                        "index": idx,
+                        "delta": {"type": "thinking_delta", "thinking": text},
+                    },
+                },
+                {"type": "stream_event", "event": {"type": "content_block_stop", "index": idx}},
+            ]
+
+        envelopes = [
+            *_thinking_block(0, "first thought"),
+            *_thinking_block(1, "second thought"),
+            # Final assistant envelope repeats both thinking blocks — neither should re-emit.
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {"type": "thinking", "thinking": "first thought"},
+                        {"type": "thinking", "thinking": "second thought"},
+                    ]
+                },
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        reasoning_starts = [
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ReasoningContent)
+        ]
+        assert len(reasoning_starts) == 2, "each streamed thinking block emitted exactly once (no duplicate)"
+
+    async def test_thinking_block_start_with_no_deltas_allows_assistant_to_fill(self):
+        """A thinking block_start without any deltas leaves the final assistant block
+        free to emit the thinking text (the block index is not claimed as streamed)."""
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": {"type": "thinking"},
+                },
+            },
+            # No thinking_delta — close block immediately
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+            # Final assistant message has the thinking text
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": "delayed thinking"}]},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        # The assistant block should produce a full thinking message (Start+Delta+Done)
+        reasoning_starts = [
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ReasoningContent)
+        ]
+        # There will be the empty start from stream_event, plus the one from assistant block
+        reasoning_deltas = [
+            e for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningContentDelta)
+        ]
+        assert len(reasoning_deltas) >= 1
+        assert any(
+            isinstance(d.delta, ReasoningContentDelta) and d.delta.content_delta == "delayed thinking"
+            for d in reasoning_deltas
+        )
+
+
+# ---------------------------------------------------------------------------
+# Tool calls and results
+# ---------------------------------------------------------------------------
+
+
+class TestToolCallsAndResults:
+    async def test_tool_use_block_emits_start_done(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_use",
+                            "id": "call_abc",
+                            "name": "Bash",
+                            "input": {"command": "ls /"},
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 2
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[0].content, ToolRequestContent)
+        assert out[0].content.tool_call_id == "call_abc"
+        assert out[0].content.name == "Bash"
+        assert out[0].content.arguments == {"command": "ls /"}
+        assert isinstance(out[1], StreamTaskMessageDone)
+
+    async def test_tool_result_block_emits_full(self):
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "call_abc",
+                            "content": "file1.txt\nfile2.txt",
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, ToolResponseContent)
+        assert out[0].content.tool_call_id == "call_abc"
+        assert "file1.txt" in str(out[0].content.content)
+
+    async def test_tool_result_list_content_joined(self):
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "tid",
+                            "content": [
+                                {"type": "text", "text": "line1"},
+                                {"type": "text", "text": "line2"},
+                            ],
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, ToolResponseContent)
+        payload = str(out[0].content.content)
+        assert "line1" in payload
+        assert "line2" in payload
+
+    async def test_tool_result_error_flag_passed_through(self):
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "err_call",
+                            "content": "Permission denied",
+                            "is_error": True,
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, ToolResponseContent)
+        assert isinstance(out[0].content.content, dict)
+        assert out[0].content.content.get("is_error") is True
+
+    async def test_tool_result_truncation(self):
+        long_result = "x" * 5000
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "t",
+                            "content": long_result,
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        result_str = out[0].content.content.get("result", "")
+        assert len(result_str) <= 4000
+
+
+# ---------------------------------------------------------------------------
+# on_result callback
+# ---------------------------------------------------------------------------
+
+
+class TestOnResult:
+    async def test_on_result_called_with_result_envelope(self):
+        captured: list[dict] = []
+
+        async def capture(envelope):
+            captured.append(envelope)
+
+        envelopes = [
+            {
+                "type": "result",
+                "session_id": "sess123",
+                "cost_usd": 0.012,
+                "usage": {"input_tokens": 100, "output_tokens": 50},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes), on_result=capture))
+
+        # result envelope does not emit any StreamTaskMessage
+        assert out == []
+        assert len(captured) == 1
+        assert captured[0]["session_id"] == "sess123"
+        assert captured[0]["cost_usd"] == pytest.approx(0.012)
+
+    async def test_on_result_not_called_when_no_result_envelope(self):
+        captured: list[dict] = []
+
+        async def capture(envelope):
+            captured.append(envelope)
+
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Hi"}]},
+            }
+        ]
+        await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes), on_result=capture))
+        assert captured == []
+
+    async def test_no_on_result_does_not_raise(self):
+        envelopes = [
+            {
+                "type": "result",
+                "cost_usd": 0.001,
+                "usage": {"input_tokens": 10, "output_tokens": 5},
+            }
+        ]
+        # Should not raise even without a callback
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+
+# ---------------------------------------------------------------------------
+# Message indexing
+# ---------------------------------------------------------------------------
+
+
+class TestMessageIndexing:
+    async def test_multiple_blocks_get_distinct_indices(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {"type": "text", "text": "First"},
+                        {
+                            "type": "tool_use",
+                            "id": "c1",
+                            "name": "Read",
+                            "input": {"path": "/tmp"},
+                        },
+                    ]
+                },
+            },
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "c1",
+                            "content": "some content",
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Done"}]},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        # Gather all Start/Full events and check indices are monotonically increasing
+        anchors = [e for e in out if isinstance(e, (StreamTaskMessageStart, StreamTaskMessageFull))]
+        indices = [e.index for e in anchors]
+        assert indices == sorted(indices), "Indices must be monotonically increasing"
+        assert len(set(indices)) == len(indices), "All indices must be distinct"
+
+    async def test_system_init_and_unknown_envelopes_produce_no_output(self):
+        envelopes = [
+            {"type": "system", "subtype": "init", "session_id": "sess"},
+            {"type": "unknown_future_type", "data": "whatever"},
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+    async def test_non_json_string_lines_are_skipped(self):
+        lines = [
+            "not json at all",
+            '{"type": "assistant", "message": {"content": [{"type": "text", "text": "hi"}]}}',
+        ]
+
+        async def _str_iter():
+            for line in lines:
+                yield line
+
+        out = await _collect(convert_claude_code_to_agentex_events(_str_iter()))
+        assert len(out) == 3  # Start + Delta + Done for the text block
+
+    async def test_empty_lines_are_skipped(self):
+        lines = ["", "  ", '{"type": "system", "subtype": "init"}']
+
+        async def _str_iter():
+            for line in lines:
+                yield line
+
+        out = await _collect(convert_claude_code_to_agentex_events(_str_iter()))
+        assert out == []
+
+
+# ---------------------------------------------------------------------------
+# Author
+# ---------------------------------------------------------------------------
+
+
+class TestContentAuthors:
+    @pytest.mark.parametrize(
+        "envelope",
+        [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "hi"}]},
+            },
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": "thoughts"}]},
+            },
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_use",
+                            "id": "c",
+                            "name": "t",
+                            "input": {},
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "c",
+                            "content": "ok",
+                        }
+                    ]
+                },
+            },
+        ],
+    )
+    async def test_all_content_authored_by_agent(self, envelope: dict):
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter([envelope])))
+        for e in out:
+            content = getattr(e, "content", None)
+            if content is not None and hasattr(content, "author"):
+                assert content.author == "agent"
diff --git a/tests/lib/adk/test_claude_code_turn.py b/tests/lib/adk/test_claude_code_turn.py
new file mode 100644
index 000000000..4fbb2f913
--- /dev/null
+++ b/tests/lib/adk/test_claude_code_turn.py
@@ -0,0 +1,283 @@
+"""Tests for ClaudeCodeTurn and claude_code_usage_to_turn_usage."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.lib.core.harness.types import TurnUsage, HarnessTurn
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._claude_code_turn import (
+    ClaudeCodeTurn,
+    claude_code_usage_to_turn_usage,
+)
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _drain(turn: ClaudeCodeTurn) -> list[Any]:
+    return [e async for e in turn.events]
+
+
+# ---------------------------------------------------------------------------
+# Usage normalization
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeUsageToTurnUsage:
+    def test_full_usage_fields(self):
+        result = {
+            "usage": {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "cache_read_input_tokens": 20,
+                "cache_creation_input_tokens": 5,
+            },
+            "cost_usd": 0.025,
+            "duration_ms": 3200,
+            "num_turns": 3,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+
+        assert usage.input_tokens == 100
+        assert usage.output_tokens == 50
+        assert usage.cached_input_tokens == 25  # 20 + 5
+        assert usage.total_tokens == 150
+        assert usage.cost_usd == pytest.approx(0.025)
+        assert usage.duration_ms == 3200
+        assert usage.num_llm_calls == 3
+
+    def test_total_cost_usd_fallback(self):
+        """total_cost_usd should be used when cost_usd is absent."""
+        result = {
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "total_cost_usd": 0.001,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cost_usd == pytest.approx(0.001)
+
+    def test_cost_usd_takes_precedence_over_total_cost_usd(self):
+        result = {
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "cost_usd": 0.002,
+            "total_cost_usd": 0.999,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cost_usd == pytest.approx(0.002)
+
+    def test_missing_usage_key_returns_nones(self):
+        result: dict[str, Any] = {}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.input_tokens is None
+        assert usage.output_tokens is None
+        assert usage.cached_input_tokens is None
+        assert usage.total_tokens is None
+        assert usage.cost_usd is None
+        assert usage.duration_ms is None
+        assert usage.num_llm_calls is None
+
+    def test_real_zeros_preserved(self):
+        result = {
+            "usage": {
+                "input_tokens": 0,
+                "output_tokens": 0,
+                "cache_read_input_tokens": 0,
+                "cache_creation_input_tokens": 0,
+            },
+            "cost_usd": 0.0,
+            "duration_ms": 0,
+            "num_turns": 0,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.input_tokens == 0
+        assert usage.output_tokens == 0
+        assert usage.cached_input_tokens == 0
+        assert usage.total_tokens == 0
+        assert usage.cost_usd == pytest.approx(0.0)
+        assert usage.duration_ms == 0
+        assert usage.num_llm_calls == 0
+
+    def test_only_cache_read_no_creation(self):
+        result = {
+            "usage": {
+                "input_tokens": 50,
+                "output_tokens": 25,
+                "cache_read_input_tokens": 15,
+            }
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cached_input_tokens == 15
+
+    def test_only_cache_creation_no_read(self):
+        result = {
+            "usage": {
+                "input_tokens": 50,
+                "output_tokens": 25,
+                "cache_creation_input_tokens": 10,
+            }
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cached_input_tokens == 10
+
+    def test_no_cache_fields_gives_none(self):
+        result = {"usage": {"input_tokens": 10, "output_tokens": 5}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cached_input_tokens is None
+
+    def test_total_tokens_computed_from_input_output(self):
+        result = {"usage": {"input_tokens": 70, "output_tokens": 30}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.total_tokens == 100
+
+    def test_missing_output_tokens_leaves_total_none(self):
+        result = {"usage": {"input_tokens": 70}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.total_tokens is None
+
+    def test_returns_turn_usage_instance(self):
+        result = {"usage": {"input_tokens": 1, "output_tokens": 1}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert isinstance(usage, TurnUsage)
+
+
+# ---------------------------------------------------------------------------
+# ClaudeCodeTurn protocol
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeTurnProtocol:
+    def test_satisfies_harness_turn_protocol(self):
+        """ClaudeCodeTurn must satisfy the HarnessTurn structural protocol."""
+        turn = ClaudeCodeTurn(_aiter([]))
+        assert isinstance(turn, HarnessTurn)
+
+    async def test_events_yields_stream_task_messages(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Hi there"}]},
+            }
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        out = await _drain(turn)
+        assert len(out) == 3
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[1], StreamTaskMessageDelta)
+        assert isinstance(out[2], StreamTaskMessageDone)
+
+    async def test_usage_before_drain_returns_empty(self):
+        envelopes = [
+            {
+                "type": "result",
+                "usage": {"input_tokens": 100, "output_tokens": 50},
+                "cost_usd": 0.01,
+            }
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        # usage() called before events drained — no result envelope yet
+        usage = turn.usage()
+        assert isinstance(usage, TurnUsage)
+        assert usage.input_tokens is None
+
+    async def test_usage_after_drain_reflects_result(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "response"}]},
+            },
+            {
+                "type": "result",
+                "usage": {"input_tokens": 200, "output_tokens": 80},
+                "cost_usd": 0.015,
+                "num_turns": 2,
+            },
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        await _drain(turn)
+        usage = turn.usage()
+
+        assert usage.input_tokens == 200
+        assert usage.output_tokens == 80
+        assert usage.cost_usd == pytest.approx(0.015)
+        assert usage.num_llm_calls == 2
+
+    async def test_usage_empty_when_no_result_envelope(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "no result"}]},
+            }
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        await _drain(turn)
+        usage = turn.usage()
+        assert usage.input_tokens is None
+        assert usage.cost_usd is None
+
+    async def test_tool_call_and_result_round_trip(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_use",
+                            "id": "call_1",
+                            "name": "Read",
+                            "input": {"path": "/etc/hosts"},
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "call_1",
+                            "content": "127.0.0.1 localhost",
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "result",
+                "usage": {"input_tokens": 50, "output_tokens": 20},
+                "cost_usd": 0.005,
+            },
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        out = await _drain(turn)
+        usage = turn.usage()
+
+        tool_starts = [
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ToolResponseContent)
+        ]
+        tool_fulls = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        ]
+        assert len(tool_fulls) == 1
+        full_content = tool_fulls[0].content
+        assert isinstance(full_content, ToolResponseContent)
+        assert full_content.tool_call_id == "call_1"
+
+        assert usage.input_tokens == 50
+        assert usage.output_tokens == 20
+
+    async def test_events_property_returns_same_iterator(self):
+        """Accessing .events multiple times returns the same iterator (not a new one each call)."""
+        turn = ClaudeCodeTurn(_aiter([]))
+        it1 = turn.events
+        it2 = turn.events
+        assert it1 is it2
diff --git a/tests/lib/adk/test_codex_sync.py b/tests/lib/adk/test_codex_sync.py
new file mode 100644
index 000000000..644688dfb
--- /dev/null
+++ b/tests/lib/adk/test_codex_sync.py
@@ -0,0 +1,720 @@
+"""Offline tests for the codex event-stream parser tap.
+
+Tests cover:
+- Text streaming (agent_message items)
+- Tool call streaming (command_execution, mcp_tool_call, file_change)
+- Reasoning streaming (reasoning items)
+- Multi-step turns
+- Error events (top-level + item-level)
+- Edge cases: empty events, non-JSON lines, unknown types
+- on_result callback (session_id, usage, counters)
+- file_change synthesized start (no item.started emitted by codex)
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, AsyncIterator
+
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.task_message_content import TextContent
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._codex_sync import (
+    _truncate,
+    _tool_args_for,
+    _tool_name_for,
+    _tool_output_for,
+    convert_codex_to_agentex_events,
+)
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta
+
+
+async def _aiter(items: list[Any]) -> AsyncIterator[Any]:
+    for item in items:
+        yield item
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+class TestHelpers:
+    def test_truncate_short(self) -> None:
+        assert _truncate("hello", max_len=10) == "hello"
+
+    def test_truncate_long(self) -> None:
+        assert _truncate("a" * 5000) == "a" * 4000
+
+    def test_tool_name_command_execution(self) -> None:
+        assert _tool_name_for("command_execution", {}) == "bash"
+
+    def test_tool_name_file_change(self) -> None:
+        assert _tool_name_for("file_change", {}) == "file_change"
+
+    def test_tool_name_mcp_with_server_and_tool(self) -> None:
+        assert _tool_name_for("mcp_tool_call", {"server": "fs", "tool": "read"}) == "fs.read"
+
+    def test_tool_name_mcp_empty(self) -> None:
+        assert _tool_name_for("mcp_tool_call", {}) == "mcp_tool_call"
+
+    def test_tool_name_unknown(self) -> None:
+        assert _tool_name_for("", {}) == "unknown"
+
+    def test_tool_args_command(self) -> None:
+        assert _tool_args_for("command_execution", {"command": "ls"}) == {"command": "ls"}
+
+    def test_tool_args_file_change(self) -> None:
+        assert _tool_args_for("file_change", {"changes": ["a"]}) == {"changes": ["a"]}
+
+    def test_tool_args_mcp_dict(self) -> None:
+        assert _tool_args_for("mcp_tool_call", {"arguments": {"k": "v"}}) == {"k": "v"}
+
+    def test_tool_args_mcp_non_dict(self) -> None:
+        assert _tool_args_for("mcp_tool_call", {"arguments": "str"}) == {"value": "str"}
+
+    def test_tool_output_command_success(self) -> None:
+        text, is_err = _tool_output_for("command_execution", {"aggregated_output": "hello", "exit_code": 0})
+        assert text == "hello"
+        assert is_err is False
+
+    def test_tool_output_command_error(self) -> None:
+        _, is_err = _tool_output_for("command_execution", {"aggregated_output": "boom", "exit_code": 1})
+        assert is_err is True
+
+    def test_tool_output_mcp_error(self) -> None:
+        text, is_err = _tool_output_for("mcp_tool_call", {"error": {"message": "not found"}})
+        assert "not found" in text
+        assert is_err is True
+
+    def test_tool_output_mcp_result(self) -> None:
+        text, is_err = _tool_output_for("mcp_tool_call", {"result": {"data": 1}})
+        assert json.loads(text) == {"data": 1}
+        assert is_err is False
+
+    def test_tool_output_file_change_failed(self) -> None:
+        _, is_err = _tool_output_for("file_change", {"status": "failed", "changes": []})
+        assert is_err is True
+
+    def test_tool_output_file_change_ok(self) -> None:
+        text, is_err = _tool_output_for("file_change", {"status": "ok", "changes": [1, 2]})
+        assert "2 changes" in text
+        assert is_err is False
+
+
+# ---------------------------------------------------------------------------
+# Text streaming
+# ---------------------------------------------------------------------------
+
+
+class TestTextStreaming:
+    async def test_text_start_delta_done(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "Hi"}},
+            {"type": "item.updated", "item": {"id": "m1", "type": "agent_message", "text": "Hi!"}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": "Hi! Done"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, TextContent)
+        assert len(deltas) >= 1
+        all_delta_text = "".join(
+            d.delta.text_delta for d in deltas if isinstance(d.delta, TextDelta) and d.delta.text_delta is not None
+        )
+        assert "Hi" in all_delta_text
+        assert len(dones) == 1
+
+    async def test_text_indices_are_monotonic(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "A"}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": "A"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        anchor = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        done = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+        assert anchor[0].index == done[0].index
+
+    async def test_empty_text_no_delta(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": ""}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": ""}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        assert deltas == []
+
+    async def test_text_author_is_agent(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "X"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        for e in out:
+            content = getattr(e, "content", None)
+            if content and hasattr(content, "author"):
+                assert content.author == "agent"
+
+
+# ---------------------------------------------------------------------------
+# Tool call streaming
+# ---------------------------------------------------------------------------
+
+
+class TestToolCallStreaming:
+    async def test_command_execution_start_done_full(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "echo hello",
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "echo hello",
+                    "aggregated_output": "hello",
+                    "exit_code": 0,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+        fulls = [e for e in out if isinstance(e, StreamTaskMessageFull)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ToolRequestContent)
+        assert starts[0].content.name == "bash"
+        assert starts[0].content.arguments == {"command": "echo hello"}
+        assert starts[0].content.tool_call_id == "t1"
+
+        assert len(dones) == 1
+
+        assert len(fulls) == 1
+        assert isinstance(fulls[0].content, ToolResponseContent)
+        resp_content = fulls[0].content.content
+        assert isinstance(resp_content, dict)
+        assert resp_content["result"] == "hello"
+        assert fulls[0].content.tool_call_id == "t1"
+
+    async def test_empty_item_id_request_response_ids_match(self) -> None:
+        """A tool with an empty item_id must use the SAME fallback tool_call_id
+        on the request (started) and response (completed) halves."""
+        events = [
+            {"type": "item.started", "item": {"id": "", "type": "command_execution", "command": "ls"}},
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        # Pull tool_call_id inside the comprehension so the isinstance narrows the
+        # content union (the narrowing would not survive a later attribute access).
+        req_ids = [
+            e.content.tool_call_id
+            for e in out
+            if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ToolRequestContent)
+        ]
+        resp_ids = [
+            e.content.tool_call_id
+            for e in out
+            if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        ]
+        assert len(req_ids) == 1 and len(resp_ids) == 1
+        assert req_ids[0] == resp_ids[0]
+
+    async def test_file_change_synthesizes_start(self) -> None:
+        """file_change items may only emit item.completed (no started)."""
+        events = [
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "fc1",
+                    "type": "file_change",
+                    "changes": ["a.py"],
+                    "status": "ok",
+                },
+            }
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        tool_req = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolRequestContent)
+        ]
+        tool_resp = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        ]
+        assert len(tool_req) == 1
+        assert isinstance(tool_req[0].content, ToolRequestContent)
+        assert tool_req[0].content.name == "file_change"
+        assert len(tool_resp) == 1
+
+    async def test_mcp_tool_call_name(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "fs",
+                    "tool": "read",
+                    "arguments": {"path": "/x"},
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "fs",
+                    "tool": "read",
+                    "arguments": {"path": "/x"},
+                    "result": "content",
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        req = next(
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ToolRequestContent)
+        )
+        assert isinstance(req.content, ToolRequestContent)
+        assert req.content.name == "fs.read"
+
+    async def test_tool_error_marks_is_error(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "cmd1", "type": "command_execution", "command": "bad"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "cmd1",
+                    "type": "command_execution",
+                    "command": "bad",
+                    "aggregated_output": "error output",
+                    "exit_code": 127,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        resp = next(
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        )
+        assert isinstance(resp.content, ToolResponseContent)
+        resp_body = resp.content.content
+        assert isinstance(resp_body, dict)
+        assert resp_body.get("is_error") is True
+
+    async def test_tool_indices_request_before_response(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "cmd2", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "cmd2",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        req = next(e for e in out if isinstance(e, StreamTaskMessageStart))
+        resp = next(
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        )
+        assert req.index is not None and resp.index is not None
+        assert req.index < resp.index
+
+
+# ---------------------------------------------------------------------------
+# Reasoning
+# ---------------------------------------------------------------------------
+
+
+class TestReasoningStreaming:
+    async def test_reasoning_start_deltas_done(self) -> None:
+        """A reasoning block opens with a Start, streams the final text as
+        summary + content deltas, and closes with a Done.
+
+        It must NOT emit a Full at the open Start's index: auto_send routes a
+        Full into a throwaway streaming context (ignoring the index), which
+        would leave the Start context dangling and persist a duplicate, empty
+        reasoning message (AGX1 codex reasoning duplicate bug).
+        """
+        events = [
+            {"type": "item.started", "item": {"id": "r1", "type": "reasoning", "text": ""}},
+            {
+                "type": "item.updated",
+                "item": {"id": "r1", "type": "reasoning", "text": "thinking..."},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "r1", "type": "reasoning", "text": "thinking... done"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+        reasoning_fulls = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ReasoningContent)
+        ]
+        content_deltas = [
+            e for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningContentDelta)
+        ]
+        summary_deltas = [
+            e for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningSummaryDelta)
+        ]
+
+        # Exactly one message: Start + deltas + Done, all on the same index, no Full.
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ReasoningContent)
+        assert reasoning_fulls == []
+        assert len(content_deltas) == 1
+        content_delta = content_deltas[0].delta
+        assert isinstance(content_delta, ReasoningContentDelta)
+        assert content_delta.content_delta == "thinking... done"
+        assert len(summary_deltas) == 1
+        summary_delta = summary_deltas[0].delta
+        assert isinstance(summary_delta, ReasoningSummaryDelta)
+        assert summary_delta.summary_delta == "thinking... done"
+        assert len(dones) == 1
+        idx = starts[0].index
+        assert content_deltas[0].index == idx
+        assert summary_deltas[0].index == idx
+        assert dones[0].index == idx
+
+    async def test_reasoning_no_started_opens_and_closes_one_message(self) -> None:
+        """If item.completed arrives without item.started, the converter opens a
+        Start lazily and closes it with a Done (still one clean message, no Full)."""
+        events = [
+            {
+                "type": "item.completed",
+                "item": {"id": "r_orphan", "type": "reasoning", "text": "orphan thought"},
+            }
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+        reasoning_fulls = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ReasoningContent)
+        ]
+        content_deltas = [
+            e for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningContentDelta)
+        ]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ReasoningContent)
+        assert reasoning_fulls == []
+        assert len(content_deltas) == 1
+        content_delta = content_deltas[0].delta
+        assert isinstance(content_delta, ReasoningContentDelta)
+        assert content_delta.content_delta == "orphan thought"
+        assert len(dones) == 1
+        assert dones[0].index == starts[0].index
+
+    async def test_reasoning_summary_is_first_line(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "r2", "type": "reasoning", "text": ""}},
+            {
+                "type": "item.completed",
+                "item": {"id": "r2", "type": "reasoning", "text": "line one\nline two"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        summary_event = next(
+            e for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningSummaryDelta)
+        )
+        summary_delta = summary_event.delta
+        assert isinstance(summary_delta, ReasoningSummaryDelta)
+        assert summary_delta.summary_delta == "line one"
+
+    async def test_reasoning_empty_block_closes_with_done_only(self) -> None:
+        """A reasoning block that completes with no text still closes its Start."""
+        events = [
+            {"type": "item.started", "item": {"id": "r3", "type": "reasoning", "text": ""}},
+            {"type": "item.completed", "item": {"id": "r3", "type": "reasoning", "text": ""}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+
+        assert len(starts) == 1
+        assert deltas == []
+        assert len(dones) == 1
+        assert dones[0].index == starts[0].index
+
+
+# ---------------------------------------------------------------------------
+# Error events
+# ---------------------------------------------------------------------------
+
+
+class TestErrorEvents:
+    async def test_turn_failed_emits_error_text(self) -> None:
+        events = [{"type": "turn.failed", "error": {"message": "context length exceeded"}}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, TextContent)
+        assert "context length exceeded" in out[0].content.content
+
+    async def test_top_level_error_emits_text(self) -> None:
+        events = [{"type": "error", "message": "unexpected EOF"}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert len(out) == 1
+        assert isinstance(out[0].content, TextContent)
+        assert "unexpected EOF" in out[0].content.content
+
+    async def test_item_error_emits_on_completed_only(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "e1", "type": "error", "message": "bad"}},
+            {"type": "item.completed", "item": {"id": "e1", "type": "error", "message": "bad"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        # Only item.completed emits an event for error items
+        assert len(out) == 1
+        assert isinstance(out[0].content, TextContent)
+        assert "bad" in out[0].content.content
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+
+class TestEdgeCases:
+    async def test_empty_stream(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter([])))
+        assert out == []
+
+    async def test_non_json_lines_skipped(self) -> None:
+        events: list[str] = ["not json", "also not json"]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert out == []
+
+    async def test_blank_lines_skipped(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter(["", "   ", "\n"])))
+        assert out == []
+
+    async def test_pre_decoded_dict_events(self) -> None:
+        """Events passed as dicts (pre-decoded) should work without JSON parsing."""
+        events: list[dict[str, Any]] = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "hi"}},
+            {
+                "type": "item.completed",
+                "item": {"id": "m1", "type": "agent_message", "text": "hi"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert len(out) > 0
+
+    async def test_thread_started_no_message(self) -> None:
+        events = [{"type": "thread.started", "thread_id": "t1"}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert out == []
+
+    async def test_turn_started_no_message(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter([{"type": "turn.started"}])))
+        assert out == []
+
+    async def test_turn_completed_no_message(self) -> None:
+        out = await _collect(
+            convert_codex_to_agentex_events(_aiter([{"type": "turn.completed", "usage": {"input_tokens": 1}}]))
+        )
+        assert out == []
+
+    async def test_unknown_event_type_no_message(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter([{"type": "some.future.event"}])))
+        assert out == []
+
+    async def test_unknown_item_type_no_message(self) -> None:
+        out = await _collect(
+            convert_codex_to_agentex_events(
+                _aiter([{"type": "item.started", "item": {"id": "x", "type": "future_item"}}])
+            )
+        )
+        assert out == []
+
+
+# ---------------------------------------------------------------------------
+# on_result callback
+# ---------------------------------------------------------------------------
+
+
+class TestOnResult:
+    async def test_session_id_captured(self) -> None:
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events = [
+            {"type": "thread.started", "thread_id": "sess-xyz"},
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 5, "output_tokens": 3, "total_tokens": 8},
+            },
+        ]
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result["session_id"] == "sess-xyz"
+
+    async def test_usage_forwarded(self) -> None:
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events = [
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+            }
+        ]
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result["usage"] == {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+
+    async def test_tool_count(self) -> None:
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "t1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+            {"type": "turn.completed", "usage": None},
+        ]
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result["tool_call_count"] == 1
+
+    async def test_no_callback_when_none(self) -> None:
+        """Passing on_result=None should not raise."""
+        events = [{"type": "turn.completed", "usage": None}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=None))
+        assert out == []
+
+    async def test_on_result_called_even_without_turn_completed(self) -> None:
+        """on_result fires at end of stream even if turn.completed never arrived."""
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events: list[Any] = []
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result.get("usage") is None
+        assert result.get("session_id") is None
+
+
+# ---------------------------------------------------------------------------
+# Multi-step turn: tool → text
+# ---------------------------------------------------------------------------
+
+
+class TestMultiStepTurn:
+    async def test_tool_then_text_monotonic_indices(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "cmd1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "cmd1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": "file.txt",
+                    "exit_code": 0,
+                },
+            },
+            {
+                "type": "item.started",
+                "item": {"id": "msg1", "type": "agent_message", "text": ""},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Done"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        indices = [e.index for e in out]
+        assert indices == sorted(indices), "indices must be monotonically non-decreasing"
+
+    async def test_two_text_blocks_distinct_indices(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "a", "type": "agent_message", "text": "first"},
+            },
+            {"type": "item.completed", "item": {"id": "a", "type": "agent_message", "text": "first"}},
+            {
+                "type": "item.started",
+                "item": {"id": "b", "type": "agent_message", "text": "second"},
+            },
+            {"type": "item.completed", "item": {"id": "b", "type": "agent_message", "text": "second"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        assert len(starts) == 2
+        assert starts[0].index != starts[1].index
+
+    async def test_json_string_events(self) -> None:
+        """Events may arrive as raw newline-delimited JSON strings."""
+        raw_events = [
+            json.dumps({"type": "item.started", "item": {"id": "s1", "type": "agent_message", "text": "hello"}}),
+            json.dumps({"type": "item.completed", "item": {"id": "s1", "type": "agent_message", "text": "hello"}}),
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(raw_events)))
+        assert len(out) > 0
+        assert any(isinstance(e, StreamTaskMessageStart) for e in out)
diff --git a/tests/lib/adk/test_codex_turn.py b/tests/lib/adk/test_codex_turn.py
new file mode 100644
index 000000000..f6a046478
--- /dev/null
+++ b/tests/lib/adk/test_codex_turn.py
@@ -0,0 +1,282 @@
+"""Offline tests for CodexTurn and codex_usage_to_turn_usage.
+
+Tests cover:
+- TurnUsage normalization from raw codex usage dicts
+- Defensive handling of missing/invalid usage fields
+- CodexTurn: events property yields canonical StreamTaskMessage*
+- CodexTurn: usage() before and after stream exhaustion
+- CodexTurn: on_result wiring (session_id, counts propagate to usage())
+- CodexTurn satisfies HarnessTurn protocol
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.lib.core.harness.types import TurnUsage, HarnessTurn
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk._modules._codex_turn import (
+    CodexTurn,
+    codex_usage_to_turn_usage,
+)
+
+
+async def _aiter(items: list[Any]) -> AsyncIterator[Any]:
+    for item in items:
+        yield item
+
+
+async def _collect(turn: CodexTurn) -> list[Any]:
+    return [msg async for msg in turn.events]
+
+
+# ---------------------------------------------------------------------------
+# codex_usage_to_turn_usage
+# ---------------------------------------------------------------------------
+
+
+class TestCodexUsageToTurnUsage:
+    def test_none_raw_all_none_tokens(self) -> None:
+        u = codex_usage_to_turn_usage(None)
+        assert u.input_tokens is None
+        assert u.output_tokens is None
+        assert u.total_tokens is None
+        assert u.cost_usd is None
+
+    def test_empty_dict_all_none_tokens(self) -> None:
+        u = codex_usage_to_turn_usage({})
+        assert u.input_tokens is None
+        assert u.output_tokens is None
+
+    def test_standard_usage(self) -> None:
+        raw = {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150}
+        u = codex_usage_to_turn_usage(raw, model="o4-mini")
+        assert u.input_tokens == 100
+        assert u.output_tokens == 50
+        assert u.total_tokens == 150
+        assert u.model == "o4-mini"
+
+    def test_reasoning_tokens(self) -> None:
+        raw = {"input_tokens": 200, "output_tokens": 80, "reasoning_tokens": 60, "total_tokens": 340}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.reasoning_tokens == 60
+
+    def test_real_zero_preserved(self) -> None:
+        """Explicit zeros in the payload must survive (not be treated as missing)."""
+        raw = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.input_tokens == 0
+        assert u.output_tokens == 0
+
+    def test_cached_input_tokens(self) -> None:
+        raw = {"input_tokens": 100, "cached_input_tokens": 20, "output_tokens": 40}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.cached_input_tokens == 20
+
+    def test_invalid_token_values_become_none(self) -> None:
+        raw = {"input_tokens": "not_a_number", "output_tokens": None}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.input_tokens is None
+        assert u.output_tokens is None
+
+    def test_cost_explicit(self) -> None:
+        u = codex_usage_to_turn_usage(None, cost_usd=0.0042)
+        assert u.cost_usd == pytest.approx(0.0042)
+
+    def test_cost_from_raw(self) -> None:
+        u = codex_usage_to_turn_usage({"cost_usd": 0.001})
+        assert u.cost_usd == pytest.approx(0.001)
+
+    def test_explicit_cost_overrides_raw(self) -> None:
+        """Explicit cost_usd kwarg takes precedence over raw dict value."""
+        u = codex_usage_to_turn_usage({"cost_usd": 0.001}, cost_usd=0.002)
+        assert u.cost_usd == pytest.approx(0.002)
+
+    def test_tool_and_reasoning_counts(self) -> None:
+        u = codex_usage_to_turn_usage(None, tool_call_count=3, reasoning_count=2)
+        assert u.num_tool_calls == 3
+        assert u.num_reasoning_blocks == 2
+
+    def test_num_llm_calls_always_one(self) -> None:
+        u = codex_usage_to_turn_usage(None)
+        assert u.num_llm_calls == 1
+
+    def test_duration_ms(self) -> None:
+        u = codex_usage_to_turn_usage(None, duration_ms=1234)
+        assert u.duration_ms == 1234
+
+    def test_model_none_when_not_provided(self) -> None:
+        u = codex_usage_to_turn_usage(None)
+        assert u.model is None
+
+    def test_non_dict_raw_treated_as_empty(self) -> None:
+        u = codex_usage_to_turn_usage("bad input")  # type: ignore[arg-type]
+        assert u.input_tokens is None
+
+    def test_returns_turn_usage_instance(self) -> None:
+        u = codex_usage_to_turn_usage({})
+        assert isinstance(u, TurnUsage)
+
+
+# ---------------------------------------------------------------------------
+# CodexTurn protocol conformance
+# ---------------------------------------------------------------------------
+
+
+class TestCodexTurnProtocol:
+    def test_implements_harness_turn_protocol(self) -> None:
+        turn = CodexTurn(_aiter([]), model="o4-mini")
+        assert isinstance(turn, HarnessTurn)
+
+    def test_usage_before_exhaustion_returns_zero_turn_usage(self) -> None:
+        turn = CodexTurn(_aiter([]), model="test-model")
+        u = turn.usage()
+        assert isinstance(u, TurnUsage)
+        assert u.model == "test-model"
+        assert u.input_tokens is None
+        assert u.num_tool_calls == 0
+
+
+# ---------------------------------------------------------------------------
+# CodexTurn events
+# ---------------------------------------------------------------------------
+
+
+class TestCodexTurnEvents:
+    async def test_events_yield_stream_task_messages(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "hi"}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": "hi"}},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        out = await _collect(turn)
+        assert len(out) > 0
+        for msg in out:
+            assert isinstance(
+                msg,
+                (StreamTaskMessageStart, StreamTaskMessageDelta, StreamTaskMessageFull, StreamTaskMessageDone),
+            )
+
+    async def test_usage_after_exhaustion_has_tokens(self) -> None:
+        events = [
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+            }
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        u = turn.usage()
+        assert u.input_tokens == 10
+        assert u.output_tokens == 5
+        assert u.total_tokens == 15
+
+    async def test_usage_model_propagated(self) -> None:
+        events = [{"type": "turn.completed", "usage": None}]
+        turn = CodexTurn(_aiter(events), model="codex-model-x")
+        await _collect(turn)
+        assert turn.usage().model == "codex-model-x"
+
+    async def test_tool_count_in_usage(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "t1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+            {"type": "turn.completed", "usage": None},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        assert turn.usage().num_tool_calls == 1
+
+    async def test_events_property_stable_across_accesses(self) -> None:
+        """`.events` returns the same generator; usage survives a second access."""
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "t1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+            {"type": "turn.completed", "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        assert turn.events is turn.events  # same generator, not a fresh wrapper
+        await _collect(turn)
+        # A second access must NOT re-wrap the exhausted iterator and reset usage.
+        _ = turn.events
+        assert turn.usage().total_tokens == 15
+        assert turn.usage().num_tool_calls == 1
+
+    async def test_reasoning_count_in_usage(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "r1", "type": "reasoning", "text": ""}},
+            {
+                "type": "item.completed",
+                "item": {"id": "r1", "type": "reasoning", "text": "thought"},
+            },
+            {"type": "turn.completed", "usage": None},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        assert turn.usage().num_reasoning_blocks == 1
+
+    async def test_duration_ms_passed_through(self) -> None:
+        events = [{"type": "turn.completed", "usage": None}]
+        turn = CodexTurn(_aiter(events), model="o4-mini", duration_ms=999)
+        await _collect(turn)
+        assert turn.usage().duration_ms == 999
+
+    async def test_cost_usd_passed_through(self) -> None:
+        events = [{"type": "turn.completed", "usage": None}]
+        turn = CodexTurn(_aiter(events), model="o4-mini", cost_usd=0.007)
+        await _collect(turn)
+        assert turn.usage().cost_usd == pytest.approx(0.007)
+
+    async def test_empty_stream_usage_still_valid(self) -> None:
+        turn = CodexTurn(_aiter([]), model="o4-mini")
+        await _collect(turn)
+        u = turn.usage()
+        assert isinstance(u, TurnUsage)
+        assert u.num_llm_calls == 1
+
+    async def test_reasoning_tokens_propagated(self) -> None:
+        events = [
+            {
+                "type": "turn.completed",
+                "usage": {
+                    "input_tokens": 100,
+                    "output_tokens": 60,
+                    "reasoning_tokens": 40,
+                    "total_tokens": 200,
+                },
+            }
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        assert turn.usage().reasoning_tokens == 40
diff --git a/tests/lib/adk/test_langgraph_async.py b/tests/lib/adk/test_langgraph_async.py
new file mode 100644
index 000000000..682bd43bc
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_async.py
@@ -0,0 +1,282 @@
+"""Characterization tests for stream_langgraph_events (unified surface).
+
+These tests verify the behavior of ``stream_langgraph_events`` after it was
+reimplemented on top of ``LangGraphTurn`` + ``UnifiedEmitter.auto_send_turn``
+(Task 4). They serve as a contract test for the public signature.
+
+Key behavioral notes (unified surface vs. old bespoke implementation):
+- Tool calls/responses are posted via ``streaming_task_message_context`` (not
+  ``adk.messages.create``); they appear as contexts with no stream_update calls.
+- ``final_text`` accumulates ALL text across the turn (the old bespoke impl
+  only returned the last text segment — behavior varied across models).
+
+NOTE: langchain_core imports are deferred to test scope because conftest.py
+stubs ``langchain_core.messages`` with MagicMock.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import StreamTaskMessageDelta
+from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events
+
+TASK_ID = "task-test"
+
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming infrastructure (mirrors test_pydantic_ai_async.py pattern)
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class FakeContext:
+    initial_content: Any
+    task_message: TaskMessage
+    closed: bool = False
+    updates: list[StreamTaskMessageDelta] = field(default_factory=list)
+
+    async def __aenter__(self) -> "FakeContext":
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
+        await self.close()
+        return False
+
+    async def stream_update(self, update: StreamTaskMessageDelta) -> None:
+        if self.closed:
+            raise AssertionError("stream_update called after close")
+        self.updates.append(update)
+
+    async def close(self) -> None:
+        self.closed = True
+
+
+class FakeStreamingModule:
+    def __init__(self) -> None:
+        self.contexts: list[FakeContext] = []
+
+    def streaming_task_message_context(self, *, task_id: str, initial_content: Any, **kw: Any) -> FakeContext:
+        tm = TaskMessage(
+            id=f"m{len(self.contexts) + 1}",
+            task_id=task_id,
+            content=initial_content,
+            streaming_status="IN_PROGRESS",
+        )
+        ctx = FakeContext(initial_content=initial_content, task_message=tm)
+        self.contexts.append(ctx)
+        return ctx
+
+
+class FakeMessagesModule:
+    def __init__(self) -> None:
+        self.created: list[dict[str, Any]] = []
+
+    async def create(self, *, task_id: str, content: Any) -> TaskMessage:
+        self.created.append({"task_id": task_id, "content": content})
+        return TaskMessage(
+            id=f"created-{len(self.created)}",
+            task_id=task_id,
+            content=content,
+            streaming_status="DONE",
+        )
+
+
+@pytest.fixture
+def fake_adk(monkeypatch):
+    from agentex.lib import adk as adk_module
+
+    streaming = FakeStreamingModule()
+    messages = FakeMessagesModule()
+    monkeypatch.setattr(adk_module, "streaming", streaming)
+    monkeypatch.setattr(adk_module, "messages", messages)
+    return streaming, messages
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+def _text_deltas(ctx: FakeContext) -> list[str]:
+    out: list[str] = []
+    for u in ctx.updates:
+        if isinstance(u.delta, TextDelta):
+            out.append(u.delta.text_delta or "")
+    return out
+
+
+# ---------------------------------------------------------------------------
+# Characterization tests (unified surface behavior)
+# ---------------------------------------------------------------------------
+
+
+class TestCharacterization:
+    async def test_plain_text_streams_and_returns_final_text(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        streaming, messages = fake_adk
+        chunk = AIMessageChunk(content="Hello, world!")
+        ai_msg = AIMessage(content="Hello, world!")
+        stream = _make_stream(
+            [
+                ("messages", (chunk, {})),
+                ("updates", {"agent": {"messages": [ai_msg]}}),
+            ]
+        )
+
+        final = await stream_langgraph_events(stream, TASK_ID)
+
+        assert final == "Hello, world!"
+        assert len(streaming.contexts) == 1
+        ctx = streaming.contexts[0]
+        assert isinstance(ctx.initial_content, TextContent)
+        assert _text_deltas(ctx) == ["Hello, world!"]
+        assert ctx.closed is True
+        # Unified surface: no messages.create for text
+        assert messages.created == []
+
+    async def test_empty_stream_returns_empty_string(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        streaming, _ = fake_adk
+        final = await stream_langgraph_events(_make_stream([]), TASK_ID)
+        assert final == ""
+        assert streaming.contexts == []
+
+    async def test_tool_call_posted_via_streaming_context(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Unified surface: tool calls go through streaming_task_message_context,
+        not adk.messages.create. The context is opened and immediately closed
+        (no deltas) so the initial_content is the tool request."""
+        from langchain_core.messages import AIMessage
+
+        streaming, messages = fake_adk
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+
+        await stream_langgraph_events(stream, TASK_ID)
+
+        # Unified surface: tool messages go via streaming_task_message_context
+        assert len(streaming.contexts) == 1
+        assert messages.created == [], "Unified surface uses streaming_task_message_context, not messages.create"
+
+        from agentex.types.tool_request_content import ToolRequestContent
+
+        content = streaming.contexts[0].initial_content
+        assert isinstance(content, ToolRequestContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.arguments == {"city": "Paris"}
+        # Full messages close immediately (no delta updates)
+        assert streaming.contexts[0].closed is True
+        assert streaming.contexts[0].updates == []
+
+    async def test_tool_response_posted_via_streaming_context(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Unified surface: tool responses go through streaming_task_message_context."""
+        from langchain_core.messages import ToolMessage
+
+        streaming, messages = fake_adk
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        stream = _make_stream([("updates", {"tools": {"messages": [tool_msg]}})])
+
+        await stream_langgraph_events(stream, TASK_ID)
+
+        assert len(streaming.contexts) == 1
+        assert messages.created == []
+
+        from agentex.types.tool_response_content import ToolResponseContent
+
+        content = streaming.contexts[0].initial_content
+        assert isinstance(content, ToolResponseContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.content == "Sunny, 72F"
+        assert streaming.contexts[0].closed is True
+
+    async def test_multi_step_text_then_tool_then_text_last_segment(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Unified surface: final_text uses last-segment semantics.
+
+        auto_send resets final_text_parts when a new Start(TextContent) is seen,
+        so multi-step turns (text -> tool -> text) return only the LAST text segment.
+        Both text contexts are still opened and streamed to Redis; only the
+        return value is last-segment. This matches stream_pydantic_ai_events.
+        """
+        from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
+
+        streaming, messages = fake_adk
+        chunk1 = AIMessageChunk(content="Looking up...")
+        ai_msg1 = AIMessage(content="Looking up...", tool_calls=[{"id": "c1", "name": "search", "args": {}}])
+        tool_msg = ToolMessage(content="result", tool_call_id="c1", name="search")
+        chunk2 = AIMessageChunk(content="Found it!")
+        ai_msg2 = AIMessage(content="Found it!")
+
+        stream = _make_stream(
+            [
+                ("messages", (chunk1, {})),
+                ("updates", {"agent": {"messages": [ai_msg1]}}),
+                ("updates", {"tools": {"messages": [tool_msg]}}),
+                ("messages", (chunk2, {})),
+                ("updates", {"agent": {"messages": [ai_msg2]}}),
+            ]
+        )
+
+        final = await stream_langgraph_events(stream, TASK_ID)
+
+        # Last segment only — first text segment is NOT in final_text
+        assert final == "Found it!"
+        # Two text streaming contexts (one per text segment) — both streamed to Redis
+        text_ctxs = [c for c in streaming.contexts if isinstance(c.initial_content, TextContent)]
+        assert len(text_ctxs) == 2
+        assert all(ctx.closed for ctx in text_ctxs)
+        # Tool request + tool response via streaming_task_message_context (not messages.create)
+        assert messages.created == []
+
+    async def test_context_closed_on_exception(self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]) -> None:
+        from langchain_core.messages import AIMessageChunk
+
+        streaming, _ = fake_adk
+
+        async def _boom():
+            chunk = AIMessageChunk(content="partial")
+            yield ("messages", (chunk, {}))
+            raise RuntimeError("upstream exploded")
+
+        with pytest.raises(RuntimeError, match="upstream exploded"):
+            await stream_langgraph_events(_boom(), TASK_ID)
+
+        assert streaming.contexts[0].closed is True
diff --git a/tests/lib/adk/test_langgraph_sync.py b/tests/lib/adk/test_langgraph_sync.py
new file mode 100644
index 000000000..248d18f68
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_sync.py
@@ -0,0 +1,247 @@
+"""Tests for the sync LangGraph -> Agentex stream event converter.
+
+Covers:
+- Basic text, tool call, and tool response emission
+- on_final_ai_message callback for usage capture
+- create_langgraph_tracing_handler symbol is importable and functional
+  (runtime DeprecationWarning removed; deprecation is docstring-only)
+
+NOTE: langchain_core imports must be deferred to test-function scope because
+conftest.py stubs out ``langchain_core.messages`` with MagicMock for ADK
+package-level tests. The real classes are imported lazily inside each test.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.types.task_message_update import (
+    StreamTaskMessageFull,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+def _make_stream(events: list[tuple[str, Any]]) -> AsyncIterator[tuple[str, Any]]:
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+# ---------------------------------------------------------------------------
+# Remove the conftest stubs for langchain_core so real classes are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    """Remove conftest MagicMock stubs so real langchain_core types are used."""
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    # Re-import the real modules
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    # Restore stubs after the test
+    sys.modules.update(saved)
+
+
+class TestTextStreaming:
+    async def test_plain_text_emits_start_delta_done(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello, world!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [AIMessage(content="Hello, world!")]}}),
+        ]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        types = [type(e).__name__ for e in out]
+        assert "StreamTaskMessageStart" in types
+        assert "StreamTaskMessageDelta" in types
+        assert "StreamTaskMessageDone" in types
+
+    async def test_empty_chunk_content_is_skipped(self):
+        from langchain_core.messages import AIMessageChunk
+
+        chunk = AIMessageChunk(content="")
+        events = [("messages", (chunk, {}))]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert out == []
+
+    async def test_reasoning_block_start_wraps_reasoning_content(self):
+        """A Responses-API reasoning block opens a Start wrapping ReasoningContent,
+        not TextContent (the deltas are ReasoningContentDelta)."""
+        from langchain_core.messages import AIMessageChunk
+
+        from agentex.types.reasoning_content import ReasoningContent
+        from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageStart
+        from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+        chunk = AIMessageChunk(
+            content=[{"type": "reasoning", "summary": [{"type": "summary_text", "text": "thinking hard"}]}]
+        )
+        events = [("messages", (chunk, {}))]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ReasoningContent), "reasoning Start must wrap ReasoningContent"
+        # `style` must be a non-null MessageStyle: the AgentEx server's
+        # StreamTaskMessageStartEntity rejects `reasoning.style=None` (enum), which
+        # would kill the stream. Match the conformance fixture's canonical value.
+        assert starts[0].content.style == "active", "reasoning Start must set a non-null style ('active')"
+        # Pull content_delta inside the comprehension so the isinstance narrows the
+        # delta union (narrowing would not survive a later attribute access).
+        reasoning_delta_texts = [
+            e.delta.content_delta
+            for e in out
+            if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningContentDelta)
+        ]
+        assert reasoning_delta_texts == ["thinking hard"]
+
+
+class TestToolCallEmission:
+    async def test_tool_call_emits_full_message(self):
+        from langchain_core.messages import AIMessage
+
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        content = out[0].content
+        assert isinstance(content, ToolRequestContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.arguments == {"city": "Paris"}
+        assert content.author == "agent"
+
+    async def test_tool_response_emits_full_message(self):
+        from langchain_core.messages import ToolMessage
+
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        events = [("updates", {"tools": {"messages": [tool_msg]}})]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        content = out[0].content
+        assert isinstance(content, ToolResponseContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.content == "Sunny, 72F"
+        assert content.author == "agent"
+
+
+class TestOnFinalAiMessageCallback:
+    async def test_callback_called_for_ai_message_in_agent_node(self):
+        from langchain_core.messages import AIMessage
+
+        captured: list[Any] = []
+        ai_msg = AIMessage(content="Hello!")
+
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert len(captured) == 1
+        assert captured[0] is ai_msg
+
+    async def test_callback_not_called_for_tool_messages(self):
+        from langchain_core.messages import ToolMessage
+
+        captured: list[Any] = []
+        tool_msg = ToolMessage(content="result", tool_call_id="c1", name="t")
+
+        events = [("updates", {"tools": {"messages": [tool_msg]}})]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert captured == []
+
+    async def test_callback_receives_usage_metadata(self):
+        from langchain_core.messages import AIMessage
+
+        captured: list[Any] = []
+        usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="Answer.", usage_metadata=usage)
+
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert len(captured) == 1
+        assert captured[0].usage_metadata == usage
+
+    async def test_no_callback_is_noop(self):
+        from langchain_core.messages import AIMessage
+
+        ai_msg = AIMessage(content="Hello!")
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert isinstance(out, list)
+
+    async def test_callback_called_multiple_times_for_multi_step(self):
+        from langchain_core.messages import AIMessage
+
+        captured: list[Any] = []
+        ai_msg_1 = AIMessage(content="Step 1")
+        ai_msg_2 = AIMessage(content="Step 2")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg_1]}}),
+            ("updates", {"agent": {"messages": [ai_msg_2]}}),
+        ]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert len(captured) == 2
+        assert captured[0] is ai_msg_1
+        assert captured[1] is ai_msg_2
+
+    async def test_callback_called_after_tool_call_events_yielded(self):
+        """The callback fires after all events for that AIMessage are yielded."""
+        from langchain_core.messages import AIMessage
+
+        yield_order: list[str] = []
+
+        async def _gen():
+            tc = {"id": "c1", "name": "t", "args": {}}
+            ai_msg = AIMessage(content="", tool_calls=[tc])
+            yield ("updates", {"agent": {"messages": [ai_msg]}})
+
+        def _cb(msg):
+            yield_order.append("callback")
+
+        async for _ in convert_langgraph_to_agentex_events(_gen(), on_final_ai_message=_cb):
+            yield_order.append("event")
+
+        # The tool call Full event is emitted before the callback fires
+        assert yield_order.index("event") < yield_order.index("callback")
+
+
+class TestLangGraphTracingHandlerBackwardCompat:
+    def test_create_langgraph_tracing_handler_no_runtime_warning(self):
+        """Deprecated symbol remains importable and emits no runtime DeprecationWarning.
+
+        The runtime warnings.warn was removed (docstring-only deprecation) to
+        align with PR 4/6 and avoid breaking callers under warnings-as-errors.
+        Using ``warnings.simplefilter("error", DeprecationWarning)`` verifies
+        that calling the function is safe under -W error conditions.
+        """
+        import warnings
+
+        from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("error", DeprecationWarning)
+            create_langgraph_tracing_handler(trace_id="t1", parent_span_id="p1")
+
+        assert w == [], "create_langgraph_tracing_handler must NOT emit a runtime DeprecationWarning"
diff --git a/tests/lib/adk/test_langgraph_sync_unified.py b/tests/lib/adk/test_langgraph_sync_unified.py
new file mode 100644
index 000000000..cfd522828
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_sync_unified.py
@@ -0,0 +1,214 @@
+"""Unified sync path tests for LangGraphTurn + UnifiedEmitter.
+
+Verifies:
+1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal
+   LangGraphTurn(stream).events collected directly.
+2. Span derivation: with trace_id + fake tracer, tool spans are derived from
+   the event stream.
+
+NOTE: langchain_core imports are deferred to test scope because conftest.py
+stubs ``langchain_core.messages`` with MagicMock.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from datetime import datetime, timezone
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+# ---------------------------------------------------------------------------
+# Fake SpanTracer
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _FakeTracingBackend:
+    spans_started: list[dict[str, Any]] = field(default_factory=list)
+    spans_ended: list[str] = field(default_factory=list)
+
+    async def start_span(self, **kw) -> Any:
+        from agentex.types.span import Span
+
+        sp = Span(
+            id=f"span-{len(self.spans_started) + 1}",
+            trace_id=kw.get("trace_id", "trace1"),
+            name=kw.get("name", ""),
+            start_time=datetime.now(tz=timezone.utc),
+        )
+        self.spans_started.append(kw)
+        return sp
+
+    async def end_span(self, *, trace_id: str, span: Any) -> None:
+        self.spans_ended.append(span.id if span else "")
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestPassthrough:
+    async def test_yield_turn_events_equal_direct_events(self):
+        """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal
+        LangGraphTurn(stream).events collected directly — the emitter must not
+        add, drop, or reorder events in yield mode."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello!")
+        ai_msg = AIMessage(content="Hello!")
+
+        # Build two identical streams
+        events_raw = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        # Direct collection
+        direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events]
+
+        # Via emitter.yield_turn
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        via_emitter = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+
+        assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration"
+        for a, b in zip(direct, via_emitter, strict=True):
+            assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}"
+
+    async def test_yield_turn_passes_all_event_types(self):
+        """Start, Delta, Done, Full — each type is preserved."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="hi")
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="hi", tool_calls=[tc])
+
+        events_raw = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+        types = {type(e).__name__ for e in out}
+        # text chunk emits Start + Delta
+        assert "StreamTaskMessageStart" in types
+        assert "StreamTaskMessageDelta" in types
+        # tool call emits Full
+        assert "StreamTaskMessageFull" in types
+
+    async def test_empty_stream_yields_no_events(self):
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))]
+        assert out == []
+
+
+class TestSpanDerivation:
+    @pytest.fixture
+    def fake_tracer(self):
+        backend = _FakeTracingBackend()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id=None,
+            task_id="t",
+            tracing=backend,  # type: ignore[arg-type]
+        )
+        return tracer, backend
+
+    async def test_tool_span_derived_from_full_events(self, fake_tracer):
+        """AGX1-377: SpanDeriver now handles Full tool events for LangGraph.
+
+        Full(ToolRequestContent) opens a tool span keyed by tool_call_id;
+        Full(ToolResponseContent) closes it. This bridges the previous gap where
+        LangGraph's Full-event path produced no spans, aligning it with
+        Start+Done harnesses (pydantic-ai, openai-agents).
+        """
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tracer, backend = fake_tracer
+        tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather")
+
+        events_raw = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+
+        emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer)
+        _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+
+        assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span"
+        started = backend.spans_started[0]
+        assert started["name"] == "get_weather"
+        assert started["input"] == {"city": "Paris"}
+
+    async def test_no_spans_when_no_tool_calls(self, fake_tracer):
+        """yield_turn with tracer but no tool calls emits no spans."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        tracer, backend = fake_tracer
+        chunk = AIMessageChunk(content="Hello!")
+        ai_msg = AIMessage(content="Hello!")
+
+        events_raw = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer)
+        _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+
+        assert backend.spans_started == [], "No tool spans when there are no tool calls"
+
+    async def test_tracer_none_means_no_spans(self):
+        """With tracer=False, no spans should be emitted."""
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t")
+
+        events_raw = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+
+        emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False)
+        _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+        # No assertion on spans since tracer=False means emitter.tracer is None
+        assert emitter.tracer is None
diff --git a/tests/lib/adk/test_langgraph_turn.py b/tests/lib/adk/test_langgraph_turn.py
new file mode 100644
index 000000000..23aa34ba3
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_turn.py
@@ -0,0 +1,265 @@
+"""Tests for LangGraphTurn and langgraph_usage_to_turn_usage."""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+
+import pytest
+
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn, langgraph_usage_to_turn_usage
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+async def _drain(turn: LangGraphTurn) -> list[Any]:
+    return [e async for e in turn.events]
+
+
+# ---------------------------------------------------------------------------
+# langgraph_usage_to_turn_usage
+# ---------------------------------------------------------------------------
+
+
+class TestLangGraphUsageToTurnUsage:
+    def test_none_usage_returns_empty_turn_usage(self):
+        result = langgraph_usage_to_turn_usage(None, model="gpt-4")
+        assert result == TurnUsage(model="gpt-4")
+
+    def test_basic_token_fields_mapped(self):
+        usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        result = langgraph_usage_to_turn_usage(usage, model="gpt-4")
+        assert result.input_tokens == 10
+        assert result.output_tokens == 5
+        assert result.total_tokens == 15
+        assert result.model == "gpt-4"
+
+    def test_zero_output_tokens_preserved_not_coerced_to_none(self):
+        """Real zero counts must be preserved as 0, not None."""
+        usage = {"input_tokens": 10, "output_tokens": 0, "total_tokens": 10}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.output_tokens == 0
+
+    def test_cache_read_mapped_to_cached_input_tokens(self):
+        usage = {
+            "input_tokens": 20,
+            "output_tokens": 5,
+            "total_tokens": 25,
+            "input_token_details": {"cache_read": 8},
+        }
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.cached_input_tokens == 8
+
+    def test_reasoning_mapped_to_reasoning_tokens(self):
+        usage = {
+            "input_tokens": 10,
+            "output_tokens": 15,
+            "total_tokens": 25,
+            "output_token_details": {"reasoning": 6},
+        }
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.reasoning_tokens == 6
+
+    def test_missing_optional_fields_are_none(self):
+        usage = {"input_tokens": 5, "output_tokens": 3, "total_tokens": 8}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.cached_input_tokens is None
+        assert result.reasoning_tokens is None
+
+    def test_full_usage_object(self):
+        usage = {
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "total_tokens": 150,
+            "input_token_details": {"cache_read": 30},
+            "output_token_details": {"reasoning": 20},
+        }
+        result = langgraph_usage_to_turn_usage(usage, model="claude-3-5-sonnet")
+        assert result == TurnUsage(
+            model="claude-3-5-sonnet",
+            input_tokens=100,
+            output_tokens=50,
+            total_tokens=150,
+            cached_input_tokens=30,
+            reasoning_tokens=20,
+        )
+
+    def test_model_none_is_preserved(self):
+        result = langgraph_usage_to_turn_usage({"input_tokens": 1}, model=None)
+        assert result.model is None
+
+    def test_empty_input_token_details_does_not_crash(self):
+        usage = {"input_tokens": 5, "input_token_details": {}}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.cached_input_tokens is None
+
+    def test_empty_output_token_details_does_not_crash(self):
+        usage = {"output_tokens": 5, "output_token_details": {}}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.reasoning_tokens is None
+
+
+# ---------------------------------------------------------------------------
+# LangGraphTurn
+# ---------------------------------------------------------------------------
+
+
+class TestLangGraphTurn:
+    async def test_events_yields_from_sync_converter(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello!")
+        ai_msg = AIMessage(content="Hello!")
+        stream = _make_stream(
+            [
+                ("messages", (chunk, {})),
+                ("updates", {"agent": {"messages": [ai_msg]}}),
+            ]
+        )
+        turn = LangGraphTurn(stream)
+        events = await _drain(turn)
+        assert len(events) > 0
+
+    async def test_usage_is_empty_before_stream_consumed(self):
+        turn = LangGraphTurn(_make_stream([]))
+        # usage() before events consumed should return a default TurnUsage
+        usage = turn.usage()
+        assert isinstance(usage, TurnUsage)
+
+    async def test_usage_captured_from_ai_message(self):
+        from langchain_core.messages import AIMessage
+
+        usage_meta = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="Hi!", usage_metadata=usage_meta)
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="gpt-4")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.total_tokens == 15
+        assert usage.model == "gpt-4"
+
+    async def test_usage_accumulates_across_multiple_ai_messages(self):
+        """A multi-step turn (>1 LLM call) sums usage instead of keeping only the last."""
+        from langchain_core.messages import AIMessage
+
+        first = AIMessage(
+            content="thinking",
+            usage_metadata={
+                "input_tokens": 10,
+                "output_tokens": 5,
+                "total_tokens": 15,
+                "input_token_details": {"cache_read": 2},
+                "output_token_details": {"reasoning": 1},
+            },
+        )
+        second = AIMessage(
+            content="answer",
+            usage_metadata={
+                "input_tokens": 20,
+                "output_tokens": 7,
+                "total_tokens": 27,
+                "input_token_details": {"cache_read": 3},
+                "output_token_details": {"reasoning": 4},
+            },
+        )
+        stream = _make_stream(
+            [
+                ("updates", {"agent": {"messages": [first]}}),
+                ("updates", {"agent": {"messages": [second]}}),
+            ]
+        )
+        turn = LangGraphTurn(stream, model="gpt-4")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage.input_tokens == 30
+        assert usage.output_tokens == 12
+        assert usage.total_tokens == 42
+        assert usage.cached_input_tokens == 5
+        assert usage.reasoning_tokens == 5
+        assert usage.model == "gpt-4"
+
+    async def test_usage_not_updated_when_no_usage_metadata(self):
+        from langchain_core.messages import AIMessage
+
+        ai_msg = AIMessage(content="Hi!")
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="gpt-4")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage == TurnUsage(model="gpt-4")
+
+    async def test_usage_captures_cache_read_and_reasoning(self):
+        from langchain_core.messages import AIMessage
+
+        usage_meta = {
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "total_tokens": 150,
+            "input_token_details": {"cache_read": 30},
+            "output_token_details": {"reasoning": 20},
+        }
+        ai_msg = AIMessage(content="Result", usage_metadata=usage_meta)
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="claude-3-5-sonnet")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage.cached_input_tokens == 30
+        assert usage.reasoning_tokens == 20
+
+    async def test_harness_turn_protocol_conformance(self):
+        """LangGraphTurn satisfies the HarnessTurn Protocol."""
+        from agentex.lib.core.harness.types import HarnessTurn
+
+        turn = LangGraphTurn(_make_stream([]))
+        assert isinstance(turn, HarnessTurn), "LangGraphTurn must satisfy HarnessTurn Protocol"
+
+    async def test_empty_stream_yields_no_events(self):
+        turn = LangGraphTurn(_make_stream([]))
+        events = await _drain(turn)
+        assert events == []
+
+    async def test_model_none_default(self):
+        turn = LangGraphTurn(_make_stream([]))
+        assert turn.usage().model is None
+
+    async def test_model_passed_through_to_usage(self):
+        from langchain_core.messages import AIMessage
+
+        ai_msg = AIMessage(content="ok", usage_metadata={"input_tokens": 1, "output_tokens": 0, "total_tokens": 1})
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="my-model")
+        await _drain(turn)
+        assert turn.usage().model == "my-model"
diff --git a/tests/lib/adk/test_pydantic_ai_async.py b/tests/lib/adk/test_pydantic_ai_async.py
index dadda5914..49cb6054c 100644
--- a/tests/lib/adk/test_pydantic_ai_async.py
+++ b/tests/lib/adk/test_pydantic_ai_async.py
@@ -82,7 +82,9 @@ class FakeStreamingModule:
     def __init__(self) -> None:
         self.contexts: list[FakeContext] = []
 
-    def streaming_task_message_context(self, *, task_id: str, initial_content: Any) -> FakeContext:
+    def streaming_task_message_context(
+        self, *, task_id: str, initial_content: Any, streaming_mode: str = "coalesced", created_at: Any = None
+    ) -> FakeContext:
         tm = TaskMessage(
             id=f"m{len(self.contexts) + 1}",
             task_id=task_id,
@@ -255,16 +257,36 @@ async def test_empty_thinking_delta_is_skipped(
 
 
 class TestToolCallEmission:
-    async def test_tool_call_emits_full_tool_request_message_on_part_end(
+    async def test_tool_call_opens_streaming_context_with_identity(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """Async helper uses Option A: tool requests are full messages, not delta streams."""
+        """Tool requests are delivered as a streaming context (Start+Delta+Done).
+
+        AGX1-377 fix: auto_send now delivers streamed tool-request messages
+        natively (Start+ToolRequestDelta+Done). The streaming context is opened
+        at the Start event with the initial ToolRequestContent (tool_call_id +
+        name + empty arguments), argument tokens are streamed as deltas, and the
+        context is closed on Done.
+
+        This test uses a realistic pydantic-ai event sequence: args arrive as a
+        PartDeltaEvent fragment (the way OpenAI/Anthropic actually stream JSON
+        tool-call arguments).
+        """
+        from pydantic_ai.messages import ToolCallPartDelta
+
+        from agentex.types.tool_request_delta import ToolRequestDelta
+
         streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=1,
                 part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"),
             ),
+            # Realistic: args arrive as delta tokens (JSON string fragments).
+            PartDeltaEvent(
+                index=1,
+                delta=ToolCallPartDelta(args_delta='{"city":"Paris"}'),
+            ),
             PartEndEvent(
                 index=1,
                 part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"),
@@ -272,21 +294,28 @@ async def test_tool_call_emits_full_tool_request_message_on_part_end(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert streaming.contexts == [], "Tool calls do not open a streaming context"
-        assert len(messages.created) == 1
-        msg = messages.created[0]
-        assert msg["task_id"] == TASK_ID
-        content = msg["content"]
+        # AGX1-373: tool messages arrive via streaming_task_message_context.
+        assert messages.created == [], "adk.messages.create must not be called"
+        assert len(streaming.contexts) == 1, "tool_request opens a streaming context"
+        ctx = streaming.contexts[0]
+        assert ctx.closed is True
+        content = ctx.initial_content
         assert isinstance(content, ToolRequestContent)
         assert content.tool_call_id == "c1"
         assert content.name == "get_weather"
-        assert content.arguments == {"city": "Paris"}
         assert content.author == "agent"
+        # AGX1-377 streamed shape: initial_content has empty args (args come via delta)
+        assert content.arguments == {}
+        # The arg delta is delivered as a stream_update
+        assert len(ctx.updates) == 1
+        assert isinstance(ctx.updates[0].delta, ToolRequestDelta)
+        assert ctx.updates[0].delta.arguments_delta == '{"city":"Paris"}'
 
     async def test_tool_call_with_dict_args_passes_through(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        """When args arrive pre-populated as a dict in PartStart, they're in initial_content."""
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
@@ -299,23 +328,40 @@ async def test_tool_call_with_dict_args_passes_through(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        assert messages.created[0]["content"].arguments == {"q": "weather"}
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        # Dict args present at PartStart land directly in initial_content.arguments
+        assert streaming.contexts[0].initial_content.arguments == {"q": "weather"}
+        assert streaming.contexts[0].updates == [], "no delta for pre-populated dict args"
 
     async def test_tool_call_with_invalid_json_args_surfaces_raw(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """Don't drop the tool call when the model emits malformed JSON args.
+        """Malformed JSON arg delta is surfaced as a ToolRequestDelta with the raw string.
+
+        The argument delta is delivered as-is by auto_send; the client-side
+        accumulator or the streaming backend handles malformed JSON gracefully.
 
-        The arguments field is preserved under ``_raw`` so the failure is
-        visible to the UI rather than silently truncated.
+        Parts-manager invariant: PartEnd.part is the accumulated snapshot; real
+        pydantic-ai conveys args via PartStart + PartDeltaEvent, so a
+        PartStart(None)+PartEnd(json) with no delta is not realizable.
         """
-        _, messages = fake_adk
+        from pydantic_ai.messages import ToolCallPartDelta
+
+        from agentex.types.tool_request_delta import ToolRequestDelta
+
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
                 part=ToolCallPart(tool_name="t", args=None, tool_call_id="c"),
             ),
+            # Malformed JSON arrives as a delta token.
+            PartDeltaEvent(
+                index=0,
+                delta=ToolCallPartDelta(args_delta="not-json{"),
+            ),
             PartEndEvent(
                 index=0,
                 part=ToolCallPart(tool_name="t", args="not-json{", tool_call_id="c"),
@@ -323,13 +369,21 @@ async def test_tool_call_with_invalid_json_args_surfaces_raw(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        assert messages.created[0]["content"].arguments == {"_raw": "not-json{"}
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        ctx = streaming.contexts[0]
+        # Initial content has empty args (args come via delta)
+        assert ctx.initial_content.arguments == {}
+        # The malformed JSON is surfaced verbatim in the ToolRequestDelta
+        assert len(ctx.updates) == 1
+        assert isinstance(ctx.updates[0].delta, ToolRequestDelta)
+        assert ctx.updates[0].delta.arguments_delta == "not-json{"
 
     async def test_tool_call_with_none_args_defaults_to_empty_dict(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
@@ -342,15 +396,20 @@ async def test_tool_call_with_none_args_defaults_to_empty_dict(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        assert messages.created[0]["content"].arguments == {}
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        assert streaming.contexts[0].initial_content.arguments == {}
+        assert streaming.contexts[0].updates == [], "no delta when args are absent"
 
 
 class TestToolResult:
     async def test_tool_return_emits_full_tool_response_message(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        # AGX1-373: tool responses arrive via streaming_task_message_context
+        # (open+close pair), NOT via adk.messages.create.
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=ToolReturnPart(tool_name="get_weather", content="Sunny, 72F", tool_call_id="c1"),
@@ -358,13 +417,17 @@ async def test_tool_return_emits_full_tool_response_message(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        content = messages.created[0]["content"]
+        assert messages.created == [], "adk.messages.create must not be called after reimplementation"
+        assert len(streaming.contexts) == 1
+        ctx = streaming.contexts[0]
+        assert ctx.closed is True
+        content = ctx.initial_content
         assert isinstance(content, ToolResponseContent)
         assert content.tool_call_id == "c1"
         assert content.name == "get_weather"
         assert content.content == "Sunny, 72F"
         assert content.author == "agent"
+        assert ctx.updates == [], "open+close only — no deltas for tool messages"
 
     async def test_tool_return_with_dict_content_preserves_structure(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
@@ -377,7 +440,7 @@ async def test_tool_return_with_dict_content_preserves_structure(
         and divergent from the sync converter which uses ``_tool_return_content``
         to return dicts as-is.
         """
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=ToolReturnPart(tool_name="t", content={"temp": 72, "sky": "clear"}, tool_call_id="c"),
@@ -385,7 +448,10 @@ async def test_tool_return_with_dict_content_preserves_structure(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        out = messages.created[0]["content"].content
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        out = streaming.contexts[0].initial_content.content
         assert out == {"temp": 72, "sky": "clear"}, (
             f"Expected the dict to survive verbatim; got {out!r}. "
             "If this is a Python repr string, the helper regressed to str(content)."
@@ -402,7 +468,7 @@ class WeatherResult(BaseModel):
             temp: int
             sky: str
 
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=ToolReturnPart(
@@ -414,13 +480,16 @@ class WeatherResult(BaseModel):
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        out = messages.created[0]["content"].content
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        out = streaming.contexts[0].initial_content.content
         assert out == {"temp": 72, "sky": "clear"}
 
     async def test_retry_prompt_part_surfaces_as_tool_response(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=RetryPromptPart(
@@ -432,8 +501,10 @@ async def test_retry_prompt_part_surfaces_as_tool_response(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        content = messages.created[0]["content"]
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        content = streaming.contexts[0].initial_content
         assert isinstance(content, ToolResponseContent)
         assert content.tool_call_id == "c1"
         # RetryPromptPart.content stringifies to the error description
@@ -446,9 +517,9 @@ async def test_text_then_tool_then_text_uses_separate_contexts_in_order(
     ) -> None:
         """End-to-end multi-step shape: text → tool call → tool result → more text.
 
-        Each text/reasoning segment must get its own streaming context that is
-        closed before the next one opens, and tool messages must interleave
-        correctly via ``adk.messages.create``.
+        AGX1-373 envelope change: tool messages now arrive via
+        streaming_task_message_context (open+close pairs) instead of
+        adk.messages.create. All four message types open streaming contexts.
         """
         streaming, messages = fake_adk
         events = [
@@ -474,18 +545,30 @@ async def test_text_then_tool_then_text_uses_separate_contexts_in_order(
         ]
         final = await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(streaming.contexts) == 2, "One context per text part — tool calls don't open streaming contexts"
+        # AGX1-373: all 4 messages (text, tool_request, tool_response, text)
+        # arrive via streaming_task_message_context.
+        assert messages.created == [], "adk.messages.create must not be called after reimplementation"
+        assert len(streaming.contexts) == 4
         assert all(ctx.closed for ctx in streaming.contexts)
-        assert _text_deltas(streaming.contexts[0]) == ["Looking up..."]
-        assert _text_deltas(streaming.contexts[1]) == ["It's sunny."]
 
-        # Two messages: tool request, then tool response — in that order.
-        assert [type(m["content"]).__name__ for m in messages.created] == [
-            "ToolRequestContent",
-            "ToolResponseContent",
-        ]
-        assert messages.created[0]["content"].tool_call_id == "c1"
-        assert messages.created[1]["content"].tool_call_id == "c1"
+        text_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, TextContent)]
+        tool_req_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolRequestContent)]
+        tool_resp_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolResponseContent)]
+        assert len(text_ctxs) == 2
+        assert len(tool_req_ctxs) == 1
+        assert len(tool_resp_ctxs) == 1
+
+        assert _text_deltas(text_ctxs[0]) == ["Looking up..."]
+        assert _text_deltas(text_ctxs[1]) == ["It's sunny."]
+
+        # Tool content is preserved verbatim.
+        assert tool_req_ctxs[0].initial_content.tool_call_id == "c1"
+        assert tool_resp_ctxs[0].initial_content.tool_call_id == "c1"
+
+        # Tool contexts carry no deltas (open+close only).
+        assert tool_req_ctxs[0].updates == []
+        assert tool_resp_ctxs[0].updates == []
+
         assert final == "It's sunny."
 
     async def test_new_text_part_after_text_closes_previous(
@@ -533,7 +616,11 @@ async def test_reasoning_then_text_closes_reasoning_context(
     async def test_tool_result_closes_any_open_streaming_context(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """A tool result arriving while a text context is open must close that context first."""
+        """A tool result arriving while a text context is open must close that context first.
+
+        AGX1-373: the tool response itself now also opens a streaming context
+        (open+close pair) rather than going through adk.messages.create.
+        """
         streaming, messages = fake_adk
         events = [
             PartStartEvent(index=0, part=TextPart(content="")),
@@ -548,7 +635,10 @@ async def test_tool_result_closes_any_open_streaming_context(
         assert streaming.contexts[0].closed is True, (
             "Helper must close any open streaming context before emitting a tool result message"
         )
-        assert len(messages.created) == 1
+        # AGX1-373: tool response arrives via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 2
+        assert isinstance(streaming.contexts[1].initial_content, ToolResponseContent)
 
 
 class TestDeltaForOrphanIndexIgnored:
@@ -584,7 +674,7 @@ async def on_tool_end(self, tool_call_id: str, result: Any) -> None:
     async def test_handler_records_start_and_end_for_each_tool_call(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         handler = self._RecordingHandler()
         events = [
             PartStartEvent(
@@ -605,11 +695,12 @@ async def test_handler_records_start_and_end_for_each_tool_call(
             tracing_handler=handler,  # type: ignore[arg-type]
         )
 
-        # Streaming side-effects still happen — tracing is additive.
-        assert [type(m["content"]).__name__ for m in messages.created] == [
-            "ToolRequestContent",
-            "ToolResponseContent",
-        ]
+        # AGX1-373: tool messages arrive via streaming_task_message_context.
+        # Tracing is still additive — both messages are delivered AND hooks fire.
+        assert messages.created == []
+        assert len(streaming.contexts) == 2
+        assert isinstance(streaming.contexts[0].initial_content, ToolRequestContent)
+        assert isinstance(streaming.contexts[1].initial_content, ToolResponseContent)
         # And both lifecycle hooks fired exactly once with the right payload.
         assert handler.starts == [
             {
@@ -680,8 +771,12 @@ async def test_handler_records_each_tool_in_multi_tool_run(
     async def test_omitting_handler_is_a_no_op_for_existing_behavior(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """Regression: passing no tracing handler preserves the pre-tracing behavior."""
-        _, messages = fake_adk
+        """Regression: passing no tracing handler preserves streaming behavior.
+
+        AGX1-373: tool messages arrive via streaming_task_message_context
+        regardless of whether tracing_handler is passed.
+        """
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
@@ -696,11 +791,11 @@ async def test_omitting_handler_is_a_no_op_for_existing_behavior(
             ),
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
-        # Exact same shape as before tracing existed.
-        assert [type(m["content"]).__name__ for m in messages.created] == [
-            "ToolRequestContent",
-            "ToolResponseContent",
-        ]
+        # AGX1-373: tool messages via streaming_task_message_context.
+        assert messages.created == []
+        assert len(streaming.contexts) == 2
+        content_types = [type(ctx.initial_content).__name__ for ctx in streaming.contexts]
+        assert content_types == ["ToolRequestContent", "ToolResponseContent"]
 
 
 class TestPydanticAITracingHandlerDeterministicIds:
@@ -867,3 +962,101 @@ async def boom() -> AsyncIterator[Any]:
             await stream_pydantic_ai_events(boom(), TASK_ID)
 
         assert streaming.contexts[0].closed is True
+
+
+# ---------------------------------------------------------------------------
+# Characterization test: lock the wire-level delivery shape for a representative
+# pydantic-ai run (text + tool call + tool response + more text).
+#
+# Step 1 (CURRENT behavior): written against the original implementation.
+# - Text/reasoning use adk.streaming.streaming_task_message_context.
+# - Tool messages use adk.messages.create (FakeMessagesModule.created list).
+# - Final text is the last text segment.
+#
+# Step 2 (POST-reimplementation on UnifiedEmitter / auto_send):
+# The assertions in TestCharacterizeWireShapeNew (below) lock the new shape.
+# Tool messages no longer go through adk.messages.create; they arrive via
+# streaming_task_message_context open+close pairs (Start+Done envelope).
+# This is the AGX1-373 accepted envelope change: logical content is identical.
+# ---------------------------------------------------------------------------
+
+
+class TestCharacterizeWireShape:
+    """Characterization tests: lock the wire-level delivery shape after reimplementation.
+
+    Uses FakeStreamingModule + FakeMessagesModule (the existing fake pair).
+
+    AGX1-373 shape (post-reimplementation on UnifiedEmitter / auto_send):
+    - Text/reasoning: streaming_task_message_context (open + deltas + close)
+    - Tool messages: streaming_task_message_context (open+close, no deltas)
+    - adk.messages.create is NOT called.
+    - Final text == last text segment only.
+
+    This class was first written to characterize the OLD shape (adk.messages.create
+    for tool messages) and was updated post-reimplementation to reflect the new
+    delivery channel. The logical content is identical; only the channel changed.
+    """
+
+    async def test_text_tool_text_new_wire_shape(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Representative run: text -> tool call -> tool response -> more text.
+
+        Post-AGX1-373 delivery shape:
+        - Four streaming contexts: text, tool_request, tool_response, text.
+        - adk.messages.create NOT called.
+        - Final text == "It's sunny." (last segment only, matching the
+          multi-step convention).
+        """
+        from pydantic_ai.messages import ToolReturnPart
+
+        streaming, messages = fake_adk
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Looking up...")),
+            PartEndEvent(index=0, part=TextPart(content="Looking up...")),
+            PartStartEvent(
+                index=1,
+                part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"),
+            ),
+            PartEndEvent(
+                index=1,
+                part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"),
+            ),
+            FunctionToolResultEvent(
+                part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"),
+            ),
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="It's sunny.")),
+            PartEndEvent(index=0, part=TextPart(content="It's sunny.")),
+        ]
+
+        final = await stream_pydantic_ai_events(_aiter(events), TASK_ID)
+
+        assert final == "It's sunny.", "multi-step: only the last text segment is returned"
+
+        # AGX1-373: all 4 messages arrive via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 4
+        assert all(ctx.closed for ctx in streaming.contexts)
+
+        content_types = [type(ctx.initial_content).__name__ for ctx in streaming.contexts]
+        assert content_types == [
+            "TextContent",
+            "ToolRequestContent",
+            "ToolResponseContent",
+            "TextContent",
+        ]
+
+        text_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, TextContent)]
+        tool_req_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolRequestContent)]
+        tool_resp_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolResponseContent)]
+
+        assert _text_deltas(text_ctxs[0]) == ["Looking up..."]
+        assert _text_deltas(text_ctxs[1]) == ["It's sunny."]
+        assert tool_req_ctxs[0].initial_content.tool_call_id == "c1"
+        assert tool_req_ctxs[0].initial_content.name == "get_weather"
+        assert tool_req_ctxs[0].updates == []
+        assert tool_resp_ctxs[0].initial_content.tool_call_id == "c1"
+        assert tool_resp_ctxs[0].initial_content.content == "Sunny"
+        assert tool_resp_ctxs[0].updates == []
diff --git a/tests/lib/adk/test_pydantic_ai_sync.py b/tests/lib/adk/test_pydantic_ai_sync.py
index 36d06200e..080bc5be8 100644
--- a/tests/lib/adk/test_pydantic_ai_sync.py
+++ b/tests/lib/adk/test_pydantic_ai_sync.py
@@ -3,9 +3,11 @@
 from __future__ import annotations
 
 import json
+import asyncio
 from typing import Any, AsyncIterator
 
 import pytest
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
 from pydantic_ai.messages import (
     TextPart,
     PartEndEvent,
@@ -481,3 +483,75 @@ async def test_author_is_agent(self, events: list[Any]):
             content = getattr(e, "content", None)
             if content is not None and hasattr(content, "author"):
                 assert content.author == "agent"
+
+
+class TestOnResultCallback:
+    """on_result callback: captures the terminal AgentRunResultEvent without
+    altering streaming output."""
+
+    def _make_result_event(self, output: Any = "hello") -> AgentRunResultEvent:
+        result = AgentRunResult(output=output, _output_tool_name=None)
+        return AgentRunResultEvent(result=result)
+
+    async def test_callback_invoked_once_with_result_event(self):
+        """on_result is called exactly once, with the AgentRunResultEvent."""
+        captured: list[AgentRunResultEvent] = []
+
+        def on_result(event: AgentRunResultEvent) -> None:
+            captured.append(event)
+
+        result_event = self._make_result_event("the answer")
+        events = [result_event]
+        await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events), on_result=on_result))
+
+        assert len(captured) == 1
+        assert captured[0] is result_event
+        assert captured[0].result.output == "the answer"
+
+    async def test_streaming_output_unchanged_with_callback(self):
+        """Yielded StreamTaskMessage* sequence is identical whether on_result is set or not."""
+        result_event = self._make_result_event()
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hi")),
+            PartEndEvent(index=0, part=TextPart(content="hi")),
+            result_event,
+        ]
+
+        captured: list[AgentRunResultEvent] = []
+        out_with = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events), on_result=captured.append))
+        out_without = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events)))
+
+        assert len(out_with) == len(out_without)
+        for a, b in zip(out_with, out_without):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+        assert len(captured) == 1
+
+    async def test_no_callback_no_error(self):
+        """AgentRunResultEvent is silently ignored when on_result is None."""
+        result_event = self._make_result_event()
+        events = [result_event]
+        out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events)))
+        assert out == []
+
+    async def test_async_callback_is_awaited(self):
+        """An async on_result callable is properly awaited.
+
+        The callback suspends (``await asyncio.sleep(0)``) before recording its
+        side effect, so ``awaited`` is only populated if the converter actually
+        awaits the returned coroutine — distinguishing "awaited" from
+        "called-but-not-awaited."
+        """
+        awaited: list[AgentRunResultEvent] = []
+
+        async def on_result_async(event: AgentRunResultEvent) -> None:
+            await asyncio.sleep(0)
+            awaited.append(event)
+
+        result_event = self._make_result_event("async_output")
+        events = [result_event]
+        await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events), on_result=on_result_async))
+
+        assert len(awaited) == 1
+        assert awaited[0].result.output == "async_output"
diff --git a/tests/lib/adk/test_pydantic_ai_sync_unified.py b/tests/lib/adk/test_pydantic_ai_sync_unified.py
new file mode 100644
index 000000000..f920418de
--- /dev/null
+++ b/tests/lib/adk/test_pydantic_ai_sync_unified.py
@@ -0,0 +1,209 @@
+"""Tests for the unified sync (HTTP ACP) path: PydanticAITurn + UnifiedEmitter.
+
+Exercises the path documented in _pydantic_ai_sync.py under "Recommended: unified surface":
+- events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough)
+- with a trace context + fake tracing backend, tool spans are derived (start_span / end_span called)
+- with a trace context + fake tracing backend, reasoning spans are derived
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
+from pydantic_ai.usage import RunUsage
+from pydantic_ai.messages import (
+    TextPart,
+    PartEndEvent,
+    ThinkingPart,
+    ToolCallPart,
+    TextPartDelta,
+    PartDeltaEvent,
+    PartStartEvent,
+    ThinkingPartDelta,
+    ToolCallPartDelta,
+)
+
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+class _FakeSpan:
+    def __init__(self, name: str):
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, str | None, Any]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None):
+        self.started.append((name, parent_id, input))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id, span):
+        self.ended.append((span.name, span.output))
+
+
+def _make_result_event(usage: RunUsage | None = None) -> AgentRunResultEvent:
+    result = AgentRunResult(output="done", _output_tool_name=None)
+    if usage is not None:
+        result._state.usage = usage
+    return AgentRunResultEvent(result=result)
+
+
+class TestUnifiedSyncPathPassthrough:
+    """The events forwarded by yield_turn are identical to PydanticAITurn.events."""
+
+    async def test_text_stream_passthrough(self):
+        raw_events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")),
+            PartEndEvent(index=0, part=TextPart(content="hello")),
+        ]
+
+        turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        direct = await _collect(turn_a.events)
+
+        turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        via_emitter = await _collect(emitter.yield_turn(turn_b))
+
+        assert len(via_emitter) == len(direct)
+        for a, b in zip(via_emitter, direct):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+    async def test_tool_call_stream_passthrough(self):
+        raw_events = [
+            PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")),
+            PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"),
+            ),
+        ]
+
+        turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        direct = await _collect(turn_a.events)
+
+        turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        via_emitter = await _collect(emitter.yield_turn(turn_b))
+
+        assert len(via_emitter) == len(direct)
+        for a, b in zip(via_emitter, direct):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+
+class TestUnifiedSyncPathSpanDerivation:
+    """With trace context + fake tracing, spans are derived from the stream."""
+
+    async def test_tool_span_opened_and_closed(self):
+        """A tool call produces start_span + end_span on the fake tracing backend."""
+        from pydantic_ai.messages import ToolReturnPart, FunctionToolResultEvent
+
+        tool_events = [
+            PartStartEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"),
+            ),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"),
+            ),
+            FunctionToolResultEvent(
+                part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"),
+            ),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake)
+
+        events = await _collect(emitter.yield_turn(turn))
+
+        assert len(events) >= 2, "at least Start(tool) + Done + Full(response)"
+        assert len(fake.started) == 1, "one tool span opened"
+        assert len(fake.ended) == 1, "one tool span closed"
+        span_name, parent_id, span_input = fake.started[0]
+        assert span_name == "Bash"
+        assert parent_id == "p"
+        closed_name, closed_output = fake.ended[0]
+        assert closed_name == "Bash"
+
+    async def test_reasoning_span_opened_and_closed(self):
+        """A thinking/reasoning block produces start_span + end_span."""
+        reasoning_events = [
+            PartStartEvent(index=0, part=ThinkingPart(content="")),
+            PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")),
+            PartEndEvent(index=0, part=ThinkingPart(content="let me think")),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake)
+
+        await _collect(emitter.yield_turn(turn))
+
+        assert len(fake.started) == 1, "one reasoning span opened"
+        assert len(fake.ended) == 1, "one reasoning span closed"
+        span_name, parent_id, _ = fake.started[0]
+        assert span_name == "reasoning"
+        assert parent_id == "p"
+
+    async def test_no_trace_id_means_no_spans(self):
+        """When trace_id is None, no spans are derived even with a fake tracing backend."""
+        raw_events = [
+            PartStartEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"),
+            ),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"),
+            ),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake)
+
+        await _collect(emitter.yield_turn(turn))
+
+        assert fake.started == [], "no spans when trace_id is absent"
+        assert fake.ended == []
+
+    async def test_tracer_false_suppresses_spans_even_with_trace_id(self):
+        """tracer=False disables span derivation regardless of trace_id."""
+        raw_events = [
+            PartStartEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"),
+            ),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"),
+            ),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake)
+
+        await _collect(emitter.yield_turn(turn))
+
+        assert fake.started == []
+        assert fake.ended == []
diff --git a/tests/lib/adk/test_pydantic_ai_turn.py b/tests/lib/adk/test_pydantic_ai_turn.py
new file mode 100644
index 000000000..46bf247a3
--- /dev/null
+++ b/tests/lib/adk/test_pydantic_ai_turn.py
@@ -0,0 +1,276 @@
+"""Tests for PydanticAITurn and pydantic_ai_usage_to_turn_usage."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
+from pydantic_ai.usage import RunUsage
+from pydantic_ai.messages import (
+    TextPart,
+    PartEndEvent,
+    TextPartDelta,
+    PartDeltaEvent,
+    PartStartEvent,
+)
+
+from agentex.lib.core.harness import HarnessTurn
+from agentex.lib.adk._modules._pydantic_ai_turn import (
+    PydanticAITurn,
+    pydantic_ai_usage_to_turn_usage,
+)
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+def _make_result_event(output: Any = "done", usage: RunUsage | None = None) -> AgentRunResultEvent:
+    result = AgentRunResult(output=output, _output_tool_name=None)
+    if usage is not None:
+        result._state.usage = usage
+    return AgentRunResultEvent(result=result)
+
+
+class TestUsageNormalization:
+    def test_usage_normalization_maps_fields(self):
+        """Real RunUsage fields map correctly onto TurnUsage."""
+        usage = RunUsage(
+            requests=3,
+            input_tokens=200,
+            output_tokens=80,
+            cache_read_tokens=25,
+        )
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+
+        assert turn_usage.model == "openai:gpt-4o"
+        assert turn_usage.input_tokens == 200
+        assert turn_usage.output_tokens == 80
+        assert turn_usage.num_llm_calls == 3
+
+    def test_total_tokens_is_computed(self):
+        """RunUsage.total_tokens is a computed property; we surface it correctly."""
+        usage = RunUsage(input_tokens=100, output_tokens=50)
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+        assert turn_usage.total_tokens == 150
+
+    def test_cache_read_tokens_mapped_to_cached_input_tokens(self):
+        usage = RunUsage(input_tokens=100, output_tokens=50, cache_read_tokens=20)
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+        assert turn_usage.cached_input_tokens == 20
+
+    def test_none_model(self):
+        """model=None is preserved."""
+        usage = RunUsage()
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model=None)
+        assert turn_usage.model is None
+
+    def test_all_zero_usage_preserves_real_zeros(self):
+        """An all-zero RunUsage maps real 0s through (not None).
+
+        RunUsage token fields are ints defaulting to 0. A 0 is a genuine
+        value (e.g. a cache-hit with 0 output tokens), not "unknown", so it
+        must survive normalization as 0 rather than being coerced to None.
+        """
+        usage = RunUsage()
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+        assert turn_usage.num_llm_calls == 0
+        assert turn_usage.input_tokens == 0
+        assert turn_usage.output_tokens == 0
+        assert turn_usage.cached_input_tokens == 0
+        assert turn_usage.total_tokens == 0
+
+    def test_missing_field_degrades_to_none(self):
+        """A usage object MISSING a field maps that field to None (defensive getattr).
+
+        Guards the version-rename guarantee: if pydantic-ai renames a field,
+        the absent attribute degrades to None rather than raising.
+        """
+
+        class StubUsage:
+            requests = 2
+            input_tokens = 100
+            # no output_tokens / cache_read_tokens / total_tokens attributes
+
+        turn_usage = pydantic_ai_usage_to_turn_usage(StubUsage(), model="openai:gpt-4o")
+        assert turn_usage.num_llm_calls == 2
+        assert turn_usage.input_tokens == 100
+        assert turn_usage.output_tokens is None
+        assert turn_usage.cached_input_tokens is None
+        assert turn_usage.total_tokens is None
+
+
+class TestPydanticAITurn:
+    async def test_turn_satisfies_harness_turn_protocol(self):
+        """PydanticAITurn is structurally compatible with HarnessTurn."""
+        turn = PydanticAITurn(_aiter([]), model="openai:gpt-4o")
+        assert isinstance(turn, HarnessTurn)
+
+    async def test_usage_before_exhaustion_returns_default(self):
+        """usage() before iterating events returns default TurnUsage (model set, tokens None)."""
+        result_event = _make_result_event(usage=RunUsage(requests=1, input_tokens=100, output_tokens=40))
+        events = [result_event]
+        turn = PydanticAITurn(_aiter(events), model="openai:gpt-4o")
+
+        # Do NOT exhaust events — check usage pre-run
+        pre_usage = turn.usage()
+        assert pre_usage.model == "openai:gpt-4o"
+        assert pre_usage.input_tokens is None
+        assert pre_usage.output_tokens is None
+        assert pre_usage.num_llm_calls is None
+
+    async def test_turn_events_and_usage(self):
+        """Driving events to exhaustion populates usage from the terminal event."""
+        known_usage = RunUsage(
+            requests=2,
+            input_tokens=300,
+            output_tokens=120,
+            cache_read_tokens=30,
+        )
+        result_event = _make_result_event(usage=known_usage)
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hi")),
+            PartEndEvent(index=0, part=TextPart(content="hi")),
+            result_event,
+        ]
+        turn = PydanticAITurn(_aiter(events), model="openai:gpt-4o")
+
+        collected = await _collect(turn.events)
+
+        # Events match bare converter output (Start + Delta + Done = 3 events)
+        assert len(collected) == 3
+
+        # Usage is populated after exhaustion
+        usage = turn.usage()
+        assert usage.model == "openai:gpt-4o"
+        assert usage.input_tokens == 300
+        assert usage.output_tokens == 120
+        assert usage.cached_input_tokens == 30
+        assert usage.num_llm_calls == 2
+        assert usage.total_tokens == 420
+
+    async def test_events_match_bare_converter(self):
+        """Yielded events are identical to bare convert_pydantic_ai_to_agentex_events output."""
+        from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
+
+        text_events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Hello")),
+            PartEndEvent(index=0, part=TextPart(content="Hello")),
+        ]
+
+        turn = PydanticAITurn(_aiter(text_events), model="openai:gpt-4o")
+        turn_out = await _collect(turn.events)
+
+        bare_out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(text_events)))
+
+        assert len(turn_out) == len(bare_out)
+        for a, b in zip(turn_out, bare_out):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+    async def test_usage_captured_via_real_usage_accessor(self):
+        """Drive the turn through the REAL ``result.usage`` property accessor.
+
+        The production code reads ``getattr(run_result, "usage", None)``, which
+        on this pydantic-ai version resolves the ``_DeprecatedCallableRunUsage``
+        property (NOT ``_state.usage`` directly). This asserts that the real
+        accessor path the converter uses captures the run usage. Constructing
+        the event without our test's ``_state`` shortcut: we set ``_state.usage``
+        only because that is the sole supported way to seed an
+        ``AgentRunResult``, but we then assert capture happens through the
+        public ``.usage`` attribute access (verified below).
+        """
+        known_usage = RunUsage(requests=4, input_tokens=512, output_tokens=64)
+        result = AgentRunResult(output="done", _output_tool_name=None)
+        result._state.usage = known_usage
+        result_event = AgentRunResultEvent(result=result)
+
+        # Sanity: the value is reachable via the real public accessor the
+        # production code uses (not just via the private _state). The
+        # _DeprecatedCallableRunUsage property wraps the value, so compare by
+        # equality rather than identity.
+        accessed = getattr(result_event.result, "usage", None)
+        assert accessed is not None
+        assert accessed.input_tokens == 512
+        assert accessed.requests == 4
+
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartEndEvent(index=0, part=TextPart(content="")),
+            result_event,
+        ]
+        turn = PydanticAITurn(_aiter(events), model="anthropic:claude-3-5-sonnet")
+        await _collect(turn.events)
+
+        usage = turn.usage()
+        assert usage.model == "anthropic:claude-3-5-sonnet"
+        assert usage.input_tokens == 512
+        assert usage.output_tokens == 64
+        assert usage.num_llm_calls == 4
+
+    async def test_no_usage_event_leaves_default_usage(self):
+        """If the stream has no AgentRunResultEvent, usage() returns the default (tokens None)."""
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartEndEvent(index=0, part=TextPart(content="")),
+        ]
+        turn = PydanticAITurn(_aiter(events), model="openai:gpt-4o")
+        await _collect(turn.events)
+
+        usage = turn.usage()
+        assert usage.model == "openai:gpt-4o"
+        assert usage.input_tokens is None
+        assert usage.num_llm_calls is None
+
+
+class TestToolRequestStreaming:
+    """PydanticAITurn.events equals the bare converter output unconditionally.
+
+    The foundation auto_send delivers Start+ToolRequestDelta+Done natively
+    (AGX1-377), so no coalescing is needed on either channel.
+    """
+
+    async def test_events_match_bare_converter_for_streamed_tool_call(self):
+        """PydanticAITurn yields a ToolRequestDelta for a streamed-args tool call
+        — i.e. it is byte-for-byte the bare converter output, preserving
+        argument-token streaming on the sync/yield channel."""
+        from pydantic_ai.messages import ToolCallPart, ToolCallPartDelta
+
+        from agentex.types.tool_request_delta import ToolRequestDelta
+        from agentex.types.task_message_update import StreamTaskMessageDelta
+        from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
+
+        tool_events = [
+            PartStartEvent(index=0, part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1")),
+            PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"city":"Paris"}')),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"),
+            ),
+        ]
+
+        turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o")
+        turn_out = await _collect(turn.events)
+
+        bare_out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(tool_events)))
+
+        # Turn is identical to the bare converter.
+        assert len(turn_out) == len(bare_out)
+        for a, b in zip(turn_out, bare_out):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+        # The arg-streaming delta is present.
+        deltas = [
+            e for e in turn_out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ToolRequestDelta)
+        ]
+        assert len(deltas) == 1, "streamed tool-call args must surface as a ToolRequestDelta"
+        assert isinstance(deltas[0].delta, ToolRequestDelta)
+        assert deltas[0].delta.arguments_delta == '{"city":"Paris"}'
diff --git a/tests/lib/adk/test_tracing_module.py b/tests/lib/adk/test_tracing_module.py
index 52d5d3f82..58d5d4a85 100644
--- a/tests/lib/adk/test_tracing_module.py
+++ b/tests/lib/adk/test_tracing_module.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
 from datetime import datetime, timezone
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from temporalio.exceptions import ActivityError
 
 import agentex.lib.adk._modules.tracing as _tracing_mod
 from agentex.types.span import Span
@@ -26,6 +29,24 @@ def _make_module() -> tuple[AsyncMock, TracingModule]:
     return mock_service, module
 
 
+def _make_activity_error() -> ActivityError:
+    return ActivityError(
+        "activity timed out",
+        scheduled_event_id=1,
+        started_event_id=2,
+        identity="worker-1",
+        activity_type="start-span",
+        activity_id="activity-1",
+        retry_state=None,
+    )
+
+
+def _make_metric_meter() -> MagicMock:
+    mock_meter = MagicMock()
+    mock_meter.create_counter.return_value = MagicMock()
+    return mock_meter
+
+
 class TestStartSpan:
     async def test_start_span_with_task_id(self):
         mock_service, module = _make_module()
@@ -87,6 +108,128 @@ async def test_end_span_preserves_task_id(self):
         mock_service.end_span.assert_called_once_with(trace_id="trace-123", span=span)
 
 
+class TestTracingModuleTemporalPath:
+    async def test_start_span_in_workflow_returns_none_when_activity_fails(self):
+        mock_service, module = _make_module()
+        mock_meter = _make_metric_meter()
+
+        with patch.object(_tracing_mod, "in_temporal_workflow", return_value=True), \
+                patch.object(_tracing_mod, "ActivityHelpers") as mock_helpers, \
+                patch.object(_tracing_mod.workflow, "logger") as mock_logger, \
+                patch.object(_tracing_mod.workflow, "metric_meter", return_value=mock_meter):
+            mock_helpers.execute_activity = AsyncMock(side_effect=_make_activity_error())
+            result = await module.start_span(trace_id="trace-123", name="test-span")
+
+        assert result is None
+        mock_logger.warning.assert_called_once()
+        mock_meter.create_counter.assert_called_once_with(
+            _tracing_mod.TEMPORAL_SPAN_ACTIVITY_DROPPED_METRIC,
+            description="Temporal tracing span activities dropped after fail-open",
+            unit="1",
+        )
+        mock_meter.create_counter.return_value.add.assert_called_once_with(
+            1, {"event_type": "start"}
+        )
+        mock_helpers.execute_activity.assert_called_once()
+        mock_service.start_span.assert_not_called()
+
+    async def test_end_span_in_workflow_returns_span_when_activity_fails(self):
+        mock_service, module = _make_module()
+        span = _make_span()
+        mock_meter = _make_metric_meter()
+
+        with patch.object(_tracing_mod, "in_temporal_workflow", return_value=True), \
+                patch.object(_tracing_mod, "ActivityHelpers") as mock_helpers, \
+                patch.object(_tracing_mod.workflow, "logger") as mock_logger, \
+                patch.object(_tracing_mod.workflow, "metric_meter", return_value=mock_meter):
+            mock_helpers.execute_activity = AsyncMock(side_effect=_make_activity_error())
+            result = await module.end_span(trace_id="trace-123", span=span)
+
+        assert result == span
+        mock_logger.warning.assert_called_once()
+        mock_meter.create_counter.assert_called_once_with(
+            _tracing_mod.TEMPORAL_SPAN_ACTIVITY_DROPPED_METRIC,
+            description="Temporal tracing span activities dropped after fail-open",
+            unit="1",
+        )
+        mock_meter.create_counter.return_value.add.assert_called_once_with(
+            1, {"event_type": "end"}
+        )
+        mock_helpers.execute_activity.assert_called_once()
+        mock_service.end_span.assert_not_called()
+
+    async def test_context_manager_skips_end_when_temporal_start_fails(self):
+        mock_service, module = _make_module()
+
+        with patch.object(_tracing_mod, "in_temporal_workflow", return_value=True), \
+                patch.object(_tracing_mod, "ActivityHelpers") as mock_helpers, \
+                patch.object(_tracing_mod.workflow, "logger"):
+            mock_helpers.execute_activity = AsyncMock(side_effect=_make_activity_error())
+            async with module.span(trace_id="trace-123", name="test-span") as span:
+                assert span is None
+
+        mock_helpers.execute_activity.assert_called_once()
+        mock_service.start_span.assert_not_called()
+        mock_service.end_span.assert_not_called()
+
+    async def test_start_span_in_workflow_propagates_unexpected_errors(self):
+        mock_service, module = _make_module()
+
+        with patch.object(_tracing_mod, "in_temporal_workflow", return_value=True), \
+                patch.object(_tracing_mod, "ActivityHelpers") as mock_helpers:
+            mock_helpers.execute_activity = AsyncMock(side_effect=RuntimeError("bad response shape"))
+            try:
+                await module.start_span(trace_id="trace-123", name="test-span")
+            except RuntimeError as exc:
+                assert str(exc) == "bad response shape"
+            else:
+                raise AssertionError("Expected unexpected errors to propagate")
+
+        mock_helpers.execute_activity.assert_called_once()
+        mock_service.start_span.assert_not_called()
+
+    async def test_start_span_in_workflow_propagates_cancellation(self):
+        mock_service, module = _make_module()
+        activity_error = _make_activity_error()
+        mock_meter = _make_metric_meter()
+
+        with patch.object(_tracing_mod, "in_temporal_workflow", return_value=True), \
+                patch.object(_tracing_mod, "ActivityHelpers") as mock_helpers, \
+                patch.object(_tracing_mod, "is_cancelled_exception", return_value=True), \
+                patch.object(_tracing_mod.workflow, "logger") as mock_logger, \
+                patch.object(_tracing_mod.workflow, "metric_meter", return_value=mock_meter):
+            mock_helpers.execute_activity = AsyncMock(side_effect=activity_error)
+
+            with pytest.raises(ActivityError):
+                await module.start_span(trace_id="trace-123", name="test-span")
+
+        mock_logger.warning.assert_not_called()
+        mock_meter.create_counter.assert_not_called()
+        mock_helpers.execute_activity.assert_called_once()
+        mock_service.start_span.assert_not_called()
+
+    async def test_end_span_in_workflow_propagates_cancellation(self):
+        mock_service, module = _make_module()
+        span = _make_span()
+        activity_error = _make_activity_error()
+        mock_meter = _make_metric_meter()
+
+        with patch.object(_tracing_mod, "in_temporal_workflow", return_value=True), \
+                patch.object(_tracing_mod, "ActivityHelpers") as mock_helpers, \
+                patch.object(_tracing_mod, "is_cancelled_exception", return_value=True), \
+                patch.object(_tracing_mod.workflow, "logger") as mock_logger, \
+                patch.object(_tracing_mod.workflow, "metric_meter", return_value=mock_meter):
+            mock_helpers.execute_activity = AsyncMock(side_effect=activity_error)
+
+            with pytest.raises(ActivityError):
+                await module.end_span(trace_id="trace-123", span=span)
+
+        mock_logger.warning.assert_not_called()
+        mock_meter.create_counter.assert_not_called()
+        mock_helpers.execute_activity.assert_called_once()
+        mock_service.end_span.assert_not_called()
+
+
 class TestSpanContextManager:
     async def test_span_context_manager_forwards_task_id(self):
         mock_service, module = _make_module()
diff --git a/tests/lib/core/harness/__init__.py b/tests/lib/core/harness/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/lib/core/harness/_fakes.py b/tests/lib/core/harness/_fakes.py
new file mode 100644
index 000000000..f9fd34a45
--- /dev/null
+++ b/tests/lib/core/harness/_fakes.py
@@ -0,0 +1,63 @@
+"""Shared test doubles for the unified harness test suites.
+
+A single superset implementation of the in-memory tracing backend used across
+the harness tests. Three recording shapes were previously duplicated:
+
+- Shape-1 (richest): ``started`` = ``(name, parent_id, input)`` 3-tuples,
+  ``ended`` = ``(name, output)`` 2-tuples, plus an ``ended_spans`` list of the
+  closed ``FakeSpan`` objects (which carry ``.name``, ``.output``, ``.data``).
+- Shape-2: ``started`` = ``(name, parent_id)`` 2-tuples, ``ended`` =
+  ``(name, output)``.
+- Shape-3: ``started`` = bare names, ``ended`` = bare outputs.
+
+``FakeTracing`` records the richest (shape-1) form and exposes read-only
+convenience properties (``started_names``, ``started_pairs``,
+``ended_outputs``) so shape-2 and shape-3 assertions stay clean.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+class FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+        self.data: Any = None
+
+
+class FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, Any, Any]] = []
+        self.ended: list[tuple[str, Any]] = []
+        self.ended_spans: list[FakeSpan] = []
+
+    async def start_span(
+        self,
+        *,
+        trace_id: str,
+        name: str,
+        input: Any = None,
+        parent_id: Any = None,
+        data: Any = None,
+        task_id: Any = None,
+    ) -> FakeSpan:
+        self.started.append((name, parent_id, input))
+        return FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+        self.ended_spans.append(span)
+
+    @property
+    def started_names(self) -> list[str]:
+        return [name for (name, _parent, _input) in self.started]
+
+    @property
+    def started_pairs(self) -> list[tuple[str, Any]]:
+        return [(name, parent) for (name, parent, _input) in self.started]
+
+    @property
+    def ended_outputs(self) -> list[Any]:
+        return [output for (_name, output) in self.ended]
diff --git a/tests/lib/core/harness/conformance/__init__.py b/tests/lib/core/harness/conformance/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/lib/core/harness/conformance/conftest.py b/tests/lib/core/harness/conformance/conftest.py
new file mode 100644
index 000000000..e4da7f1e2
--- /dev/null
+++ b/tests/lib/core/harness/conformance/conftest.py
@@ -0,0 +1,21 @@
+"""Conformance-suite test setup.
+
+Eagerly import every per-harness conformance module so each one's module-level
+``register(...)`` calls run before any test executes. This makes
+``all_fixtures()`` complete and independent of pytest's collection/import order
+(the runner documents that cross-module registration order is not guaranteed),
+so the cross-harness ``test_span_derivation_is_deterministic`` guard in
+``test_conformance.py`` covers the full fixture set even when this directory is
+run in isolation.
+"""
+
+from __future__ import annotations
+
+# Importing these for their registration side effects only.
+from . import (
+    test_codex_conformance,  # noqa: F401
+    test_openai_conformance,  # noqa: F401
+    test_langgraph_conformance,  # noqa: F401
+    test_claude_code_conformance,  # noqa: F401
+    test_pydantic_ai_conformance,  # noqa: F401
+)
diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py
new file mode 100644
index 000000000..02a07f726
--- /dev/null
+++ b/tests/lib/core/harness/conformance/runner.py
@@ -0,0 +1,507 @@
+"""Shared conformance engine: every harness tap registers fixtures here.
+
+A fixture is (name, list[StreamTaskMessage]). The runner asserts two things:
+
+1. **Cross-channel logical equivalence**: yield_events and auto_send produce the
+   same *logical* sequence of delivered message contents. "Logical" means we
+   normalise away the streaming-envelope difference:
+   - yield channel delivers StreamTaskMessageFull(ToolResponseContent) verbatim.
+   - auto_send channel delivers the same tool-response by opening a streaming
+     context with the full content and closing it immediately (Start+Done on the
+     wire), not a Full event.
+   Both reduce to the same LogicalDelivery(type, identity, payload) tuple; the
+   conformance test compares those normalised sequences.
+
+   `payload` carries the content that callers actually consume:
+   - text: initial_content.content prepended, then accumulated delta string
+   - reasoning: initial_content.summary joined, then accumulated delta string
+   - tool_request: the arguments dict (JSON-sorted), from Start content
+   - tool_response: the content value (str)
+   This catches a channel that delivers the right structural shape but corrupts,
+   drops, or omits initial_content (including reasoning summary) or payload.
+
+2. **Span signal equivalence**: each channel is driven with its own recording
+   tracer that captures every SpanSignal it actually receives in handle(); the
+   two channels' recorded signal lists must be identical. Comparing what each
+   channel genuinely emitted (rather than re-deriving from the events) catches a
+   regression where a channel skips deriver.observe() for some event type.
+
+Registry shared-state hazard: `_REGISTRY` is process-global. Every `test_*.py`
+module that calls `register()` at import time contributes to it, so a module
+that parametrizes over `all_fixtures()` will see fixtures registered by ANY
+other conformance module imported earlier in the same pytest process (collection
+order is not guaranteed). To stay deterministic, each future harness conformance
+module should register and parametrize over its OWN fixtures (e.g. keep a
+module-local list it both registers and parametrizes), rather than relying on
+cross-module global accumulation via `all_fixtures()`.
+
+Design decision — Full-message handling in auto_send
+----------------------------------------------------
+auto_send posts a StreamTaskMessageFull (tool_request or tool_response) by
+opening a streaming context with the full content and closing it immediately,
+rather than calling adk.messages.create. This open+close approach is retained
+because:
+  - StreamingTaskMessageContext.close() persists initial_content when no deltas
+    have been streamed, so the message IS correctly persisted.
+  - It mirrors the pattern already used by the real langgraph streaming helper
+    (now in _langgraph_turn.py), keeping behavioural parity.
+  - Switching to adk.messages.create would require an additional injectable
+    dependency, adding surface area for no observable benefit.
+The conformance test treats this as an ACCEPTABLE envelope difference: at the
+logical-content level, Full(ToolResponseContent) from yield and
+Start(content)+Done from auto_send are equivalent. The recorded span signals are
+identical because both adapters drive the same SpanDeriver.observe() call
+sequence and forward every signal to their tracer.
+
+auto_send DELIVERS streamed tool-request messages (Start+Done): both channels
+produce a LogicalDelivery for a streamed tool_request, and the cross-channel
+assertion verifies it is delivered on both.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, NamedTuple, override
+from dataclasses import dataclass
+
+from agentex.types.text_delta import TextDelta
+from agentex.types.task_message import TaskMessage
+from agentex.lib.core.harness.types import SpanSignal, StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+from .._fakes import FakeTracing
+
+
+@dataclass
+class Fixture:
+    name: str
+    events: list[StreamTaskMessage]
+
+
+_REGISTRY: list[Fixture] = []
+
+
+def register(fixture: Fixture) -> None:
+    _REGISTRY.append(fixture)
+
+
+def all_fixtures() -> list[Fixture]:
+    return list(_REGISTRY)
+
+
+def run_pure_async(coro: Any) -> Any:
+    """Drive a *pure* (I/O-free) coroutine to completion without an event loop.
+
+    Conformance fixtures are built at import time so they can parametrize the
+    tests below. The fixture-building coroutines only iterate in-memory events
+    and never suspend on a real future, so we step them by hand instead of
+    ``asyncio.run()``. ``asyncio.run()`` at import raises ``RuntimeError`` when a
+    loop is already running (programmatic pytest, a Jupyter kernel, or a
+    session-scoped asyncio loop); this driver is unaffected by ambient loop
+    state. It raises if the coroutine ever suspends on real I/O.
+    """
+    try:
+        coro.send(None)
+    except StopIteration as stop:
+        return stop.value
+    coro.close()
+    raise RuntimeError("conformance fixture build unexpectedly suspended on real I/O")
+
+
+def derive_all(events: list[StreamTaskMessage]) -> list[SpanSignal]:
+    d = SpanDeriver()
+    out: list[SpanSignal] = []
+    for e in events:
+        out.extend(d.observe(e))
+    out.extend(d.flush())
+    return out
+
+
+# ---------------------------------------------------------------------------
+# Logical delivery normalisation
+# ---------------------------------------------------------------------------
+
+
+class LogicalDelivery(NamedTuple):
+    """A single logically-delivered message, channel-agnostic.
+
+    `content_type` is the .type of the content (e.g. "text", "reasoning",
+    "tool_request", "tool_response"). `identity` is a frozenset of key=value
+    pairs that uniquely identify the content (e.g. tool_call_id for tool
+    messages, or index for text/reasoning). `payload` is a stable string
+    representation of the content callers actually consume:
+    - text: initial_content.content prepended to accumulated delta strings
+    - reasoning: initial_content.summary joined, prepended to accumulated
+      reasoning-content delta strings
+    - tool_request: JSON-sorted arguments from Start content
+    - tool_response: str(content) from Full event
+    """
+
+    content_type: str
+    identity: frozenset[tuple[str, Any]]
+    payload: str = ""
+
+
+def _yield_logical_deliveries(events: list[StreamTaskMessage]) -> list[LogicalDelivery]:
+    """Extract logical deliveries from the yield channel's event list.
+
+    The yield channel forwards events verbatim. A logical delivery is:
+    - A Full event (tool_request / tool_response): content delivered as-is.
+    - A Start + ... + Done sequence for text/reasoning/tool_request content.
+
+    The `payload` field captures the content callers consume:
+    - text: initial_content.content (from Start) prepended to accumulated deltas
+    - reasoning: initial_content.summary joined (from Start) prepended to
+      accumulated reasoning-content deltas (this catches a channel that drops
+      the summary)
+    - tool_request: JSON-sorted arguments from the Start content (delivered on
+      both channels)
+    - tool_response: str(content) from Full event
+    """
+    from agentex.types.text_content import TextContent
+    from agentex.types.reasoning_content import ReasoningContent
+    from agentex.types.tool_request_content import ToolRequestContent
+
+    deliveries: list[LogicalDelivery] = []
+    # Track which indices had a Start so we can pair with Done
+    started: dict[int, Any] = {}  # index -> initial content
+    # Accumulate delta text per index (seed with initial_content text if present)
+    accumulated: dict[int, list[str]] = {}  # index -> list of delta strings
+
+    for event in events:
+        if isinstance(event, StreamTaskMessageStart):
+            if event.index is not None:
+                started[event.index] = event.content
+                # Seed accumulator with initial_content so a channel that drops
+                # initial_content but delivers deltas correctly will fail.
+                seed: list[str] = []
+                if isinstance(event.content, TextContent) and event.content.content:
+                    seed = [event.content.content]
+                elif isinstance(event.content, ReasoningContent) and event.content.summary:
+                    seed = list(event.content.summary)
+                accumulated[event.index] = seed
+        elif isinstance(event, StreamTaskMessageDelta):
+            if event.index is not None and event.delta is not None:
+                if isinstance(event.delta, TextDelta) and event.delta.text_delta:
+                    accumulated.setdefault(event.index, []).append(event.delta.text_delta)
+                elif isinstance(event.delta, ReasoningContentDelta) and event.delta.content_delta:
+                    accumulated.setdefault(event.index, []).append(event.delta.content_delta)
+        elif isinstance(event, StreamTaskMessageDone):
+            if event.index is not None and event.index in started:
+                content = started.pop(event.index)
+                deltas = accumulated.pop(event.index, [])
+                ctype = getattr(content, "type", None) or ""
+                if ctype in ("text", "reasoning"):
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset({("index", event.index)}),
+                            payload="".join(deltas),
+                        )
+                    )
+                elif ctype == "tool_request" and isinstance(content, ToolRequestContent):
+                    # auto_send delivers streamed tool-request messages. Emit a
+                    # delivery here so the cross-channel assertion verifies it is
+                    # present on both channels.
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset(
+                                {
+                                    ("tool_call_id", content.tool_call_id),
+                                    ("name", content.name),
+                                }
+                            ),
+                            payload=json.dumps(content.arguments, sort_keys=True),
+                        )
+                    )
+        elif isinstance(event, StreamTaskMessageFull):
+            content = event.content
+            ctype = getattr(content, "type", None) or ""
+            if ctype == "tool_response":
+                from agentex.types.tool_response_content import ToolResponseContent
+
+                if isinstance(content, ToolResponseContent):
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset(
+                                {
+                                    ("tool_call_id", content.tool_call_id),
+                                    ("name", content.name),
+                                }
+                            ),
+                            payload=str(content.content),
+                        )
+                    )
+            elif ctype == "tool_request":
+                from agentex.types.tool_request_content import ToolRequestContent
+
+                if isinstance(content, ToolRequestContent):
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset(
+                                {
+                                    ("tool_call_id", content.tool_call_id),
+                                    ("name", content.name),
+                                }
+                            ),
+                            payload=json.dumps(content.arguments, sort_keys=True),
+                        )
+                    )
+
+    return deliveries
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend for auto_send conformance runner
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    """Mirrors StreamingTaskMessageContext: __aenter__ opens, close() closes."""
+
+    def __init__(self, sink: list[Any], content_type: str, initial_content: Any) -> None:
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(
+            id="msg-conformance",
+            task_id="conformance-task",
+            content=initial_content,
+        )
+
+    async def __aenter__(self) -> "_FakeCtx":
+        self.sink.append(("open", self.content_type, self.task_message.content))
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update: Any) -> Any:
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    """Fake streaming backend; records every context lifecycle event."""
+
+    def __init__(self) -> None:
+        self.sink: list[Any] = []
+
+    def streaming_task_message_context(
+        self,
+        task_id: str,
+        initial_content: Any,
+        streaming_mode: str = "coalesced",
+        created_at: Any = None,
+    ) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        self.sink.append(("ctx", ctype, initial_content))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+class _RecordingTracer(SpanTracer):
+    """SpanTracer that records every SpanSignal it actually receives.
+
+    Each delivery channel calls `tracer.handle(signal)` for every signal it
+    derives from the stream, so `received_signals` captures what the channel
+    genuinely emitted — not a re-derivation. Comparing the two channels'
+    recorded lists catches regressions where a channel skips
+    `deriver.observe(event)` for some event type.
+    """
+
+    def __init__(self, tracing: Any) -> None:
+        super().__init__(
+            trace_id="conformance-trace",
+            parent_span_id="conformance-parent",
+            tracing=tracing,
+        )
+        self.received_signals: list[SpanSignal] = []
+
+    @override
+    async def handle(self, signal: SpanSignal) -> None:
+        self.received_signals.append(signal)
+        await super().handle(signal)
+
+
+async def _gen(events: list[StreamTaskMessage]):  # type: ignore[return]
+    for e in events:
+        yield e
+
+
+def _auto_send_logical_deliveries(sink: list[Any]) -> list[LogicalDelivery]:
+    """Extract logical deliveries from the auto_send fake streaming sink.
+
+    Each context lifecycle in the sink looks like:
+      ("ctx", ctype, content)  -- context created
+      ("open", ctype, content) -- context __aenter__
+      [("update", delta), ...]  -- optional deltas (StreamTaskMessageDelta)
+      ("close", ctype)          -- context closed
+
+    A logical delivery corresponds to each open+close pair. For text/reasoning
+    we identify by sequential position and build the payload by prepending the
+    initial_content text (TextContent.content) or summary (ReasoningContent.summary)
+    to accumulated deltas. This matches _yield_logical_deliveries so a channel
+    that drops initial_content or reasoning summary fails the comparison.
+    For tool messages we use tool_call_id + name and capture arguments/content.
+    """
+    from agentex.types.text_content import TextContent
+    from agentex.types.reasoning_content import ReasoningContent
+    from agentex.types.tool_request_content import ToolRequestContent
+    from agentex.types.tool_response_content import ToolResponseContent
+
+    deliveries: list[LogicalDelivery] = []
+    open_idx = 0
+    while open_idx < len(sink):
+        entry = sink[open_idx]
+        if entry[0] == "ctx":
+            ctype: str = entry[1]
+            content: Any = entry[2]
+            found_open = False
+            delta_parts: list[str] = []
+            # Seed delta_parts with initial_content so payload comparison
+            # catches a channel that drops initial_content but delivers deltas.
+            if isinstance(content, TextContent) and content.content:
+                delta_parts = [content.content]
+            elif isinstance(content, ReasoningContent) and content.summary:
+                delta_parts = list(content.summary)
+            for j in range(open_idx + 1, len(sink)):
+                if sink[j][0] == "open" and sink[j][1] == ctype and not found_open:
+                    found_open = True
+                elif found_open and sink[j][0] == "update":
+                    # Accumulate delta content from StreamTaskMessageDelta
+                    update = sink[j][1]
+                    if isinstance(update, StreamTaskMessageDelta) and update.delta is not None:
+                        if isinstance(update.delta, TextDelta) and update.delta.text_delta:
+                            delta_parts.append(update.delta.text_delta)
+                        elif isinstance(update.delta, ReasoningContentDelta) and update.delta.content_delta:
+                            delta_parts.append(update.delta.content_delta)
+                elif sink[j][0] == "close" and sink[j][1] == ctype and found_open:
+                    # Matched open+close: emit logical delivery with payload
+                    if ctype in ("text", "reasoning"):
+                        count = sum(1 for k in range(open_idx) if sink[k][0] == "ctx" and sink[k][1] == ctype)
+                        deliveries.append(
+                            LogicalDelivery(
+                                content_type=ctype,
+                                identity=frozenset({("seq", count)}),
+                                payload="".join(delta_parts),
+                            )
+                        )
+                    elif ctype == "tool_response":
+                        if isinstance(content, ToolResponseContent):
+                            deliveries.append(
+                                LogicalDelivery(
+                                    content_type=ctype,
+                                    identity=frozenset(
+                                        {
+                                            ("tool_call_id", content.tool_call_id),
+                                            ("name", content.name),
+                                        }
+                                    ),
+                                    payload=str(content.content),
+                                )
+                            )
+                    elif ctype == "tool_request":
+                        if isinstance(content, ToolRequestContent):
+                            deliveries.append(
+                                LogicalDelivery(
+                                    content_type=ctype,
+                                    identity=frozenset(
+                                        {
+                                            ("tool_call_id", content.tool_call_id),
+                                            ("name", content.name),
+                                        }
+                                    ),
+                                    payload=json.dumps(content.arguments, sort_keys=True),
+                                )
+                            )
+                    open_idx = j + 1
+                    break
+            else:
+                open_idx += 1
+        else:
+            open_idx += 1
+
+    return deliveries
+
+
+def _yield_text_reasoning_seq(deliveries: list[LogicalDelivery]) -> list[LogicalDelivery]:
+    """Re-key text/reasoning deliveries from index-based to seq-based identity.
+
+    The yield channel uses event.index as identity; auto_send uses a sequential
+    counter. To compare across channels, normalise both to sequential position
+    within each content type.
+    """
+    result: list[LogicalDelivery] = []
+    counts: dict[str, int] = {}
+    for d in deliveries:
+        if d.content_type in ("text", "reasoning"):
+            seq = counts.get(d.content_type, 0)
+            counts[d.content_type] = seq + 1
+            result.append(
+                LogicalDelivery(
+                    content_type=d.content_type,
+                    identity=frozenset({("seq", seq)}),
+                    payload=d.payload,
+                )
+            )
+        else:
+            result.append(d)
+    return result
+
+
+async def run_cross_channel_conformance(
+    fixture: Fixture,
+) -> tuple[list[LogicalDelivery], list[LogicalDelivery], list[SpanSignal], list[SpanSignal]]:
+    """Run both channels over a fixture; return (yield_deliveries, auto_deliveries,
+    yield_spans, auto_spans).
+
+    The caller asserts yield_deliveries == auto_deliveries and
+    yield_spans == auto_spans. The span signals are the ones each channel's
+    tracer ACTUALLY recorded while delivering (not a re-derivation), so a
+    regression where a channel skips deriver.observe() for some event type is
+    caught.
+    """
+    from agentex.lib.core.harness.auto_send import auto_send
+    from agentex.lib.core.harness.yield_delivery import yield_events
+
+    # --- yield channel ---
+    tracer_yield = _RecordingTracer(tracing=FakeTracing())
+    yield_out = [e async for e in yield_events(_gen(fixture.events), tracer=tracer_yield)]
+
+    # Span signals the yield channel actually emitted to its tracer
+    yield_spans = tracer_yield.received_signals
+
+    # Logical deliveries from yield output
+    yield_deliveries = _yield_text_reasoning_seq(_yield_logical_deliveries(yield_out))
+
+    # --- auto_send channel ---
+    tracer_auto = _RecordingTracer(tracing=FakeTracing())
+    fake_streaming = _FakeStreaming()
+    await auto_send(
+        _gen(fixture.events),
+        task_id="conformance-task",
+        tracer=tracer_auto,
+        streaming=fake_streaming,
+    )
+
+    # Span signals the auto_send channel actually emitted to its tracer
+    auto_spans = tracer_auto.received_signals
+
+    # Logical deliveries from what the streaming backend received
+    auto_deliveries = _auto_send_logical_deliveries(fake_streaming.sink)
+
+    return yield_deliveries, auto_deliveries, yield_spans, auto_spans
diff --git a/tests/lib/core/harness/conformance/test_claude_code_conformance.py b/tests/lib/core/harness/conformance/test_claude_code_conformance.py
new file mode 100644
index 000000000..010bc530b
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_claude_code_conformance.py
@@ -0,0 +1,192 @@
+"""Cross-channel conformance tests for the claude-code parser tap.
+
+Each fixture is a representative sequence of claude-code stream-json
+envelopes, converted into canonical ``StreamTaskMessage*`` events via
+``ClaudeCodeTurn``, then registered into the shared conformance runner.
+
+The conformance runner asserts two guarantees per fixture:
+
+1. **Logical-delivery equivalence**: ``yield_events`` and ``auto_send``
+   produce the same logically-delivered message contents.
+
+2. **Span signal equivalence**: both channels emit the same ``SpanSignal``
+   sequence to their ``SpanTracer``.
+
+Fixtures
+--------
+text-only:       single ``assistant`` text block
+tool-call-result: ``tool_use`` block followed by ``tool_result``
+thinking-block:  ``thinking`` block with full text
+multi-step:      text + tool_use + tool_result + text (two model turns)
+
+Note
+----
+Relative imports are used throughout (runner.py and these fixtures live in the
+same package). The per-module ``_FIXTURES`` list is both registered globally
+(via ``register()``) and parametrized locally so this module's tests are
+self-contained regardless of global registry ordering (see runner.py docstring).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+
+from .runner import (
+    Fixture,
+    register,
+    run_pure_async,
+    run_cross_channel_conformance,
+)
+
+# ---------------------------------------------------------------------------
+# Convert claude-code envelopes to StreamTaskMessage* events
+# ---------------------------------------------------------------------------
+
+
+async def _envelopes_to_events(envelopes: list[dict]) -> list:
+    """Drive convert_claude_code_to_agentex_events and collect all events."""
+
+    async def _aiter(items):  # type: ignore[return]
+        for item in items:
+            yield item
+
+    return [e async for e in convert_claude_code_to_agentex_events(_aiter(envelopes))]
+
+
+# ---------------------------------------------------------------------------
+# Fixture definitions (raw claude-code envelope sequences)
+# ---------------------------------------------------------------------------
+
+_TEXT_ENVELOPES = [
+    {
+        "type": "assistant",
+        "message": {"content": [{"type": "text", "text": "The answer is 42."}]},
+    }
+]
+
+_TOOL_ENVELOPES = [
+    {
+        "type": "assistant",
+        "message": {
+            "content": [
+                {
+                    "type": "tool_use",
+                    "id": "call_read",
+                    "name": "Read",
+                    "input": {"path": "/workspace/README.md"},
+                }
+            ]
+        },
+    },
+    {
+        "type": "user",
+        "message": {
+            "content": [
+                {
+                    "type": "tool_result",
+                    "tool_use_id": "call_read",
+                    "content": "# My Project\n\nA great project.",
+                }
+            ]
+        },
+    },
+]
+
+_THINKING_ENVELOPES = [
+    {
+        "type": "assistant",
+        "message": {
+            "content": [
+                {"type": "thinking", "thinking": "Let me think about this carefully.\nStep 1: check the facts."},
+                {"type": "text", "text": "Here is my answer."},
+            ]
+        },
+    }
+]
+
+_MULTI_STEP_ENVELOPES = [
+    # Turn 1: text + tool call
+    {
+        "type": "assistant",
+        "message": {
+            "content": [
+                {"type": "text", "text": "Let me look that up."},
+                {
+                    "type": "tool_use",
+                    "id": "call_bash",
+                    "name": "Bash",
+                    "input": {"command": "cat /etc/hostname"},
+                },
+            ]
+        },
+    },
+    {
+        "type": "user",
+        "message": {
+            "content": [
+                {
+                    "type": "tool_result",
+                    "tool_use_id": "call_bash",
+                    "content": "myhost",
+                }
+            ]
+        },
+    },
+    # Turn 2: final text after tool result
+    {
+        "type": "assistant",
+        "message": {"content": [{"type": "text", "text": "The hostname is myhost."}]},
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Build fixtures from envelopes at module load time
+# ---------------------------------------------------------------------------
+
+
+async def _build_fixture(name: str, envelopes: list[dict]) -> Fixture:
+    events = await _envelopes_to_events(envelopes)
+    return Fixture(name=name, events=events)
+
+
+# Fixtures must exist before pytest collects (they parametrize the test below),
+# so they are built at import time. The conversion only iterates in-memory
+# envelopes — it never suspends on a real future — so we drive the coroutines to
+# completion with the shared loop-free ``run_pure_async`` driver instead of
+# asyncio.run(), which raises RuntimeError at import when an event loop is
+# already running (programmatic pytest, a Jupyter kernel, or session-scoped
+# asyncio loops).
+_FIXTURES: list[Fixture] = [
+    run_pure_async(_build_fixture("claude-code-text-only", _TEXT_ENVELOPES)),
+    run_pure_async(_build_fixture("claude-code-tool-call-result", _TOOL_ENVELOPES)),
+    run_pure_async(_build_fixture("claude-code-thinking-block", _THINKING_ENVELOPES)),
+    run_pure_async(_build_fixture("claude-code-multi-step", _MULTI_STEP_ENVELOPES)),
+]
+
+# Register into the shared registry so all_fixtures() can enumerate them
+for _f in _FIXTURES:
+    register(_f)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance assertions
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """yield_events and auto_send must produce equivalent logical deliveries
+    and identical span signals for every claude-code fixture.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
diff --git a/tests/lib/core/harness/conformance/test_codex_conformance.py b/tests/lib/core/harness/conformance/test_codex_conformance.py
new file mode 100644
index 000000000..d51a73584
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_codex_conformance.py
@@ -0,0 +1,215 @@
+"""Conformance fixtures for the codex harness tap.
+
+Each fixture is derived from a ``CodexTurn`` and registered into the
+cross-channel conformance runner so that span derivation is validated
+alongside all other harness taps.
+
+Following the per-module registry pattern from runner.py: this module keeps
+its own local list of fixtures, both registers them AND parametrizes over
+them, to guarantee determinism regardless of pytest collection order.
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.lib.core.harness.types import StreamTaskMessage
+from agentex.lib.adk._modules._codex_sync import convert_codex_to_agentex_events
+
+from .runner import Fixture, register, run_pure_async
+
+
+async def _aiter(items: list[Any]) -> AsyncIterator[Any]:
+    for item in items:
+        yield item
+
+
+async def _collect(events: list[Any]) -> list[StreamTaskMessage]:
+    return [msg async for msg in convert_codex_to_agentex_events(_aiter(events))]
+
+
+def _build(events: list[Any]) -> list[StreamTaskMessage]:
+    # Loop-free driver: this runs at import time, where asyncio.run() would raise
+    # under an already-running loop (programmatic pytest, notebooks).
+    return run_pure_async(_collect(events))
+
+
+# ---------------------------------------------------------------------------
+# Fixture 1: plain text response
+# ---------------------------------------------------------------------------
+
+_CODEX_TEXT = Fixture(
+    name="codex-text",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-abc"},
+            {"type": "turn.started"},
+            {
+                "type": "item.started",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Hello"},
+            },
+            {
+                "type": "item.updated",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Hello, world"},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Hello, world!"},
+            },
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+            },
+        ]
+    ),
+)
+register(_CODEX_TEXT)
+
+# ---------------------------------------------------------------------------
+# Fixture 2: tool call (command_execution)
+# ---------------------------------------------------------------------------
+
+_CODEX_TOOL = Fixture(
+    name="codex-tool-command",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-cmd"},
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "tool1",
+                    "type": "command_execution",
+                    "command": "ls /workspace",
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "tool1",
+                    "type": "command_execution",
+                    "command": "ls /workspace",
+                    "aggregated_output": "file1.txt\nfile2.py",
+                    "exit_code": 0,
+                },
+            },
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28},
+            },
+        ]
+    ),
+)
+register(_CODEX_TOOL)
+
+# ---------------------------------------------------------------------------
+# Fixture 3: reasoning block
+# ---------------------------------------------------------------------------
+
+_CODEX_REASONING = Fixture(
+    name="codex-reasoning",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-reason"},
+            {
+                "type": "item.started",
+                "item": {"id": "r1", "type": "reasoning", "text": ""},
+            },
+            {
+                "type": "item.updated",
+                "item": {"id": "r1", "type": "reasoning", "text": "Step 1: analyze the problem"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "r1",
+                    "type": "reasoning",
+                    "text": "Step 1: analyze the problem\nStep 2: solve it",
+                },
+            },
+            {
+                "type": "item.started",
+                "item": {"id": "msg2", "type": "agent_message", "text": ""},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "msg2", "type": "agent_message", "text": "The answer is 42."},
+            },
+            {
+                "type": "turn.completed",
+                "usage": {
+                    "input_tokens": 30,
+                    "output_tokens": 20,
+                    "reasoning_tokens": 50,
+                    "total_tokens": 100,
+                },
+            },
+        ]
+    ),
+)
+register(_CODEX_REASONING)
+
+# ---------------------------------------------------------------------------
+# Fixture 4: multi-step (mcp_tool_call + follow-up text)
+# ---------------------------------------------------------------------------
+
+_CODEX_MULTI = Fixture(
+    name="codex-multi-step",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-multi"},
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "filesystem",
+                    "tool": "read_file",
+                    "arguments": {"path": "/workspace/README.md"},
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "filesystem",
+                    "tool": "read_file",
+                    "arguments": {"path": "/workspace/README.md"},
+                    "result": {"content": "# My Project"},
+                },
+            },
+            {
+                "type": "item.started",
+                "item": {"id": "msg3", "type": "agent_message", "text": "The README says:"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "msg3",
+                    "type": "agent_message",
+                    "text": "The README says: # My Project",
+                },
+            },
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 50, "output_tokens": 30, "total_tokens": 80},
+            },
+        ]
+    ),
+)
+register(_CODEX_MULTI)
+
+
+# ---------------------------------------------------------------------------
+# Local parametrized tests (cross-channel conformance)
+# ---------------------------------------------------------------------------
+
+_LOCAL_FIXTURES = [_CODEX_TEXT, _CODEX_TOOL, _CODEX_REASONING, _CODEX_MULTI]
+
+
+@pytest.mark.parametrize("fixture", _LOCAL_FIXTURES, ids=lambda f: f.name)
+def test_codex_events_are_non_empty(fixture: Fixture) -> None:
+    """Every codex fixture yields at least one StreamTaskMessage*."""
+    assert len(fixture.events) > 0
diff --git a/tests/lib/core/harness/conformance/test_conformance.py b/tests/lib/core/harness/conformance/test_conformance.py
new file mode 100644
index 000000000..7c79f9397
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_conformance.py
@@ -0,0 +1,299 @@
+"""Cross-channel conformance tests: yield_events vs auto_send.
+
+What is asserted
+----------------
+For each fixture the conformance runner drives BOTH delivery channels and
+verifies two guarantees:
+
+1. **Logical-delivery equivalence**: the sequence of logically-delivered
+   messages is identical across channels. "Logical" normalises away the
+   streaming-envelope difference:
+   - yield channel delivers StreamTaskMessageFull(ToolResponseContent) as-is.
+   - auto_send delivers the same tool-response by opening a streaming context
+     with the full content and closing it immediately.
+   Both collapse to LogicalDelivery(content_type, identity, payload) tuples
+   that compare equal. The payload includes initial_content (TextContent.content
+   and ReasoningContent.summary) so a channel that drops initial content fails.
+
+2. **Span signal equivalence**: both channels feed the same pure SpanDeriver
+   over the same event sequence, so the derived span signals must be identical.
+
+What is NOT asserted
+--------------------
+Raw wire-level event shapes are NOT compared (that would fail by design: the
+Full vs Start+Done envelope difference is a documented, acceptable choice in
+auto_send — see runner.py for the rationale).
+
+auto_send delivers streamed tool-request messages: both channels produce a
+delivery for streamed tool_request, verified by the "streamed-tool-request"
+fixture.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.text_delta import TextDelta
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+from .runner import (
+    Fixture,
+    register,
+    derive_all,
+    all_fixtures,
+    run_cross_channel_conformance,
+)
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+_FIXTURES: list[Fixture] = [
+    # fixture 1: single tool call — tool_request delivered via Full (classic path)
+    # plus a streamed tool_response via Full. Both channels should deliver both.
+    Fixture(
+        name="builtin-single-tool",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolResponseContent(
+                    type="tool_response", author="agent", tool_call_id="c", name="Bash", content="ok"
+                ),
+            ),
+        ],
+    ),
+    # fixture 2: streaming text — exercises the text start/delta/done path.
+    # Uses non-empty initial_content so the payload comparison catches a channel
+    # that drops StreamTaskMessageStart.content (Greptile id 3438655533, P1).
+    Fixture(
+        name="streaming-text",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=TextContent(type="text", author="agent", content="Init"),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=TextDelta(type="text", text_delta="Hello"),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=TextDelta(type="text", text_delta=" world"),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    ),
+    # fixture 3: reasoning block — exercises reasoning span open/close + delivery.
+    # ReasoningContent.summary is included in the payload so a channel that drops
+    # the reasoning-summary fails (Greptile id 3438655533, P1).
+    Fixture(
+        name="reasoning-block",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ReasoningContent(
+                    type="reasoning",
+                    author="agent",
+                    summary=["Thinking..."],
+                ),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=ReasoningContentDelta(
+                    type="reasoning_content",
+                    content_index=0,
+                    content_delta="step 1",
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    ),
+    # fixture 4: streamed tool_request — tool_request delivered via Start+Done
+    # (no Full). Both channels must produce a LogicalDelivery for this fixture.
+    Fixture(
+        name="streamed-tool-request",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="tr-1",
+                    name="Read",
+                    arguments={"path": "/tmp/foo"},
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="tr-1",
+                    name="Read",
+                    content="file contents",
+                ),
+            ),
+        ],
+    ),
+    # fixture 5: parallel tool calls + a tool that errors (AGX1-373 review,
+    # danielmillerp). The earlier fixtures only exercise one tool at a time, so
+    # equivalence is proven over trivially-orderable streams. This stresses the
+    # representative case: two tool spans open SIMULTANEOUSLY (p-ls opens via the
+    # streamed Start+Done path, p-read opens via Full while p-ls is still open),
+    # then close in a different order than they opened, and one of them returns
+    # an error. It guards against the two channels agreeing with each other while
+    # both mishandling interleaved/parallel spans or a failing tool.
+    #
+    # The failing tool sets ToolResponseContent.is_error=True (AGX1-371), which
+    # the span deriver threads onto the closed tool span's CloseSpan.is_error.
+    # Both channels feed the same deriver, so the recorded span signals — error
+    # status included — must match.
+    Fixture(
+        name="parallel-tools-with-error",
+        events=[
+            # p-ls: streamed tool_request (opens its span at Done).
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="p-ls",
+                    name="Bash",
+                    arguments={"command": "ls /nope"},
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            # p-read: Full tool_request opens a second span while p-ls is open.
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="p-read",
+                    name="Read",
+                    arguments={"path": "/etc/hosts"},
+                ),
+            ),
+            # p-ls errors and closes first (close order != open order).
+            StreamTaskMessageFull(
+                type="full",
+                index=2,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="p-ls",
+                    name="Bash",
+                    content="Error: ls: /nope: No such file or directory",
+                    is_error=True,
+                ),
+            ),
+            # p-read succeeds and closes second.
+            StreamTaskMessageFull(
+                type="full",
+                index=3,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="p-read",
+                    name="Read",
+                    content="127.0.0.1 localhost",
+                ),
+            ),
+        ],
+    ),
+]
+
+# Register all fixtures for backward-compatible use via all_fixtures()
+for _f in _FIXTURES:
+    register(_f)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance: logical equivalence + span equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for every fixture.
+
+    This is the real cross-channel guarantee: the two delivery adapters
+    agree on WHAT was delivered (logical content) and HOW spans were derived,
+    even though their streaming-envelope shapes differ (Full vs Start+Done for
+    tool messages).
+
+    The span signals are the ones each channel's tracer ACTUALLY recorded while
+    delivering, not a re-derivation, so a regression where one channel skips
+    deriver.observe() for some event type is caught here.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Backward-compatible determinism test (kept for regression coverage)
+# ---------------------------------------------------------------------------
+
+
+def test_span_derivation_is_deterministic() -> None:
+    """Span derivation over the same event list is idempotent, for EVERY
+    registered fixture across all harnesses.
+
+    ``all_fixtures()`` is read at run time (not at collection/parametrize time)
+    so it sees fixtures registered by every conformance module, regardless of
+    import/collection order. The per-harness conformance modules are imported
+    eagerly via ``conftest.py`` in this directory, so this test covers the full
+    cross-harness fixture set even when run in isolation. (Parametrizing on
+    ``all_fixtures()`` at import time would freeze the set to whatever happened
+    to be registered before this module was collected.)
+
+    Retained as a lightweight regression guard. The primary cross-channel
+    guarantee is asserted in test_cross_channel_equivalence above.
+    """
+    fixtures = all_fixtures()
+    assert len(fixtures) > len(_FIXTURES), (
+        "expected per-harness fixtures to be registered in addition to the "
+        f"{len(_FIXTURES)} generic ones; got {len(fixtures)} total — a conformance "
+        "module's fixtures are not being registered (check conftest imports)"
+    )
+    for fixture in fixtures:
+        assert derive_all(fixture.events) == derive_all(fixture.events), (
+            f"[{fixture.name}] span derivation is not deterministic"
+        )
diff --git a/tests/lib/core/harness/conformance/test_langgraph_conformance.py b/tests/lib/core/harness/conformance/test_langgraph_conformance.py
new file mode 100644
index 000000000..a8d43aef6
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_langgraph_conformance.py
@@ -0,0 +1,218 @@
+"""Cross-channel conformance fixtures for LangGraph harness tap.
+
+Each fixture is built as a canonical sequence of ``StreamTaskMessage*`` events
+that matches what ``convert_langgraph_to_agentex_events`` (via ``LangGraphTurn``)
+emits for the given scenario.  The fixtures are registered with the shared
+conformance runner and exercised by both the cross-channel equivalence test
+(yield_events vs auto_send) and the backward-compatible span-derivation test.
+
+LangGraph-specific note
+-----------------------
+LangGraph emits tool *requests* as ``StreamTaskMessageFull`` events (from the
+"updates" stream), NOT as Start+Delta+Done like pydantic-ai.  ``auto_send``
+handles Full events by opening a streaming context with the full content and
+closing it immediately, so both channels deliver the same logical payload.
+No ``coalesce_tool_requests`` option is needed.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+from .runner import Fixture, register, run_cross_channel_conformance
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY = Fixture(
+    name="langgraph-text-only",
+    events=[
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Hello from LangGraph!"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ],
+)
+
+_SINGLE_TOOL = Fixture(
+    name="langgraph-single-tool",
+    events=[
+        # LangGraph tool request is a Full event (from "updates" stream)
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_1",
+                name="get_weather",
+                arguments={"city": "Paris"},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_1",
+                name="get_weather",
+                content="Sunny, 72F",
+            ),
+        ),
+        StreamTaskMessageStart(
+            type="start",
+            index=2,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=2,
+            delta=TextDelta(type="text", text_delta="The weather in Paris is sunny, 72F."),
+        ),
+        StreamTaskMessageDone(type="done", index=2),
+    ],
+)
+
+_REASONING = Fixture(
+    name="langgraph-reasoning",
+    events=[
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ReasoningContent(
+                type="reasoning",
+                author="agent",
+                summary=[],
+                content=[],
+                style="active",
+            ),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ReasoningContentDelta(
+                type="reasoning_content",
+                content_index=0,
+                content_delta="Thinking about this...",
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageStart(
+            type="start",
+            index=1,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=1,
+            delta=TextDelta(type="text", text_delta="The answer is 42."),
+        ),
+        StreamTaskMessageDone(type="done", index=1),
+    ],
+)
+
+_MULTI_STEP = Fixture(
+    name="langgraph-multi-step",
+    events=[
+        # Turn 1: streaming text
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Let me search for that."),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        # Tool request (Full — from "updates" stream)
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_2",
+                name="search",
+                arguments={"query": "langgraph"},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=2,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_2",
+                name="search",
+                content="LangGraph is a framework for...",
+            ),
+        ),
+        # Turn 2: final streaming text
+        StreamTaskMessageStart(
+            type="start",
+            index=3,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=3,
+            delta=TextDelta(type="text", text_delta="Based on my research, LangGraph is..."),
+        ),
+        StreamTaskMessageDone(type="done", index=3),
+    ],
+)
+
+_LANGGRAPH_FIXTURES = [_TEXT_ONLY, _SINGLE_TOOL, _REASONING, _MULTI_STEP]
+
+for _fixture in _LANGGRAPH_FIXTURES:
+    register(_fixture)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance: logical equivalence + span equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for each LangGraph fixture.
+
+    See runner.py for the full contract.  The key LangGraph difference: tool
+    requests arrive as Full events rather than Start+Delta+Done, so auto_send
+    handles them by opening a streaming context with the full content and
+    closing it immediately — both channels produce the same LogicalDelivery.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
diff --git a/tests/lib/core/harness/conformance/test_openai_conformance.py b/tests/lib/core/harness/conformance/test_openai_conformance.py
new file mode 100644
index 000000000..e8630ca7f
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_openai_conformance.py
@@ -0,0 +1,206 @@
+"""OpenAI conformance fixtures for the shared harness span-derivation engine.
+
+The cross-channel guarantee is that yield-delivery and auto_send observe the
+SAME canonical StreamTaskMessage* stream, so span derivation and logical
+delivery over that stream must be equivalent regardless of channel. These
+fixtures express the canonical sequences an OpenAI turn produces (text,
+tool-call, reasoning, and a combined multi-step turn) and assert that property
+via run_cross_channel_conformance.
+
+Registry hazard (see conformance/runner.py): _REGISTRY is process-global and
+collection order across modules is not guaranteed. To stay deterministic this
+module keeps its OWN fixture list and parametrizes over THAT list, rather than
+over all_fixtures(). It still calls register() so the cross-module conformance
+suite can see these fixtures too.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.text_delta import TextDelta
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+from .runner import Fixture, register, run_cross_channel_conformance
+
+_OPENAI_FIXTURES: list[Fixture] = []
+
+
+def _add(fixture: Fixture) -> None:
+    """Register both module-locally (for parametrization) and globally."""
+    _OPENAI_FIXTURES.append(fixture)
+    register(fixture)
+
+
+# Text-only turn: start -> deltas -> done.
+# Uses non-empty initial_content so payload comparison catches a channel that
+# drops StreamTaskMessageStart.content.
+_add(
+    Fixture(
+        name="openai-text-only",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=TextContent(type="text", author="agent", content="Init"),
+            ),
+            StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hel")),
+            StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="lo")),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    )
+)
+
+# Tool-call turn: Full(ToolRequestContent) for the call + Full(ToolResponseContent)
+# for the result, matched by tool_call_id. Mirrors the OpenAI converter's tool path.
+_add(
+    Fixture(
+        name="openai-tool-call",
+        events=[
+            StreamTaskMessageFull(
+                type="full",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="call_1",
+                    name="get_weather",
+                    arguments={"city": "SF"},
+                ),
+            ),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="call_1",
+                    name="get_weather",
+                    content="72F",
+                ),
+            ),
+        ],
+    )
+)
+
+# Reasoning turn: start(ReasoningContent) -> content deltas -> done.
+# ReasoningContent.summary is seeded in the payload so a channel that drops the
+# summary fails the cross-channel comparison.
+_add(
+    Fixture(
+        name="openai-reasoning",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ReasoningContent(
+                    type="reasoning",
+                    author="agent",
+                    summary=["Thinking..."],
+                ),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=ReasoningContentDelta(
+                    type="reasoning_content",
+                    content_index=0,
+                    content_delta="step 1",
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    )
+)
+
+# Multi-step turn: reasoning, then a tool round, then the final answer text.
+_add(
+    Fixture(
+        name="openai-multi-step",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ReasoningContent(
+                    type="reasoning",
+                    author="agent",
+                    summary=["plan"],
+                ),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=ReasoningContentDelta(
+                    type="reasoning_content",
+                    content_index=0,
+                    content_delta="elaboration",
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="call_2",
+                    name="search",
+                    arguments={"q": "x"},
+                ),
+            ),
+            StreamTaskMessageFull(
+                type="full",
+                index=2,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="call_2",
+                    name="search",
+                    content="result",
+                ),
+            ),
+            StreamTaskMessageStart(
+                type="start",
+                index=3,
+                content=TextContent(type="text", author="agent", content=""),
+            ),
+            StreamTaskMessageDelta(type="delta", index=3, delta=TextDelta(type="text", text_delta="done")),
+            StreamTaskMessageDone(type="done", index=3),
+        ],
+    )
+)
+
+
+@pytest.mark.parametrize("fixture", _OPENAI_FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_openai_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for every OpenAI fixture.
+
+    This is the cross-channel guarantee: the two delivery adapters agree on
+    WHAT was delivered (logical content) and HOW spans were derived, even
+    though their streaming-envelope shapes differ (Full vs Start+Done for tool
+    messages).
+
+    The span signals are the ones each channel's tracer ACTUALLY recorded while
+    delivering, not a re-derivation, so a regression where one channel skips
+    deriver.observe() for some event type is caught here.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
diff --git a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py
new file mode 100644
index 000000000..5d9952334
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py
@@ -0,0 +1,187 @@
+"""Cross-channel conformance fixtures derived from real pydantic-ai event sequences.
+
+Each fixture is built by running a pydantic_ai event stream through PydanticAITurn
+and collecting the canonical StreamTaskMessage* output. These canonical event lists are
+then registered with the conformance runner and exercised by the cross-channel test
+(yield_events vs auto_send).
+
+Streamed tool requests
+----------------------
+The pydantic-ai stream emits a tool REQUEST as Start + ToolRequestDelta + Done (not a
+Full event). Both the conformance runner and auto_send deliver the
+Start+Delta+Done(tool_request) shape, so the cross-channel test asserts full
+delivery-equivalence for streamed tool requests. The fixtures below retain the
+ToolRequestDelta events as the streamed tool-request inputs.
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+from pydantic_ai.messages import (
+    TextPart,
+    PartEndEvent,
+    ThinkingPart,
+    ToolCallPart,
+    TextPartDelta,
+    PartDeltaEvent,
+    PartStartEvent,
+    ToolReturnPart,
+    ThinkingPartDelta,
+    ToolCallPartDelta,
+    FunctionToolResultEvent,
+)
+
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+from .runner import (
+    Fixture,
+    register,
+    run_pure_async,
+    run_cross_channel_conformance,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _canonical(pydantic_events: list[Any]) -> list[Any]:
+    """Run pydantic_ai events through PydanticAITurn and collect the output.
+
+    The output equals the bare convert_pydantic_ai_to_agentex_events output.
+    """
+    turn = PydanticAITurn(_aiter(pydantic_events), model=None)
+    return [e async for e in turn.events]
+
+
+def _build_fixtures() -> list[Fixture]:
+    """Build all pydantic-ai conformance fixtures synchronously at import time.
+
+    Uses the loop-free ``run_pure_async`` driver rather than ``asyncio.run()``,
+    which would raise under an already-running loop (programmatic pytest,
+    notebooks) since this runs during module import.
+    """
+
+    # ------------------------------------------------------------------ #
+    # 1. Text-only run: simple streaming text response.
+    # ------------------------------------------------------------------ #
+    text_only_pydantic = [
+        PartStartEvent(index=0, part=TextPart(content="")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Hello, ")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="world!")),
+        PartEndEvent(index=0, part=TextPart(content="Hello, world!")),
+    ]
+
+    # ------------------------------------------------------------------ #
+    # 2. Single tool call + tool response.
+    # The canonical stream emits Start+ToolRequestDelta+Done for the request
+    # and Full(ToolResponseContent) for the response. Both are asserted
+    # delivery-equivalent cross-channel (see the module docstring).
+    # ------------------------------------------------------------------ #
+    tool_call_pydantic = [
+        PartStartEvent(
+            index=0,
+            part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="call_01"),
+        ),
+        PartDeltaEvent(
+            index=0,
+            delta=ToolCallPartDelta(args_delta='{"city":"Paris"}', tool_call_id="call_01"),
+        ),
+        PartEndEvent(
+            index=0,
+            part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="call_01"),
+        ),
+        FunctionToolResultEvent(
+            part=ToolReturnPart(tool_name="get_weather", content="Sunny, 22C", tool_call_id="call_01"),
+        ),
+    ]
+
+    # ------------------------------------------------------------------ #
+    # 3. Reasoning/thinking block: produces ReasoningContent Start+Delta+Done.
+    # ------------------------------------------------------------------ #
+    reasoning_pydantic = [
+        PartStartEvent(index=0, part=ThinkingPart(content="")),
+        PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="First, let me think...")),
+        PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=" Then conclude.")),
+        PartEndEvent(index=0, part=ThinkingPart(content="First, let me think... Then conclude.")),
+    ]
+
+    # ------------------------------------------------------------------ #
+    # 4. Multi-step run: text -> tool call + response -> text.
+    # Pydantic AI restarts part indices at 0 for each model response; the
+    # converter assigns globally-monotonic indices to Agentex messages.
+    # ------------------------------------------------------------------ #
+    multi_step_pydantic = [
+        # First model turn: text then tool call
+        PartStartEvent(index=0, part=TextPart(content="")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Let me check the weather.")),
+        PartEndEvent(index=0, part=TextPart(content="Let me check the weather.")),
+        PartStartEvent(
+            index=1,
+            part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="call_ms1"),
+        ),
+        PartDeltaEvent(
+            index=1,
+            delta=ToolCallPartDelta(args_delta='{"city":"London"}', tool_call_id="call_ms1"),
+        ),
+        PartEndEvent(
+            index=1,
+            part=ToolCallPart(tool_name="get_weather", args='{"city":"London"}', tool_call_id="call_ms1"),
+        ),
+        FunctionToolResultEvent(
+            part=ToolReturnPart(tool_name="get_weather", content="Cloudy, 15C", tool_call_id="call_ms1"),
+        ),
+        # Second model turn: text response (pydantic restarts index at 0)
+        PartStartEvent(index=0, part=TextPart(content="")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="It's cloudy and 15C in London.")),
+        PartEndEvent(index=0, part=TextPart(content="It's cloudy and 15C in London.")),
+    ]
+
+    text_only_events = run_pure_async(_canonical(text_only_pydantic))
+    tool_call_events = run_pure_async(_canonical(tool_call_pydantic))
+    reasoning_events = run_pure_async(_canonical(reasoning_pydantic))
+    multi_step_events = run_pure_async(_canonical(multi_step_pydantic))
+
+    return [
+        Fixture(name="pydantic-ai-text-only", events=text_only_events),
+        Fixture(name="pydantic-ai-single-tool-call", events=tool_call_events),
+        Fixture(name="pydantic-ai-reasoning-block", events=reasoning_events),
+        Fixture(name="pydantic-ai-multi-step", events=multi_step_events),
+    ]
+
+
+_FIXTURES: list[Fixture] = _build_fixtures()
+
+for _f in _FIXTURES:
+    register(_f)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance: logical equivalence + span equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for each pydantic-ai fixture.
+
+    See runner.py for the full contract, including streamed-tool-request
+    delivery equivalence.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
diff --git a/tests/lib/core/harness/test_auto_send.py b/tests/lib/core/harness/test_auto_send.py
new file mode 100644
index 000000000..764dae8b3
--- /dev/null
+++ b/tests/lib/core/harness/test_auto_send.py
@@ -0,0 +1,479 @@
+"""Tests for auto_send delivery adapter.
+
+The fake mirrors the real StreamingTaskMessageContext API exactly:
+- streaming_task_message_context(...) returns a context object (synchronously)
+- open the context via __aenter__ (returns self after creating the task message)
+- stream deltas via ctx.stream_update(StreamTaskMessageDelta(...))
+- close via ctx.close() (NOT __aexit__)
+
+This mirrors _langgraph_async.py lines 62-78 and 100-127.
+"""
+
+from datetime import datetime
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.tool_request_delta import ToolRequestDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.core.harness.auto_send import auto_send
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+
+from ._fakes import FakeTracing
+
+
+class _FakeCtx:
+    """Mirrors StreamingTaskMessageContext: __aenter__ opens (returns self with task_message set),
+    close() closes. stream_update records the call.
+
+    task_message is a real TaskMessage instance so that auto_send can use it
+    as parent_task_message in StreamTaskMessageDelta without Pydantic validation errors.
+    """
+
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        # Real TaskMessage so StreamTaskMessageDelta(parent_task_message=...) passes validation
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        # __aexit__ delegates to close in the real impl; keep for safety
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    """Mirrors StreamingService: streaming_task_message_context returns a context object."""
+
+    def __init__(self):
+        self.sink = []
+        self.recorded_created_at: list[datetime | None] = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        self.recorded_created_at.append(created_at)
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+async def _gen(events):
+    for e in events:
+        yield e
+
+
+# ---------------------------------------------------------------------------
+# Test 1: text streaming — open, stream deltas, close; return accumulated text
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_streams_text_and_returns_final_text():
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Hel"),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="lo"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    assert result.final_text == "Hello"
+
+    kinds = [s[0] for s in streaming.sink]
+    # A context was created for the text content
+    assert kinds[0] == "ctx"
+    # It was opened and closed
+    assert "open" in kinds
+    assert "close" in kinds
+    # Exactly two updates were streamed (one per delta)
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 2
+
+
+# ---------------------------------------------------------------------------
+# Test 2: tool_request Full + tool_response Full — each posts one full message
+# (open context with the content, no deltas, close immediately)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_posts_full_tool_messages():
+    streaming = _FakeStreaming()
+    events = [
+        # Two Full events post two messages (open+close immediately, no deltas).
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                arguments={"cmd": "ls"},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                content="file.py",
+            ),
+        ),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    assert result.final_text == ""
+
+    # Each Full event opens and closes exactly one context.
+    ctx_events = [s for s in streaming.sink if s[0] == "ctx"]
+    assert len(ctx_events) == 2
+    content_types = [s[1] for s in ctx_events]
+    assert content_types == ["tool_request", "tool_response"]
+
+    # Each context is opened and closed
+    opens = [s for s in streaming.sink if s[0] == "open"]
+    closes = [s for s in streaming.sink if s[0] == "close"]
+    assert len(opens) == 2
+    assert len(closes) == 2
+
+    # No stream_update calls (full messages have no deltas)
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 0
+
+
+# ---------------------------------------------------------------------------
+# Test 3: tracing — spans are derived and handed to the tracer
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_derives_tool_spans_via_tracer():
+    fake_tracing = FakeTracing()
+    tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake_tracing)
+    streaming = _FakeStreaming()
+
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                content="ok",
+            ),
+        ),
+    ]
+
+    result = await auto_send(_gen(events), task_id="task1", tracer=tracer, streaming=streaming)
+
+    assert result.final_text == ""
+    assert fake_tracing.started_names == ["Bash"]
+    assert fake_tracing.ended_outputs == ["ok"]
+
+
+# ---------------------------------------------------------------------------
+# Test 4: text followed by a tool Full — text context is closed before Full
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_closes_text_context_before_full_message():
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Hi"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c2",
+                name="read_file",
+                arguments={},
+            ),
+        ),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+    assert result.final_text == "Hi"
+
+    # Verify ordering: text ctx opens, updates, closes; then tool_request ctx opens, closes
+    event_sequence = [(s[0], s[1]) for s in streaming.sink]
+    text_open_idx = next(i for i, s in enumerate(event_sequence) if s == ("open", "text"))
+    text_close_idx = next(i for i, s in enumerate(event_sequence) if s == ("close", "text"))
+    tool_open_idx = next(i for i, s in enumerate(event_sequence) if s == ("open", "tool_request"))
+    assert text_open_idx < text_close_idx < tool_open_idx
+
+
+# ---------------------------------------------------------------------------
+# Test 5: midstream error — propagates AND the open context is closed (finally)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_open_context_closed_on_midstream_error():
+    streaming = _FakeStreaming()
+
+    async def _exploding_gen():
+        yield StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        )
+        raise RuntimeError("boom")
+
+    with pytest.raises(RuntimeError, match="boom"):
+        await auto_send(_exploding_gen(), task_id="task1", tracer=None, streaming=streaming)
+
+    # The text context that was opened mid-stream was closed by the finally block.
+    assert ("open", "text") in [(s[0], s[1]) for s in streaming.sink]
+    assert ("close", "text") in [(s[0], s[1]) for s in streaming.sink]
+
+
+# ---------------------------------------------------------------------------
+# Test 6: streamed tool_request delivered
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_streams_tool_request():
+    """A Start(ToolRequestContent) MUST open a streaming context."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c_tool",
+                name="Bash",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ToolRequestDelta(
+                type="tool_request",
+                tool_call_id="c_tool",
+                name="Bash",
+                arguments_delta='{"cmd": "ls"}',
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    assert result.final_text == ""
+
+    ctx_events = [s for s in streaming.sink if s[0] == "ctx"]
+    assert len(ctx_events) == 1
+    assert ctx_events[0][1] == "tool_request"
+
+    opens = [s for s in streaming.sink if s[0] == "open"]
+    closes = [s for s in streaming.sink if s[0] == "close"]
+    assert len(opens) == 1
+    assert len(closes) == 1
+
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 1
+
+
+# ---------------------------------------------------------------------------
+# Test 7: interleaved indexes route correctly
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_interleaved_indexes_route_correctly():
+    """Deltas must be routed to the correct index-keyed context."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageStart(
+            type="start",
+            index=1,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="A"),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=1,
+            delta=TextDelta(type="text", text_delta="B"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageDone(type="done", index=1),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    ctx_events = [s for s in streaming.sink if s[0] == "ctx"]
+    assert len(ctx_events) == 2
+
+    opens = [s for s in streaming.sink if s[0] == "open"]
+    assert len(opens) == 2
+
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 2
+
+    update_deltas = [s[1].delta for s in streaming.sink if s[0] == "update"]
+    text_deltas = [d.text_delta for d in update_deltas if isinstance(d, TextDelta)]
+    assert set(text_deltas) == {"A", "B"}
+
+
+# ---------------------------------------------------------------------------
+# Test 8: final_text returns last text segment for multi-step
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_final_text_last_segment():
+    """final_text must be the LAST text segment, not accumulated across all turns."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="First"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageStart(
+            type="start",
+            index=1,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=1,
+            delta=TextDelta(type="text", text_delta="Second"),
+        ),
+        StreamTaskMessageDone(type="done", index=1),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+    assert result.final_text == "Second"
+
+
+# ---------------------------------------------------------------------------
+# Test 9: Full(TextContent) contributes to final_text
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_full_text_content_sets_final_text():
+    """A Full(TextContent) must contribute its text to final_text."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=TextContent(type="text", author="agent", content="hello"),
+        ),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+    assert result.final_text == "hello"
+
+
+# ---------------------------------------------------------------------------
+# Test 10: created_at is forwarded to streaming context
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_created_at_forwarded():
+    """created_at must be forwarded to every streaming_task_message_context call."""
+    streaming = _FakeStreaming()
+    dt = datetime(2025, 1, 15, 12, 0, 0)
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c_ts",
+                name="Bash",
+                arguments={},
+            ),
+        ),
+    ]
+    await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming, created_at=dt)
+
+    assert all(ts == dt for ts in streaming.recorded_created_at)
diff --git a/tests/lib/core/harness/test_emitter.py b/tests/lib/core/harness/test_emitter.py
new file mode 100644
index 000000000..3f70660ec
--- /dev/null
+++ b/tests/lib/core/harness/test_emitter.py
@@ -0,0 +1,142 @@
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+
+from ._fakes import FakeTracing
+
+
+class _FakeCtx:
+    """Minimal StreamingTaskMessageContext fake (see test_auto_send.py)."""
+
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+class _Turn:
+    def __init__(self, events_list, usage):
+        self._events_list = events_list
+        self._usage = usage
+
+    @property
+    async def events(self):
+        for e in self._events_list:
+            yield e
+
+    def usage(self):
+        return self._usage
+
+
+@pytest.mark.asyncio
+async def test_emitter_yield_mode_passes_through():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = _Turn(events, TurnUsage(model="m"))
+    emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+    out = [e async for e in emitter.yield_turn(turn)]
+    assert out == events
+
+
+@pytest.mark.asyncio
+async def test_emitter_tracing_default_on_when_trace_id_present():
+    # Inject a fake tracing backend so the test env doesn't need temporalio.
+    # This exercises the default-on path (tracer=None) when trace_id is truthy.
+    emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracing=FakeTracing())
+    assert emitter.tracer is not None
+
+
+@pytest.mark.asyncio
+async def test_emitter_tracing_overridable_off():
+    emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracer=False)
+    assert emitter.tracer is None
+
+
+@pytest.mark.asyncio
+async def test_emitter_auto_send_turn_returns_usage():
+    usage = TurnUsage(model="m", input_tokens=5)
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hello")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = _Turn(events, usage)
+    fake = _FakeStreaming()
+    emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, streaming=fake)
+    result = await emitter.auto_send_turn(turn)
+    assert result.usage == usage
+    assert result.final_text == "Hello"
+
+
+class _ContractTurn:
+    """A turn that honors the single-pass contract: usage() is the empty default
+    UNTIL `events` is exhausted, then the real usage (this is how real harness
+    turns behave — they populate usage while the stream is consumed)."""
+
+    def __init__(self, events_list, real_usage):
+        self._events_list = events_list
+        self._real_usage = real_usage
+        self._exhausted = False
+
+    @property
+    async def events(self):
+        for e in self._events_list:
+            yield e
+        self._exhausted = True
+
+    def usage(self):
+        return self._real_usage if self._exhausted else TurnUsage(model="m")
+
+
+@pytest.mark.asyncio
+async def test_emitter_auto_send_turn_reads_usage_after_exhaustion():
+    # Regression: auto_send_turn must read turn.usage() AFTER consuming the
+    # stream, not eagerly when building the auto_send call (which would capture
+    # the empty default and lose real token usage on the auto_send path).
+    real_usage = TurnUsage(model="m", input_tokens=11, output_tokens=22, total_tokens=33, num_llm_calls=2)
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = _ContractTurn(events, real_usage)
+    emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, streaming=_FakeStreaming())
+    result = await emitter.auto_send_turn(turn)
+    assert result.usage == real_usage
+    assert result.usage.input_tokens == 11 and result.usage.total_tokens == 33
diff --git a/tests/lib/core/harness/test_harness_langgraph_async.py b/tests/lib/core/harness/test_harness_langgraph_async.py
new file mode 100644
index 000000000..39bf5bc66
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_langgraph_async.py
@@ -0,0 +1,298 @@
+"""Integration test: async (Redis-streaming) channel with a LangGraph agent.
+
+Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + LangGraphTurn)
+with a minimal fake LangGraph stream so the test runs fully offline (no API
+keys, no Redis, no Agentex server).
+
+Agent description
+-----------------
+A simulated single-tool agent run using hand-crafted LangGraph event tuples:
+one tool request + response, followed by a final text reply.
+
+What is tested
+--------------
+- The async handler pushes the correct sequence of messages to the fake streaming
+  backend: Full(ToolRequest) + Full(ToolResponse) + text Start/Delta/Done.
+- final_text accumulates all text (not just last segment — AGX1-377 unified behavior).
+- Tool messages go through streaming_task_message_context (not messages.create).
+- With a SpanTracer, no tool spans are produced (AGX1-377: Full events are not
+  handled by SpanDeriver today).
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual Redis streaming (requires a running Redis instance).
+- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle.
+- Real LLM calls or real LangGraph graph execution.
+- The full FastACP async request lifecycle.
+
+See also: test_harness_langgraph_sync.py and test_harness_langgraph_temporal.py
+for the other two channels.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import TurnResult
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend (replaces adk.streaming; no Redis required)
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _FakeCtx:
+    ctype: str
+    initial_content: Any
+    task_message: TaskMessage
+    closed: bool = False
+    deltas: list[Any] = field(default_factory=list)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.closed = True
+
+    async def stream_update(self, update: Any) -> Any:
+        self.deltas.append(update)
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self) -> None:
+        self.contexts: list[_FakeCtx] = []
+
+    def streaming_task_message_context(self, task_id: str, initial_content: Any, **kw: Any) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        tm = TaskMessage(id=f"m{len(self.contexts) + 1}", task_id=task_id, content=initial_content)
+        ctx = _FakeCtx(ctype=ctype, initial_content=initial_content, task_message=tm)
+        self.contexts.append(ctx)
+        return ctx
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, Any]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(self, *, trace_id: str, name: str, **kw: Any) -> _FakeSpan:
+        self.started.append((name, kw.get("parent_id")))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+async def _run_auto_send_turn(
+    stream_events: list[tuple[str, Any]],
+    trace_id: str | None = None,
+) -> tuple[TurnResult, _FakeStreaming, _FakeTracing | None]:
+    fake_streaming = _FakeStreaming()
+    fake_tracing = _FakeTracing() if trace_id else None
+
+    tracer: SpanTracer | bool = False
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(trace_id=trace_id, parent_span_id=None, task_id="task1", tracing=fake_tracing)
+
+    turn = LangGraphTurn(_make_stream(stream_events), model=None)
+    emitter = UnifiedEmitter(
+        task_id="task1",
+        trace_id=trace_id,
+        parent_span_id=None,
+        tracer=tracer,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result, fake_streaming, fake_tracing
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncAutoSendChannel:
+    async def test_text_only_streams_text_and_returns_final(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello from LangGraph!")
+        ai_msg = AIMessage(content="Hello from LangGraph!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+        result, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        assert result.final_text == "Hello from LangGraph!"
+        text_ctxs = [c for c in fake_streaming.contexts if c.ctype == "text"]
+        assert len(text_ctxs) == 1
+        assert text_ctxs[0].closed is True
+
+    async def test_tool_call_posted_via_streaming_context(self):
+        from langchain_core.messages import AIMessage
+
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        result, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        # Tool request via streaming_task_message_context (Full event)
+        tool_req_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolRequestContent)]
+        assert len(tool_req_ctxs) == 1
+        assert tool_req_ctxs[0].initial_content.tool_call_id == "call_1"
+        assert tool_req_ctxs[0].closed is True
+        assert tool_req_ctxs[0].deltas == [], "Full messages have no deltas"
+
+    async def test_tool_response_posted_via_streaming_context(self):
+        from langchain_core.messages import ToolMessage
+
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        events = [("updates", {"tools": {"messages": [tool_msg]}})]
+
+        _, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        tool_resp_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolResponseContent)]
+        assert len(tool_resp_ctxs) == 1
+        assert tool_resp_ctxs[0].initial_content.content == "Sunny, 72F"
+        assert tool_resp_ctxs[0].closed is True
+
+    async def test_multi_step_final_text_is_last_segment(self):
+        """Unified surface: final_text uses last-segment semantics.
+
+        auto_send resets final_text_parts when a new Start(TextContent) is seen,
+        so multi-step turns (text -> tool -> text) return only the LAST text segment.
+        This matches the behaviour documented in auto_send.py and mirrors
+        stream_pydantic_ai_events.
+        """
+        from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
+
+        chunk1 = AIMessageChunk(content="Searching...")
+        ai_msg1 = AIMessage(content="Searching...", tool_calls=[{"id": "c1", "name": "s", "args": {}}])
+        tool_msg = ToolMessage(content="results", tool_call_id="c1", name="s")
+        chunk2 = AIMessageChunk(content="Found it!")
+        ai_msg2 = AIMessage(content="Found it!")
+
+        events = [
+            ("messages", (chunk1, {})),
+            ("updates", {"agent": {"messages": [ai_msg1]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+            ("messages", (chunk2, {})),
+            ("updates", {"agent": {"messages": [ai_msg2]}}),
+        ]
+        result, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        # Last segment only — first text segment is NOT in final_text
+        assert result.final_text == "Found it!"
+
+        # Two text streaming contexts still opened (both streamed to Redis)
+        text_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, TextContent)]
+        assert len(text_ctxs) == 2
+
+    async def test_empty_stream_returns_empty_final_text(self):
+        result, fake_streaming, _ = await _run_auto_send_turn([])
+        assert result.final_text == ""
+        assert fake_streaming.contexts == []
+
+    async def test_turn_usage_populated_after_events_consumed(self):
+        """LangGraphTurn.usage() is populated via the on_final_ai_message callback
+        during event iteration. TurnResult.usage is a snapshot from before events run
+        (emitter.auto_send_turn evaluates turn.usage() eagerly); the authoritative
+        post-iteration usage is on turn.usage() directly."""
+        from langchain_core.messages import AIMessage
+
+        fake_streaming = _FakeStreaming()
+        usage_meta = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="hi", usage_metadata=usage_meta)
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        turn = LangGraphTurn(_make_stream(events), model="gpt-4")
+        emitter = UnifiedEmitter(
+            task_id="task1", trace_id=None, parent_span_id=None, tracer=False, streaming=fake_streaming
+        )
+        await emitter.auto_send_turn(turn)
+
+        # After auto_send_turn, turn.usage() has the captured values
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.total_tokens == 15
+
+    async def test_tracer_produces_tool_spans_for_full_events(self):
+        """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes).
+
+        Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.
+        This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents).
+        """
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+        _, _, fake_tracing = await _run_auto_send_turn(events, trace_id="trace-1")
+
+        assert fake_tracing is not None
+        assert len(fake_tracing.started) == 1, "Full(ToolRequestContent) opens one tool span"
+        assert fake_tracing.started[0][0] == "t", "span name matches the tool name"
+        assert len(fake_tracing.ended) == 1, "Full(ToolResponseContent) closes the span"
diff --git a/tests/lib/core/harness/test_harness_langgraph_sync.py b/tests/lib/core/harness/test_harness_langgraph_sync.py
new file mode 100644
index 000000000..9f67dd2b6
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_langgraph_sync.py
@@ -0,0 +1,229 @@
+"""Integration test: sync (HTTP-yield) channel with a LangGraph agent.
+
+Exercises the unified harness surface (UnifiedEmitter.yield_turn + LangGraphTurn)
+with a minimal fake LangGraph stream so the test runs fully offline (no API
+keys, no Redis, no Agentex server).
+
+Agent description
+-----------------
+A simulated single-tool agent run using hand-crafted LangGraph event tuples:
+one tool request + response, followed by a final text reply.
+
+What is tested
+--------------
+- The sync handler correctly yields StreamTaskMessage* events in order:
+  Full(ToolRequest) then Full(ToolResponse) then text Start+Delta+Done.
+- With trace_id + fake tracing, the SpanDeriver fires for text events.
+- LangGraph emits tool calls as Full events (not Start+Done); the SpanDeriver
+  opens a tool span on Full(ToolRequestContent) and closes it on the matching
+  Full(ToolResponseContent) (see test_tracer_produces_tool_spans_for_full_events).
+- Final text is accumulated via yield mode.
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual HTTP streaming over the ACP sync endpoint.
+- Real LLM calls or real LangGraph graph execution.
+- The full FastACP request/response lifecycle.
+
+See also: test_harness_langgraph_async.py and test_harness_langgraph_temporal.py
+for the other two channels.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+
+import pytest
+
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import (
+    StreamTaskMessageFull,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, Any]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(
+        self, *, trace_id: str, name: str, input: Any = None, parent_id: Any = None, **kw: Any
+    ) -> _FakeSpan:
+        self.started.append((name, parent_id))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+async def _run_yield_turn(
+    stream_events: list[tuple[str, Any]], trace_id: str | None = None
+) -> tuple[list[Any], _FakeTracing | None]:
+    fake_tracing = _FakeTracing() if trace_id else None
+    tracer: SpanTracer | bool | None = None
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(trace_id=trace_id, parent_span_id=None, task_id="task1", tracing=fake_tracing)
+
+    emitter = UnifiedEmitter(
+        task_id="task1",
+        trace_id=trace_id,
+        parent_span_id=None,
+        tracer=tracer if tracer is not None else False,
+    )
+    turn = LangGraphTurn(_make_stream(stream_events), model=None)
+    out = [e async for e in emitter.yield_turn(turn)]
+    return out, fake_tracing
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestSyncYieldChannel:
+    async def test_text_only_stream_yields_start_delta_done(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello from LangGraph!")
+        ai_msg = AIMessage(content="Hello from LangGraph!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+        out, _ = await _run_yield_turn(events)
+
+        types = [type(e).__name__ for e in out]
+        assert "StreamTaskMessageStart" in types
+        assert "StreamTaskMessageDelta" in types
+        assert "StreamTaskMessageDone" in types
+
+    async def test_tool_call_yields_full_events(self):
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+        out, _ = await _run_yield_turn(events)
+
+        full_events = [e for e in out if isinstance(e, StreamTaskMessageFull)]
+        assert len(full_events) == 2
+
+        contents = [e.content for e in full_events]
+        assert any(isinstance(c, ToolRequestContent) for c in contents)
+        assert any(isinstance(c, ToolResponseContent) for c in contents)
+
+    async def test_multi_step_yields_events_in_order(self):
+        from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
+
+        chunk1 = AIMessageChunk(content="Searching...")
+        ai_msg1 = AIMessage(content="Searching...", tool_calls=[{"id": "c1", "name": "search", "args": {"q": "test"}}])
+        tool_msg = ToolMessage(content="results", tool_call_id="c1", name="search")
+        chunk2 = AIMessageChunk(content="Found it!")
+        ai_msg2 = AIMessage(content="Found it!")
+
+        events = [
+            ("messages", (chunk1, {})),
+            ("updates", {"agent": {"messages": [ai_msg1]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+            ("messages", (chunk2, {})),
+            ("updates", {"agent": {"messages": [ai_msg2]}}),
+        ]
+        out, _ = await _run_yield_turn(events)
+
+        # Should have multiple start events (one per text segment)
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        assert len(starts) >= 2
+        # And two Full events (tool req + tool resp)
+        fulls = [e for e in out if isinstance(e, StreamTaskMessageFull)]
+        assert len(fulls) == 2
+
+    async def test_empty_stream_yields_nothing(self):
+        out, _ = await _run_yield_turn([])
+        assert out == []
+
+    async def test_tracer_produces_tool_spans_for_full_events(self):
+        """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes).
+
+        Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.
+        This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents).
+        """
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+        _, fake_tracing = await _run_yield_turn(events, trace_id="trace-1")
+
+        assert fake_tracing is not None
+        assert len(fake_tracing.started) == 1, "Full(ToolRequestContent) opens one tool span"
+        assert fake_tracing.started[0][0] == "t", "span name matches the tool name"
+        assert len(fake_tracing.ended) == 1, "Full(ToolResponseContent) closes the span"
+
+    async def test_usage_captured_after_yield(self):
+        from langchain_core.messages import AIMessage
+
+        usage_meta = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="Hi!", usage_metadata=usage_meta)
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        turn = LangGraphTurn(_make_stream(events), model="gpt-4")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        _ = [e async for e in emitter.yield_turn(turn)]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
diff --git a/tests/lib/core/harness/test_harness_langgraph_temporal.py b/tests/lib/core/harness/test_harness_langgraph_temporal.py
new file mode 100644
index 000000000..1a094a33c
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_langgraph_temporal.py
@@ -0,0 +1,233 @@
+"""Integration test: Temporal channel with a LangGraph agent.
+
+The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (from
+``_langgraph_messages.py``) inside a Temporal activity. That module is not
+yet unified onto the harness surface (it has its own Redis-streaming code).
+
+This test file verifies the LangGraph Temporal agent's streaming behavior using
+the same fake streaming infrastructure as test_harness_langgraph_async.py. The
+key difference from the non-temporal async path is that in Temporal, each agent
+turn runs inside a Temporal activity that has already been handed the task_id
+and a pre-wired streaming client — so the ``UnifiedEmitter.auto_send_turn``
+path is identical. The graph activities and workflow scaffolding are not tested
+here; that requires a running Temporal cluster.
+
+What is tested
+--------------
+- stream_langgraph_events (the public async API used by temporal agent acp.py via
+  the workflow activity) produces the same result via the unified surface.
+- Usage from AIMessage.usage_metadata is captured in TurnResult.usage.
+- The auto_send_turn path for a temporal-style call (same as async).
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual Temporal workflow execution (requires a running Temporal cluster).
+- The Temporal activity retry/compensation logic.
+- LangGraph checkpoint storage via TemporalCheckpointer.
+- emit_langgraph_messages (the Temporal-specific streaming helper).
+- Real LLM calls or real LangGraph graph execution.
+
+See also: test_harness_langgraph_sync.py and test_harness_langgraph_async.py.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _FakeCtx:
+    ctype: str
+    initial_content: Any
+    task_message: TaskMessage
+    closed: bool = False
+    deltas: list[Any] = field(default_factory=list)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.closed = True
+
+    async def stream_update(self, update: Any) -> Any:
+        self.deltas.append(update)
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self) -> None:
+        self.contexts: list[_FakeCtx] = []
+
+    def streaming_task_message_context(self, task_id: str, initial_content: Any, **kw: Any) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        tm = TaskMessage(id=f"m{len(self.contexts) + 1}", task_id=task_id, content=initial_content)
+        ctx = _FakeCtx(ctype=ctype, initial_content=initial_content, task_message=tm)
+        self.contexts.append(ctx)
+        return ctx
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestTemporalAutoSendChannel:
+    async def test_stream_langgraph_events_plain_text(self, monkeypatch):
+        """stream_langgraph_events (used by temporal agents via the acp.py activity) returns
+        the accumulated final text."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        from agentex.lib import adk as adk_module
+
+        fake_streaming = _FakeStreaming()
+        monkeypatch.setattr(adk_module, "streaming", fake_streaming)
+
+        chunk = AIMessageChunk(content="Hello Temporal!")
+        ai_msg = AIMessage(content="Hello Temporal!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        final = await stream_langgraph_events(_make_stream(events), "task-1")
+        assert final == "Hello Temporal!"
+
+    async def test_stream_langgraph_events_tool_call(self, monkeypatch):
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        from agentex.lib import adk as adk_module
+
+        fake_streaming = _FakeStreaming()
+        monkeypatch.setattr(adk_module, "streaming", fake_streaming)
+
+        tc = {"id": "c1", "name": "search", "args": {"q": "test"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="results", tool_call_id="c1", name="search")
+        chunk_final = AIMessage(content="Here are the results.")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+            ("updates", {"agent": {"messages": [chunk_final]}}),
+        ]
+
+        final = await stream_langgraph_events(_make_stream(events), "task-1")
+
+        # Check tool request and response posted to fake streaming
+        tool_req_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolRequestContent)]
+        tool_resp_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolResponseContent)]
+        assert len(tool_req_ctxs) == 1
+        assert len(tool_resp_ctxs) == 1
+        assert tool_req_ctxs[0].initial_content.name == "search"
+
+    async def test_langgraph_turn_auto_send_via_unified_emitter(self):
+        """Direct UnifiedEmitter.auto_send_turn path used by temporal agent workflow
+        activities. Uses a fake streaming backend (no Redis)."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        fake_streaming = _FakeStreaming()
+        chunk = AIMessageChunk(content="Temporal answer!")
+        ai_msg = AIMessage(content="Temporal answer!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        turn = LangGraphTurn(_make_stream(events), model=None)
+        emitter = UnifiedEmitter(
+            task_id="task-1",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+
+        assert result.final_text == "Temporal answer!"
+        text_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, TextContent)]
+        assert len(text_ctxs) == 1
+
+    async def test_usage_captured_via_turn_after_events_consumed(self):
+        """Usage from AIMessage.usage_metadata is captured via the on_final_ai_message
+        callback during event iteration. The authoritative usage is on turn.usage()
+        after events are consumed (emitter.auto_send_turn evaluates turn.usage()
+        eagerly before iteration, so TurnResult.usage is a pre-iteration snapshot)."""
+        from langchain_core.messages import AIMessage
+
+        fake_streaming = _FakeStreaming()
+        usage_meta = {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30}
+        ai_msg = AIMessage(content="answer", usage_metadata=usage_meta)
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        turn = LangGraphTurn(_make_stream(events), model="gpt-4o")
+        emitter = UnifiedEmitter(
+            task_id="task-1",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+
+        # After auto_send_turn, turn.usage() has the captured values
+        usage = turn.usage()
+        assert usage.input_tokens == 20
+        assert usage.output_tokens == 10
+        assert usage.total_tokens == 30
+
+    async def test_empty_stream_returns_empty_string(self, monkeypatch):
+        from agentex.lib import adk as adk_module
+
+        fake_streaming = _FakeStreaming()
+        monkeypatch.setattr(adk_module, "streaming", fake_streaming)
+
+        final = await stream_langgraph_events(_make_stream([]), "task-1")
+        assert final == ""
+        assert fake_streaming.contexts == []
diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_async.py b/tests/lib/core/harness/test_harness_pydantic_ai_async.py
new file mode 100644
index 000000000..8bda7d020
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_pydantic_ai_async.py
@@ -0,0 +1,361 @@
+"""Integration test: async (Redis-streaming) channel with a pydantic-ai agent.
+
+Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + PydanticAITurn)
+with a minimal pydantic-ai agent backed by TestModel so the test runs fully
+offline (no API keys, no Redis, no Agentex server).
+
+Agent description
+-----------------
+Same single-tool agent as the sync test: ``get_weather(city: str) -> str``
+returning "sunny and 72F". TestModel is configured to call the tool once then
+produce a fixed text reply.
+
+The async path uses the bare PydanticAITurn (no coalescing): the foundation
+auto_send delivers streamed tool-request Start+ToolRequestDelta+Done messages
+natively (AGX1-377 fix), so no coalescing wrapper is needed.
+
+What is tested
+--------------
+- The async handler pushes the correct sequence of messages to the fake streaming
+  backend: tool_request + tool_response + text (in that order).
+- final_text equals the TestModel custom output.
+- With a SpanTracer, tool spans are derived and forwarded to the fake tracing
+  backend (streamed tool-request delivery now triggers span derivation on the
+  async path).
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual Redis streaming (requires a running Redis instance).
+- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle.
+- Multi-turn history persistence via adk.state.
+- Real LLM calls or production model behaviour.
+- The full FastACP async request lifecycle.
+
+See also: test_harness_pydantic_ai_sync.py (span derivation with sync path) and
+test_harness_pydantic_ai_temporal.py (temporal activity path).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from pydantic_ai import Agent
+from pydantic_ai.models.test import TestModel
+
+from agentex.types.task_message import TaskMessage
+from agentex.lib.core.harness.types import TurnResult
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+# ---------------------------------------------------------------------------
+# Minimal agent under test
+# ---------------------------------------------------------------------------
+
+
+def _make_agent() -> Agent:
+    """Build a pydantic-ai agent with one weather tool and a TestModel."""
+    model = TestModel(
+        call_tools=["get_weather"],
+        custom_output_text="The weather in Paris is sunny and 72F.",
+    )
+    agent: Agent = Agent(model)
+
+    @agent.tool_plain
+    def get_weather(city: str) -> str:
+        """Get the current weather for a city."""
+        return f"The weather in {city} is sunny and 72F"
+
+    return agent
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend (replaces adk.streaming; no Redis required)
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    """Minimal StreamingTaskMessageContext fake."""
+
+    def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None:
+        self.sink = sink
+        self.ctype = ctype
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        self.sink.append(("open", self.ctype, self.task_message.content))
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.sink.append(("close", self.ctype))
+
+    async def stream_update(self, update: Any) -> Any:
+        self.sink.append(("delta", self.ctype, update))
+        return update
+
+
+class _FakeStreaming:
+    """Fake streaming backend; records every context lifecycle event."""
+
+    def __init__(self) -> None:
+        self.sink: list[Any] = []
+        self.messages_opened: list[Any] = []
+
+    def streaming_task_message_context(
+        self,
+        task_id: str,
+        initial_content: Any,
+        streaming_mode: str = "coalesced",
+        created_at: Any = None,
+    ) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        self.messages_opened.append(initial_content)
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, str | None]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(
+        self,
+        *,
+        trace_id: str,
+        name: str,
+        input: Any = None,
+        parent_id: Any = None,
+        data: Any = None,
+        task_id: Any = None,
+    ) -> _FakeSpan:
+        self.started.append((name, parent_id))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _run_auto_send_turn(
+    agent: Agent,
+    user_msg: str = "What is the weather in Paris?",
+    trace_id: str | None = None,
+    parent_span_id: str | None = None,
+    fake_tracing: _FakeTracing | None = None,
+) -> tuple[TurnResult, _FakeStreaming]:
+    """Drive the async (auto_send) path and return the TurnResult + fake streaming state."""
+    fake_streaming = _FakeStreaming()
+
+    tracer: SpanTracer | bool | None = None
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+    async with agent.run_stream_events(user_msg) as stream:
+        turn = PydanticAITurn(
+            stream,
+            model="test",
+        )
+        emitter = UnifiedEmitter(
+            task_id="task1",
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            tracer=tracer if tracer is not None else False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+
+    return result, fake_streaming
+
+
+# ---------------------------------------------------------------------------
+# Tests: message order and content
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncAutoSendMessageOrder:
+    """auto_send pushes messages to the streaming backend in canonical order."""
+
+    async def test_tool_request_pushed_first(self) -> None:
+        """tool_request is the first message type pushed to the streaming backend."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert "tool_request" in message_types
+        assert message_types.index("tool_request") < message_types.index("tool_response"), (
+            "tool_request must be pushed before tool_response"
+        )
+
+    async def test_tool_response_pushed_after_tool_request(self) -> None:
+        """tool_response appears after tool_request in the pushed messages."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert "tool_response" in message_types
+
+    async def test_text_pushed_last(self) -> None:
+        """Text content is the last type pushed (after tool round-trip)."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert message_types[-1] == "text", f"Expected last message type=text, got {message_types}"
+
+    async def test_exactly_three_messages(self) -> None:
+        """Exactly three message contexts are opened: tool_request, tool_response, text."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        assert len(fake_streaming.messages_opened) == 3, (
+            f"Expected 3 messages (tool_request + tool_response + text), "
+            f"got {len(fake_streaming.messages_opened)}: "
+            f"{[getattr(m, 'type', None) for m in fake_streaming.messages_opened]}"
+        )
+
+
+class TestAsyncAutoSendContentVerification:
+    """The content pushed to the streaming backend is correct."""
+
+    async def test_tool_request_content(self) -> None:
+        """The pushed tool_request is a ToolRequestContent for get_weather."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)]
+        assert len(tool_reqs) == 1, "Expected exactly one ToolRequestContent"
+        assert tool_reqs[0].name == "get_weather"
+
+    async def test_tool_response_content(self) -> None:
+        """The pushed tool_response is a ToolResponseContent containing the weather result."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)]
+        assert len(tool_resps) == 1, "Expected exactly one ToolResponseContent"
+        assert isinstance(tool_resps[0].content, str)
+        assert "72F" in tool_resps[0].content
+        assert tool_resps[0].name == "get_weather"
+
+    async def test_tool_call_ids_match(self) -> None:
+        """tool_request and tool_response have the same tool_call_id."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent))
+        tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent))
+        assert tool_req.tool_call_id == tool_resp.tool_call_id, (
+            "tool_request and tool_response must share the same tool_call_id"
+        )
+
+
+class TestAsyncAutoSendFinalText:
+    """auto_send_turn returns the accumulated text from the last text part."""
+
+    async def test_final_text_matches_model_output(self) -> None:
+        """TurnResult.final_text equals the TestModel custom_output_text."""
+        agent = _make_agent()
+        result, _ = await _run_auto_send_turn(agent)
+        assert result.final_text == "The weather in Paris is sunny and 72F."
+
+    async def test_turn_result_has_usage(self) -> None:
+        """TurnResult carries a TurnUsage object (may have None tokens from TestModel)."""
+        agent = _make_agent()
+        result, _ = await _run_auto_send_turn(agent)
+        assert result.usage is not None
+
+    async def test_context_lifecycle_open_then_close(self) -> None:
+        """Every message context is opened then closed (no leak)."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        opens = [e for e in fake_streaming.sink if e[0] == "open"]
+        closes = [e for e in fake_streaming.sink if e[0] == "close"]
+        assert len(opens) == len(closes) == 3, "Each of the 3 messages must have exactly one open and one close"
+
+
+class TestAsyncAutoSendSpanDerivation:
+    """Span derivation on the async path now works for streamed tool requests.
+
+    The foundation auto_send delivers Start+ToolRequestDelta+Done natively
+    (AGX1-377 fix). The SpanDeriver opens a tool span on Done(tool_request),
+    so the async path now derives spans just like the sync path.
+    """
+
+    async def test_tool_span_derived_on_async_path(self) -> None:
+        """With the bare PydanticAITurn (no coalescing), a tool span is derived
+        on the async/auto_send path when auto_send delivers the streamed
+        Start+ToolRequestDelta+Done sequence."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id="parent",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+        fake_streaming = _FakeStreaming()
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent",
+                tracer=tracer,
+                streaming=fake_streaming,
+            )
+            await emitter.auto_send_turn(turn)
+
+        assert len(fake_tracing.started) == 1, (
+            "Expected one tool span to be started for the get_weather call."
+        )
+        assert fake_tracing.started[0][0] == "get_weather"
+        assert len(fake_tracing.ended) == 1
+
+
+@pytest.mark.parametrize(
+    "user_msg",
+    [
+        "What is the weather in Paris?",
+        "Tell me the weather in London.",
+    ],
+)
+async def test_async_handler_pushes_messages_for_various_inputs(user_msg: str) -> None:
+    """auto_send pushes at least tool_request + tool_response + text for any input."""
+    agent = _make_agent()
+    result, fake_streaming = await _run_auto_send_turn(agent, user_msg=user_msg)
+
+    message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+    assert "tool_request" in message_types
+    assert "tool_response" in message_types
+    assert "text" in message_types
+    assert isinstance(result.final_text, str)
+    assert len(result.final_text) > 0
diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py
new file mode 100644
index 000000000..1557d0dd1
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py
@@ -0,0 +1,388 @@
+"""Integration test: sync (HTTP-yield) channel with a pydantic-ai agent.
+
+Exercises the unified harness surface (UnifiedEmitter.yield_turn + PydanticAITurn)
+with a minimal pydantic-ai agent backed by TestModel so the test runs fully
+offline (no API keys, no live infrastructure).
+
+Agent description
+-----------------
+A single-tool agent with ``get_weather(city: str) -> str`` that always returns
+"sunny and 72F". TestModel is configured to call that tool once then produce
+a fixed text reply, giving a deterministic event sequence.
+
+What is tested
+--------------
+- The sync handler correctly yields StreamTaskMessage* events in order:
+  tool_request (Start+Done) then tool_response (Full) then text (Start+Delta+Done).
+- Final accumulated text equals the TestModel custom output.
+- With a trace_id + fake tracing, a tool span is opened (OpenSpan) and
+  closed (CloseSpan) — proving the SpanDeriver is wired on the yield path.
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual HTTP streaming over the ACP sync endpoint (requires a running
+  Agentex server + deployed agent).
+- Real LLM calls or production model behaviour.
+- The full FastACP request/response lifecycle.
+
+See also: tests/lib/core/harness/test_harness_pydantic_ai_async.py and
+test_harness_pydantic_ai_temporal.py for the other two channels.
+"""
+
+from __future__ import annotations
+
+from typing import Any, override
+
+import pytest
+from pydantic_ai import Agent
+from pydantic_ai.models.test import TestModel
+
+from agentex.types.text_delta import TextDelta
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+# ---------------------------------------------------------------------------
+# Minimal agent under test
+# ---------------------------------------------------------------------------
+
+
+def _make_agent() -> Agent:
+    """Build a pydantic-ai agent with one weather tool and a TestModel.
+
+    TestModel is instantiated with call_tools=['get_weather'] so it always
+    invokes the tool once, then emits custom_output_text as the reply.
+    """
+    model = TestModel(
+        call_tools=["get_weather"],
+        custom_output_text="The weather in Paris is sunny and 72F.",
+    )
+    agent: Agent = Agent(model)
+
+    @agent.tool_plain
+    def get_weather(city: str) -> str:
+        """Get the current weather for a city."""
+        return f"The weather in {city} is sunny and 72F"
+
+    return agent
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend (no network calls)
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, str | None]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(
+        self,
+        *,
+        trace_id: str,
+        name: str,
+        input: Any = None,
+        parent_id: Any = None,
+        data: Any = None,
+        task_id: Any = None,
+    ) -> _FakeSpan:
+        self.started.append((name, parent_id))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _run_yield_turn(
+    agent: Agent,
+    user_msg: str = "What is the weather in Paris?",
+    trace_id: str | None = None,
+    parent_span_id: str | None = None,
+    fake_tracing: _FakeTracing | None = None,
+) -> list[Any]:
+    """Drive the sync (yield) path and collect all yielded events."""
+    tracer: SpanTracer | bool | None = None
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+    events: list[Any] = []
+    async with agent.run_stream_events(user_msg) as stream:
+        turn = PydanticAITurn(stream, model="test")
+        emitter = UnifiedEmitter(
+            task_id="task1",
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            tracer=tracer if tracer is not None else False,
+        )
+        events = [ev async for ev in emitter.yield_turn(turn)]
+    return events
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestSyncYieldEventOrder:
+    """The yield channel forwards events in canonical order."""
+
+    async def test_tool_request_precedes_tool_response(self) -> None:
+        """tool_request events appear before the tool_response Full event."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        content_types = [
+            getattr(getattr(ev, "content", None), "type", None)
+            for ev in events
+            if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull))
+        ]
+        assert "tool_request" in content_types
+        assert "tool_response" in content_types
+        tool_req_idx = content_types.index("tool_request")
+        tool_resp_idx = content_types.index("tool_response")
+        assert tool_req_idx < tool_resp_idx, "tool_request must appear before tool_response in the event stream"
+
+    async def test_text_appears_after_tool_response(self) -> None:
+        """Text content (Start/Done) comes after the tool_response Full event."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        full_types = [
+            getattr(getattr(ev, "content", None), "type", None)
+            for ev in events
+            if isinstance(ev, StreamTaskMessageFull)
+        ]
+        start_types = [
+            getattr(getattr(ev, "content", None), "type", None)
+            for ev in events
+            if isinstance(ev, StreamTaskMessageStart)
+        ]
+
+        assert "tool_response" in full_types
+        assert "text" in start_types
+
+        tool_resp_pos = next(
+            i
+            for i, ev in enumerate(events)
+            if isinstance(ev, StreamTaskMessageFull)
+            and getattr(getattr(ev, "content", None), "type", None) == "tool_response"
+        )
+        text_start_pos = next(
+            i
+            for i, ev in enumerate(events)
+            if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text"
+        )
+        assert tool_resp_pos < text_start_pos
+
+    async def test_tool_response_carries_weather_result(self) -> None:
+        """The ToolResponseContent contains the get_weather return value."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        full_events = [
+            ev
+            for ev in events
+            if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent)
+        ]
+        assert len(full_events) >= 1, "Expected at least one tool_response Full event"
+        tool_response = full_events[0].content
+        assert isinstance(tool_response, ToolResponseContent)
+        assert isinstance(tool_response.content, str)
+        assert "72F" in tool_response.content
+        assert tool_response.name == "get_weather"
+
+    async def test_accumulated_text_matches_model_output(self) -> None:
+        """Accumulated text deltas equal the TestModel custom_output_text."""
+        from agentex.types.task_message_update import StreamTaskMessageDelta
+
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        accumulated = "".join(
+            ev.delta.text_delta
+            for ev in events
+            if isinstance(ev, StreamTaskMessageDelta) and isinstance(ev.delta, TextDelta) and ev.delta.text_delta
+        )
+        assert accumulated == "The weather in Paris is sunny and 72F."
+
+    async def test_every_start_has_matching_done(self) -> None:
+        """Every StreamTaskMessageStart has a corresponding StreamTaskMessageDone."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)}
+        dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)}
+        assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}"
+
+
+class TestSyncYieldSpanDerivation:
+    """SpanDeriver is wired on the yield path; tool spans are opened/closed."""
+
+    async def test_tool_span_opened_and_closed(self) -> None:
+        """One tool span is opened and closed per tool call."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id="parent-span",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent-span",
+                tracer=tracer,
+            )
+            await emitter.yield_turn(turn).__anext__.__self__ if False else None
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert len(fake_tracing.started) == 1, "Expected exactly one tool span opened"
+        assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed"
+        span_name, parent_id = fake_tracing.started[0]
+        assert span_name == "get_weather"
+        assert parent_id == "parent-span"
+
+    async def test_tool_span_output_is_tool_result(self) -> None:
+        """The closed tool span's output equals the tool's return value."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id="parent-span",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent-span",
+                tracer=tracer,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        name, output = fake_tracing.ended[0]
+        assert name == "get_weather"
+        assert output is not None
+        assert "72F" in str(output)
+
+    async def test_no_trace_id_means_no_spans(self) -> None:
+        """With trace_id=None, no spans are derived (emitter disables tracing)."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id=None,
+                parent_span_id=None,
+                tracing=fake_tracing,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert fake_tracing.started == []
+        assert fake_tracing.ended == []
+
+    async def test_tracer_false_suppresses_spans(self) -> None:
+        """tracer=False disables span derivation regardless of trace_id."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent-span",
+                tracer=False,
+                tracing=fake_tracing,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert fake_tracing.started == []
+        assert fake_tracing.ended == []
+
+    async def test_span_signal_types(self) -> None:
+        """The signals received by the tracer are OpenSpan then CloseSpan."""
+        from agentex.lib.core.harness.tracer import SpanTracer as RealTracer
+
+        received_signals: list[Any] = []
+
+        class _RecordingTracer(RealTracer):
+            @override
+            async def handle(self, signal: Any) -> None:
+                received_signals.append(signal)
+                await super().handle(signal)
+
+        fake_tracing = _FakeTracing()
+        tracer = _RecordingTracer(
+            trace_id="trace1",
+            parent_span_id="parent",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+        agent = _make_agent()
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent",
+                tracer=tracer,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert len(received_signals) == 2
+        assert isinstance(received_signals[0], OpenSpan)
+        assert isinstance(received_signals[1], CloseSpan)
+        assert received_signals[0].name == "get_weather"
+
+
+@pytest.mark.parametrize(
+    "user_msg",
+    [
+        "What is the weather in Paris?",
+        "Tell me the weather in London.",
+    ],
+)
+async def test_sync_handler_produces_events_for_various_inputs(user_msg: str) -> None:
+    """Yield path produces at least a tool_response Full for any user message."""
+    agent = _make_agent()
+    events = await _run_yield_turn(agent, user_msg=user_msg)
+
+    full_event_types = [
+        getattr(getattr(ev, "content", None), "type", None) for ev in events if isinstance(ev, StreamTaskMessageFull)
+    ]
+    assert "tool_response" in full_event_types
diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_temporal.py b/tests/lib/core/harness/test_harness_pydantic_ai_temporal.py
new file mode 100644
index 000000000..0ead8e832
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_pydantic_ai_temporal.py
@@ -0,0 +1,370 @@
+"""Integration test: Temporal-backed pydantic-ai agent, offline.
+
+Exercises the core of the Temporal pydantic-ai harness path — the
+event_stream_handler activity — with a TemporalAgent backed by TestModel so the
+test runs fully offline (no Temporal server, no Redis, no API keys).
+
+Architecture overview
+---------------------
+In a real Temporal deployment the pydantic-ai Temporal harness runs like this:
+
+    HTTP POST /task/event/send
+        -> @workflow.signal on At110PydanticAiWorkflow
+        -> temporal_agent.run(user_message, deps=TaskDeps(...))
+            internally schedules:
+            1. request_activity (LLM HTTP call — recorded by Temporal)
+            2. call_tool_activity (for each tool call — also recorded)
+            3. event_stream_handler_activity (streams events to Redis)
+
+The third activity is what we test here: it receives a
+``RunContext[TaskDeps]`` and an ``AsyncIterable[AgentStreamEvent]`` from
+pydantic-ai, calls ``stream_pydantic_ai_events`` (which internally constructs
+a ``UnifiedEmitter`` + ``PydanticAITurn`` and calls ``auto_send_turn``), and
+pushes the resulting messages to Redis.
+
+What we test
+-----------
+Since ``TemporalAgent.run_stream_events`` works offline with TestModel (it does
+not schedule Temporal activities — it runs in-process), we can:
+
+1. Build a TemporalAgent with TestModel.
+2. Call ``run_stream_events`` on it directly, just as the event_stream_handler
+   would see the event iterable.
+3. Feed that stream into ``stream_pydantic_ai_events`` backed by a fake streaming
+   backend, and assert the canonical message sequence.
+
+This covers the full inner harness chain that the Temporal workflow exercises,
+minus the Temporal scheduling/durability layer itself.
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Temporal scheduling (the workflow.signal -> activity dispatch chain).
+- Temporal durability guarantees and replay behaviour.
+- Redis streaming (requires a running Redis instance).
+- Multi-turn history (pydantic-ai message_history round-tripping via Temporal
+  workflow state).
+- Real LLM calls or production model behaviour.
+- The full temporal_agent.run(...) path, which schedules activities and cannot
+  run without a connected Temporal client.
+
+To test with live infrastructure: spin up Temporal + Redis + the ACP server +
+the Temporal worker, then use the AsyncAgentex client to create a task, send a
+message, and poll for messages — exactly as the existing examples/tutorials/
+10_async/10_temporal/110_pydantic_ai/tests/test_agent.py does.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from pydantic import BaseModel
+from pydantic_ai import Agent
+from pydantic_ai.models.test import TestModel
+from pydantic_ai.durable_exec.temporal import TemporalAgent
+
+from agentex.types.task_message import TaskMessage
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+# ---------------------------------------------------------------------------
+# Agent under test (mirrors examples/tutorials/10_async/10_temporal/110_pydantic_ai)
+# ---------------------------------------------------------------------------
+
+
+class TaskDeps(BaseModel):
+    """Per-run dependencies injected via RunContext.deps."""
+
+    task_id: str
+    parent_span_id: str | None = None
+
+
+def _make_temporal_agent() -> TemporalAgent[TaskDeps, str]:
+    """Build a TemporalAgent with TestModel and one weather tool.
+
+    The underlying pydantic-ai Agent is constructed with TaskDeps as the
+    deps_type, mirroring the real temporal tutorial agent. TestModel makes
+    the run deterministic and offline.
+    """
+    model = TestModel(
+        call_tools=["get_weather"],
+        custom_output_text="The weather in Paris is sunny and 72F.",
+    )
+    base: Agent[TaskDeps, str] = Agent(model, deps_type=TaskDeps)
+
+    @base.tool_plain
+    def get_weather(city: str) -> str:
+        """Get the current weather for a city."""
+        return f"The weather in {city} is sunny and 72F"
+
+    return TemporalAgent(base, name="test_temporal_agent")
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None:
+        self.sink = sink
+        self.ctype = ctype
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        self.sink.append(("open", self.ctype, self.task_message.content))
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.sink.append(("close", self.ctype))
+
+    async def stream_update(self, update: Any) -> Any:
+        self.sink.append(("delta", self.ctype, update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self) -> None:
+        self.sink: list[Any] = []
+        self.messages_opened: list[Any] = []
+
+    def streaming_task_message_context(
+        self,
+        task_id: str,
+        initial_content: Any,
+        streaming_mode: str = "coalesced",
+        created_at: Any = None,
+    ) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        self.messages_opened.append(initial_content)
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Helpers: the event_stream_handler pattern tested offline
+# ---------------------------------------------------------------------------
+
+
+async def _run_event_stream_handler(
+    temporal_agent: TemporalAgent[TaskDeps, str],
+    user_msg: str = "What is the weather in Paris?",
+    task_id: str = "task1",
+) -> _FakeStreaming:
+    """Simulate the event_stream_handler activity offline.
+
+    In production the event_stream_handler receives the event stream from
+    pydantic-ai's model activity and calls stream_pydantic_ai_events.
+    Here we obtain the stream directly from run_stream_events (which works
+    offline with TestModel) and forward it to stream_pydantic_ai_events backed
+    by a fake streaming backend.
+
+    This is equivalent to:
+        async def event_handler(ctx: RunContext[TaskDeps], events: AsyncIterable[AgentStreamEvent]) -> None:
+            await stream_pydantic_ai_events(events, ctx.deps.task_id)
+    but without requiring a running Temporal server.
+    """
+    fake_streaming = _FakeStreaming()
+
+    async with temporal_agent.run_stream_events(user_msg) as stream:
+        await _fake_stream_pydantic_ai_events(stream, task_id, fake_streaming)
+
+    return fake_streaming
+
+
+async def _fake_stream_pydantic_ai_events(
+    stream: Any,
+    task_id: str,
+    fake_streaming: _FakeStreaming,
+) -> str:
+    """Like stream_pydantic_ai_events but uses an injected fake streaming backend.
+
+    Mirrors the exact chain that stream_pydantic_ai_events uses internally:
+      PydanticAITurn(stream)
+      + UnifiedEmitter.auto_send_turn(turn)
+    but with the fake backend injected so no Redis is needed.
+    """
+    turn = PydanticAITurn(stream, model=None)
+    emitter = UnifiedEmitter(
+        task_id=task_id,
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result.final_text
+
+
+# ---------------------------------------------------------------------------
+# Tests: TemporalAgent + event_stream_handler pattern
+# ---------------------------------------------------------------------------
+
+
+class TestTemporalEventStreamHandlerMessageOrder:
+    """The event_stream_handler pushes messages in canonical order."""
+
+    async def test_tool_request_before_tool_response(self) -> None:
+        """tool_request is pushed before tool_response."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert "tool_request" in types
+        assert "tool_response" in types
+        assert types.index("tool_request") < types.index("tool_response")
+
+    async def test_text_is_last(self) -> None:
+        """Text content is pushed last (after the tool round-trip)."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert types[-1] == "text"
+
+    async def test_exactly_three_messages(self) -> None:
+        """Exactly tool_request + tool_response + text are pushed."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        assert len(fake_streaming.messages_opened) == 3, (
+            f"Expected 3 messages, got {len(fake_streaming.messages_opened)}: "
+            f"{[getattr(m, 'type', None) for m in fake_streaming.messages_opened]}"
+        )
+
+
+class TestTemporalEventStreamHandlerContent:
+    """Content verification for the messages pushed by the event_stream_handler."""
+
+    async def test_tool_request_is_get_weather(self) -> None:
+        """The pushed tool_request is for the get_weather function."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)]
+        assert len(tool_reqs) == 1
+        assert tool_reqs[0].name == "get_weather"
+
+    async def test_tool_response_contains_weather_result(self) -> None:
+        """The pushed tool_response contains the get_weather return value."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)]
+        assert len(tool_resps) == 1
+        assert isinstance(tool_resps[0].content, str)
+        assert "72F" in tool_resps[0].content
+        assert tool_resps[0].name == "get_weather"
+
+    async def test_tool_call_ids_match(self) -> None:
+        """tool_request and tool_response share the same tool_call_id."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent))
+        tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent))
+        assert tool_req.tool_call_id == tool_resp.tool_call_id
+
+
+class TestTemporalFinalText:
+    """stream_pydantic_ai_events returns the correct final text."""
+
+    async def test_final_text_matches_model_output(self) -> None:
+        """The returned final text equals the TestModel custom_output_text."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = _FakeStreaming()
+
+        async with temporal_agent.run_stream_events("What is the weather in Paris?") as stream:
+            final = await _fake_stream_pydantic_ai_events(stream, "task1", fake_streaming)
+
+        assert final == "The weather in Paris is sunny and 72F."
+
+    async def test_context_lifecycle_complete(self) -> None:
+        """Every opened streaming context is also closed."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        opens = [e for e in fake_streaming.sink if e[0] == "open"]
+        closes = [e for e in fake_streaming.sink if e[0] == "close"]
+        assert len(opens) == len(closes), "Every opened context must be closed"
+
+
+class TestTemporalAgentStreamEventsOffline:
+    """TemporalAgent.run_stream_events produces the expected raw pydantic-ai events.
+
+    This verifies that the TemporalAgent wrapper does not suppress event stream
+    delivery when used with TestModel, so the event_stream_handler pattern is
+    meaningful offline.
+    """
+
+    async def test_run_stream_events_yields_tool_call_and_text(self) -> None:
+        """TemporalAgent.run_stream_events with TestModel yields tool + text events."""
+
+        temporal_agent = _make_temporal_agent()
+        collected: list[Any] = []
+
+        async with temporal_agent.run_stream_events("What is the weather in Paris?") as stream:
+            async for ev in stream:
+                collected.append(ev)
+
+        event_types = {type(ev).__name__ for ev in collected}
+        assert "FunctionToolResultEvent" in event_types, "Expected FunctionToolResultEvent proving tool call ran"
+        assert "PartDeltaEvent" in event_types or "PartEndEvent" in event_types, (
+            "Expected text part events in the stream"
+        )
+
+    async def test_run_stream_events_contains_tool_result(self) -> None:
+        """The raw event stream contains a FunctionToolResultEvent with the tool output."""
+        from pydantic_ai.messages import FunctionToolResultEvent
+
+        temporal_agent = _make_temporal_agent()
+
+        async with temporal_agent.run_stream_events("What is the weather in Paris?") as stream:
+            events = [ev async for ev in stream]
+
+        tool_results = [ev for ev in events if isinstance(ev, FunctionToolResultEvent)]
+        assert len(tool_results) >= 1
+        assert isinstance(tool_results[0].part.content, str)
+        assert "72F" in tool_results[0].part.content
+
+
+class TestTemporalLiveInfraNote:
+    """Placeholder tests documenting what requires live Temporal infrastructure.
+
+    These tests are skipped by design. They document the gap between what the
+    offline tests cover and what a full integration test would exercise.
+    """
+
+    @pytest.mark.skip(
+        reason=(
+            "Requires live Temporal server + Redis + ACP server + worker. "
+            "See examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py "
+            "for the live integration test that exercises this path end-to-end."
+        )
+    )
+    async def test_temporal_workflow_full_round_trip(self) -> None:
+        """Full Temporal workflow: create_task -> send_event -> poll_messages."""
+        pass  # Covered by the live tutorial test
+
+
+@pytest.mark.parametrize(
+    "user_msg",
+    [
+        "What is the weather in Paris?",
+        "Tell me the weather in London.",
+    ],
+)
+async def test_temporal_handler_pushes_messages_for_various_inputs(user_msg: str) -> None:
+    """event_stream_handler pushes tool_request + tool_response + text for any input."""
+    temporal_agent = _make_temporal_agent()
+    fake_streaming = await _run_event_stream_handler(temporal_agent, user_msg=user_msg)
+
+    types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+    assert "tool_request" in types
+    assert "tool_response" in types
+    assert "text" in types
diff --git a/tests/lib/core/harness/test_span_derivation.py b/tests/lib/core/harness/test_span_derivation.py
new file mode 100644
index 000000000..51e2ede2c
--- /dev/null
+++ b/tests/lib/core/harness/test_span_derivation.py
@@ -0,0 +1,286 @@
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.tool_request_delta import ToolRequestDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+
+def _signals(deriver, events):
+    out = []
+    for e in events:
+        out.extend(deriver.observe(e))
+    out.extend(deriver.flush())
+    return out
+
+
+def _tool_req(idx, tcid, name, args):
+    return StreamTaskMessageStart(
+        type="start",
+        index=idx,
+        content=ToolRequestContent(type="tool_request", author="agent", tool_call_id=tcid, name=name, arguments=args),
+    )
+
+
+def test_text_only_yields_no_spans():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=None),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    assert _signals(d, events) == []
+
+
+def test_single_tool_opens_on_done_closes_on_response():
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "call_1", "Bash", {"cmd": "ls"}),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="call_1", name="Bash", content="files"
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs == [
+        OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"}),
+        CloseSpan(key="call_1", output="files", is_complete=True),
+    ]
+    # No status reported -> CloseSpan carries is_error=None.
+    assert sigs[1].is_error is None
+
+
+def test_tool_response_is_error_propagates_to_close_span():
+    """ToolResponseContent.is_error flows onto the CloseSpan so a derived tool
+    span can be marked as a failure (AGX1-371)."""
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "call_err", "Bash", {"cmd": "false"}),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_err",
+                name="Bash",
+                content="boom",
+                is_error=True,
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs == [
+        OpenSpan(key="call_err", kind="tool", name="Bash", input={"cmd": "false"}),
+        CloseSpan(key="call_err", output="boom", is_complete=True, is_error=True),
+    ]
+
+
+def test_reasoning_opens_on_start_closes_on_done():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start", index=0, content=ReasoningContent(type="reasoning", author="agent", summary=[], content=[])
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="reasoning:0", kind="reasoning", name="reasoning", input={})
+    assert sigs[1] == CloseSpan(key="reasoning:0", output=None, is_complete=True)
+
+
+def test_parallel_tools_pair_by_tool_call_id():
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "a", "T1", {}),
+        _tool_req(1, "b", "T2", {}),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageDone(type="done", index=1),
+        StreamTaskMessageFull(
+            type="full",
+            index=2,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="b", name="T2", content="rb"
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=3,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="a", name="T1", content="ra"
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    opens = [s for s in sigs if isinstance(s, OpenSpan)]
+    closes = [s for s in sigs if isinstance(s, CloseSpan)]
+    assert {o.key for o in opens} == {"a", "b"}
+    assert [c.key for c in closes] == ["b", "a"]
+    assert all(c.is_complete for c in closes)
+
+
+def test_streamed_args_accumulate_into_open_input():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ToolRequestDelta(type="tool_request", tool_call_id="c", name="Bash", arguments_delta='{"cmd":'),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ToolRequestDelta(type="tool_request", tool_call_id="c", name="Bash", arguments_delta='"ls"}'),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="c", kind="tool", name="Bash", input={"cmd": "ls"})
+
+
+def test_unclosed_tool_closed_incomplete_on_flush():
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "x", "Bash", {}),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="x", kind="tool", name="Bash", input={})
+    assert sigs[1] == CloseSpan(key="x", output=None, is_complete=False)
+
+
+def test_none_index_is_skipped():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=None,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="n", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=None),
+    ]
+    assert _signals(d, events) == []
+
+
+def test_orphan_tool_response_ignored():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="z", name="Bash", content="r"
+            ),
+        ),
+    ]
+    assert _signals(d, events) == []
+
+
+def test_full_tool_request_opens_span():
+    """Full(ToolRequestContent) must open a tool span (for LangGraph-style harnesses)."""
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_x",
+                name="Bash",
+                arguments={"cmd": "ls"},
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="call_x", kind="tool", name="Bash", input={"cmd": "ls"})
+    assert sigs[1] == CloseSpan(key="call_x", output=None, is_complete=False)
+
+
+def test_full_tool_request_and_response_paired():
+    """Full(ToolRequestContent) + Full(ToolResponseContent) produces a complete span pair."""
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_y",
+                name="Grep",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_y",
+                name="Grep",
+                content="result",
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs == [
+        OpenSpan(key="call_y", kind="tool", name="Grep", input={}),
+        CloseSpan(key="call_y", output="result", is_complete=True),
+    ]
+
+
+def test_full_tool_request_does_not_double_open():
+    """A Full(ToolRequestContent) for an already-open tool_call_id is a no-op."""
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_z",
+                name="X",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_z",
+                name="X",
+                arguments={},
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    opens = [s for s in sigs if isinstance(s, OpenSpan)]
+    assert len(opens) == 1
+    assert opens[0].key == "call_z"
diff --git a/tests/lib/core/harness/test_tracer.py b/tests/lib/core/harness/test_tracer.py
new file mode 100644
index 000000000..b3d9002c4
--- /dev/null
+++ b/tests/lib/core/harness/test_tracer.py
@@ -0,0 +1,73 @@
+from typing import override
+
+import pytest
+
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan
+from agentex.lib.core.harness.tracer import SpanTracer
+
+from ._fakes import FakeTracing
+
+
+@pytest.mark.asyncio
+async def test_open_then_close_starts_and_ends_span():
+    fake = FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"}))
+    await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True))
+    assert fake.started == [("Bash", "p1", {"cmd": "ls"})]
+    assert fake.ended == [("Bash", "files")]
+
+
+@pytest.mark.asyncio
+async def test_close_records_is_error_on_span_data():
+    """A CloseSpan carrying is_error records the status on span.data (AGX1-371)."""
+    fake = FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="call_err", kind="tool", name="Bash", input={}))
+    await tracer.handle(CloseSpan(key="call_err", output="boom", is_complete=True, is_error=True))
+    assert fake.ended_spans[0].data == {"is_error": True}
+
+
+@pytest.mark.asyncio
+async def test_close_without_status_leaves_span_data_untouched():
+    """is_error=None (no status reported) must not write to span.data."""
+    fake = FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={}))
+    await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True))
+    assert fake.ended_spans[0].data is None
+
+
+@pytest.mark.asyncio
+async def test_no_trace_id_is_noop():
+    fake = FakeTracing()
+    tracer = SpanTracer(trace_id="", parent_span_id=None, tracing=fake)
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="X"))
+    await tracer.handle(CloseSpan(key="k"))
+    assert fake.started == [] and fake.ended == []
+
+
+@pytest.mark.asyncio
+async def test_tracing_failure_is_swallowed():
+    class _Boom(FakeTracing):
+        @override
+        async def start_span(self, **kw):
+            raise RuntimeError("backend down")
+
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=_Boom())
+    # Must not raise.
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="X"))
+    await tracer.handle(CloseSpan(key="k"))
+    assert tracer._open == {}
+
+
+@pytest.mark.asyncio
+async def test_duplicate_open_replaces_silently():
+    fake = FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="A"))
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="B"))
+    await tracer.handle(CloseSpan(key="k"))
+    # Both opens started spans, but only the second ("B") is closed.
+    assert [name for name, _, _ in fake.started] == ["A", "B"]
+    assert fake.ended == [("B", None)]
diff --git a/tests/lib/core/harness/test_types.py b/tests/lib/core/harness/test_types.py
new file mode 100644
index 000000000..68bc89ce2
--- /dev/null
+++ b/tests/lib/core/harness/test_types.py
@@ -0,0 +1,53 @@
+from typing import AsyncIterator
+
+from agentex.lib.core.harness.types import (
+    OpenSpan,
+    CloseSpan,
+    TurnUsage,
+    TurnResult,
+    HarnessTurn,
+    StreamTaskMessage,
+)
+
+
+def test_open_close_span_construct():
+    o = OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"})
+    c = CloseSpan(key="call_1", output="files", is_complete=True)
+    assert o.key == c.key == "call_1"
+    assert o.kind == "tool"
+    assert c.is_complete is True
+
+
+def test_turn_usage_defaults_are_none():
+    u = TurnUsage(model="claude-opus-4-6")
+    assert u.model == "claude-opus-4-6"
+    assert u.input_tokens is None
+    assert u.num_tool_calls == 0
+
+
+def test_turn_result_wraps_usage():
+    r = TurnResult(final_text="hi", usage=TurnUsage(model="m"))
+    assert r.final_text == "hi"
+    assert r.usage.model == "m"
+
+
+def test_close_span_defaults():
+    c = CloseSpan(key="x")
+    assert c.output is None
+    assert c.is_complete is True
+
+
+def test_harness_turn_runtime_check():
+    class _Turn:
+        @property
+        def events(self) -> AsyncIterator[StreamTaskMessage]:
+            async def _gen() -> AsyncIterator[StreamTaskMessage]:
+                if False:
+                    yield  # pragma: no cover
+
+            return _gen()
+
+        def usage(self) -> TurnUsage:
+            return TurnUsage(model="m")
+
+    assert isinstance(_Turn(), HarnessTurn) is True
diff --git a/tests/lib/core/harness/test_yield_delivery.py b/tests/lib/core/harness/test_yield_delivery.py
new file mode 100644
index 000000000..ef3861a16
--- /dev/null
+++ b/tests/lib/core/harness/test_yield_delivery.py
@@ -0,0 +1,77 @@
+import pytest
+
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.core.harness.yield_delivery import yield_events
+
+from ._fakes import FakeTracing
+
+
+async def _gen(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_yield_passes_events_through_and_traces():
+    fake = FakeTracing()
+    tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake)
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="c", name="Bash", content="ok"
+            ),
+        ),
+    ]
+    out = [e async for e in yield_events(_gen(events), tracer=tracer)]
+    assert out == events  # passthrough unchanged
+    assert fake.started_names == ["Bash"]  # span derived + opened
+    assert fake.ended_outputs == ["ok"]  # span closed with response
+
+
+@pytest.mark.asyncio
+async def test_yield_without_tracer_is_pure_passthrough():
+    events = [
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    out = [e async for e in yield_events(_gen(events), tracer=None)]
+    assert out == events
+
+
+@pytest.mark.asyncio
+async def test_flush_runs_on_early_close():
+    fake = FakeTracing()
+    tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake)
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        # response intentionally never arrives
+    ]
+    gen = yield_events(_gen(events), tracer=tracer)
+    first = await gen.__anext__()  # Start
+    second = await gen.__anext__()  # Done -> tool span opens here
+    await gen.aclose()  # triggers the finally -> flush()
+    assert fake.started_names == ["Bash"]
+    assert fake.ended_outputs == [None]  # flush closed the unpaired span (incomplete, no output)
diff --git a/tests/lib/core/tracing/processors/test_agentex_tracing_processor.py b/tests/lib/core/tracing/processors/test_agentex_tracing_processor.py
index ec1ed5e88..84f37b495 100644
--- a/tests/lib/core/tracing/processors/test_agentex_tracing_processor.py
+++ b/tests/lib/core/tracing/processors/test_agentex_tracing_processor.py
@@ -2,7 +2,8 @@
 
 import asyncio
 import weakref
-from unittest.mock import MagicMock, patch
+from datetime import datetime, timezone
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
@@ -24,11 +25,163 @@
 MODULE = "agentex.lib.core.tracing.processors.agentex_tracing_processor"
 
 
+SKIP_ENV = "AGENTEX_TRACING_SKIP_AGENTEX_SPAN_START"
+
+
 def _make_config() -> MagicMock:
     """Empty config — AgentexTracingProcessorConfig is unused by __init__."""
     return MagicMock()
 
 
+def _make_span():
+    from agentex.types.span import Span
+
+    now = datetime.now(timezone.utc)
+    return Span(
+        id="span-1",
+        trace_id="trace-1",
+        name="test-span",
+        start_time=now,
+        end_time=now,
+        input={"in": 1},
+        output={"out": 2},
+    )
+
+
+class TestAgentexSyncSkipSpanStart:
+    """The Agentex backend writes create-on-start + update-on-end by default.
+    End-only ingest (default) skips the start write and makes the END a single
+    create — verify the start is a no-op and end does an INSERT, not an UPDATE.
+    """
+
+    def test_start_skipped_and_end_creates_by_default(self, monkeypatch):
+        monkeypatch.delenv(SKIP_ENV, raising=False)  # default ON
+        with patch(f"{MODULE}.Agentex") as MockAgentex:
+            from agentex.lib.core.tracing.processors.agentex_tracing_processor import (
+                AgentexSyncTracingProcessor,
+            )
+
+            processor = AgentexSyncTracingProcessor(_make_config())
+            client = MockAgentex.return_value
+            span = _make_span()
+
+            processor.on_span_start(span)
+            client.spans.create.assert_not_called()  # start skipped
+            client.spans.update.assert_not_called()
+
+            processor.on_span_end(span)
+            client.spans.create.assert_called_once()  # single INSERT on end
+            client.spans.update.assert_not_called()  # never a 404-prone UPDATE
+
+    def test_start_creates_and_end_updates_when_skip_disabled(self, monkeypatch):
+        monkeypatch.setenv(SKIP_ENV, "0")
+        with patch(f"{MODULE}.Agentex") as MockAgentex:
+            from agentex.lib.core.tracing.processors.agentex_tracing_processor import (
+                AgentexSyncTracingProcessor,
+            )
+
+            processor = AgentexSyncTracingProcessor(_make_config())
+            client = MockAgentex.return_value
+            span = _make_span()
+
+            processor.on_span_start(span)
+            client.spans.create.assert_called_once()  # start write restored
+
+            processor.on_span_end(span)
+            client.spans.update.assert_called_once()  # end is the UPDATE
+
+    def test_skip_decision_captured_at_init_not_per_call(self, monkeypatch):
+        """The two halves of a span MUST use the same skip decision. A flag
+        toggled after construction must not split it (start-skip + end-update
+        would 404). The decision is captured once at init.
+        """
+        monkeypatch.delenv(SKIP_ENV, raising=False)  # construct with skip ON
+        with patch(f"{MODULE}.Agentex") as MockAgentex:
+            from agentex.lib.core.tracing.processors.agentex_tracing_processor import (
+                AgentexSyncTracingProcessor,
+            )
+
+            processor = AgentexSyncTracingProcessor(_make_config())
+            client = MockAgentex.return_value
+            span = _make_span()
+
+            processor.on_span_start(span)  # skipped (cached ON)
+            monkeypatch.setenv(SKIP_ENV, "0")  # toggle mid-span — must be ignored
+            processor.on_span_end(span)
+
+            client.spans.create.assert_called_once()  # still end-only INSERT
+            client.spans.update.assert_not_called()  # NOT a 404-prone UPDATE
+
+
+class TestAgentexAsyncSkipSpanStart:
+    async def test_start_skipped_and_end_creates_by_default(self, monkeypatch):
+        monkeypatch.delenv(SKIP_ENV, raising=False)  # default ON
+        with patch(f"{MODULE}.create_async_agentex_client") as mock_factory:
+            client = MagicMock()
+            client.spans.create = AsyncMock()
+            client.spans.update = AsyncMock()
+            mock_factory.return_value = client
+
+            from agentex.lib.core.tracing.processors.agentex_tracing_processor import (
+                AgentexAsyncTracingProcessor,
+            )
+
+            processor = AgentexAsyncTracingProcessor(_make_config())
+            span = _make_span()
+
+            await processor.on_span_start(span)
+            client.spans.create.assert_not_called()  # start skipped
+            client.spans.update.assert_not_called()
+
+            await processor.on_span_end(span)
+            client.spans.create.assert_awaited_once()  # single INSERT on end
+            client.spans.update.assert_not_called()
+
+    async def test_start_creates_and_end_updates_when_skip_disabled(self, monkeypatch):
+        monkeypatch.setenv(SKIP_ENV, "0")
+        with patch(f"{MODULE}.create_async_agentex_client") as mock_factory:
+            client = MagicMock()
+            client.spans.create = AsyncMock()
+            client.spans.update = AsyncMock()
+            mock_factory.return_value = client
+
+            from agentex.lib.core.tracing.processors.agentex_tracing_processor import (
+                AgentexAsyncTracingProcessor,
+            )
+
+            processor = AgentexAsyncTracingProcessor(_make_config())
+            span = _make_span()
+
+            await processor.on_span_start(span)
+            client.spans.create.assert_awaited_once()  # start write restored
+
+            await processor.on_span_end(span)
+            client.spans.update.assert_awaited_once()  # end is the UPDATE
+
+    async def test_skip_decision_captured_at_init_not_per_call(self, monkeypatch):
+        """A flag toggled after construction must not split a span's lifecycle."""
+        monkeypatch.delenv(SKIP_ENV, raising=False)  # construct with skip ON
+        with patch(f"{MODULE}.create_async_agentex_client") as mock_factory:
+            client = MagicMock()
+            client.spans.create = AsyncMock()
+            client.spans.update = AsyncMock()
+            mock_factory.return_value = client
+
+            from agentex.lib.core.tracing.processors.agentex_tracing_processor import (
+                AgentexAsyncTracingProcessor,
+            )
+
+            processor = AgentexAsyncTracingProcessor(_make_config())
+            span = _make_span()
+
+            await processor.on_span_start(span)  # skipped (cached ON)
+            monkeypatch.setenv(SKIP_ENV, "0")  # toggle mid-span — must be ignored
+            await processor.on_span_end(span)
+
+            client.spans.create.assert_awaited_once()  # still end-only INSERT
+            client.spans.update.assert_not_called()  # NOT a 404-prone UPDATE
+
+
 class TestAgentexAsyncTracingProcessor:
     """Coverage for the per-event-loop client cache.  The SGP processor has
     matching tests; mirror them here so a regression in the Agentex side
diff --git a/tests/lib/test_state_machine.py b/tests/lib/test_state_machine.py
new file mode 100644
index 000000000..ce32ba9f0
--- /dev/null
+++ b/tests/lib/test_state_machine.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+from typing import override
+from unittest.mock import AsyncMock, patch
+
+from agentex.lib.sdk.state_machine import State, StateMachine, StateWorkflow
+from agentex.lib.utils.model_utils import BaseModel
+
+
+class ExampleData(BaseModel):
+    value: int = 0
+
+
+class InitialWorkflow(StateWorkflow):
+    transitions = ["next"]
+
+    @override
+    async def execute(self, state_machine, state_machine_data=None):
+        return "next"
+
+
+class NextWorkflow(StateWorkflow):
+    transitions = ["initial"]
+
+    @override
+    async def execute(self, state_machine, state_machine_data=None):
+        return "initial"
+
+
+class ExampleStateMachine(StateMachine[ExampleData]):
+    @override
+    async def terminal_condition(self):
+        return False
+
+
+def _make_state_machine() -> ExampleStateMachine:
+    return ExampleStateMachine(
+        initial_state="initial",
+        states=[
+            State(name="initial", workflow=InitialWorkflow()),
+            State(name="next", workflow=NextWorkflow()),
+        ],
+        task_id="task-123",
+        state_machine_data=ExampleData(value=1),
+        trace_transitions=True,
+    )
+
+
+async def test_reset_to_initial_state_skips_end_span_when_start_span_fails_open():
+    state_machine = _make_state_machine()
+    await state_machine.transition("next")
+
+    with patch(
+        "agentex.lib.sdk.state_machine.state_machine.adk.tracing.start_span",
+        new=AsyncMock(return_value=None),
+    ) as start_span, patch(
+        "agentex.lib.sdk.state_machine.state_machine.adk.tracing.end_span",
+        new=AsyncMock(),
+    ) as end_span:
+        await state_machine.reset_to_initial_state()
+
+    assert state_machine.get_current_state() == "initial"
+    start_span.assert_awaited_once_with(
+        trace_id="task-123",
+        name="state_transition_reset",
+        input={"input_state": "next"},
+    )
+    end_span.assert_not_awaited()
diff --git a/tests/lib/test_webhooks.py b/tests/lib/test_webhooks.py
new file mode 100644
index 000000000..e42fac9dd
--- /dev/null
+++ b/tests/lib/test_webhooks.py
@@ -0,0 +1,267 @@
+"""Unit tests for the SDK webhook helper (agentex.lib.sdk.utils.webhooks)."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+import pytest
+
+from agentex.lib import adk
+from agentex.lib.sdk.utils.webhooks import (
+    WebhookError,
+    session_key,
+    handle_webhook,
+    render_generic,
+    shape_github_pr,
+    resolve_remote_params,
+)
+
+
+def _pr_payload(**pr_overrides) -> dict:
+    pr = {
+        "number": 42,
+        "title": "Add retry to uploader",
+        "body": "Adds backoff on 503.",
+        "html_url": "https://example.com/acme/widgets/pull/42",
+    }
+    pr.update(pr_overrides)
+    return {
+        "action": "opened",
+        "repository": {"full_name": "acme/widgets"},
+        "sender": {"login": "octocat"},
+        "pull_request": pr,
+    }
+
+
+class TestSessionKey:
+    def test_stable_and_folds_same_conversation(self):
+        a = session_key("agent-1", "github_pr", "acme/widgets#42")
+        b = session_key("agent-1", "github_pr", "acme/widgets#42")
+        assert a == b and a.startswith("wh-github_pr-")
+
+    def test_differs_by_peer(self):
+        assert session_key("a", "github_pr", "r#1") != session_key("a", "github_pr", "r#2")
+
+
+class TestShaping:
+    def test_render_generic_prefers_text_field(self):
+        assert render_generic({"text": "hello"}) == "hello"
+
+    def test_render_generic_falls_back_to_json(self):
+        assert "zen" in render_generic({"zen": "be awesome"})
+
+    def test_render_generic_matches_keys_case_insensitively(self):
+        assert render_generic({"Message": "hi there"}) == "hi there"
+
+    def test_render_generic_supports_broadened_keys(self):
+        assert render_generic({"description": "do the thing"}) == "do the thing"
+
+    def test_github_pr_shape(self):
+        text, peer, sender = shape_github_pr(_pr_payload())
+        assert "Pull request acme/widgets#42: Add retry to uploader" in text
+        assert "Action: opened" in text
+        assert "Adds backoff on 503." in text
+        assert peer == "acme/widgets#42"
+        assert sender == "octocat"
+
+    def test_github_pr_includes_diff(self):
+        body = _pr_payload()
+        body["pull_request"]["diff"] = "diff --git a/x b/x\n+line"
+        text, _, _ = shape_github_pr(body)
+        assert "Diff:" in text and "+line" in text
+
+    def test_non_pr_payload_falls_back_to_generic(self):
+        text, peer, _ = shape_github_pr({"zen": "be awesome", "hook_id": 1})
+        assert "Pull request" not in text
+        assert "be awesome" in text
+        assert peer is None
+
+
+class TestResolveRemoteParams:
+    async def test_envelope_with_params_and_metadata(self):
+        async def fetch(_url):
+            return {"params": {"system_prompt": "x", "model": "m"}, "task_metadata": {"cfg": "1"}}
+
+        params, md = await resolve_remote_params("https://h/resolve", fetch=fetch)
+        assert params == {"system_prompt": "x", "model": "m"}
+        assert md == {"cfg": "1"}
+
+    async def test_bare_object_is_params_minus_task_metadata(self):
+        async def fetch(_url):
+            return {"system_prompt": "x", "task_metadata": {"cfg": "1"}}
+
+        params, md = await resolve_remote_params("https://h/resolve", fetch=fetch)
+        assert params == {"system_prompt": "x"}  # task_metadata stripped from params
+        assert md == {"cfg": "1"}
+
+    async def test_non_object_raises(self):
+        async def fetch(_url):
+            return ["nope"]
+
+        with pytest.raises(WebhookError):
+            await resolve_remote_params("https://h/resolve", fetch=fetch)
+
+
+def _agent_msg(text: str):
+    return SimpleNamespace(content=SimpleNamespace(author="agent", type="text", content=text))
+
+
+class TestHandleWebhook:
+    @pytest.fixture(autouse=True)
+    def _mock_adk(self, monkeypatch):
+        self.created = {}
+        self.sent = {}
+        self.stamped = {}
+        self.created_task_metadata = {}
+
+        async def create_task(*, name, agent_name, params=None, request=None, **_):
+            self.created = {"name": name, "agent_name": agent_name, "params": params, "request": request}
+            return SimpleNamespace(id="task-1", task_metadata=self.created_task_metadata)
+
+        async def send_message(*, task_id, agent_name, content, **_):
+            self.sent = {"task_id": task_id, "content": content}
+            return [_agent_msg("Looks good — ship it.")]
+
+        async def update_task(*, task_id, task_metadata=None, **_):
+            self.stamped = {"task_id": task_id, "task_metadata": task_metadata}
+            return SimpleNamespace(id=task_id)
+
+        send_event = AsyncMock()
+        monkeypatch.setattr(adk.acp, "create_task", create_task)
+        monkeypatch.setattr(adk.acp, "send_message", send_message)
+        monkeypatch.setattr(adk.acp, "send_event", send_event)
+        monkeypatch.setattr(adk.tasks, "update", update_task)
+        self.send_event = send_event
+        yield
+
+    async def test_sync_github_pr_with_config_by_id(self):
+        async def fake_resolve(_url):
+            return {"params": {"system_prompt": "review"}, "task_metadata": {"agent_config_id": "cfg-9"}}
+
+        result = await handle_webhook(
+            agent_name="golden-agent",
+            payload=_pr_payload(),
+            acp_type="sync",
+            shaper="github_pr",
+            params_source="https://h/v5/agent_configs/cfg-9/resolve",
+            fetch=fake_resolve,
+        )
+
+        assert result.reply == "Looks good — ship it."
+        assert self.created["params"] == {"system_prompt": "review"}
+        # metadata is returned on the result (SDK task/create can't carry it)
+        md = result.task_metadata
+        assert md["channel"] == "github_pr"
+        assert md["peer_id"] == "acme/widgets#42"
+        assert md["agent_config_id"] == "cfg-9"
+        # task folded on a stable session key
+        assert self.created["name"].startswith("wh-github_pr-")
+        # metadata is also stamped onto the task (best-effort) so it's labeled in the UI
+        assert self.stamped["task_id"] == "task-1"
+        assert self.stamped["task_metadata"]["peer_id"] == "acme/widgets#42"
+        assert self.stamped["task_metadata"]["agent_config_id"] == "cfg-9"
+
+    async def test_inline_params_no_fetch(self):
+        result = await handle_webhook(
+            agent_name="a",
+            payload={"text": "hi"},
+            acp_type="sync",
+            params={"system_prompt": "inline"},
+        )
+        assert result.reply == "Looks good — ship it."
+        assert self.created["params"] == {"system_prompt": "inline"}
+
+    async def test_source_metadata_cannot_override_canonical(self):
+        async def fake_resolve(_url):
+            return {"params": {}, "task_metadata": {"channel": "spoofed"}}
+
+        result = await handle_webhook(
+            agent_name="a",
+            payload=_pr_payload(),
+            shaper="github_pr",
+            params_source="https://h/resolve",
+            fetch=fake_resolve,
+        )
+        assert result.task_metadata["channel"] == "github_pr"
+
+    async def test_task_metadata_preserves_existing_keys_on_reused_task(self):
+        self.created_task_metadata = {
+            "labels": ["customer-facing"],
+            "agent_config_id": "old-cfg",
+            "channel": "old-channel",
+        }
+
+        async def fake_resolve(_url):
+            return {"params": {}, "task_metadata": {"agent_config_id": "cfg-9"}}
+
+        await handle_webhook(
+            agent_name="a",
+            payload=_pr_payload(),
+            shaper="github_pr",
+            params_source="https://h/resolve",
+            fetch=fake_resolve,
+        )
+
+        stamped_metadata = self.stamped["task_metadata"]
+        assert stamped_metadata["labels"] == ["customer-facing"]
+        assert stamped_metadata["agent_config_id"] == "cfg-9"
+        assert stamped_metadata["channel"] == "github_pr"
+
+    async def test_async_without_wait_sends_event_and_returns_no_reply(self):
+        result = await handle_webhook(agent_name="a", payload={"text": "go"}, acp_type="async", wait=False)
+        assert result.reply is None
+        self.send_event.assert_awaited_once()
+
+
+class TestAwaitReplyIgnoresStalePriorReply:
+    async def test_returns_only_new_agent_text_on_reused_task(self, monkeypatch):
+        from agentex.lib.sdk.utils.webhooks import _await_reply
+
+        old = _agent_msg("OLD reply")
+        old.id = "m1"
+        new = _agent_msg("NEW reply")
+        new.id = "m2"
+        calls = {"n": 0}
+
+        async def fake_list(*, task_id, **_):
+            calls["n"] += 1
+            return [old] if calls["n"] < 2 else [old, new]  # new appears on 2nd poll
+
+        async def no_sleep(_seconds):
+            return None
+
+        monkeypatch.setattr(adk.messages, "list", fake_list)
+        monkeypatch.setattr("asyncio.sleep", no_sleep)
+
+        # baseline = the pre-existing old message; only m2 (NEW) should be returned
+        reply = await _await_reply("task-1", {"m1"}, interval_s=0.0, quiescence_s=0.0)
+        assert reply == "NEW reply"
+
+    async def test_returns_idless_agent_text_after_snapshot(self, monkeypatch):
+        from agentex.lib.sdk.utils.webhooks import _await_reply
+
+        old = _agent_msg("OLD reply")
+        old.id = None
+        new = _agent_msg("NEW reply")
+        new.id = None
+        calls = {"n": 0}
+
+        async def fake_list(*, task_id, **_):
+            calls["n"] += 1
+            return [old] if calls["n"] < 2 else [old, new]
+
+        async def no_sleep(_seconds):
+            return None
+
+        monkeypatch.setattr(adk.messages, "list", fake_list)
+        monkeypatch.setattr("asyncio.sleep", no_sleep)
+
+        reply = await _await_reply(
+            "task-1",
+            set(),
+            seen_count=1,
+            interval_s=0.0,
+            quiescence_s=0.0,
+        )
+        assert reply == "NEW reply"