From 797837f7eb4d1a4a8246eeafbefd11a93242794e Mon Sep 17 00:00:00 2001 From: Nikhil Kulkarni Date: Mon, 1 Jun 2026 15:19:33 +0530 Subject: [PATCH] fix(llm): include expected and received keys in tool schema error message When a local model sends wrong argument keys to a tool the error fed back only said "Invalid tool input: Missing key at ["content"]". The model has no signal about what keys to use, so it retries identically and hits the doom loop. Adds expected and received keys to the error so the model can self-correct on the next turn. Also resolves Effect's $defs/$ref JSON schema shape for multi-field structs so the expected keys are extracted correctly. Fixes #29142 --- packages/llm/src/tool-runtime.ts | 41 ++++++++-- packages/llm/test/tool-runtime.test.ts | 106 +++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 8 deletions(-) diff --git a/packages/llm/src/tool-runtime.ts b/packages/llm/src/tool-runtime.ts index d69bbb9d478c..54d5cc5471bf 100644 --- a/packages/llm/src/tool-runtime.ts +++ b/packages/llm/src/tool-runtime.ts @@ -34,9 +34,34 @@ export const dispatch = (tools: Tools, call: ToolCallPart): Effect.Effect | undefined): Record | undefined => { + if (parameters?.properties) return parameters.properties as Record + if (typeof parameters?.$ref === "string" && parameters.$defs) { + const refName = parameters.$ref.split("/").pop() + if (refName) { + const def = (parameters.$defs as Record)[refName] + if (def && typeof def === "object") return (def as Record).properties as Record + } + } + return undefined +} + +const schemaErrorMessage = (name: string, input: unknown, parameters: Record | undefined, schemaError: string) => { + const expected = resolveProperties(parameters) + const expectedKeys = expected ? Object.keys(expected) : undefined + const receivedKeys = input !== null && typeof input === "object" ? Object.keys(input as Record) : undefined + if (!expectedKeys && !receivedKeys) return `Invalid tool input for "${name}": ${schemaError}` + const parts = [`Invalid tool input for "${name}": ${schemaError}`] + if (expectedKeys) parts.push(`Expected keys: [${expectedKeys.join(", ")}]`) + if (receivedKeys) parts.push(`Received keys: [${receivedKeys.join(", ")}]`) + return parts.join(". ") +} + const decodeAndExecute = (tool: AnyTool, call: ToolCallPart): Effect.Effect => tool._decode(call.input).pipe( - Effect.mapError((error) => new ToolFailure({ message: `Invalid tool input: ${error.message}` })), + Effect.mapError((error) => new ToolFailure({ message: schemaErrorMessage(call.name, call.input, tool._definition.inputSchema as Record | undefined, error.message) })), Effect.flatMap((decoded) => tool.execute!(decoded, { id: call.id, name: call.name }).pipe( Effect.flatMap((value) => @@ -47,15 +72,15 @@ const decodeAndExecute = (tool: AnyTool, call: ToolCallPart): Effect.Effect { + if (tool._legacyResult && ToolResultValue.is(encoded)) + return { result: encoded, output: ToolOutput.fromResultValue(encoded) } + const output = tool._project(decoded, call.id, encoded) + const result = ToolOutput.toResultValue(output) + return result.type === "error" ? { result } : { result, output } + }), ), ), - Effect.map((encoded) => { - if (tool._legacyResult && ToolResultValue.is(encoded)) - return { result: encoded, output: ToolOutput.fromResultValue(encoded) } - const output = tool._project(decoded, call.id, encoded) - const result = ToolOutput.toResultValue(output) - return result.type === "error" ? { result } : { result, output } - }), ), ), ) diff --git a/packages/llm/test/tool-runtime.test.ts b/packages/llm/test/tool-runtime.test.ts index 5194c8f6e5f2..bfc4bbb74e2f 100644 --- a/packages/llm/test/tool-runtime.test.ts +++ b/packages/llm/test/tool-runtime.test.ts @@ -55,6 +55,31 @@ const schema_only_weather = Tool.make({ success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }), }) +// Mirrors the real write tool schema — the most-reported failure surface for +// OpenAI-compatible local models (Qwen, DeepSeek) that emit wrong key names. +const write_file = Tool.make({ + description: "Write content to a file.", + parameters: Schema.Struct({ + content: Schema.String, + filePath: Schema.String, + }), + success: Schema.Struct({ output: Schema.String }), + execute: ({ filePath }) => Effect.succeed({ output: `Wrote ${filePath}` }), +}) + +// Mirrors the real edit tool schema — covers the missing-required-key failure +// mode where the model runs out of token budget mid-generation. +const edit_file = Tool.make({ + description: "Edit a file by replacing a string.", + parameters: Schema.Struct({ + filePath: Schema.String, + oldString: Schema.String, + newString: Schema.String, + }), + success: Schema.Struct({ output: Schema.String }), + execute: ({ filePath }) => Effect.succeed({ output: `Edited ${filePath}` }), +}) + describe("LLMClient tools", () => { it.effect("uses the registered model route when adding runtime tools", () => Effect.gen(function* () { @@ -636,6 +661,87 @@ describe("LLMClient tools", () => { }), ) + it.effect("includes expected and received keys in schema error message when wrong keys are sent", () => + Effect.gen(function* () { + const layer = scriptedResponses([ + sseEvents(toolCallChunk("call_1", "get_weather", '{"cityName":"Paris"}'), finishChunk("tool_calls")), + sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")), + ]) + + const events = Array.from( + yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe( + Stream.runCollect, + Effect.provide(layer), + ), + ) + + const toolError = events.find(LLMEvent.is.toolError) + expect(toolError?.message).toContain("get_weather") + expect(toolError?.message).toContain("city") + expect(toolError?.message).toContain("cityName") + }), + ) + + it.effect("emits write tool schema error naming expected and received keys when model uses wrong key names", () => + Effect.gen(function* () { + // OpenAI-compatible local models (Qwen, DeepSeek) commonly emit "fileContent" + // and "path" instead of the declared "content" and "filePath". The error fed + // back to the model must name both sides so it can self-correct on the next turn. + const layer = scriptedResponses([ + sseEvents( + toolCallChunk("call_1", "write_file", '{"fileContent":"hello world","path":"/tmp/x.ts"}'), + finishChunk("tool_calls"), + ), + sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")), + ]) + + const events = Array.from( + yield* TestToolRuntime.runTools({ request: baseRequest, tools: { write_file } }).pipe( + Stream.runCollect, + Effect.provide(layer), + ), + ) + + const toolError = events.find(LLMEvent.is.toolError) + expect(toolError?.message).toContain("write_file") + // Model knows what it should have sent + expect(toolError?.message).toContain("content") + expect(toolError?.message).toContain("filePath") + // Model knows what it actually sent wrong + expect(toolError?.message).toContain("fileContent") + expect(toolError?.message).toContain("path") + }), + ) + + it.effect("emits edit tool schema error when model omits a required key due to token budget truncation", () => + Effect.gen(function* () { + // When a model runs out of token budget mid-generation it can produce a + // partial tool call — filePath and oldString present but newString missing. + // The error must list all expected keys so the model retries with the full set. + const layer = scriptedResponses([ + sseEvents( + toolCallChunk("call_1", "edit_file", '{"filePath":"/tmp/x.ts","oldString":"foo"}'), + finishChunk("tool_calls"), + ), + sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")), + ]) + + const events = Array.from( + yield* TestToolRuntime.runTools({ request: baseRequest, tools: { edit_file } }).pipe( + Stream.runCollect, + Effect.provide(layer), + ), + ) + + const toolError = events.find(LLMEvent.is.toolError) + expect(toolError?.message).toContain("edit_file") + // All three required keys are named so the model knows the full contract + expect(toolError?.message).toContain("filePath") + expect(toolError?.message).toContain("oldString") + expect(toolError?.message).toContain("newString") + }), + ) + it.effect("emits tool-error when the handler returns a ToolFailure", () => Effect.gen(function* () { const layer = scriptedResponses([