Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions packages/llm/src/tool-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,34 @@ export const dispatch = (tools: Tools, call: ToolCallPart): Effect.Effect<Dispat
)
}

/**
 * Finds the `properties` map of a tool's JSON-schema parameters.
 *
 * Effect's toJsonSchema (tool.ts line 183) emits bare properties for single-field structs
 * and $defs/$ref for multi-field ones — resolve whichever shape is present.
 *
 * Resolution order:
 *  1. An inline `properties` object on the schema itself.
 *  2. The `properties` of the `$defs` entry named by the last segment of `$ref`.
 *
 * Only plain (non-null, non-array) objects are returned, so callers can safely pass the
 * result to `Object.keys`. The `$ref` segment is JSON-Pointer-unescaped (`~1` → `/`,
 * `~0` → `~`) before the lookup; the raw segment is still tried as a fallback so a
 * definition stored under the literal escaped name keeps resolving.
 *
 * @param parameters JSON schema of the tool input, if any.
 * @returns The property map, or `undefined` when neither shape is present.
 */
const resolveProperties = (parameters: Record<string, unknown> | undefined): Record<string, unknown> | undefined => {
  const asPlainObject = (value: unknown): Record<string, unknown> | undefined =>
    value !== null && typeof value === "object" && !Array.isArray(value)
      ? (value as Record<string, unknown>)
      : undefined

  const inline = asPlainObject(parameters?.properties)
  if (inline) return inline

  const ref = parameters?.$ref
  const defs = asPlainObject(parameters?.$defs)
  if (typeof ref !== "string" || !defs) return undefined

  const rawName = ref.split("/").pop()
  if (!rawName) return undefined

  // RFC 6901: "~1" must be decoded before "~0" so that "~01" becomes "~1", not "/".
  const decodedName = rawName.replace(/~1/g, "/").replace(/~0/g, "~")
  const definition = asPlainObject(defs[decodedName]) ?? asPlainObject(defs[rawName])
  return asPlainObject(definition?.properties)
}

/**
 * Builds the message reported when a tool call's input fails schema decoding.
 *
 * The headline always names the tool and carries the decoder's error text. When the
 * schema's property names can be resolved they are appended as "Expected keys", and when
 * the input is a non-null object its own keys are appended as "Received keys".
 *
 * @param name Tool name as it appears in the call.
 * @param input Raw input that failed to decode.
 * @param parameters JSON schema of the tool input, if any.
 * @param schemaError Error text produced by the decoder.
 * @returns The segments joined with ". ".
 */
const schemaErrorMessage = (name: string, input: unknown, parameters: Record<string, unknown> | undefined, schemaError: string) => {
  const segments = [`Invalid tool input for "${name}": ${schemaError}`]

  const declared = resolveProperties(parameters)
  if (declared) {
    segments.push(`Expected keys: [${Object.keys(declared).join(", ")}]`)
  }

  if (typeof input === "object" && input !== null) {
    segments.push(`Received keys: [${Object.keys(input).join(", ")}]`)
  }

  // With no key information there is a single segment, so the join yields the bare headline.
  return segments.join(". ")
}

const decodeAndExecute = (tool: AnyTool, call: ToolCallPart): Effect.Effect<ToolSettlement, ToolFailure> =>
tool._decode(call.input).pipe(
Effect.mapError((error) => new ToolFailure({ message: `Invalid tool input: ${error.message}` })),
Effect.mapError((error) => new ToolFailure({ message: schemaErrorMessage(call.name, call.input, tool._definition.inputSchema as Record<string, unknown> | undefined, error.message) })),
Effect.flatMap((decoded) =>
tool.execute!(decoded, { id: call.id, name: call.name }).pipe(
Effect.flatMap((value) =>
Expand All @@ -47,15 +72,15 @@ const decodeAndExecute = (tool: AnyTool, call: ToolCallPart): Effect.Effect<Tool
message: `Tool returned an invalid value for its success schema: ${error.message}`,
}),
),
Effect.map((encoded) => {
if (tool._legacyResult && ToolResultValue.is(encoded))
return { result: encoded, output: ToolOutput.fromResultValue(encoded) }
const output = tool._project(decoded, call.id, encoded)
const result = ToolOutput.toResultValue(output)
return result.type === "error" ? { result } : { result, output }
}),
),
),
Effect.map((encoded) => {
if (tool._legacyResult && ToolResultValue.is(encoded))
return { result: encoded, output: ToolOutput.fromResultValue(encoded) }
const output = tool._project(decoded, call.id, encoded)
const result = ToolOutput.toResultValue(output)
return result.type === "error" ? { result } : { result, output }
}),
),
),
)
Expand Down
106 changes: 106 additions & 0 deletions packages/llm/test/tool-runtime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,31 @@ const schema_only_weather = Tool.make({
success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
})

// Fixture modelled on the real write tool schema. Wrong key names from
// OpenAI-compatible local models (Qwen, DeepSeek) are the most-reported
// failure surface for this tool.
const write_file = Tool.make({
  description: "Write content to a file.",
  parameters: Schema.Struct({
    content: Schema.String,
    filePath: Schema.String,
  }),
  success: Schema.Struct({ output: Schema.String }),
  // Only the target path is echoed back; the content is not used by the handler.
  execute: (args) => Effect.succeed({ output: `Wrote ${args.filePath}` }),
})

// Fixture modelled on the real edit tool schema. It exercises the
// missing-required-key failure mode, where the model runs out of token budget
// mid-generation.
const edit_file = Tool.make({
  description: "Edit a file by replacing a string.",
  parameters: Schema.Struct({
    filePath: Schema.String,
    oldString: Schema.String,
    newString: Schema.String,
  }),
  success: Schema.Struct({ output: Schema.String }),
  // Only the target path is echoed back; the replacement strings are not used by the handler.
  execute: (args) => Effect.succeed({ output: `Edited ${args.filePath}` }),
})

describe("LLMClient tools", () => {
it.effect("uses the registered model route when adding runtime tools", () =>
Effect.gen(function* () {
Expand Down Expand Up @@ -636,6 +661,87 @@ describe("LLMClient tools", () => {
}),
)

it.effect("includes expected and received keys in schema error message when wrong keys are sent", () =>
  Effect.gen(function* () {
    // The scripted tool call sends "cityName"; get_weather is defined outside this
    // view and presumably declares "city" (the original assertion looked for it).
    const layer = scriptedResponses([
      sseEvents(toolCallChunk("call_1", "get_weather", '{"cityName":"Paris"}'), finishChunk("tool_calls")),
      sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")),
    ])

    const events = Array.from(
      yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe(
        Stream.runCollect,
        Effect.provide(layer),
      ),
    )

    const toolError = events.find(LLMEvent.is.toolError)
    expect(toolError?.message).toContain("get_weather")
    // "city" is a substring of "cityName", so a bare toContain("city") passes even when
    // the expected-keys segment is missing. Pin each key to its own segment instead;
    // the word boundaries stop "cityName" from satisfying the expected-side check.
    expect(toolError?.message).toMatch(/Expected keys: \[[^\]]*\bcity\b[^\]]*\]/)
    expect(toolError?.message).toContain("Received keys: [cityName]")
  }),
)

it.effect("emits write tool schema error naming expected and received keys when model uses wrong key names", () =>
  Effect.gen(function* () {
    // Scenario: OpenAI-compatible local models (Qwen, DeepSeek) commonly send
    // "fileContent" and "path" where the tool declares "content" and "filePath".
    // The error fed back to the model has to mention both sides so it can
    // self-correct on the next turn.
    const wrongKeysCall = sseEvents(
      toolCallChunk("call_1", "write_file", '{"fileContent":"hello world","path":"/tmp/x.ts"}'),
      finishChunk("tool_calls"),
    )
    const closingTurn = sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop"))
    const responses = scriptedResponses([wrongKeysCall, closingTurn])

    const collected = yield* TestToolRuntime.runTools({ request: baseRequest, tools: { write_file } }).pipe(
      Stream.runCollect,
      Effect.provide(responses),
    )
    const failure = Array.from(collected).find(LLMEvent.is.toolError)
    const message = failure?.message

    expect(message).toContain("write_file")
    // Keys the model should have sent
    for (const declaredKey of ["content", "filePath"]) {
      expect(message).toContain(declaredKey)
    }
    // Keys the model actually sent
    for (const sentKey of ["fileContent", "path"]) {
      expect(message).toContain(sentKey)
    }
  }),
)

it.effect("emits edit tool schema error when model omits a required key due to token budget truncation", () =>
  Effect.gen(function* () {
    // When a model runs out of token budget mid-generation it can produce a
    // partial tool call — filePath and oldString present but newString missing.
    // The error must list all expected keys so the model retries with the full set.
    const layer = scriptedResponses([
      sseEvents(
        toolCallChunk("call_1", "edit_file", '{"filePath":"/tmp/x.ts","oldString":"foo"}'),
        finishChunk("tool_calls"),
      ),
      sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")),
    ])

    const events = Array.from(
      yield* TestToolRuntime.runTools({ request: baseRequest, tools: { edit_file } }).pipe(
        Stream.runCollect,
        Effect.provide(layer),
      ),
    )

    const toolError = events.find(LLMEvent.is.toolError)
    expect(toolError?.message).toContain("edit_file")
    // "filePath" and "oldString" are also received keys, and "newString" can appear in
    // the decoder's own text, so plain substring checks do not prove the expected-keys
    // list was emitted. Require every declared key inside the "Expected keys" segment
    // (order-independent), and pin the received side separately.
    expect(toolError?.message).toMatch(/Expected keys: \[[^\]]*\bfilePath\b[^\]]*\]/)
    expect(toolError?.message).toMatch(/Expected keys: \[[^\]]*\boldString\b[^\]]*\]/)
    expect(toolError?.message).toMatch(/Expected keys: \[[^\]]*\bnewString\b[^\]]*\]/)
    expect(toolError?.message).toContain("Received keys: [filePath, oldString]")
  }),
)

it.effect("emits tool-error when the handler returns a ToolFailure", () =>
Effect.gen(function* () {
const layer = scriptedResponses([
Expand Down
Loading