diff --git a/.gitignore b/.gitignore
index 52a5a0459626..19198a7a5918 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ node_modules
 .worktrees
 .sst
 .env
+.env.local
 .idea
 .vscode
 .codex
diff --git a/.gitleaksignore b/.gitleaksignore
new file mode 100644
index 000000000000..cc01a286fb70
--- /dev/null
+++ b/.gitleaksignore
@@ -0,0 +1,5 @@
+# Fake secret-looking strings used by HTTP recorder redaction tests.
+afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:69
+afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:92
+afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:146
+afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:gcp-api-key:71
diff --git a/bun.lock b/bun.lock
index 3e73e0c236f5..90be71910aca 100644
--- a/bun.lock
+++ b/bun.lock
@@ -111,6 +111,7 @@
         "zod": "catalog:",
       },
       "devDependencies": {
+        "@types/bun": "catalog:",
         "@typescript/native-preview": "catalog:",
         "@webgpu/types": "0.1.54",
         "typescript": "catalog:",
@@ -302,6 +303,7 @@
       "devDependencies": {
         "@cloudflare/workers-types": "catalog:",
         "@tailwindcss/vite": "catalog:",
+        "@types/bun": "catalog:",
         "@types/luxon": "catalog:",
         "@typescript/native-preview": "catalog:",
         "tailwindcss": "catalog:",
@@ -325,6 +327,37 @@
         "typescript": "catalog:",
       },
     },
+    "packages/http-recorder": {
+      "name": "@opencode-ai/http-recorder",
+      "version": "0.0.0",
+      "dependencies": {
+        "@effect/platform-node": "catalog:",
+        "effect": "catalog:",
+      },
+      "devDependencies": {
+        "@tsconfig/bun": "catalog:",
+        "@types/bun": "catalog:",
+        "@typescript/native-preview": "catalog:",
+      },
+    },
+    "packages/llm": {
+      "name": "@opencode-ai/llm",
+      "version": "1.14.25",
+      "dependencies": {
+        "@smithy/eventstream-codec": "4.2.14",
+        "@smithy/util-utf8": "4.2.2",
+        "aws4fetch": "1.0.20",
+        "effect": "catalog:",
+      },
+      "devDependencies": {
+        "@clack/prompts": "1.0.0-alpha.1",
+        "@effect/platform-node": "catalog:",
+        "@opencode-ai/http-recorder": "workspace:*",
+        "@tsconfig/bun": "catalog:",
+        "@types/bun": "catalog:",
+        "@typescript/native-preview": "catalog:",
+      },
+    },
     "packages/opencode": {
       "name": "opencode",
       "version": "1.14.41",
@@ -1552,6 +1585,10 @@
 
     "@opencode-ai/function": ["@opencode-ai/function@workspace:packages/function"],
 
+    "@opencode-ai/http-recorder": ["@opencode-ai/http-recorder@workspace:packages/http-recorder"],
+
+    "@opencode-ai/llm": ["@opencode-ai/llm@workspace:packages/llm"],
+
     "@opencode-ai/plugin": ["@opencode-ai/plugin@workspace:packages/plugin"],
 
     "@opencode-ai/script": ["@opencode-ai/script@workspace:packages/script"],
@@ -5566,6 +5603,10 @@
 
     "@opencode-ai/desktop/typescript": ["typescript@5.6.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw=="],
 
+    "@opencode-ai/llm/@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.14", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.14.1", "@smithy/util-hex-encoding": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-erZq0nOIpzfeZdCyzZjdJb4nVSKLUmSkaQUVkRGQTXs30gyUGeKnrYEg+Xe1W5gE3aReS7IgsvANwVPxSzY6Pw=="],
+
+    "@opencode-ai/llm/@smithy/util-utf8": ["@smithy/util-utf8@4.2.2", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw=="],
+
     "@opencode-ai/ui/@solid-primitives/resize-observer": ["@solid-primitives/resize-observer@2.1.3", "", { "dependencies": { "@solid-primitives/event-listener": "^2.4.3", "@solid-primitives/rootless": "^1.5.2", "@solid-primitives/static-store": "^0.1.2", "@solid-primitives/utils": "^6.3.2" }, "peerDependencies": { "solid-js": "^1.6.12" } }, "sha512-zBLje5E06TgOg93S7rGPldmhDnouNGhvfZVKOp+oG2XU8snA+GoCSSCz1M+jpNAg5Ek2EakU5UVQqL152WmdXQ=="],
 
     "@opencode-ai/web/@shikijs/transformers": ["@shikijs/transformers@3.20.0", "", { "dependencies": { "@shikijs/core": "3.20.0", "@shikijs/types": "3.20.0" } }, "sha512-PrHHMRr3Q5W1qB/42kJW6laqFyWdhrPF2hNR9qjOm1xcSiAO3hAHo7HaVyHE6pMyevmy3i51O8kuGGXC78uK3g=="],
@@ -6632,6 +6673,8 @@
 
     "@opencode-ai/desktop/@actions/artifact/@actions/http-client": ["@actions/http-client@2.2.3", "", { "dependencies": { "tunnel": "^0.0.6", "undici": "^5.25.4" } }, "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA=="],
 
+    "@opencode-ai/llm/@smithy/eventstream-codec/@smithy/types": ["@smithy/types@4.14.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-59b5HtSVrVR/eYNei3BUj3DCPKD/G7EtDDe7OEJE7i7FtQFugYo6MxbotS8mVJkLNVf8gYaAlEBwwtJ9HzhWSg=="],
+
     "@opencode-ai/web/@shikijs/transformers/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="],
 
     "@opencode-ai/web/@shikijs/transformers/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="],
diff --git a/packages/console/app/package.json b/packages/console/app/package.json
index f2471d2926b7..b31488f1553a 100644
--- a/packages/console/app/package.json
+++ b/packages/console/app/package.json
@@ -35,6 +35,7 @@
     "zod": "catalog:"
   },
   "devDependencies": {
+    "@types/bun": "catalog:",
     "@typescript/native-preview": "catalog:",
     "@webgpu/types": "0.1.54",
     "typescript": "catalog:",
diff --git a/packages/console/app/tsconfig.json b/packages/console/app/tsconfig.json
index e5fb212de515..be7ee4319439 100644
--- a/packages/console/app/tsconfig.json
+++ b/packages/console/app/tsconfig.json
@@ -12,7 +12,7 @@
     "allowJs": true,
     "strict": true,
     "noEmit": true,
-    "types": ["vite/client", "@webgpu/types"],
+    "types": ["vite/client", "@webgpu/types", "bun"],
     "isolatedModules": true,
     "paths": {
       "~/*": ["./src/*"]
diff --git a/packages/enterprise/package.json b/packages/enterprise/package.json
index beccdb6991d4..7f964f511a8e 100644
--- a/packages/enterprise/package.json
+++ b/packages/enterprise/package.json
@@ -32,6 +32,7 @@
     "@cloudflare/workers-types": "catalog:",
     "@tailwindcss/vite": "catalog:",
     "@typescript/native-preview": "catalog:",
+    "@types/bun": "catalog:",
     "@types/luxon": "catalog:",
     "tailwindcss": "catalog:",
     "typescript": "catalog:",
diff --git a/packages/enterprise/tsconfig.json b/packages/enterprise/tsconfig.json
index af4ce16490f7..eafea7e4f084 100644
--- a/packages/enterprise/tsconfig.json
+++ b/packages/enterprise/tsconfig.json
@@ -11,7 +11,7 @@
     "allowJs": true,
     "noEmit": true,
     "strict": true,
-    "types": ["@cloudflare/workers-types", "vite/client"],
+    "types": ["@cloudflare/workers-types", "vite/client", "bun"],
     "isolatedModules": true,
     "paths": {
       "~/*": ["./src/*"]
diff --git a/packages/http-recorder/package.json b/packages/http-recorder/package.json
new file mode 100644
index 000000000000..ee4865b47516
--- /dev/null
+++ b/packages/http-recorder/package.json
@@ -0,0 +1,26 @@
+{
+  "$schema": "https://json.schemastore.org/package.json",
+  "version": "0.0.0",
+  "name": "@opencode-ai/http-recorder",
+  "type": "module",
+  "license": "MIT",
+  "private": true,
+  "scripts": {
+    "test": "bun test --timeout 30000",
+    "test:ci": "mkdir -p .artifacts/unit && bun test --timeout 30000 --reporter=junit --reporter-outfile=.artifacts/unit/junit.xml",
+    "typecheck": "tsgo --noEmit"
+  },
+  "exports": {
+    ".": "./src/index.ts",
+    "./*": "./src/*.ts"
+  },
+  "devDependencies": {
+    "@tsconfig/bun": "catalog:",
+    "@types/bun": "catalog:",
+    "@typescript/native-preview": "catalog:"
+  },
+  "dependencies": {
+    "@effect/platform-node": "catalog:",
+    "effect": "catalog:"
+  }
+}
diff --git a/packages/http-recorder/src/cassette.ts b/packages/http-recorder/src/cassette.ts
new file mode 100644
index 000000000000..23f1ba4e682b
--- /dev/null
+++ b/packages/http-recorder/src/cassette.ts
@@ -0,0 +1,105 @@
+import { Context, Effect, FileSystem, Layer, PlatformError, Ref } from "effect"
+import * as path from "node:path"
+import { cassetteSecretFindings, type SecretFinding } from "./redaction"
+import type { Cassette, CassetteMetadata, Interaction } from "./schema"
+import { cassetteFor, cassettePath, DEFAULT_RECORDINGS_DIR, formatCassette, parseCassette } from "./storage"
+
+export interface Entry {
+  readonly name: string
+  readonly path: string
+}
+
+export interface Interface {
+  readonly path: (name: string) => string
+  readonly read: (name: string) => Effect.Effect<Cassette, PlatformError.PlatformError>
+  readonly write: (name: string, cassette: Cassette) => Effect.Effect<void, PlatformError.PlatformError>
+  readonly append: (
+    name: string,
+    interaction: Interaction,
+    metadata: CassetteMetadata | undefined,
+  ) => Effect.Effect<
+    {
+      readonly cassette: Cassette
+      readonly findings: ReadonlyArray<SecretFinding>
+    },
+    PlatformError.PlatformError
+  >
+  readonly exists: (name: string) => Effect.Effect<boolean, PlatformError.PlatformError>
+  readonly list: () => Effect.Effect<ReadonlyArray<Entry>, PlatformError.PlatformError>
+  readonly scan: (cassette: Cassette) => ReadonlyArray<SecretFinding>
+}
+
+export class Service extends Context.Service<Interface>()("@opencode-ai/http-recorder/Cassette") {}
+
+export const layer = (options: { readonly directory?: string } = {}) =>
+  Layer.effect(
+    Service,
+    Effect.gen(function* () {
+      const fileSystem = yield* FileSystem.FileSystem
+      const directory = options.directory ?? DEFAULT_RECORDINGS_DIR
+      const recorded = yield* Ref.make(new Map<string, ReadonlyArray<Interaction>>())
+
+      const pathFor = (name: string) => cassettePath(name, directory)
+
+      const walk = (directory: string): Effect.Effect<ReadonlyArray<string>, PlatformError.PlatformError> =>
+        Effect.gen(function* () {
+          const entries = yield* fileSystem
+            .readDirectory(directory)
+            .pipe(Effect.catch(() => Effect.succeed([] as string[])))
+          const nested = yield* Effect.forEach(entries, (entry) => {
+            const full = path.join(directory, entry)
+            return fileSystem.stat(full).pipe(
+              Effect.flatMap((stat) => (stat.type === "Directory" ? walk(full) : Effect.succeed([full]))),
+              Effect.catch(() => Effect.succeed([] as string[])),
+            )
+          })
+          return nested.flat()
+        })
+
+      const read = Effect.fn("Cassette.read")(function* (name: string) {
+        return parseCassette(yield* fileSystem.readFileString(pathFor(name)))
+      })
+
+      const write = Effect.fn("Cassette.write")(function* (name: string, cassette: Cassette) {
+        yield* fileSystem.makeDirectory(path.dirname(pathFor(name)), { recursive: true })
+        yield* fileSystem.writeFileString(pathFor(name), formatCassette(cassette))
+      })
+
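+      // Accumulates interactions per cassette name in memory, rewrites the whole
+      // cassette on each append, and skips the write when secret findings exist so
+      // the caller can surface them instead of persisting them.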
+      const append = Effect.fn("Cassette.append")(function* (
+        name: string,
+        interaction: Interaction,
+        metadata: CassetteMetadata | undefined,
+      ) {
+        const interactions = yield* Ref.updateAndGet(recorded, (previous) =>
+          new Map(previous).set(name, [...(previous.get(name) ?? []), interaction]),
+        )
+        const cassette = cassetteFor(name, interactions.get(name) ?? [], metadata)
+        const findings = cassetteSecretFindings(cassette)
+        if (findings.length === 0) yield* write(name, cassette)
+        return { cassette, findings }
+      })
+
+      const exists = Effect.fn("Cassette.exists")(function* (name: string) {
+        return yield* fileSystem.access(pathFor(name)).pipe(
+          Effect.as(true),
+          Effect.catch(() => Effect.succeed(false)),
+        )
+      })
+
+      const list = Effect.fn("Cassette.list")(function* () {
+        return (yield* walk(directory))
+          .filter((file) => file.endsWith(".json"))
+          .map((file) => ({
+            name: path.relative(directory, file).replace(/\\/g, "/").replace(/\.json$/, ""),
+            path: file,
+          }))
+          .toSorted((a, b) => a.name.localeCompare(b.name))
+      })
+
+      return Service.of({ path: pathFor, read, write, append, exists, list, scan: cassetteSecretFindings })
+    }),
+  )
+
+export const defaultLayer = layer()
+
+export * as Cassette from "./cassette"
diff --git a/packages/http-recorder/src/diff.ts b/packages/http-recorder/src/diff.ts
new file mode 100644
index 000000000000..29517befcbd2
--- /dev/null
+++ b/packages/http-recorder/src/diff.ts
@@ -0,0 +1,95 @@
+import { Option } from "effect"
+import { Headers, HttpBody, HttpClientRequest, UrlParams } from "effect/unstable/http"
+import { decodeJson } from "./matching"
+import { REDACTED, redactUrl, secretFindings } from "./redaction"
+import { httpInteractions, type Cassette, type RequestSnapshot } from "./schema"
+
+const safeText = (value: unknown) => {
+  if (value === undefined) return "undefined"
+  if (secretFindings(value).length > 0) return JSON.stringify(REDACTED)
+  const text = JSON.stringify(value)
+  if (!text) return String(value)
+  return text.length > 300 ? `${text.slice(0, 300)}...` : text
+}
+
+const jsonBody = (body: string) => Option.getOrUndefined(decodeJson(body))
+
+const valueDiffs = (expected: unknown, received: unknown, base = "$", limit = 8): ReadonlyArray<string> => {
+  if (Object.is(expected, received)) return []
+  if (
+    expected &&
+    received &&
+    typeof expected === "object" &&
+    typeof received === "object" &&
+    !Array.isArray(expected) &&
+    !Array.isArray(received)
+  ) {
+    return [...new Set([...Object.keys(expected), ...Object.keys(received)])]
+      .toSorted()
+      .flatMap((key) =>
+        valueDiffs(
+          (expected as Record<string, unknown>)[key],
+          (received as Record<string, unknown>)[key],
+          `${base}.${key}`,
+          limit,
+        ),
+      )
+      .slice(0, limit)
+  }
+  if (Array.isArray(expected) && Array.isArray(received)) {
+    return Array.from({ length: Math.max(expected.length, received.length) }, (_, index) => index)
+      .flatMap((index) => valueDiffs(expected[index], received[index], `${base}[${index}]`, limit))
+      .slice(0, limit)
+  }
+  return [`${base} expected ${safeText(expected)}, received ${safeText(received)}`]
+}
+
+const headerDiffs = (expected: Record<string, string>, received: Record<string, string>) =>
+  [...new Set([...Object.keys(expected), ...Object.keys(received)])].toSorted().flatMap((key) => {
+    if (expected[key] === received[key]) return []
+    if (expected[key] === undefined) return [` ${key} unexpected ${safeText(received[key])}`]
+    if (received[key] === undefined) return [` ${key} missing expected ${safeText(expected[key])}`]
+    return [` ${key} expected ${safeText(expected[key])}, received ${safeText(received[key])}`]
+  })
+
+export const requestDiff = (expected: RequestSnapshot, received: RequestSnapshot) => {
+  const lines = []
+  if (expected.method !== received.method) {
+    lines.push("method:", ` expected ${expected.method}, received ${received.method}`)
+  }
+  if (expected.url !== received.url) {
+    lines.push("url:", ` expected ${expected.url}`, ` received ${received.url}`)
+  }
+  const headers = headerDiffs(expected.headers, received.headers)
+  if (headers.length > 0) lines.push("headers:", ...headers.slice(0, 8))
+  const expectedBody = jsonBody(expected.body)
+  const receivedBody = jsonBody(received.body)
+  const body =
+    expectedBody !== undefined && receivedBody !== undefined
+      ? valueDiffs(expectedBody, receivedBody).map((line) => ` ${line}`)
+      : expected.body === received.body
+        ? []
+        : [` expected ${safeText(expected.body)}, received ${safeText(received.body)}`]
+  if (body.length > 0) lines.push("body:", ...body)
+  return lines
+}
[] + : [` expected ${safeText(expected.body)}, received ${safeText(received.body)}`] + if (body.length > 0) lines.push("body:", ...body) + return lines +} + +export const mismatchDetail = (cassette: Cassette, incoming: RequestSnapshot) => { + const interactions = httpInteractions(cassette) + if (interactions.length === 0) return "cassette has no recorded HTTP interactions" + const ranked = interactions + .map((interaction, index) => ({ index, lines: requestDiff(interaction.request, incoming) })) + .toSorted((a, b) => a.lines.length - b.lines.length || a.index - b.index) + const best = ranked[0] + return ["no recorded interaction matched", `closest interaction: #${best.index + 1}`, ...best.lines].join("\n") +} + +export const redactedErrorRequest = (request: HttpClientRequest.HttpClientRequest) => + HttpClientRequest.makeWith( + request.method, + redactUrl(request.url), + UrlParams.empty, + Option.none(), + Headers.empty, + HttpBody.empty, + ) diff --git a/packages/http-recorder/src/effect.ts b/packages/http-recorder/src/effect.ts new file mode 100644 index 000000000000..f103e45dc7b4 --- /dev/null +++ b/packages/http-recorder/src/effect.ts @@ -0,0 +1,211 @@ +import { NodeFileSystem } from "@effect/platform-node" +import { Effect, Layer, Option, Ref } from "effect" +import { + FetchHttpClient, + HttpClient, + HttpClientError, + HttpClientRequest, + HttpClientResponse, +} from "effect/unstable/http" +import { redactedErrorRequest, mismatchDetail, requestDiff } from "./diff" +import { defaultMatcher, decodeJson, type RequestMatcher } from "./matching" +import { redactHeaders, redactUrl, type SecretFinding } from "./redaction" +import { + httpInteractions, + type Cassette, + type CassetteMetadata, + type HttpInteraction, + type ResponseSnapshot, +} from "./schema" +import * as CassetteService from "./cassette" + +export const DEFAULT_REQUEST_HEADERS: ReadonlyArray = ["content-type", "accept", "openai-beta"] +const DEFAULT_RESPONSE_HEADERS: ReadonlyArray = ["content-type"] + +export type RecordReplayMode = "record" | "replay" | "passthrough" + +export interface RecordReplayOptions { + readonly mode?: RecordReplayMode + readonly directory?: string + readonly metadata?: CassetteMetadata + readonly redact?: { + readonly headers?: ReadonlyArray + readonly query?: ReadonlyArray + readonly url?: (url: string) => string + } + readonly requestHeaders?: ReadonlyArray + readonly responseHeaders?: ReadonlyArray + readonly redactBody?: (body: unknown) => unknown + readonly dispatch?: "match" | "sequential" + readonly match?: RequestMatcher +} + +const responseHeaders = ( + response: HttpClientResponse.HttpClientResponse, + allow: ReadonlyArray, + redact: ReadonlyArray | undefined, +) => { + const merged = redactHeaders(response.headers as Record, allow, redact) + if (!merged["content-type"]) merged["content-type"] = "text/event-stream" + return merged +} + +const BINARY_CONTENT_TYPES: ReadonlyArray = ["vnd.amazon.eventstream", "octet-stream"] + +const isBinaryContentType = (contentType: string | undefined) => { + if (!contentType) return false + const lower = contentType.toLowerCase() + return BINARY_CONTENT_TYPES.some((token) => lower.includes(token)) +} + +const captureResponseBody = (response: HttpClientResponse.HttpClientResponse, contentType: string | undefined) => + isBinaryContentType(contentType) + ? 
+export const recordingLayer = (
+  name: string,
+  options: Omit<RecordReplayOptions, "directory"> = {},
+): Layer.Layer<HttpClient.HttpClient, never, HttpClient.HttpClient | CassetteService.Service> =>
+  Layer.effect(
+    HttpClient.HttpClient,
+    Effect.gen(function* () {
+      const upstream = yield* HttpClient.HttpClient
+      const cassetteService = yield* CassetteService.Service
+      const requestHeadersAllow = options.requestHeaders ?? DEFAULT_REQUEST_HEADERS
+      const responseHeadersAllow = options.responseHeaders ?? DEFAULT_RESPONSE_HEADERS
+      const match = options.match ?? defaultMatcher
+      const mode = options.mode ?? "replay"
+      const sequential = options.dispatch === "sequential"
+      const replay = yield* Ref.make<Cassette | undefined>(undefined)
+      const cursor = yield* Ref.make(0)
+
+      const snapshotRequest = (request: HttpClientRequest.HttpClientRequest) =>
+        Effect.gen(function* () {
+          const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
+          const raw = yield* Effect.promise(() => web.text())
+          const body = options.redactBody
+            ? Option.match(decodeJson(raw), {
+                onNone: () => raw,
+                onSome: (parsed) => JSON.stringify(options.redactBody?.(parsed)),
+              })
+            : raw
+          return {
+            method: web.method,
+            url: redactUrl(web.url, options.redact?.query, options.redact?.url),
+            headers: redactHeaders(
+              Object.fromEntries(web.headers.entries()),
+              requestHeadersAllow,
+              options.redact?.headers,
+            ),
+            body,
+          }
+        })
+
+      const selectInteraction = (cassette: Cassette, incoming: HttpInteraction["request"]) =>
+        Effect.gen(function* () {
+          const interactions = httpInteractions(cassette)
+          if (sequential) {
+            const index = yield* Ref.get(cursor)
+            const interaction = interactions[index]
+            if (!interaction)
+              return { interaction, detail: `interaction ${index + 1} of ${interactions.length} not recorded` }
+            if (!match(incoming, interaction.request)) {
+              return { interaction: undefined, detail: requestDiff(interaction.request, incoming).join("\n") }
+            }
+            yield* Ref.update(cursor, (n) => n + 1)
+            return { interaction, detail: "" }
+          }
+          const interaction = interactions.find((candidate) => match(incoming, candidate.request))
+          return { interaction, detail: interaction ? "" : mismatchDetail(cassette, incoming) }
+        })
"" : mismatchDetail(cassette, incoming) } + }) + + const loadReplay = (request: HttpClientRequest.HttpClientRequest) => + Effect.gen(function* () { + const cached = yield* Ref.get(replay) + if (cached) return cached + const cassette = yield* cassetteService.read(name).pipe(Effect.mapError(() => fixtureMissing(request, name))) + yield* Ref.set(replay, cassette) + return cassette + }) + + return HttpClient.make((request) => { + if (mode === "passthrough") return upstream.execute(request) + + if (mode === "record") { + return Effect.gen(function* () { + const currentRequest = yield* snapshotRequest(request) + const response = yield* upstream.execute(request) + const headers = responseHeaders(response, responseHeadersAllow, options.redact?.headers) + const captured = yield* captureResponseBody(response, headers["content-type"]) + const interaction: HttpInteraction = { + transport: "http", + request: currentRequest, + response: { status: response.status, headers, ...captured }, + } + const result = yield* cassetteService.append(name, interaction, options.metadata).pipe(Effect.orDie) + const findings = result.findings + if (findings.length > 0) return yield* unsafeCassette(request, name, findings) + return HttpClientResponse.fromWeb( + request, + new Response(decodeResponseBody(interaction.response), interaction.response), + ) + }) + } + + return Effect.gen(function* () { + const cassette = yield* loadReplay(request) + const incoming = yield* snapshotRequest(request) + const { interaction, detail } = yield* selectInteraction(cassette, incoming) + if (!interaction) return yield* fixtureMismatch(request, name, detail) + + return HttpClientResponse.fromWeb( + request, + new Response(decodeResponseBody(interaction.response), interaction.response), + ) + }) + }) + }), + ) + +export const cassetteLayer = (name: string, options: RecordReplayOptions = {}): Layer.Layer => + recordingLayer(name, options).pipe( + Layer.provide(CassetteService.layer({ directory: options.directory })), + Layer.provide(FetchHttpClient.layer), + Layer.provide(NodeFileSystem.layer), + ) diff --git a/packages/http-recorder/src/index.ts b/packages/http-recorder/src/index.ts new file mode 100644 index 000000000000..d85e13bf4c0b --- /dev/null +++ b/packages/http-recorder/src/index.ts @@ -0,0 +1,10 @@ +export * from "./schema" +export * from "./redaction" +export * from "./matching" +export * from "./diff" +export * from "./storage" +export * from "./websocket" +export * from "./effect" +export * as Cassette from "./cassette" + +export * as HttpRecorder from "." 
diff --git a/packages/http-recorder/src/matching.ts b/packages/http-recorder/src/matching.ts
new file mode 100644
index 000000000000..b66c8fd14677
--- /dev/null
+++ b/packages/http-recorder/src/matching.ts
@@ -0,0 +1,36 @@
+import { Option, Schema } from "effect"
+import type { RequestSnapshot } from "./schema"
+
+const JsonValue = Schema.fromJsonString(Schema.Unknown)
+export const decodeJson = Schema.decodeUnknownOption(JsonValue)
+
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  value !== null && typeof value === "object" && !Array.isArray(value)
+
+export const canonicalizeJson = (value: unknown): unknown => {
+  if (Array.isArray(value)) return value.map(canonicalizeJson)
+  if (isRecord(value)) {
+    return Object.fromEntries(
+      Object.keys(value)
+        .toSorted()
+        .map((key) => [key, canonicalizeJson(value[key])]),
+    )
+  }
+  return value
+}
+
+export type RequestMatcher = (incoming: RequestSnapshot, recorded: RequestSnapshot) => boolean
+
+export const canonicalSnapshot = (snapshot: RequestSnapshot): string =>
+  JSON.stringify({
+    method: snapshot.method,
+    url: snapshot.url,
+    headers: canonicalizeJson(snapshot.headers),
+    body: Option.match(decodeJson(snapshot.body), {
+      onNone: () => snapshot.body,
+      onSome: canonicalizeJson,
+    }),
+  })
+
+export const defaultMatcher: RequestMatcher = (incoming, recorded) =>
+  canonicalSnapshot(incoming) === canonicalSnapshot(recorded)
diff --git a/packages/http-recorder/src/redaction.ts b/packages/http-recorder/src/redaction.ts
new file mode 100644
index 000000000000..062ea61dc7da
--- /dev/null
+++ b/packages/http-recorder/src/redaction.ts
@@ -0,0 +1,112 @@
+import type { Cassette } from "./schema"
+
+export const REDACTED = "[REDACTED]"
+
+const DEFAULT_REDACT_HEADERS = [
+  "authorization",
+  "cookie",
+  "proxy-authorization",
+  "set-cookie",
+  "x-api-key",
+  "x-amz-security-token",
+  "x-goog-api-key",
+]
+
+const DEFAULT_REDACT_QUERY = [
+  "access_token",
+  "api-key",
+  "api_key",
+  "apikey",
+  "code",
+  "key",
+  "signature",
+  "sig",
+  "token",
+  "x-amz-credential",
+  "x-amz-security-token",
+  "x-amz-signature",
+]
+
+const SECRET_PATTERNS: ReadonlyArray<{ readonly label: string; readonly pattern: RegExp }> = [
+  { label: "bearer token", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{16,}\b/i },
+  { label: "API key", pattern: /\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b/ },
+  { label: "Anthropic API key", pattern: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/ },
+  { label: "Google API key", pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/ },
+  { label: "AWS access key", pattern: /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/ },
+  { label: "GitHub token", pattern: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/ },
+  { label: "private key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/ },
+]
+
+const ENV_SECRET_NAMES = /(?:API|AUTH|BEARER|CREDENTIAL|KEY|PASSWORD|SECRET|TOKEN)/i
+const SAFE_ENV_VALUES = new Set(["fixture", "test", "test-key"])
+
+const envSecrets = () =>
+  Object.entries(process.env).flatMap(([name, value]) => {
+    if (!value) return []
+    if (!ENV_SECRET_NAMES.test(name)) return []
+    if (value.length < 12) return []
+    if (SAFE_ENV_VALUES.has(value.toLowerCase())) return []
+    return [{ name, value }]
+  })
+
+const pathFor = (base: string, key: string) => (base ? `${base}.${key}` : key)
+
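+// Walks an arbitrary JSON-like value and yields every string leaf together with its
+// JSONPath-style location so findings can point at the offending field.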
+const stringEntries = (value: unknown, base = ""): ReadonlyArray<{ readonly path: string; readonly value: string }> => {
+  if (typeof value === "string") return [{ path: base, value }]
+  if (Array.isArray(value)) return value.flatMap((item, index) => stringEntries(item, `${base}[${index}]`))
+  if (value && typeof value === "object") {
+    return Object.entries(value).flatMap(([key, child]) => stringEntries(child, pathFor(base, key)))
+  }
+  return []
+}
+
+const redactionSet = (values: ReadonlyArray<string> | undefined, defaults: ReadonlyArray<string>) =>
+  new Set([...defaults, ...(values ?? [])].map((value) => value.toLowerCase()))
+
+export type UrlRedactor = (url: string) => string
+
+export const redactUrl = (raw: string, query: ReadonlyArray<string> = DEFAULT_REDACT_QUERY, urlRedactor?: UrlRedactor) => {
+  if (!URL.canParse(raw)) return urlRedactor?.(raw) ?? raw
+  const url = new URL(raw)
+  if (url.username) url.username = REDACTED
+  if (url.password) url.password = REDACTED
+  const redacted = redactionSet(query, DEFAULT_REDACT_QUERY)
+  for (const key of [...url.searchParams.keys()]) {
+    if (redacted.has(key.toLowerCase())) url.searchParams.set(key, REDACTED)
+  }
+  return urlRedactor?.(url.toString()) ?? url.toString()
+}
+
+export const redactHeaders = (
+  headers: Record<string, string>,
+  allow: ReadonlyArray<string>,
+  redact: ReadonlyArray<string> = DEFAULT_REDACT_HEADERS,
+) => {
+  const allowed = new Set(allow.map((name) => name.toLowerCase()))
+  const redacted = redactionSet(redact, DEFAULT_REDACT_HEADERS)
+  return Object.fromEntries(
+    Object.entries(headers)
+      .map(([name, value]) => [name.toLowerCase(), value] as const)
+      .filter(([name]) => allowed.has(name))
+      .map(([name, value]) => [name, redacted.has(name) ? REDACTED : value] as const)
+      .toSorted(([a], [b]) => a.localeCompare(b)),
+  )
+}
+
+export type SecretFinding = {
+  readonly path: string
+  readonly reason: string
+}
+
+export const secretFindings = (value: unknown): ReadonlyArray<SecretFinding> =>
+  stringEntries(value).flatMap((entry) => [
+    ...SECRET_PATTERNS.filter((item) => item.pattern.test(entry.value)).map((item) => ({
+      path: entry.path,
+      reason: item.label,
+    })),
+    ...envSecrets()
+      .filter((item) => entry.value.includes(item.value))
+      .map((item) => ({ path: entry.path, reason: `environment secret ${item.name}` })),
+  ])
+
+export const cassetteSecretFindings = (cassette: Cassette) => secretFindings(cassette)
diff --git a/packages/http-recorder/src/schema.ts b/packages/http-recorder/src/schema.ts
new file mode 100644
index 000000000000..2692b525b4ac
--- /dev/null
+++ b/packages/http-recorder/src/schema.ts
@@ -0,0 +1,67 @@
+import { Schema } from "effect"
+
+export const RequestSnapshotSchema = Schema.Struct({
+  method: Schema.String,
+  url: Schema.String,
+  headers: Schema.Record(Schema.String, Schema.String),
+  body: Schema.String,
+})
+export type RequestSnapshot = Schema.Schema.Type<typeof RequestSnapshotSchema>
+
+export const ResponseSnapshotSchema = Schema.Struct({
+  status: Schema.Number,
+  headers: Schema.Record(Schema.String, Schema.String),
+  body: Schema.String,
+  bodyEncoding: Schema.optional(Schema.Literals(["text", "base64"])),
+})
+export type ResponseSnapshot = Schema.Schema.Type<typeof ResponseSnapshotSchema>
+
+export const CassetteMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)
+export type CassetteMetadata = Schema.Schema.Type<typeof CassetteMetadataSchema>
+
+export const HttpInteractionSchema = Schema.Struct({
+  transport: Schema.tag("http"),
+  request: RequestSnapshotSchema,
+  response: ResponseSnapshotSchema,
+})
+export type HttpInteraction = Schema.Schema.Type<typeof HttpInteractionSchema>
+
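+// Frames are stored as text, or base64-encoded when the recorded payload was binary.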
+export const WebSocketFrameSchema = Schema.Union([
+  Schema.Struct({ kind: Schema.tag("text"), body: Schema.String }),
+  Schema.Struct({ kind: Schema.tag("binary"), body: Schema.String, bodyEncoding: Schema.Literal("base64") }),
+])
+export type WebSocketFrame = Schema.Schema.Type<typeof WebSocketFrameSchema>
+
+export const WebSocketInteractionSchema = Schema.Struct({
+  transport: Schema.tag("websocket"),
+  open: Schema.Struct({
+    url: Schema.String,
+    headers: Schema.Record(Schema.String, Schema.String),
+  }),
+  client: Schema.Array(WebSocketFrameSchema),
+  server: Schema.Array(WebSocketFrameSchema),
+})
+export type WebSocketInteraction = Schema.Schema.Type<typeof WebSocketInteractionSchema>
+
+export const InteractionSchema = Schema.Union([HttpInteractionSchema, WebSocketInteractionSchema]).pipe(
+  Schema.toTaggedUnion("transport"),
+)
+export type Interaction = Schema.Schema.Type<typeof InteractionSchema>
+
+export const isHttpInteraction = InteractionSchema.guards.http
+
+export const isWebSocketInteraction = InteractionSchema.guards.websocket
+
+export const httpInteractions = (cassette: Cassette) => cassette.interactions.filter(isHttpInteraction)
+
+export const webSocketInteractions = (cassette: Cassette) => cassette.interactions.filter(isWebSocketInteraction)
+
+export const CassetteSchema = Schema.Struct({
+  version: Schema.Literal(1),
+  metadata: Schema.optional(CassetteMetadataSchema),
+  interactions: Schema.Array(InteractionSchema),
+})
+export type Cassette = Schema.Schema.Type<typeof CassetteSchema>
+
+export const decodeCassette = Schema.decodeUnknownSync(CassetteSchema)
+export const encodeCassette = Schema.encodeSync(CassetteSchema)
diff --git a/packages/http-recorder/src/storage.ts b/packages/http-recorder/src/storage.ts
new file mode 100644
index 000000000000..08dadb1bb9a7
--- /dev/null
+++ b/packages/http-recorder/src/storage.ts
@@ -0,0 +1,34 @@
+import { Option } from "effect"
+import * as fs from "node:fs"
+import * as path from "node:path"
+import { encodeCassette, decodeCassette, type Cassette, type CassetteMetadata, type Interaction } from "./schema"
+
+export const DEFAULT_RECORDINGS_DIR = path.resolve(process.cwd(), "test", "fixtures", "recordings")
+
+export const cassettePath = (name: string, directory = DEFAULT_RECORDINGS_DIR) => path.join(directory, `${name}.json`)
+
+export const metadataFor = (name: string, metadata: CassetteMetadata | undefined): CassetteMetadata => ({
+  name,
+  recordedAt: new Date().toISOString(),
+  ...(metadata ?? {}),
+})
+
+export const cassetteFor = (
+  name: string,
+  interactions: ReadonlyArray<Interaction>,
+  metadata: CassetteMetadata | undefined,
+): Cassette => ({
+  version: 1,
+  metadata: metadataFor(name, metadata),
+  interactions,
+})
+
+export const formatCassette = (cassette: Cassette) => `${JSON.stringify(encodeCassette(cassette), null, 2)}\n`
+
+export const parseCassette = (raw: string) => decodeCassette(JSON.parse(raw))
+
+export const hasCassetteSync = (name: string, options: { readonly directory?: string } = {}) => {
+  const file = cassettePath(name, options.directory)
+  if (!fs.existsSync(file)) return false
+  return Option.isSome(Option.liftThrowable(parseCassette)(fs.readFileSync(file, "utf8")))
+}
diff --git a/packages/http-recorder/src/websocket.ts b/packages/http-recorder/src/websocket.ts
new file mode 100644
index 000000000000..8a854cb62c67
--- /dev/null
+++ b/packages/http-recorder/src/websocket.ts
@@ -0,0 +1,204 @@
+import { Effect, Option, Ref, Scope, Stream } from "effect"
+import type { Headers } from "effect/unstable/http"
+import * as CassetteService from "./cassette"
+import { canonicalizeJson, decodeJson } from "./matching"
+import { redactHeaders, redactUrl, type SecretFinding } from "./redaction"
+import { webSocketInteractions, type CassetteMetadata, type WebSocketFrame, type WebSocketInteraction } from "./schema"
+
+export const DEFAULT_WEBSOCKET_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
+
+export interface WebSocketRequest {
+  readonly url: string
+  readonly headers: Headers.Headers
+}
+
+export interface WebSocketConnection<E> {
+  readonly sendText: (message: string) => Effect.Effect<void, E>
+  readonly messages: Stream.Stream<string | Uint8Array, E>
+  readonly close: Effect.Effect<void, E>
+}
+
+export interface WebSocketExecutor<E> {
+  readonly open: (request: WebSocketRequest) => Effect.Effect<WebSocketConnection<E>, E>
+}
+
+export interface WebSocketRecordReplayOptions<E> {
+  readonly name: string
+  readonly mode?: "record" | "replay" | "passthrough"
+  readonly metadata?: CassetteMetadata
+  readonly cassette: CassetteService.Interface
+  readonly live: WebSocketExecutor<E>
+  readonly redact?: {
+    readonly headers?: ReadonlyArray<string>
+    readonly query?: ReadonlyArray<string>
+    readonly url?: (url: string) => string
+  }
+  readonly requestHeaders?: ReadonlyArray<string>
+  readonly compareClientMessagesAsJson?: boolean
+}
+
+const headersRecord = (headers: Headers.Headers) =>
+  Object.fromEntries(
+    Object.entries(headers as Record<string, unknown>)
+      .filter((entry): entry is [string, string] => typeof entry[1] === "string")
+      .toSorted(([a], [b]) => a.localeCompare(b)),
+  )
+
+const openSnapshot = (
+  request: WebSocketRequest,
+  options: Pick<WebSocketRecordReplayOptions<never>, "redact" | "requestHeaders"> = {},
+) => ({
+  url: redactUrl(request.url, options.redact?.query, options.redact?.url),
+  headers: redactHeaders(
+    headersRecord(request.headers),
+    options.requestHeaders ?? DEFAULT_WEBSOCKET_REQUEST_HEADERS,
+    options.redact?.headers,
+  ),
+})
+
+const textFrame = (body: string): WebSocketFrame => ({ kind: "text", body })
+
+const frameText = (frame: WebSocketFrame) => {
+  if (frame.kind === "text") return frame.body
+  return new TextDecoder().decode(Buffer.from(frame.body, "base64"))
+}
+
+const frameMessage = (frame: WebSocketFrame) =>
+  frame.kind === "text" ? frame.body : new Uint8Array(Buffer.from(frame.body, "base64"))
+
+const receivedFrame = (message: string | Uint8Array): WebSocketFrame =>
+  typeof message === "string"
+    ? textFrame(message)
+    : { kind: "binary", body: Buffer.from(message).toString("base64"), bodyEncoding: "base64" }
+
+const unsafeCassette = (name: string, findings: ReadonlyArray<SecretFinding>) =>
+  new Error(
+    `Refusing to write WebSocket cassette "${name}" because it contains possible secrets: ${findings
+      .map((item) => `${item.path} (${item.reason})`)
+      .join(", ")}`,
+  )
+
+const mismatch = (message: string, actual: unknown, expected: unknown) =>
+  new Error(`${message}: expected ${JSON.stringify(expected)}, received ${JSON.stringify(actual)}`)
+
+const assertEqual = (message: string, actual: unknown, expected: unknown) =>
+  Effect.sync(() => {
+    if (JSON.stringify(actual) === JSON.stringify(expected)) return
+    throw mismatch(message, actual, expected)
+  })
+
+const jsonOrText = (value: string) => Option.match(decodeJson(value), { onNone: () => value, onSome: canonicalizeJson })
+
+const compareClientMessage = (actual: string, expected: WebSocketFrame | undefined, index: number, asJson: boolean) => {
+  if (!expected)
+    return Effect.sync(() => {
+      throw new Error(`Unexpected WebSocket client frame ${index + 1}: ${actual}`)
+    })
+  const expectedText = frameText(expected)
+  if (!asJson) return assertEqual(`WebSocket client frame ${index + 1}`, actual, expectedText)
+  return assertEqual(`WebSocket client JSON frame ${index + 1}`, jsonOrText(actual), jsonOrText(expectedText))
+}
+
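+// Mirrors the HTTP recorder for WebSocket traffic: "record" wraps the live executor and
+// captures both directions of the conversation, "replay" serves recorded server frames
+// and asserts that client frames match the cassette, "passthrough" uses the live executor.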
+export const makeWebSocketExecutor = <E>(
+  options: WebSocketRecordReplayOptions<E>,
+): Effect.Effect<WebSocketExecutor<E>, never, Scope.Scope> =>
+  Effect.gen(function* () {
+    const mode = options.mode ?? "replay"
+
+    if (mode === "passthrough") return options.live
+
+    if (mode === "record") {
+      return {
+        open: (request) =>
+          Effect.gen(function* () {
+            const client: WebSocketFrame[] = []
+            const server: WebSocketFrame[] = []
+            const connection = yield* options.live.open(request)
+            const closed = yield* Ref.make(false)
+            const closeOnce = Effect.gen(function* () {
+              if (yield* Ref.getAndSet(closed, true)) return
+              yield* connection.close
+              const result = yield* options.cassette
+                .append(
+                  options.name,
+                  { transport: "websocket", open: openSnapshot(request, options), client, server },
+                  options.metadata,
+                )
+                .pipe(Effect.orDie)
+              if (result.findings.length > 0) yield* Effect.die(unsafeCassette(options.name, result.findings))
+            })
+            return {
+              sendText: (message: string) =>
+                connection.sendText(message).pipe(Effect.tap(() => Effect.sync(() => client.push(textFrame(message))))),
+              messages: connection.messages.pipe(
+                Stream.map((message) => {
+                  server.push(receivedFrame(message))
+                  return message
+                }),
+              ),
+              close: closeOnce,
+            }
+          }),
+      }
+    }
+
+    const replay = yield* Ref.make<{ readonly interactions: ReadonlyArray<WebSocketInteraction> } | undefined>(
+      undefined,
+    )
+    const cursor = yield* Ref.make(0)
+
+    yield* Effect.addFinalizer(() =>
+      Effect.gen(function* () {
+        const input = yield* Ref.get(replay)
+        if (!input) return
+        yield* assertEqual(
+          `Unused recorded WebSocket interactions in ${options.name}`,
+          yield* Ref.get(cursor),
+          input.interactions.length,
+        )
+      }),
+    )
+
+    const loadReplay = Effect.fn("WebSocketRecorder.loadReplay")(function* () {
+      const cached = yield* Ref.get(replay)
+      if (cached) return cached
+      const input = {
+        interactions: webSocketInteractions(yield* options.cassette.read(options.name).pipe(Effect.orDie)),
+      }
+      yield* Ref.set(replay, input)
+      return input
+    })
+
+    return {
+      open: (request) => {
+        return Effect.gen(function* () {
+          const input = yield* loadReplay()
+          const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1)
+          const interaction = input.interactions[index]
+          if (!interaction) return yield* Effect.die(new Error(`No recorded WebSocket interaction for ${request.url}`))
+          yield* assertEqual(`WebSocket open frame ${index + 1}`, openSnapshot(request, options), interaction.open)
+          const messageIndex = yield* Ref.make(0)
+          return {
+            sendText: (message: string) =>
+              Effect.gen(function* () {
+                const current = yield* Ref.getAndUpdate(messageIndex, (value) => value + 1)
+                yield* compareClientMessage(
+                  message,
+                  interaction.client[current],
+                  current,
+                  options.compareClientMessagesAsJson === true,
+                )
+              }),
+            messages: Stream.fromIterable(interaction.server).pipe(Stream.map(frameMessage)),
+            close: Effect.gen(function* () {
+              yield* assertEqual(
+                `WebSocket client frame count for interaction ${index + 1}`,
+                yield* Ref.get(messageIndex),
+                interaction.client.length,
+              )
+            }),
+          }
+        })
+      },
+    }
+  })
diff --git a/packages/http-recorder/test/fixtures/recordings/record-replay/multi-step.json b/packages/http-recorder/test/fixtures/recordings/record-replay/multi-step.json
new file mode 100644
index 000000000000..9953b860cd94
--- /dev/null
+++ b/packages/http-recorder/test/fixtures/recordings/record-replay/multi-step.json
@@ -0,0 +1,41 @@
+{
+  "version": 1,
+  "interactions": [
+    {
+      "transport": "http",
+      "request": {
+        "method": "POST",
+        "url": "https://example.test/echo",
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"step\":1}"
+      },
+      "response": {
+        "status": 200,
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"reply\":\"first\"}"
+      }
+    },
+    {
+      "transport": "http",
+      "request": {
+        "method": "POST",
+        "url": "https://example.test/echo",
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"step\":2}"
+      },
+      "response": {
+        "status": 200,
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"reply\":\"second\"}"
+      }
+    }
+  ]
+}
diff --git a/packages/http-recorder/test/fixtures/recordings/record-replay/retry.json b/packages/http-recorder/test/fixtures/recordings/record-replay/retry.json
new file mode 100644
index 000000000000..873e5a16c056
--- /dev/null
+++ b/packages/http-recorder/test/fixtures/recordings/record-replay/retry.json
@@ -0,0 +1,41 @@
+{
+  "version": 1,
+  "interactions": [
+    {
+      "transport": "http",
+      "request": {
+        "method": "POST",
+        "url": "https://example.test/poll",
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"id\":\"job_1\"}"
+      },
+      "response": {
+        "status": 200,
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"status\":\"pending\"}"
+      }
+    },
+    {
+      "transport": "http",
+      "request": {
+        "method": "POST",
+        "url": "https://example.test/poll",
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"id\":\"job_1\"}"
+      },
+      "response": {
+        "status": 200,
+        "headers": {
+          "content-type": "application/json"
+        },
+        "body": "{\"status\":\"complete\"}"
+      }
+    }
+  ]
+}
diff --git a/packages/http-recorder/test/record-replay.test.ts b/packages/http-recorder/test/record-replay.test.ts
new file mode 100644
index 000000000000..2f118a88c163
--- /dev/null
+++ b/packages/http-recorder/test/record-replay.test.ts
@@ -0,0 +1,322 @@
+import { NodeFileSystem } from "@effect/platform-node"
+import { describe, expect, test } from "bun:test"
+import { Cause, Effect, Exit, Scope, Stream } from "effect"
+import { Headers, HttpBody, HttpClient, HttpClientRequest } from "effect/unstable/http"
+import * as fs from "node:fs"
+import * as os from "node:os"
+import * as path from "node:path"
+import { HttpRecorder } from "../src"
+import { redactedErrorRequest } from "../src/diff"
+
+const post = (url: string, body: object) =>
+  Effect.gen(function* () {
+    const http = yield* HttpClient.HttpClient
+    const request = HttpClientRequest.post(url, {
+      headers: { "content-type": "application/json" },
+      body: HttpBody.text(JSON.stringify(body), "application/json"),
+    })
+    const response = yield* http.execute(request)
+    return yield* response.text
+  })
+
+const run = <A, E>(effect: Effect.Effect<A, E, HttpClient.HttpClient>) =>
+  Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer("record-replay/multi-step"))))
+
+const runWith = <A, E>(
+  name: string,
+  options: HttpRecorder.RecordReplayOptions,
+  effect: Effect.Effect<A, E, HttpClient.HttpClient>,
+) => Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer(name, options))))
+
+const runRecorder = <A, E>(effect: Effect.Effect<A, E, HttpRecorder.Cassette.Service | Scope.Scope>) =>
+  Effect.runPromise(
+    Effect.scoped(
+      effect.pipe(
+        Effect.provide(
+          HttpRecorder.Cassette.layer({ directory: fs.mkdtempSync(path.join(os.tmpdir(), "http-recorder-")) }),
+        ),
+        Effect.provide(NodeFileSystem.layer),
+      ),
+    ),
+  )
+
+const failureText = (exit: Exit.Exit<unknown, unknown>) => {
+  if (Exit.isSuccess(exit)) return ""
+  return Cause.prettyErrors(exit.cause).join("\n")
+}
+
+describe("http-recorder", () => {
+  test("redacts sensitive URL query parameters", () => {
+    expect(
+      HttpRecorder.redactUrl(
+        "https://example.test/path?key=secret-google-key&api_key=secret-openai-key&safe=value&X-Amz-Signature=secret-signature",
+      ),
+    ).toBe(
+      "https://example.test/path?key=%5BREDACTED%5D&api_key=%5BREDACTED%5D&safe=value&X-Amz-Signature=%5BREDACTED%5D",
+    )
+  })
+
+  test("redacts URL credentials", () => {
+    expect(HttpRecorder.redactUrl("https://user:password@example.test/path?safe=value")).toBe(
+      "https://%5BREDACTED%5D:%5BREDACTED%5D@example.test/path?safe=value",
+    )
+  })
+
+  test("applies custom URL redaction after built-in redaction", () => {
+    expect(
+      HttpRecorder.redactUrl(
+        "https://example.test/accounts/real-account/path?key=secret-key",
+        undefined,
+        (url) => url.replace("/accounts/real-account/", "/accounts/{account}/"),
+      ),
+    ).toBe("https://example.test/accounts/{account}/path?key=%5BREDACTED%5D")
+  })
+
+  test("redacts sensitive headers when allow-listed", () => {
+    expect(
+      HttpRecorder.redactHeaders(
+        {
+          authorization: "Bearer secret-token",
+          "content-type": "application/json",
+          "x-custom-token": "custom-secret",
+          "x-api-key": "secret-key",
+          "x-goog-api-key": "secret-google-key",
+        },
+        ["authorization", "content-type", "x-api-key", "x-goog-api-key", "x-custom-token"],
+        ["x-custom-token"],
+      ),
+    ).toEqual({
+      authorization: "[REDACTED]",
+      "content-type": "application/json",
+      "x-api-key": "[REDACTED]",
+      "x-custom-token": "[REDACTED]",
+      "x-goog-api-key": "[REDACTED]",
+    })
+  })
+
+  test("redacts error requests without retaining headers, params, or body", () => {
+    const request = HttpClientRequest.post("https://example.test/path", {
+      headers: { authorization: "Bearer super-secret" },
+      body: HttpBody.text("super-secret-body", "text/plain"),
+    }).pipe(HttpClientRequest.setUrlParam("api_key", "super-secret-key"))
+
+    expect(redactedErrorRequest(request).toJSON()).toMatchObject({
+      url: "https://example.test/path",
+      urlParams: { params: [] },
+      headers: {},
+      body: { _tag: "Empty" },
+    })
+  })
"POST", + url: "https://example.test/path?key=sk-123456789012345678901234", + headers: {}, + body: JSON.stringify({ nested: "AIzaSyDHibiBRvJZLsFnPYPoiTwxY4ztQ55yqCE" }), + }, + response: { + status: 200, + headers: {}, + body: "Bearer abcdefghijklmnopqrstuvwxyz", + }, + }, + ], + }), + ).toEqual([ + { path: "interactions[0].request.url", reason: "API key" }, + { path: "interactions[0].request.body", reason: "Google API key" }, + { path: "interactions[0].response.body", reason: "bearer token" }, + ]) + }) + + test("detects secret-looking values inside metadata", () => { + expect( + HttpRecorder.cassetteSecretFindings({ + version: 1, + metadata: { token: "sk-123456789012345678901234" }, + interactions: [], + }), + ).toEqual([{ path: "metadata.token", reason: "API key" }]) + }) + + test("formats websocket cassettes with shared metadata", () => { + const cassette = HttpRecorder.cassetteFor( + "websocket/basic", + [ + { + transport: "websocket", + open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } }, + client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }], + server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }], + }, + ], + { provider: "openai" }, + ) + + expect(cassette.metadata).toMatchObject({ name: "websocket/basic", provider: "openai" }) + expect(HttpRecorder.parseCassette(HttpRecorder.formatCassette(cassette))).toEqual(cassette) + }) + + test("replays websocket interactions from the shared cassette service", async () => { + await runRecorder( + Effect.gen(function* () { + const cassette = yield* HttpRecorder.Cassette.Service + yield* cassette.write( + "websocket/replay", + HttpRecorder.cassetteFor( + "websocket/replay", + [ + { + transport: "websocket", + open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } }, + client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }], + server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }], + }, + ], + undefined, + ), + ) + const executor = yield* HttpRecorder.makeWebSocketExecutor({ + name: "websocket/replay", + cassette, + compareClientMessagesAsJson: true, + live: { open: () => Effect.die(new Error("unexpected live WebSocket open")) }, + }) + const connection = yield* executor.open({ + url: "wss://example.test/realtime", + headers: Headers.fromInput({ "content-type": "application/json" }), + }) + yield* connection.sendText(JSON.stringify({ type: "response.create" })) + const messages: Array = [] + yield* connection.messages.pipe(Stream.runForEach((message) => Effect.sync(() => messages.push(message)))) + yield* connection.close + + expect(messages).toEqual([JSON.stringify({ type: "response.completed" })]) + }), + ) + }) + + test("records websocket interactions into the shared cassette service", async () => { + await runRecorder( + Effect.gen(function* () { + const cassette = yield* HttpRecorder.Cassette.Service + const executor = yield* HttpRecorder.makeWebSocketExecutor({ + name: "websocket/record", + mode: "record", + metadata: { provider: "test" }, + cassette, + live: { + open: () => + Effect.succeed({ + sendText: () => Effect.void, + messages: Stream.fromIterable([JSON.stringify({ type: "response.completed" })]), + close: Effect.void, + }), + }, + }) + const connection = yield* executor.open({ + url: "wss://example.test/realtime", + headers: Headers.fromInput({ "content-type": "application/json" }), + }) + yield* connection.sendText(JSON.stringify({ type: 
"response.create" })) + yield* connection.messages.pipe(Stream.runDrain) + yield* connection.close + + expect(yield* cassette.read("websocket/record")).toMatchObject({ + metadata: { name: "websocket/record", provider: "test" }, + interactions: [ + { + transport: "websocket", + open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } }, + client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }], + server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }], + }, + ], + }) + }), + ) + }) + + test("default matcher dispatches multi-interaction cassettes by request shape", async () => { + await run( + Effect.gen(function* () { + expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}') + expect(yield* post("https://example.test/echo", { step: 1 })).toBe('{"reply":"first"}') + }), + ) + }) + + test("sequential dispatch returns recorded responses in order for identical requests", async () => { + await runWith( + "record-replay/retry", + { dispatch: "sequential" }, + Effect.gen(function* () { + expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}') + expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"complete"}') + }), + ) + }) + + test("default matcher returns the first match for identical requests", async () => { + await runWith( + "record-replay/retry", + {}, + Effect.gen(function* () { + expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}') + expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}') + }), + ) + }) + + test("sequential dispatch reports cursor exhaustion when more requests are made than recorded", async () => { + await runWith( + "record-replay/multi-step", + { dispatch: "sequential" }, + Effect.gen(function* () { + yield* post("https://example.test/echo", { step: 1 }) + yield* post("https://example.test/echo", { step: 2 }) + const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 })) + expect(Exit.isFailure(exit)).toBe(true) + }), + ) + }) + + test("sequential dispatch still validates each recorded request", async () => { + await runWith( + "record-replay/multi-step", + { dispatch: "sequential" }, + Effect.gen(function* () { + yield* post("https://example.test/echo", { step: 1 }) + const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 })) + expect(Exit.isFailure(exit)).toBe(true) + expect(failureText(exit)).toContain("$.step expected 2, received 3") + expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}') + }), + ) + }) + + test("mismatch diagnostics show closest redacted request differences", async () => { + await run( + Effect.gen(function* () { + const exit = yield* Effect.exit( + post("https://example.test/echo?api_key=secret-value", { step: 3, token: "sk-123456789012345678901234" }), + ) + const message = failureText(exit) + expect(message).toContain("closest interaction: #1") + expect(message).toContain("url:") + expect(message).toContain("https://example.test/echo?api_key=%5BREDACTED%5D") + expect(message).toContain("body:") + expect(message).toContain("$.step expected 1, received 3") + expect(message).toContain('$.token expected undefined, received "[REDACTED]"') + expect(message).not.toContain("sk-123456789012345678901234") + }), + ) + }) +}) diff --git a/packages/http-recorder/tsconfig.json b/packages/http-recorder/tsconfig.json new file 
diff --git a/packages/http-recorder/tsconfig.json b/packages/http-recorder/tsconfig.json
new file mode 100644
index 000000000000..2bc480ffbb60
--- /dev/null
+++ b/packages/http-recorder/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "$schema": "https://json.schemastore.org/tsconfig",
+  "extends": "@tsconfig/bun/tsconfig.json",
+  "compilerOptions": {
+    "lib": ["ESNext", "DOM", "DOM.Iterable"],
+    "noUncheckedIndexedAccess": false,
+    "plugins": [
+      {
+        "name": "@effect/language-service",
+        "transform": "@effect/language-service/transform",
+        "namespaceImportPackages": ["effect", "@effect/*"]
+      }
+    ]
+  }
+}
diff --git a/packages/llm/AGENTS.md b/packages/llm/AGENTS.md
new file mode 100644
index 000000000000..61d57cf06b9e
--- /dev/null
+++ b/packages/llm/AGENTS.md
@@ -0,0 +1,294 @@
+# LLM Package Guide
+
+## Effect
+
+- Prefer `HttpClient.HttpClient` / `HttpClientResponse.HttpClientResponse` over web `fetch` / `Response` at package boundaries.
+- Use `Stream.Stream` for streaming data flow. Avoid ad hoc async generators or manual web reader loops unless an Effect `Stream` API cannot model the behavior.
+- Use Effect Schema codecs for JSON encode/decode (`Schema.fromJsonString(...)`) instead of direct `JSON.parse` / `JSON.stringify` in implementation code.
+- In `Effect.gen`, yield yieldable errors directly (`return yield* new MyError(...)`) instead of `Effect.fail(new MyError(...))`.
+- Use `Effect.void` instead of `Effect.succeed(undefined)` when the successful value is intentionally void.
+
+## Tests
+
+- Use `testEffect(...)` from `test/lib/effect.ts` for tests requiring Effect layers.
+- Keep provider tests fixture-first. Live provider calls must stay behind `RECORD=true` and required API-key checks.
+
+## Architecture
+
+This package is an Effect Schema-first LLM core. The Schema classes in `src/schema/` are the canonical runtime data model. Convenience functions in `src/llm.ts` are thin constructors that return those same Schema class instances; they should improve callsites without creating a second model.
+
+### Request Flow
+
+The intended callsite is:
+
+```ts
+const request = LLM.request({
+  model: OpenAI.model("gpt-4o-mini", { apiKey }),
+  system: "You are concise.",
+  prompt: "Say hello.",
+})
+
+const response = yield* LLMClient.generate(request)
+```
+
+`LLM.request(...)` builds an `LLMRequest`. `LLMClient.generate(...)` selects a registered route by `request.model.route`, builds the provider-native body, asks the route's transport for a real `HttpClientRequest.HttpClientRequest`, sends it through `RequestExecutor.Service`, parses the provider stream into common `LLMEvent`s, and finally returns an `LLMResponse`.
+
+Use `LLMClient.stream(request)` when callers want incremental `LLMEvent`s. Use `LLMClient.generate(request)` when callers want those same events collected into an `LLMResponse`. Use `LLMClient.prepare(request)` to compile a request through the route pipeline without sending it — the optional `Body` type argument narrows `.body` to the route's native shape (e.g. `prepare<Body>(...)` returns a `PreparedRequestOf<Body>`). The runtime body is identical; the generic is a type-level assertion.
+
+Filter or narrow `LLMEvent` streams with `LLMEvent.is.*` (camelCase guards, e.g. `events.filter(LLMEvent.is.toolCall)`). The kebab-case `LLMEvent.guards["tool-call"]` form also works but prefer `is.*` in new code.
+
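+A minimal sketch of such a filter (assuming `stream(...)` yields a `Stream` of `LLMEvent`s directly):
+
+```ts
+const toolCalls = LLMClient.stream(request).pipe(Stream.filter(LLMEvent.is.toolCall))
+```
+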
Owns request body construction (`body.from`), the body schema (`body.schema`), the streaming-event schema (`stream.event`), and the event-to-`LLMEvent` state machine (`stream.step`). `Route.make(...)` validates and JSON-encodes the body from `body.schema` and decodes frames with `stream.event`. Examples: `OpenAIChat.protocol`, `OpenAIResponses.protocol`, `AnthropicMessages.protocol`, `Gemini.protocol`, `BedrockConverse.protocol`. +- **`Endpoint`** (`src/route/endpoint.ts`) — path construction. The host always lives on `model.baseURL`; the endpoint just supplies the path. `Endpoint.path("/chat/completions")` is the common case; pass a function for paths that embed the model id or a body field (e.g. `Endpoint.path(({ body }) => `/model/${body.modelId}/converse-stream`)`). +- **`Auth`** (`src/route/auth.ts`) — per-request transport authentication. Routes read `model.apiKey` at request time via `Auth.bearer` (the default; sets `Authorization: Bearer <token>`) or `Auth.apiKeyHeader(name)` for providers that use a custom header (Anthropic `x-api-key`, Gemini `x-goog-api-key`). Routes that need per-request signing (Bedrock SigV4, future Vertex IAM, Azure AAD) implement `Auth` as a function that signs the body and merges signed headers into the result. +- **`Framing`** (`src/route/framing.ts`) — bytes → frames. SSE (`Framing.sse`) is shared; Bedrock keeps its AWS event-stream framing as a typed `Framing` value alongside its protocol. + +Compose them via `Route.make(...)`: + +```ts +export const route = Route.make({ + id: "openai-chat", + provider: "openai", + protocol: OpenAIChat.protocol, + transport: HttpTransport.httpJson({ + endpoint: Endpoint.path("/chat/completions"), + auth: Auth.bearer(), + framing: Framing.sse, + encodeBody, + }), + defaults: { + baseURL: "https://api.openai.com/v1", + capabilities: capabilities({ tools: { calls: true, streamingInput: true } }), + }, +}) +``` + +The four-axis decomposition is the reason DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, and DeepInfra all reuse `OpenAIChat.protocol` verbatim — each provider deployment is a 5-15 line `Route.make(...)` call instead of a 300-400 line route clone. Bug fixes in one protocol propagate to every consumer of that protocol in a single commit. + +When a provider ships a non-HTTP transport (OpenAI's WebSocket Responses backend, hypothetical bidirectional streaming APIs), the seam is `Transport` — `WebSocketTransport.json(...)` constructs a transport whose `prepare` builds a WebSocket URL and message and whose `frames` yields decoded text from the socket. Same protocol, different transport. + +### URL Construction + +`model.baseURL` is required; `Endpoint` only carries the path. Each protocol's `Route.make` includes a canonical URL in `defaults.baseURL` (e.g. `https://api.openai.com/v1`); provider helpers can override by passing `baseURL` in their input. Routes that have no canonical URL (OpenAI-compatible Chat, GitHub Copilot) set `baseURL: string` (required) on their input type so TypeScript catches a missing host at the call site. + +For providers where the URL is derived from typed inputs (Azure resource name, Bedrock region), the provider helper computes `baseURL` at model construction time. Use `AtLeastOne` from `route/auth-options.ts` for inputs that accept either of two derivation paths (Azure: `resourceName` or `baseURL`).
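+ +A minimal sketch of that derivation pattern (names like `AzureDeployment` and `azureBaseURL` are illustrative, not the package's actual Azure helper): + +```ts +// Sketch: compute baseURL once, at model construction time, from typed inputs. +// Accepts either an explicit baseURL or a resource name to derive one from. +type AzureDeployment = { resourceName: string } | { baseURL: string } + +const azureBaseURL = (input: AzureDeployment): string => + "baseURL" in input ? input.baseURL : `https://${input.resourceName}.openai.azure.com/openai` +```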
+ +### Provider Definitions + +Provider-facing APIs are defined with `Provider.make(...)` from `src/provider.ts`: + +```ts +export const provider = Provider.make({ + id: ProviderID.make("openai"), + model: responses, + apis: { responses, chat }, +}) + +export const model = provider.model +export const apis = provider.apis +``` + +Keep provider definitions small and explicit: + +- Use only `id`, `model`, and optional `apis` in `Provider.make(...)`. +- Use branded `ProviderID.make(...)` and `ModelID.make(...)` where ids are constructed directly. +- Use `model` for the default API path and `apis` for named provider-native alternatives such as OpenAI `responses` versus `chat`. +- Do not add author-facing `kind`, `version`, or `routes` fields. +- Export lower-level `routes` arrays separately only when advanced internal wiring needs them. +- Prefer `apiKey` as provider-specific sugar and `auth` as the explicit override; keep them mutually exclusive in provider option types with `ProviderAuthOption`. +- Resolve `apiKey` → `Auth` with `AuthOptions.bearer(options, "<PROVIDER>_API_KEY")` (it honors an explicit `auth` override and falls back to `Auth.config(envVar)` so missing keys surface a typed `Authentication` error rather than a runtime crash). + +Built-in providers are namespace modules from `src/providers/index.ts`, so aliases like `OpenAI.model(...)`, `OpenAI.responses(...)`, and `OpenAI.apis.chat(...)` are fine. External provider packages should default-export the `Provider.make(...)` result and may add named aliases if useful. + +### Folder layout + +``` +packages/llm/src/ + schema/ canonical Schema model, split by concern + ids.ts branded IDs, literal types, ProviderMetadata + options.ts Generation/Provider/Http options, Capabilities, Limits, ModelRef + messages.ts content parts, Message, ToolDefinition, LLMRequest + events.ts Usage, individual events, LLMEvent, PreparedRequest, LLMResponse + errors.ts error reasons, LLMError, ToolFailure + index.ts barrel + llm.ts request constructors and convenience helpers + route/ + index.ts @opencode-ai/llm/route advanced barrel + client.ts Route.make + LLMClient.prepare/stream/generate + executor.ts RequestExecutor service + transport error mapping + protocol.ts Protocol type + Protocol.make + endpoint.ts Endpoint type + Endpoint.path + auth.ts Auth type + Auth.bearer / Auth.apiKeyHeader / Auth.passthrough + auth-options.ts ProviderAuthOption shape, AuthOptions.bearer, AtLeastOne helper + framing.ts Framing type + Framing.sse + transport/ transport implementations + index.ts Transport type + HttpTransport / WebSocketTransport namespaces + http.ts HttpTransport.httpJson — POST + framing + websocket.ts WebSocketTransport.json + WebSocketExecutor service + protocols/ + shared.ts ProviderShared toolkit used inside protocol impls + openai-chat.ts protocol + route (compose OpenAIChat.protocol) + openai-responses.ts + anthropic-messages.ts + gemini.ts + bedrock-converse.ts + bedrock-event-stream.ts framing for AWS event-stream binary frames + openai-compatible-chat.ts route that reuses OpenAIChat.protocol, no canonical URL + utils/ per-protocol helpers (auth, cache, media, tool-stream, ...) + providers/ + openai-compatible.ts generic compatible helper + family model helpers + openai-compatible-profile.ts family defaults (deepseek, togetherai, ...)
+ azure.ts / amazon-bedrock.ts / github-copilot.ts / google.ts / xai.ts / openai.ts / anthropic.ts / openrouter.ts + tool.ts typed tool() helper + tool-runtime.ts implementation helpers for LLMClient tool execution +``` + +The dependency arrow points down: `providers/*.ts` files import `protocols`, `endpoint`, `auth`, and `framing`; protocols do not import provider metadata. Lower-level modules know nothing about specific providers. + +### Shared protocol helpers + +`ProviderShared` exports a small toolkit used inside protocol implementations to keep them focused on provider-native shapes: + +- `joinText(parts)` — joins an array of `TextPart` (or anything with a `.text`) with newlines. Use this anywhere a protocol flattens text content into a single string for a provider field. +- `parseToolInput(route, name, raw)` — Schema-decodes a tool-call argument string with the canonical "Invalid JSON input for `<name>` tool call" error message. Treats empty input as `{}`. +- `parseJson(route, raw, message)` — generic JSON-via-Schema decode for non-tool bodies. +- `eventError(route, message, ...)` — typed `InvalidProviderOutput` constructor for stream-time decode failures. +- `validateWith(decoder)` — maps Schema decode errors to `InvalidRequest`. `Route.make(...)` uses this for body validation; lower-level routes can reuse it. +- `matchToolChoice(provider, choice, branches)` — branches over `LLMRequest["toolChoice"]` for provider-specific lowering. + +If you find yourself copying a 3-to-5-line snippet between two protocols, lift it into `ProviderShared` next to these helpers rather than duplicating. + +### Tools + +Tool loops are represented in common messages and events: + +```ts +const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } }) +const result = LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }) + +const followUp = LLM.request({ + model, + messages: [LLM.user("Weather?"), LLM.assistant([call]), result], +}) +``` + +Routes lower these into provider-native assistant tool-call messages and tool-result messages. Streaming providers should emit `tool-input-delta` events while arguments arrive, then a final `tool-call` event with parsed input. + +### Tool runtime + +`LLM.stream({ request, tools })` executes model-requested tools with full type safety. Plain `LLM.stream(request)` only streams the model; if `request.tools` contains schemas, tool calls are returned for the caller to handle. Use `toolExecution: "none"` to pass executable tool definitions as schemas without invoking handlers. Add `stopWhen` to opt into follow-up model rounds after tool results. + +```ts +const get_weather = tool({ + description: "Get current weather for a city", + parameters: Schema.Struct({ city: Schema.String }), + success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }), + execute: ({ city }) => + Effect.gen(function* () { + // city: string — typed from parameters Schema + const data = yield* WeatherApi.fetch(city) + return { temperature: data.temp, condition: data.cond } + // return type checked against success Schema + }), +}) + +const events = yield* LLM.stream({ + request, + tools: { get_weather, get_time, ... }, + stopWhen: LLM.stepCountIs(10), +}).pipe(Stream.runCollect) +``` + +The runtime: + +- Adds tool definitions (derived from each tool's `parameters` Schema via `Schema.toJsonSchemaDocument`) onto `request.tools`. +- Streams the model.
+- On `tool-call`: looks up the named tool, decodes input against `parameters` Schema, dispatches to the typed `execute`, encodes the result against `success` Schema, emits `tool-result`. +- Emits local `tool-result` events in the same step by default. +- Loops only when `stopWhen` is provided and the step finishes with `tool-calls`, appending the assistant + tool messages. + +Handler dependencies (services, permissions, plugin hooks, abort handling) are closed over by the consumer at tool-construction time. The runtime's only environment requirement is `RequestExecutor.Service`. Build the tools record inside an `Effect.gen` once and reuse it across many runs. + +Errors must be expressed as `ToolFailure`. The runtime catches it and emits a `tool-error` event, then a `tool-result` of `type: "error"`, so the model can self-correct on the next step. Anything that is not a `ToolFailure` is treated as a defect and fails the stream. Three recoverable error paths produce `tool-error` events: + +- The model called an unknown tool name. +- Input failed the `parameters` Schema. +- The handler returned a `ToolFailure`. + +Provider-defined / hosted tools (Anthropic `web_search` / `code_execution` / `web_fetch`, OpenAI Responses `web_search_call` / `file_search_call` / `code_interpreter_call` / `mcp_call` / `local_shell_call` / `image_generation_call` / `computer_use_call`) pass through the runtime untouched: + +- Routes surface the model's call as a `tool-call` event with `providerExecuted: true`, and the provider's result as a matching `tool-result` event with `providerExecuted: true`. +- The runtime detects `providerExecuted` on `tool-call` and **skips client dispatch** — no handler is invoked and no `tool-error` is raised for "unknown tool". The provider already executed it. +- Both events are appended to the assistant message in `assistantContent` so the next round's history carries the call + result for context. Anthropic encodes them back as `server_tool_use` + `web_search_tool_result` (or `code_execution_tool_result` / `web_fetch_tool_result`) blocks; OpenAI Responses callers typically use `previous_response_id` instead of resending hosted-tool items. + +Add provider-defined tools to `request.tools` (no runtime entry needed). The matching route must know how to lower the tool definition into the provider-native shape; right now Anthropic accepts `web_search` / `code_execution` / `web_fetch` and OpenAI Responses accepts the hosted tool names listed above. + +## Protocol File Style + +Protocol files should look self-similar. Provider quirks belong behind named helpers so a new route can be reviewed by comparing the same sections across files. + +### Section order + +Use this order for every protocol module: + +1. Public model input +2. Request body schema +3. Streaming event schema +4. Parser state +5. Request body construction (`fromRequest`) +6. Stream parsing (`step` and per-event handlers) +7. Protocol and route +8. Model helper + +### Rules + +- Keep protocol files focused on the protocol. Move provider-specific projection, signing, media normalization, or other bulky transformations into `src/protocols/utils/*`. +- Use `Effect.fn("Provider.fromRequest")` for request body construction entrypoints. Use `Effect.fn(...)` for event handlers that yield effects; keep purely synchronous handlers as plain functions returning a `StepResult` that the dispatcher lifts via `Effect.succeed(...)`. +- Parser state owns terminal information. 
The state machine records finish reason, usage, and pending tool calls; emit one terminal `request-finish` (or `provider-error`) when a `terminal` event arrives. If a provider splits reason and usage across events, merge them in parser state before flushing. +- Emit exactly one terminal `request-finish` event for a completed response. Use `stream.terminal` to signal the run is over and have `step` emit the final event. +- Use shared helpers for repeated protocol policy such as text joining, usage totals, JSON parsing, and tool-call accumulation. `ToolStream` (`protocols/utils/tool-stream.ts`) accumulates streamed tool-call arguments uniformly. +- Make intentional provider differences explicit in helper names or comments. If two protocol files differ visually, the reason should be obvious from the names. +- Prefer dispatched per-event handlers (`onMessageStart`, `onContentBlockDelta`, ...) called from a small top-level `step` switch over a long if-chain. The dispatcher keeps the event surface visible at a glance. +- Keep tests in the same conceptual order as the protocol: basic prepare, tools prepare, unsupported lowering, text/usage parsing, tool streaming, finish reasons, provider errors. + +### Review checklist + +- Can the file be skimmed side-by-side with `openai-chat.ts` without hunting for equivalent sections? +- Are provider quirks named, isolated, and covered by focused tests? +- Does request body construction validate unsupported common content at the protocol boundary? +- Does stream parsing emit stable common events without leaking provider event order to callers? +- Does `toolChoice: "none"` behavior read as intentional? + +## Recording Tests + +Recorded tests use one cassette file per scenario. A cassette holds an ordered array of `{ request, response }` interactions, so multi-step flows (tool loops, retries, polling) record into a single file. Use `recordedTests({ prefix, requires })` and let the helper derive cassette names from test names: + +```ts +const recorded = recordedTests({ prefix: "openai-chat", requires: ["OPENAI_API_KEY"] }) + +recorded.effect("streams text", () => + Effect.gen(function* () { + // test body + }), +) +``` + +Replay is the default. `RECORD=true` records fresh cassettes and requires the listed env vars. Cassettes are written as pretty-printed JSON so multi-interaction diffs stay reviewable. + +Pass `provider`, `protocol`, and optional `tags` to `recordedTests(...)` / `recorded.effect.with(...)` so cassettes carry searchable metadata. Use recorded-test filters to replay or record a narrow subset without rewriting a whole file: + +- `RECORDED_PROVIDER=openai` matches tests tagged with `provider:openai`; comma-separated values are allowed. +- `RECORDED_PREFIX=openai-chat` matches cassette groups by `recordedTests({ prefix })`; comma-separated values are allowed. +- `RECORDED_TAGS=tool` requires all listed tags to be present, e.g. `RECORDED_TAGS=provider:togetherai,tool`. +- `RECORDED_TEST="streams text"` matches by test name, kebab-case test id, or cassette path. + +Filters apply in replay and record mode. Combine them with `RECORD=true` when refreshing only one provider or scenario. + +**Binary response bodies.** Most providers stream text (SSE, JSON). AWS Bedrock streams binary AWS event-stream frames whose CRC32 fields would be mangled by a UTF-8 round-trip — those bodies are stored as base64 with `bodyEncoding: "base64"` on the response snapshot. 
Detection is by `Content-Type` in `@opencode-ai/http-recorder` (currently `application/vnd.amazon.eventstream` and `application/octet-stream`); cassettes for SSE/JSON routes omit the field and decode as text. + +**Matching strategies.** Replay defaults to structural matching, which finds an interaction by comparing method, URL, allow-listed headers, and the canonical JSON body. This is the right choice for tool loops because each round's request differs (the message history grows). For scenarios where successive requests are byte-identical and expect different responses (retries, polling), pass `dispatch: "sequential"` in `RecordReplayOptions` — replay then walks the cassette in record order via an internal cursor. `scriptedResponses` (in `test/lib/http.ts`) is the deterministic counterpart for tests that don't need a live provider; it scripts response bodies in order without reading from disk. + +Do not blanket re-record an entire test file when adding one cassette. `RECORD=true` rewrites every recorded case that runs, and provider streams contain volatile IDs, timestamps, fingerprints, and obfuscation fields. Prefer deleting the one cassette you intend to refresh, or run a focused test pattern that only registers the scenario you want to record. Keep stable existing cassettes unchanged unless their request shape or expected behavior changed. diff --git a/packages/llm/example/tutorial.ts b/packages/llm/example/tutorial.ts new file mode 100644 index 000000000000..6b0b894b1321 --- /dev/null +++ b/packages/llm/example/tutorial.ts @@ -0,0 +1,242 @@ +import { Config, Effect, Formatter, Layer, Schema, Stream } from "effect" +import { LLM, LLMClient, Provider, ProviderID, Tool, type ProviderModelOptions } from "@opencode-ai/llm" +import { Route, Auth, Endpoint, Framing, Protocol, RequestExecutor } from "@opencode-ai/llm/route" +import { OpenAI } from "@opencode-ai/llm/providers" + +/** + * A runnable walkthrough of the LLM package use-site API. + * + * Run from `packages/llm` with an OpenAI key in the environment: + * + * OPENAI_API_KEY=... bun example/tutorial.ts + * + * The file is intentionally written as a normal TypeScript program. You can + * hover imports and local values to see how the public API is typed. + */ + +const apiKey = Config.redacted("OPENAI_API_KEY") + +// 1. Pick a model. The provider helper records provider identity, protocol +// choice, capabilities, deployment options, authentication, and defaults. +const model = OpenAI.model("gpt-4o-mini", { + apiKey, + generation: { maxTokens: 160 }, + providerOptions: { + openai: { store: false }, + }, +}) + +// 2. Build a provider-neutral request. This is useful when reusing one request +// across generate and stream examples. +// +// Options can live on both the model and the request: +// +// - `generation`: common controls such as max tokens, temperature, topP/topK, +// penalties, seed, and stop sequences. +// - `providerOptions`: namespaced provider-native behavior. For example, +// OpenAI cache keys and store behavior, Anthropic thinking, Gemini thinking +// config, or OpenRouter routing/reasoning. +// - `http`: last-resort serializable overlays for final request body, headers, +// and query params. Prefer typed `providerOptions` when a field is stable. +// +// Model options are defaults. Request options override them for this call. 
+const request = LLM.request({ + model, + system: "You are concise and practical.", + prompt: "Tell me a joke", + generation: { maxTokens: 80, temperature: 0.7 }, + providerOptions: { + openai: { promptCacheKey: "tutorial-joke" }, + }, +}) + +// `http` is intentionally not needed for normal calls. This shows the shape for +// newly released provider fields before they deserve a typed provider option. +const rawOverlayExample = LLM.request({ + model, + prompt: "Show the final HTTP overlay shape.", + http: { + body: { metadata: { example: "tutorial" } }, + headers: { "x-opencode-tutorial": "1" }, + query: { debug: "1" }, + }, +}) + +// 3. `generate` sends the request and collects the event stream into one +// response object. `response.text` is the collected text output. +const generateOnce = Effect.gen(function* () { + const response = yield* LLM.generate(request) + + console.log("\n== generate ==") + console.log("generated text:", response.text) + console.log("usage", Formatter.formatJson(response.usage, { space: 2 })) +}) + +// 4. `stream` exposes provider output as common `LLMEvent`s for UIs that want +// incremental text, reasoning, tool input, usage, or finish events. +const streamText = LLM.stream(request).pipe( + Stream.tap((event) => + Effect.sync(() => { + if (event.type === "text-delta") process.stdout.write(`\ntext: ${event.text}`) + if (event.type === "request-finish") process.stdout.write(`\nfinish: ${event.reason}\n`) + }), + ), + Stream.runDrain, +) + +// 5. Tools are typed with Effect Schema. Passing tools to `LLMClient.stream` +// adds their definitions to the request and dispatches matching tool calls. +// Add `stopWhen` to opt into follow-up model rounds after tool results. +const tools = { + get_weather: Tool.make({ + description: "Get current weather for a city.", + parameters: Schema.Struct({ city: Schema.String }), + success: Schema.Struct({ forecast: Schema.String }), + execute: (input) => Effect.succeed({ forecast: `${input.city}: sunny, 72F` }), + }), +} + +const streamWithTools = LLM.stream({ + request: LLM.request({ + model, + prompt: "Use get_weather for San Francisco, then answer in one sentence.", + generation: { maxTokens: 80, temperature: 0 }, + }), + tools, + stopWhen: LLM.stepCountIs(3), +}).pipe( + Stream.tap((event) => + Effect.sync(() => { + if (event.type === "tool-call") console.log("tool call", event.name, event.input) + if (event.type === "tool-result") console.log("tool result", event.name, event.result) + if (event.type === "text-delta") process.stdout.write(event.text) + }), + ), + Stream.runDrain, +) + +// 6. `generateObject` is the structured-output helper. It forces a synthetic +// tool call internally, so the same call site works across providers instead of +// depending on provider-specific JSON mode flags. +const WeatherReport = Schema.Struct({ + city: Schema.String, + forecast: Schema.String, + highFahrenheit: Schema.Number, +}) + +const generateStructuredObject = Effect.gen(function* () { + const response = yield* LLM.generateObject({ + model, + system: "Return only structured weather data.", + prompt: "Give me today's weather for San Francisco.", + schema: WeatherReport, + generation: { maxTokens: 120, temperature: 0 }, + }) + + console.log("\n== generateObject ==") + console.log(Formatter.formatJson(response.object, { space: 2 })) +}) + +// If the shape is only known at runtime, pass raw JSON Schema instead. The +// `.object` type is `unknown`; callers that need static types should validate it. 
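+// A hedged sketch of that caller-side validation, assuming `Schema.decodeUnknown` +// from effect's Schema module and a struct mirroring the JSON Schema below: +// +// const Extracted = Schema.Struct({ city: Schema.String, forecast: Schema.String }) +// const extracted = yield* Schema.decodeUnknown(Extracted)(response.object)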
+const generateDynamicObject = LLM.generateObject({ + model, + prompt: "Extract the city and forecast from: San Francisco is sunny.", + jsonSchema: { + type: "object", + properties: { + city: { type: "string" }, + forecast: { type: "string" }, + }, + required: ["city", "forecast"], + }, +}) + +// ----------------------------------------------------------------------------- +// Part 2: provider composition with a fake provider +// ----------------------------------------------------------------------------- + +// A protocol is the provider-native API shape: common request -> body, response +// frames -> common events. This fake one turns text prompts into a JSON body +// and treats every SSE frame as output text. +const FakeBody = Schema.Struct({ + model: Schema.String, + input: Schema.String, +}) +type FakeBody = Schema.Schema.Type<typeof FakeBody> + +const FakeProtocol = Protocol.make({ + // Protocol ids are open strings, so external packages can define their own + // protocols without changing this package. + id: "fake-echo", + body: { + schema: FakeBody, + from: (request) => + Effect.succeed({ + model: request.model.id, + input: request.messages + .flatMap((message) => message.content) + .filter((part) => part.type === "text") + .map((part) => part.text) + .join("\n"), + }), + }, + stream: { + event: Schema.String, + initial: () => undefined, + step: (_, frame) => Effect.succeed([undefined, [{ type: "text-delta", text: frame }]] as const), + onHalt: () => [{ type: "request-finish", reason: "stop" }], + }, +}) + +// A route is the runnable binding for that protocol. It adds the deployment +// axes that the protocol deliberately does not know: URL, auth, and framing. +const FakeAdapter = Route.make({ + id: "fake-echo", + protocol: FakeProtocol, + endpoint: Endpoint.path("/v1/echo"), + auth: Auth.passthrough, + framing: Framing.sse, +}) + +// A provider module exports a Provider definition. The default `model` helper +// sets provider identity, protocol id, and the route id resolved by the registry. +const fakeEchoModel = Route.model(FakeAdapter, { provider: "fake-echo", baseURL: "https://fake.local" }) +const FakeEcho = Provider.make({ + id: ProviderID.make("fake-echo"), + model: (id: string, options: ProviderModelOptions = {}) => fakeEchoModel({ id, ...options }), +}) + +// `LLMClient.prepare` is the lower-level inspection hook: it compiles through +// body conversion, validation, endpoint, auth, and HTTP construction without +// sending anything over the network. +const inspectFakeProvider = Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + model: FakeEcho.model("tiny-echo"), + prompt: "Show me the provider pipeline.", + }), + ) + + console.log("\n== fake provider prepare ==") + console.log("route:", prepared.route) + console.log("body:", Formatter.formatJson(prepared.body, { space: 2 })) +}) + +// Provide the LLM runtime and the HTTP request executor once. Keep one path +// enabled at a time so the tutorial can demonstrate generate, prepare, stream, +// or tool-loop behavior without spending tokens on every example.
+const requestExecutorLayer = RequestExecutor.defaultLayer +const llmClientLayer = LLMClient.layer.pipe(Layer.provide(requestExecutorLayer)) + +const program = Effect.gen(function* () { + // yield* generateOnce + // yield* inspectFakeProvider + // yield* LLMClient.prepare(rawOverlayExample).pipe(Effect.andThen((prepared) => Effect.sync(() => console.log(prepared.body)))) + // yield* streamText + // yield* generateStructuredObject + // yield* generateDynamicObject.pipe(Effect.andThen((response) => Effect.sync(() => console.log(response.object)))) + yield* streamWithTools +}).pipe(Effect.provide(Layer.mergeAll(requestExecutorLayer, llmClientLayer))) + +Effect.runPromise(program) diff --git a/packages/llm/package.json b/packages/llm/package.json new file mode 100644 index 000000000000..3a616f409371 --- /dev/null +++ b/packages/llm/package.json @@ -0,0 +1,51 @@ +{ + "$schema": "https://json.schemastore.org/package.json", + "version": "1.14.25", + "name": "@opencode-ai/llm", + "type": "module", + "license": "MIT", + "private": true, + "scripts": { + "setup:recording-env": "bun run script/setup-recording-env.ts", + "test": "bun test --timeout 30000", + "typecheck": "tsgo --noEmit" + }, + "exports": { + ".": "./src/index.ts", + "./route": "./src/route/index.ts", + "./provider": "./src/provider.ts", + "./providers": "./src/providers/index.ts", + "./providers/amazon-bedrock": "./src/providers/amazon-bedrock.ts", + "./providers/anthropic": "./src/providers/anthropic.ts", + "./providers/azure": "./src/providers/azure.ts", + "./providers/cloudflare": "./src/providers/cloudflare.ts", + "./providers/github-copilot": "./src/providers/github-copilot.ts", + "./providers/google": "./src/providers/google.ts", + "./providers/openai": "./src/providers/openai.ts", + "./providers/openai-compatible": "./src/providers/openai-compatible.ts", + "./providers/openai-compatible-profile": "./src/providers/openai-compatible-profile.ts", + "./providers/openrouter": "./src/providers/openrouter.ts", + "./providers/xai": "./src/providers/xai.ts", + "./protocols": "./src/protocols/index.ts", + "./protocols/anthropic-messages": "./src/protocols/anthropic-messages.ts", + "./protocols/bedrock-converse": "./src/protocols/bedrock-converse.ts", + "./protocols/gemini": "./src/protocols/gemini.ts", + "./protocols/openai-chat": "./src/protocols/openai-chat.ts", + "./protocols/openai-compatible-chat": "./src/protocols/openai-compatible-chat.ts", + "./protocols/openai-responses": "./src/protocols/openai-responses.ts" + }, + "devDependencies": { + "@clack/prompts": "1.0.0-alpha.1", + "@effect/platform-node": "catalog:", + "@opencode-ai/http-recorder": "workspace:*", + "@tsconfig/bun": "catalog:", + "@types/bun": "catalog:", + "@typescript/native-preview": "catalog:" + }, + "dependencies": { + "@smithy/eventstream-codec": "4.2.14", + "@smithy/util-utf8": "4.2.2", + "aws4fetch": "1.0.20", + "effect": "catalog:" + } +} diff --git a/packages/llm/script/recording-cost-report.ts b/packages/llm/script/recording-cost-report.ts new file mode 100644 index 000000000000..5b08e72d5c7a --- /dev/null +++ b/packages/llm/script/recording-cost-report.ts @@ -0,0 +1,250 @@ +import * as fs from "node:fs/promises" +import * as path from "node:path" + +const RECORDINGS_DIR = path.resolve(import.meta.dir, "..", "test", "fixtures", "recordings") +const MODELS_DEV_URL = "https://models.dev/api.json" + +type JsonRecord = Record<string, unknown> + +type Pricing = { + readonly input?: number + readonly output?: number + readonly cache_read?: number + readonly cache_write?: number +
readonly reasoning?: number +} + +type Usage = { + readonly inputTokens: number + readonly outputTokens: number + readonly cacheReadTokens: number + readonly cacheWriteTokens: number + readonly reasoningTokens: number + readonly reportedCost: number +} + +type Row = Usage & { + readonly cassette: string + readonly provider: string + readonly model: string + readonly estimatedCost: number + readonly pricingSource: string +} + +const isRecord = (value: unknown): value is JsonRecord => + value !== null && typeof value === "object" && !Array.isArray(value) + +const asNumber = (value: unknown) => (typeof value === "number" && Number.isFinite(value) ? value : 0) + +const asString = (value: unknown) => (typeof value === "string" ? value : undefined) + +const readJson = async (file: string) => JSON.parse(await Bun.file(file).text()) as unknown + +const walk = async (dir: string): Promise<ReadonlyArray<string>> => + (await fs.readdir(dir, { withFileTypes: true })) + .flatMap((entry) => { + const file = path.join(dir, entry.name) + return entry.isDirectory() ? [] : [file] + }) + .concat( + ...(await Promise.all( + (await fs.readdir(dir, { withFileTypes: true })) + .filter((entry) => entry.isDirectory()) + .map((entry) => walk(path.join(dir, entry.name))), + )), + ) + +const providerFromUrl = (url: string) => { + if (url.includes("api.openai.com")) return "openai" + if (url.includes("api.anthropic.com")) return "anthropic" + if (url.includes("generativelanguage.googleapis.com")) return "google" + if (url.includes("bedrock")) return "amazon-bedrock" + if (url.includes("openrouter.ai")) return "openrouter" + if (url.includes("api.x.ai")) return "xai" + if (url.includes("api.groq.com")) return "groq" + if (url.includes("api.deepseek.com")) return "deepseek" + if (url.includes("api.together.xyz")) return "togetherai" + return "unknown" +} + +const providerAliases: Record<string, ReadonlyArray<string>> = { + openai: ["openai"], + anthropic: ["anthropic"], + google: ["google"], + "amazon-bedrock": ["amazon-bedrock"], + openrouter: ["openrouter", "openai", "anthropic", "google"], + xai: ["xai"], + groq: ["groq"], + deepseek: ["deepseek"], + togetherai: ["togetherai"], +} + +const modelAliases = (model: string) => [ + model, + model.replace(/^models\//, ""), + model.replace(/-\d{8}$/, ""), + model.replace(/-\d{4}-\d{2}-\d{2}$/, ""), + model.replace(/-\d{4}-\d{2}-\d{2}$/, "").replace(/-\d{8}$/, ""), + model.replace(/^openai\//, ""), + model.replace(/^anthropic\//, ""), + model.replace(/^google\//, ""), +] + +const pricingFor = (models: JsonRecord, provider: string, model: string) => { + for (const providerID of providerAliases[provider] ?? [provider]) { + const providerEntry = models[providerID] + if (!isRecord(providerEntry) || !isRecord(providerEntry.models)) continue + for (const modelID of modelAliases(model)) { + const modelEntry = providerEntry.models[modelID] + if (isRecord(modelEntry) && isRecord(modelEntry.cost)) + return { pricing: modelEntry.cost as Pricing, source: `${providerID}/${modelID}` } + } + } + return { pricing: undefined, source: "missing" } +} + +const estimateCost = (usage: Usage, pricing: Pricing | undefined) => { + if (!pricing) return 0 + return ( + (usage.inputTokens * (pricing.input ?? 0) + + usage.outputTokens * (pricing.output ?? 0) + + usage.cacheReadTokens * (pricing.cache_read ?? 0) + + usage.cacheWriteTokens * (pricing.cache_write ?? 0) + + usage.reasoningTokens * (pricing.reasoning ??
0)) / + 1_000_000 + ) +} + +const emptyUsage = (): Usage => ({ + inputTokens: 0, + outputTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0, + reasoningTokens: 0, + reportedCost: 0, +}) + +const addUsage = (a: Usage, b: Usage): Usage => ({ + inputTokens: a.inputTokens + b.inputTokens, + outputTokens: a.outputTokens + b.outputTokens, + cacheReadTokens: a.cacheReadTokens + b.cacheReadTokens, + cacheWriteTokens: a.cacheWriteTokens + b.cacheWriteTokens, + reasoningTokens: a.reasoningTokens + b.reasoningTokens, + reportedCost: a.reportedCost + b.reportedCost, +}) + +const usageFromObject = (usage: unknown): Usage => { + if (!isRecord(usage)) return emptyUsage() + const promptDetails = isRecord(usage.prompt_tokens_details) ? usage.prompt_tokens_details : {} + const completionDetails = isRecord(usage.completion_tokens_details) ? usage.completion_tokens_details : {} + const inputDetails = isRecord(usage.input_tokens_details) ? usage.input_tokens_details : {} + const outputDetails = isRecord(usage.output_tokens_details) ? usage.output_tokens_details : {} + const cacheWriteTokens = asNumber(promptDetails.cache_write_tokens) + asNumber(inputDetails.cache_write_tokens) + return { + inputTokens: asNumber(usage.prompt_tokens) + asNumber(usage.input_tokens), + outputTokens: asNumber(usage.completion_tokens) + asNumber(usage.output_tokens), + cacheReadTokens: asNumber(promptDetails.cached_tokens) + asNumber(inputDetails.cached_tokens), + cacheWriteTokens, + reasoningTokens: asNumber(completionDetails.reasoning_tokens) + asNumber(outputDetails.reasoning_tokens), + reportedCost: asNumber(usage.cost), + } +} + +const jsonPayloads = (body: string) => + body + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.startsWith("data:")) + .map((line) => line.slice("data:".length).trim()) + .filter((line) => line !== "" && line !== "[DONE]") + .flatMap((line) => { + try { + return [JSON.parse(line) as unknown] + } catch { + return [] + } + }) + +const usageFromResponseBody = (body: string) => + jsonPayloads(body).reduce((usage, payload) => { + if (!isRecord(payload)) return usage + return addUsage( + usage, + addUsage( + usageFromObject(payload.usage), + usageFromObject(isRecord(payload.response) ? payload.response.usage : undefined), + ), + ) + }, emptyUsage()) + +const modelFromRequest = (request: unknown) => { + if (!isRecord(request)) return "unknown" + const requestBody = asString(request.body) + if (!requestBody) return "unknown" + try { + const body = JSON.parse(requestBody) as unknown + if (!isRecord(body)) return "unknown" + return asString(body.model) ?? "unknown" + } catch { + return "unknown" + } +} + +const rowFor = (models: JsonRecord, file: string, cassette: unknown): Row | undefined => { + if (!isRecord(cassette) || !Array.isArray(cassette.interactions)) return undefined + const first = cassette.interactions.find(isRecord) + if (!first || !isRecord(first.request)) return undefined + const provider = providerFromUrl(asString(first.request.url) ?? 
"") + const model = modelFromRequest(first.request) + const usage = cassette.interactions.filter(isRecord).reduce((total, interaction) => { + if (!isRecord(interaction.response)) return total + const responseBody = asString(interaction.response.body) + if (!responseBody) return total + return addUsage(total, usageFromResponseBody(responseBody)) + }, emptyUsage()) + const priced = pricingFor(models, provider, model) + return { + cassette: path.relative(RECORDINGS_DIR, file), + provider, + model, + ...usage, + estimatedCost: estimateCost(usage, priced.pricing), + pricingSource: priced.source, + } +} + +const money = (value: number) => (value === 0 ? "$0.000000" : `$${value.toFixed(6)}`) +const tokens = (value: number) => value.toLocaleString("en-US") + +const models = (await (await fetch(MODELS_DEV_URL)).json()) as JsonRecord +const rows = ( + await Promise.all( + (await walk(RECORDINGS_DIR)) + .filter((file) => file.endsWith(".json")) + .map(async (file) => rowFor(models, file, await readJson(file))), + ) +).filter((row): row is Row => row !== undefined) + +const totals = rows.reduce( + (total, row) => ({ + ...addUsage(total, row), + estimatedCost: total.estimatedCost + row.estimatedCost, + }), + { ...emptyUsage(), estimatedCost: 0 }, +) + +console.log("# Recording Cost Report") +console.log("") +console.log(`Pricing: ${MODELS_DEV_URL}`) +console.log(`Cassettes: ${rows.length}`) +console.log(`Reported cost: ${money(totals.reportedCost)}`) +console.log(`Estimated cost: ${money(totals.estimatedCost)}`) +console.log("") +console.log("| Provider | Model | Input | Output | Reasoning | Reported | Estimated | Pricing | Cassette |") +console.log("|---|---:|---:|---:|---:|---:|---:|---|---|") +for (const row of rows.toSorted((a, b) => b.reportedCost + b.estimatedCost - (a.reportedCost + a.estimatedCost))) { + if (row.inputTokens + row.outputTokens + row.reasoningTokens + row.reportedCost + row.estimatedCost === 0) continue + console.log( + `| ${row.provider} | ${row.model} | ${tokens(row.inputTokens)} | ${tokens(row.outputTokens)} | ${tokens(row.reasoningTokens)} | ${money(row.reportedCost)} | ${money(row.estimatedCost)} | ${row.pricingSource} | ${row.cassette} |`, + ) +} diff --git a/packages/llm/script/setup-recording-env.ts b/packages/llm/script/setup-recording-env.ts new file mode 100644 index 000000000000..945f2b2ada5c --- /dev/null +++ b/packages/llm/script/setup-recording-env.ts @@ -0,0 +1,537 @@ +#!/usr/bin/env bun + +import { NodeFileSystem } from "@effect/platform-node" +import * as path from "node:path" +import * as prompts from "@clack/prompts" +import { AwsV4Signer } from "aws4fetch" +import { Config, ConfigProvider, Effect, FileSystem, PlatformError, Redacted } from "effect" +import { FetchHttpClient, HttpClient, HttpClientRequest, type HttpClientResponse } from "effect/unstable/http" +import * as ProviderShared from "../src/protocols/shared" +import * as Cloudflare from "../src/providers/cloudflare" + +type Provider = { + readonly id: string + readonly label: string + readonly tier: "core" | "canary" | "compatible" | "optional" + readonly note: string + readonly vars: ReadonlyArray<{ + readonly name: string + readonly label?: string + readonly optional?: boolean + readonly secret?: boolean + }> + readonly validate?: (env: Env) => Effect.Effect +} + +type Env = Record + +const PROVIDERS: ReadonlyArray = [ + { + id: "openai", + label: "OpenAI", + tier: "core", + note: "Native OpenAI Chat / Responses recorded tests", + vars: [{ name: "OPENAI_API_KEY" }], + validate: (env) => 
validateBearer("https://api.openai.com/v1/models", Redacted.make(env.OPENAI_API_KEY)), + }, + { + id: "anthropic", + label: "Anthropic", + tier: "core", + note: "Native Anthropic Messages recorded tests", + vars: [{ name: "ANTHROPIC_API_KEY" }], + validate: (env) => + HttpClientRequest.get("https://api.anthropic.com/v1/models").pipe( + HttpClientRequest.setHeaders({ + "anthropic-version": "2023-06-01", + "x-api-key": Redacted.value(Redacted.make(env.ANTHROPIC_API_KEY)), + }), + executeRequest, + ), + }, + { + id: "google", + label: "Google Gemini", + tier: "core", + note: "Native Gemini recorded tests", + vars: [{ name: "GOOGLE_GENERATIVE_AI_API_KEY" }], + validate: (env) => + HttpClientRequest.get( + `https://generativelanguage.googleapis.com/v1beta/models?key=${encodeURIComponent(env.GOOGLE_GENERATIVE_AI_API_KEY)}`, + ).pipe(executeRequest), + }, + { + id: "bedrock", + label: "Amazon Bedrock", + tier: "core", + note: "Native Bedrock Converse recorded tests", + vars: [ + { name: "AWS_ACCESS_KEY_ID" }, + { name: "AWS_SECRET_ACCESS_KEY" }, + { name: "AWS_SESSION_TOKEN", optional: true }, + { name: "BEDROCK_RECORDING_REGION", optional: true }, + { name: "BEDROCK_MODEL_ID", optional: true }, + ], + validate: (env) => validateBedrock(env), + }, + { + id: "groq", + label: "Groq", + tier: "canary", + note: "Fast OpenAI-compatible canary for text/tool streaming", + vars: [{ name: "GROQ_API_KEY" }], + validate: (env) => validateBearer("https://api.groq.com/openai/v1/models", Redacted.make(env.GROQ_API_KEY)), + }, + { + id: "openrouter", + label: "OpenRouter", + tier: "canary", + note: "Router canary for OpenAI-compatible text/tool streaming", + vars: [{ name: "OPENROUTER_API_KEY" }], + validate: (env) => + validateChat({ + url: "https://openrouter.ai/api/v1/chat/completions", + token: Redacted.make(env.OPENROUTER_API_KEY), + model: "openai/gpt-4o-mini", + }), + }, + { + id: "xai", + label: "xAI", + tier: "canary", + note: "OpenAI-compatible xAI chat endpoint", + vars: [{ name: "XAI_API_KEY" }], + validate: (env) => validateBearer("https://api.x.ai/v1/models", Redacted.make(env.XAI_API_KEY)), + }, + { + id: "cloudflare-ai-gateway", + label: "Cloudflare AI Gateway", + tier: "canary", + note: "Cloudflare Unified/OpenAI-compatible gateway; supports provider/model ids like workers-ai/@cf/...", + vars: [ + { name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false }, + { name: "CLOUDFLARE_GATEWAY_ID", label: "Cloudflare AI Gateway ID (defaults to default)", optional: true, secret: false }, + { name: "CLOUDFLARE_API_TOKEN", label: "Cloudflare AI Gateway token" }, + ], + validate: (env) => + validateChat({ + url: `${Cloudflare.aiGatewayBaseURL({ + accountId: env.CLOUDFLARE_ACCOUNT_ID, + gatewayId: env.CLOUDFLARE_GATEWAY_ID || undefined, + })}/chat/completions`, + token: Redacted.make(envValue(env, Cloudflare.aiGatewayAuthEnvVars)), + tokenHeader: "cf-aig-authorization", + model: "workers-ai/@cf/meta/llama-3.1-8b-instruct", + }), + }, + { + id: "cloudflare-workers-ai", + label: "Cloudflare Workers AI", + tier: "canary", + note: "Direct Workers AI OpenAI-compatible endpoint; supports model ids like @cf/meta/...", + vars: [ + { name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false }, + { name: "CLOUDFLARE_API_KEY", label: "Cloudflare Workers AI API token" }, + ], + validate: (env) => + validateChat({ + url: `${Cloudflare.workersAIBaseURL({ accountId: env.CLOUDFLARE_ACCOUNT_ID })}/chat/completions`, + token: Redacted.make(envValue(env, 
Cloudflare.workersAIAuthEnvVars)), + model: "@cf/meta/llama-3.1-8b-instruct", + }), + }, + { + id: "deepseek", + label: "DeepSeek", + tier: "compatible", + note: "Existing OpenAI-compatible recorded tests", + vars: [{ name: "DEEPSEEK_API_KEY" }], + validate: (env) => validateBearer("https://api.deepseek.com/models", Redacted.make(env.DEEPSEEK_API_KEY)), + }, + { + id: "togetherai", + label: "TogetherAI", + tier: "compatible", + note: "Existing OpenAI-compatible text/tool recorded tests", + vars: [{ name: "TOGETHER_AI_API_KEY" }], + validate: (env) => validateBearer("https://api.together.xyz/v1/models", Redacted.make(env.TOGETHER_AI_API_KEY)), + }, + { + id: "mistral", + label: "Mistral", + tier: "optional", + note: "OpenAI-compatible bridge; native reasoning parity is follow-up work", + vars: [{ name: "MISTRAL_API_KEY" }], + validate: (env) => validateBearer("https://api.mistral.ai/v1/models", Redacted.make(env.MISTRAL_API_KEY)), + }, + { + id: "perplexity", + label: "Perplexity", + tier: "optional", + note: "OpenAI-compatible bridge; citations/search metadata are follow-up work", + vars: [{ name: "PERPLEXITY_API_KEY" }], + validate: (env) => validateBearer("https://api.perplexity.ai/models", Redacted.make(env.PERPLEXITY_API_KEY)), + }, + { + id: "venice", + label: "Venice", + tier: "optional", + note: "OpenAI-compatible bridge", + vars: [{ name: "VENICE_API_KEY" }], + validate: (env) => validateBearer("https://api.venice.ai/api/v1/models", Redacted.make(env.VENICE_API_KEY)), + }, + { + id: "cerebras", + label: "Cerebras", + tier: "optional", + note: "OpenAI-compatible bridge", + vars: [{ name: "CEREBRAS_API_KEY" }], + validate: (env) => validateBearer("https://api.cerebras.ai/v1/models", Redacted.make(env.CEREBRAS_API_KEY)), + }, + { + id: "deepinfra", + label: "DeepInfra", + tier: "optional", + note: "OpenAI-compatible bridge", + vars: [{ name: "DEEPINFRA_API_KEY" }], + validate: (env) => + validateBearer("https://api.deepinfra.com/v1/openai/models", Redacted.make(env.DEEPINFRA_API_KEY)), + }, + { + id: "fireworks", + label: "Fireworks", + tier: "optional", + note: "OpenAI-compatible bridge", + vars: [{ name: "FIREWORKS_API_KEY" }], + validate: (env) => + validateBearer("https://api.fireworks.ai/inference/v1/models", Redacted.make(env.FIREWORKS_API_KEY)), + }, + { + id: "baseten", + label: "Baseten", + tier: "optional", + note: "OpenAI-compatible bridge", + vars: [{ name: "BASETEN_API_KEY" }], + }, +] + +const args = process.argv.slice(2) +const hasFlag = (name: string) => args.includes(name) +const option = (name: string) => { + const index = args.indexOf(name) + if (index === -1) return undefined + return args[index + 1] +} + +const envPath = path.resolve(process.cwd(), option("--env") ?? 
".env.local") +const checkOnly = hasFlag("--check") +const providerOption = option("--providers") +const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY) + +const envNames = Array.from(new Set(PROVIDERS.flatMap((provider) => provider.vars.map((item) => item.name)))) + +const providersForOption = (value: string | undefined) => { + if (!value || value === "recommended") + return PROVIDERS.filter((provider) => provider.tier === "core" || provider.tier === "canary") + if (value === "recorded") return PROVIDERS.filter((provider) => provider.tier !== "optional") + if (value === "all") return PROVIDERS + const ids = new Set( + value + .split(",") + .map((item) => item.trim()) + .filter(Boolean), + ) + return PROVIDERS.filter((provider) => ids.has(provider.id)) +} + +const chooseProviders = async () => { + if (providerOption) return providersForOption(providerOption) + return providersForOption("recommended") +} + +const catchMissingFile = (error: PlatformError.PlatformError) => { + if (error.reason._tag === "NotFound") return Effect.succeed("") + return Effect.fail(error) +} + +const readEnvFile = Effect.fn("RecordingEnv.readFile")(function* () { + const fileSystem = yield* FileSystem.FileSystem + return yield* fileSystem.readFileString(envPath).pipe(Effect.catch(catchMissingFile)) +}) + +const readConfigString = (provider: ConfigProvider.ConfigProvider, name: string) => + Config.string(name) + .parse(provider) + .pipe( + Effect.match({ + onFailure: () => undefined, + onSuccess: (value) => value, + }), + ) + +const parseEnv = Effect.fn("RecordingEnv.parseEnv")(function* (contents: string) { + const provider = ConfigProvider.fromDotEnvContents(contents) + return Object.fromEntries( + (yield* Effect.forEach(envNames, (name) => + readConfigString(provider, name).pipe(Effect.map((value) => [name, value] as const)), + )).filter((entry): entry is readonly [string, string] => entry[1] !== undefined), + ) +}) + +const quote = (value: string) => JSON.stringify(value) + +const status = (name: string, fileEnv: Env) => { + if (fileEnv[name]) return "file" + if (process.env[name]) return "shell" + return "missing" +} + +const statusLine = (provider: Provider, fileEnv: Env) => + [ + `${provider.label} (${provider.tier})`, + provider.note, + ...provider.vars.map((item) => { + const value = status(item.name, fileEnv) + const suffix = item.optional ? " optional" : "" + return ` ${value === "missing" ? "missing" : "set"} ${item.name}${suffix}${value === "shell" ? 
" (shell only)" : ""}` + }), + ].join("\n") + +const printStatus = (providers: ReadonlyArray, fileEnv: Env) => { + prompts.note(providers.map((provider) => statusLine(provider, fileEnv)).join("\n\n"), `Recording env: ${envPath}`) +} + +const exitIfCancel = (value: A | symbol): A => { + if (!prompts.isCancel(value)) return value as A + prompts.cancel("Cancelled") + process.exit(130) +} + +const upsertEnv = (contents: string, values: Env) => { + const names = Object.keys(values) + const seen = new Set() + const lines = contents.split(/\r?\n/).map((line) => { + const match = line.match(/^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=/) + if (!match || !names.includes(match[1])) return line + seen.add(match[1]) + return `${match[1]}=${quote(values[match[1]])}` + }) + const missing = names.filter((name) => !seen.has(name)) + if (missing.length === 0) return lines.join("\n").replace(/\n*$/, "\n") + const prefix = lines.join("\n").trimEnd() + const block = [ + "", + "# Added by bun run setup:recording-env", + ...missing.map((name) => `${name}=${quote(values[name])}`), + ].join("\n") + return `${prefix}${block}\n` +} + +const providerRequiredStatus = (provider: Provider, fileEnv: Env) => { + const required = requiredVars(provider) + if (required.some((item) => status(item.name, fileEnv) === "missing")) return "missing" + if (required.some((item) => status(item.name, fileEnv) === "shell")) return "set in shell" + return "already added" +} + +const requiredVars = (provider: Provider) => provider.vars.filter((item) => !item.optional) + +const promptVars = (provider: Provider) => provider.vars.filter((item) => !item.optional || item.secret === false) + +const processEnv = (): Env => + Object.fromEntries(Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined)) + +const envValue = (env: Env, names: ReadonlyArray) => names.map((name) => env[name]).find(Boolean) ?? "" + +const envWithValues = (fileEnv: Env, values: Env): Env => ({ + ...processEnv(), + ...fileEnv, + ...values, +}) + +const responseError = Effect.fn("RecordingEnv.responseError")(function* ( + response: HttpClientResponse.HttpClientResponse, +) { + if (response.status >= 200 && response.status < 300) return undefined + const body = yield* response.text.pipe(Effect.catch(() => Effect.succeed(""))) + return `${response.status}${body ? `: ${body.slice(0, 180)}` : ""}` +}) + +const executeRequest = Effect.fn("RecordingEnv.executeRequest")(function* ( + request: HttpClientRequest.HttpClientRequest, +) { + const http = yield* HttpClient.HttpClient + return yield* http.execute(request).pipe(Effect.flatMap(responseError)) +}) + +const validateBearer = (url: string, token: Redacted.Redacted, headers: Record = {}) => + HttpClientRequest.get(url).pipe( + HttpClientRequest.setHeaders({ ...headers, authorization: `Bearer ${Redacted.value(token)}` }), + executeRequest, + ) + +const validateChat = (input: { + readonly url: string + readonly token: Redacted.Redacted + readonly tokenHeader?: string + readonly model: string + readonly headers?: Record +}) => + ProviderShared.jsonPost({ + url: input.url, + headers: { ...input.headers, [input.tokenHeader ?? 
"authorization"]: `Bearer ${Redacted.value(input.token)}` }, + body: ProviderShared.encodeJson({ + model: input.model, + messages: [{ role: "user", content: "Reply with exactly: ok" }], + max_tokens: 3, + temperature: 0, + }), + }).pipe(executeRequest) + +const validateBedrock = (env: Env) => + Effect.gen(function* () { + const request = yield* Effect.promise(() => + new AwsV4Signer({ + url: `https://bedrock.${env.BEDROCK_RECORDING_REGION || "us-east-1"}.amazonaws.com/foundation-models`, + method: "GET", + service: "bedrock", + region: env.BEDROCK_RECORDING_REGION || "us-east-1", + accessKeyId: env.AWS_ACCESS_KEY_ID, + secretAccessKey: env.AWS_SECRET_ACCESS_KEY, + sessionToken: env.AWS_SESSION_TOKEN || undefined, + }).sign(), + ) + return yield* HttpClientRequest.get(request.url.toString()).pipe( + HttpClientRequest.setHeaders(Object.fromEntries(request.headers.entries())), + executeRequest, + ) + }) + +const validateProvider = Effect.fn("RecordingEnv.validateProvider")(function* (provider: Provider, env: Env) { + return yield* (provider.validate?.(env) ?? Effect.succeed("no lightweight validator")).pipe( + Effect.catch((error) => { + if (error instanceof Error) return Effect.succeed(error.message) + return Effect.succeed(String(error)) + }), + ) +}) + +const validateProviders = Effect.fn("RecordingEnv.validateProviders")(function* ( + providers: ReadonlyArray, + env: Env, +) { + const spinner = prompts.spinner() + spinner.start("Validating credentials") + const results = yield* Effect.forEach( + providers, + (provider) => validateProvider(provider, env).pipe(Effect.map((error) => ({ provider, error }))), + { concurrency: 4 }, + ) + spinner.stop("Validation complete") + prompts.note( + results + .map( + (result) => + `${result.error ? "failed" : "ok"} ${result.provider.label}${result.error ? ` - ${result.error}` : ""}`, + ) + .join("\n"), + "Credential validation", + ) +}) + +const writeEnvFile = Effect.fn("RecordingEnv.writeFile")(function* (contents: string) { + const fileSystem = yield* FileSystem.FileSystem + yield* fileSystem.makeDirectory(path.dirname(envPath), { recursive: true }) + yield* fileSystem.writeFileString(envPath, contents, { mode: 0o600 }) +}) + +const prompt = (run: () => Promise) => Effect.promise(run).pipe(Effect.map(exitIfCancel)) + +const chooseConfigurableProviders = Effect.fn("RecordingEnv.chooseConfigurableProviders")(function* ( + providers: ReadonlyArray, + fileEnv: Env, +) { + const configurable = providers.filter((provider) => requiredVars(provider).length > 0) + const selected = yield* prompt>(() => + prompts.multiselect({ + message: "Select provider credentials to add or override", + options: configurable.map((provider) => ({ + value: provider.id, + label: provider.label, + hint: `${providerRequiredStatus(provider, fileEnv)} - ${requiredVars(provider) + .map((item) => item.name) + .join(", ")}`, + })), + initialValues: configurable + .filter((provider) => providerRequiredStatus(provider, fileEnv) === "missing") + .map((provider) => provider.id), + }), + ) + return configurable.filter((provider) => selected.includes(provider.id)) +}) + +const promptEnvVar = (item: Provider["vars"][number]) => + prompt(() => { + const input = { + message: item.label ?? item.name, + validate: (input: string | undefined) => { + if (item.optional) return undefined + return !input || input.length === 0 ? "Leave blank by pressing Esc/cancel, or paste a value" : undefined + }, + } + return item.secret === false ? 
prompts.text(input) : prompts.password(input) + }) + +const promptProviderValues = Effect.fn("RecordingEnv.promptProviderValues")(function* ( + providers: ReadonlyArray<Provider>, +) { + const values: Env = {} + for (const provider of providers) { + prompts.log.info(`${provider.label}: ${provider.note}`) + for (const item of promptVars(provider)) { + if (values[item.name]) continue + const value = yield* promptEnvVar(item) + if (value !== "") values[item.name] = value + } + } + return values +}) + +const main = Effect.fn("RecordingEnv.main")(function* () { + prompts.intro("LLM recording credentials") + const contents = yield* readEnvFile() + const fileEnv = yield* parseEnv(contents) + const providers = yield* Effect.promise(() => chooseProviders()) + printStatus(providers, fileEnv) + if (checkOnly) { + prompts.outro("Check complete") + return + } + if (!interactive) { + prompts.outro("Run this command in a terminal to enter credentials") + return + } + + const selectedProviders = yield* chooseConfigurableProviders(providers, fileEnv) + const values = yield* promptProviderValues(selectedProviders) + + if (Object.keys(values).length === 0) { + prompts.outro("No changes") + return + } + + if ( + interactive && + (yield* prompt(() => prompts.confirm({ message: "Validate credentials before saving?", initialValue: true }))) + ) { + yield* validateProviders(selectedProviders, envWithValues(fileEnv, values)) + } + + yield* writeEnvFile(upsertEnv(contents, values)) + prompts.log.success( + `Saved ${Object.keys(values).length} value${Object.keys(values).length === 1 ? "" : "s"} to ${envPath}`, + ) + prompts.outro("Keep .env.local local. Store shared team credentials in a password manager or vault.") +}) + +await Effect.runPromise(main().pipe(Effect.provide(NodeFileSystem.layer), Effect.provide(FetchHttpClient.layer))) diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts new file mode 100644 index 000000000000..f4adf4859a9b --- /dev/null +++ b/packages/llm/src/index.ts @@ -0,0 +1,35 @@ +export { LLMClient, modelLimits, modelRef } from "./route/client" +export { Auth } from "./route/auth" +export { Provider } from "./provider" +export type { + RouteModelInput, + RouteRoutedModelInput, + Interface as LLMClientShape, + Service as LLMClientService, + ModelRefInput, +} from "./route/client" +export * from "./schema" +export { Tool, ToolFailure, toDefinitions, tool } from "./tool" +export type { + AnyExecutableTool, + AnyTool, + ExecutableTool, + ExecutableTools, + Tool as ToolShape, + ToolExecute, + Tools, + ToolSchema, +} from "./tool" +export type { + RunOptions as ToolRunOptions, + RuntimeState as ToolRuntimeState, + StopCondition as ToolStopCondition, + ToolExecution, +} from "./tool-runtime" + +export * as LLM from "./llm" +export type { + Definition as ProviderDefinition, + ModelFactory as ProviderModelFactory, + ModelOptions as ProviderModelOptions, +} from "./provider" diff --git a/packages/llm/src/llm.ts b/packages/llm/src/llm.ts new file mode 100644 index 000000000000..21d88302ed75 --- /dev/null +++ b/packages/llm/src/llm.ts @@ -0,0 +1,224 @@ +import { Effect, JsonSchema, Schema } from "effect" +import { + LLMClient, + modelLimits, + modelRef, + type ModelRefInput, +} from "./route/client" +import { + GenerationOptions, + HttpOptions, + InvalidProviderOutputReason, + LLMError, + LLMEvent, + LLMRequest, + LLMResponse, + Message, + SystemPart, + ToolChoice, + ToolDefinition, + type ContentPart, + ToolCallPart, + ToolResultPart, +} from "./schema" +import { make as makeTool, type ToolSchema }
from "./tool" + +export type ModelInput = ModelRefInput + +export type MessageInput = Message.Input + +export type ToolChoiceInput = ToolChoice.Input +export type ToolChoiceMode = ToolChoice.Mode + +export type ToolResultInput = Parameters[0] + +/** Input accepted by `LLM.request`, normalized into the canonical `LLMRequest` class. */ +export type RequestInput = Omit< + ConstructorParameters[0], + "system" | "messages" | "tools" | "toolChoice" | "generation" | "http" | "providerOptions" +> & { + readonly system?: string | SystemPart | ReadonlyArray + readonly prompt?: string | ContentPart | ReadonlyArray + readonly messages?: ReadonlyArray + readonly tools?: ReadonlyArray + readonly toolChoice?: ToolChoiceInput + readonly generation?: GenerationOptions.Input + readonly providerOptions?: ConstructorParameters[0]["providerOptions"] + readonly http?: HttpOptions.Input +} + +export const limits = modelLimits + +export const text = Message.text + +export const system = SystemPart.make + +export const message = Message.make + +export const user = Message.user + +export const assistant = Message.assistant + +export const model = modelRef + +export const toolDefinition = ToolDefinition.make + +export const toolCall = ToolCallPart.make + +export const toolResult = ToolResultPart.make + +export const toolMessage = Message.tool + +export const toolChoiceName = ToolChoice.named + +export const toolChoice = ToolChoice.make + +export const generation = GenerationOptions.make + +export const generate = LLMClient.generate + +export const stream = LLMClient.stream + +export const stepCountIs = LLMClient.stepCountIs + +export const requestInput = (input: LLMRequest): RequestInput => ({ + ...LLMRequest.input(input), +}) + +export const request = (input: RequestInput) => { + const { + system: requestSystem, + prompt, + messages, + tools, + toolChoice: requestToolChoice, + generation: requestGeneration, + providerOptions: requestProviderOptions, + http: requestHttp, + ...rest + } = input + return new LLMRequest({ + ...rest, + system: SystemPart.content(requestSystem), + messages: [...(messages?.map(message) ?? []), ...(prompt === undefined ? [] : [user(prompt)])], + tools: tools?.map(toolDefinition) ?? [], + toolChoice: requestToolChoice ? toolChoice(requestToolChoice) : undefined, + generation: requestGeneration === undefined ? undefined : generation(requestGeneration), + providerOptions: requestProviderOptions, + http: requestHttp === undefined ? undefined : HttpOptions.make(requestHttp), + }) +} + +export const updateRequest = (input: LLMRequest, patch: Partial) => + request({ ...requestInput(input), ...patch }) + +const GENERATE_OBJECT_TOOL_NAME = "generate_object" + +const GENERATE_OBJECT_TOOL_DESCRIPTION = "Return the structured result by calling this tool." + +type GenerateObjectBase = Omit + +export class GenerateObjectResponse { + constructor( + readonly object: T, + readonly response: LLMResponse, + ) {} + + get events() { + return this.response.events + } + + get usage() { + return this.response.usage + } +} + +export interface GenerateObjectOptions> extends GenerateObjectBase { + readonly schema: S +} + +export interface GenerateObjectDynamicOptions extends GenerateObjectBase { + /** Raw JSON Schema object describing the expected output shape. 
*/ + readonly jsonSchema: JsonSchema.JsonSchema +} + +const runGenerateObject = Effect.fn("LLM.generateObject")(function* ( + options: GenerateObjectBase, + tool: ReturnType, +) { + const baseRequest = request(options) + const generateRequest = LLMRequest.update(baseRequest, { + toolChoice: ToolChoice.named(GENERATE_OBJECT_TOOL_NAME), + }) + const response = yield* LLMClient.generate({ + request: generateRequest, + tools: { [GENERATE_OBJECT_TOOL_NAME]: tool }, + toolExecution: "none", + }) + const call = response.toolCalls.find( + (event) => LLMEvent.is.toolCall(event) && event.name === GENERATE_OBJECT_TOOL_NAME, + ) + if (!call || !LLMEvent.is.toolCall(call)) + return yield* new LLMError({ + module: "LLM", + method: "generateObject", + reason: new InvalidProviderOutputReason({ + message: `generateObject: model did not call the forced \`${GENERATE_OBJECT_TOOL_NAME}\` tool`, + }), + }) + const object = yield* tool._decode(call.input).pipe( + Effect.mapError( + (error) => + new LLMError({ + module: "LLM", + method: "generateObject", + reason: new InvalidProviderOutputReason({ + message: `generateObject: tool input failed schema decode: ${error.message}`, + }), + }), + ), + ) + return new GenerateObjectResponse(object, response) +}) + +/** + * Run a model and decode its output against `schema`. Works on every protocol + * because it forces a synthetic tool call internally — provider-native JSON + * modes are intentionally avoided so behaviour is uniform. + * + * Two input modes: + * + * 1. `schema: EffectSchema` — `.object` is decoded and typed as `T`. + * Decode failures surface as `LLMError`. + * 2. `jsonSchema: JsonSchema.JsonSchema` — `.object` is `unknown`. Use when + * the schema is only available at runtime (MCP, plugin manifests). Caller validates. 
+ */ +export function generateObject>( + options: GenerateObjectOptions, +): Effect.Effect>, LLMError> +export function generateObject(options: GenerateObjectDynamicOptions): Effect.Effect, LLMError> +export function generateObject( + options: GenerateObjectOptions> | GenerateObjectDynamicOptions, +) { + if ("schema" in options) { + const { schema, ...rest } = options + return runGenerateObject( + rest, + makeTool({ + description: GENERATE_OBJECT_TOOL_DESCRIPTION, + parameters: schema, + success: Schema.Unknown as ToolSchema, + execute: () => Effect.void, + }), + ) + } + const { jsonSchema, ...rest } = options + return runGenerateObject( + rest, + makeTool({ + description: GENERATE_OBJECT_TOOL_DESCRIPTION, + jsonSchema, + execute: () => Effect.void, + }), + ) +} diff --git a/packages/llm/src/protocols/anthropic-messages.ts b/packages/llm/src/protocols/anthropic-messages.ts new file mode 100644 index 000000000000..ff2239c0d76a --- /dev/null +++ b/packages/llm/src/protocols/anthropic-messages.ts @@ -0,0 +1,592 @@ +import { Effect, Schema } from "effect" +import { Route } from "../route/client" +import { Auth } from "../route/auth" +import { Endpoint } from "../route/endpoint" +import { Framing } from "../route/framing" +import { Protocol } from "../route/protocol" +import { + Usage, + type CacheHint, + type FinishReason, + type LLMEvent, + type LLMRequest, + type ProviderMetadata, + type ToolCallPart, + type ToolDefinition, + type ToolResultPart, +} from "../schema" +import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared" +import { ToolStream } from "./utils/tool-stream" + +const ADAPTER = "anthropic-messages" +export const DEFAULT_BASE_URL = "https://api.anthropic.com/v1" +export const PATH = "/messages" + +// ============================================================================= +// Request Body Schema +// ============================================================================= +const AnthropicCacheControl = Schema.Struct({ type: Schema.tag("ephemeral") }) + +const AnthropicTextBlock = Schema.Struct({ + type: Schema.tag("text"), + text: Schema.String, + cache_control: Schema.optional(AnthropicCacheControl), +}) +type AnthropicTextBlock = Schema.Schema.Type + +const AnthropicThinkingBlock = Schema.Struct({ + type: Schema.tag("thinking"), + thinking: Schema.String, + signature: Schema.optional(Schema.String), + cache_control: Schema.optional(AnthropicCacheControl), +}) + +const AnthropicToolUseBlock = Schema.Struct({ + type: Schema.tag("tool_use"), + id: Schema.String, + name: Schema.String, + input: Schema.Unknown, + cache_control: Schema.optional(AnthropicCacheControl), +}) +type AnthropicToolUseBlock = Schema.Schema.Type + +const AnthropicServerToolUseBlock = Schema.Struct({ + type: Schema.tag("server_tool_use"), + id: Schema.String, + name: Schema.String, + input: Schema.Unknown, + cache_control: Schema.optional(AnthropicCacheControl), +}) +type AnthropicServerToolUseBlock = Schema.Schema.Type + +// Server tool result blocks: web_search_tool_result, code_execution_tool_result, +// and web_fetch_tool_result. The provider executes the tool and inlines the +// structured result into the assistant turn — there is no client tool_result +// round-trip. We round-trip the structured `content` payload as opaque JSON so +// the next request can echo it back when continuing the conversation. 
+const AnthropicServerToolResultType = Schema.Literals([ + "web_search_tool_result", + "code_execution_tool_result", + "web_fetch_tool_result", +]) +type AnthropicServerToolResultType = Schema.Schema.Type<typeof AnthropicServerToolResultType> + +const AnthropicServerToolResultBlock = Schema.Struct({ + type: AnthropicServerToolResultType, + tool_use_id: Schema.String, + content: Schema.Unknown, + cache_control: Schema.optional(AnthropicCacheControl), +}) +type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock> + +const AnthropicToolResultBlock = Schema.Struct({ + type: Schema.tag("tool_result"), + tool_use_id: Schema.String, + content: Schema.String, + is_error: Schema.optional(Schema.Boolean), + cache_control: Schema.optional(AnthropicCacheControl), +}) + +const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicToolResultBlock]) +const AnthropicAssistantBlock = Schema.Union([ + AnthropicTextBlock, + AnthropicThinkingBlock, + AnthropicToolUseBlock, + AnthropicServerToolUseBlock, + AnthropicServerToolResultBlock, +]) +type AnthropicAssistantBlock = Schema.Schema.Type<typeof AnthropicAssistantBlock> +type AnthropicToolResultBlock = Schema.Schema.Type<typeof AnthropicToolResultBlock> + +const AnthropicMessage = Schema.Union([ + Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(AnthropicUserBlock) }), + Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(AnthropicAssistantBlock) }), +]).pipe(Schema.toTaggedUnion("role")) +type AnthropicMessage = Schema.Schema.Type<typeof AnthropicMessage> + +const AnthropicTool = Schema.Struct({ + name: Schema.String, + description: Schema.String, + input_schema: JsonObject, + cache_control: Schema.optional(AnthropicCacheControl), +}) +type AnthropicTool = Schema.Schema.Type<typeof AnthropicTool> + +const AnthropicToolChoice = Schema.Union([ + Schema.Struct({ type: Schema.Literals(["auto", "any"]) }), + Schema.Struct({ type: Schema.tag("tool"), name: Schema.String }), +]) + +const AnthropicThinking = Schema.Struct({ + type: Schema.tag("enabled"), + budget_tokens: Schema.Number, +}) + +const AnthropicBodyFields = { + model: Schema.String, + system: optionalArray(AnthropicTextBlock), + messages: Schema.Array(AnthropicMessage), + tools: optionalArray(AnthropicTool), + tool_choice: Schema.optional(AnthropicToolChoice), + stream: Schema.Literal(true), + max_tokens: Schema.Number, + temperature: Schema.optional(Schema.Number), + top_p: Schema.optional(Schema.Number), + top_k: Schema.optional(Schema.Number), + stop_sequences: optionalArray(Schema.String), + thinking: Schema.optional(AnthropicThinking), +} +const AnthropicMessagesBody = Schema.Struct(AnthropicBodyFields) +export type AnthropicMessagesBody = Schema.Schema.Type<typeof AnthropicMessagesBody> + +const AnthropicUsage = Schema.Struct({ + input_tokens: Schema.optional(Schema.Number), + output_tokens: Schema.optional(Schema.Number), + cache_creation_input_tokens: optionalNull(Schema.Number), + cache_read_input_tokens: optionalNull(Schema.Number), +}) +type AnthropicUsage = Schema.Schema.Type<typeof AnthropicUsage> + +const AnthropicStreamBlock = Schema.Struct({ + type: Schema.String, + id: Schema.optional(Schema.String), + name: Schema.optional(Schema.String), + text: Schema.optional(Schema.String), + thinking: Schema.optional(Schema.String), + signature: Schema.optional(Schema.String), + input: Schema.optional(Schema.Unknown), + // *_tool_result blocks arrive whole as content_block_start (no streaming + // delta) with the structured payload in `content` and the originating + // server_tool_use id in `tool_use_id`.
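+  // e.g. (illustrative, ids shortened) a content_block_start whose content_block is
+  //   { type: "web_search_tool_result", tool_use_id: "srvtoolu_01...", content: [...] }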
+ tool_use_id: Schema.optional(Schema.String), + content: Schema.optional(Schema.Unknown), +}) + +const AnthropicStreamDelta = Schema.Struct({ + type: Schema.optional(Schema.String), + text: Schema.optional(Schema.String), + thinking: Schema.optional(Schema.String), + partial_json: Schema.optional(Schema.String), + signature: Schema.optional(Schema.String), + stop_reason: optionalNull(Schema.String), + stop_sequence: optionalNull(Schema.String), +}) + +const AnthropicEvent = Schema.Struct({ + type: Schema.String, + index: Schema.optional(Schema.Number), + message: Schema.optional(Schema.Struct({ usage: Schema.optional(AnthropicUsage) })), + content_block: Schema.optional(AnthropicStreamBlock), + delta: Schema.optional(AnthropicStreamDelta), + usage: Schema.optional(AnthropicUsage), + error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })), +}) +type AnthropicEvent = Schema.Schema.Type<typeof AnthropicEvent> + +interface ParserState { + readonly tools: ToolStream.State + readonly usage?: Usage +} + +const invalid = ProviderShared.invalidRequest + +// ============================================================================= +// Request Lowering +// ============================================================================= +const cacheControl = (cache: CacheHint | undefined) => + cache?.type === "ephemeral" ? { type: "ephemeral" as const } : undefined + +const anthropicMetadata = (metadata: Record<string, unknown>): ProviderMetadata => ({ anthropic: metadata }) + +const signatureFromMetadata = (metadata: ProviderMetadata | undefined): string | undefined => { + const anthropic = metadata?.anthropic + if (!ProviderShared.isRecord(anthropic)) return undefined + return typeof anthropic.signature === "string" ? anthropic.signature : undefined +} + +const lowerTool = (tool: ToolDefinition): AnthropicTool => ({ + name: tool.name, + description: tool.description, + input_schema: tool.inputSchema, +}) + +const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) => + ProviderShared.matchToolChoice("Anthropic Messages", toolChoice, { + auto: () => ({ type: "auto" as const }), + none: () => undefined, + required: () => ({ type: "any" as const }), + tool: (name) => ({ type: "tool" as const, name }), + }) + +const lowerToolCall = (part: ToolCallPart): AnthropicToolUseBlock => ({ + type: "tool_use", + id: part.id, + name: part.name, + input: part.input, +}) + +const lowerServerToolCall = (part: ToolCallPart): AnthropicServerToolUseBlock => ({ + type: "server_tool_use", + id: part.id, + name: part.name, + input: part.input, +}) + +// Server tool result blocks are typed by name. Anthropic ships three today; +// extend this list when new server tools land. The block content is the +// structured payload returned by the provider, which we round-trip as-is.
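+// e.g. serverToolResultType("web_search") === "web_search_tool_result"; unknown names
+// return undefined, which lowerServerToolResult below rejects as an invalid request.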
+const serverToolResultType = (name: string): AnthropicServerToolResultType | undefined => { + if (name === "web_search") return "web_search_tool_result" + if (name === "code_execution") return "code_execution_tool_result" + if (name === "web_fetch") return "web_fetch_tool_result" + return undefined +} + +const lowerServerToolResult = Effect.fn("AnthropicMessages.lowerServerToolResult")(function* (part: ToolResultPart) { + const wireType = serverToolResultType(part.name) + if (!wireType) + return yield* invalid(`Anthropic Messages does not know how to round-trip server tool result for ${part.name}`) + return { type: wireType, tool_use_id: part.id, content: part.result.value } satisfies AnthropicServerToolResultBlock +}) + +const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (request: LLMRequest) { + const messages: AnthropicMessage[] = [] + + for (const message of request.messages) { + if (message.role === "user") { + const content: AnthropicTextBlock[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text"])) + return yield* ProviderShared.unsupportedContent("Anthropic Messages", "user", ["text"]) + content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) }) + } + messages.push({ role: "user", content }) + continue + } + + if (message.role === "assistant") { + const content: AnthropicAssistantBlock[] = [] + for (const part of message.content) { + if (part.type === "text") { + content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) }) + continue + } + if (part.type === "reasoning") { + content.push({ + type: "thinking", + thinking: part.text, + signature: part.encrypted ?? signatureFromMetadata(part.providerMetadata), + }) + continue + } + if (part.type === "tool-call") { + content.push(part.providerExecuted ? lowerServerToolCall(part) : lowerToolCall(part)) + continue + } + if (part.type === "tool-result" && part.providerExecuted) { + content.push(yield* lowerServerToolResult(part)) + continue + } + return yield* invalid( + `Anthropic Messages assistant messages only support text, reasoning, and tool-call content for now`, + ) + } + messages.push({ role: "assistant", content }) + continue + } + + const content: AnthropicToolResultBlock[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["tool-result"])) + return yield* ProviderShared.unsupportedContent("Anthropic Messages", "tool", ["tool-result"]) + content.push({ + type: "tool_result", + tool_use_id: part.id, + content: ProviderShared.toolResultText(part), + is_error: part.result.type === "error" ? true : undefined, + }) + } + messages.push({ role: "user", content }) + } + + return messages +}) + +const anthropicOptions = (request: LLMRequest) => request.providerOptions?.anthropic + +const lowerThinking = Effect.fn("AnthropicMessages.lowerThinking")(function* (request: LLMRequest) { + const thinking = anthropicOptions(request)?.thinking + if (!ProviderShared.isRecord(thinking) || thinking.type !== "enabled") return undefined + const budget = + typeof thinking.budgetTokens === "number" + ? thinking.budgetTokens + : typeof thinking.budget_tokens === "number" + ? 
thinking.budget_tokens + : undefined + if (budget === undefined) return yield* invalid("Anthropic thinking provider option requires budgetTokens") + return { type: "enabled" as const, budget_tokens: budget } +}) + +const fromRequest = Effect.fn("AnthropicMessages.fromRequest")(function* (request: LLMRequest) { + const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined + const generation = request.generation + return { + model: request.model.id, + system: + request.system.length === 0 + ? undefined + : request.system.map((part) => ({ + type: "text" as const, + text: part.text, + cache_control: cacheControl(part.cache), + })), + messages: yield* lowerMessages(request), + tools: request.tools.length === 0 || request.toolChoice?.type === "none" ? undefined : request.tools.map(lowerTool), + tool_choice: toolChoice, + stream: true as const, + max_tokens: generation?.maxTokens ?? request.model.limits.output ?? 4096, + temperature: generation?.temperature, + top_p: generation?.topP, + top_k: generation?.topK, + stop_sequences: generation?.stop, + thinking: yield* lowerThinking(request), + } +}) + +// ============================================================================= +// Stream Parsing +// ============================================================================= +const mapFinishReason = (reason: string | null | undefined): FinishReason => { + if (reason === "end_turn" || reason === "stop_sequence" || reason === "pause_turn") return "stop" + if (reason === "max_tokens") return "length" + if (reason === "tool_use") return "tool-calls" + if (reason === "refusal") return "content-filter" + return "unknown" +} + +const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => { + if (!usage) return undefined + return new Usage({ + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + cacheReadInputTokens: usage.cache_read_input_tokens ?? undefined, + cacheWriteInputTokens: usage.cache_creation_input_tokens ?? undefined, + totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, undefined), + native: usage, + }) +} + +// Anthropic emits usage on `message_start` and again on `message_delta` — the +// final delta carries the authoritative totals. Right-biased merge: each +// field prefers `right` when defined, falls back to `left`. `totalTokens` is +// recomputed from the merged input/output to stay consistent. +const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => { + if (!left) return right + if (!right) return left + const inputTokens = right.inputTokens ?? left.inputTokens + const outputTokens = right.outputTokens ?? left.outputTokens + return new Usage({ + inputTokens, + outputTokens, + cacheReadInputTokens: right.cacheReadInputTokens ?? left.cacheReadInputTokens, + cacheWriteInputTokens: right.cacheWriteInputTokens ?? left.cacheWriteInputTokens, + totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined), + native: { ...left.native, ...right.native }, + }) +} + +// Server tool result blocks come whole in `content_block_start` (no streaming +// delta sequence). We convert the payload to a `tool-result` event with +// `providerExecuted: true`. The runtime appends it to the assistant message +// for round-trip; downstream consumers can inspect `result.value` for the +// structured payload. 
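+// e.g. (illustrative) a web_search_tool_result block for call "srvtoolu_01" becomes:
+//   { type: "tool-result", id: "srvtoolu_01", name: "web_search", providerExecuted: true,
+//     result: { type: "json", value: <structured payload> } }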
+const SERVER_TOOL_RESULT_NAMES: Record<AnthropicServerToolResultType, string> = { + web_search_tool_result: "web_search", + code_execution_tool_result: "code_execution", + web_fetch_tool_result: "web_fetch", +} + +const isServerToolResultType = (type: string): type is AnthropicServerToolResultType => type in SERVER_TOOL_RESULT_NAMES + +const serverToolResultEvent = (block: NonNullable<AnthropicEvent["content_block"]>): LLMEvent | undefined => { + if (!block.type || !isServerToolResultType(block.type)) return undefined + const errorPayload = + typeof block.content === "object" && block.content !== null && "type" in block.content + ? String((block.content as Record<string, unknown>).type) + : "" + const isError = errorPayload.endsWith("_tool_result_error") + return { + type: "tool-result", + id: block.tool_use_id ?? "", + name: SERVER_TOOL_RESULT_NAMES[block.type], + result: isError ? { type: "error", value: block.content } : { type: "json", value: block.content }, + providerExecuted: true, + providerMetadata: anthropicMetadata({ blockType: block.type }), + } +} + +type StepResult = readonly [ParserState, ReadonlyArray<LLMEvent>] + +const NO_EVENTS: StepResult["1"] = [] + +const onMessageStart = (state: ParserState, event: AnthropicEvent): StepResult => { + const usage = mapUsage(event.message?.usage) + return [usage ? { ...state, usage: mergeUsage(state.usage, usage) } : state, NO_EVENTS] +} + +const onContentBlockStart = (state: ParserState, event: AnthropicEvent): StepResult => { + const block = event.content_block + if (!block) return [state, NO_EVENTS] + + if ((block.type === "tool_use" || block.type === "server_tool_use") && event.index !== undefined) { + return [ + { + ...state, + tools: ToolStream.start(state.tools, event.index, { + id: block.id ?? String(event.index), + name: block.name ?? "", + providerExecuted: block.type === "server_tool_use", + }), + }, + NO_EVENTS, + ] + } + + if (block.type === "text" && block.text) { + return [state, [{ type: "text-delta", text: block.text }]] + } + + if (block.type === "thinking" && block.thinking) { + return [ + state, + [ + { + type: "reasoning-delta", + text: block.thinking, + ...(block.signature ? { providerMetadata: anthropicMetadata({ signature: block.signature }) } : {}), + }, + ], + ] + } + + const result = serverToolResultEvent(block) + return [state, result ? [result] : NO_EVENTS] +} + +const onContentBlockDelta = Effect.fn("AnthropicMessages.onContentBlockDelta")(function* ( + state: ParserState, + event: AnthropicEvent, +) { + const delta = event.delta + + if (delta?.type === "text_delta" && delta.text) { + return [state, [{ type: "text-delta", text: delta.text }]] satisfies StepResult + } + + if (delta?.type === "thinking_delta" && delta.thinking) { + return [state, [{ type: "reasoning-delta", text: delta.thinking }]] satisfies StepResult + } + + if (delta?.type === "signature_delta" && delta.signature) { + return [ + state, + [{ type: "reasoning-delta", text: "", providerMetadata: anthropicMetadata({ signature: delta.signature }) }], + ] satisfies StepResult + } + + if (delta?.type === "input_json_delta" && event.index !== undefined) { + if (!delta.partial_json) return [state, NO_EVENTS] satisfies StepResult + const result = ToolStream.appendExisting( + ADAPTER, + state.tools, + event.index, + delta.partial_json, + "Anthropic Messages tool argument delta is missing its tool call", + ) + if (ToolStream.isError(result)) return yield* result + return [{ ...state, tools: result.tools }, result.event ?
[result.event] : NO_EVENTS] satisfies StepResult + } + + return [state, NO_EVENTS] satisfies StepResult +}) + +const onContentBlockStop = Effect.fn("AnthropicMessages.onContentBlockStop")(function* ( + state: ParserState, + event: AnthropicEvent, +) { + if (event.index === undefined) return [state, NO_EVENTS] satisfies StepResult + const result = yield* ToolStream.finish(ADAPTER, state.tools, event.index) + return [{ ...state, tools: result.tools }, result.event ? [result.event] : NO_EVENTS] satisfies StepResult +}) + +const onMessageDelta = (state: ParserState, event: AnthropicEvent): StepResult => { + const usage = mergeUsage(state.usage, mapUsage(event.usage)) + return [ + { ...state, usage }, + [ + { + type: "request-finish", + reason: mapFinishReason(event.delta?.stop_reason), + usage, + ...(event.delta?.stop_sequence + ? { providerMetadata: anthropicMetadata({ stopSequence: event.delta.stop_sequence }) } + : {}), + }, + ], + ] +} + +const onError = (state: ParserState, event: AnthropicEvent): StepResult => [ + state, + [{ type: "provider-error", message: event.error?.message ?? "Anthropic Messages stream error" }], +] + +const step = (state: ParserState, event: AnthropicEvent) => { + if (event.type === "message_start") return Effect.succeed(onMessageStart(state, event)) + if (event.type === "content_block_start") return Effect.succeed(onContentBlockStart(state, event)) + if (event.type === "content_block_delta") return onContentBlockDelta(state, event) + if (event.type === "content_block_stop") return onContentBlockStop(state, event) + if (event.type === "message_delta") return Effect.succeed(onMessageDelta(state, event)) + if (event.type === "error") return Effect.succeed(onError(state, event)) + return Effect.succeed([state, NO_EVENTS]) +} + +// ============================================================================= +// Protocol And Anthropic Route +// ============================================================================= +/** + * The Anthropic Messages protocol — request body construction, body schema, + * and the streaming-event state machine. Used by native Anthropic Cloud and + * (once registered) Vertex Anthropic / Bedrock-hosted Anthropic passthrough. 
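+ *
+ * A minimal usage sketch (illustrative: the model id is a placeholder and the
+ * exact model-input fields are whatever `RouteModelInput` defines; auth is
+ * carried by this route's `x-api-key` header):
+ *
+ *   const sonnet = model({ id: "claude-sonnet-4-5" })
+ *   const response = yield* LLM.generate({ request: LLM.request({ model: sonnet, prompt: "hello" }) })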
+ */ +export const protocol = Protocol.make({ + id: ADAPTER, + body: { + schema: AnthropicMessagesBody, + from: fromRequest, + }, + stream: { + event: Protocol.jsonEvent(AnthropicEvent), + initial: () => ({ tools: ToolStream.empty() }), + step, + }, +}) + +export const route = Route.make({ + id: ADAPTER, + protocol, + endpoint: Endpoint.path(PATH), + auth: Auth.apiKeyHeader("x-api-key"), + framing: Framing.sse, + headers: () => ({ "anthropic-version": "2023-06-01" }), +}) + +// ============================================================================= +// Model Helper +// ============================================================================= +export const model = Route.model(route, { + provider: "anthropic", + baseURL: DEFAULT_BASE_URL, +}) + +export * as AnthropicMessages from "./anthropic-messages" diff --git a/packages/llm/src/protocols/bedrock-converse.ts b/packages/llm/src/protocols/bedrock-converse.ts new file mode 100644 index 000000000000..09176104dfbc --- /dev/null +++ b/packages/llm/src/protocols/bedrock-converse.ts @@ -0,0 +1,531 @@ +import { Effect, Schema } from "effect" +import { Route, type RouteModelInput } from "../route/client" +import { Endpoint } from "../route/endpoint" +import { Protocol } from "../route/protocol" +import { + Usage, + type CacheHint, + type FinishReason, + type LLMEvent, + type LLMRequest, + type ToolCallPart, + type ToolDefinition, + type ToolResultPart, +} from "../schema" +import { BedrockEventStream } from "./bedrock-event-stream" +import { JsonObject, optionalArray, ProviderShared } from "./shared" +import { BedrockAuth, type Credentials as BedrockCredentials } from "./utils/bedrock-auth" +import { BedrockCache } from "./utils/bedrock-cache" +import { BedrockMedia } from "./utils/bedrock-media" +import { ToolStream } from "./utils/tool-stream" + +const ADAPTER = "bedrock-converse" + +export type { Credentials as BedrockCredentials } from "./utils/bedrock-auth" + +// ============================================================================= +// Public Model Input +// ============================================================================= +export type BedrockConverseModelInput = RouteModelInput & { + /** + * Bearer API key (Bedrock's newer API key auth). Sets the `Authorization` + * header and bypasses SigV4 signing. Mutually exclusive with `credentials`. + */ + readonly apiKey?: string + /** + * AWS credentials for SigV4 signing. The route signs each request at + * `toHttp` time using `aws4fetch`. Mutually exclusive with `apiKey`. 
+ */ + readonly credentials?: BedrockCredentials + readonly headers?: Record<string, string> +} + +// ============================================================================= +// Request Body Schema +// ============================================================================= +const BedrockTextBlock = Schema.Struct({ + text: Schema.String, +}) +type BedrockTextBlock = Schema.Schema.Type<typeof BedrockTextBlock> + +const BedrockToolUseBlock = Schema.Struct({ + toolUse: Schema.Struct({ + toolUseId: Schema.String, + name: Schema.String, + input: Schema.Unknown, + }), +}) +type BedrockToolUseBlock = Schema.Schema.Type<typeof BedrockToolUseBlock> + +const BedrockToolResultContentItem = Schema.Union([ + Schema.Struct({ text: Schema.String }), + Schema.Struct({ json: Schema.Unknown }), +]) + +const BedrockToolResultBlock = Schema.Struct({ + toolResult: Schema.Struct({ + toolUseId: Schema.String, + content: Schema.Array(BedrockToolResultContentItem), + status: Schema.optional(Schema.Literals(["success", "error"])), + }), +}) +type BedrockToolResultBlock = Schema.Schema.Type<typeof BedrockToolResultBlock> + +const BedrockReasoningBlock = Schema.Struct({ + reasoningContent: Schema.Struct({ + reasoningText: Schema.optional( + Schema.Struct({ + text: Schema.String, + signature: Schema.optional(Schema.String), + }), + ), + }), +}) + +const BedrockUserBlock = Schema.Union([ + BedrockTextBlock, + BedrockMedia.ImageBlock, + BedrockMedia.DocumentBlock, + BedrockToolResultBlock, + BedrockCache.CachePointBlock, +]) +type BedrockUserBlock = Schema.Schema.Type<typeof BedrockUserBlock> + +const BedrockAssistantBlock = Schema.Union([ + BedrockTextBlock, + BedrockReasoningBlock, + BedrockToolUseBlock, + BedrockCache.CachePointBlock, +]) +type BedrockAssistantBlock = Schema.Schema.Type<typeof BedrockAssistantBlock> + +const BedrockMessage = Schema.Union([ + Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(BedrockUserBlock) }), + Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(BedrockAssistantBlock) }), +]).pipe(Schema.toTaggedUnion("role")) +type BedrockMessage = Schema.Schema.Type<typeof BedrockMessage> + +const BedrockSystemBlock = Schema.Union([BedrockTextBlock, BedrockCache.CachePointBlock]) +type BedrockSystemBlock = Schema.Schema.Type<typeof BedrockSystemBlock> + +const BedrockTool = Schema.Struct({ + toolSpec: Schema.Struct({ + name: Schema.String, + description: Schema.String, + inputSchema: Schema.Struct({ + json: JsonObject, + }), + }), +}) +type BedrockTool = Schema.Schema.Type<typeof BedrockTool> + +const BedrockToolChoice = Schema.Union([ + Schema.Struct({ auto: Schema.Struct({}) }), + Schema.Struct({ any: Schema.Struct({}) }), + Schema.Struct({ tool: Schema.Struct({ name: Schema.String }) }), +]) + +const BedrockBodyFields = { + modelId: Schema.String, + messages: Schema.Array(BedrockMessage), + system: optionalArray(BedrockSystemBlock), + inferenceConfig: Schema.optional( + Schema.Struct({ + maxTokens: Schema.optional(Schema.Number), + temperature: Schema.optional(Schema.Number), + topP: Schema.optional(Schema.Number), + stopSequences: optionalArray(Schema.String), + }), + ), + toolConfig: Schema.optional( + Schema.Struct({ + tools: Schema.Array(BedrockTool), + toolChoice: Schema.optional(BedrockToolChoice), + }), + ), + additionalModelRequestFields: Schema.optional(JsonObject), +} +const BedrockConverseBody = Schema.Struct(BedrockBodyFields) +export type BedrockConverseBody = Schema.Schema.Type<typeof BedrockConverseBody> + +const BedrockUsageSchema = Schema.Struct({ + inputTokens: Schema.optional(Schema.Number), + outputTokens: Schema.optional(Schema.Number), + totalTokens: Schema.optional(Schema.Number), + cacheReadInputTokens: Schema.optional(Schema.Number), + cacheWriteInputTokens:
Schema.optional(Schema.Number), +}) +type BedrockUsageSchema = Schema.Schema.Type<typeof BedrockUsageSchema> + +// Streaming event shape — the AWS event stream wraps each JSON payload by its +// `:event-type` header (e.g. `messageStart`, `contentBlockDelta`). We +// reconstruct that wrapping in `BedrockEventStream.framing` so the event schema +// can stay a plain discriminated record. +const BedrockEvent = Schema.Struct({ + messageStart: Schema.optional(Schema.Struct({ role: Schema.String })), + contentBlockStart: Schema.optional( + Schema.Struct({ + contentBlockIndex: Schema.Number, + start: Schema.optional( + Schema.Struct({ + toolUse: Schema.optional(Schema.Struct({ toolUseId: Schema.String, name: Schema.String })), + }), + ), + }), + ), + contentBlockDelta: Schema.optional( + Schema.Struct({ + contentBlockIndex: Schema.Number, + delta: Schema.optional( + Schema.Struct({ + text: Schema.optional(Schema.String), + toolUse: Schema.optional(Schema.Struct({ input: Schema.String })), + reasoningContent: Schema.optional( + Schema.Struct({ + text: Schema.optional(Schema.String), + signature: Schema.optional(Schema.String), + }), + ), + }), + ), + }), + ), + contentBlockStop: Schema.optional(Schema.Struct({ contentBlockIndex: Schema.Number })), + messageStop: Schema.optional( + Schema.Struct({ + stopReason: Schema.String, + additionalModelResponseFields: Schema.optional(Schema.Unknown), + }), + ), + metadata: Schema.optional( + Schema.Struct({ + usage: Schema.optional(BedrockUsageSchema), + metrics: Schema.optional(Schema.Unknown), + }), + ), + internalServerException: Schema.optional(Schema.Struct({ message: Schema.String })), + modelStreamErrorException: Schema.optional(Schema.Struct({ message: Schema.String })), + validationException: Schema.optional(Schema.Struct({ message: Schema.String })), + throttlingException: Schema.optional(Schema.Struct({ message: Schema.String })), + serviceUnavailableException: Schema.optional(Schema.Struct({ message: Schema.String })), +}) +type BedrockEvent = Schema.Schema.Type<typeof BedrockEvent> + +// ============================================================================= +// Request Lowering +// ============================================================================= +const lowerTool = (tool: ToolDefinition): BedrockTool => ({ + toolSpec: { + name: tool.name, + description: tool.description, + inputSchema: { json: tool.inputSchema }, + }, +}) + +const textWithCache = ( + text: string, + cache: CacheHint | undefined, +): Array<BedrockTextBlock | BedrockCache.CachePointBlock> => { + const cachePoint = BedrockCache.block(cache) + return cachePoint ? [{ text }, cachePoint] : [{ text }] +} + +const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) => + ProviderShared.matchToolChoice("Bedrock Converse", toolChoice, { + auto: () => ({ auto: {} }) as const, + none: () => undefined, + required: () => ({ any: {} }) as const, + tool: (name) => ({ tool: { name } }) as const, + }) + +const lowerToolCall = (part: ToolCallPart): BedrockToolUseBlock => ({ + toolUse: { + toolUseId: part.id, + name: part.name, + input: part.input, + }, +}) + +const lowerToolResult = (part: ToolResultPart): BedrockToolResultBlock => ({ + toolResult: { + toolUseId: part.id, + content: + part.result.type === "text" || part.result.type === "error" + ? [{ text: ProviderShared.toolResultText(part) }] + : [{ json: part.result.value }], + status: part.result.type === "error" ?
"error" : "success", + }, +}) + +const lowerMessages = Effect.fn("BedrockConverse.lowerMessages")(function* (request: LLMRequest) { + const messages: BedrockMessage[] = [] + + for (const message of request.messages) { + if (message.role === "user") { + const content: BedrockUserBlock[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text", "media"])) + return yield* ProviderShared.unsupportedContent("Bedrock Converse", "user", ["text", "media"]) + if (part.type === "text") { + content.push(...textWithCache(part.text, part.cache)) + continue + } + if (part.type === "media") { + content.push(yield* BedrockMedia.lower(part)) + continue + } + } + messages.push({ role: "user", content }) + continue + } + + if (message.role === "assistant") { + const content: BedrockAssistantBlock[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"])) + return yield* ProviderShared.unsupportedContent("Bedrock Converse", "assistant", [ + "text", + "reasoning", + "tool-call", + ]) + if (part.type === "text") { + content.push(...textWithCache(part.text, part.cache)) + continue + } + if (part.type === "reasoning") { + content.push({ + reasoningContent: { + reasoningText: { text: part.text, signature: part.encrypted }, + }, + }) + continue + } + if (part.type === "tool-call") { + content.push(lowerToolCall(part)) + continue + } + } + messages.push({ role: "assistant", content }) + continue + } + + const content: BedrockToolResultBlock[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["tool-result"])) + return yield* ProviderShared.unsupportedContent("Bedrock Converse", "tool", ["tool-result"]) + content.push(lowerToolResult(part)) + } + messages.push({ role: "user", content }) + } + + return messages +}) + +// System prompts share the cache-point convention: emit the text block, then +// optionally a positional `cachePoint` marker. +const lowerSystem = (system: ReadonlyArray): BedrockSystemBlock[] => + system.flatMap((part) => textWithCache(part.text, part.cache)) + +const fromRequest = Effect.fn("BedrockConverse.fromRequest")(function* (request: LLMRequest) { + const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined + const generation = request.generation + return { + modelId: request.model.id, + messages: yield* lowerMessages(request), + system: request.system.length === 0 ? undefined : lowerSystem(request.system), + inferenceConfig: + generation?.maxTokens === undefined && + generation?.temperature === undefined && + generation?.topP === undefined && + (generation?.stop === undefined || generation.stop.length === 0) + ? undefined + : { + maxTokens: generation?.maxTokens, + temperature: generation?.temperature, + topP: generation?.topP, + stopSequences: generation?.stop, + }, + toolConfig: + request.tools.length > 0 && request.toolChoice?.type !== "none" + ? 
{ tools: request.tools.map(lowerTool), toolChoice } + : undefined, + } +}) + +// ============================================================================= +// Stream Parsing +// ============================================================================= +const mapFinishReason = (reason: string): FinishReason => { + if (reason === "end_turn" || reason === "stop_sequence") return "stop" + if (reason === "max_tokens") return "length" + if (reason === "tool_use") return "tool-calls" + if (reason === "content_filtered" || reason === "guardrail_intervened") return "content-filter" + return "unknown" +} + +const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => { + if (!usage) return undefined + return new Usage({ + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens), + cacheReadInputTokens: usage.cacheReadInputTokens, + cacheWriteInputTokens: usage.cacheWriteInputTokens, + native: usage, + }) +} + +interface ParserState { + readonly tools: ToolStream.State + // Bedrock splits the finish into `messageStop` (carries `stopReason`) and + // `metadata` (carries usage). Hold the terminal event in state so `onHalt` + // can emit exactly one finish after both chunks have had a chance to arrive. + readonly pendingFinish: { readonly reason: FinishReason; readonly usage?: Usage } | undefined +} + +const step = (state: ParserState, event: BedrockEvent) => + Effect.gen(function* () { + if (event.contentBlockStart?.start?.toolUse) { + const index = event.contentBlockStart.contentBlockIndex + return [ + { + ...state, + tools: ToolStream.start(state.tools, index, { + id: event.contentBlockStart.start.toolUse.toolUseId, + name: event.contentBlockStart.start.toolUse.name, + }), + }, + [], + ] as const + } + + if (event.contentBlockDelta?.delta?.text) { + return [state, [{ type: "text-delta" as const, text: event.contentBlockDelta.delta.text }]] as const + } + + if (event.contentBlockDelta?.delta?.reasoningContent?.text) { + return [ + state, + [{ type: "reasoning-delta" as const, text: event.contentBlockDelta.delta.reasoningContent.text }], + ] as const + } + + if (event.contentBlockDelta?.delta?.toolUse) { + const index = event.contentBlockDelta.contentBlockIndex + const result = ToolStream.appendExisting( + ADAPTER, + state.tools, + index, + event.contentBlockDelta.delta.toolUse.input, + "Bedrock Converse tool delta is missing its tool call", + ) + if (ToolStream.isError(result)) return yield* result + return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const + } + + if (event.contentBlockStop) { + const result = yield* ToolStream.finish(ADAPTER, state.tools, event.contentBlockStop.contentBlockIndex) + return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const + } + + if (event.messageStop) { + return [ + { + ...state, + pendingFinish: { reason: mapFinishReason(event.messageStop.stopReason), usage: state.pendingFinish?.usage }, + }, + [], + ] as const + } + + if (event.metadata) { + const usage = mapUsage(event.metadata.usage) + return [{ ...state, pendingFinish: { reason: state.pendingFinish?.reason ?? "stop", usage } }, []] as const + } + + if (event.internalServerException || event.modelStreamErrorException || event.serviceUnavailableException) { + const message = + event.internalServerException?.message ?? + event.modelStreamErrorException?.message ?? + event.serviceUnavailableException?.message ?? 
+ "Bedrock Converse stream error" + return [state, [{ type: "provider-error" as const, message, retryable: true }]] as const + } + + if (event.validationException || event.throttlingException) { + const message = + event.validationException?.message ?? event.throttlingException?.message ?? "Bedrock Converse error" + return [ + state, + [{ type: "provider-error" as const, message, retryable: event.throttlingException !== undefined }], + ] as const + } + + return [state, []] as const + }) + +const framing = BedrockEventStream.framing(ADAPTER) + +const onHalt = (state: ParserState): ReadonlyArray => + state.pendingFinish + ? [{ type: "request-finish", reason: state.pendingFinish.reason, usage: state.pendingFinish.usage }] + : [] + +// ============================================================================= +// Protocol And Bedrock Route +// ============================================================================= +/** + * The Bedrock Converse protocol — request body construction, body schema, and + * the streaming-event state machine. + */ +export const protocol = Protocol.make({ + id: ADAPTER, + body: { + schema: BedrockConverseBody, + from: fromRequest, + }, + stream: { + event: BedrockEvent, + initial: () => ({ tools: ToolStream.empty(), pendingFinish: undefined }), + step, + onHalt, + }, +}) + +export const route = Route.make({ + id: ADAPTER, + protocol, + // Bedrock's URL embeds the region in the host (set on `model.baseURL` by + // the provider helper from credentials) and the validated modelId in the + // path. We read the validated body so the URL matches the body that gets + // signed. + endpoint: Endpoint.path( + ({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`, + ), + auth: BedrockAuth.auth, + framing, +}) + +export const nativeCredentials = BedrockAuth.nativeCredentials + +const bedrockModel = Route.model( + route, + { + provider: "bedrock", + }, + { + mapInput: (input: BedrockConverseModelInput) => { + const { credentials, ...rest } = input + const region = credentials?.region ?? "us-east-1" + return { + ...rest, + baseURL: rest.baseURL ?? `https://bedrock-runtime.${region}.amazonaws.com`, + native: nativeCredentials(input.native, credentials), + } + }, + }, +) + +export const model = bedrockModel + +export * as BedrockConverse from "./bedrock-converse" diff --git a/packages/llm/src/protocols/bedrock-event-stream.ts b/packages/llm/src/protocols/bedrock-event-stream.ts new file mode 100644 index 000000000000..d07d7de47599 --- /dev/null +++ b/packages/llm/src/protocols/bedrock-event-stream.ts @@ -0,0 +1,87 @@ +import { EventStreamCodec } from "@smithy/eventstream-codec" +import { fromUtf8, toUtf8 } from "@smithy/util-utf8" +import { Effect, Stream } from "effect" +import type { Framing } from "../route/framing" +import { ProviderShared } from "./shared" + +// Bedrock streams responses using the AWS event stream binary protocol — each +// frame is `[length:4][headers-length:4][prelude-crc:4][headers][payload][crc:4]`. +// We use `@smithy/eventstream-codec` to validate framing and CRCs, then +// reconstruct the JSON wrapping by `:event-type` so the chunk schema can match. +const eventCodec = new EventStreamCodec(toUtf8, fromUtf8) +const utf8 = new TextDecoder() + +// Cursor-tracking buffer state. Bytes accumulate in `buffer`; `offset` is the +// read position. Reading by `subarray` is zero-copy. We only allocate a fresh +// buffer when a new network chunk arrives and we need to append. 
+interface FrameBufferState { + readonly buffer: Uint8Array + readonly offset: number +} + +const initialFrameBuffer: FrameBufferState = { buffer: new Uint8Array(0), offset: 0 } + +const appendChunk = (state: FrameBufferState, chunk: Uint8Array): FrameBufferState => { + const remaining = state.buffer.length - state.offset + // Compact: drop the consumed prefix and append the new chunk in one alloc. + // This bounds buffer growth to at most one network chunk past the live + // window, regardless of stream length. + const next = new Uint8Array(remaining + chunk.length) + next.set(state.buffer.subarray(state.offset), 0) + next.set(chunk, remaining) + return { buffer: next, offset: 0 } +} + +const consumeFrames = (route: string) => (state: FrameBufferState, chunk: Uint8Array) => + Effect.gen(function* () { + let cursor = appendChunk(state, chunk) + const out: object[] = [] + while (cursor.buffer.length - cursor.offset >= 4) { + const view = cursor.buffer.subarray(cursor.offset) + const totalLength = new DataView(view.buffer, view.byteOffset, view.byteLength).getUint32(0, false) + if (view.length < totalLength) break + + const decoded = yield* Effect.try({ + try: () => eventCodec.decode(view.subarray(0, totalLength)), + catch: (error) => + ProviderShared.eventError( + route, + `Failed to decode Bedrock Converse event-stream frame: ${ + error instanceof Error ? error.message : String(error) + }`, + ), + }) + cursor = { buffer: cursor.buffer, offset: cursor.offset + totalLength } + + if (decoded.headers[":message-type"]?.value !== "event") continue + const eventType = decoded.headers[":event-type"]?.value + if (typeof eventType !== "string") continue + const payload = utf8.decode(decoded.body) + if (!payload) continue + // The AWS event stream pads short payloads with a `p` field. Drop it + // before handing the object to the chunk schema. JSON decode goes + // through the shared Schema-driven codec to satisfy the package rule + // against ad-hoc `JSON.parse` calls. + const parsed = (yield* ProviderShared.parseJson( + route, + payload, + "Failed to parse Bedrock Converse event-stream payload", + )) as Record + delete parsed.p + out.push({ [eventType]: parsed }) + } + return [cursor, out] as const + }) + +/** + * AWS event-stream framing for Bedrock Converse. Each frame is decoded by + * `@smithy/eventstream-codec` (length + header + payload + CRC) and rewrapped + * under its `:event-type` header so the chunk schema can match the JSON + * payload directly. 
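+ *
+ * e.g. a frame whose `:event-type` header is `contentBlockDelta` and whose
+ * payload is `{"contentBlockIndex":0,"delta":{"text":"Hi"}}` reaches the chunk
+ * schema as `{ contentBlockDelta: { contentBlockIndex: 0, delta: { text: "Hi" } } }`.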
+ */ +export const framing = (route: string): Framing => ({ + id: "aws-event-stream", + frame: (bytes) => bytes.pipe(Stream.mapAccumEffect(() => initialFrameBuffer, consumeFrames(route))), +}) + +export * as BedrockEventStream from "./bedrock-event-stream" diff --git a/packages/llm/src/protocols/gemini.ts b/packages/llm/src/protocols/gemini.ts new file mode 100644 index 000000000000..0d2bdc8e1497 --- /dev/null +++ b/packages/llm/src/protocols/gemini.ts @@ -0,0 +1,397 @@ +import { Effect, Schema } from "effect" +import { Route } from "../route/client" +import { Auth } from "../route/auth" +import { Endpoint } from "../route/endpoint" +import { Framing } from "../route/framing" +import { Protocol } from "../route/protocol" +import { + Usage, + type FinishReason, + type LLMEvent, + type LLMRequest, + type MediaPart, + type TextPart, + type ToolCallPart, + type ToolDefinition, +} from "../schema" +import { JsonObject, optionalArray, ProviderShared } from "./shared" +import { GeminiToolSchema } from "./utils/gemini-tool-schema" + +const ADAPTER = "gemini" +export const DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta" + +// ============================================================================= +// Request Body Schema +// ============================================================================= +const GeminiTextPart = Schema.Struct({ + text: Schema.String, + thought: Schema.optional(Schema.Boolean), + thoughtSignature: Schema.optional(Schema.String), +}) + +const GeminiInlineDataPart = Schema.Struct({ + inlineData: Schema.Struct({ + mimeType: Schema.String, + data: Schema.String, + }), +}) + +const GeminiFunctionCallPart = Schema.Struct({ + functionCall: Schema.Struct({ + name: Schema.String, + args: Schema.Unknown, + }), + thoughtSignature: Schema.optional(Schema.String), +}) + +const GeminiFunctionResponsePart = Schema.Struct({ + functionResponse: Schema.Struct({ + name: Schema.String, + response: Schema.Unknown, + }), +}) + +const GeminiContentPart = Schema.Union([ + GeminiTextPart, + GeminiInlineDataPart, + GeminiFunctionCallPart, + GeminiFunctionResponsePart, +]) + +const GeminiContent = Schema.Struct({ + role: Schema.Literals(["user", "model"]), + parts: Schema.Array(GeminiContentPart), +}) +type GeminiContent = Schema.Schema.Type + +const GeminiSystemInstruction = Schema.Struct({ + parts: Schema.Array(Schema.Struct({ text: Schema.String })), +}) + +const GeminiFunctionDeclaration = Schema.Struct({ + name: Schema.String, + description: Schema.String, + parameters: Schema.optional(JsonObject), +}) + +const GeminiTool = Schema.Struct({ + functionDeclarations: Schema.Array(GeminiFunctionDeclaration), +}) + +const GeminiToolConfig = Schema.Struct({ + functionCallingConfig: Schema.Struct({ + mode: Schema.Literals(["AUTO", "NONE", "ANY"]), + allowedFunctionNames: optionalArray(Schema.String), + }), +}) + +const GeminiThinkingConfig = Schema.Struct({ + thinkingBudget: Schema.optional(Schema.Number), + includeThoughts: Schema.optional(Schema.Boolean), +}) + +const GeminiGenerationConfig = Schema.Struct({ + maxOutputTokens: Schema.optional(Schema.Number), + temperature: Schema.optional(Schema.Number), + topP: Schema.optional(Schema.Number), + topK: Schema.optional(Schema.Number), + stopSequences: optionalArray(Schema.String), + thinkingConfig: Schema.optional(GeminiThinkingConfig), +}) + +const GeminiBodyFields = { + contents: Schema.Array(GeminiContent), + systemInstruction: Schema.optional(GeminiSystemInstruction), + tools: optionalArray(GeminiTool), + toolConfig: 
Schema.optional(GeminiToolConfig), + generationConfig: Schema.optional(GeminiGenerationConfig), +} +const GeminiBody = Schema.Struct(GeminiBodyFields) +export type GeminiBody = Schema.Schema.Type<typeof GeminiBody> + +const GeminiUsage = Schema.Struct({ + cachedContentTokenCount: Schema.optional(Schema.Number), + thoughtsTokenCount: Schema.optional(Schema.Number), + promptTokenCount: Schema.optional(Schema.Number), + candidatesTokenCount: Schema.optional(Schema.Number), + totalTokenCount: Schema.optional(Schema.Number), +}) +type GeminiUsage = Schema.Schema.Type<typeof GeminiUsage> + +const GeminiCandidate = Schema.Struct({ + content: Schema.optional(GeminiContent), + finishReason: Schema.optional(Schema.String), +}) + +const GeminiEvent = Schema.Struct({ + candidates: optionalArray(GeminiCandidate), + usageMetadata: Schema.optional(GeminiUsage), +}) +type GeminiEvent = Schema.Schema.Type<typeof GeminiEvent> + +interface ParserState { + readonly finishReason?: string + readonly hasToolCalls: boolean + readonly nextToolCallId: number + readonly usage?: Usage +} + +const invalid = ProviderShared.invalidRequest + +const mediaData = ProviderShared.mediaBytes + +// ============================================================================= +// Tool Schema Conversion +// ============================================================================= +// Tool-schema conversion has two distinct concerns: +// +// 1. Sanitize — fix common authoring mistakes Gemini rejects: integer/number +// enums (must be strings), `required` entries that don't match a property, +// untyped arrays (`items` must be present), and `properties`/`required` +// keys on non-object scalars. Mirrors OpenCode's historical Gemini rules. +// +// 2. Project — lossy mapping from JSON Schema to Gemini's schema dialect: +// drop empty objects, derive `nullable: true` from `type: [..., "null"]`, +// coerce `const` to `[const]` enum, recurse properties/items, propagate +// only an allowlisted set of keys (description, required, format, type, +// properties, items, allOf, anyOf, oneOf, minLength). Anything outside the +// allowlist (e.g. `additionalProperties`, `$ref`) is silently dropped. +// +// Sanitize runs first, then project. The implementation lives in +// `utils/gemini-tool-schema` so this protocol keeps the same shape as the other +// provider protocols. + +// ============================================================================= +// Request Lowering +// ============================================================================= +const lowerTool = (tool: ToolDefinition) => ({ + name: tool.name, + description: tool.description, + parameters: GeminiToolSchema.convert(tool.inputSchema), +}) + +const lowerToolConfig = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) => + ProviderShared.matchToolChoice("Gemini", toolChoice, { + auto: () => ({ functionCallingConfig: { mode: "AUTO" as const } }), + none: () => ({ functionCallingConfig: { mode: "NONE" as const } }), + required: () => ({ functionCallingConfig: { mode: "ANY" as const } }), + tool: (name) => ({ functionCallingConfig: { mode: "ANY" as const, allowedFunctionNames: [name] } }), + }) + +const lowerUserPart = (part: TextPart | MediaPart) => + part.type === "text" ?
{ text: part.text } : { inlineData: { mimeType: part.mediaType, data: mediaData(part) } } + +const lowerToolCall = (part: ToolCallPart) => ({ + functionCall: { name: part.name, args: part.input }, +}) + +const lowerMessages = Effect.fn("Gemini.lowerMessages")(function* (request: LLMRequest) { + const contents: GeminiContent[] = [] + + for (const message of request.messages) { + if (message.role === "user") { + const parts: Array> = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text", "media"])) + return yield* ProviderShared.unsupportedContent("Gemini", "user", ["text", "media"]) + parts.push(lowerUserPart(part)) + } + contents.push({ role: "user", parts }) + continue + } + + if (message.role === "assistant") { + const parts: Array> = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"])) + return yield* ProviderShared.unsupportedContent("Gemini", "assistant", ["text", "reasoning", "tool-call"]) + if (part.type === "text") { + parts.push({ text: part.text }) + continue + } + if (part.type === "reasoning") { + parts.push({ text: part.text, thought: true }) + continue + } + if (part.type === "tool-call") { + parts.push(lowerToolCall(part)) + continue + } + } + contents.push({ role: "model", parts }) + continue + } + + const parts: Array> = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["tool-result"])) + return yield* ProviderShared.unsupportedContent("Gemini", "tool", ["tool-result"]) + parts.push({ + functionResponse: { + name: part.name, + response: { + name: part.name, + content: ProviderShared.toolResultText(part), + }, + }, + }) + } + contents.push({ role: "user", parts }) + } + + return contents +}) + +const geminiOptions = (request: LLMRequest) => request.providerOptions?.gemini + +const thinkingConfig = (request: LLMRequest) => { + const value = geminiOptions(request)?.thinkingConfig + if (!ProviderShared.isRecord(value)) return undefined + const result = { + thinkingBudget: typeof value.thinkingBudget === "number" ? value.thinkingBudget : undefined, + includeThoughts: typeof value.includeThoughts === "boolean" ? value.includeThoughts : undefined, + } + return Object.values(result).some((item) => item !== undefined) ? result : undefined +} + +const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMRequest) { + const toolsEnabled = request.tools.length > 0 && request.toolChoice?.type !== "none" + const generation = request.generation + const generationConfig = { + maxOutputTokens: generation?.maxTokens, + temperature: generation?.temperature, + topP: generation?.topP, + topK: generation?.topK, + stopSequences: generation?.stop, + thinkingConfig: thinkingConfig(request), + } + + return { + contents: yield* lowerMessages(request), + systemInstruction: + request.system.length === 0 ? undefined : { parts: [{ text: ProviderShared.joinText(request.system) }] }, + tools: toolsEnabled ? [{ functionDeclarations: request.tools.map(lowerTool) }] : undefined, + toolConfig: toolsEnabled && request.toolChoice ? yield* lowerToolConfig(request.toolChoice) : undefined, + generationConfig: Object.values(generationConfig).some((value) => value !== undefined) + ? 
generationConfig + : undefined, + } +}) + +// ============================================================================= +// Stream Parsing +// ============================================================================= +const mapUsage = (usage: GeminiUsage | undefined) => { + if (!usage) return undefined + return new Usage({ + inputTokens: usage.promptTokenCount, + outputTokens: usage.candidatesTokenCount, + reasoningTokens: usage.thoughtsTokenCount, + cacheReadInputTokens: usage.cachedContentTokenCount, + totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, usage.candidatesTokenCount, usage.totalTokenCount), + native: usage, + }) +} + +const mapFinishReason = (finishReason: string | undefined, hasToolCalls: boolean): FinishReason => { + if (finishReason === "STOP") return hasToolCalls ? "tool-calls" : "stop" + if (finishReason === "MAX_TOKENS") return "length" + if ( + finishReason === "IMAGE_SAFETY" || + finishReason === "RECITATION" || + finishReason === "SAFETY" || + finishReason === "BLOCKLIST" || + finishReason === "PROHIBITED_CONTENT" || + finishReason === "SPII" + ) + return "content-filter" + if (finishReason === "MALFORMED_FUNCTION_CALL") return "error" + return "unknown" +} + +const finish = (state: ParserState): ReadonlyArray => + state.finishReason || state.usage + ? [{ type: "request-finish", reason: mapFinishReason(state.finishReason, state.hasToolCalls), usage: state.usage }] + : [] + +const step = (state: ParserState, event: GeminiEvent) => { + const nextState = { + ...state, + usage: event.usageMetadata ? (mapUsage(event.usageMetadata) ?? state.usage) : state.usage, + } + const candidate = event.candidates?.[0] + if (!candidate?.content) + return Effect.succeed([ + { ...nextState, finishReason: candidate?.finishReason ?? nextState.finishReason }, + [], + ] as const) + + const events: LLMEvent[] = [] + let hasToolCalls = nextState.hasToolCalls + let nextToolCallId = nextState.nextToolCallId + + for (const part of candidate.content.parts) { + if ("text" in part && part.text.length > 0) { + events.push({ type: part.thought ? "reasoning-delta" : "text-delta", text: part.text }) + continue + } + + if ("functionCall" in part) { + const input = part.functionCall.args + const id = `tool_${nextToolCallId++}` + events.push({ type: "tool-call", id, name: part.functionCall.name, input }) + hasToolCalls = true + } + } + + return Effect.succeed([ + { + ...nextState, + hasToolCalls, + nextToolCallId, + finishReason: candidate.finishReason ?? nextState.finishReason, + }, + events, + ] as const) +} + +// ============================================================================= +// Protocol And Gemini Route +// ============================================================================= +/** + * The Gemini protocol — request body construction, body schema, and the + * streaming-event state machine. Used by Google AI Studio Gemini and (once + * registered) Vertex Gemini. + */ +export const protocol = Protocol.make({ + id: ADAPTER, + body: { + schema: GeminiBody, + from: fromRequest, + }, + stream: { + event: Protocol.jsonEvent(GeminiEvent), + initial: () => ({ hasToolCalls: false, nextToolCallId: 0 }), + step, + onHalt: finish, + }, +}) + +export const route = Route.make({ + id: ADAPTER, + protocol, + // Gemini's path embeds the model id and pins SSE framing at the URL level. 
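+  // Illustrative resolved URL (the model id here is an assumption, not from
+  // this diff):
+  //   {baseURL}/models/gemini-2.5-flash:streamGenerateContent?alt=sse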
+ endpoint: Endpoint.path(({ request }) => `/models/${request.model.id}:streamGenerateContent?alt=sse`), + auth: Auth.apiKeyHeader("x-goog-api-key"), + framing: Framing.sse, +}) + +// ============================================================================= +// Model Helper +// ============================================================================= +export const model = Route.model(route, { + provider: "google", + baseURL: DEFAULT_BASE_URL, +}) + +export * as Gemini from "./gemini" diff --git a/packages/llm/src/protocols/index.ts b/packages/llm/src/protocols/index.ts new file mode 100644 index 000000000000..bd8c8d3d9d9b --- /dev/null +++ b/packages/llm/src/protocols/index.ts @@ -0,0 +1,6 @@ +export * as AnthropicMessages from "./anthropic-messages" +export * as BedrockConverse from "./bedrock-converse" +export * as Gemini from "./gemini" +export * as OpenAIChat from "./openai-chat" +export * as OpenAICompatibleChat from "./openai-compatible-chat" +export * as OpenAIResponses from "./openai-responses" diff --git a/packages/llm/src/protocols/openai-chat.ts b/packages/llm/src/protocols/openai-chat.ts new file mode 100644 index 000000000000..974e22950d45 --- /dev/null +++ b/packages/llm/src/protocols/openai-chat.ts @@ -0,0 +1,404 @@ +import { Array as Arr, Effect, Schema } from "effect" +import { Route } from "../route/client" +import { Auth } from "../route/auth" +import { Endpoint } from "../route/endpoint" +import { Framing } from "../route/framing" +import { HttpTransport } from "../route/transport" +import { Protocol } from "../route/protocol" +import { + Usage, + type FinishReason, + type LLMEvent, + type LLMRequest, + type TextPart, + type ToolCallPart, + type ToolDefinition, +} from "../schema" +import { isRecord, JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared" +import { OpenAIOptions } from "./utils/openai-options" +import { ToolStream } from "./utils/tool-stream" + +const ADAPTER = "openai-chat" +export const DEFAULT_BASE_URL = "https://api.openai.com/v1" +export const PATH = "/chat/completions" + +// ============================================================================= +// Request Body Schema +// ============================================================================= +// The body schema is the provider-native JSON body. `fromRequest` below builds +// this shape from the common `LLMRequest`, then `Route.make` validates and +// JSON-encodes it before transport. 
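+// A minimal body this schema admits might look like (illustrative values, not
+// from a recorded request):
+//   { "model": "gpt-4.1-mini", "stream": true,
+//     "messages": [{ "role": "user", "content": "hello" }] }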
+const OpenAIChatFunction = Schema.Struct({
+  name: Schema.String,
+  description: Schema.String,
+  parameters: JsonObject,
+})
+
+const OpenAIChatTool = Schema.Struct({
+  type: Schema.tag("function"),
+  function: OpenAIChatFunction,
+})
+type OpenAIChatTool = Schema.Schema.Type<typeof OpenAIChatTool>
+
+const OpenAIChatAssistantToolCall = Schema.Struct({
+  id: Schema.String,
+  type: Schema.tag("function"),
+  function: Schema.Struct({
+    name: Schema.String,
+    arguments: Schema.String,
+  }),
+})
+type OpenAIChatAssistantToolCall = Schema.Schema.Type<typeof OpenAIChatAssistantToolCall>
+
+const OpenAIChatMessage = Schema.Union([
+  Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
+  Schema.Struct({ role: Schema.Literal("user"), content: Schema.String }),
+  Schema.Struct({
+    role: Schema.Literal("assistant"),
+    content: Schema.NullOr(Schema.String),
+    tool_calls: optionalArray(OpenAIChatAssistantToolCall),
+    reasoning_content: Schema.optional(Schema.String),
+  }),
+  Schema.Struct({ role: Schema.Literal("tool"), tool_call_id: Schema.String, content: Schema.String }),
+]).pipe(Schema.toTaggedUnion("role"))
+type OpenAIChatMessage = Schema.Schema.Type<typeof OpenAIChatMessage>
+
+const OpenAIChatToolChoice = Schema.Union([
+  Schema.Literals(["auto", "none", "required"]),
+  Schema.Struct({
+    type: Schema.tag("function"),
+    function: Schema.Struct({ name: Schema.String }),
+  }),
+])
+
+export const bodyFields = {
+  model: Schema.String,
+  messages: Schema.Array(OpenAIChatMessage),
+  tools: optionalArray(OpenAIChatTool),
+  tool_choice: Schema.optional(OpenAIChatToolChoice),
+  stream: Schema.Literal(true),
+  stream_options: Schema.optional(Schema.Struct({ include_usage: Schema.Boolean })),
+  store: Schema.optional(Schema.Boolean),
+  reasoning_effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
+  max_tokens: Schema.optional(Schema.Number),
+  temperature: Schema.optional(Schema.Number),
+  top_p: Schema.optional(Schema.Number),
+  frequency_penalty: Schema.optional(Schema.Number),
+  presence_penalty: Schema.optional(Schema.Number),
+  seed: Schema.optional(Schema.Number),
+  stop: optionalArray(Schema.String),
+}
+const OpenAIChatBody = Schema.Struct(bodyFields)
+export type OpenAIChatBody = Schema.Schema.Type<typeof OpenAIChatBody>
+
+// =============================================================================
+// Streaming Event Schema
+// =============================================================================
+// The event schema is one decoded SSE `data:` payload. `Framing.sse` splits the
+// byte stream into strings, then `Protocol.jsonEvent` decodes each string into
+// this provider-native event shape.
+const OpenAIChatUsage = Schema.Struct({
+  prompt_tokens: Schema.optional(Schema.Number),
+  completion_tokens: Schema.optional(Schema.Number),
+  total_tokens: Schema.optional(Schema.Number),
+  prompt_tokens_details: optionalNull(
+    Schema.Struct({
+      cached_tokens: Schema.optional(Schema.Number),
+    }),
+  ),
+  completion_tokens_details: optionalNull(
+    Schema.Struct({
+      reasoning_tokens: Schema.optional(Schema.Number),
+    }),
+  ),
+})
+
+const OpenAIChatToolCallDeltaFunction = Schema.Struct({
+  name: optionalNull(Schema.String),
+  arguments: optionalNull(Schema.String),
+})
+
+const OpenAIChatToolCallDelta = Schema.Struct({
+  index: Schema.Number,
+  id: optionalNull(Schema.String),
+  function: optionalNull(OpenAIChatToolCallDeltaFunction),
+})
+type OpenAIChatToolCallDelta = Schema.Schema.Type<typeof OpenAIChatToolCallDelta>
+
+const OpenAIChatDelta = Schema.Struct({
+  content: optionalNull(Schema.String),
+  tool_calls: optionalNull(Schema.Array(OpenAIChatToolCallDelta)),
+})
+
+const OpenAIChatChoice = Schema.Struct({
+  delta: optionalNull(OpenAIChatDelta),
+  finish_reason: optionalNull(Schema.String),
+})
+
+const OpenAIChatEvent = Schema.Struct({
+  choices: Schema.Array(OpenAIChatChoice),
+  usage: optionalNull(OpenAIChatUsage),
+})
+type OpenAIChatEvent = Schema.Schema.Type<typeof OpenAIChatEvent>
+type OpenAIChatRequestMessage = LLMRequest["messages"][number]
+
+interface ParserState {
+  readonly tools: ToolStream.State
+  readonly toolCallEvents: ReadonlyArray<LLMEvent>
+  readonly usage?: Usage
+  readonly finishReason?: FinishReason
+}
+
+const invalid = ProviderShared.invalidRequest
+
+// =============================================================================
+// Request Lowering
+// =============================================================================
+// Lowering is the only place that knows how common LLM messages map onto the
+// OpenAI Chat wire format. Keep provider quirks here instead of leaking native
+// fields into `LLMRequest`.
+const lowerTool = (tool: ToolDefinition): OpenAIChatTool => ({
+  type: "function",
+  function: {
+    name: tool.name,
+    description: tool.description,
+    parameters: tool.inputSchema,
+  },
+})
+
+const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
+  ProviderShared.matchToolChoice("OpenAI Chat", toolChoice, {
+    auto: () => "auto" as const,
+    none: () => "none" as const,
+    required: () => "required" as const,
+    tool: (name) => ({ type: "function" as const, function: { name } }),
+  })
+
+const lowerToolCall = (part: ToolCallPart): OpenAIChatAssistantToolCall => ({
+  id: part.id,
+  type: "function",
+  function: {
+    name: part.name,
+    arguments: ProviderShared.encodeJson(part.input),
+  },
+})
+
+const openAICompatibleReasoningContent = (native: unknown) =>
+  isRecord(native) && typeof native.reasoning_content === "string" ?
native.reasoning_content : undefined + +const lowerUserMessage = Effect.fn("OpenAIChat.lowerUserMessage")(function* (message: OpenAIChatRequestMessage) { + const content: TextPart[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text"])) + return yield* ProviderShared.unsupportedContent("OpenAI Chat", "user", ["text"]) + content.push(part) + } + return { role: "user" as const, content: ProviderShared.joinText(content) } +}) + +const lowerAssistantMessage = Effect.fn("OpenAIChat.lowerAssistantMessage")(function* ( + message: OpenAIChatRequestMessage, +) { + const content: TextPart[] = [] + const toolCalls: OpenAIChatAssistantToolCall[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text", "tool-call"])) + return yield* ProviderShared.unsupportedContent("OpenAI Chat", "assistant", ["text", "tool-call"]) + if (part.type === "text") { + content.push(part) + continue + } + if (part.type === "tool-call") { + toolCalls.push(lowerToolCall(part)) + continue + } + } + return { + role: "assistant" as const, + content: content.length === 0 ? null : ProviderShared.joinText(content), + tool_calls: toolCalls.length === 0 ? undefined : toolCalls, + reasoning_content: openAICompatibleReasoningContent(message.native?.openaiCompatible), + } +}) + +const lowerToolMessages = Effect.fn("OpenAIChat.lowerToolMessages")(function* (message: OpenAIChatRequestMessage) { + const messages: OpenAIChatMessage[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["tool-result"])) + return yield* ProviderShared.unsupportedContent("OpenAI Chat", "tool", ["tool-result"]) + messages.push({ role: "tool", tool_call_id: part.id, content: ProviderShared.toolResultText(part) }) + } + return messages +}) + +const lowerMessage = Effect.fn("OpenAIChat.lowerMessage")(function* (message: OpenAIChatRequestMessage) { + if (message.role === "user") return [yield* lowerUserMessage(message)] + if (message.role === "assistant") return [yield* lowerAssistantMessage(message)] + return yield* lowerToolMessages(message) +}) + +const lowerMessages = Effect.fn("OpenAIChat.lowerMessages")(function* (request: LLMRequest) { + const system: OpenAIChatMessage[] = + request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }] + return [...system, ...Arr.flatten(yield* Effect.forEach(request.messages, lowerMessage))] +}) + +const lowerOptions = Effect.fn("OpenAIChat.lowerOptions")(function* (request: LLMRequest) { + const store = OpenAIOptions.store(request) + const reasoningEffort = OpenAIOptions.reasoningEffort(request) + if (reasoningEffort && !OpenAIOptions.isReasoningEffort(reasoningEffort)) + return yield* invalid(`OpenAI Chat does not support reasoning effort ${reasoningEffort}`) + return { + ...(store !== undefined ? { store } : {}), + ...(reasoningEffort ? { reasoning_effort: reasoningEffort } : {}), + } +}) + +const fromRequest = Effect.fn("OpenAIChat.fromRequest")(function* (request: LLMRequest) { + // `fromRequest` returns the provider body only. Endpoint, auth, framing, + // validation, and HTTP execution are composed by `Route.make`. + const generation = request.generation + return { + model: request.model.id, + messages: yield* lowerMessages(request), + tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool), + tool_choice: request.toolChoice ? 
yield* lowerToolChoice(request.toolChoice) : undefined, + stream: true as const, + stream_options: { include_usage: true }, + max_tokens: generation?.maxTokens, + temperature: generation?.temperature, + top_p: generation?.topP, + frequency_penalty: generation?.frequencyPenalty, + presence_penalty: generation?.presencePenalty, + seed: generation?.seed, + stop: generation?.stop, + ...(yield* lowerOptions(request)), + } +}) + +// ============================================================================= +// Stream Parsing +// ============================================================================= +// Streaming parsers are small state machines: every event returns a new state +// plus the common `LLMEvent`s produced by that event. Tool calls are accumulated +// because OpenAI streams JSON arguments across multiple deltas. +const mapFinishReason = (reason: string | null | undefined): FinishReason => { + if (reason === "stop") return "stop" + if (reason === "length") return "length" + if (reason === "content_filter") return "content-filter" + if (reason === "function_call" || reason === "tool_calls") return "tool-calls" + return "unknown" +} + +const mapUsage = (usage: OpenAIChatEvent["usage"]): Usage | undefined => { + if (!usage) return undefined + return new Usage({ + inputTokens: usage.prompt_tokens, + outputTokens: usage.completion_tokens, + reasoningTokens: usage.completion_tokens_details?.reasoning_tokens, + cacheReadInputTokens: usage.prompt_tokens_details?.cached_tokens, + totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens), + native: usage, + }) +} + +const step = (state: ParserState, event: OpenAIChatEvent) => + Effect.gen(function* () { + const events: LLMEvent[] = [] + const usage = mapUsage(event.usage) ?? state.usage + const choice = event.choices[0] + const finishReason = choice?.finish_reason ? mapFinishReason(choice.finish_reason) : state.finishReason + const delta = choice?.delta + const toolDeltas = delta?.tool_calls ?? [] + let tools = state.tools + + if (delta?.content) events.push({ type: "text-delta", text: delta.content }) + + for (const tool of toolDeltas) { + const result = ToolStream.appendOrStart( + ADAPTER, + tools, + tool.index, + { id: tool.id ?? undefined, name: tool.function?.name ?? undefined, text: tool.function?.arguments ?? "" }, + "OpenAI Chat tool call delta is missing id or name", + ) + if (ToolStream.isError(result)) return yield* result + tools = result.tools + if (result.event) events.push(result.event) + } + + // Finalize accumulated tool inputs eagerly when finish_reason arrives so + // JSON parse failures fail the stream at the boundary rather than at halt. + const finished = + finishReason !== undefined && state.finishReason === undefined && Object.keys(tools).length > 0 + ? yield* ToolStream.finishAll(ADAPTER, tools) + : undefined + + return [ + { + tools: finished?.tools ?? tools, + toolCallEvents: finished?.events ?? state.toolCallEvents, + usage, + finishReason, + }, + events, + ] as const + }) + +const finishEvents = (state: ParserState): ReadonlyArray => { + const hasToolCalls = state.toolCallEvents.length > 0 + const reason = state.finishReason === "stop" && hasToolCalls ? "tool-calls" : state.finishReason + return [ + ...state.toolCallEvents, + ...(reason ? 
([{ type: "request-finish", reason, usage: state.usage }] satisfies ReadonlyArray) : []), + ] +} + +// ============================================================================= +// Protocol And OpenAI Route +// ============================================================================= +/** + * The OpenAI Chat protocol — request body construction, body schema, and the + * streaming-event state machine. Reused by every route that speaks OpenAI Chat + * over HTTP+SSE: native OpenAI, DeepSeek, TogetherAI, Cerebras, Baseten, + * Fireworks, DeepInfra, and (once added) Azure OpenAI Chat. + */ +export const protocol = Protocol.make({ + id: ADAPTER, + body: { + schema: OpenAIChatBody, + from: fromRequest, + }, + stream: { + event: Protocol.jsonEvent(OpenAIChatEvent), + initial: () => ({ tools: ToolStream.empty(), toolCallEvents: [] }), + step, + onHalt: finishEvents, + }, +}) + +const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIChatBody)) + +export const httpTransport = HttpTransport.httpJson({ + endpoint: Endpoint.path(PATH), + auth: Auth.bearer(), + framing: Framing.sse, + encodeBody, +}) + +export const route = Route.make({ + id: ADAPTER, + provider: "openai", + protocol, + transport: httpTransport, + defaults: { + baseURL: DEFAULT_BASE_URL, + }, +}) + +// ============================================================================= +// Model Helper +// ============================================================================= +export const model = route.model + +export * as OpenAIChat from "./openai-chat" diff --git a/packages/llm/src/protocols/openai-compatible-chat.ts b/packages/llm/src/protocols/openai-compatible-chat.ts new file mode 100644 index 000000000000..76deeac45136 --- /dev/null +++ b/packages/llm/src/protocols/openai-compatible-chat.ts @@ -0,0 +1,28 @@ +import { Route, type RouteRoutedModelInput } from "../route/client" +import { Endpoint } from "../route/endpoint" +import { Framing } from "../route/framing" +import * as OpenAIChat from "./openai-chat" + +const ADAPTER = "openai-compatible-chat" + +export type OpenAICompatibleChatModelInput = Omit & { + readonly baseURL: string +} + +/** + * Route for non-OpenAI providers that expose an OpenAI Chat-compatible + * `/chat/completions` endpoint. Reuses `OpenAIChat.protocol` end-to-end and + * overrides only the route id so providers can be resolved per-family without + * colliding with native OpenAI. The model carries the host on `baseURL`, + * supplied by whichever profile/provider helper builds it. 
+ */
+export const route = Route.make({
+  id: ADAPTER,
+  protocol: OpenAIChat.protocol,
+  endpoint: Endpoint.path("/chat/completions"),
+  framing: Framing.sse,
+})
+
+export const model = Route.model(route)
+
+export * as OpenAICompatibleChat from "./openai-compatible-chat"
diff --git a/packages/llm/src/protocols/openai-responses.ts b/packages/llm/src/protocols/openai-responses.ts
new file mode 100644
index 000000000000..780ed31bfcfa
--- /dev/null
+++ b/packages/llm/src/protocols/openai-responses.ts
@@ -0,0 +1,575 @@
+import { Effect, Schema } from "effect"
+import { Route } from "../route/client"
+import { Auth } from "../route/auth"
+import { Endpoint } from "../route/endpoint"
+import { Framing } from "../route/framing"
+import { HttpTransport, WebSocketTransport } from "../route/transport"
+import { Protocol } from "../route/protocol"
+import {
+  Usage,
+  type FinishReason,
+  type LLMEvent,
+  type LLMRequest,
+  type ProviderMetadata,
+  type TextPart,
+  type ToolCallPart,
+  type ToolDefinition,
+} from "../schema"
+import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
+import { OpenAIOptions } from "./utils/openai-options"
+import { ToolStream } from "./utils/tool-stream"
+
+const ADAPTER = "openai-responses"
+export const DEFAULT_BASE_URL = "https://api.openai.com/v1"
+export const PATH = "/responses"
+
+// =============================================================================
+// Request Body Schema
+// =============================================================================
+const OpenAIResponsesInputText = Schema.Struct({
+  type: Schema.tag("input_text"),
+  text: Schema.String,
+})
+
+const OpenAIResponsesOutputText = Schema.Struct({
+  type: Schema.tag("output_text"),
+  text: Schema.String,
+})
+
+const OpenAIResponsesInputItem = Schema.Union([
+  Schema.Struct({ role: Schema.tag("system"), content: Schema.String }),
+  Schema.Struct({ role: Schema.tag("user"), content: Schema.Array(OpenAIResponsesInputText) }),
+  Schema.Struct({ role: Schema.tag("assistant"), content: Schema.Array(OpenAIResponsesOutputText) }),
+  Schema.Struct({
+    type: Schema.tag("function_call"),
+    call_id: Schema.String,
+    name: Schema.String,
+    arguments: Schema.String,
+  }),
+  Schema.Struct({
+    type: Schema.tag("function_call_output"),
+    call_id: Schema.String,
+    output: Schema.String,
+  }),
+])
+type OpenAIResponsesInputItem = Schema.Schema.Type<typeof OpenAIResponsesInputItem>
+
+const OpenAIResponsesTool = Schema.Struct({
+  type: Schema.tag("function"),
+  name: Schema.String,
+  description: Schema.String,
+  parameters: JsonObject,
+  strict: Schema.optional(Schema.Boolean),
+})
+type OpenAIResponsesTool = Schema.Schema.Type<typeof OpenAIResponsesTool>
+
+const OpenAIResponsesToolChoice = Schema.Union([
+  Schema.Literals(["auto", "none", "required"]),
+  Schema.Struct({ type: Schema.tag("function"), name: Schema.String }),
+])
+
+// Fields shared between the HTTP body and the WebSocket `response.create`
+// message. The HTTP body adds `stream: true`; the WebSocket message adds
+// `type: "response.create"`. Defining the shared shape once keeps the two
+// transports in sync without a destructure-and-strip dance.
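+// Illustrative (values assumed, not from a recorded exchange) — the same core
+// fields serialize to both wire shapes:
+//   HTTP body:          { "model": "...", "input": [...], "stream": true }
+//   WebSocket message:  { "type": "response.create", "model": "...", "input": [...] }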
+const OpenAIResponsesCoreFields = {
+  model: Schema.String,
+  input: Schema.Array(OpenAIResponsesInputItem),
+  tools: optionalArray(OpenAIResponsesTool),
+  tool_choice: Schema.optional(OpenAIResponsesToolChoice),
+  store: Schema.optional(Schema.Boolean),
+  prompt_cache_key: Schema.optional(Schema.String),
+  include: optionalArray(Schema.Literal("reasoning.encrypted_content")),
+  reasoning: Schema.optional(
+    Schema.Struct({
+      effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
+      summary: Schema.optional(Schema.Literal("auto")),
+    }),
+  ),
+  text: Schema.optional(
+    Schema.Struct({
+      verbosity: Schema.optional(OpenAIOptions.OpenAITextVerbosity),
+    }),
+  ),
+  max_output_tokens: Schema.optional(Schema.Number),
+  temperature: Schema.optional(Schema.Number),
+  top_p: Schema.optional(Schema.Number),
+}
+
+const OpenAIResponsesBody = Schema.Struct({
+  ...OpenAIResponsesCoreFields,
+  stream: Schema.Literal(true),
+})
+export type OpenAIResponsesBody = Schema.Schema.Type<typeof OpenAIResponsesBody>
+
+const OpenAIResponsesWebSocketMessage = Schema.StructWithRest(
+  Schema.Struct({
+    type: Schema.tag("response.create"),
+    ...OpenAIResponsesCoreFields,
+  }),
+  [Schema.Record(Schema.String, Schema.Unknown)],
+)
+type OpenAIResponsesWebSocketMessage = Schema.Schema.Type<typeof OpenAIResponsesWebSocketMessage>
+const encodeWebSocketMessage = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesWebSocketMessage))
+
+const OpenAIResponsesUsage = Schema.Struct({
+  input_tokens: Schema.optional(Schema.Number),
+  input_tokens_details: optionalNull(Schema.Struct({ cached_tokens: Schema.optional(Schema.Number) })),
+  output_tokens: Schema.optional(Schema.Number),
+  output_tokens_details: optionalNull(Schema.Struct({ reasoning_tokens: Schema.optional(Schema.Number) })),
+  total_tokens: Schema.optional(Schema.Number),
+})
+type OpenAIResponsesUsage = Schema.Schema.Type<typeof OpenAIResponsesUsage>
+
+const OpenAIResponsesStreamItem = Schema.Struct({
+  type: Schema.String,
+  id: Schema.optional(Schema.String),
+  call_id: Schema.optional(Schema.String),
+  name: Schema.optional(Schema.String),
+  arguments: Schema.optional(Schema.String),
+  // Hosted (provider-executed) tool fields. Each hosted tool item carries its
+  // own subset of these — we capture them generically so we can surface the
+  // call's typed input portion and round-trip the full result payload without
+  // hand-rolling a per-tool schema.
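+  // e.g. an abridged, illustrative web_search_call item:
+  //   { "type": "web_search_call", "id": "ws_123", "status": "completed",
+  //     "action": { "type": "search", "query": "..." } }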
+  status: Schema.optional(Schema.String),
+  action: Schema.optional(Schema.Unknown),
+  queries: Schema.optional(Schema.Unknown),
+  results: Schema.optional(Schema.Unknown),
+  code: Schema.optional(Schema.String),
+  container_id: Schema.optional(Schema.String),
+  outputs: Schema.optional(Schema.Unknown),
+  server_label: Schema.optional(Schema.String),
+  output: Schema.optional(Schema.Unknown),
+  error: Schema.optional(Schema.Unknown),
+})
+type OpenAIResponsesStreamItem = Schema.Schema.Type<typeof OpenAIResponsesStreamItem>
+
+const OpenAIResponsesEvent = Schema.Struct({
+  type: Schema.String,
+  delta: Schema.optional(Schema.String),
+  item_id: Schema.optional(Schema.String),
+  item: Schema.optional(OpenAIResponsesStreamItem),
+  response: Schema.optional(
+    Schema.Struct({
+      id: Schema.optional(Schema.String),
+      service_tier: Schema.optional(Schema.String),
+      incomplete_details: optionalNull(Schema.Struct({ reason: Schema.String })),
+      usage: optionalNull(OpenAIResponsesUsage),
+    }),
+  ),
+  code: Schema.optional(Schema.String),
+  message: Schema.optional(Schema.String),
+})
+type OpenAIResponsesEvent = Schema.Schema.Type<typeof OpenAIResponsesEvent>
+
+interface ParserState {
+  readonly tools: ToolStream.State
+  readonly hasFunctionCall: boolean
+}
+
+const invalid = ProviderShared.invalidRequest
+
+// =============================================================================
+// Request Lowering
+// =============================================================================
+const lowerTool = (tool: ToolDefinition): OpenAIResponsesTool => ({
+  type: "function",
+  name: tool.name,
+  description: tool.description,
+  parameters: tool.inputSchema,
+})
+
+const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
+  ProviderShared.matchToolChoice("OpenAI Responses", toolChoice, {
+    auto: () => "auto" as const,
+    none: () => "none" as const,
+    required: () => "required" as const,
+    tool: (name) => ({ type: "function" as const, name }),
+  })
+
+const lowerToolCall = (part: ToolCallPart): OpenAIResponsesInputItem => ({
+  type: "function_call",
+  call_id: part.id,
+  name: part.name,
+  arguments: ProviderShared.encodeJson(part.input),
+})
+
+const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) {
+  const system: OpenAIResponsesInputItem[] =
+    request.system.length === 0 ?
[] : [{ role: "system", content: ProviderShared.joinText(request.system) }] + const input: OpenAIResponsesInputItem[] = [...system] + + for (const message of request.messages) { + if (message.role === "user") { + const content: TextPart[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text"])) + return yield* ProviderShared.unsupportedContent("OpenAI Responses", "user", ["text"]) + content.push(part) + } + input.push({ role: "user", content: content.map((part) => ({ type: "input_text", text: part.text })) }) + continue + } + + if (message.role === "assistant") { + const content: TextPart[] = [] + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text", "tool-call"])) + return yield* ProviderShared.unsupportedContent("OpenAI Responses", "assistant", ["text", "tool-call"]) + if (part.type === "text") { + content.push(part) + continue + } + if (part.type === "tool-call") { + input.push(lowerToolCall(part)) + continue + } + } + if (content.length > 0) + input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) }) + continue + } + + for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["tool-result"])) + return yield* ProviderShared.unsupportedContent("OpenAI Responses", "tool", ["tool-result"]) + input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) }) + } + } + + return input +}) + +const lowerOptions = Effect.fn("OpenAIResponses.lowerOptions")(function* (request: LLMRequest) { + const store = OpenAIOptions.store(request) + const promptCacheKey = OpenAIOptions.promptCacheKey(request) + const effort = OpenAIOptions.reasoningEffort(request) + if (effort && !OpenAIOptions.isReasoningEffort(effort)) + return yield* invalid(`OpenAI Responses does not support reasoning effort ${effort}`) + const summary = OpenAIOptions.reasoningSummary(request) + const encryptedState = OpenAIOptions.encryptedReasoning(request) + const verbosity = OpenAIOptions.textVerbosity(request) + return { + ...(store !== undefined ? { store } : {}), + ...(promptCacheKey ? { prompt_cache_key: promptCacheKey } : {}), + ...(encryptedState ? { include: ["reasoning.encrypted_content"] as const } : {}), + ...(effort || summary ? { reasoning: { effort, summary } } : {}), + ...(verbosity ? { text: { verbosity } } : {}), + } +}) + +const fromRequest = Effect.fn("OpenAIResponses.fromRequest")(function* (request: LLMRequest) { + const generation = request.generation + return { + model: request.model.id, + input: yield* lowerMessages(request), + tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool), + tool_choice: request.toolChoice ? 
yield* lowerToolChoice(request.toolChoice) : undefined, + stream: true as const, + max_output_tokens: generation?.maxTokens, + temperature: generation?.temperature, + top_p: generation?.topP, + ...(yield* lowerOptions(request)), + } +}) + +// ============================================================================= +// Stream Parsing +// ============================================================================= +const mapUsage = (usage: OpenAIResponsesUsage | null | undefined) => { + if (!usage) return undefined + return new Usage({ + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + reasoningTokens: usage.output_tokens_details?.reasoning_tokens, + cacheReadInputTokens: usage.input_tokens_details?.cached_tokens, + totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens), + native: usage, + }) +} + +const mapFinishReason = (event: OpenAIResponsesEvent, hasFunctionCall: boolean): FinishReason => { + const reason = event.response?.incomplete_details?.reason + if (reason === undefined || reason === null) return hasFunctionCall ? "tool-calls" : "stop" + if (reason === "max_output_tokens") return "length" + if (reason === "content_filter") return "content-filter" + return hasFunctionCall ? "tool-calls" : "unknown" +} + +const openaiMetadata = (metadata: Record): ProviderMetadata => ({ openai: metadata }) + +// Hosted tool items (provider-executed) ship their typed input + status + +// result fields all in one item. We expose them as a `tool-call` + +// `tool-result` pair so consumers can treat them uniformly with client tools, +// only differentiated by `providerExecuted: true`. +// +// One record per OpenAI Responses item type that represents a hosted +// (provider-executed) tool call: the common name we surface, plus an `input` +// extractor that picks the fields the model actually populated for that tool. +// Falling back to `{}` when an entry isn't fully typed keeps unknown tools +// observable without rolling a per-tool schema. +const HOSTED_TOOLS = { + web_search_call: { name: "web_search", input: (item) => item.action ?? {} }, + web_search_preview_call: { name: "web_search_preview", input: (item) => item.action ?? {} }, + file_search_call: { name: "file_search", input: (item) => ({ queries: item.queries ?? [] }) }, + code_interpreter_call: { + name: "code_interpreter", + input: (item) => ({ code: item.code, container_id: item.container_id }), + }, + computer_use_call: { name: "computer_use", input: (item) => item.action ?? {} }, + image_generation_call: { name: "image_generation", input: () => ({}) }, + mcp_call: { + name: "mcp", + input: (item) => ({ server_label: item.server_label, name: item.name, arguments: item.arguments }), + }, + local_shell_call: { name: "local_shell", input: (item) => item.action ?? {} }, +} as const satisfies Record< + string, + { readonly name: string; readonly input: (item: OpenAIResponsesStreamItem) => unknown } +> + +type HostedToolType = keyof typeof HOSTED_TOOLS + +const isHostedToolItem = ( + item: OpenAIResponsesStreamItem, +): item is OpenAIResponsesStreamItem & { type: HostedToolType; id: string } => + item.type in HOSTED_TOOLS && typeof item.id === "string" && item.id.length > 0 + +// Round-trip the full item as the structured result so consumers can extract +// outputs / sources / status without re-decoding. +const hostedToolResult = (item: OpenAIResponsesStreamItem) => { + const isError = typeof item.error !== "undefined" && item.error !== null + return isError ? 
{ type: "error" as const, value: item.error } : { type: "json" as const, value: item } +} + +const hostedToolEvents = ( + item: OpenAIResponsesStreamItem & { type: HostedToolType; id: string }, +): ReadonlyArray => { + const tool = HOSTED_TOOLS[item.type] + const providerMetadata = openaiMetadata({ itemId: item.id }) + return [ + { + type: "tool-call", + id: item.id, + name: tool.name, + input: tool.input(item), + providerExecuted: true, + providerMetadata, + }, + { + type: "tool-result", + id: item.id, + name: tool.name, + result: hostedToolResult(item), + providerExecuted: true, + providerMetadata, + }, + ] +} + +type StepResult = readonly [ParserState, ReadonlyArray] + +const NO_EVENTS: StepResult["1"] = [] + +// `response.completed` / `response.incomplete` are clean finishes that emit a +// `request-finish` event; `response.failed` is a hard failure that emits a +// `provider-error`. All three end the stream — kept in one set so `step` and +// the protocol's `terminal` predicate stay in sync. +const TERMINAL_TYPES = new Set(["response.completed", "response.incomplete", "response.failed"]) + +const onOutputTextDelta = (state: ParserState, event: OpenAIResponsesEvent): StepResult => { + if (!event.delta) return [state, NO_EVENTS] + return [ + state, + [ + { + type: "text-delta", + id: event.item_id, + text: event.delta, + ...(event.item_id ? { providerMetadata: openaiMetadata({ itemId: event.item_id }) } : {}), + }, + ], + ] +} + +const onOutputItemAdded = (state: ParserState, event: OpenAIResponsesEvent): StepResult => { + const item = event.item + if (item?.type !== "function_call" || !item.id) return [state, NO_EVENTS] + return [ + { + hasFunctionCall: state.hasFunctionCall, + tools: ToolStream.start(state.tools, item.id, { + id: item.call_id ?? item.id, + name: item.name ?? "", + input: item.arguments ?? "", + providerMetadata: openaiMetadata({ itemId: item.id }), + }), + }, + NO_EVENTS, + ] +} + +const onFunctionCallArgumentsDelta = Effect.fn("OpenAIResponses.onFunctionCallArgumentsDelta")(function* ( + state: ParserState, + event: OpenAIResponsesEvent, +) { + if (!event.item_id || !event.delta) return [state, NO_EVENTS] satisfies StepResult + const result = ToolStream.appendExisting( + ADAPTER, + state.tools, + event.item_id, + event.delta, + "OpenAI Responses tool argument delta is missing its tool call", + ) + if (ToolStream.isError(result)) return yield* result + return [ + { hasFunctionCall: state.hasFunctionCall, tools: result.tools }, + result.event ? [result.event] : NO_EVENTS, + ] satisfies StepResult +}) + +const onOutputItemDone = Effect.fn("OpenAIResponses.onOutputItemDone")(function* ( + state: ParserState, + event: OpenAIResponsesEvent, +) { + const item = event.item + if (!item) return [state, NO_EVENTS] satisfies StepResult + + if (item.type === "function_call") { + if (!item.id || !item.call_id || !item.name) return [state, NO_EVENTS] satisfies StepResult + const tools = state.tools[item.id] + ? state.tools + : ToolStream.start(state.tools, item.id, { id: item.call_id, name: item.name }) + const result = + item.arguments === undefined + ? yield* ToolStream.finish(ADAPTER, tools, item.id) + : yield* ToolStream.finishWithInput(ADAPTER, tools, item.id, item.arguments) + return [ + { hasFunctionCall: result.event ? true : state.hasFunctionCall, tools: result.tools }, + result.event ? 
[result.event] : NO_EVENTS, + ] satisfies StepResult + } + + if (isHostedToolItem(item)) return [state, hostedToolEvents(item)] satisfies StepResult + + return [state, NO_EVENTS] satisfies StepResult +}) + +const onResponseFinish = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [ + state, + [ + { + type: "request-finish", + reason: mapFinishReason(event, state.hasFunctionCall), + usage: mapUsage(event.response?.usage), + ...(event.response?.id || event.response?.service_tier + ? { + providerMetadata: openaiMetadata({ + responseId: event.response.id, + serviceTier: event.response.service_tier, + }), + } + : {}), + }, + ], +] + +const onResponseFailed = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [ + state, + [{ type: "provider-error", message: event.message ?? event.code ?? "OpenAI Responses response failed" }], +] + +const onError = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [ + state, + [{ type: "provider-error", message: event.message ?? event.code ?? "OpenAI Responses stream error" }], +] + +const step = (state: ParserState, event: OpenAIResponsesEvent) => { + if (event.type === "response.output_text.delta") return Effect.succeed(onOutputTextDelta(state, event)) + if (event.type === "response.output_item.added") return Effect.succeed(onOutputItemAdded(state, event)) + if (event.type === "response.function_call_arguments.delta") return onFunctionCallArgumentsDelta(state, event) + if (event.type === "response.output_item.done") return onOutputItemDone(state, event) + if (event.type === "response.completed" || event.type === "response.incomplete") + return Effect.succeed(onResponseFinish(state, event)) + if (event.type === "response.failed") return Effect.succeed(onResponseFailed(state, event)) + if (event.type === "error") return Effect.succeed(onError(state, event)) + return Effect.succeed([state, NO_EVENTS]) +} + +// ============================================================================= +// Protocol And OpenAI Route +// ============================================================================= +/** + * The OpenAI Responses protocol — request body construction, body schema, and + * the streaming-event state machine. Used by native OpenAI and (once + * registered) Azure OpenAI Responses. 
+ */
+export const protocol = Protocol.make({
+  id: ADAPTER,
+  body: {
+    schema: OpenAIResponsesBody,
+    from: fromRequest,
+  },
+  stream: {
+    event: Protocol.jsonEvent(OpenAIResponsesEvent),
+    initial: () => ({ hasFunctionCall: false, tools: ToolStream.empty() }),
+    step,
+    terminal: (event) => TERMINAL_TYPES.has(event.type),
+  },
+})
+
+const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesBody))
+const transportBase = {
+  endpoint: Endpoint.path(PATH),
+  auth: Auth.bearer(),
+  encodeBody,
+}
+const routeDefaults = {
+  baseURL: DEFAULT_BASE_URL,
+}
+
+export const httpTransport = HttpTransport.httpJson({
+  ...transportBase,
+  framing: Framing.sse,
+})
+
+export const route = Route.make({
+  id: ADAPTER,
+  provider: "openai",
+  protocol,
+  transport: httpTransport,
+  defaults: routeDefaults,
+})
+
+const decodeWebSocketMessage = ProviderShared.validateWith(Schema.decodeUnknownEffect(OpenAIResponsesWebSocketMessage))
+
+const webSocketMessage = (body: OpenAIResponsesBody | Record<string, unknown>) =>
+  Effect.gen(function* () {
+    if (!ProviderShared.isRecord(body))
+      return yield* ProviderShared.invalidRequest("OpenAI Responses WebSocket body must be a JSON object")
+    const { stream: _stream, ...message } = body
+    return yield* decodeWebSocketMessage({ ...message, type: "response.create" })
+  })
+
+export const webSocketTransport = WebSocketTransport.json({
+  ...transportBase,
+  toMessage: webSocketMessage,
+  encodeMessage: encodeWebSocketMessage,
+})
+
+export const webSocketRoute = Route.make({
+  id: `${ADAPTER}-websocket`,
+  provider: "openai",
+  protocol,
+  transport: webSocketTransport,
+  defaults: routeDefaults,
+})
+
+// =============================================================================
+// Model Helper
+// =============================================================================
+export const model = route.model
+
+export const webSocketModel = webSocketRoute.model
+
+export * as OpenAIResponses from "./openai-responses"
diff --git a/packages/llm/src/protocols/shared.ts b/packages/llm/src/protocols/shared.ts
new file mode 100644
index 000000000000..c931353998e4
--- /dev/null
+++ b/packages/llm/src/protocols/shared.ts
@@ -0,0 +1,203 @@
+import { Buffer } from "node:buffer"
+import { Effect, Schema, Stream } from "effect"
+import * as Sse from "effect/unstable/encoding/Sse"
+import { Headers, HttpClientRequest } from "effect/unstable/http"
+import {
+  InvalidProviderOutputReason,
+  InvalidRequestReason,
+  LLMError,
+  type ContentPart,
+  type LLMRequest,
+  type MediaPart,
+  type ToolResultPart,
+} from "../schema"
+
+export const Json = Schema.fromJsonString(Schema.Unknown)
+export const decodeJson = Schema.decodeUnknownSync(Json)
+export const encodeJson = Schema.encodeSync(Json)
+export const JsonObject = Schema.Record(Schema.String, Schema.Unknown)
+export const optionalArray = <S extends Schema.Top>(schema: S) => Schema.optional(Schema.Array(schema))
+export const optionalNull = <S extends Schema.Top>(schema: S) => Schema.optional(Schema.NullOr(schema))
+
+/**
+ * Plain-record narrowing. Excludes arrays so routes checking nested JSON
+ * Schema fragments don't accidentally treat a tuple as a key/value bag.
+ */
+export const isRecord = (value: unknown): value is Record<string, unknown> =>
+  typeof value === "object" && value !== null && !Array.isArray(value)
+
+/**
+ * Streaming tool-call accumulator. Adapters that build a tool call across
+ * multiple `tool-input-delta` chunks store the partial JSON input string here
+ * and finalize it with `parseToolInput` once the call completes.
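+ *
+ * Illustrative accumulation (values assumed): deltas `{"pa` and `th":"a.ts"}`
+ * concatenate to `input === '{"path":"a.ts"}'`, which is parsed only when the
+ * call finishes.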
+ */ +export interface ToolAccumulator { + readonly id: string + readonly name: string + readonly input: string +} + +/** + * `Usage.totalTokens` policy shared by every route. Honors a provider- + * supplied total; otherwise falls back to `inputTokens + outputTokens` only + * when at least one is defined. Returns `undefined` when neither input nor + * output is known so routes don't publish a misleading `0`. + */ +export const totalTokens = ( + inputTokens: number | undefined, + outputTokens: number | undefined, + total: number | undefined, +) => { + if (total !== undefined) return total + if (inputTokens === undefined && outputTokens === undefined) return undefined + return (inputTokens ?? 0) + (outputTokens ?? 0) +} + +export const eventError = (route: string, message: string, raw?: string) => + new LLMError({ + module: "ProviderShared", + method: "stream", + reason: new InvalidProviderOutputReason({ route, message, raw }), + }) + +export const parseJson = (route: string, input: string, message: string) => + Effect.try({ + try: () => decodeJson(input), + catch: () => eventError(route, message, input), + }) + +/** + * Join the `text` field of a list of parts with newlines. Used by routes + * that flatten system / message content arrays into a single provider string + * (OpenAI Chat `system` content, OpenAI Responses `system` content, Gemini + * `systemInstruction.parts[].text`). + */ +export const joinText = (parts: ReadonlyArray<{ readonly text: string }>) => parts.map((part) => part.text).join("\n") + +/** + * Parse the streamed JSON input of a tool call. Treats an empty string as + * `"{}"` — providers occasionally finish a tool call without ever emitting + * input deltas (e.g. zero-arg tools). The error message is uniform across + * routes: `Invalid JSON input for tool call `. + */ +export const parseToolInput = (route: string, name: string, raw: string) => + parseJson(route, raw || "{}", `Invalid JSON input for ${route} tool call ${name}`) + +/** + * Encode a `MediaPart`'s raw bytes for inclusion in a JSON request body. + * `data: string` is assumed to already be base64 (matches caller convention + * across Gemini / Bedrock); `data: Uint8Array` is base64-encoded here. Used + * by every route that supports image / document inputs. + */ +export const mediaBytes = (part: MediaPart) => + typeof part.data === "string" ? part.data : Buffer.from(part.data).toString("base64") + +export const trimBaseUrl = (value: string) => value.replace(/\/+$/, "") + +export const toolResultText = (part: ToolResultPart) => { + if (part.result.type === "text" || part.result.type === "error") return String(part.result.value) + return encodeJson(part.result.value) +} + +export const errorText = (error: unknown) => { + if (error instanceof Error) return error.message + if (typeof error === "string") return error + if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") return String(error) + if (error === null) return "null" + if (error === undefined) return "undefined" + return "Unknown stream error" +} + +/** + * `framing` step for Server-Sent Events. Decodes UTF-8, runs the SSE channel + * decoder, and drops empty / `[DONE]` keep-alive events so the downstream + * `decodeChunk` sees one JSON string per element. The SSE channel emits a + * `Retry` control event on its error channel; we drop it here (we don't + * implement client-driven retries) so the public error channel stays + * `LLMError`. 
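+ *
+ * Illustrative frames (not from a capture): the bytes for
+ *   `data: {"choices":[]}` and `data: [DONE]`
+ * yield the single element `{"choices":[]}` downstream.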
+ */
+export const sseFraming = (bytes: Stream.Stream<Uint8Array, LLMError>): Stream.Stream<string, LLMError> =>
+  bytes.pipe(
+    Stream.decodeText(),
+    Stream.pipeThroughChannel(Sse.decode()),
+    Stream.catchTag("Retry", () => Stream.empty),
+    Stream.filter((event) => event.data.length > 0 && event.data !== "[DONE]"),
+    Stream.map((event) => event.data),
+  )
+
+/**
+ * Canonical invalid-request constructor. Lifts the one-line `const invalid =
+ * (message) => invalidRequest(message)` alias out of every route so the error
+ * constructor lives in one place. If we ever extend `InvalidRequestReason`
+ * with route context or trace metadata, the change lands here.
+ */
+export const invalidRequest = (message: string) =>
+  new LLMError({
+    module: "ProviderShared",
+    method: "request",
+    reason: new InvalidRequestReason({ message }),
+  })
+
+export const matchToolChoice = <Auto, None, Required, Tool>(
+  route: string,
+  toolChoice: NonNullable<LLMRequest["toolChoice"]>,
+  cases: {
+    readonly auto: () => Auto
+    readonly none: () => None
+    readonly required: () => Required
+    readonly tool: (name: string) => Tool
+  },
+) =>
+  Effect.gen(function* () {
+    if (toolChoice.type === "auto") return cases.auto()
+    if (toolChoice.type === "none") return cases.none()
+    if (toolChoice.type === "required") return cases.required()
+    if (!toolChoice.name) return yield* invalidRequest(`${route} tool choice requires a tool name`)
+    return cases.tool(toolChoice.name)
+  })
+
+type ContentType = ContentPart["type"]
+
+const formatContentTypes = (types: ReadonlyArray<ContentType>) => {
+  if (types.length <= 1) return types[0] ?? ""
+  if (types.length === 2) return `${types[0]} and ${types[1]}`
+  return `${types.slice(0, -1).join(", ")}, and ${types.at(-1)}`
+}
+
+export const supportsContent = <T extends ContentType>(
+  part: ContentPart,
+  types: ReadonlyArray<T>,
+): part is Extract<ContentPart, { type: T }> => (types as ReadonlyArray<string>).includes(part.type)
+
+export const unsupportedContent = (
+  route: string,
+  role: LLMRequest["messages"][number]["role"],
+  types: ReadonlyArray<ContentType>,
+) => invalidRequest(`${route} ${role} messages only support ${formatContentTypes(types)} content for now`)
+
+/**
+ * Build a `validate` step from a Schema decoder. Replaces the per-route
+ * lambda body `(payload) => decode(payload).pipe(Effect.mapError((e) =>
+ * invalid(e.message)))`. Any decode error is translated into
+ * `LLMError` carrying the original parse-error message.
+ */
+export const validateWith =
+  <I, A, E extends { readonly message: string }>(decode: (input: I) => Effect.Effect<A, E>) =>
+  (payload: I) =>
+    decode(payload).pipe(Effect.mapError((error) => invalidRequest(error.message)))
+
+/**
+ * Build an HTTP POST with a JSON body. Sets `content-type: application/json`
+ * automatically after caller-supplied headers so routes cannot accidentally
+ * send JSON with a stale content type. The body is passed pre-encoded so
+ * routes can choose between
+ * `Schema.encodeSync(payload)` and `ProviderShared.encodeJson(payload)`.
+ */ +export const jsonPost = (input: { readonly url: string; readonly body: string; readonly headers?: Headers.Input }) => + HttpClientRequest.post(input.url).pipe( + HttpClientRequest.setHeaders(Headers.set(Headers.fromInput(input.headers), "content-type", "application/json")), + HttpClientRequest.bodyText(input.body, "application/json"), + ) + +export * as ProviderShared from "./shared" diff --git a/packages/llm/src/protocols/utils/bedrock-auth.ts b/packages/llm/src/protocols/utils/bedrock-auth.ts new file mode 100644 index 000000000000..58d16d95f81e --- /dev/null +++ b/packages/llm/src/protocols/utils/bedrock-auth.ts @@ -0,0 +1,103 @@ +import { AwsV4Signer } from "aws4fetch" +import { Effect, Option, Schema } from "effect" +import { Headers } from "effect/unstable/http" +import { Auth, type AuthInput } from "../../route/auth" +import type { LLMRequest } from "../../schema" +import { ProviderShared } from "../shared" + +/** + * AWS credentials for SigV4 signing. Bedrock also supports Bearer API key auth + * via `model.apiKey`, which bypasses SigV4 signing. STS-vended credentials + * should be refreshed by the consumer (rebuild the model) before they expire; + * the route does not refresh. + */ +export interface Credentials { + readonly region: string + readonly accessKeyId: string + readonly secretAccessKey: string + readonly sessionToken?: string +} + +const NativeCredentials = Schema.Struct({ + accessKeyId: Schema.String, + secretAccessKey: Schema.String, + region: Schema.optional(Schema.String), + sessionToken: Schema.optional(Schema.String), +}) + +const decodeNativeCredentials = Schema.decodeUnknownOption(NativeCredentials) + +export const region = (request: LLMRequest) => { + const fromNative = request.model.native?.aws_region + if (typeof fromNative === "string" && fromNative !== "") return fromNative + return ( + decodeNativeCredentials(request.model.native?.aws_credentials).pipe( + Option.map((credentials) => credentials.region), + Option.getOrUndefined, + ) ?? "us-east-1" + ) +} + +const credentialsFromInput = (request: LLMRequest): Credentials | undefined => + decodeNativeCredentials(request.model.native?.aws_credentials).pipe( + Option.map((creds) => ({ ...creds, region: creds.region ?? region(request) })), + Option.getOrUndefined, + ) + +const signRequest = (input: { + readonly url: string + readonly body: string + readonly headers: Headers.Headers + readonly credentials: Credentials +}) => + Effect.tryPromise({ + try: async () => { + const signed = await new AwsV4Signer({ + url: input.url, + method: "POST", + headers: Object.entries(input.headers), + body: input.body, + region: input.credentials.region, + accessKeyId: input.credentials.accessKeyId, + secretAccessKey: input.credentials.secretAccessKey, + sessionToken: input.credentials.sessionToken, + service: "bedrock", + }).sign() + return Object.fromEntries(signed.headers.entries()) + }, + catch: (error) => + ProviderShared.invalidRequest( + `Bedrock Converse SigV4 signing failed: ${error instanceof Error ? error.message : String(error)}`, + ), + }) + +/** + * Bedrock auth. `model.apiKey` (Bedrock's newer Bearer API key auth) wins if + * set; otherwise sign the exact JSON bytes with SigV4 using credentials from + * `model.native.aws_credentials`. 
+ */
+export const auth = Auth.custom((input: AuthInput) => {
+  if (input.request.model.apiKey) return Auth.toEffect(Auth.bearer())(input)
+  return Effect.gen(function* () {
+    const credentials = credentialsFromInput(input.request)
+    if (!credentials) {
+      return yield* ProviderShared.invalidRequest(
+        "Bedrock Converse requires either model.apiKey or AWS credentials in model.native.aws_credentials",
+      )
+    }
+    const headersForSigning = Headers.set(input.headers, "content-type", "application/json")
+    const signed = yield* signRequest({ url: input.url, body: input.body, headers: headersForSigning, credentials })
+    return Headers.setAll(headersForSigning, signed)
+  })
+})
+
+export const nativeCredentials = (native: Record<string, unknown> | undefined, credentials: Credentials | undefined) =>
+  credentials
+    ? {
+        ...native,
+        aws_credentials: credentials,
+        aws_region: credentials.region,
+      }
+    : native
+
+export * as BedrockAuth from "./bedrock-auth"
diff --git a/packages/llm/src/protocols/utils/bedrock-cache.ts b/packages/llm/src/protocols/utils/bedrock-cache.ts
new file mode 100644
index 000000000000..ca6e52cd118e
--- /dev/null
+++ b/packages/llm/src/protocols/utils/bedrock-cache.ts
@@ -0,0 +1,20 @@
+import { Schema } from "effect"
+import type { CacheHint } from "../../schema"
+
+// Bedrock cache markers are positional: emit a `cachePoint` block immediately
+// after the content the caller wants treated as a cacheable prefix.
+export const CachePointBlock = Schema.Struct({
+  cachePoint: Schema.Struct({ type: Schema.tag("default") }),
+})
+export type CachePointBlock = Schema.Schema.Type<typeof CachePointBlock>
+
+// Bedrock recently added optional `ttl: "5m" | "1h"` on cachePoint. Map
+// `CacheHint.ttlSeconds` here once a recorded cassette validates the wire shape.
+const DEFAULT: CachePointBlock = { cachePoint: { type: "default" } }
+
+export const block = (cache: CacheHint | undefined): CachePointBlock | undefined => {
+  if (cache?.type !== "ephemeral" && cache?.type !== "persistent") return undefined
+  return DEFAULT
+}
+
+export * as BedrockCache from "./bedrock-cache"
diff --git a/packages/llm/src/protocols/utils/bedrock-media.ts b/packages/llm/src/protocols/utils/bedrock-media.ts
new file mode 100644
index 000000000000..0fbb396f9694
--- /dev/null
+++ b/packages/llm/src/protocols/utils/bedrock-media.ts
@@ -0,0 +1,80 @@
+import { Effect, Schema } from "effect"
+import type { MediaPart } from "../../schema"
+import { ProviderShared } from "../shared"
+
+// Bedrock Converse accepts image `format` as the file extension and
+// `source.bytes` as base64 in the JSON wire format.
+export const ImageFormat = Schema.Literals(["png", "jpeg", "gif", "webp"])
+export type ImageFormat = Schema.Schema.Type<typeof ImageFormat>
+
+export const ImageBlock = Schema.Struct({
+  image: Schema.Struct({
+    format: ImageFormat,
+    source: Schema.Struct({ bytes: Schema.String }),
+  }),
+})
+export type ImageBlock = Schema.Schema.Type<typeof ImageBlock>
+
+// Bedrock document blocks require a user-facing name so the model can refer to
+// the uploaded document.
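+// When `MediaPart.filename` is absent, `documentBlock` below falls back to a
+// generated name such as "document.pdf".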
+export const DocumentFormat = Schema.Literals(["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"])
+export type DocumentFormat = Schema.Schema.Type<typeof DocumentFormat>
+
+export const DocumentBlock = Schema.Struct({
+  document: Schema.Struct({
+    format: DocumentFormat,
+    name: Schema.String,
+    source: Schema.Struct({ bytes: Schema.String }),
+  }),
+})
+export type DocumentBlock = Schema.Schema.Type<typeof DocumentBlock>
+
+const IMAGE_FORMATS = {
+  "image/png": "png",
+  "image/jpeg": "jpeg",
+  "image/jpg": "jpeg",
+  "image/gif": "gif",
+  "image/webp": "webp",
+} as const satisfies Record<string, ImageFormat>
+
+const DOCUMENT_FORMATS = {
+  "application/pdf": "pdf",
+  "text/csv": "csv",
+  "application/msword": "doc",
+  "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
+  "application/vnd.ms-excel": "xls",
+  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
+  "text/html": "html",
+  "text/plain": "txt",
+  "text/markdown": "md",
+} as const satisfies Record<string, DocumentFormat>
+
+const imageBlock = (part: MediaPart, format: ImageFormat): ImageBlock => ({
+  image: { format, source: { bytes: ProviderShared.mediaBytes(part) } },
+})
+
+const documentBlock = (part: MediaPart, format: DocumentFormat): DocumentBlock => ({
+  document: {
+    format,
+    name: part.filename ?? `document.${format}`,
+    source: { bytes: ProviderShared.mediaBytes(part) },
+  },
+})
+
+// Route by MIME. Known image/document formats lower into a typed block; anything
+// else fails with a clear error instead of silently degrading to a malformed
+// document block. Image MIME types not in `IMAGE_FORMATS` (e.g. `image/svg+xml`)
+// get an image-specific error so the caller knows it's a format-support issue,
+// not a kind-detection issue.
+export const lower = (part: MediaPart) => {
+  const mime = part.mediaType.toLowerCase()
+  const imageFormat = IMAGE_FORMATS[mime as keyof typeof IMAGE_FORMATS]
+  if (imageFormat) return Effect.succeed(imageBlock(part, imageFormat))
+  if (mime.startsWith("image/"))
+    return ProviderShared.invalidRequest(`Bedrock Converse does not support image media type ${part.mediaType}`)
+  const documentFormat = DOCUMENT_FORMATS[mime as keyof typeof DOCUMENT_FORMATS]
+  if (documentFormat) return Effect.succeed(documentBlock(part, documentFormat))
+  return ProviderShared.invalidRequest(`Bedrock Converse does not support media type ${part.mediaType}`)
+}
+
+export * as BedrockMedia from "./bedrock-media"
diff --git a/packages/llm/src/protocols/utils/gemini-tool-schema.ts b/packages/llm/src/protocols/utils/gemini-tool-schema.ts
new file mode 100644
index 000000000000..7690b2e60018
--- /dev/null
+++ b/packages/llm/src/protocols/utils/gemini-tool-schema.ts
@@ -0,0 +1,101 @@
+import { ProviderShared } from "../shared"
+
+// Gemini accepts a JSON Schema-like dialect for tool parameters, but rejects a
+// handful of common JSON Schema shapes. Keep this projection isolated so the
+// Gemini protocol file still reads like the other protocol modules.
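+// Illustrative before/after (values assumed, not from a test fixture):
+//   input:    { type: "object", required: ["n", "missing"],
+//               properties: { n: { type: "integer", enum: [1, 2] } } }
+//   sanitize: enum becomes ["1", "2"] with type "string"; "missing" is dropped
+//             from `required` because it names no property
+//   project:  only allowlisted keys survive; a `const` would be coerced to a
+//             one-element `enum`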
+const SCHEMA_INTENT_KEYS = [ + "type", + "properties", + "items", + "prefixItems", + "enum", + "const", + "$ref", + "additionalProperties", + "patternProperties", + "required", + "not", + "if", + "then", + "else", +] + +const isRecord = ProviderShared.isRecord + +const hasCombiner = (schema: unknown) => + isRecord(schema) && (Array.isArray(schema.anyOf) || Array.isArray(schema.oneOf) || Array.isArray(schema.allOf)) + +const hasSchemaIntent = (schema: unknown) => + isRecord(schema) && (hasCombiner(schema) || SCHEMA_INTENT_KEYS.some((key) => key in schema)) + +const sanitizeNode = (schema: unknown): unknown => { + if (!isRecord(schema)) return Array.isArray(schema) ? schema.map(sanitizeNode) : schema + + const result: Record = Object.fromEntries( + Object.entries(schema).map(([key, value]) => [ + key, + key === "enum" && Array.isArray(value) ? value.map(String) : sanitizeNode(value), + ]), + ) + + if (Array.isArray(result.enum) && (result.type === "integer" || result.type === "number")) result.type = "string" + + const properties = result.properties + if (result.type === "object" && isRecord(properties) && Array.isArray(result.required)) { + result.required = result.required.filter((field) => typeof field === "string" && field in properties) + } + + if (result.type === "array" && !hasCombiner(result)) { + result.items = result.items ?? {} + if (isRecord(result.items) && !hasSchemaIntent(result.items)) result.items = { ...result.items, type: "string" } + } + + if (typeof result.type === "string" && result.type !== "object" && !hasCombiner(result)) { + delete result.properties + delete result.required + } + + return result +} + +const emptyObjectSchema = (schema: Record) => + schema.type === "object" && + (!isRecord(schema.properties) || Object.keys(schema.properties).length === 0) && + !schema.additionalProperties + +const projectNode = (schema: unknown): Record | undefined => { + if (!isRecord(schema)) return undefined + if (emptyObjectSchema(schema)) return undefined + return Object.fromEntries( + [ + ["description", schema.description], + ["required", schema.required], + ["format", schema.format], + ["type", Array.isArray(schema.type) ? schema.type.filter((type) => type !== "null")[0] : schema.type], + ["nullable", Array.isArray(schema.type) && schema.type.includes("null") ? true : undefined], + ["enum", schema.const !== undefined ? [schema.const] : schema.enum], + [ + "properties", + isRecord(schema.properties) + ? Object.fromEntries(Object.entries(schema.properties).map(([key, value]) => [key, projectNode(value)])) + : undefined, + ], + [ + "items", + Array.isArray(schema.items) + ? schema.items.map(projectNode) + : schema.items === undefined + ? undefined + : projectNode(schema.items), + ], + ["allOf", Array.isArray(schema.allOf) ? schema.allOf.map(projectNode) : undefined], + ["anyOf", Array.isArray(schema.anyOf) ? schema.anyOf.map(projectNode) : undefined], + ["oneOf", Array.isArray(schema.oneOf) ? 
schema.oneOf.map(projectNode) : undefined], + ["minLength", schema.minLength], + ].filter((entry) => entry[1] !== undefined), + ) +} + +export const convert = (schema: unknown) => projectNode(sanitizeNode(schema)) + +export * as GeminiToolSchema from "./gemini-tool-schema" diff --git a/packages/llm/src/protocols/utils/openai-options.ts b/packages/llm/src/protocols/utils/openai-options.ts new file mode 100644 index 000000000000..080ef83f50b6 --- /dev/null +++ b/packages/llm/src/protocols/utils/openai-options.ts @@ -0,0 +1,55 @@ +import { Schema } from "effect" +import type { LLMRequest, ReasoningEffort, TextVerbosity as TextVerbosityValue } from "../../schema" +import { ReasoningEfforts, TextVerbosity } from "../../schema" + +export const OpenAIReasoningEfforts = ReasoningEfforts.filter( + (effort): effort is Exclude => effort !== "max", +) +export type OpenAIReasoningEffort = (typeof OpenAIReasoningEfforts)[number] + +const REASONING_EFFORTS = new Set(ReasoningEfforts) +const OPENAI_REASONING_EFFORTS = new Set(OpenAIReasoningEfforts) +const TEXT_VERBOSITY = new Set(["low", "medium", "high"]) + +export const OpenAIReasoningEffort = Schema.Literals(OpenAIReasoningEfforts) +export const OpenAITextVerbosity = TextVerbosity + +const isAnyReasoningEffort = (effort: unknown): effort is ReasoningEffort => + typeof effort === "string" && REASONING_EFFORTS.has(effort) + +export const isReasoningEffort = (effort: unknown): effort is OpenAIReasoningEffort => + typeof effort === "string" && OPENAI_REASONING_EFFORTS.has(effort) + +const isTextVerbosity = (value: unknown): value is TextVerbosityValue => + typeof value === "string" && TEXT_VERBOSITY.has(value) + +const options = (request: LLMRequest) => request.providerOptions?.openai + +export const store = (request: LLMRequest): boolean | undefined => { + const value = options(request)?.store + return typeof value === "boolean" ? value : undefined +} + +export const reasoningEffort = (request: LLMRequest): ReasoningEffort | undefined => { + const value = options(request)?.reasoningEffort + return isAnyReasoningEffort(value) ? value : undefined +} + +export const reasoningSummary = (request: LLMRequest): "auto" | undefined => { + return options(request)?.reasoningSummary === "auto" ? "auto" : undefined +} + +export const encryptedReasoning = (request: LLMRequest) => + options(request)?.includeEncryptedReasoning === true ? true : undefined + +export const promptCacheKey = (request: LLMRequest) => { + const value = options(request)?.promptCacheKey + return typeof value === "string" ? value : undefined +} + +export const textVerbosity = (request: LLMRequest) => { + const value = options(request)?.textVerbosity + return isTextVerbosity(value) ? value : undefined +} + +export * as OpenAIOptions from "./openai-options" diff --git a/packages/llm/src/protocols/utils/tool-stream.ts b/packages/llm/src/protocols/utils/tool-stream.ts new file mode 100644 index 000000000000..e6ac5fefd0bf --- /dev/null +++ b/packages/llm/src/protocols/utils/tool-stream.ts @@ -0,0 +1,196 @@ +import { Effect } from "effect" +import { LLMError, type ProviderMetadata, type ToolCall, type ToolInputDelta } from "../../schema" +import { eventError, parseToolInput, type ToolAccumulator } from "../shared" + +type StreamKey = string | number + +/** + * One pending streamed tool call. Providers emit the tool identity and JSON + * argument text across separate chunks; `input` is the raw JSON string collected + * so far, not the parsed object. 
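+ *
+ * For example (illustrative): arguments streamed as the chunks `{"pa` and `th":"a.ts"}`
+ * leave `input` holding the raw text `{"path":"a.ts"}`; it is parsed only on finish.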
 */
+export interface PendingTool extends ToolAccumulator {
+  readonly providerExecuted?: boolean
+  readonly providerMetadata?: ProviderMetadata
+}
+
+/**
+ * Sparse parser state keyed by the provider's stream-local tool identifier.
+ *
+ * This key is not the final tool-call id (`call_...`). It is the id/index the
+ * provider uses while streaming a partial call: OpenAI Chat / Anthropic /
+ * Bedrock use numeric content indexes, while OpenAI Responses uses string
+ * `item_id`s. The generic keeps each protocol internally consistent.
+ */
+export type State<K extends StreamKey> = Partial<Record<K, PendingTool>>
+
+/**
+ * Result of adding argument text to one pending tool call. It returns both the
+ * next `tools` state and the updated `tool` because parsers often need the
+ * current id/name immediately. `event` is present only when new text arrived;
+ * metadata-only deltas update identity without emitting `tool-input-delta`.
+ */
+export interface AppendOutcome<K extends StreamKey> {
+  readonly tools: State<K>
+  readonly tool: PendingTool
+  readonly event?: ToolInputDelta
+}
+
+/** Create empty accumulator state for one provider stream. */
+export const empty = <K extends StreamKey>(): State<K> => ({})
+
+const withTool = <K extends StreamKey>(tools: State<K>, key: K, tool: PendingTool): State<K> => {
+  return { ...tools, [key]: tool }
+}
+
+const withoutTool = <K extends StreamKey>(tools: State<K>, key: K): State<K> => {
+  const next = { ...tools }
+  delete next[key]
+  return next
+}
+
+const inputDelta = (tool: PendingTool, text: string): ToolInputDelta => ({
+  type: "tool-input-delta",
+  id: tool.id,
+  name: tool.name,
+  text,
+  ...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
+})
+
+const toolCall = (route: string, tool: PendingTool, inputOverride?: string) =>
+  parseToolInput(route, tool.name, inputOverride ?? tool.input).pipe(
+    Effect.map(
+      (input): ToolCall =>
+        tool.providerExecuted
+          ? {
+              type: "tool-call",
+              id: tool.id,
+              name: tool.name,
+              input,
+              providerExecuted: true,
+              ...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
+            }
+          : {
+              type: "tool-call",
+              id: tool.id,
+              name: tool.name,
+              input,
+              ...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
+            },
+    ),
+  )
+
+/** Store the updated tool and produce the optional public delta event. */
+const appendTool = <K extends StreamKey>(
+  tools: State<K>,
+  key: K,
+  tool: PendingTool,
+  text: string,
+): AppendOutcome<K> => ({
+  tools: withTool(tools, key, tool),
+  tool,
+  event: text.length === 0 ? undefined : inputDelta(tool, text),
+})
+
+export const isError = <K extends StreamKey>(result: AppendOutcome<K> | LLMError): result is LLMError =>
+  result instanceof LLMError
+
+/**
+ * Register a tool call whose start event arrived before any argument deltas.
+ * Used by Anthropic `content_block_start`, Bedrock `contentBlockStart`, and
+ * OpenAI Responses `response.output_item.added`.
+ */
+export const start = <K extends StreamKey>(
+  tools: State<K>,
+  key: K,
+  tool: Omit<PendingTool, "input"> & { readonly input?: string },
+) => withTool(tools, key, { ...tool, input: tool.input ?? "" })
+
+/**
+ * Append a streamed argument delta, starting the tool if this provider encodes
+ * identity on the first delta instead of a separate start event. OpenAI Chat has
+ * this shape: `tool_calls[].index` is the stream key, and `id` / `name` may only
+ * appear on the first delta for that index.
+ */
+export const appendOrStart = <K extends StreamKey>(
+  route: string,
+  tools: State<K>,
+  key: K,
+  delta: { readonly id?: string; readonly name?: string; readonly text: string },
+  missingToolMessage: string,
+): AppendOutcome<K> | LLMError => {
+  const current = tools[key]
+  const id = delta.id ??
current?.id + const name = delta.name ?? current?.name + if (!id || !name) return eventError(route, missingToolMessage) + + const tool = { + id, + name, + input: `${current?.input ?? ""}${delta.text}`, + providerExecuted: current?.providerExecuted, + providerMetadata: current?.providerMetadata, + } + if (current && delta.text.length === 0 && current.id === id && current.name === name) return { tools, tool: current } + return appendTool(tools, key, tool, delta.text) +} + +/** + * Append argument text to a tool that must already have been started. This keeps + * protocols honest when their stream grammar promises a start event before any + * argument delta. + */ +export const appendExisting = ( + route: string, + tools: State, + key: K, + text: string, + missingToolMessage: string, +): AppendOutcome | LLMError => { + const current = tools[key] + if (!current) return eventError(route, missingToolMessage) + if (text.length === 0) return { tools, tool: current } + return appendTool(tools, key, { ...current, input: `${current.input}${text}` }, text) +} + +/** + * Finalize one pending tool call: parse the accumulated raw JSON, remove it + * from state, and return the optional public `tool-call` event. Missing keys are + * a no-op because some providers emit stop events for non-tool content blocks. + */ +export const finish = (route: string, tools: State, key: K) => + Effect.gen(function* () { + const tool = tools[key] + if (!tool) return { tools } + return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool) } + }) + +/** + * Finalize one pending tool call with an authoritative final input string. + * OpenAI Responses can send accumulated deltas and then repeat the completed + * arguments on `response.output_item.done`; the final value wins. + */ +export const finishWithInput = (route: string, tools: State, key: K, input: string) => + Effect.gen(function* () { + const tool = tools[key] + if (!tool) return { tools } + return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool, input) } + }) + +/** + * Finalize every pending tool call at once. OpenAI Chat has this shape: it does + * not emit per-tool stop events, so all accumulated calls finish when the choice + * receives a terminal `finish_reason`. 
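+ *
+ * Illustrative: after `appendOrStart` has accumulated calls at indexes 0 and 1,
+ * `finishAll(route, tools)` parses both raw inputs and returns
+ * `{ tools: empty(), events: [...] }` with one `tool-call` event per pending call.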
+ */ +export const finishAll = (route: string, tools: State) => + Effect.gen(function* () { + const pending = Object.values(tools).filter( + (tool): tool is PendingTool => tool !== undefined, + ) + return { + tools: empty(), + events: yield* Effect.forEach(pending, (tool) => toolCall(route, tool)), + } + }) + +export * as ToolStream from "./tool-stream" diff --git a/packages/llm/src/provider.ts b/packages/llm/src/provider.ts new file mode 100644 index 000000000000..8299b5865ca0 --- /dev/null +++ b/packages/llm/src/provider.ts @@ -0,0 +1,31 @@ +import type { RouteModelInput } from "./route/client" +import type { ModelID, ModelRef, ProviderID } from "./schema" + +export type ModelOptions = Omit + +export type ModelFactory = ( + id: string | ModelID, + options?: Options, +) => ModelRef + +type AnyModelFactory = (...args: never[]) => ModelRef + +export interface Definition { + readonly id: ProviderID + readonly model: Factory + readonly apis?: Record +} + +type DefinitionShape = { + readonly id: ProviderID + readonly model: (...args: never[]) => ModelRef + readonly apis?: Record ModelRef> +} + +type NoExtraFields = Input & Record, never> + +export const make = ( + definition: NoExtraFields, +) => definition + +export * as Provider from "./provider" diff --git a/packages/llm/src/providers/amazon-bedrock.ts b/packages/llm/src/providers/amazon-bedrock.ts new file mode 100644 index 000000000000..82408d514e98 --- /dev/null +++ b/packages/llm/src/providers/amazon-bedrock.ts @@ -0,0 +1,48 @@ +import { Route, type RouteModelInput } from "../route/client" +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * as BedrockConverse from "../protocols/bedrock-converse" +import type { BedrockCredentials } from "../protocols/bedrock-converse" + +export const id = ProviderID.make("amazon-bedrock") + +export type ModelOptions = Omit & { + readonly apiKey?: string + readonly headers?: Record + readonly credentials?: BedrockCredentials + /** AWS region. Defaults to `us-east-1` when neither this nor `credentials.region` is set. */ + readonly region?: string + /** Override the computed `https://bedrock-runtime..amazonaws.com` URL. */ + readonly baseURL?: string +} +type ModelInput = ModelOptions & Pick + +export const routes = [BedrockConverse.route] + +const bedrockBaseURL = (region: string) => `https://bedrock-runtime.${region}.amazonaws.com` + +const converseModel = Route.model( + BedrockConverse.route, + { + provider: "amazon-bedrock", + }, + { + mapInput: (input) => { + const { credentials, region, baseURL, ...rest } = input + const resolvedRegion = region ?? credentials?.region ?? "us-east-1" + return { + ...rest, + baseURL: baseURL ?? 
bedrockBaseURL(resolvedRegion), + native: BedrockConverse.nativeCredentials(input.native, credentials), + } + }, + }, +) + +export const model = (modelID: string | ModelID, options: ModelOptions = {}) => + converseModel({ ...options, id: modelID }) + +export const provider = Provider.make({ + id, + model, +}) diff --git a/packages/llm/src/providers/anthropic.ts b/packages/llm/src/providers/anthropic.ts new file mode 100644 index 000000000000..a7ec7ede9fbd --- /dev/null +++ b/packages/llm/src/providers/anthropic.ts @@ -0,0 +1,16 @@ +import type { RouteModelInput } from "../route/client" +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * as AnthropicMessages from "../protocols/anthropic-messages" + +export const id = ProviderID.make("anthropic") + +export const routes = [AnthropicMessages.route] + +export const model = (id: string | ModelID, options: Omit & { readonly baseURL?: string } = {}) => + AnthropicMessages.model({ ...options, id }) + +export const provider = Provider.make({ + id, + model, +}) diff --git a/packages/llm/src/providers/azure.ts b/packages/llm/src/providers/azure.ts new file mode 100644 index 000000000000..8d60fb6669b5 --- /dev/null +++ b/packages/llm/src/providers/azure.ts @@ -0,0 +1,83 @@ +import { Auth } from "../route/auth" +import { type AtLeastOne, type ProviderAuthOption } from "../route/auth-options" +import { Route } from "../route/client" +import type { ModelInput } from "../llm" +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * as OpenAIChat from "../protocols/openai-chat" +import * as OpenAIResponses from "../protocols/openai-responses" +import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options" + +export const id = ProviderID.make("azure") +const routeAuth = Auth.remove("authorization").andThen(Auth.apiKeyHeader("api-key")) + +// Azure needs the customer's resource URL; supply either `resourceName` +// (helper builds the URL) or `baseURL` directly. +type AzureURL = AtLeastOne<{ readonly resourceName: string; readonly baseURL: string }> + +export type ModelOptions = AzureURL & + Omit & + ProviderAuthOption<"optional"> & { + readonly apiVersion?: string + readonly useCompletionUrls?: boolean + readonly providerOptions?: OpenAIProviderOptionsInput + } +type AzureModelInput = ModelOptions & Pick + +const resourceBaseURL = (resourceName: string) => `https://${resourceName.trim()}.openai.azure.com/openai/v1` + +const responsesRoute = OpenAIResponses.route.with({ + id: "azure-openai-responses", + provider: id, + transport: OpenAIResponses.httpTransport.with({ auth: routeAuth }), +}) + +const chatRoute = OpenAIChat.route.with({ + id: "azure-openai-chat", + provider: id, + transport: OpenAIChat.httpTransport.with({ auth: routeAuth }), +}) + +export const routes = [responsesRoute, chatRoute] + +const mapInput = (input: AzureModelInput) => { + const { apiKey: _, apiVersion, resourceName, useCompletionUrls, ...rest } = input + return { + ...withOpenAIOptions(input.id, rest), + auth: + "auth" in input && input.auth + ? input.auth + : Auth.remove("authorization").andThen( + Auth.optional("apiKey" in input ? input.apiKey : undefined, "apiKey") + .orElse(Auth.config("AZURE_OPENAI_API_KEY")) + .pipe(Auth.header("api-key")), + ), + // AtLeastOne guarantees at least one is set; baseURL wins if both are. + baseURL: rest.baseURL ?? resourceBaseURL(resourceName!), + queryParams: { + ...rest.queryParams, + "api-version": apiVersion ?? 
rest.queryParams?.["api-version"] ?? "v1", + }, + } +} + +const chatModel = Route.model(chatRoute, {}, { mapInput }) +const responsesModel = Route.model(responsesRoute, {}, { mapInput }) + +export const responses = (modelID: string | ModelID, options: ModelOptions) => + responsesModel({ ...options, id: modelID }) + +export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID }) + +export const model = (modelID: string | ModelID, options: ModelOptions) => { + if (options.useCompletionUrls === true) return chat(modelID, options) + return responses(modelID, options) +} + +export const provider = Provider.make({ + id, + model, + apis: { responses, chat }, +}) + +export const apis = provider.apis diff --git a/packages/llm/src/providers/cloudflare.ts b/packages/llm/src/providers/cloudflare.ts new file mode 100644 index 000000000000..263595a75507 --- /dev/null +++ b/packages/llm/src/providers/cloudflare.ts @@ -0,0 +1,139 @@ +import type { Config, Redacted } from "effect" +import { type ModelInput } from "../llm" +import { Provider } from "../provider" +import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat" +import { Auth } from "../route/auth" +import { AuthOptions, type AtLeastOne, type ProviderAuthOption } from "../route/auth-options" +import { Route } from "../route/client" +import { ProviderID, type ModelID } from "../schema" + +export const aiGatewayID = ProviderID.make("cloudflare-ai-gateway") +export const workersAIID = ProviderID.make("cloudflare-workers-ai") +export const id = aiGatewayID +export const aiGatewayAuthEnvVars = ["CLOUDFLARE_API_TOKEN", "CF_AIG_TOKEN"] as const +export const workersAIAuthEnvVars = ["CLOUDFLARE_API_KEY", "CLOUDFLARE_WORKERS_AI_TOKEN"] as const + +type CloudflareSecret = string | Redacted.Redacted | Config.Config> + +type GatewayURL = AtLeastOne<{ + readonly accountId: string + readonly baseURL: string +}> & { + readonly gatewayId?: string +} + +export type AIGatewayOptions = GatewayURL & + Omit & + ProviderAuthOption<"optional"> & { + /** Cloudflare AI Gateway authentication token. Sent as `cf-aig-authorization`. 
*/ + readonly gatewayApiKey?: CloudflareSecret + } + +type AIGatewayInput = AIGatewayOptions & Pick + +type WorkersAIURL = AtLeastOne<{ + readonly accountId: string + readonly baseURL: string +}> + +export type WorkersAIOptions = WorkersAIURL & + Omit & + ProviderAuthOption<"optional"> + +type WorkersAIInput = WorkersAIOptions & Pick + +export const aiGatewayBaseURL = (input: GatewayURL) => { + if (input.baseURL) return input.baseURL + if (!input.accountId) throw new Error("Cloudflare.aiGateway requires accountId unless baseURL is supplied") + return `https://gateway.ai.cloudflare.com/v1/${encodeURIComponent(input.accountId)}/${encodeURIComponent(input.gatewayId?.trim() || "default")}/compat` +} + +const aiGatewayAuth = (input: AIGatewayInput) => { + if ("auth" in input && input.auth) return input.auth + const gateway = Auth.optional(input.gatewayApiKey, "gatewayApiKey") + .orElse(Auth.config("CLOUDFLARE_API_TOKEN")) + .orElse(Auth.config("CF_AIG_TOKEN")) + .pipe(Auth.bearerHeader("cf-aig-authorization")) + if (!("apiKey" in input) || input.apiKey === undefined) return gateway + if (input.gatewayApiKey === undefined) return Auth.bearer(input.apiKey) + return Auth.bearerHeader("cf-aig-authorization", input.gatewayApiKey).andThen(Auth.bearer(input.apiKey)) +} + +export const workersAIBaseURL = (input: WorkersAIURL) => { + if (input.baseURL) return input.baseURL + if (!input.accountId) throw new Error("Cloudflare.workersAI requires accountId unless baseURL is supplied") + return `https://api.cloudflare.com/client/v4/accounts/${encodeURIComponent(input.accountId)}/ai/v1` +} + +const workersAIAuth = (input: WorkersAIInput) => { + return AuthOptions.bearer(input, workersAIAuthEnvVars) +} + +export const aiGatewayRoute = OpenAICompatibleChat.route.with({ + id: "cloudflare-ai-gateway", + provider: aiGatewayID, +}) + +export const workersAIRoute = OpenAICompatibleChat.route.with({ + id: "cloudflare-workers-ai", + provider: workersAIID, +}) + +export const routes = [aiGatewayRoute, workersAIRoute] + +const aiGatewayModel = Route.model( + aiGatewayRoute, + { + provider: id, + }, + { + mapInput: (input) => { + const { + accountId: _accountId, + gatewayId: _gatewayId, + apiKey: _apiKey, + gatewayApiKey: _gatewayApiKey, + auth: _auth, + ...rest + } = input + return { + ...rest, + auth: aiGatewayAuth(input), + baseURL: aiGatewayBaseURL(input), + } + }, + }, +) + +const workersAIModel = Route.model( + workersAIRoute, + { + provider: workersAIID, + }, + { + mapInput: (input) => { + const { accountId: _accountId, apiKey: _apiKey, auth: _auth, ...rest } = input + return { + ...rest, + auth: workersAIAuth(input), + baseURL: workersAIBaseURL(input), + } + }, + }, +) + +export const aiGateway = (modelID: string | ModelID, options: AIGatewayOptions) => + aiGatewayModel({ ...options, id: modelID }) + +export const workersAI = (modelID: string | ModelID, options: WorkersAIOptions) => + workersAIModel({ ...options, id: modelID }) + +export const model = aiGateway + +export const provider = Provider.make({ + id, + model, + apis: { aiGateway, workersAI }, +}) + +export const apis = provider.apis diff --git a/packages/llm/src/providers/github-copilot.ts b/packages/llm/src/providers/github-copilot.ts new file mode 100644 index 000000000000..5de738a3bfc3 --- /dev/null +++ b/packages/llm/src/providers/github-copilot.ts @@ -0,0 +1,48 @@ +import { Route } from "../route/client" +import type { ModelInput } from "../llm" +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * 
as OpenAIChat from "../protocols/openai-chat" +import * as OpenAIResponses from "../protocols/openai-responses" +import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options" + +export const id = ProviderID.make("github-copilot") + +// GitHub Copilot has no canonical public URL — callers (opencode, etc.) must +// supply `baseURL` explicitly. +export type ModelOptions = Omit & { + readonly providerOptions?: OpenAIProviderOptionsInput +} +type CopilotModelInput = ModelOptions & Pick + +export const shouldUseResponsesApi = (modelID: string | ModelID) => { + const model = String(modelID) + const match = /^gpt-(\d+)/.exec(model) + if (!match) return false + return Number(match[1]) >= 5 && !model.startsWith("gpt-5-mini") +} + +export const routes = [OpenAIResponses.route, OpenAIChat.route] + +const mapInput = (input: CopilotModelInput) => withOpenAIOptions(input.id, input) + +const chatModel = Route.model(OpenAIChat.route, { provider: id }, { mapInput }) +const responsesModel = Route.model(OpenAIResponses.route, { provider: id }, { mapInput }) + +export const responses = (modelID: string | ModelID, options: ModelOptions) => + responsesModel({ ...options, id: modelID }) + +export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID }) + +export const model = (modelID: string | ModelID, options: ModelOptions) => { + const create = shouldUseResponsesApi(modelID) ? responsesModel : chatModel + return create({ ...options, id: modelID }) +} + +export const provider = Provider.make({ + id, + model, + apis: { responses, chat }, +}) + +export const apis = provider.apis diff --git a/packages/llm/src/providers/google.ts b/packages/llm/src/providers/google.ts new file mode 100644 index 000000000000..d63439bfec0b --- /dev/null +++ b/packages/llm/src/providers/google.ts @@ -0,0 +1,16 @@ +import type { RouteModelInput } from "../route/client" +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * as Gemini from "../protocols/gemini" + +export const id = ProviderID.make("google") + +export const routes = [Gemini.route] + +export const model = (id: string | ModelID, options: Omit & { readonly baseURL?: string } = {}) => + Gemini.model({ ...options, id }) + +export const provider = Provider.make({ + id, + model, +}) diff --git a/packages/llm/src/providers/index.ts b/packages/llm/src/providers/index.ts new file mode 100644 index 000000000000..39adbe25c0cf --- /dev/null +++ b/packages/llm/src/providers/index.ts @@ -0,0 +1,10 @@ +export * as Anthropic from "./anthropic" +export * as AmazonBedrock from "./amazon-bedrock" +export * as Azure from "./azure" +export * as Cloudflare from "./cloudflare" +export * as GitHubCopilot from "./github-copilot" +export * as Google from "./google" +export * as OpenAI from "./openai" +export * as OpenAICompatible from "./openai-compatible" +export * as OpenRouter from "./openrouter" +export * as XAI from "./xai" diff --git a/packages/llm/src/providers/openai-compatible-profile.ts b/packages/llm/src/providers/openai-compatible-profile.ts new file mode 100644 index 000000000000..30770c9671cd --- /dev/null +++ b/packages/llm/src/providers/openai-compatible-profile.ts @@ -0,0 +1,20 @@ +export interface OpenAICompatibleProfile { + readonly provider: string + readonly baseURL: string +} + +export const profiles = { + baseten: { provider: "baseten", baseURL: "https://inference.baseten.co/v1" }, + cerebras: { provider: "cerebras", baseURL: "https://api.cerebras.ai/v1" 
}, + deepinfra: { provider: "deepinfra", baseURL: "https://api.deepinfra.com/v1/openai" }, + deepseek: { provider: "deepseek", baseURL: "https://api.deepseek.com/v1" }, + fireworks: { provider: "fireworks", baseURL: "https://api.fireworks.ai/inference/v1" }, + groq: { provider: "groq", baseURL: "https://api.groq.com/openai/v1" }, + openrouter: { provider: "openrouter", baseURL: "https://openrouter.ai/api/v1" }, + togetherai: { provider: "togetherai", baseURL: "https://api.together.xyz/v1" }, + xai: { provider: "xai", baseURL: "https://api.x.ai/v1" }, +} as const satisfies Record + +export const byProvider: Record = Object.fromEntries( + Object.values(profiles).map((profile) => [profile.provider, profile]), +) diff --git a/packages/llm/src/providers/openai-compatible.ts b/packages/llm/src/providers/openai-compatible.ts new file mode 100644 index 000000000000..e37dcb4adffd --- /dev/null +++ b/packages/llm/src/providers/openai-compatible.ts @@ -0,0 +1,61 @@ +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat" +import type { OpenAICompatibleChatModelInput } from "../protocols/openai-compatible-chat" +import { profiles, type OpenAICompatibleProfile } from "./openai-compatible-profile" + +export const id = ProviderID.make("openai-compatible") + +export type ModelOptions = Omit & { + readonly provider: string +} + +type GenericModelOptions = Omit & { + readonly provider?: string +} + +export type FamilyModelOptions = Omit & { + readonly baseURL?: string +} + +export const routes = [OpenAICompatibleChat.route] + +export const model = (id: string | ModelID, options: ModelOptions) => { + return OpenAICompatibleChat.model({ + ...options, + id, + provider: ProviderID.make(options.provider), + }) +} + +export const profileModel = ( + profile: OpenAICompatibleProfile, + id: string | ModelID, + options: FamilyModelOptions = {}, +) => + OpenAICompatibleChat.model({ + ...options, + id, + provider: profile.provider, + baseURL: options.baseURL ?? profile.baseURL, + }) + +const define = (profile: OpenAICompatibleProfile) => + Provider.make({ + id: ProviderID.make(profile.provider), + model: (id: string | ModelID, options: FamilyModelOptions = {}) => profileModel(profile, id, options), + }) + +export const provider = Provider.make({ + id, + model: (id: string | ModelID, options: GenericModelOptions) => + model(id, { ...options, provider: options.provider ?? 
"openai-compatible" }), +}) + +export const baseten = define(profiles.baseten) +export const cerebras = define(profiles.cerebras) +export const deepinfra = define(profiles.deepinfra) +export const deepseek = define(profiles.deepseek) +export const fireworks = define(profiles.fireworks) +export const groq = define(profiles.groq) +export const togetherai = define(profiles.togetherai) diff --git a/packages/llm/src/providers/openai-options.ts b/packages/llm/src/providers/openai-options.ts new file mode 100644 index 000000000000..8d3980f60995 --- /dev/null +++ b/packages/llm/src/providers/openai-options.ts @@ -0,0 +1,70 @@ +import type { ProviderOptions, ReasoningEffort, TextVerbosity } from "../schema" +import { mergeProviderOptions } from "../schema" + +export interface OpenAIOptionsInput { + readonly [key: string]: unknown + readonly store?: boolean + readonly promptCacheKey?: string + readonly reasoningEffort?: ReasoningEffort + readonly reasoningSummary?: "auto" + readonly includeEncryptedReasoning?: boolean + readonly textVerbosity?: TextVerbosity +} + +export type OpenAIProviderOptionsInput = ProviderOptions & { + readonly openai?: OpenAIOptionsInput +} + +const definedEntries = (input: Record) => + Object.entries(input).filter((entry) => entry[1] !== undefined) + +const openAIProviderOptions = (options: OpenAIOptionsInput | undefined): ProviderOptions | undefined => { + const openai = Object.fromEntries( + definedEntries({ + store: options?.store, + promptCacheKey: options?.promptCacheKey, + reasoningEffort: options?.reasoningEffort, + reasoningSummary: options?.reasoningSummary, + includeEncryptedReasoning: options?.includeEncryptedReasoning, + textVerbosity: options?.textVerbosity, + }), + ) + if (Object.keys(openai).length === 0) return undefined + return { openai } +} + +export const gpt5DefaultOptions = ( + modelID: string, + options: { readonly textVerbosity?: boolean } = {}, +): ProviderOptions | undefined => { + const id = modelID.toLowerCase() + if (!id.includes("gpt-5") || id.includes("gpt-5-chat") || id.includes("gpt-5-pro")) return undefined + return openAIProviderOptions({ + reasoningEffort: "medium", + reasoningSummary: "auto", + textVerbosity: + options.textVerbosity === true && id.includes("gpt-5.") && !id.includes("codex") && !id.includes("-chat") + ? 
"low" + : undefined, + }) +} + +export const openAIDefaultOptions = ( + modelID: string, + options: { readonly textVerbosity?: boolean } = {}, +): ProviderOptions | undefined => + mergeProviderOptions(openAIProviderOptions({ store: false }), gpt5DefaultOptions(modelID, options)) + +export const withOpenAIOptions = ( + modelID: string, + options: Options, + defaults: { readonly textVerbosity?: boolean } = {}, +): Options & { readonly id: string; readonly providerOptions?: ProviderOptions } => { + return { + ...options, + id: modelID, + providerOptions: mergeProviderOptions(openAIDefaultOptions(modelID, defaults), options.providerOptions), + } +} + +export * as OpenAIProviderOptions from "./openai-options" diff --git a/packages/llm/src/providers/openai.ts b/packages/llm/src/providers/openai.ts new file mode 100644 index 000000000000..cbd9b9952294 --- /dev/null +++ b/packages/llm/src/providers/openai.ts @@ -0,0 +1,53 @@ +import { AuthOptions, type ProviderAuthOption } from "../route/auth-options" +import type { RouteModelInput } from "../route/client" +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * as OpenAIChat from "../protocols/openai-chat" +import * as OpenAIResponses from "../protocols/openai-responses" +import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options" + +export type { OpenAIOptionsInput } from "./openai-options" + +export const id = ProviderID.make("openai") + +export const routes = [OpenAIResponses.route, OpenAIResponses.webSocketRoute, OpenAIChat.route] + +// This provider facade wraps the lower-level Responses and Chat model factories +// with OpenAI-specific conveniences: typed options, API-key sugar, env fallback, +// and default option normalization. 
+type OpenAIModelInput = Omit & + ProviderAuthOption<"optional"> & { + readonly baseURL?: string + readonly providerOptions?: OpenAIProviderOptionsInput + } + +const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "OPENAI_API_KEY") + +export const responses = (id: string | ModelID, options: OpenAIModelInput> = {}) => { + const { apiKey: _, ...rest } = options + return OpenAIResponses.model(withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true })) +} + +export const responsesWebSocket = ( + id: string | ModelID, + options: OpenAIModelInput> = {}, +) => { + const { apiKey: _, ...rest } = options + return OpenAIResponses.webSocketModel( + withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true }), + ) +} + +export const chat = (id: string | ModelID, options: OpenAIModelInput> = {}) => { + const { apiKey: _, ...rest } = options + return OpenAIChat.model(withOpenAIOptions(id, { ...rest, auth: auth(options) })) +} + +export const provider = Provider.make({ + id, + model: responses, + apis: { responses, responsesWebSocket, chat }, +}) + +export const model = provider.model +export const apis = provider.apis diff --git a/packages/llm/src/providers/openrouter.ts b/packages/llm/src/providers/openrouter.ts new file mode 100644 index 000000000000..4c1a4321061e --- /dev/null +++ b/packages/llm/src/providers/openrouter.ts @@ -0,0 +1,88 @@ +import { Effect, Schema } from "effect" +import { Route, type RouteModelInput } from "../route/client" +import { Endpoint } from "../route/endpoint" +import { Framing } from "../route/framing" +import { Provider } from "../provider" +import { Protocol } from "../route/protocol" +import { ProviderID, type ModelID, type ProviderOptions } from "../schema" +import * as OpenAICompatibleProfiles from "./openai-compatible-profile" +import * as OpenAIChat from "../protocols/openai-chat" +import { isRecord } from "../protocols/shared" + +export const profile = OpenAICompatibleProfiles.profiles.openrouter +export const id = ProviderID.make(profile.provider) +const ADAPTER = "openrouter" + +export interface OpenRouterOptions { + readonly [key: string]: unknown + readonly usage?: boolean | Record + readonly reasoning?: Record + readonly promptCacheKey?: string +} + +export type OpenRouterProviderOptionsInput = ProviderOptions & { + readonly openrouter?: OpenRouterOptions +} + +export type ModelOptions = Omit & { + readonly baseURL?: string + readonly providerOptions?: OpenRouterProviderOptionsInput +} +type ModelInput = ModelOptions & Pick + +const OpenRouterBody = Schema.StructWithRest(Schema.Struct(OpenAIChat.bodyFields), [ + Schema.Record(Schema.String, Schema.Any), +]) +export type OpenRouterBody = Schema.Schema.Type + +export const protocol = Protocol.make({ + id: "openrouter-chat", + body: { + schema: OpenRouterBody, + from: (request) => + OpenAIChat.protocol.body.from(request).pipe( + Effect.map( + (body) => + ({ + ...body, + ...bodyOptions(request.providerOptions?.openrouter), + }) as OpenRouterBody, + ), + ), + }, + stream: OpenAIChat.protocol.stream, +}) + +const bodyOptions = (input: unknown) => { + const openrouter = isRecord(input) ? input : {} + return { + ...(openrouter.usage === true + ? { usage: { include: true } } + : isRecord(openrouter.usage) + ? { usage: openrouter.usage } + : {}), + ...(isRecord(openrouter.reasoning) ? { reasoning: openrouter.reasoning } : {}), + ...(typeof openrouter.promptCacheKey === "string" ? 
{ prompt_cache_key: openrouter.promptCacheKey } : {}), + } +} + +export const route = Route.make({ + id: ADAPTER, + protocol, + endpoint: Endpoint.path("/chat/completions"), + framing: Framing.sse, +}) + +export const routes = [route] + +const modelRef = Route.model(route, { + provider: profile.provider, + baseURL: profile.baseURL, +}) + +export const model = (id: string | ModelID, options: ModelOptions = {}) => modelRef({ ...options, id }) + +export const provider = Provider.make({ + id, + model, +}) diff --git a/packages/llm/src/providers/xai.ts b/packages/llm/src/providers/xai.ts new file mode 100644 index 000000000000..089c8c7339a7 --- /dev/null +++ b/packages/llm/src/providers/xai.ts @@ -0,0 +1,52 @@ +import { AuthOptions, type ProviderAuthOption } from "../route/auth-options" +import { Route } from "../route/client" +import type { RouteModelInput } from "../route/client" +import { Provider } from "../provider" +import { ProviderID, type ModelID } from "../schema" +import * as OpenAICompatibleProfiles from "./openai-compatible-profile" +import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat" +import * as OpenAIResponses from "../protocols/openai-responses" + +export const id = ProviderID.make("xai") + +export type ModelOptions = Omit & + ProviderAuthOption<"optional"> & { + readonly baseURL?: string + } + +export const routes = [OpenAIResponses.route, OpenAICompatibleChat.route] + +const responsesModel = Route.model(OpenAIResponses.route, { provider: id }) +const chatModel = OpenAICompatibleChat.model + +const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "XAI_API_KEY") + +export const responses = (modelID: string | ModelID, options: ModelOptions = {}) => { + const { apiKey: _, ...rest } = options + return responsesModel({ + ...rest, + auth: auth(options), + id: modelID, + baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL, + }) +} + +export const chat = (modelID: string | ModelID, options: ModelOptions = {}) => { + const { apiKey: _, ...rest } = options + return chatModel({ + ...rest, + auth: auth(options), + id: modelID, + provider: id, + baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL, + }) +} + +export const provider = Provider.make({ + id, + model: responses, + apis: { responses, chat }, +}) + +export const model = provider.model +export const apis = provider.apis diff --git a/packages/llm/src/route/auth-options.ts b/packages/llm/src/route/auth-options.ts new file mode 100644 index 000000000000..7e40aa12a210 --- /dev/null +++ b/packages/llm/src/route/auth-options.ts @@ -0,0 +1,57 @@ +import type { Config, Redacted } from "effect" +import { Auth } from "./auth" + +export type ApiKeyMode = "optional" | "required" + +export type AuthOverride = { + readonly auth: Auth + readonly apiKey?: never +} + +export type OptionalApiKeyAuth = { + readonly apiKey?: string | Redacted.Redacted | Config.Config> + readonly auth?: never +} + +export type RequiredApiKeyAuth = { + readonly apiKey: string | Redacted.Redacted | Config.Config> + readonly auth?: never +} + +export type ProviderAuthOption = + | AuthOverride + | (Mode extends "optional" ? OptionalApiKeyAuth : RequiredApiKeyAuth) + +export type ModelOptions = Omit & ProviderAuthOption + +export type ModelArgs = Mode extends "optional" + ? readonly [options?: ModelOptions] + : readonly [options: ModelOptions] + +export type ModelFactory = (id: string, ...args: ModelArgs) => Model + +/** + * Require at least one of the keys in `T`. 
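+ * For example (illustrative), `AtLeastOne<{ a: string; b: string }>` accepts `{ a }`,
+ * `{ b }`, or both, but rejects `{}`.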
Use for option shapes where any + * subset of fields is acceptable but at least one must be present (e.g. Azure + * accepts `resourceName` or `baseURL`). + */ +export type AtLeastOne = { + [K in keyof T]: Required> & Partial> +}[keyof T] + +/** + * Standard bearer-auth resolution for providers: honor an explicit `auth` + * override, otherwise resolve `apiKey` (option > config var) and apply it as + * a bearer token. + */ +export const bearer = (options: ProviderAuthOption<"optional">, envVar: string | ReadonlyArray): Auth => { + if ("auth" in options && options.auth) return options.auth + return (Array.isArray(envVar) ? envVar : [envVar]) + .reduce( + (auth, name) => auth.orElse(Auth.config(name)), + Auth.optional("apiKey" in options ? options.apiKey : undefined, "apiKey"), + ) + .bearer() +} + +export * as AuthOptions from "./auth-options" diff --git a/packages/llm/src/route/auth.ts b/packages/llm/src/route/auth.ts new file mode 100644 index 000000000000..540c2845f208 --- /dev/null +++ b/packages/llm/src/route/auth.ts @@ -0,0 +1,196 @@ +import { Config, Effect, Redacted } from "effect" +import { Headers } from "effect/unstable/http" +import { AuthenticationReason, InvalidRequestReason, LLMError, type LLMRequest } from "../schema" + +export class MissingCredentialError extends Error { + readonly _tag = "MissingCredentialError" + + constructor(readonly source: string) { + super(`Missing auth credential: ${source}`) + } +} + +export type CredentialError = MissingCredentialError | Config.ConfigError +export type AuthError = CredentialError | LLMError + +export interface AuthInput { + readonly request: LLMRequest + readonly method: "POST" | "GET" + readonly url: string + readonly body: string + readonly headers: Headers.Headers +} + +export interface Credential { + readonly load: Effect.Effect, CredentialError> + readonly orElse: (that: Credential) => Credential + readonly bearer: () => Auth + readonly header: (name: string) => Auth + readonly pipe: (f: (self: Credential) => A) => A +} + +export interface Auth { + readonly apply: (input: AuthInput) => Effect.Effect + readonly andThen: (that: Auth) => Auth + readonly orElse: (that: Auth) => Auth + readonly pipe: (f: (self: Auth) => A) => A +} + +export const isAuth = (input: unknown): input is Auth => + typeof input === "object" && input !== null && "apply" in input && typeof input.apply === "function" + +const credential = (load: Effect.Effect, CredentialError>): Credential => { + const self: Credential = { + load, + orElse: (that) => credential(load.pipe(Effect.catch(() => that.load))), + bearer: () => fromCredential(self, (secret) => ({ authorization: `Bearer ${secret}` })), + header: (name) => fromCredential(self, (secret) => ({ [name]: secret })), + pipe: (f) => f(self), + } + return self +} + +const auth = (apply: Auth["apply"]): Auth => { + const self: Auth = { + apply, + andThen: (that) => + auth((input) => apply(input).pipe(Effect.flatMap((headers) => that.apply({ ...input, headers })))), + orElse: (that) => auth((input) => apply(input).pipe(Effect.catch(() => that.apply(input)))), + pipe: (f) => f(self), + } + return self +} + +const fromCredential = (source: Credential, render: (secret: string) => Headers.Input) => + auth((input) => + source.load.pipe(Effect.map((secret) => Headers.setAll(input.headers, render(Redacted.value(secret))))), + ) + +const secretEffect = (secret: string | Redacted.Redacted, source: string) => { + const redacted = typeof secret === "string" ? 
Redacted.make(secret) : secret + if (Redacted.value(redacted) === "") return Effect.fail(new MissingCredentialError(source)) + return Effect.succeed(redacted) +} + +const credentialFromSecret = ( + secret: string | Redacted.Redacted | Config.Config>, + source: string, +) => { + if (typeof secret === "string" || Redacted.isRedacted(secret)) return credential(secretEffect(secret, source)) + return credential( + Effect.gen(function* () { + return yield* secretEffect(yield* secret, source) + }), + ) +} + +export const value = (secret: string, source = "value") => credentialFromSecret(secret, source) + +export const optional = ( + secret: string | Redacted.Redacted | Config.Config> | undefined, + source = "optional value", +) => + secret === undefined + ? credential(Effect.fail(new MissingCredentialError(source))) + : credentialFromSecret(secret, source) + +export const config = (name: string) => credentialFromSecret(Config.redacted(name), name) + +export const effect = (load: Effect.Effect, CredentialError>) => credential(load) + +export const none = auth((input) => Effect.succeed(input.headers)) + +export const headers = (input: Headers.Input) => + auth((inputAuth) => Effect.succeed(Headers.setAll(inputAuth.headers, input))) + +export const remove = (name: string) => auth((input) => Effect.succeed(Headers.remove(input.headers, name))) + +export const custom = (apply: (input: AuthInput) => Effect.Effect) => auth(apply) + +export const passthrough = none + +const fromModelApiKey = (from: (apiKey: string) => Headers.Input) => + auth(({ request, headers }) => { + const key = request.model.apiKey + if (!key) return Effect.succeed(headers) + return Effect.succeed(Headers.setAll(headers, from(key))) + }) + +const credentialInput = ( + source: string | Redacted.Redacted | Config.Config> | Credential, +) => + typeof source === "string" || Redacted.isRedacted(source) || Config.isConfig(source) + ? 
credentialFromSecret(source, "value") + : source + +export function bearer(): Auth +export function bearer( + source: string | Redacted.Redacted | Config.Config> | Credential, +): Auth +export function bearer( + source?: string | Redacted.Redacted | Config.Config> | Credential, +) { + if (source === undefined) return fromModelApiKey((key) => ({ authorization: `Bearer ${key}` })) + return credentialInput(source).bearer() +} + +export const apiKey = bearer + +export const apiKeyHeader = (name: string) => fromModelApiKey((key) => ({ [name]: key })) + +export function header( + name: string, +): (source: string | Redacted.Redacted | Config.Config> | Credential) => Auth +export function header( + name: string, + source: string | Redacted.Redacted | Config.Config> | Credential, +): Auth +export function header( + name: string, + source?: string | Redacted.Redacted | Config.Config> | Credential, +) { + if (source === undefined) { + return ( + next: string | Redacted.Redacted | Config.Config> | Credential, + ) => credentialInput(next).header(name) + } + return credentialInput(source).header(name) +} + +export function bearerHeader( + name: string, +): (source: string | Redacted.Redacted | Config.Config> | Credential) => Auth +export function bearerHeader( + name: string, + source: string | Redacted.Redacted | Config.Config> | Credential, +): Auth +export function bearerHeader( + name: string, + source?: string | Redacted.Redacted | Config.Config> | Credential, +) { + const render = (input: string | Redacted.Redacted | Config.Config> | Credential) => + fromCredential(credentialInput(input), (secret) => ({ [name]: `Bearer ${secret}` })) + if (source === undefined) return render + return render(source) +} + +const toLLMError = (error: AuthError): LLMError => { + if (error instanceof MissingCredentialError || error instanceof Config.ConfigError) { + return new LLMError({ + module: "Auth", + method: "apply", + reason: + error instanceof MissingCredentialError + ? 
new AuthenticationReason({ message: error.message, kind: "missing" }) + : new InvalidRequestReason({ message: `Failed to resolve auth config: ${error.message}` }), + }) + } + return error +} + +export const toEffect = + (input: Auth) => + (authInput: AuthInput): Effect.Effect => + input.apply(authInput).pipe(Effect.mapError(toLLMError)) + +export * as Auth from "./auth" diff --git a/packages/llm/src/route/client.ts b/packages/llm/src/route/client.ts new file mode 100644 index 000000000000..0b9d92cecb59 --- /dev/null +++ b/packages/llm/src/route/client.ts @@ -0,0 +1,528 @@ +import { Cause, Context, Effect, Layer, Schema, Stream } from "effect" +import type { Auth as AuthDef } from "./auth" +import type { Endpoint } from "./endpoint" +import { RequestExecutor } from "./executor" +import type { Framing } from "./framing" +import { HttpTransport } from "./transport" +import type { Transport, TransportRuntime } from "./transport" +import { WebSocketExecutor } from "./transport" +import type { Service as WebSocketExecutorService } from "./transport/websocket" +import type { Protocol } from "./protocol" +import * as ProviderShared from "../protocols/shared" +import * as ToolRuntime from "../tool-runtime" +import type { Tools } from "../tool" +import type { LLMError, LLMEvent, PreparedRequestOf, ProtocolID } from "../schema" +import { + GenerationOptions, + HttpOptions, + LLMRequest, + LLMResponse, + ModelID, + ModelLimits, + ModelRef, + LLMError as LLMErrorClass, + NoRouteReason, + PreparedRequest, + ProviderID, + RouteID, + mergeGenerationOptions, + mergeHttpOptions, + mergeProviderOptions, +} from "../schema" + +export interface RouteBody { + /** Schema for the validated provider-native body sent as the JSON request. */ + readonly schema: Schema.Codec + /** Build the provider-native body from a common `LLMRequest`. */ + readonly from: (request: LLMRequest) => Effect.Effect +} + +export interface Route { + readonly id: string + readonly provider?: ProviderID + readonly protocol: ProtocolID + readonly transport: Transport + readonly defaults: RouteDefaults + readonly body: RouteBody + readonly with: (patch: RoutePatch) => Route + readonly model: (input: Input) => ModelRef + readonly prepareTransport: (body: Body, request: LLMRequest) => Effect.Effect + readonly streamPrepared: ( + prepared: Prepared, + request: LLMRequest, + runtime: TransportRuntime, + ) => Stream.Stream +} + +// Route registries intentionally erase body generics after construction. +// Normal call sites use `OpenAIChat.route`; callers only need body types +// when preparing a request with a protocol-specific type assertion. +// oxlint-disable-next-line typescript-eslint/no-explicit-any +export type AnyRoute = Route + +const routeRegistry = new Map() + +// Route lookup is intentionally global: model refs name a route id, and +// importing the provider/protocol/custom-route module registers the runnable +// implementation. Duplicate ids are bugs because model refs cannot disambiguate +// them. 
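+// Illustrative (hypothetical route id): a route built via `Route.make({ id: "my-route", ... })`
+// registers itself when its module is imported, so a `ModelRef` carrying `route: "my-route"`
+// resolves only after that import has run.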
+const register = (route: R): R => { + const existing = routeRegistry.get(route.id) + if (existing && existing !== route) throw new Error(`Duplicate LLM route id "${route.id}"`) + routeRegistry.set(route.id, route) + return route +} + +const registeredRoute = (id: string) => routeRegistry.get(id) + +export type HttpOptionsInput = HttpOptions.Input + +export type ModelRefInput = Omit< + ConstructorParameters[0], + "id" | "provider" | "route" | "limits" | "generation" | "http" | "auth" +> & { + readonly id: string | ModelID + readonly provider: string | ProviderID + readonly route: string | RouteID + readonly auth?: AuthDef + readonly limits?: ModelLimits.Input + readonly generation?: GenerationOptions.Input + readonly http?: HttpOptionsInput +} + +// `baseURL` is required on `ModelRefInput` (every materialized `ModelRef` has +// a host) but optional at the route-input layers below. The route's `defaults` +// can supply a canonical URL (e.g. OpenAI/Anthropic) so the user's input may +// omit it. Routes without a canonical URL (OpenAI-compatible, GitHub Copilot) +// re-tighten this in their own input type. +export type RouteModelInput = Omit & { + readonly baseURL?: string +} + +export type RouteModelDefaults = Omit & { + readonly baseURL?: string +} + +export type RouteRoutedModelInput = Omit & { + readonly baseURL?: string +} + +export type RouteRoutedModelDefaults = Partial> + +export type RouteDefaults = Partial> + +export interface RoutePatch extends RouteDefaults { + readonly id: string + readonly provider?: string | ProviderID + readonly transport?: Transport +} + +type RouteMappedModelInput = RouteModelInput | RouteRoutedModelInput + +export interface RouteModelOptions< + Input extends RouteMappedModelInput, + Output extends RouteMappedModelInput = RouteMappedModelInput, +> { + readonly mapInput?: (input: Input) => Output +} + +export interface RouteMappedModelOptions { + readonly mapInput: (input: Input) => Output +} + +const modelWithDefaults = + ( + route: AnyRoute, + defaults: Partial>, + options: { readonly mapInput?: (input: Input) => RouteMappedModelInput }, + ) => + (input: Input) => { + const mapped = options.mapInput === undefined ? (input as RouteMappedModelInput) : options.mapInput(input) + const provider = defaults.provider ?? route.provider ?? ("provider" in mapped ? mapped.provider : undefined) + if (!provider) throw new Error(`Route.model(${route.id}) requires a provider`) + const baseURL = mapped.baseURL ?? defaults.baseURL ?? route.defaults.baseURL + if (!baseURL) + throw new Error( + `Route.model(${route.id}) requires a baseURL — supply it via input, defaults, or route defaults`, + ) + const generation = mergeGenerationOptions(route.defaults.generation, defaults.generation) + const providerOptions = mergeProviderOptions(route.defaults.providerOptions, defaults.providerOptions) + const http = mergeHttpOptions(httpOptions(route.defaults.http), httpOptions(defaults.http)) + return modelRef({ + ...route.defaults, + ...defaults, + ...mapped, + baseURL, + provider, + route: route.id, + limits: mapped.limits ?? defaults.limits ?? route.defaults.limits, + generation: mergeGenerationOptions(generation, mapped.generation), + providerOptions: mergeProviderOptions(providerOptions, mapped.providerOptions), + http: mergeHttpOptions(http, httpOptions(mapped.http)), + }) + } + +const mergeRouteDefaults = (base: RouteDefaults | undefined, patch: RouteDefaults): RouteDefaults => ({ + ...base, + ...patch, + limits: patch.limits ?? 
base?.limits, + generation: mergeGenerationOptions(generationOptions(base?.generation), generationOptions(patch.generation)), + providerOptions: mergeProviderOptions(base?.providerOptions, patch.providerOptions), + http: mergeHttpOptions(httpOptions(base?.http), httpOptions(patch.http)), +}) + +export const modelLimits = ModelLimits.make + +export const generationOptions = (input: GenerationOptions.Input | undefined) => + input === undefined ? undefined : GenerationOptions.make(input) + +export const httpOptions = (input: HttpOptionsInput | undefined) => { + if (input === undefined) return input + return HttpOptions.make(input) +} + +export const modelRef = (input: ModelRefInput) => + new ModelRef({ + ...input, + id: ModelID.make(input.id), + provider: ProviderID.make(input.provider), + route: RouteID.make(input.route), + limits: modelLimits(input.limits), + generation: generationOptions(input.generation), + http: httpOptions(input.http), + }) + +function model( + route: AnyRoute, + defaults: RouteModelDefaults, + options?: RouteModelOptions, +): (input: Input) => ModelRef +function model( + route: AnyRoute, + defaults?: RouteRoutedModelDefaults, + options?: RouteModelOptions, +): (input: Input) => ModelRef +function model( + route: AnyRoute, + defaults: Partial>, + options: RouteMappedModelOptions, +): (input: Input) => ModelRef +function model( + route: AnyRoute, + defaults: Partial> = {}, + options: { readonly mapInput?: (input: Input) => RouteMappedModelInput } = {}, +) { + return modelWithDefaults(route, defaults, options) +} + +export interface Interface { + /** + * Compile a request through protocol body construction, validation, and HTTP + * preparation without sending it. Returns the prepared request including the + * provider-native body. + * + * Pass a `Body` type argument to statically expose the route's body + * shape (e.g. `prepare(...)`) — the runtime body is + * identical, so this is a type-level assertion the caller makes about which + * route the request will resolve to. + */ + readonly prepare: (request: LLMRequest) => Effect.Effect, LLMError> + readonly stream: StreamMethod + readonly generate: GenerateMethod +} + +export interface StreamMethod { + (request: LLMRequest): Stream.Stream + (options: ToolRuntime.RunOptions): Stream.Stream +} + +export interface GenerateMethod { + (request: LLMRequest): Effect.Effect + (options: ToolRuntime.RunOptions): Effect.Effect +} + +export class Service extends Context.Service()("@opencode/LLMClient") {} + +const noRoute = (model: ModelRef) => + new LLMErrorClass({ + module: "LLMClient", + method: "resolveRoute", + reason: new NoRouteReason({ route: model.route, provider: model.provider, model: model.id }), + }) + +const resolveRequestOptions = (request: LLMRequest) => + LLMRequest.update(request, { + generation: mergeGenerationOptions(request.model.generation, request.generation) ?? new GenerationOptions({}), + providerOptions: mergeProviderOptions(request.model.providerOptions, request.providerOptions), + http: mergeHttpOptions(request.model.http, request.http), + }) + +export interface MakeInput { + /** Route id used in registry lookup and error messages. */ + readonly id: string + /** Provider identity for route-owned model construction. */ + readonly provider?: string | ProviderID + /** Semantic API contract — owns body construction, body schema, and parsing. */ + readonly protocol: Protocol + /** Where the request is sent. */ + readonly endpoint: Endpoint + /** Per-request transport auth. Model-level `Auth` overrides this. 
*/ + readonly auth?: AuthDef + /** Stream framing — bytes -> frames before `protocol.stream.event` decoding. */ + readonly framing: Framing + /** Static / per-request headers added before `auth` runs. */ + readonly headers?: (input: { readonly request: LLMRequest }) => Record + /** Model defaults used by the route's `.model(...)` helper. */ + readonly defaults?: RouteDefaults +} + +export interface MakeTransportInput { + /** Route id used in registry lookup and error messages. */ + readonly id: string + /** Provider identity for route-owned model construction. */ + readonly provider?: string | ProviderID + /** Semantic API contract — owns body construction, body schema, and parsing. */ + readonly protocol: Protocol + /** Runnable transport route. */ + readonly transport: Transport + /** Provider/model defaults used by the route's `.model(...)` helper. */ + readonly defaults?: RouteDefaults +} + +const streamError = (route: string, message: string, cause: Cause.Cause) => { + const failed = cause.reasons.find(Cause.isFailReason)?.error + if (failed instanceof LLMErrorClass) return failed + return ProviderShared.eventError(route, message, Cause.pretty(cause)) +} + +function makeFromTransport( + input: MakeTransportInput, +): Route { + const protocol = input.protocol + const decodeEventEffect = Schema.decodeUnknownEffect(protocol.stream.event) + const decodeEvent = (route: string) => (frame: Frame) => + decodeEventEffect(frame).pipe( + Effect.mapError(() => + ProviderShared.eventError( + input.id, + `Invalid ${route} stream event`, + typeof frame === "string" ? frame : ProviderShared.encodeJson(frame), + ), + ), + ) + + const build = (routeInput: MakeTransportInput): Route => { + const route: Route = { + id: routeInput.id, + provider: routeInput.provider === undefined ? undefined : ProviderID.make(routeInput.provider), + protocol: protocol.id, + transport: routeInput.transport, + defaults: routeInput.defaults ?? {}, + body: protocol.body, + with: (patch: RoutePatch) => { + const { id, provider, transport, ...defaults } = patch + if (!id || id === routeInput.id) throw new Error(`Route.with(${routeInput.id}) requires a new route id`) + return build({ + ...routeInput, + id, + provider: provider ?? routeInput.provider, + transport: (transport as Transport | undefined) ?? routeInput.transport, + defaults: mergeRouteDefaults(routeInput.defaults, defaults), + }) + }, + model: (input: RouteModelInput): ModelRef => modelWithDefaults(route, {}, {})(input), + prepareTransport: routeInput.transport.prepare, + streamPrepared: (prepared: Prepared, request: LLMRequest, runtime: TransportRuntime) => { + const route = `${request.model.provider}/${request.model.route}` + const events = routeInput.transport + .frames(prepared, request, runtime) + .pipe( + Stream.mapEffect(decodeEvent(route)), + protocol.stream.terminal ? Stream.takeUntil(protocol.stream.terminal) : (stream) => stream, + ) + return events.pipe( + Stream.mapAccumEffect( + protocol.stream.initial, + protocol.stream.step, + protocol.stream.onHalt ? { onHalt: protocol.stream.onHalt } : undefined, + ), + Stream.catchCause((cause) => Stream.fail(streamError(route, `Failed to read ${route} stream`, cause))), + ) + }, + } satisfies Route + return register(route) + } + + return build(input) +} + +export function make( + input: MakeTransportInput, +): Route +/** + * Build a `Route` by composing the four orthogonal pieces of a deployment: + * + * - `Protocol` — what is the API I'm speaking? + * - `Endpoint` — where do I send the request? 
+ * - `Auth` — how do I authenticate it? + * - `Framing` — how do I cut the response stream into protocol frames? + * + * Plus optional `headers` for cross-cutting deployment concerns (provider + * version pins, per-deployment quirks). + * + * This is the canonical route constructor. If a new route does not fit + * this four-axis model, add a purpose-built constructor rather than widening + * the public surface preemptively. + */ +export function make( + input: MakeInput, +): Route> +export function make( + input: MakeInput | MakeTransportInput, +): Route | Route> { + if ("transport" in input) return makeFromTransport(input) + const protocol = input.protocol + const encodeBody = Schema.encodeSync(Schema.fromJsonString(protocol.body.schema)) + return makeFromTransport({ + id: input.id, + provider: input.provider, + protocol, + transport: HttpTransport.httpJson({ + endpoint: input.endpoint, + auth: input.auth, + framing: input.framing, + encodeBody, + headers: input.headers, + }), + defaults: input.defaults, + }) +} + +// `compile` is the important boundary: it turns a common `LLMRequest` into a +// validated provider body plus transport-private prepared data, but does not +// execute transport. +const compile = Effect.fn("LLM.compile")(function* (request: LLMRequest) { + const resolved = resolveRequestOptions(request) + const route = registeredRoute(resolved.model.route) + if (!route) return yield* noRoute(resolved.model) + + const body = yield* route.body + .from(resolved) + .pipe(Effect.flatMap(ProviderShared.validateWith(Schema.decodeUnknownEffect(route.body.schema)))) + const prepared = yield* route.prepareTransport(body, resolved) + + return { + request: resolved, + route, + body, + prepared, + } +}) + +const prepareWith = Effect.fn("LLMClient.prepare")(function* (request: LLMRequest) { + const compiled = yield* compile(request) + + return new PreparedRequest({ + id: compiled.request.id ?? 
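To make the four-axis composition concrete, here is a hedged sketch of what one deployment built with this constructor might look like. The import paths and the provider id are assumptions for illustration; `OpenAIChat.protocol` is the protocol the doc comment names, and `Endpoint.path`, `Auth.bearer`, and `Framing.sse` are the constructors defined elsewhere in this package:

```ts
// Hypothetical wiring; module paths and the "acme" provider are assumed.
import { Route, Endpoint, Auth, Framing } from "@opencode-ai/llm"
import { OpenAIChat } from "@opencode-ai/llm/protocols" // assumed path

const acme = Route.make({
  id: "acme/chat-completions",
  provider: "acme",
  protocol: OpenAIChat.protocol,                   // what API am I speaking?
  endpoint: Endpoint.path("/v1/chat/completions"), // where do I send it?
  auth: Auth.bearer(),                             // how do I authenticate?
  framing: Framing.sse,                            // how do I cut the stream?
  defaults: { baseURL: "https://api.acme.example" },
})
```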
"request", + route: compiled.route.id, + protocol: compiled.route.protocol, + model: compiled.request.model, + body: compiled.body, + metadata: { transport: compiled.route.transport.id }, + }) +}) + +const streamRequestWith = (runtime: TransportRuntime) => (request: LLMRequest) => + Stream.unwrap( + Effect.gen(function* () { + const compiled = yield* compile(request) + return compiled.route.streamPrepared(compiled.prepared, compiled.request, runtime) + }), + ) + +const isToolRunOptions = (input: LLMRequest | ToolRuntime.RunOptions): input is ToolRuntime.RunOptions => + "request" in input && "tools" in input + +const streamWith = (streamRequest: (request: LLMRequest) => Stream.Stream): StreamMethod => + ((input: LLMRequest | ToolRuntime.RunOptions) => { + if (isToolRunOptions(input)) return ToolRuntime.stream({ ...input, stream: streamRequest }) + return streamRequest(input) + }) as StreamMethod + +const generateWith = (stream: Interface["stream"]) => + Effect.fn("LLM.generate")(function* (input: LLMRequest | ToolRuntime.RunOptions) { + return new LLMResponse( + yield* stream(input as never).pipe( + Stream.runFold( + () => ({ events: [] as LLMEvent[], usage: undefined as LLMResponse["usage"] }), + (acc, event) => { + acc.events.push(event) + if ("usage" in event && event.usage !== undefined) acc.usage = event.usage + return acc + }, + ), + ), + ) + }) + +export const prepare = (request: LLMRequest) => + prepareWith(request) as Effect.Effect, LLMError> + +export function stream(request: LLMRequest): Stream.Stream +export function stream(options: ToolRuntime.RunOptions): Stream.Stream +export function stream(input: LLMRequest | ToolRuntime.RunOptions) { + return Stream.unwrap( + Effect.gen(function* () { + return (yield* Service).stream(input as never) + }), + ) +} + +export function generate(request: LLMRequest): Effect.Effect +export function generate(options: ToolRuntime.RunOptions): Effect.Effect +export function generate(input: LLMRequest | ToolRuntime.RunOptions) { + return Effect.gen(function* () { + return yield* (yield* Service).generate(input as never) + }) +} + +export const streamRequest = (request: LLMRequest) => + Stream.unwrap( + Effect.gen(function* () { + return (yield* Service).stream(request) + }), + ) + +export const layer: Layer.Layer = Layer.effect( + Service, + Effect.gen(function* () { + const stream = streamWith(streamRequestWith({ http: yield* RequestExecutor.Service })) + return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) }) + }), +) + +export const layerWithWebSocket: Layer.Layer = + Layer.effect( + Service, + Effect.gen(function* () { + const stream = streamWith( + streamRequestWith({ + http: yield* RequestExecutor.Service, + webSocket: yield* WebSocketExecutor.Service, + }), + ) + return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) }) + }), + ) + +export const Route = { make, model } as const + +export const LLMClient = { + Service, + layer, + layerWithWebSocket, + prepare, + stream, + generate, + stepCountIs: ToolRuntime.stepCountIs, +} as const diff --git a/packages/llm/src/route/endpoint.ts b/packages/llm/src/route/endpoint.ts new file mode 100644 index 000000000000..71d551893d97 --- /dev/null +++ b/packages/llm/src/route/endpoint.ts @@ -0,0 +1,41 @@ +import type { LLMRequest } from "../schema" +import * as ProviderShared from "../protocols/shared" + +export interface EndpointInput { + readonly request: LLMRequest + readonly body: Body +} + +export type 
EndpointPart = string | ((input: EndpointInput) => string) + +/** + * Declarative URL construction for one route. + * + * `Endpoint` carries only the path. The host always lives on `model.baseURL`, + * supplied by the provider helper that constructs the model. `render(...)` + * just appends the path (and any `model.queryParams`) to that host. + * + * `path` may be a string or a function of `EndpointInput`, for routes whose + * URL embeds the model id, region, or another body field (e.g. Bedrock, + * Gemini). + */ +export interface Endpoint { + readonly path: EndpointPart +} + +/** Construct an `Endpoint` from a path string or path function. */ +export const path = (value: EndpointPart): Endpoint => ({ path: value }) + +const renderPart = (part: EndpointPart, input: EndpointInput) => + typeof part === "function" ? part(input) : part + +export const render = (endpoint: Endpoint, input: EndpointInput) => { + const url = new URL( + `${ProviderShared.trimBaseUrl(input.request.model.baseURL)}${renderPart(endpoint.path, input)}`, + ) + const params = input.request.model.queryParams + if (params) for (const [key, value] of Object.entries(params)) url.searchParams.set(key, value) + return url +} + +export * as Endpoint from "./endpoint" diff --git a/packages/llm/src/route/executor.ts b/packages/llm/src/route/executor.ts new file mode 100644 index 000000000000..815b2c289c8a --- /dev/null +++ b/packages/llm/src/route/executor.ts @@ -0,0 +1,374 @@ +import { Cause, Context, Effect, Layer, Random } from "effect" +import { + FetchHttpClient, + Headers, + HttpClient, + HttpClientError, + HttpClientRequest, + HttpClientResponse, +} from "effect/unstable/http" +import { + AuthenticationReason, + ContentPolicyReason, + HttpContext, + HttpRateLimitDetails, + HttpRequestDetails, + HttpResponseDetails, + InvalidRequestReason, + LLMError, + ProviderInternalReason, + QuotaExceededReason, + RateLimitReason, + TransportReason, + UnknownProviderReason, +} from "../schema" + +export interface Interface { + readonly execute: ( + request: HttpClientRequest.HttpClientRequest, + ) => Effect.Effect +} + +export class Service extends Context.Service()("@opencode/LLM/RequestExecutor") {} + +const BODY_LIMIT = 16_384 +const MAX_RETRIES = 2 +const BASE_DELAY_MS = 500 +const MAX_DELAY_MS = 10_000 +const REDACTED = "" + +// One source of truth for what counts as a sensitive name across headers, +// URL query keys, and field names embedded inside request/response bodies. +// +// `SENSITIVE_NAME` is used as both a substring matcher (for free-form header +// names like `Authorization` / `X-API-Key`) and as the body-field alternation +// list. `SHORT_QUERY_NAME` covers anchored short keys like `?key=…` / `?sig=…` +// that are too generic to redact substring-style without false positives. 
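Before the definitions that follow, a standalone illustration of why the comment distinguishes the two matcher styles (the alternation here is abbreviated, not the full list below):

```ts
// Substring matching suits long, distinctive header names; short query keys
// need anchoring or they would redact harmless parameters.
const sensitive = /authorization|api[-_]?key|token|secret|signature/i // abbreviated
const shortQuery = /^(key|sig)$/i

sensitive.test("X-API-Key")    // true  — substring match is enough
sensitive.test("content-type") // false
shortQuery.test("key")         // true  — "?key=..." gets redacted
shortQuery.test("monkey")      // false — an unanchored /key/ would have matched
```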
+const SENSITIVE_NAME_SOURCE = + "authorization|api[-_]?key|access[-_]?token|refresh[-_]?token|id[-_]?token|token|secret|credential|signature|x-amz-signature" +const SENSITIVE_NAME = new RegExp(SENSITIVE_NAME_SOURCE, "i") +const SHORT_QUERY_NAME = /^(key|sig)$/i +const SENSITIVE_BODY_FIELD = new RegExp(`(?:${SENSITIVE_NAME_SOURCE}|key)`, "i") +const REDACT_JSON_FIELD = new RegExp(`("(?:${SENSITIVE_BODY_FIELD.source})"\\s*:\\s*)"[^"]*"`, "gi") +const REDACT_QUERY_FIELD = new RegExp(`((?:${SENSITIVE_BODY_FIELD.source})=)[^&\\s"]+`, "gi") + +const isSensitiveHeaderName = (name: string) => SENSITIVE_NAME.test(name) + +const isSensitiveQueryName = (name: string) => isSensitiveHeaderName(name) || SHORT_QUERY_NAME.test(name) + +const redactHeaders = (headers: Headers.Headers, redactedNames: ReadonlyArray) => + Object.fromEntries( + Object.entries(Headers.redact(headers, [...redactedNames, SENSITIVE_NAME])).map(([name, value]) => [ + name, + String(value), + ]), + ) + +const redactUrl = (value: string) => { + if (!URL.canParse(value)) return REDACTED + const url = new URL(value) + url.searchParams.forEach((_, key) => { + if (isSensitiveQueryName(key)) url.searchParams.set(key, REDACTED) + }) + return url.toString() +} + +const normalizedHeaders = (headers: Headers.Headers) => + Object.fromEntries(Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value])) + +const requestId = (headers: Record) => { + return ( + headers["x-request-id"] ?? + headers["request-id"] ?? + headers["x-amzn-requestid"] ?? + headers["x-amz-request-id"] ?? + headers["x-goog-request-id"] ?? + headers["cf-ray"] + ) +} + +const retryableStatus = (status: number) => status === 429 || status === 503 || status === 504 || status === 529 + +const retryAfterMs = (headers: Record) => { + const millis = Number(headers["retry-after-ms"]) + if (Number.isFinite(millis)) return Math.max(0, millis) + + const value = headers["retry-after"] + if (!value) return undefined + + const seconds = Number(value) + if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000) + + const date = Date.parse(value) + if (!Number.isNaN(date)) return Math.max(0, date - Date.now()) + return undefined +} + +const addRateLimitValue = (target: Record, key: string, value: string) => { + if (key.length > 0) target[key] = value +} + +const rateLimitDetails = (headers: Record, retryAfter: number | undefined) => { + const limit: Record = {} + const remaining: Record = {} + const reset: Record = {} + + Object.entries(headers).forEach(([name, value]) => { + const openaiLimit = /^x-ratelimit-limit-(.+)$/.exec(name)?.[1] + if (openaiLimit) return addRateLimitValue(limit, openaiLimit, value) + + const openaiRemaining = /^x-ratelimit-remaining-(.+)$/.exec(name)?.[1] + if (openaiRemaining) return addRateLimitValue(remaining, openaiRemaining, value) + + const openaiReset = /^x-ratelimit-reset-(.+)$/.exec(name)?.[1] + if (openaiReset) return addRateLimitValue(reset, openaiReset, value) + + const anthropic = /^anthropic-ratelimit-(.+)-(limit|remaining|reset)$/.exec(name) + if (!anthropic) return + if (anthropic[2] === "limit") return addRateLimitValue(limit, anthropic[1], value) + if (anthropic[2] === "remaining") return addRateLimitValue(remaining, anthropic[1], value) + return addRateLimitValue(reset, anthropic[1], value) + }) + + if ( + retryAfter === undefined && + Object.keys(limit).length === 0 && + Object.keys(remaining).length === 0 && + Object.keys(reset).length === 0 + ) + return undefined + + return new HttpRateLimitDetails({ + retryAfterMs: 
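The three header shapes `retryAfterMs` accepts are easiest to pin down with values. This mirrors the parsing above in plain TypeScript:

```ts
// Mirrors retryAfterMs above: a milliseconds header first, then integer
// seconds, then an HTTP-date — each clamped to be non-negative.
const parseRetryAfter = (headers: Record<string, string>): number | undefined => {
  const ms = Number(headers["retry-after-ms"])
  if (Number.isFinite(ms)) return Math.max(0, ms)
  const value = headers["retry-after"]
  if (!value) return undefined
  const seconds = Number(value)
  if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000)
  const date = Date.parse(value)
  return Number.isNaN(date) ? undefined : Math.max(0, date - Date.now())
}

parseRetryAfter({ "retry-after-ms": "1500" })  // 1500
parseRetryAfter({ "retry-after": "2" })        // 2000
parseRetryAfter({ "retry-after": "nonsense" }) // undefined (not a date either)
```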
retryAfter, + limit: Object.keys(limit).length === 0 ? undefined : limit, + remaining: Object.keys(remaining).length === 0 ? undefined : remaining, + reset: Object.keys(reset).length === 0 ? undefined : reset, + }) +} + +const requestDetails = (request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray) => + new HttpRequestDetails({ + method: request.method, + url: redactUrl(request.url), + headers: redactHeaders(request.headers, redactedNames), + }) + +const responseDetails = ( + response: HttpClientResponse.HttpClientResponse, + redactedNames: ReadonlyArray, +) => + new HttpResponseDetails({ + status: response.status, + headers: redactHeaders(response.headers, redactedNames), + }) + +const secretValues = (request: HttpClientRequest.HttpClientRequest) => { + const values = new Set() + const add = (value: string) => { + if (value.length < 4) return + values.add(value) + values.add(encodeURIComponent(value)) + } + + Object.entries(request.headers).forEach(([name, value]) => { + if (!isSensitiveHeaderName(name)) return + add(value) + const bearer = /^Bearer\s+(.+)$/i.exec(value)?.[1] + if (bearer) add(bearer) + }) + + if (!URL.canParse(request.url)) return values + new URL(request.url).searchParams.forEach((value, key) => { + if (isSensitiveQueryName(key)) add(value) + }) + return values +} + +// Two passes: structural (redact `"name": "value"` and `name=value` patterns +// for any field name that looks sensitive) plus literal (replace any actual +// secret values we sent in the request, in case the response echoes one back). +const redactBody = (body: string, request: HttpClientRequest.HttpClientRequest) => + Array.from(secretValues(request)).reduce( + (text, secret) => text.split(secret).join(REDACTED), + body.replace(REDACT_JSON_FIELD, `$1"${REDACTED}"`).replace(REDACT_QUERY_FIELD, `$1${REDACTED}`), + ) + +const responseBody = (body: string | void, request: HttpClientRequest.HttpClientRequest) => { + if (body === undefined) return {} + const redacted = redactBody(body, request) + if (redacted.length <= BODY_LIMIT) return { body: redacted } + return { body: redacted.slice(0, BODY_LIMIT), bodyTruncated: true } +} + +const providerMessage = (status: number, body: { readonly body?: string }) => { + if (body.body && body.body.length <= 500) return `Provider request failed with HTTP ${status}: ${body.body}` + return `Provider request failed with HTTP ${status}` +} + +const responseHttp = (input: { + readonly request: HttpClientRequest.HttpClientRequest + readonly response: HttpClientResponse.HttpClientResponse + readonly redactedNames: ReadonlyArray + readonly body: ReturnType + readonly requestId?: string | undefined + readonly rateLimit?: HttpRateLimitDetails | undefined +}) => + new HttpContext({ + request: requestDetails(input.request, input.redactedNames), + response: responseDetails(input.response, input.redactedNames), + ...input.body, + requestId: input.requestId, + rateLimit: input.rateLimit, + }) + +const statusReason = (input: { + readonly status: number + readonly message: string + readonly retryAfterMs?: number | undefined + readonly rateLimit?: HttpRateLimitDetails | undefined + readonly http: HttpContext +}) => { + const body = input.http.body ?? 
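A toy run of the two passes on a response body that both contains a sensitive field name and echoes back a secret we sent. The regex is abbreviated and the mask value is an illustrative placeholder, not the `REDACTED` constant defined above:

```ts
const MASK = "[redacted]" // illustrative placeholder
const jsonField = /("(?:api[-_]?key|token|secret)"\s*:\s*)"[^"]*"/gi // abbreviated

const redact = (body: string, sentSecrets: string[]) =>
  sentSecrets.reduce(
    (text, secret) => text.split(secret).join(MASK), // literal pass
    body.replace(jsonField, `$1"${MASK}"`),          // structural pass
  )

redact('{"api_key":"sk-123","detail":"you sent sk-123"}', ["sk-123"])
// => '{"api_key":"[redacted]","detail":"you sent [redacted]"}'
```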
"" + if (/content[-_\s]?policy|content_filter|safety/i.test(body)) { + return new ContentPolicyReason({ message: input.message, http: input.http }) + } + if (input.status === 401) { + return new AuthenticationReason({ message: input.message, kind: "invalid", http: input.http }) + } + if (input.status === 403) { + return new AuthenticationReason({ message: input.message, kind: "insufficient-permissions", http: input.http }) + } + if (input.status === 429) { + if (/insufficient[-_\s]?quota|quota[-_\s]?exceeded/i.test(body)) { + return new QuotaExceededReason({ message: input.message, http: input.http }) + } + return new RateLimitReason({ + message: input.message, + retryAfterMs: input.retryAfterMs, + rateLimit: input.rateLimit, + http: input.http, + }) + } + if (input.status === 400 || input.status === 404 || input.status === 409 || input.status === 422) { + return new InvalidRequestReason({ message: input.message, http: input.http }) + } + if (input.status >= 500 || retryableStatus(input.status)) { + return new ProviderInternalReason({ + message: input.message, + status: input.status, + retryAfterMs: input.retryAfterMs, + http: input.http, + }) + } + return new UnknownProviderReason({ message: input.message, status: input.status, http: input.http }) +} + +const statusError = + (request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray) => + (response: HttpClientResponse.HttpClientResponse) => + Effect.gen(function* () { + if (response.status < 400) return response + const body = yield* response.text.pipe(Effect.catch(() => Effect.void)) + const headers = normalizedHeaders(response.headers) + const retryAfter = retryAfterMs(headers) + const rateLimit = rateLimitDetails(headers, retryAfter) + const details = responseBody(body, request) + return yield* new LLMError({ + module: "RequestExecutor", + method: "execute", + reason: statusReason({ + status: response.status, + message: providerMessage(response.status, details), + retryAfterMs: retryAfter, + rateLimit, + http: responseHttp({ + request, + response, + redactedNames, + body: details, + requestId: requestId(headers), + rateLimit, + }), + }), + }) + }) + +const toHttpError = (redactedNames: ReadonlyArray) => (error: unknown) => { + const transportError = (input: { + readonly message: string + readonly kind?: string | undefined + readonly request?: HttpClientRequest.HttpClientRequest | undefined + }) => + new LLMError({ + module: "RequestExecutor", + method: "execute", + reason: new TransportReason({ + message: input.message, + kind: input.kind, + url: input.request ? redactUrl(input.request.url) : undefined, + http: input.request ? new HttpContext({ request: requestDetails(input.request, redactedNames) }) : undefined, + }), + }) + + if (Cause.isTimeoutError(error)) { + return transportError({ message: error.message, kind: "Timeout" }) + } + if (!HttpClientError.isHttpClientError(error)) { + return transportError({ message: "HTTP transport failed" }) + } + const request = "request" in error ? error.request : undefined + if (error.reason._tag === "TransportError") { + return transportError({ + message: error.reason.description ?? 
"HTTP transport failed", + kind: error.reason._tag, + request, + }) + } + return transportError({ + message: `HTTP transport failed: ${error.reason._tag}`, + kind: error.reason._tag, + request, + }) +} + +const retryDelay = (error: LLMError, attempt: number) => { + if (error.retryAfterMs !== undefined) return Effect.succeed(Math.min(error.retryAfterMs, MAX_DELAY_MS)) + return Random.nextBetween( + Math.min(BASE_DELAY_MS * 2 ** attempt * 0.8, MAX_DELAY_MS), + Math.min(BASE_DELAY_MS * 2 ** attempt * 1.2, MAX_DELAY_MS), + ).pipe(Effect.map((delay) => Math.round(delay))) +} + +const retryStatusFailures = ( + effect: Effect.Effect, + retries = MAX_RETRIES, + attempt = 0, +): Effect.Effect => + Effect.catchTag(effect, "LLM.Error", (error): Effect.Effect => { + if (!error.retryable || retries <= 0) return Effect.fail(error) + return retryDelay(error, attempt).pipe( + Effect.flatMap((delay) => Effect.sleep(delay)), + Effect.flatMap(() => retryStatusFailures(effect, retries - 1, attempt + 1)), + ) + }) + +export const layer: Layer.Layer = Layer.effect( + Service, + Effect.gen(function* () { + const http = yield* HttpClient.HttpClient + const executeOnce = (request: HttpClientRequest.HttpClientRequest) => + Effect.gen(function* () { + const redactedNames = yield* Headers.CurrentRedactedNames + return yield* http + .execute(request) + .pipe(Effect.mapError(toHttpError(redactedNames)), Effect.flatMap(statusError(request, redactedNames))) + }) + return Service.of({ + execute: (request) => retryStatusFailures(executeOnce(request)), + }) + }), +) + +export const defaultLayer = layer.pipe(Layer.provide(FetchHttpClient.layer)) + +export * as RequestExecutor from "./executor" diff --git a/packages/llm/src/route/framing.ts b/packages/llm/src/route/framing.ts new file mode 100644 index 000000000000..ef4855817d08 --- /dev/null +++ b/packages/llm/src/route/framing.ts @@ -0,0 +1,27 @@ +import type { Stream } from "effect" +import * as ProviderShared from "../protocols/shared" +import type { LLMError } from "../schema" + +/** + * Decode a streaming HTTP response body into provider-protocol frames. + * + * `Framing` is the byte-stream-shaped seam between transport and protocol: + * + * - SSE (`Framing.sse`) — UTF-8 decode the body, run the SSE channel decoder, + * drop empty / `[DONE]` keep-alives. Each emitted frame is the JSON `data:` + * payload of one event. + * - AWS event stream — length-prefixed binary frames with CRC checksums. + * Each emitted frame is one parsed binary event record. + * + * The frame type is opaque to this layer; the protocol's `decode` step turns + * a frame into a typed chunk. + */ +export interface Framing { + readonly id: string + readonly frame: (bytes: Stream.Stream) => Stream.Stream +} + +/** Server-Sent Events framing. Used by every JSON-streaming HTTP provider. 
*/ +export const sse: Framing = { id: "sse", frame: ProviderShared.sseFraming } + +export * as Framing from "./framing" diff --git a/packages/llm/src/route/index.ts b/packages/llm/src/route/index.ts new file mode 100644 index 000000000000..a75dd3e03879 --- /dev/null +++ b/packages/llm/src/route/index.ts @@ -0,0 +1,26 @@ +export { Route, LLMClient, modelLimits, modelRef } from "./client" +export type { + Route as RouteShape, + RouteModelDefaults, + RouteModelInput, + RouteRoutedModelDefaults, + RouteRoutedModelInput, + AnyRoute, + Interface as LLMClientShape, + Service as LLMClientService, + ModelRefInput, +} from "./client" +export * from "./executor" +export { Auth } from "./auth" +export { AuthOptions } from "./auth-options" +export { Endpoint } from "./endpoint" +export { Framing } from "./framing" +export { Protocol } from "./protocol" +export { HttpTransport, WebSocketExecutor, WebSocketTransport } from "./transport" +export * as Transport from "./transport" +export type { Auth as AuthShape, AuthInput, Credential, CredentialError } from "./auth" +export type { ApiKeyMode, AuthOverride, ProviderAuthOption } from "./auth-options" +export type { Endpoint as EndpointFn, EndpointInput } from "./endpoint" +export type { Framing as FramingDef } from "./framing" +export type { Protocol as ProtocolDef } from "./protocol" +export type { Transport as TransportDef, TransportRuntime } from "./transport" diff --git a/packages/llm/src/route/protocol.ts b/packages/llm/src/route/protocol.ts new file mode 100644 index 000000000000..3ce0f7827dd9 --- /dev/null +++ b/packages/llm/src/route/protocol.ts @@ -0,0 +1,84 @@ +import { Schema, type Effect } from "effect" +import type { LLMError, LLMEvent, LLMRequest, ProtocolID } from "../schema" + +/** + * The semantic API contract of one model server family. + * + * A `Protocol` owns the parts of a route that are intrinsic to "what does + * this API look like": how a common `LLMRequest` becomes a provider-native + * body, what schema that body must satisfy before it is JSON-encoded, and + * how the streaming response decodes back into common `LLMEvent`s. + * + * Examples: + * + * - `OpenAIChat.protocol` — chat completions style + * - `OpenAIResponses.protocol` — responses API + * - `AnthropicMessages.protocol` — messages API with content blocks + * - `Gemini.protocol` — generateContent + * - `BedrockConverse.protocol` — Converse with binary event-stream framing + * + * A `Protocol` is **not** a deployment. It does not know which URL, which + * headers, or which auth scheme to use. Those are deployment concerns owned + * by `Route.make(...)` along with the chosen `Endpoint`, `Auth`, + * and `Framing`. This separation is what lets DeepSeek, TogetherAI, Cerebras, + * etc. all reuse `OpenAIChat.protocol` without forking 300 lines per provider. + * + * The four type parameters reflect the pipeline: + * + * - `Body` — provider-native request body candidate. `Route.make(...)` + * validates and JSON-encodes it with `body.schema`. + * - `Frame` — one unit of the framed response stream. SSE: a JSON data + * string. AWS event stream: a parsed binary frame. + * - `Event` — schema-decoded provider event produced from one frame. + * - `State` — accumulator threaded through `stream.step` to translate event + * sequences into `LLMEvent` sequences. + */ +export interface Protocol { + /** Stable id for the wire protocol implementation. */ + readonly id: ProtocolID + /** Request side: schema for the provider-native body and how to build it. 
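The `Frame -> Event -> (State, LLMEvent[])` pipeline described above, shown as a plain fold. The shapes are toys; in the package the same loop runs as `Stream.mapAccumEffect` over schema-decoded events:

```ts
type ToyEvent = { type: "delta"; text: string } | { type: "stop" }
type ToyState = { emitted: number }

const initial = (): ToyState => ({ emitted: 0 })
// One event in, zero-or-more items out, plus the next accumulator state.
const step = (state: ToyState, event: ToyEvent): [ToyState, string[]] =>
  event.type === "delta" ? [{ emitted: state.emitted + 1 }, [event.text]] : [state, []]

let state = initial()
const out: string[] = []
for (const event of [{ type: "delta", text: "hi" }, { type: "stop" }] as ToyEvent[]) {
  const [next, emitted] = step(state, event)
  state = next
  out.push(...emitted)
}
// out === ["hi"], state.emitted === 1
```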
*/ + readonly body: ProtocolBody + /** Response side: streaming state machine. */ + readonly stream: ProtocolStream +} + +export interface ProtocolBody { + /** Schema for the validated provider-native body sent as the JSON request. */ + readonly schema: Schema.Codec + /** Build the provider-native body from a common `LLMRequest`. */ + readonly from: (request: LLMRequest) => Effect.Effect +} + +export interface ProtocolStream { + /** Schema for one decoded streaming event, decoded from a transport frame. */ + readonly event: Schema.Codec + /** Initial parser state. Called once per response. */ + readonly initial: () => State + /** Translate one event into emitted `LLMEvent`s plus the next state. */ + readonly step: (state: State, event: Event) => Effect.Effect], LLMError> + /** Optional request-completion signal for transports that do not end naturally. */ + readonly terminal?: (event: Event) => boolean + /** Optional flush emitted when the framed stream ends. */ + readonly onHalt?: (state: State) => ReadonlyArray +} + +/** + * Construct a `Protocol` from its body and stream pieces: + * + * - `body.schema` infers the provider-native request body shape. + * - `body.from` ties the common `LLMRequest` to the provider body. + * - `stream.event` infers the decoded streaming event and the wire frame. + * - `stream.initial`, `stream.step`, and `stream.onHalt` infer the parser state. + * + * Provider implementations should usually call `Protocol.make({ ... })` + * without explicit type arguments; the schemas and parser functions are the + * source of truth. The constructor remains as the public seam for future + * cross-cutting concerns such as tracing or instrumentation. + */ +export const make = ( + input: Protocol, +): Protocol => input + +export const jsonEvent = (schema: S) => Schema.fromJsonString(schema) + +export * as Protocol from "./protocol" diff --git a/packages/llm/src/route/transport/http.ts b/packages/llm/src/route/transport/http.ts new file mode 100644 index 000000000000..2159ce90b0f0 --- /dev/null +++ b/packages/llm/src/route/transport/http.ts @@ -0,0 +1,122 @@ +import { Effect, Stream } from "effect" +import { Headers, HttpClientRequest } from "effect/unstable/http" +import { Auth, type Auth as AuthDef } from "../auth" +import { type Endpoint, render as renderEndpoint } from "../endpoint" +import type { Framing } from "../framing" +import type { Transport } from "./index" +import * as ProviderShared from "../../protocols/shared" +import { mergeJsonRecords, type LLMRequest } from "../../schema" + +export interface JsonRequestInput { + readonly body: Body + readonly request: LLMRequest + readonly endpoint: Endpoint + readonly auth: AuthDef + readonly encodeBody: (body: Body) => string + readonly headers?: (input: { readonly request: LLMRequest }) => Record +} + +export interface JsonRequestParts { + readonly url: string + readonly jsonBody: Body | Record + readonly bodyText: string + readonly headers: Headers.Headers +} + +export interface HttpPrepared { + readonly request: HttpClientRequest.HttpClientRequest + readonly framing: Framing +} + +const applyQuery = (url: string, query: Record | undefined) => { + if (!query) return url + const next = new URL(url) + Object.entries(query).forEach(([key, value]) => next.searchParams.set(key, value)) + return next.toString() +} + +const bodyWithOverlay = (body: Body, request: LLMRequest, encodeBody: (body: Body) => string) => + Effect.gen(function* () { + if (request.http?.body === undefined) return { jsonBody: body, bodyText: 
encodeBody(body) } + if (ProviderShared.isRecord(body)) { + const overlaid = mergeJsonRecords(body, request.http.body) ?? {} + return { jsonBody: overlaid, bodyText: ProviderShared.encodeJson(overlaid) } + } + return yield* ProviderShared.invalidRequest("http.body can only overlay JSON object request bodies") + }) + +export const jsonRequestParts = (input: JsonRequestInput) => + Effect.gen(function* () { + const url = applyQuery( + renderEndpoint(input.endpoint, { request: input.request, body: input.body }).toString(), + input.request.http?.query, + ) + const body = yield* bodyWithOverlay(input.body, input.request, input.encodeBody) + const headers = yield* Auth.toEffect(Auth.isAuth(input.request.model.auth) ? input.request.model.auth : input.auth)( + { + request: input.request, + method: "POST", + url, + body: body.bodyText, + headers: Headers.fromInput({ + ...(input.headers?.({ request: input.request }) ?? {}), + ...input.request.model.headers, + ...input.request.http?.headers, + }), + }, + ) + return { url, jsonBody: body.jsonBody, bodyText: body.bodyText, headers } + }) + +export interface HttpJsonInput { + readonly endpoint: Endpoint + readonly auth?: AuthDef + readonly framing: Framing + readonly encodeBody: (body: Body) => string + readonly headers?: (input: { readonly request: LLMRequest }) => Record +} + +export type HttpJsonPatch = Partial> + +export interface HttpJsonTransport extends Transport, Frame> { + readonly with: (patch: HttpJsonPatch) => HttpJsonTransport +} + +export const httpJson = (input: HttpJsonInput): HttpJsonTransport => ({ + id: "http-json", + with: (patch) => httpJson({ ...input, ...patch }), + prepare: (body, request) => + jsonRequestParts({ + body, + request, + endpoint: input.endpoint, + auth: input.auth ?? Auth.bearer(), + encodeBody: input.encodeBody, + headers: input.headers, + }).pipe( + Effect.map((parts) => ({ + request: ProviderShared.jsonPost({ url: parts.url, body: parts.bodyText, headers: parts.headers }), + framing: input.framing, + })), + ), + frames: (prepared, request, runtime) => + Stream.unwrap( + runtime.http + .execute(prepared.request) + .pipe( + Effect.map((response) => + prepared.framing.frame( + response.stream.pipe( + Stream.mapError((error) => + ProviderShared.eventError( + `${request.model.provider}/${request.model.route}`, + `Failed to read ${request.model.provider}/${request.model.route} stream`, + ProviderShared.errorText(error), + ), + ), + ), + ), + ), + ), + ), +}) diff --git a/packages/llm/src/route/transport/index.ts b/packages/llm/src/route/transport/index.ts new file mode 100644 index 000000000000..f4d5fb29b7f6 --- /dev/null +++ b/packages/llm/src/route/transport/index.ts @@ -0,0 +1,22 @@ +import type { Effect, Stream } from "effect" +import type { Interface as RequestExecutorInterface } from "../executor" +import type { Interface as WebSocketExecutorInterface } from "./websocket" +import type { LLMError, LLMRequest } from "../../schema" + +export interface TransportRuntime { + readonly http: RequestExecutorInterface + readonly webSocket?: WebSocketExecutorInterface +} + +export interface Transport { + readonly id: string + readonly prepare: (body: Body, request: LLMRequest) => Effect.Effect + readonly frames: ( + prepared: Prepared, + request: LLMRequest, + runtime: TransportRuntime, + ) => Stream.Stream +} + +export * as HttpTransport from "./http" +export { WebSocketExecutor, WebSocketTransport } from "./websocket" diff --git a/packages/llm/src/route/transport/websocket.ts b/packages/llm/src/route/transport/websocket.ts 
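The overlay rule in `bodyWithOverlay` above reduces to: request-level `http.body` patches are merged onto object bodies and rejected for anything else. A standalone sketch using a plain spread where the real code uses `mergeJsonRecords`:

```ts
const overlay = (body: unknown, patch: Record<string, unknown>) => {
  const isRecord = typeof body === "object" && body !== null && !Array.isArray(body)
  if (!isRecord) throw new Error("http.body can only overlay JSON object request bodies")
  return { ...(body as Record<string, unknown>), ...patch }
}

overlay({ model: "m", stream: true }, { stream_options: { include_usage: true } })
// => { model: "m", stream: true, stream_options: { include_usage: true } }
```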
new file mode 100644 index 000000000000..647a6db43dd3 --- /dev/null +++ b/packages/llm/src/route/transport/websocket.ts @@ -0,0 +1,282 @@ +import { Cause, Context, Effect, Queue, Stream } from "effect" +import { Headers } from "effect/unstable/http" +import { Auth, type Auth as AuthDef } from "../auth" +import type { Endpoint } from "../endpoint" +import { LLMError, TransportReason, type LLMRequest } from "../../schema" +import * as HttpTransport from "./http" +import type { Transport } from "./index" + +export interface WebSocketRequest { + readonly url: string + readonly headers: Headers.Headers +} + +export interface WebSocketConnection { + readonly sendText: (message: string) => Effect.Effect + readonly messages: Stream.Stream + readonly close: Effect.Effect +} + +export interface Interface { + readonly open: (input: WebSocketRequest) => Effect.Effect +} + +type WebSocketConstructorWithHeaders = new ( + url: string, + options?: { readonly headers?: Headers.Headers }, +) => globalThis.WebSocket + +export class Service extends Context.Service()("@opencode/LLM/WebSocketExecutor") {} + +const transportError = ( + method: string, + message: string, + input: { readonly url?: string; readonly kind?: string } = {}, +) => + new LLMError({ + module: "WebSocketExecutor", + method, + reason: new TransportReason({ message, url: input.url, kind: input.kind }), + }) + +const eventMessage = (event: Event) => { + if ("message" in event && typeof event.message === "string") return event.message + return event.type +} + +const binaryMessage = (data: unknown) => { + if (data instanceof Uint8Array) return data + if (data instanceof ArrayBuffer) return new Uint8Array(data) + if (ArrayBuffer.isView(data)) return new Uint8Array(data.buffer, data.byteOffset, data.byteLength) + return undefined +} + +const waitOpen = (ws: globalThis.WebSocket, input: WebSocketRequest) => { + if (ws.readyState === globalThis.WebSocket.OPEN) return Effect.void + if (ws.readyState === globalThis.WebSocket.CLOSING || ws.readyState === globalThis.WebSocket.CLOSED) { + return Effect.fail( + transportError("open", `WebSocket closed before opening (state ${ws.readyState})`, { + url: input.url, + kind: "open", + }), + ) + } + return Effect.callback((resume, signal) => { + const cleanup = () => { + ws.removeEventListener("open", onOpen) + ws.removeEventListener("error", onError) + ws.removeEventListener("close", onClose) + signal.removeEventListener("abort", onAbort) + } + const onAbort = () => { + cleanup() + if (ws.readyState !== globalThis.WebSocket.CLOSED && ws.readyState !== globalThis.WebSocket.CLOSING) + ws.close(1000) + } + const onOpen = () => { + cleanup() + resume(Effect.void) + } + const onError = (event: Event) => { + cleanup() + resume( + Effect.fail( + transportError("open", `Failed to open WebSocket: ${eventMessage(event)}`, { url: input.url, kind: "open" }), + ), + ) + } + const onClose = (event: CloseEvent) => { + cleanup() + resume( + Effect.fail( + transportError("open", `WebSocket closed before opening with code ${event.code}`, { + url: input.url, + kind: "open", + }), + ), + ) + } + ws.addEventListener("open", onOpen, { once: true }) + ws.addEventListener("error", onError, { once: true }) + ws.addEventListener("close", onClose, { once: true }) + signal.addEventListener("abort", onAbort, { once: true }) + }) +} + +const webSocketUrl = (value: string) => + Effect.try({ + try: () => { + const url = new URL(value) + if (url.protocol === "https:") { + url.protocol = "wss:" + return url.toString() + } + if (url.protocol 
=== "http:") { + url.protocol = "ws:" + return url.toString() + } + throw new Error(`Unsupported WebSocket URL protocol ${url.protocol}`) + }, + catch: (error) => + transportError("prepare", error instanceof Error ? error.message : "Invalid WebSocket URL", { + url: value, + kind: "websocket", + }), + }) + +export const open = (input: WebSocketRequest) => + Effect.try({ + try: () => + new (globalThis.WebSocket as unknown as WebSocketConstructorWithHeaders)(input.url, { headers: input.headers }), + catch: (error) => + transportError("open", error instanceof Error ? error.message : "Failed to construct WebSocket", { + url: input.url, + kind: "open", + }), + }).pipe(Effect.flatMap((ws) => fromWebSocket(ws, input))) + +export const fromWebSocket = ( + ws: globalThis.WebSocket, + input: WebSocketRequest, +): Effect.Effect => + Effect.gen(function* () { + yield* waitOpen(ws, input) + const messages = yield* Queue.bounded>(128) + + const onMessage = (event: MessageEvent) => { + if (typeof event.data === "string") return Queue.offerUnsafe(messages, event.data) + const binary = binaryMessage(event.data) + if (binary) return Queue.offerUnsafe(messages, binary) + Queue.failCauseUnsafe( + messages, + Cause.fail( + transportError("message", "Unsupported WebSocket message payload", { url: input.url, kind: "message" }), + ), + ) + } + const onError = (event: Event) => { + Queue.failCauseUnsafe( + messages, + Cause.fail( + transportError("message", `WebSocket error: ${eventMessage(event)}`, { url: input.url, kind: "message" }), + ), + ) + } + const onClose = (event: CloseEvent) => { + if (event.code === 1000 || event.code === 1005) return Queue.endUnsafe(messages) + Queue.failCauseUnsafe( + messages, + Cause.fail( + transportError("message", `WebSocket closed with code ${event.code}`, { url: input.url, kind: "close" }), + ), + ) + } + const cleanup = Effect.sync(() => { + ws.removeEventListener("message", onMessage) + ws.removeEventListener("error", onError) + ws.removeEventListener("close", onClose) + }).pipe(Effect.andThen(Queue.shutdown(messages))) + + ws.addEventListener("message", onMessage) + ws.addEventListener("error", onError) + ws.addEventListener("close", onClose) + + return { + sendText: (message) => + Effect.try({ + try: () => ws.send(message), + catch: (error) => + transportError("sendText", error instanceof Error ? error.message : "Failed to send WebSocket message", { + url: input.url, + kind: "write", + }), + }), + messages: Stream.fromQueue(messages), + close: cleanup.pipe( + Effect.andThen( + Effect.sync(() => { + if (ws.readyState === globalThis.WebSocket.CLOSED || ws.readyState === globalThis.WebSocket.CLOSING) return + ws.close(1000) + }), + ), + ), + } + }) + +export const messageText = (message: string | Uint8Array, decoder: TextDecoder) => + typeof message === "string" ? 
message : decoder.decode(message) + +export interface JsonPrepared { + readonly url: string + readonly headers: Headers.Headers + readonly message: string +} + +export interface JsonInput { + readonly endpoint: Endpoint + readonly auth?: AuthDef + readonly encodeBody: (body: Body) => string + readonly toMessage: (body: Body | Record) => Effect.Effect + readonly encodeMessage: (message: Message) => string + readonly headers?: (input: { readonly request: LLMRequest }) => Record +} + +export type JsonPatch = Partial> + +export interface JsonTransport extends Transport { + readonly with: (patch: JsonPatch) => JsonTransport +} + +export const json = (input: JsonInput): JsonTransport => ({ + id: "websocket-json", + with: (patch) => json({ ...input, ...patch }), + prepare: (body, request) => + Effect.gen(function* () { + const parts = yield* HttpTransport.jsonRequestParts({ + body, + request, + endpoint: input.endpoint, + auth: input.auth ?? Auth.bearer(), + encodeBody: input.encodeBody, + headers: input.headers, + }) + return { + url: yield* webSocketUrl(parts.url), + headers: parts.headers, + message: input.encodeMessage(yield* input.toMessage(parts.jsonBody)), + } + }), + frames: (prepared, _request, runtime) => { + const webSocket = runtime.webSocket + if (!webSocket) { + return Stream.fail( + transportError("json", "WebSocket JSON transport requires WebSocketExecutor.Service", { + url: prepared.url, + kind: "websocket", + }), + ) + } + const decoder = new TextDecoder() + return Stream.unwrap( + Effect.gen(function* () { + const connection = yield* Effect.acquireRelease( + webSocket.open({ url: prepared.url, headers: prepared.headers }), + (connection) => connection.close, + ) + yield* connection.sendText(prepared.message) + return connection.messages.pipe(Stream.map((message) => messageText(message, decoder))) + }), + ) + }, +}) + +export const WebSocketExecutor = { + Service, + open, + fromWebSocket, + messageText, +} as const + +export const WebSocketTransport = { + json, +} as const diff --git a/packages/llm/src/schema/errors.ts b/packages/llm/src/schema/errors.ts new file mode 100644 index 000000000000..9bcc8e16941c --- /dev/null +++ b/packages/llm/src/schema/errors.ts @@ -0,0 +1,202 @@ +import { Schema } from "effect" +import { ModelID, ProviderID, ProviderMetadata, RouteID } from "./ids" + +export class HttpRequestDetails extends Schema.Class("LLM.HttpRequestDetails")({ + method: Schema.String, + url: Schema.String, + headers: Schema.Record(Schema.String, Schema.String), +}) {} + +export class HttpResponseDetails extends Schema.Class("LLM.HttpResponseDetails")({ + status: Schema.Number, + headers: Schema.Record(Schema.String, Schema.String), +}) {} + +export class HttpRateLimitDetails extends Schema.Class("LLM.HttpRateLimitDetails")({ + retryAfterMs: Schema.optional(Schema.Number), + limit: Schema.optional(Schema.Record(Schema.String, Schema.String)), + remaining: Schema.optional(Schema.Record(Schema.String, Schema.String)), + reset: Schema.optional(Schema.Record(Schema.String, Schema.String)), +}) {} + +export class HttpContext extends Schema.Class("LLM.HttpContext")({ + request: HttpRequestDetails, + response: Schema.optional(HttpResponseDetails), + body: Schema.optional(Schema.String), + bodyTruncated: Schema.optional(Schema.Boolean), + requestId: Schema.optional(Schema.String), + rateLimit: Schema.optional(HttpRateLimitDetails), +}) {} + +export class InvalidRequestReason extends Schema.Class("LLM.Error.InvalidRequest")({ + _tag: Schema.tag("InvalidRequest"), + message: 
Schema.String, + parameter: Schema.optional(Schema.String), + providerMetadata: Schema.optional(ProviderMetadata), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return false + } +} + +export class NoRouteReason extends Schema.Class("LLM.Error.NoRoute")({ + _tag: Schema.tag("NoRoute"), + route: RouteID, + provider: ProviderID, + model: ModelID, +}) { + get retryable() { + return false + } + + get message() { + return `No LLM route for ${this.provider}/${this.model} using ${this.route}` + } +} + +export class AuthenticationReason extends Schema.Class("LLM.Error.Authentication")({ + _tag: Schema.tag("Authentication"), + message: Schema.String, + kind: Schema.Literals(["missing", "invalid", "expired", "insufficient-permissions", "unknown"]), + providerMetadata: Schema.optional(ProviderMetadata), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return false + } +} + +export class RateLimitReason extends Schema.Class("LLM.Error.RateLimit")({ + _tag: Schema.tag("RateLimit"), + message: Schema.String, + retryAfterMs: Schema.optional(Schema.Number), + rateLimit: Schema.optional(HttpRateLimitDetails), + providerMetadata: Schema.optional(ProviderMetadata), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return true + } +} + +export class QuotaExceededReason extends Schema.Class("LLM.Error.QuotaExceeded")({ + _tag: Schema.tag("QuotaExceeded"), + message: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return false + } +} + +export class ContentPolicyReason extends Schema.Class("LLM.Error.ContentPolicy")({ + _tag: Schema.tag("ContentPolicy"), + message: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return false + } +} + +export class ProviderInternalReason extends Schema.Class("LLM.Error.ProviderInternal")({ + _tag: Schema.tag("ProviderInternal"), + message: Schema.String, + status: Schema.Number, + retryAfterMs: Schema.optional(Schema.Number), + providerMetadata: Schema.optional(ProviderMetadata), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return true + } +} + +export class TransportReason extends Schema.Class("LLM.Error.Transport")({ + _tag: Schema.tag("Transport"), + message: Schema.String, + kind: Schema.optional(Schema.String), + url: Schema.optional(Schema.String), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return false + } +} + +export class InvalidProviderOutputReason extends Schema.Class( + "LLM.Error.InvalidProviderOutput", +)({ + _tag: Schema.tag("InvalidProviderOutput"), + message: Schema.String, + route: Schema.optional(Schema.String), + raw: Schema.optional(Schema.String), + providerMetadata: Schema.optional(ProviderMetadata), +}) { + get retryable() { + return false + } +} + +export class UnknownProviderReason extends Schema.Class("LLM.Error.UnknownProvider")({ + _tag: Schema.tag("UnknownProvider"), + message: Schema.String, + status: Schema.optional(Schema.Number), + providerMetadata: Schema.optional(ProviderMetadata), + http: Schema.optional(HttpContext), +}) { + get retryable() { + return false + } +} + +export const LLMErrorReason = Schema.Union([ + InvalidRequestReason, + NoRouteReason, + AuthenticationReason, + RateLimitReason, + QuotaExceededReason, + ContentPolicyReason, + ProviderInternalReason, + TransportReason, + InvalidProviderOutputReason, + UnknownProviderReason, 
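Each reason class here carries a `retryable` getter, so the executor's retry loop consults only that flag (plus `retryAfterMs`) and never switches over tags. The shape of that contract, standalone:

```ts
interface Retryability {
  readonly _tag: string
  readonly retryable: boolean
}

const shouldRetry = (reason: Retryability, attemptsLeft: number) =>
  reason.retryable && attemptsLeft > 0

shouldRetry({ _tag: "RateLimit", retryable: true }, 2)       // true
shouldRetry({ _tag: "Authentication", retryable: false }, 2) // false
```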
+]).pipe(Schema.toTaggedUnion("_tag")) +export type LLMErrorReason = Schema.Schema.Type + +export class LLMError extends Schema.TaggedErrorClass()("LLM.Error", { + module: Schema.String, + method: Schema.String, + reason: LLMErrorReason, +}) { + override readonly cause = this.reason + + get retryable() { + return this.reason.retryable + } + + get retryAfterMs() { + return "retryAfterMs" in this.reason ? this.reason.retryAfterMs : undefined + } + + override get message() { + return `${this.module}.${this.method}: ${this.reason.message}` + } +} + +/** + * Failure type for tool execute handlers. Handlers must map their internal + * errors to this shape; the runtime catches `ToolFailure`s and surfaces them + * as `tool-error` events plus a `tool-result` of `type: "error"` so the model + * can self-correct. + * + * Anything thrown or yielded by a handler that is not a `ToolFailure` is + * treated as a defect and fails the stream. + */ +export class ToolFailure extends Schema.TaggedErrorClass()("LLM.ToolFailure", { + message: Schema.String, + metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)), +}) {} diff --git a/packages/llm/src/schema/events.ts b/packages/llm/src/schema/events.ts new file mode 100644 index 000000000000..2fa69370f40b --- /dev/null +++ b/packages/llm/src/schema/events.ts @@ -0,0 +1,237 @@ +import { Schema } from "effect" +import { FinishReason, ProtocolID, ProviderMetadata, RouteID } from "./ids" +import { ModelRef } from "./options" +import { ToolResultValue } from "./messages" + +export class Usage extends Schema.Class("LLM.Usage")({ + inputTokens: Schema.optional(Schema.Number), + outputTokens: Schema.optional(Schema.Number), + reasoningTokens: Schema.optional(Schema.Number), + cacheReadInputTokens: Schema.optional(Schema.Number), + cacheWriteInputTokens: Schema.optional(Schema.Number), + totalTokens: Schema.optional(Schema.Number), + native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)), +}) {} + +export const RequestStart = Schema.Struct({ + type: Schema.Literal("request-start"), + id: Schema.String, + model: ModelRef, +}).annotate({ identifier: "LLM.Event.RequestStart" }) +export type RequestStart = Schema.Schema.Type + +export const StepStart = Schema.Struct({ + type: Schema.Literal("step-start"), + index: Schema.Number, +}).annotate({ identifier: "LLM.Event.StepStart" }) +export type StepStart = Schema.Schema.Type + +export const TextStart = Schema.Struct({ + type: Schema.Literal("text-start"), + id: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.TextStart" }) +export type TextStart = Schema.Schema.Type + +export const TextDelta = Schema.Struct({ + type: Schema.Literal("text-delta"), + id: Schema.optional(Schema.String), + text: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.TextDelta" }) +export type TextDelta = Schema.Schema.Type + +export const TextEnd = Schema.Struct({ + type: Schema.Literal("text-end"), + id: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.TextEnd" }) +export type TextEnd = Schema.Schema.Type + +export const ReasoningDelta = Schema.Struct({ + type: Schema.Literal("reasoning-delta"), + id: Schema.optional(Schema.String), + text: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.ReasoningDelta" }) +export type ReasoningDelta = Schema.Schema.Type + +export const ToolInputDelta = 
Schema.Struct({ + type: Schema.Literal("tool-input-delta"), + id: Schema.String, + name: Schema.String, + text: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.ToolInputDelta" }) +export type ToolInputDelta = Schema.Schema.Type + +export const ToolCall = Schema.Struct({ + type: Schema.Literal("tool-call"), + id: Schema.String, + name: Schema.String, + input: Schema.Unknown, + providerExecuted: Schema.optional(Schema.Boolean), + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.ToolCall" }) +export type ToolCall = Schema.Schema.Type + +export const ToolResult = Schema.Struct({ + type: Schema.Literal("tool-result"), + id: Schema.String, + name: Schema.String, + result: ToolResultValue, + providerExecuted: Schema.optional(Schema.Boolean), + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.ToolResult" }) +export type ToolResult = Schema.Schema.Type + +export const ToolError = Schema.Struct({ + type: Schema.Literal("tool-error"), + id: Schema.String, + name: Schema.String, + message: Schema.String, + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.ToolError" }) +export type ToolError = Schema.Schema.Type + +export const StepFinish = Schema.Struct({ + type: Schema.Literal("step-finish"), + index: Schema.Number, + reason: FinishReason, + usage: Schema.optional(Usage), + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.StepFinish" }) +export type StepFinish = Schema.Schema.Type + +export const RequestFinish = Schema.Struct({ + type: Schema.Literal("request-finish"), + reason: FinishReason, + usage: Schema.optional(Usage), + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.RequestFinish" }) +export type RequestFinish = Schema.Schema.Type + +export const ProviderErrorEvent = Schema.Struct({ + type: Schema.Literal("provider-error"), + message: Schema.String, + retryable: Schema.optional(Schema.Boolean), + providerMetadata: Schema.optional(ProviderMetadata), +}).annotate({ identifier: "LLM.Event.ProviderError" }) +export type ProviderErrorEvent = Schema.Schema.Type + +const llmEventTagged = Schema.Union([ + RequestStart, + StepStart, + TextStart, + TextDelta, + TextEnd, + ReasoningDelta, + ToolInputDelta, + ToolCall, + ToolResult, + ToolError, + StepFinish, + RequestFinish, + ProviderErrorEvent, +]).pipe(Schema.toTaggedUnion("type")) + +/** + * camelCase aliases for `LLMEvent.guards` (provided by `Schema.toTaggedUnion`). + * Lets consumers write `events.filter(LLMEvent.is.toolCall)` instead of + * `events.filter(LLMEvent.guards["tool-call"])`. 
+ */ +export const LLMEvent = Object.assign(llmEventTagged, { + is: { + requestStart: llmEventTagged.guards["request-start"], + stepStart: llmEventTagged.guards["step-start"], + textStart: llmEventTagged.guards["text-start"], + textDelta: llmEventTagged.guards["text-delta"], + textEnd: llmEventTagged.guards["text-end"], + reasoningDelta: llmEventTagged.guards["reasoning-delta"], + toolInputDelta: llmEventTagged.guards["tool-input-delta"], + toolCall: llmEventTagged.guards["tool-call"], + toolResult: llmEventTagged.guards["tool-result"], + toolError: llmEventTagged.guards["tool-error"], + stepFinish: llmEventTagged.guards["step-finish"], + requestFinish: llmEventTagged.guards["request-finish"], + providerError: llmEventTagged.guards["provider-error"], + }, +}) +export type LLMEvent = Schema.Schema.Type + +export class PreparedRequest extends Schema.Class("LLM.PreparedRequest")({ + id: Schema.String, + route: RouteID, + protocol: ProtocolID, + model: ModelRef, + body: Schema.Unknown, + metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)), +}) {} + +/** + * A `PreparedRequest` whose `body` is typed as `Body`. Use with the generic + * on `LLMClient.prepare(...)` when the caller knows which route their + * request will resolve to and wants its native shape statically exposed + * (debug UIs, request previews, plan rendering). + * + * The runtime body is identical — the route still emits `body: unknown` — so + * this is a type-level assertion the caller makes about what they expect to + * find. The prepare runtime does not validate the assertion. + */ +export type PreparedRequestOf = Omit & { + readonly body: Body +} + +const responseText = (events: ReadonlyArray) => + events + .filter(LLMEvent.is.textDelta) + .map((event) => event.text) + .join("") + +const responseReasoning = (events: ReadonlyArray) => + events + .filter(LLMEvent.is.reasoningDelta) + .map((event) => event.text) + .join("") + +const responseUsage = (events: ReadonlyArray) => + events.reduce( + (usage, event) => ("usage" in event && event.usage !== undefined ? event.usage : usage), + undefined, + ) + +export class LLMResponse extends Schema.Class("LLM.Response")({ + events: Schema.Array(LLMEvent), + usage: Schema.optional(Usage), +}) { + /** Concatenated assistant text assembled from streamed `text-delta` events. */ + get text() { + return responseText(this.events) + } + + /** Concatenated reasoning text assembled from streamed `reasoning-delta` events. */ + get reasoning() { + return responseReasoning(this.events) + } + + /** Completed tool calls emitted by the provider. */ + get toolCalls() { + return this.events.filter(LLMEvent.is.toolCall) + } +} + +export namespace LLMResponse { + export type Output = LLMResponse | { readonly events: ReadonlyArray; readonly usage?: Usage } + + /** Concatenate assistant text from a response or collected event list. */ + export const text = (response: Output) => responseText(response.events) + + /** Return response usage, falling back to the latest usage-bearing event. */ + export const usage = (response: Output) => response.usage ?? responseUsage(response.events) + + /** Return completed tool calls from a response or collected event list. */ + export const toolCalls = (response: Output) => response.events.filter(LLMEvent.is.toolCall) + + /** Concatenate reasoning text from a response or collected event list. 
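What `responseText` above computes, shown as a standalone fold over abbreviated event shapes. In the package the type guard is `LLMEvent.is.textDelta` rather than a hand-written predicate:

```ts
type Ev =
  | { type: "text-delta"; text: string }
  | { type: "request-finish"; reason: string }

const text = (events: ReadonlyArray<Ev>) =>
  events
    .filter((e): e is Extract<Ev, { type: "text-delta" }> => e.type === "text-delta")
    .map((e) => e.text)
    .join("")

text([
  { type: "text-delta", text: "Hel" },
  { type: "text-delta", text: "lo" },
  { type: "request-finish", reason: "stop" },
]) // "Hello"
```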
+export namespace LLMResponse {
+  export type Output = LLMResponse | { readonly events: ReadonlyArray<LLMEvent>; readonly usage?: Usage }
+
+  /** Concatenate assistant text from a response or collected event list. */
+  export const text = (response: Output) => responseText(response.events)
+
+  /** Return response usage, falling back to the latest usage-bearing event. */
+  export const usage = (response: Output) => response.usage ?? responseUsage(response.events)
+
+  /** Return completed tool calls from a response or collected event list. */
+  export const toolCalls = (response: Output) => response.events.filter(LLMEvent.is.toolCall)
+
+  /** Concatenate reasoning text from a response or collected event list. */
+  export const reasoning = (response: Output) => responseReasoning(response.events)
+}
diff --git a/packages/llm/src/schema/ids.ts b/packages/llm/src/schema/ids.ts
new file mode 100644
index 000000000000..926184277023
--- /dev/null
+++ b/packages/llm/src/schema/ids.ts
@@ -0,0 +1,34 @@
+import { Schema } from "effect"
+
+/** Stable string identifier for a protocol implementation. */
+export const ProtocolID = Schema.String
+export type ProtocolID = Schema.Schema.Type<typeof ProtocolID>
+
+/** Stable string identifier for the runnable route. */
+export const RouteID = Schema.String
+export type RouteID = Schema.Schema.Type<typeof RouteID>
+
+export const ModelID = Schema.String.pipe(Schema.brand("LLM.ModelID"))
+export type ModelID = typeof ModelID.Type
+
+export const ProviderID = Schema.String.pipe(Schema.brand("LLM.ProviderID"))
+export type ProviderID = typeof ProviderID.Type
+
+export const ReasoningEfforts = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
+export const ReasoningEffort = Schema.Literals(ReasoningEfforts)
+export type ReasoningEffort = Schema.Schema.Type<typeof ReasoningEffort>
+
+export const TextVerbosity = Schema.Literals(["low", "medium", "high"])
+export type TextVerbosity = Schema.Schema.Type<typeof TextVerbosity>
+
+export const MessageRole = Schema.Literals(["user", "assistant", "tool"])
+export type MessageRole = Schema.Schema.Type<typeof MessageRole>
+
+export const FinishReason = Schema.Literals(["stop", "length", "tool-calls", "content-filter", "error", "unknown"])
+export type FinishReason = Schema.Schema.Type<typeof FinishReason>
+
+export const JsonSchema = Schema.Record(Schema.String, Schema.Unknown)
+export type JsonSchema = Schema.Schema.Type<typeof JsonSchema>
+
+export const ProviderMetadata = Schema.Record(Schema.String, Schema.Record(Schema.String, Schema.Unknown))
+export type ProviderMetadata = Schema.Schema.Type<typeof ProviderMetadata>
diff --git a/packages/llm/src/schema/index.ts b/packages/llm/src/schema/index.ts
new file mode 100644
index 000000000000..0c0fede8fa79
--- /dev/null
+++ b/packages/llm/src/schema/index.ts
@@ -0,0 +1,5 @@
+export * from "./ids"
+export * from "./options"
+export * from "./messages"
+export * from "./events"
+export * from "./errors"
diff --git a/packages/llm/src/schema/messages.ts b/packages/llm/src/schema/messages.ts
new file mode 100644
index 000000000000..3daf00bbc0d0
--- /dev/null
+++ b/packages/llm/src/schema/messages.ts
@@ -0,0 +1,234 @@
+import { Schema } from "effect"
+import { JsonSchema, MessageRole, ProviderMetadata } from "./ids"
+import { CacheHint, GenerationOptions, HttpOptions, ModelRef, ProviderOptions } from "./options"
+
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  typeof value === "object" && value !== null && !Array.isArray(value)
+
+const systemPartSchema = Schema.Struct({
+  type: Schema.Literal("text"),
+  text: Schema.String,
+  cache: Schema.optional(CacheHint),
+  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+}).annotate({ identifier: "LLM.SystemPart" })
+export type SystemPart = Schema.Schema.Type<typeof systemPartSchema>
+
+const makeSystemPart = (text: string): SystemPart => ({ type: "text", text })
+export const SystemPart = Object.assign(systemPartSchema, {
+  make: makeSystemPart,
+  content: (input?: string | SystemPart | ReadonlyArray<SystemPart>) => {
+    if (input === undefined) return []
+    return typeof input === "string" ? [makeSystemPart(input)] : Array.isArray(input) ? [...input] : [input]
+  },
+})
+
+export const TextPart = Schema.Struct({
+  type: Schema.Literal("text"),
+  text: Schema.String,
+  cache: Schema.optional(CacheHint),
+  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+  providerMetadata: Schema.optional(ProviderMetadata),
+}).annotate({ identifier: "LLM.Content.Text" })
+export type TextPart = Schema.Schema.Type<typeof TextPart>
+
+export const MediaPart = Schema.Struct({
+  type: Schema.Literal("media"),
+  mediaType: Schema.String,
+  data: Schema.Union([Schema.String, Schema.Uint8Array]),
+  filename: Schema.optional(Schema.String),
+  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+}).annotate({ identifier: "LLM.Content.Media" })
+export type MediaPart = Schema.Schema.Type<typeof MediaPart>
+
+const isToolResultValue = (value: unknown): value is ToolResultValue =>
+  isRecord(value) && (value.type === "text" || value.type === "json" || value.type === "error") && "value" in value
+
+export const ToolResultValue = Object.assign(
+  Schema.Struct({
+    type: Schema.Literals(["json", "text", "error"]),
+    value: Schema.Unknown,
+  }).annotate({ identifier: "LLM.ToolResult" }),
+  {
+    make: (value: unknown, type: ToolResultValue["type"] = "json"): ToolResultValue =>
+      isToolResultValue(value) ? value : { type, value },
+  },
+)
+export type ToolResultValue = Schema.Schema.Type<typeof ToolResultValue>
+
+export const ToolCallPart = Object.assign(
+  Schema.Struct({
+    type: Schema.Literal("tool-call"),
+    id: Schema.String,
+    name: Schema.String,
+    input: Schema.Unknown,
+    providerExecuted: Schema.optional(Schema.Boolean),
+    metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+    providerMetadata: Schema.optional(ProviderMetadata),
+  }).annotate({ identifier: "LLM.Content.ToolCall" }),
+  {
+    make: (input: Omit<ToolCallPart, "type">): ToolCallPart => ({ type: "tool-call", ...input }),
+  },
+)
+export type ToolCallPart = Schema.Schema.Type<typeof ToolCallPart>
+
+export const ToolResultPart = Object.assign(
+  Schema.Struct({
+    type: Schema.Literal("tool-result"),
+    id: Schema.String,
+    name: Schema.String,
+    result: ToolResultValue,
+    providerExecuted: Schema.optional(Schema.Boolean),
+    metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+    providerMetadata: Schema.optional(ProviderMetadata),
+  }).annotate({ identifier: "LLM.Content.ToolResult" }),
+  {
+    make: (
+      input: Omit<ToolResultPart, "type" | "result"> & {
+        readonly result: unknown
+        readonly resultType?: ToolResultValue["type"]
+      },
+    ): ToolResultPart => ({
+      type: "tool-result",
+      id: input.id,
+      name: input.name,
+      result: ToolResultValue.make(input.result, input.resultType),
+      providerExecuted: input.providerExecuted,
+      metadata: input.metadata,
+      providerMetadata: input.providerMetadata,
+    }),
+  },
+)
+export type ToolResultPart = Schema.Schema.Type<typeof ToolResultPart>
+
+export const ReasoningPart = Schema.Struct({
+  type: Schema.Literal("reasoning"),
+  text: Schema.String,
+  encrypted: Schema.optional(Schema.String),
+  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+  providerMetadata: Schema.optional(ProviderMetadata),
+}).annotate({ identifier: "LLM.Content.Reasoning" })
+export type ReasoningPart = Schema.Schema.Type<typeof ReasoningPart>
+
+export const ContentPart = Schema.Union([TextPart, MediaPart, ToolCallPart, ToolResultPart, ReasoningPart]).pipe(
+  Schema.toTaggedUnion("type"),
+)
+export type ContentPart = Schema.Schema.Type<typeof ContentPart>
+export class Message extends Schema.Class<Message>("LLM.Message")({
+  id: Schema.optional(Schema.String),
+  role: MessageRole,
+  content: Schema.Array(ContentPart),
+  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+}) {}
+
+export namespace Message {
+  export type ContentInput = string | ContentPart | ReadonlyArray<ContentPart>
+  export type Input = Omit<ConstructorParameters<typeof Message>[0], "content"> & {
+    readonly content: ContentInput
+  }
+
+  export const text = (value: string): ContentPart => ({ type: "text", text: value })
+
+  export const content = (input: ContentInput) =>
+    typeof input === "string" ? [text(input)] : Array.isArray(input) ? [...input] : [input]
+
+  export const make = (input: Message | Input) => {
+    if (input instanceof Message) return input
+    return new Message({ ...input, content: content(input.content) })
+  }
+
+  export const user = (content: ContentInput) => make({ role: "user", content })
+
+  export const assistant = (content: ContentInput) => make({ role: "assistant", content })
+
+  export const tool = (result: ToolResultPart | Parameters<typeof ToolResultPart.make>[0]) =>
+    make({ role: "tool", content: ["type" in result ? result : ToolResultPart.make(result)] })
+}
+
+export class ToolDefinition extends Schema.Class<ToolDefinition>("LLM.ToolDefinition")({
+  name: Schema.String,
+  description: Schema.String,
+  inputSchema: JsonSchema,
+  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+}) {}
+
+export namespace ToolDefinition {
+  export type Input = ToolDefinition | ConstructorParameters<typeof ToolDefinition>[0]
+
+  /** Normalize tool definition input into the canonical `ToolDefinition` class. */
+  export const make = (input: Input) => (input instanceof ToolDefinition ? input : new ToolDefinition(input))
+}
+
+export class ToolChoice extends Schema.Class<ToolChoice>("LLM.ToolChoice")({
+  type: Schema.Literals(["auto", "none", "required", "tool"]),
+  name: Schema.optional(Schema.String),
+}) {}
+
+export namespace ToolChoice {
+  export type Mode = Exclude<ToolChoice["type"], "tool">
+  export type Input = ToolChoice | ConstructorParameters<typeof ToolChoice>[0] | ToolDefinition | string
+
+  const isMode = (value: string): value is Mode => value === "auto" || value === "none" || value === "required"
+
+  /** Select a specific named tool. */
+  export const named = (value: string) => new ToolChoice({ type: "tool", name: value })
+
+  /** Normalize ergonomic tool-choice inputs into the canonical `ToolChoice` class. */
+  export const make = (input: Input) => {
+    if (input instanceof ToolChoice) return input
+    if (input instanceof ToolDefinition) return named(input.name)
+    if (typeof input === "string") return isMode(input) ? new ToolChoice({ type: input }) : named(input)
+    return new ToolChoice(input)
+  }
+}
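// Normalization sketch: the helpers above accept progressively looser inputs
// and produce the same canonical shapes, so callers can pass strings, parts,
// or ready-made classes interchangeably.
//
//   Message.user("hello")           // content: [{ type: "text", text: "hello" }]
//   Message.tool({ id: "call_1", name: "get_weather", result: { temperature: 22 } })
//   ToolChoice.make("required")     // a mode keyword
//   ToolChoice.make("get_weather")  // any other string selects a named tool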
+export const ResponseFormat = Schema.Union([
+  Schema.Struct({ type: Schema.Literal("text") }),
+  Schema.Struct({ type: Schema.Literal("json"), schema: JsonSchema }),
+  Schema.Struct({ type: Schema.Literal("tool"), tool: ToolDefinition }),
+]).pipe(Schema.toTaggedUnion("type"))
+export type ResponseFormat = Schema.Schema.Type<typeof ResponseFormat>
+
+export class LLMRequest extends Schema.Class<LLMRequest>("LLM.Request")({
+  id: Schema.optional(Schema.String),
+  model: ModelRef,
+  system: Schema.Array(SystemPart),
+  messages: Schema.Array(Message),
+  tools: Schema.Array(ToolDefinition),
+  toolChoice: Schema.optional(ToolChoice),
+  generation: Schema.optional(GenerationOptions),
+  providerOptions: Schema.optional(ProviderOptions),
+  http: Schema.optional(HttpOptions),
+  responseFormat: Schema.optional(ResponseFormat),
+  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+}) {}
+
+export namespace LLMRequest {
+  export type Input = ConstructorParameters<typeof LLMRequest>[0]
+
+  export const input = (request: LLMRequest): Input => ({
+    id: request.id,
+    model: request.model,
+    system: request.system,
+    messages: request.messages,
+    tools: request.tools,
+    toolChoice: request.toolChoice,
+    generation: request.generation,
+    providerOptions: request.providerOptions,
+    http: request.http,
+    responseFormat: request.responseFormat,
+    metadata: request.metadata,
+  })
+
+  export const update = (request: LLMRequest, patch: Partial<Input>) => {
+    if (Object.keys(patch).length === 0) return request
+    return new LLMRequest({
+      ...input(request),
+      ...patch,
+      model: patch.model ?? request.model,
+    })
+  }
+}
diff --git a/packages/llm/src/schema/options.ts b/packages/llm/src/schema/options.ts
new file mode 100644
index 000000000000..9a618aa8ae83
--- /dev/null
+++ b/packages/llm/src/schema/options.ts
@@ -0,0 +1,202 @@
+import { Schema } from "effect"
+import { JsonSchema, ModelID, ProviderID, RouteID } from "./ids"
+
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  typeof value === "object" && value !== null && !Array.isArray(value)
+
+export const mergeJsonRecords = (
+  ...items: ReadonlyArray<Record<string, unknown> | undefined>
+): Record<string, unknown> | undefined => {
+  const defined = items.filter((item): item is Record<string, unknown> => item !== undefined)
+  if (defined.length === 0) return undefined
+  if (defined.length === 1 && Object.values(defined[0]).every((value) => value !== undefined)) return defined[0]
+  const result: Record<string, unknown> = {}
+  for (const item of defined) {
+    for (const [key, value] of Object.entries(item)) {
+      if (value === undefined) continue
+      result[key] = isRecord(result[key]) && isRecord(value) ? mergeJsonRecords(result[key], value) : value
+    }
+  }
+  return Object.keys(result).length === 0 ? undefined : result
+}
+
+const mergeStringRecords = (
+  ...items: ReadonlyArray<Record<string, string> | undefined>
+): Record<string, string> | undefined => {
+  const defined = items.filter((item): item is Record<string, string> => item !== undefined)
+  if (defined.length === 0) return undefined
+  if (defined.length === 1) return defined[0]
+  const result = Object.fromEntries(
+    defined.flatMap((item) =>
+      Object.entries(item).filter((entry): entry is [string, string] => entry[1] !== undefined),
+    ),
+  )
+  return Object.keys(result).length === 0 ? undefined : result
+}
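// Merge semantics sketch: nested records merge recursively, later values win
// on conflicting keys, and `undefined` entries are dropped rather than
// clobbering earlier values.
//
//   mergeJsonRecords({ a: { b: 1 } }, { a: { c: 2 }, d: undefined })
//   // => { a: { b: 1, c: 2 } }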
+export const ProviderOptions = Schema.Record(Schema.String, Schema.Record(Schema.String, Schema.Unknown))
+export type ProviderOptions = Schema.Schema.Type<typeof ProviderOptions>
+
+export const mergeProviderOptions = (
+  ...items: ReadonlyArray<ProviderOptions | undefined>
+): ProviderOptions | undefined => {
+  const result: Record<string, Record<string, unknown>> = {}
+  for (const item of items) {
+    if (!item) continue
+    for (const [provider, options] of Object.entries(item)) {
+      const merged = mergeJsonRecords(result[provider], options)
+      if (merged) result[provider] = merged
+    }
+  }
+  return Object.keys(result).length === 0 ? undefined : result
+}
+
+export class HttpOptions extends Schema.Class<HttpOptions>("LLM.HttpOptions")({
+  body: Schema.optional(JsonSchema),
+  headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
+  query: Schema.optional(Schema.Record(Schema.String, Schema.String)),
+}) {}
+
+export namespace HttpOptions {
+  export type Input = HttpOptions | ConstructorParameters<typeof HttpOptions>[0]
+
+  /** Normalize HTTP option input into the canonical `HttpOptions` class. */
+  export const make = (input: Input) => (input instanceof HttpOptions ? input : new HttpOptions(input))
+}
+
+export const mergeHttpOptions = (...items: ReadonlyArray<HttpOptions | undefined>): HttpOptions | undefined => {
+  const body = mergeJsonRecords(...items.map((item) => item?.body))
+  const headers = mergeStringRecords(...items.map((item) => item?.headers))
+  const query = mergeStringRecords(...items.map((item) => item?.query))
+  if (!body && !headers && !query) return undefined
+  return new HttpOptions({ body, headers, query })
+}
+
+export class GenerationOptions extends Schema.Class<GenerationOptions>("LLM.GenerationOptions")({
+  maxTokens: Schema.optional(Schema.Number),
+  temperature: Schema.optional(Schema.Number),
+  topP: Schema.optional(Schema.Number),
+  topK: Schema.optional(Schema.Number),
+  frequencyPenalty: Schema.optional(Schema.Number),
+  presencePenalty: Schema.optional(Schema.Number),
+  seed: Schema.optional(Schema.Number),
+  stop: Schema.optional(Schema.Array(Schema.String)),
+}) {}
+
+export namespace GenerationOptions {
+  export type Input = GenerationOptions | ConstructorParameters<typeof GenerationOptions>[0]
+
+  /** Normalize generation option input into the canonical `GenerationOptions` class. */
+  export const make = (input: Input = {}) => (input instanceof GenerationOptions ? input : new GenerationOptions(input))
+}
+
+export type GenerationOptionsFields = {
+  readonly maxTokens?: number
+  readonly temperature?: number
+  readonly topP?: number
+  readonly topK?: number
+  readonly frequencyPenalty?: number
+  readonly presencePenalty?: number
+  readonly seed?: number
+  readonly stop?: ReadonlyArray<string>
+}
+
+export type GenerationOptionsInput = GenerationOptions | GenerationOptionsFields
+
+const latestGeneration = <Key extends keyof GenerationOptionsFields>(
+  items: ReadonlyArray<GenerationOptionsInput | undefined>,
+  key: Key,
+) => items.findLast((item) => item?.[key] !== undefined)?.[key]
+
+export const mergeGenerationOptions = (...items: ReadonlyArray<GenerationOptionsInput | undefined>) => {
+  const result = new GenerationOptions({
+    maxTokens: latestGeneration(items, "maxTokens"),
+    temperature: latestGeneration(items, "temperature"),
+    topP: latestGeneration(items, "topP"),
+    topK: latestGeneration(items, "topK"),
+    frequencyPenalty: latestGeneration(items, "frequencyPenalty"),
+    presencePenalty: latestGeneration(items, "presencePenalty"),
+    seed: latestGeneration(items, "seed"),
+    stop: latestGeneration(items, "stop"),
+  })
+  return Object.values(result).some((value) => value !== undefined) ? result : undefined
+}
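// Last-wins sketch: each field resolves independently to the latest defined
// value, so request-level options override model defaults field by field.
//
//   mergeGenerationOptions({ temperature: 0.2, maxTokens: 512 }, { temperature: 0.7 })
//   // => GenerationOptions { temperature: 0.7, maxTokens: 512 }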
+export class ModelLimits extends Schema.Class<ModelLimits>("LLM.ModelLimits")({
+  context: Schema.optional(Schema.Number),
+  output: Schema.optional(Schema.Number),
+}) {}
+
+export namespace ModelLimits {
+  export type Input = ModelLimits | ConstructorParameters<typeof ModelLimits>[0]
+
+  /** Normalize model limit input into the canonical `ModelLimits` class. */
+  export const make = (input: Input | undefined) =>
+    input instanceof ModelLimits ? input : new ModelLimits(input ?? {})
+}
+
+export class ModelRef extends Schema.Class<ModelRef>("LLM.ModelRef")({
+  id: ModelID,
+  provider: ProviderID,
+  route: RouteID,
+  baseURL: Schema.String,
+  /** Provider-specific API key convenience. Provider helpers normalize this into `auth`. */
+  apiKey: Schema.optional(Schema.String),
+  /** Optional transport auth policy. Opaque because it may contain functions. */
+  auth: Schema.optional(Schema.Any),
+  headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
+  /**
+   * Query params appended to the request URL by `Endpoint.baseURL`. Used for
+   * deployment-level URL-scoped settings such as Azure's `api-version` or any
+   * provider that requires a per-request key in the URL. Generic concern, so
+   * lives as a typed first-class field instead of `native`.
+   */
+  queryParams: Schema.optional(Schema.Record(Schema.String, Schema.String)),
+  limits: ModelLimits,
+  /** Provider-neutral generation defaults. Request-level values override them. */
+  generation: Schema.optional(GenerationOptions),
+  /** Provider-owned typed-at-the-facade options for non-portable knobs. */
+  providerOptions: Schema.optional(ProviderOptions),
+  /** Serializable raw HTTP overlays applied to the final outgoing request. */
+  http: Schema.optional(HttpOptions),
+  /**
+   * Provider-specific opaque options. Reach for this only when the value is
+   * genuinely provider-private and does not fit a typed axis (e.g. Bedrock's
+   * `aws_credentials` / `aws_region` for SigV4). Anything used by more than
+   * one route should grow into a typed field instead.
+   */
+  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
+}) {}
+
+export namespace ModelRef {
+  export type Input = ConstructorParameters<typeof ModelRef>[0]
+
+  export const input = (model: ModelRef): Input => ({
+    id: model.id,
+    provider: model.provider,
+    route: model.route,
+    baseURL: model.baseURL,
+    apiKey: model.apiKey,
+    auth: model.auth,
+    headers: model.headers,
+    queryParams: model.queryParams,
+    limits: model.limits,
+    generation: model.generation,
+    providerOptions: model.providerOptions,
+    http: model.http,
+    native: model.native,
+  })
+
+  export const update = (model: ModelRef, patch: Partial<Input>) => {
+    if (Object.keys(patch).length === 0) return model
+    return new ModelRef({
+      ...input(model),
+      ...patch,
+    })
+  }
+}
+
+export class CacheHint extends Schema.Class<CacheHint>("LLM.CacheHint")({
+  type: Schema.Literals(["ephemeral", "persistent"]),
+  ttlSeconds: Schema.optional(Schema.Number),
+}) {}
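// Override sketch: `update` copies the ref and applies the patch immutably.
// The `api-version` value below is illustrative, not a shipped default.
//
//   const azureStyle = ModelRef.update(model, {
//     queryParams: { "api-version": "2024-10-01" },
//   })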
diff --git a/packages/llm/src/tool-runtime.ts b/packages/llm/src/tool-runtime.ts
new file mode 100644
index 000000000000..20e27379bd97
--- /dev/null
+++ b/packages/llm/src/tool-runtime.ts
@@ -0,0 +1,240 @@
+import { Effect, Stream } from "effect"
+import type { Concurrency } from "effect/Types"
+import {
+  type ContentPart,
+  type FinishReason,
+  type LLMError,
+  type LLMEvent,
+  LLMRequest,
+  Message,
+  type ProviderMetadata,
+  ToolCallPart,
+  ToolFailure,
+  ToolResultPart,
+  type ToolResultValue,
+} from "./schema"
+import { type AnyTool, type ExecutableTools, type Tools, toDefinitions } from "./tool"
+
+export interface RuntimeState {
+  readonly step: number
+  readonly request: LLMRequest
+}
+
+export type StopCondition = (state: RuntimeState) => boolean
+
+export type ToolExecution = "auto" | "none"
+
+interface RunOptionsBase {
+  readonly request: LLMRequest
+  readonly concurrency?: Concurrency
+  readonly stopWhen?: StopCondition
+}
+
+export type RunOptions = RunOptionsAuto | RunOptionsNone
+
+export interface RunOptionsAuto<T extends ExecutableTools = ExecutableTools> extends RunOptionsBase {
+  readonly request: LLMRequest
+  readonly tools: T
+  readonly toolExecution?: "auto"
+}
+
+export interface RunOptionsNone<T extends Tools = Tools> extends RunOptionsBase {
+  readonly request: LLMRequest
+  readonly tools: T
+  /** Advertise tool schemas but leave model-emitted tool calls for the caller. */
+  readonly toolExecution: "none"
+}
+
+export type StreamOptions = RunOptions & {
+  readonly stream: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError>
+}
+
+export const stepCountIs =
+  (count: number): StopCondition =>
+  (state) =>
+    state.step + 1 >= count
+
+/**
+ * Run a model with typed tools. This helper owns tool orchestration, while the
+ * caller supplies the actual model stream function. It can advertise schemas
+ * only (`toolExecution: "none"`), execute one step, or continue model rounds
+ * when `stopWhen` is provided.
+ */
+export const stream = (options: StreamOptions): Stream.Stream<LLMEvent, LLMError> => {
+  const concurrency = options.concurrency ?? 10
+  const tools = options.tools as Tools
+  const runtimeTools = toDefinitions(tools)
+  const runtimeToolNames = new Set(runtimeTools.map((tool) => tool.name))
+  const initialRequest =
+    runtimeTools.length === 0
+      ? options.request
+      : LLMRequest.update(options.request, {
+          tools: [...options.request.tools.filter((tool) => !runtimeToolNames.has(tool.name)), ...runtimeTools],
+        })
+
+  const loop = (request: LLMRequest, step: number): Stream.Stream<LLMEvent, LLMError> =>
+    Stream.unwrap(
+      Effect.gen(function* () {
+        const state: StepState = { assistantContent: [], toolCalls: [], finishReason: undefined }
+
+        const modelStream = options
+          .stream(request)
+          .pipe(Stream.tap((event) => Effect.sync(() => accumulate(state, event))))
+
+        const continuation = Stream.unwrap(
+          Effect.gen(function* () {
+            if (state.finishReason !== "tool-calls" || state.toolCalls.length === 0) return Stream.empty
+            if (options.toolExecution === "none") return Stream.empty
+
+            const dispatched = yield* Effect.forEach(
+              state.toolCalls,
+              (call) => dispatch(tools, call).pipe(Effect.map((result) => [call, result] as const)),
+              { concurrency },
+            )
+            const resultStream = Stream.fromIterable(dispatched.flatMap(([call, result]) => emitEvents(call, result)))
+
+            if (!options.stopWhen) return resultStream
+            if (options.stopWhen({ step, request })) return resultStream
+
+            return resultStream.pipe(Stream.concat(loop(followUpRequest(request, state, dispatched), step + 1)))
+          }),
+        )
+
+        return modelStream.pipe(Stream.concat(continuation))
+      }),
+    )
+
+  return loop(initialRequest, 0)
+}
+
+interface StepState {
+  assistantContent: ContentPart[]
+  toolCalls: ToolCallPart[]
+  finishReason: FinishReason | undefined
+}
+
+const accumulate = (state: StepState, event: LLMEvent) => {
+  if (event.type === "text-delta") {
+    appendStreamingText(state, "text", event.text, event.providerMetadata)
+    return
+  }
+  if (event.type === "reasoning-delta") {
+    appendStreamingText(state, "reasoning", event.text, event.providerMetadata)
+    return
+  }
+  if (event.type === "tool-call") {
+    const part = ToolCallPart.make({
+      id: event.id,
+      name: event.name,
+      input: event.input,
+      providerExecuted: event.providerExecuted,
+      providerMetadata: event.providerMetadata,
+    })
+    state.assistantContent.push(part)
+    if (!event.providerExecuted) state.toolCalls.push(part)
+    return
+  }
+  if (event.type === "tool-result" && event.providerExecuted) {
+    state.assistantContent.push(
+      ToolResultPart.make({
+        id: event.id,
+        name: event.name,
+        result: event.result,
+        providerExecuted: true,
+        providerMetadata: event.providerMetadata,
+      }),
+    )
+    return
+  }
+  if (event.type === "request-finish") {
+    state.finishReason = event.reason
+  }
+}
+
+const sameProviderMetadata = (left: ProviderMetadata | undefined, right: ProviderMetadata | undefined) =>
+  left === right || JSON.stringify(left) === JSON.stringify(right)
+
+const mergeProviderMetadata = (left: ProviderMetadata | undefined, right: ProviderMetadata | undefined) => {
+  if (!left) return right
+  if (!right) return left
+  return Object.fromEntries(
+    Array.from(new Set([...Object.keys(left), ...Object.keys(right)])).map((provider) => [
+      provider,
+      { ...left[provider], ...right[provider] },
+    ]),
+  )
+}
+
+const appendStreamingText = (
+  state: StepState,
+  type: "text" | "reasoning",
+  text: string,
+  providerMetadata: ProviderMetadata | undefined,
+) => {
+  const last = state.assistantContent.at(-1)
+  if (last?.type === type && text.length === 0) {
+    state.assistantContent[state.assistantContent.length - 1] = {
+      ...last,
+      providerMetadata: mergeProviderMetadata(last.providerMetadata, providerMetadata),
+    }
+    return
+  }
+  if (last?.type === type && sameProviderMetadata(last.providerMetadata, providerMetadata)) {
+    state.assistantContent[state.assistantContent.length - 1] = { ...last, text: `${last.text}${text}` }
+    return
+  }
+  state.assistantContent.push({ type, text, providerMetadata })
+}
+
+const dispatch = (tools: Tools, call: ToolCallPart): Effect.Effect<ToolResultValue> => {
+  const tool = tools[call.name]
+  if (!tool) return Effect.succeed({ type: "error" as const, value: `Unknown tool: ${call.name}` })
+  if (!tool.execute)
+    return Effect.succeed({ type: "error" as const, value: `Tool has no execute handler: ${call.name}` })
+
+  return decodeAndExecute(tool, call.input).pipe(
+    Effect.catchTag("LLM.ToolFailure", (failure) =>
+      Effect.succeed({ type: "error" as const, value: failure.message } satisfies ToolResultValue),
+    ),
+  )
+}
+
+const decodeAndExecute = (tool: AnyTool, input: unknown): Effect.Effect<ToolResultValue, ToolFailure> =>
+  tool._decode(input).pipe(
+    Effect.mapError((error) => new ToolFailure({ message: `Invalid tool input: ${error.message}` })),
+    Effect.flatMap((decoded) => tool.execute!(decoded)),
+    Effect.flatMap((value) =>
+      tool._encode(value).pipe(
+        Effect.mapError(
+          (error) =>
+            new ToolFailure({
+              message: `Tool returned an invalid value for its success schema: ${error.message}`,
+            }),
+        ),
+      ),
+    ),
+    Effect.map((encoded): ToolResultValue => ({ type: "json", value: encoded })),
+  )
+
+const emitEvents = (call: ToolCallPart, result: ToolResultValue): ReadonlyArray<LLMEvent> =>
+  result.type === "error"
+    ? [
+        { type: "tool-error", id: call.id, name: call.name, message: String(result.value) },
+        { type: "tool-result", id: call.id, name: call.name, result },
+      ]
+    : [{ type: "tool-result", id: call.id, name: call.name, result }]
+
+const followUpRequest = (
+  request: LLMRequest,
+  state: StepState,
+  dispatched: ReadonlyArray<readonly [ToolCallPart, ToolResultValue]>,
+) =>
+  LLMRequest.update(request, {
+    messages: [
+      ...request.messages,
+      Message.assistant(state.assistantContent),
+      ...dispatched.map(([call, result]) => Message.tool({ id: call.id, name: call.name, result })),
+    ],
+  })
+
+export const ToolRuntime = { stream, stepCountIs } as const
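// Orchestration sketch (`client.stream` stands in for any function that turns
// an `LLMRequest` into an event stream, and `weatherTool` for a `Tool.make`
// result): run up to three model rounds, executing tool calls between rounds.
//
//   ToolRuntime.stream({
//     request,
//     tools: { get_weather: weatherTool },
//     stopWhen: stepCountIs(3),
//     stream: (request) => client.stream(request),
//   })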
diff --git a/packages/llm/src/tool.ts b/packages/llm/src/tool.ts
new file mode 100644
index 000000000000..311c8798b6fa
--- /dev/null
+++ b/packages/llm/src/tool.ts
@@ -0,0 +1,185 @@
+import { Effect, JsonSchema, Schema } from "effect"
+import type { ToolDefinition as ToolDefinitionClass } from "./schema"
+import { ToolDefinition, ToolFailure } from "./schema"
+
+/**
+ * Schema constraint for tool parameters / success values: no decoding or
+ * encoding services are allowed. Tools should be self-contained — anything
+ * beyond pure data conversion belongs in the handler closure.
+ */
+export type ToolSchema<T = unknown> = Schema.Codec<T, unknown, never, never>
+
+export type ToolExecute<Parameters extends ToolSchema<unknown>, Success extends ToolSchema> = (
+  params: Schema.Schema.Type<Parameters>,
+) => Effect.Effect<Schema.Schema.Type<Success>, ToolFailure>
+
+/**
+ * A type-safe LLM tool. Each tool bundles its own description, parameter
+ * Schema and success Schema. The execute handler is optional: omit it when you
+ * only want to expose a tool schema to the model and handle tool calls outside
+ * this package.
+ *
+ * Errors must be expressed as `ToolFailure`. Unmapped errors and defects fail
+ * the stream.
+ *
+ * Internally each tool also carries memoized codecs and a precomputed
+ * `ToolDefinition` so the runtime doesn't rebuild them per invocation.
+ */
+export interface Tool<Parameters extends ToolSchema<unknown>, Success extends ToolSchema> {
+  readonly description: string
+  readonly parameters: Parameters
+  readonly success: Success
+  readonly execute?: ToolExecute<Parameters, Success>
+  /** @internal */
+  readonly _decode: (input: unknown) => Effect.Effect<Schema.Schema.Type<Parameters>, Schema.SchemaError>
+  /** @internal */
+  readonly _encode: (value: Schema.Schema.Type<Success>) => Effect.Effect<unknown, Schema.SchemaError>
+  /** @internal */
+  readonly _definition: ToolDefinitionClass
+}
+
+export type AnyTool = Tool<ToolSchema<any>, ToolSchema>
+
+export type ExecutableTool<Parameters extends ToolSchema<unknown>, Success extends ToolSchema> = Tool<
+  Parameters,
+  Success
+> & {
+  readonly execute: ToolExecute<Parameters, Success>
+}
+
+export type AnyExecutableTool = ExecutableTool<ToolSchema<any>, ToolSchema>
+
+export type ExecutableTools = Record<string, AnyExecutableTool>
+
+type TypedToolConfig = {
+  readonly description: string
+  readonly parameters: ToolSchema
+  readonly success: ToolSchema
+  readonly execute?: ToolExecute<ToolSchema<any>, ToolSchema>
+}
+
+type DynamicToolConfig = {
+  readonly description: string
+  readonly jsonSchema: JsonSchema.JsonSchema
+  readonly execute?: (params: unknown) => Effect.Effect<unknown, ToolFailure>
+}
+
+/**
+ * Constructs a tool. Two input modes:
+ *
+ * 1. **Typed** — pass Effect `parameters` and `success` Schemas; inputs and
+ *    outputs are statically typed and decoded/encoded automatically.
+ *
+ * ```ts
+ * Tool.make({
+ *   description: "Get current weather",
+ *   parameters: Schema.Struct({ city: Schema.String }),
+ *   success: Schema.Struct({ temperature: Schema.Number }),
+ *   execute: ({ city }) => Effect.succeed({ temperature: 22 }),
+ * })
+ * ```
+ *
+ * 2. **Dynamic** — pass raw JSON Schema as `jsonSchema`. Use this when the
+ *    schema comes from an external source (MCP server, plugin manifest,
+ *    dynamic config) and is not known at compile time. Inputs are typed as
+ *    `unknown`; the handler is responsible for any validation it needs.
+ *
+ * ```ts
+ * Tool.make({
+ *   description: "Look something up",
+ *   jsonSchema: { type: "object", properties: { ... } },
+ *   execute: (params) => Effect.succeed(...),
+ * })
+ * ```
+ *
+ * In both modes the produced tool flows through `toDefinitions(...)` and the
+ * runtime identically.
+ */
+export function make<Parameters extends ToolSchema<unknown>, Success extends ToolSchema>(config: {
+  readonly description: string
+  readonly parameters: Parameters
+  readonly success: Success
+  readonly execute: ToolExecute<Parameters, Success>
+}): ExecutableTool<Parameters, Success>
+export function make<Parameters extends ToolSchema<unknown>, Success extends ToolSchema>(config: {
+  readonly description: string
+  readonly parameters: Parameters
+  readonly success: Success
+  readonly execute?: undefined
+}): Tool<Parameters, Success>
+export function make(config: {
+  readonly description: string
+  readonly jsonSchema: JsonSchema.JsonSchema
+  readonly execute: (params: unknown) => Effect.Effect<unknown, ToolFailure>
+}): AnyExecutableTool
+export function make(config: {
+  readonly description: string
+  readonly jsonSchema: JsonSchema.JsonSchema
+  readonly execute?: undefined
+}): AnyTool
+export function make(config: TypedToolConfig | DynamicToolConfig): AnyTool {
+  if ("jsonSchema" in config) {
+    return {
+      description: config.description,
+      parameters: Schema.Unknown as ToolSchema,
+      success: Schema.Unknown as ToolSchema,
+      execute: config.execute,
+      _decode: Effect.succeed,
+      _encode: Effect.succeed,
+      _definition: new ToolDefinition({
+        name: "",
+        description: config.description,
+        inputSchema: config.jsonSchema,
+      }),
+    }
+  }
+  return {
+    description: config.description,
+    parameters: config.parameters,
+    success: config.success,
+    execute: config.execute,
+    _decode: Schema.decodeUnknownEffect(config.parameters),
+    _encode: Schema.encodeEffect(config.success),
+    _definition: new ToolDefinition({
+      name: "",
+      description: config.description,
+      inputSchema: toJsonSchema(config.parameters),
+    }),
+  }
+}
+
+export const tool = make
+
+/**
+ * A record of named tools. The record key becomes the tool name on the wire.
+ */
+export type Tools = Record<string, AnyTool>
+
+/**
+ * Convert a tools record into the `ToolDefinition[]` shape that
+ * `LLMRequest.tools` expects. The runtime calls this internally; consumers
+ * that build `LLMRequest` themselves can use it too.
+ *
+ * Tool names come from the record keys, so the per-tool cached
+ * `_definition` is rebuilt with the correct name here. The JSON Schema body
+ * is reused.
+ */
+export const toDefinitions = (tools: Tools): ReadonlyArray<ToolDefinitionClass> =>
+  Object.entries(tools).map(
+    ([name, item]) =>
+      new ToolDefinition({
+        name,
+        description: item._definition.description,
+        inputSchema: item._definition.inputSchema,
+      }),
+  )
+
+const toJsonSchema = (schema: Schema.Top): JsonSchema.JsonSchema => {
+  const document = Schema.toJsonSchemaDocument(schema)
+  if (Object.keys(document.definitions).length === 0) return document.schema
+  return { ...document.schema, $defs: document.definitions }
+}
+
+export { ToolFailure }
+
+export * as Tool from "./tool"
diff --git a/packages/llm/test/adapter.test.ts b/packages/llm/test/adapter.test.ts
new file mode 100644
index 000000000000..191b8529c06a
--- /dev/null
+++ b/packages/llm/test/adapter.test.ts
@@ -0,0 +1,175 @@
+import { describe, expect } from "bun:test"
+import { Effect, Schema, Stream } from "effect"
+import { LLM } from "../src"
+import { Route, Endpoint, LLMClient, Protocol, type RouteModelInput, type FramingDef } from "../src/route"
+import { ModelRef } from "../src/schema"
+import { testEffect } from "./lib/effect"
+import { dynamicResponse } from "./lib/http"
+
+const updateModel = (model: ModelRef, patch: Partial<ModelRef.Input>) => ModelRef.update(model, patch)
+
+const Json = Schema.fromJsonString(Schema.Unknown)
+const encodeJson = Schema.encodeSync(Json)
+
+type FakeBody = {
+  readonly body: string
+}
+
+const FakeEvent = Schema.Union([
+  Schema.Struct({ type: Schema.Literal("text"), text: Schema.String }),
+  Schema.Struct({ type: Schema.Literal("finish"), reason: Schema.Literal("stop") }),
+])
+type FakeEvent = Schema.Schema.Type<typeof FakeEvent>
+const decodeFakeEvents = Schema.decodeUnknownEffect(Schema.fromJsonString(Schema.Array(FakeEvent)))
+
+const fakeFraming: FramingDef = {
+  id: "fake-json-array",
+  frame: (bytes) =>
+    Stream.fromEffect(
+      bytes.pipe(
+        Stream.decodeText(),
+        Stream.runFold(
+          () => "",
+          (text, event) => text + event,
+        ),
+        Effect.flatMap(decodeFakeEvents),
+        Effect.orDie,
+      ),
+    ).pipe(Stream.flatMap(Stream.fromIterable)),
+}
+
+const request = LLM.request({
+  id: "req_1",
+  model: LLM.model({
+    id: "fake-model",
+    provider: "fake-provider",
+    route: "fake",
+    baseURL: "https://fake.local",
+  }),
+  prompt: "hello",
+})
+
+const raiseEvent = (event: FakeEvent): import("../src/schema").LLMEvent =>
+  event.type === "finish" ?
{ type: "request-finish", reason: event.reason } : { type: "text-delta", text: event.text } + +const fakeProtocol = Protocol.make({ + id: "fake", + body: { + schema: Schema.Struct({ + body: Schema.String, + }), + from: (request) => + Effect.succeed({ + body: [ + ...request.messages + .flatMap((message) => message.content) + .filter((part) => part.type === "text") + .map((part) => part.text), + ...request.tools.map((tool) => `tool:${tool.name}:${tool.description}`), + ].join("\n"), + }), + }, + stream: { + event: FakeEvent, + initial: () => undefined, + step: (state, event) => Effect.succeed([state, [raiseEvent(event)]] as const), + }, +}) + +const fake = Route.make({ + id: "fake", + protocol: fakeProtocol, + endpoint: Endpoint.path("/chat"), + framing: fakeFraming, +}) + +const gemini = Route.make({ + id: "gemini-fake", + protocol: fakeProtocol, + endpoint: Endpoint.path("/chat"), + framing: fakeFraming, +}) + +const echoLayer = dynamicResponse(({ text, respond }) => + Effect.succeed( + respond( + encodeJson([ + { type: "text", text: `echo:${text}` }, + { type: "finish", reason: "stop" }, + ]), + ), + ), +) + +const it = testEffect(echoLayer) + +describe("llm route", () => { + it.effect("stream and generate use the route pipeline", () => + Effect.gen(function* () { + const llm = yield* LLMClient.Service + const events = Array.from(yield* llm.stream(request).pipe(Stream.runCollect)) + const response = yield* llm.generate(request) + + expect(events.map((event) => event.type)).toEqual(["text-delta", "request-finish"]) + expect(response.events.map((event) => event.type)).toEqual(["text-delta", "request-finish"]) + }), + ) + + it.effect("selects routes by request route", () => + Effect.gen(function* () { + const llm = yield* LLMClient.Service + const prepared = yield* llm.prepare( + LLM.updateRequest(request, { model: updateModel(request.model, { route: "gemini-fake" }) }), + ) + + expect(prepared.route).toBe("gemini-fake") + }), + ) + + it.effect("maps model input before building refs", () => + Effect.gen(function* () { + const mapped = Route.model( + fake, + { provider: "fake-provider", baseURL: "https://fake.local" }, + { + mapInput: (input) => { + const { region, ...rest } = input + return { ...rest, native: { region } } + }, + }, + ) + + expect(mapped({ id: "fake-model", region: "us-east-1" }).native).toEqual({ region: "us-east-1" }) + }), + ) + + it.effect("rejects duplicate route ids", () => + Effect.gen(function* () { + expect(() => + Route.make({ + id: "fake", + protocol: Protocol.make({ + ...fakeProtocol, + body: { + ...fakeProtocol.body, + from: () => Effect.succeed({ body: "late-default" }), + }, + }), + endpoint: Endpoint.path("/chat"), + framing: fakeFraming, + }), + ).toThrow('Duplicate LLM route id "fake"') + }), + ) + + it.effect("rejects missing route", () => + Effect.gen(function* () { + const llm = yield* LLMClient.Service + const error = yield* llm + .prepare(LLM.updateRequest(request, { model: updateModel(request.model, { route: "missing" }) })) + .pipe(Effect.flip) + + expect(error.message).toContain("No LLM route") + }), + ) +}) diff --git a/packages/llm/test/auth-options.types.ts b/packages/llm/test/auth-options.types.ts new file mode 100644 index 000000000000..a44efa2274f7 --- /dev/null +++ b/packages/llm/test/auth-options.types.ts @@ -0,0 +1,100 @@ +import { Config } from "effect" +import type { Auth } from "../src/route/auth" +import type { ModelFactory } from "../src/route/auth-options" +import { Auth as RuntimeAuth } from "../src/route/auth" +import * as Azure from 
"../src/providers/azure" +import * as OpenAI from "../src/providers/openai" + +type BaseOptions = { + readonly baseURL?: string + readonly headers?: Record +} + +type Model = { + readonly id: string +} + +declare const auth: Auth +declare const optionalAuthModel: ModelFactory +declare const requiredAuthModel: ModelFactory +const configApiKey = Config.redacted("OPENAI_API_KEY") + +optionalAuthModel("gpt-4.1-mini") +optionalAuthModel("gpt-4.1-mini", {}) +optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test" }) +optionalAuthModel("gpt-4.1-mini", { apiKey: configApiKey }) +optionalAuthModel("gpt-4.1-mini", { auth }) +optionalAuthModel("gpt-4.1-mini", { auth, baseURL: "https://gateway.example.com/v1" }) +optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test", headers: { "x-source": "test" } }) + +// @ts-expect-error auth is an override, so apiKey cannot be supplied with it. +optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test", auth }) + +requiredAuthModel("custom-model", { apiKey: "key" }) +requiredAuthModel("custom-model", { apiKey: configApiKey }) +requiredAuthModel("custom-model", { auth }) +requiredAuthModel("custom-model", { auth, headers: { "x-tenant-id": "tenant" } }) + +// @ts-expect-error providers without config fallback need apiKey or auth. +requiredAuthModel("custom-model") + +// @ts-expect-error providers without config fallback need apiKey or auth. +requiredAuthModel("custom-model", {}) + +// @ts-expect-error auth is an override, so apiKey cannot be supplied with it. +requiredAuthModel("custom-model", { apiKey: "key", auth }) + +OpenAI.responses("gpt-4.1-mini") +OpenAI.responses("gpt-4.1-mini", {}) +OpenAI.responses("gpt-4.1-mini", { apiKey: "sk-test" }) +OpenAI.responses("gpt-4.1-mini", { apiKey: configApiKey }) +OpenAI.responses("gpt-4.1-mini", { auth: RuntimeAuth.bearer("oauth-token") }) +OpenAI.responses("gpt-4.1-mini", { + auth: RuntimeAuth.headers({ authorization: "Bearer gateway" }), + baseURL: "https://gateway.example.com/v1", +}) +OpenAI.responses("gpt-4.1-mini", { + generation: { maxTokens: 100 }, + providerOptions: { openai: { store: false } }, +}) + +// @ts-expect-error apiKey only accepts string, Redacted, or Config>. +OpenAI.responses("gpt-4.1-mini", { apiKey: 123 }) + +// @ts-expect-error provider helpers reject unknown top-level options. +OpenAI.responses("gpt-4.1-mini", { bogus: true }) + +// @ts-expect-error common generation options remain typed. +OpenAI.responses("gpt-4.1-mini", { generation: { maxTokens: "many" } }) + +// @ts-expect-error provider-native options remain typed. +OpenAI.responses("gpt-4.1-mini", { providerOptions: { openai: { store: "false" } } }) + +// @ts-expect-error auth is an override, so OpenAI rejects apiKey with auth. +OpenAI.responses("gpt-4.1-mini", { apiKey: "sk-test", auth: RuntimeAuth.bearer("oauth-token") }) + +OpenAI.chat("gpt-4.1-mini") +OpenAI.chat("gpt-4.1-mini", { apiKey: "sk-test" }) +OpenAI.chat("gpt-4.1-mini", { apiKey: configApiKey }) +OpenAI.chat("gpt-4.1-mini", { auth: RuntimeAuth.bearer("oauth-token") }) + +// @ts-expect-error auth is an override, so OpenAI Chat rejects apiKey with auth. +OpenAI.chat("gpt-4.1-mini", { apiKey: "sk-test", auth: RuntimeAuth.bearer("oauth-token") }) + +// @ts-expect-error Azure requires at least one of `resourceName` or `baseURL`. 
+Azure.responses("deployment") +Azure.responses("deployment", { apiKey: "azure-key", resourceName: "resource" }) +Azure.responses("deployment", { apiKey: configApiKey, resourceName: "resource" }) +Azure.responses("deployment", { auth: RuntimeAuth.header("api-key", "azure-key"), resourceName: "resource" }) + +// @ts-expect-error auth is an override, so Azure rejects apiKey with auth. +Azure.responses("deployment", { apiKey: "azure-key", auth: RuntimeAuth.header("api-key", "override") }) + +// @ts-expect-error Azure requires at least one of `resourceName` or `baseURL`. +Azure.chat("deployment") +Azure.chat("deployment", { apiKey: "azure-key", resourceName: "resource" }) +Azure.chat("deployment", { apiKey: configApiKey, resourceName: "resource" }) +Azure.chat("deployment", { auth: RuntimeAuth.header("api-key", "azure-key"), resourceName: "resource" }) + +// @ts-expect-error auth is an override, so Azure Chat rejects apiKey with auth. +Azure.chat("deployment", { apiKey: "azure-key", auth: RuntimeAuth.header("api-key", "override") }) diff --git a/packages/llm/test/auth.test.ts b/packages/llm/test/auth.test.ts new file mode 100644 index 000000000000..6b53f4d5ebd4 --- /dev/null +++ b/packages/llm/test/auth.test.ts @@ -0,0 +1,101 @@ +import { describe, expect } from "bun:test" +import { ConfigProvider, Effect } from "effect" +import { Headers } from "effect/unstable/http" +import { LLM } from "../src" +import { Auth } from "../src/route/auth" +import { it } from "./lib/effect" + +const request = LLM.request({ + id: "req_auth", + model: LLM.model({ id: "fake-model", provider: "fake", route: "fake", baseURL: "https://fake.local" }), + prompt: "hello", +}) + +const input = { + request, + method: "POST" as const, + url: "https://example.test/v1/chat", + body: "{}", + headers: Headers.fromInput({ "x-existing": "yes" }), +} + +const withEnv = (env: Record) => Effect.provide(ConfigProvider.layer(ConfigProvider.fromEnv({ env }))) + +describe("Auth", () => { + it.effect("renders a config credential as bearer auth", () => + Effect.gen(function* () { + const headers = yield* Auth.config("OPENAI_API_KEY") + .bearer() + .apply(input) + .pipe(withEnv({ OPENAI_API_KEY: "sk-test" })) + + expect(headers.authorization).toBe("Bearer sk-test") + expect(headers["x-existing"]).toBe("yes") + }), + ) + + it.effect("falls back between credential sources before rendering", () => + Effect.gen(function* () { + const headers = yield* Auth.config("PRIMARY_KEY") + .orElse(Auth.value("fallback-key")) + .pipe(Auth.header("x-api-key")) + .apply(input) + .pipe(withEnv({})) + + expect(headers["x-api-key"]).toBe("fallback-key") + expect(headers["x-existing"]).toBe("yes") + }), + ) + + it.effect("composes header auth in sequence", () => + Effect.gen(function* () { + const headers = yield* Auth.headers({ "x-tenant-id": "tenant-1" }) + .andThen(Auth.bearer("gateway-token")) + .apply(input) + + expect(headers["x-tenant-id"]).toBe("tenant-1") + expect(headers.authorization).toBe("Bearer gateway-token") + expect(headers["x-existing"]).toBe("yes") + }), + ) + + it.effect("renders a direct secret as a custom header", () => + Effect.gen(function* () { + const headers = yield* Auth.header("api-key", "direct-key").apply(input) + + expect(headers["api-key"]).toBe("direct-key") + expect(headers["x-existing"]).toBe("yes") + }), + ) + + it.effect("renders bearer auth into a custom header", () => + Effect.gen(function* () { + const headers = yield* Auth.bearerHeader("cf-aig-authorization", "gateway-token").apply(input) + + 
expect(headers["cf-aig-authorization"]).toBe("Bearer gateway-token") + expect(headers["x-existing"]).toBe("yes") + }), + ) + + it.effect("falls back between full auth values", () => + Effect.gen(function* () { + const headers = yield* Auth.config("OPENAI_API_KEY") + .bearer() + .orElse(Auth.headers({ authorization: "Bearer supplied" })) + .apply(input) + .pipe(withEnv({})) + + expect(headers.authorization).toBe("Bearer supplied") + expect(headers["x-existing"]).toBe("yes") + }), + ) + + it.effect("can intentionally leave auth untouched", () => + Effect.gen(function* () { + const headers = yield* Auth.none.apply(input) + + expect(headers.authorization).toBeUndefined() + expect(headers["x-existing"]).toBe("yes") + }), + ) +}) diff --git a/packages/llm/test/endpoint.test.ts b/packages/llm/test/endpoint.test.ts new file mode 100644 index 000000000000..f708a87ea0d9 --- /dev/null +++ b/packages/llm/test/endpoint.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, test } from "bun:test" +import { LLM } from "../src" +import { Endpoint } from "../src/route" + +const request = ( + input: { + readonly baseURL: string + readonly queryParams?: Record + }, +) => + LLM.request({ + model: LLM.model({ + id: "model-1", + provider: "test", + route: "test-route", + baseURL: input.baseURL, + queryParams: input.queryParams, + }), + prompt: "hello", + }) + +describe("Endpoint", () => { + test("appends a static path to the model's baseURL", () => { + const url = Endpoint.render(Endpoint.path("/chat"), { + request: request({ baseURL: "https://api.example.test/v1/" }), + body: {}, + }) + + expect(url.toString()).toBe("https://api.example.test/v1/chat") + }) + + test("model query params are appended to the rendered URL", () => { + const url = Endpoint.render(Endpoint.path("/chat?alt=sse"), { + request: request({ + baseURL: "https://custom.example.test/root/", + queryParams: { "api-version": "2026-01-01", alt: "json" }, + }), + body: {}, + }) + + expect(url.toString()).toBe("https://custom.example.test/root/chat?alt=json&api-version=2026-01-01") + }) + + test("path may be a function of the validated body", () => { + const url = Endpoint.render( + Endpoint.path<{ readonly modelId: string }>(({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`), + { + request: request({ baseURL: "https://bedrock-runtime.us-east-1.amazonaws.com" }), + body: { modelId: "us.amazon.nova-micro-v1:0" }, + }, + ) + + expect(url.toString()).toBe( + "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream", + ) + }) +}) diff --git a/packages/llm/test/executor.test.ts b/packages/llm/test/executor.test.ts new file mode 100644 index 000000000000..b294606ff34f --- /dev/null +++ b/packages/llm/test/executor.test.ts @@ -0,0 +1,416 @@ +import { describe, expect } from "bun:test" +import { Effect, Fiber, Layer, Random, Ref } from "effect" +import * as TestClock from "effect/testing/TestClock" +import { Headers, HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http" +import { LLM, LLMError } from "../src" +import { LLMClient, RequestExecutor } from "../src/route" +import * as OpenAIChat from "../src/protocols/openai-chat" +import { dynamicResponse } from "./lib/http" +import { deltaChunk } from "./lib/openai-chunks" +import { sseRaw } from "./lib/sse" +import { it } from "./lib/effect" + +const request = HttpClientRequest.post("https://provider.test/v1/chat?api_key=secret&key=secret&debug=1").pipe( + HttpClientRequest.setHeaders(Headers.fromInput({ authorization: 
"Bearer secret", "x-safe": "visible" })), +) + +const secretRequest = HttpClientRequest.post("https://provider.test/v1/chat?api_key=query-secret-123&debug=1").pipe( + HttpClientRequest.setHeaders(Headers.fromInput({ authorization: "Bearer header-secret-456" })), +) + +const responsesLayer = (responses: ReadonlyArray) => + RequestExecutor.layer.pipe( + Layer.provide( + Layer.unwrap( + Effect.gen(function* () { + const cursor = yield* Ref.make(0) + return Layer.succeed( + HttpClient.HttpClient, + HttpClient.make((request) => + Effect.gen(function* () { + const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1) + return HttpClientResponse.fromWeb(request, responses[index] ?? responses[responses.length - 1]) + }), + ), + ) + }), + ), + ), + ) + +const countedResponsesLayer = (attempts: Ref.Ref, responses: ReadonlyArray) => + RequestExecutor.layer.pipe( + Layer.provide( + Layer.unwrap( + Effect.gen(function* () { + const cursor = yield* Ref.make(0) + return Layer.succeed( + HttpClient.HttpClient, + HttpClient.make((request) => + Effect.gen(function* () { + yield* Ref.update(attempts, (value) => value + 1) + const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1) + return HttpClientResponse.fromWeb(request, responses[index] ?? responses[responses.length - 1]) + }), + ), + ) + }), + ), + ), + ) + +const randomMidpoint = { + nextDoubleUnsafe: () => 0.5, + nextIntUnsafe: () => 0, +} + +const expectLLMError = (error: unknown) => { + expect(error).toBeInstanceOf(LLMError) + if (!(error instanceof LLMError)) throw new Error("expected LLMError") + return error +} + +const errorHttp = (error: LLMError) => ("http" in error.reason ? error.reason.http : undefined) + +describe("RequestExecutor", () => { + it.effect("returns redacted diagnostics for retryable rate limits", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* executor.execute(request).pipe(Effect.flip) + + expectLLMError(error) + expect(error).toMatchObject({ + retryable: true, + retryAfterMs: 0, + reason: { + _tag: "RateLimit", + rateLimit: { retryAfterMs: 0 }, + http: { + requestId: "req_123", + request: { + method: "POST", + url: "https://provider.test/v1/chat?api_key=%3Credacted%3E&key=%3Credacted%3E&debug=1", + headers: { authorization: "", "x-safe": "visible" }, + }, + response: { + status: 429, + headers: { + "retry-after-ms": "0", + "x-request-id": "req_123", + "x-api-key": "", + }, + }, + }, + }, + }) + expect(errorHttp(error)?.body).toBe("rate limited") + }).pipe( + Effect.provide( + responsesLayer([ + ...Array.from( + { length: 3 }, + () => + new Response("rate limited", { + status: 429, + headers: { "retry-after-ms": "0", "x-request-id": "req_123", "x-api-key": "secret" }, + }), + ), + ]), + ), + ), + ) + + it.effect("honors current redacted header names in diagnostics", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* executor.execute(request).pipe(Effect.flip) + + expectLLMError(error) + expect(errorHttp(error)?.request.headers["x-safe"]).toBe("") + expect(errorHttp(error)?.response?.headers["x-safe"]).toBe("") + }).pipe( + Effect.provide(responsesLayer([new Response("bad", { status: 400, headers: { "x-safe": "response-secret" } })])), + Effect.provideService(Headers.CurrentRedactedNames, ["x-safe"]), + ), + ) + + it.effect("extracts OpenAI-style rate-limit diagnostics", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* 
executor.execute(request).pipe(Effect.flip) + + expectLLMError(error) + expect(error.reason).toMatchObject({ _tag: "RateLimit" }) + expect(error.reason._tag === "RateLimit" ? error.reason.rateLimit : undefined).toEqual({ + retryAfterMs: 0, + limit: { requests: "500", tokens: "30000" }, + remaining: { requests: "499", tokens: "29900" }, + reset: { requests: "1s", tokens: "10s" }, + }) + }).pipe( + Effect.provide( + responsesLayer( + Array.from( + { length: 3 }, + () => + new Response("rate limited", { + status: 429, + headers: { + "retry-after-ms": "0", + "x-ratelimit-limit-requests": "500", + "x-ratelimit-limit-tokens": "30000", + "x-ratelimit-remaining-requests": "499", + "x-ratelimit-remaining-tokens": "29900", + "x-ratelimit-reset-requests": "1s", + "x-ratelimit-reset-tokens": "10s", + }, + }), + ), + ), + ), + ), + ) + + it.effect("extracts Anthropic-style rate-limit diagnostics", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* executor.execute(request).pipe(Effect.flip) + + expectLLMError(error) + expect(error.reason).toMatchObject({ _tag: "ProviderInternal" }) + expect(errorHttp(error)?.rateLimit).toEqual({ + retryAfterMs: 0, + limit: { requests: "100", "input-tokens": "10000" }, + remaining: { requests: "12", "input-tokens": "9000" }, + reset: { requests: "2026-05-06T12:00:00Z", "input-tokens": "2026-05-06T12:00:10Z" }, + }) + }).pipe( + Effect.provide( + responsesLayer( + Array.from( + { length: 3 }, + () => + new Response("overloaded", { + status: 529, + headers: { + "retry-after-ms": "0", + "anthropic-ratelimit-requests-limit": "100", + "anthropic-ratelimit-requests-remaining": "12", + "anthropic-ratelimit-requests-reset": "2026-05-06T12:00:00Z", + "anthropic-ratelimit-input-tokens-limit": "10000", + "anthropic-ratelimit-input-tokens-remaining": "9000", + "anthropic-ratelimit-input-tokens-reset": "2026-05-06T12:00:10Z", + }, + }), + ), + ), + ), + ), + ) + + it.effect("retries retryable status responses before returning the stream", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const response = yield* executor.execute(request) + + expect(response.status).toBe(200) + expect(yield* response.text).toBe("ok") + }).pipe( + Effect.provide( + responsesLayer([ + new Response("busy", { status: 503, headers: { "retry-after-ms": "0" } }), + new Response("ok", { status: 200 }), + ]), + ), + ), + ) + + it.effect("marks 504 and 529 status responses retryable", () => + Effect.gen(function* () { + const failWith = (status: number) => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* executor.execute(request).pipe(Effect.flip) + + expectLLMError(error) + expect(error.reason).toMatchObject({ _tag: "ProviderInternal", status }) + expect(error.retryable).toBe(true) + }).pipe( + Effect.provide( + responsesLayer( + Array.from( + { length: 3 }, + () => + new Response("retry", { + status, + headers: { "retry-after-ms": "0" }, + }), + ), + ), + ), + ) + + yield* failWith(504) + yield* failWith(529) + }), + ) + + it.effect("does not retry non-retryable status responses and truncates large bodies", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* executor.execute(request).pipe(Effect.flip) + + expectLLMError(error) + expect(error.reason).toMatchObject({ _tag: "Authentication" }) + expect(error.retryable).toBe(false) + expect(errorHttp(error)?.bodyTruncated).toBe(true) + 
expect(errorHttp(error)?.body).toHaveLength(16_384) + }).pipe( + Effect.provide( + responsesLayer([ + new Response("x".repeat(20_000), { status: 401 }), + new Response("should not retry", { status: 200 }), + ]), + ), + ), + ) + + it.effect("redacts common secret fields in response bodies", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* executor.execute(request).pipe(Effect.flip) + + expectLLMError(error) + expect(errorHttp(error)?.body).toContain('"key":""') + expect(errorHttp(error)?.body).toContain("api_key=") + expect(errorHttp(error)?.body).not.toContain("body-secret") + expect(errorHttp(error)?.body).not.toContain("query-secret") + }).pipe( + Effect.provide( + responsesLayer([ + new Response('{"error":{"message":"bad","key":"body-secret","detail":"api_key=query-secret"}}', { + status: 400, + }), + ]), + ), + ), + ) + + it.effect("redacts echoed request secret values in response bodies", () => + Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const error = yield* executor.execute(secretRequest).pipe(Effect.flip) + + expectLLMError(error) + expect(errorHttp(error)?.body).toContain("provider echoed ") + expect(errorHttp(error)?.body).toContain("authorization ") + expect(errorHttp(error)?.body).not.toContain("query-secret-123") + expect(errorHttp(error)?.body).not.toContain("header-secret-456") + }).pipe( + Effect.provide( + responsesLayer([ + new Response("provider echoed query-secret-123 and authorization header-secret-456", { status: 400 }), + ]), + ), + ), + ) + + it.effect("honors Retry-After delta seconds before retrying", () => + Effect.gen(function* () { + const attempts = yield* Ref.make(0) + return yield* Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const fiber = yield* executor.execute(request).pipe(Effect.forkChild) + + yield* Effect.yieldNow + expect(yield* Ref.get(attempts)).toBe(1) + + yield* TestClock.adjust(1_999) + yield* Effect.yieldNow + expect(yield* Ref.get(attempts)).toBe(1) + + yield* TestClock.adjust(1) + const response = yield* Fiber.join(fiber) + + expect(response.status).toBe(200) + expect(yield* Ref.get(attempts)).toBe(2) + }).pipe( + Effect.provide( + countedResponsesLayer(attempts, [ + new Response("busy", { status: 503, headers: { "retry-after": "2" } }), + new Response("ok", { status: 200 }), + ]), + ), + ) + }), + ) + + it.effect("uses exponential jittered delay when retry-after is absent", () => + Effect.gen(function* () { + const attempts = yield* Ref.make(0) + return yield* Effect.gen(function* () { + const executor = yield* RequestExecutor.Service + const fiber = yield* executor.execute(request).pipe(Effect.flip, Effect.forkChild) + + yield* Effect.yieldNow + expect(yield* Ref.get(attempts)).toBe(1) + + yield* TestClock.adjust(499) + yield* Effect.yieldNow + expect(yield* Ref.get(attempts)).toBe(1) + + yield* TestClock.adjust(1) + yield* Effect.yieldNow + expect(yield* Ref.get(attempts)).toBe(2) + + yield* TestClock.adjust(999) + yield* Effect.yieldNow + expect(yield* Ref.get(attempts)).toBe(2) + + yield* TestClock.adjust(1) + const error = yield* Fiber.join(fiber) + + expectLLMError(error) + expect(error.reason).toMatchObject({ _tag: "ProviderInternal" }) + expect(yield* Ref.get(attempts)).toBe(3) + }).pipe( + Effect.provide( + countedResponsesLayer(attempts, [ + new Response("busy", { status: 503 }), + new Response("still busy", { status: 503 }), + new Response("done retrying", { status: 503 }), + ]), + ), + ) + 
}).pipe(Effect.provideService(Random.Random, randomMidpoint)), + ) + + it.effect("does not retry after a successful response reaches stream parsing", () => + Effect.gen(function* () { + const attempts = yield* Ref.make(0) + const model = OpenAIChat.model({ id: "gpt-4o-mini", baseURL: "https://api.openai.test/v1" }) + const error = yield* LLMClient.generate(LLM.request({ model, prompt: "Say hello." })).pipe( + Effect.provide( + dynamicResponse((input) => + Ref.update(attempts, (value) => value + 1).pipe( + Effect.as( + input.respond( + sseRaw( + `data: ${JSON.stringify(deltaChunk({ role: "assistant", content: "Hello" }))}`, + "data: not-json", + ), + { headers: { "content-type": "text/event-stream" } }, + ), + ), + ), + ), + ), + Effect.flip, + ) + + expectLLMError(error) + expect(error.reason).toMatchObject({ _tag: "InvalidProviderOutput" }) + expect(yield* Ref.get(attempts)).toBe(1) + }), + ) +}) diff --git a/packages/llm/test/exports.test.ts b/packages/llm/test/exports.test.ts new file mode 100644 index 000000000000..237dadb27dc1 --- /dev/null +++ b/packages/llm/test/exports.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, test } from "bun:test" +import { LLM, LLMClient, Provider } from "@opencode-ai/llm" +import { Route, Protocol } from "@opencode-ai/llm/route" +import { Provider as ProviderSubpath } from "@opencode-ai/llm/provider" +import { Cloudflare, OpenAI, OpenAICompatible, OpenRouter, XAI } from "@opencode-ai/llm/providers" +import * as GitHubCopilot from "@opencode-ai/llm/providers/github-copilot" +import { OpenAIChat, OpenAICompatibleChat, OpenAIResponses } from "@opencode-ai/llm/protocols" +import * as AnthropicMessages from "@opencode-ai/llm/protocols/anthropic-messages" + +describe("public exports", () => { + test("root exposes app-facing runtime APIs", () => { + expect(LLM.request).toBeFunction() + expect(LLMClient.Service).toBeFunction() + expect(LLMClient.layer).toBeDefined() + expect(Provider.make).toBeFunction() + expect(ProviderSubpath.make).toBe(Provider.make) + }) + + test("route barrel exposes route-authoring APIs", () => { + expect(Route.make).toBeFunction() + expect(Protocol.make).toBeFunction() + }) + + test("provider barrels expose user-facing facades", () => { + expect(OpenAI.model).toBeFunction() + expect(OpenAI.provider.model).toBe(OpenAI.model) + expect(OpenAI.apis.responses).toBe(OpenAI.responses) + expect(OpenAI.apis.responsesWebSocket).toBe(OpenAI.responsesWebSocket) + expect(OpenAICompatible.deepseek.model).toBeFunction() + expect(Cloudflare.model).toBeFunction() + expect(Cloudflare.provider.model).toBe(Cloudflare.model) + expect(Cloudflare.aiGateway).toBeFunction() + expect(Cloudflare.workersAI).toBeFunction() + expect(OpenRouter.model).toBeFunction() + expect(OpenRouter.provider.model).toBe(OpenRouter.model) + expect(XAI.model).toBeFunction() + expect(XAI.provider.model).toBe(XAI.model) + expect(XAI.apis.responses).toBe(XAI.responses) + expect(XAI.apis.chat).toBe(XAI.chat) + expect(XAI.responses("grok-4.3", { apiKey: "fixture" })).toMatchObject({ + route: "openai-responses", + }) + expect(XAI.chat("grok-4.3", { apiKey: "fixture" })).toMatchObject({ + route: "openai-compatible-chat", + }) + expect(GitHubCopilot.model).toBeFunction() + }) + + test("protocol barrels expose supported low-level routes", () => { + expect(OpenAIChat.route.id).toBe("openai-chat") + expect(OpenAICompatibleChat.route.id).toBe("openai-compatible-chat") + expect(OpenAIResponses.route.id).toBe("openai-responses") + 
expect(OpenAIResponses.webSocketRoute.id).toBe("openai-responses-websocket") + expect(AnthropicMessages.route.id).toBe("anthropic-messages") + }) +}) diff --git a/packages/llm/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json b/packages/llm/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json new file mode 100644 index 000000000000..7730485cb4d6 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch.json @@ -0,0 +1,29 @@ +{ + "version": 1, + "metadata": { + "name": "anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch", + "recordedAt": "2026-05-05T20:09:16.245Z", + "tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.anthropic.com/v1/messages", + "headers": { + "anthropic-version": "2023-06-01", + "content-type": "application/json" + }, + "body": "{\"model\":\"claude-haiku-4-5-20251001\",\"messages\":[{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"I will check the weather.\"}]},{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"call_1\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"call_1\",\"content\":\"{\\\"temperature\\\":\\\"72F\\\"}\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Use that result to answer briefly.\",\"cache_control\":{\"type\":\"ephemeral\"}}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{}}}],\"stream\":true,\"max_tokens\":4096}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01SikJVFaMR1XLMtavUhvuog\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":638,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":1,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"The\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" weather in Paris is currently 72°F.\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":638,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":14} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n" + } + } + ] +} diff --git 
a/packages/llm/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json b/packages/llm/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json new file mode 100644 index 000000000000..316f4308fc1d --- /dev/null +++ b/packages/llm/test/fixtures/recordings/anthropic-messages/claude-opus-4-7-drives-a-tool-loop.json @@ -0,0 +1,56 @@ +{ + "version": 1, + "metadata": { + "name": "anthropic-messages/claude-opus-4-7-drives-a-tool-loop", + "recordedAt": "2026-05-03T19:59:44.186Z", + "tags": [ + "prefix:anthropic-messages", + "provider:anthropic", + "protocol:anthropic-messages", + "tool", + "tool-loop", + "golden", + "flagship" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.anthropic.com/v1/messages", + "headers": { + "anthropic-version": "2023-06-01", + "content-type": "application/json" + }, + "body": "{\"model\":\"claude-opus-4-7\",\"system\":[{\"type\":\"text\",\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is the weather in Paris?\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_tokens\":80}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-opus-4-7\",\"id\":\"msg_01DgAEgLgB1ZhavZon4qGE1t\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":798,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":0,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\": \"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\\\"Pa\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"ris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":798,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":66} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n" + } + }, + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.anthropic.com/v1/messages", + "headers": { + 
"anthropic-version": "2023-06-01", + "content-type": "application/json" + }, + "body": "{\"model\":\"claude-opus-4-7\",\"system\":[{\"type\":\"text\",\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is the weather in Paris?\"}]},{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_tokens\":80}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-opus-4-7\",\"id\":\"msg_011KJqj32QjkrUAiBFxhmEoG\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":895,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":5,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Paris is curr\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"ently sunny at 22°C.\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":895,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":19}}\n\nevent: message_stop\ndata: {\"type\":\"message_stop\"}\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json b/packages/llm/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json new file mode 100644 index 000000000000..cd0990cec5cf --- /dev/null +++ b/packages/llm/test/fixtures/recordings/anthropic-messages/rejects-malformed-assistant-tool-order-without-patch.json @@ -0,0 +1,29 @@ +{ + "version": 1, + "metadata": { + "name": "anthropic-messages/rejects-malformed-assistant-tool-order-without-patch", + "recordedAt": "2026-05-05T20:08:42.597Z", + "tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool", "sad-path"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.anthropic.com/v1/messages", + "headers": { + "anthropic-version": "2023-06-01", + "content-type": "application/json" + }, + "body": 
"{\"model\":\"claude-haiku-4-5-20251001\",\"messages\":[{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"call_1\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}},{\"type\":\"text\",\"text\":\"I will check the weather.\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"call_1\",\"content\":\"{\\\"temperature\\\":\\\"72F\\\"}\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Use that result to answer briefly.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{}}}],\"stream\":true,\"max_tokens\":4096}" + }, + "response": { + "status": 400, + "headers": { + "content-type": "application/json" + }, + "body": "{\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\",\"message\":\"messages.1: `tool_use` ids were found without `tool_result` blocks immediately after: call_1. Each `tool_use` block must have a corresponding `tool_result` block in the next message.\"},\"request_id\":\"req_011Cak2XdJgnzxKCY2BC2Beh\"}" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/anthropic-messages/streams-text.json b/packages/llm/test/fixtures/recordings/anthropic-messages/streams-text.json new file mode 100644 index 000000000000..e80a0dac34b5 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/anthropic-messages/streams-text.json @@ -0,0 +1,29 @@ +{ + "version": 1, + "metadata": { + "name": "anthropic-messages/streams-text", + "recordedAt": "2026-04-28T21:18:45.535Z", + "tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.anthropic.com/v1/messages", + "headers": { + "anthropic-version": "2023-06-01", + "content-type": "application/json" + }, + "body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"You are concise.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Reply with exactly: Hello!\"}]}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01UodR8c3ezAK8rAfi8HAs8g\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":2,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Hello!\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":5} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n" + } 
+ } + ] +} diff --git a/packages/llm/test/fixtures/recordings/anthropic-messages/streams-tool-call.json b/packages/llm/test/fixtures/recordings/anthropic-messages/streams-tool-call.json new file mode 100644 index 000000000000..ef8f69c21d3f --- /dev/null +++ b/packages/llm/test/fixtures/recordings/anthropic-messages/streams-tool-call.json @@ -0,0 +1,29 @@ +{ + "version": 1, + "metadata": { + "name": "anthropic-messages/streams-tool-call", + "recordedAt": "2026-04-28T21:18:46.878Z", + "tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.anthropic.com/v1/messages", + "headers": { + "anthropic-version": "2023-06-01", + "content-type": "application/json" + }, + "body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"Call tools exactly as requested.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"tool_choice\":{\"type\":\"tool\",\"name\":\"get_weather\"},\"stream\":true,\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01RYgU7NUPMK4B9v8S7gVpCS\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":16,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_012rmAruviySvUXSjgCPWVRu\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\":\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" \\\"Paris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":33} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json b/packages/llm/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json new file mode 100644 index 000000000000..26eca01609a1 --- /dev/null +++ 
b/packages/llm/test/fixtures/recordings/bedrock-converse/drives-a-tool-loop.json @@ -0,0 +1,55 @@ +{ + "version": 1, + "metadata": { + "name": "bedrock-converse/drives-a-tool-loop", + "recordedAt": "2026-05-03T20:01:48.334Z", + "tags": [ + "prefix:bedrock-converse", + "provider:amazon-bedrock", + "protocol:bedrock-converse", + "tool", + "tool-loop", + "golden" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream", + "headers": { + "content-type": "application/json" + }, + "body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"What is the weather in Paris?\"}]}],\"system\":[{\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"inferenceConfig\":{\"maxTokens\":80,\"temperature\":0},\"toolConfig\":{\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}}]}}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "application/vnd.amazon.eventstream" + }, + "body": "AAAAtwAAAFJCoDu1CzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDUiLCJyb2xlIjoiYXNzaXN0YW50In1xBrKfAAAA0gAAAFdjGDcHCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6Ijx0aGlua2luZyJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWIn17Hkd0AAAAuQAAAFeN+nFbCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6Ij4ifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREUifXAgJvgAAADMAAAAV7zIHuQLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIFRvIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVVYifaOASr0AAACrAAAAV5fatbkLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIGRldGVybWluZSJ9LCJwIjoiYWJjZGVmZ2gifQUyd0MAAADQAAAAVxnYZGcLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIHRoZSJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZIn0ZHcgRAAAAxwAAAFfLGC/1CzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiB3ZWF0aGVyIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTCJ9QpgceQAAALsAAABX9zoiOws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgaW4ifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREUifRLNLa0AAACkAAAAVxWKImgLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ
0ZXh0IjoiIFBhcmlzIn0sInAiOiJhYmNkZSJ9QOSGZQAAAKgAAABX0HrPaQs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIsIn0sInAiOiJhYmNkZWZnaGlqa2xtbiJ9bgd/VgAAALAAAABXgOoTKgs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgSSJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1In3RkbiWAAAA0QAAAFckuE3XCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiB3aWxsIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFkifa2kMpYAAACfAAAAV8N7q/8LOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIHVzZSJ9LCJwIjoiYWIifWRVyJsAAADFAAAAV7HYfJULOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIHRoZSJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTiJ99QGTXwAAALwAAABXRRr+Kws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgZ2V0In0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFIn3A1pHkAAAArAAAAFcl+mmpCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6Il8ifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxciJ9Jl4BhgAAAMwAAABXvMge5As6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiJ3ZWF0aGVyIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUiJ9zDOXNgAAANMAAABXXngetws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgdG9vbCJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAifYuc7T0AAADXAAAAV6v4uHcLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIGFuZCJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NSJ9Z1WRPAAAANYAAABXlpiRxws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgcHJvdmlkZSJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAifWuffy4AAACiAAAAV5rK18gLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIHRoZSJ9LCJwIjoiYWJjZGUifR59TKYAAADUAAAAV+xYwqcLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIGNpdHkifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMSJ9JF6q4AAAANQAAABX7FjCpws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgYXMifSwicC
I6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzIn3T44iVAAAA1gAAAFeWmJHHCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiBcIiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NSJ9T89b0AAAANkAAABXFMgGFgs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiJQYXJpcyJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NTYifYX0tNEAAAClAAAAVyjqC9gLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiXCIuIn0sInAiOiJhYmNkZWZnaGkifUbVohIAAAC9AAAAV3h615sLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIDwvIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkcifU+fapUAAADEAAAAV4y4VSULOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoidGhpbmtpbmcifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJIn0npV45AAAAoQAAAFfdaq0YCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6Ij5cbiJ9LCJwIjoiYWJjZGUifXpOZ6MAAACtAAAAVm+dcI8LOmV2ZW50LXR5cGUHABBjb250ZW50QmxvY2tTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OTyJ9wp8EHgAAAQwAAABXnoElmgs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja1N0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjEsInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVSIsInN0YXJ0Ijp7InRvb2xVc2UiOnsibmFtZSI6ImdldF93ZWF0aGVyIiwidG9vbFVzZUlkIjoidG9vbHVzZV9hOG5sZjJicUdMY1p2YVNvQnBRMXNIIn19fY7FuJUAAADLAAAAVw7owvQLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjoxLCJkZWx0YSI6eyJ0b29sVXNlIjp7ImlucHV0Ijoie1wiY2l0eVwiOlwiUGFyaXNcIn0ifX0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcSJ9r3QETwAAALQAAABWAm2FfAs6ZXZlbnQtdHlwZQcAEGNvbnRlbnRCbG9ja1N0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVViJ9shQTDgAAAKUAAABRwYmu7Qs6ZXZlbnQtdHlwZQcAC21lc3NhZ2VTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSiIsInN0b3BSZWFzb24iOiJ0b29sX3VzZSJ9i4+/2gAAAO4AAABOY6LKQAs6ZXZlbnQtdHlwZQcACG1ldGFkYXRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsibWV0cmljcyI6eyJsYXRlbmN5TXMiOjQ5OX0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2dyIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjo0MjUsIm91dHB1dFRva2VucyI6NDUsInNlcnZlclRvb2xVc2FnZSI6e30sInRvdGFsVG9rZW5zIjo0NzB9fSAjG74=", + "bodyEncoding": "base64" + } + }, + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream", + "headers": { + "content-type": "application/json" + }, + "body": 
"{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"What is the weather in Paris?\"}]},{\"role\":\"assistant\",\"content\":[{\"text\":\" To determine the weather in Paris, I will use the get_weather tool and provide the city as \\\"Paris\\\". \\n\"},{\"toolUse\":{\"toolUseId\":\"tooluse_a8nlf2bqGLcZvaSoBpQ1sH\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}}}]},{\"role\":\"user\",\"content\":[{\"toolResult\":{\"toolUseId\":\"tooluse_a8nlf2bqGLcZvaSoBpQ1sH\",\"content\":[{\"json\":{\"temperature\":22,\"condition\":\"sunny\"}}],\"status\":\"success\"}}]}],\"system\":[{\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"inferenceConfig\":{\"maxTokens\":80,\"temperature\":0},\"toolConfig\":{\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}}]}}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "application/vnd.amazon.eventstream" + }, + "body": "AAAAgQAAAFJswXaTCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2QiLCJyb2xlIjoiYXNzaXN0YW50In31EqAFAAAAoQAAAFfdaq0YCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IlRoZSJ9LCJwIjoiYWJjZGUifZ8hzYkAAACmAAAAV29KcQgLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIHdlYXRoZXIifSwicCI6ImFiY2RlIn0dzksTAAAAsQAAAFe9ijqaCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiBpbiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1In1AJhvbAAAAqgAAAFequpwJCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiBQYXJpcyJ9LCJwIjoiYWJjZGVmZ2hpamsifQpyKMQAAADBAAAAV0RY2lULOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIGlzIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLIn1gvC8JAAAA2QAAAFcUyAYWCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiBzdW5ueSJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NSJ9j+j/gQAAAK8AAABXYloTeQs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgd2l0aCJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHEifRRyjnsAAACyAAAAV/oqQEoLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIGEifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3In2kLJI+AAAAuAAAAFewmljrCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiB0ZW1wZXJhdHVyZSJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFycyJ9JuTWEQAAAKEAAABX3WqtGAs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0Rlb
HRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIgb2YifSwicCI6ImFiY2RlIn1Uu0Z+AAAAmwAAAFc2+w0/CzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiJ9LCJwIjoiYWIifaR9kNQAAAC4AAAAV7CaWOsLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tEZWx0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJkZWx0YSI6eyJ0ZXh0IjoiIDIifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDIn04fpEGAAAApQAAAFco6gvYCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IjIifSwicCI6ImFiY2RlZmdoaWprIn0ws3/UAAAA1gAAAFeWmJHHCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiBkZWdyZWVzIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaMCJ9q7xKeQAAAJ8AAABXw3ur/ws6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIuIn0sInAiOiJhYmNkZSJ9t7YAjQAAAMUAAABXsdh8lQs6ZXZlbnQtdHlwZQcAEWNvbnRlbnRCbG9ja0RlbHRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsiY29udGVudEJsb2NrSW5kZXgiOjAsImRlbHRhIjp7InRleHQiOiIifSwicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSIn1NJJR+AAAAsQAAAFbKjQoMCzpldmVudC10eXBlBwAQY29udGVudEJsb2NrU3RvcA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTIn1DzHT/AAAAiAAAAFH42EVYCzpldmVudC10eXBlBwALbWVzc2FnZVN0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJwIjoiYWJjZGVmZyIsInN0b3BSZWFzb24iOiJlbmRfdHVybiJ9rwP92gAAAOAAAABO3JJ0IQs6ZXZlbnQtdHlwZQcACG1ldGFkYXRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsibWV0cmljcyI6eyJsYXRlbmN5TXMiOjM4MX0sInAiOiJhYmNkZWZnaGkiLCJ1c2FnZSI6eyJpbnB1dFRva2VucyI6NTEwLCJvdXRwdXRUb2tlbnMiOjE2LCJzZXJ2ZXJUb29sVXNhZ2UiOnt9LCJ0b3RhbFRva2VucyI6NTI2fX2ZCNET", + "bodyEncoding": "base64" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json b/packages/llm/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json new file mode 100644 index 000000000000..4f22ce22da80 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json @@ -0,0 +1,29 @@ +{ + "version": 1, + "metadata": { + "name": "bedrock-converse/streams-a-tool-call", + "recordedAt": "2026-04-28T21:18:46.929Z", + "tags": ["prefix:bedrock-converse", "provider:amazon-bedrock", "protocol:bedrock-converse", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream", + "headers": { + "content-type": "application/json" + }, + "body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"system\":[{\"text\":\"Call tools exactly as requested.\"}],\"inferenceConfig\":{\"maxTokens\":80,\"temperature\":0},\"toolConfig\":{\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"description\":\"Get 
current weather for a city.\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}}],\"toolChoice\":{\"tool\":{\"name\":\"get_weather\"}}}}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "application/vnd.amazon.eventstream" + }, + "body": "AAAAuQAAAFL9kIXUCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2NyIsInJvbGUiOiJhc3Npc3RhbnQifWf51EkAAAEMAAAAV56BJZoLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tTdGFydA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFUiLCJzdGFydCI6eyJ0b29sVXNlIjp7Im5hbWUiOiJnZXRfd2VhdGhlciIsInRvb2xVc2VJZCI6InRvb2x1c2VfNmExcFB2bmM5OUdMS08zS0drVUEyTiJ9fX2LR7PFAAAA4gAAAFfCOY+BCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidG9vbFVzZSI6eyJpbnB1dCI6IntcImNpdHlcIjpcIlBhcmlzXCJ9In19LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTiJ9RkW+2gAAAIcAAABW5OxHKgs6ZXZlbnQtdHlwZQcAEGNvbnRlbnRCbG9ja1N0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwicCI6ImFiYyJ9y6nrtwAAAK4AAABRtlmf/As6ZXZlbnQtdHlwZQcAC21lc3NhZ2VTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSUyIsInN0b3BSZWFzb24iOiJ0b29sX3VzZSJ9MTlQawAAAOIAAABOplInQQs6ZXZlbnQtdHlwZQcACG1ldGFkYXRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsibWV0cmljcyI6eyJsYXRlbmN5TXMiOjM1NX0sInAiOiJhYmNkZWZnaGlqayIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjo0MTksIm91dHB1dFRva2VucyI6MTYsInNlcnZlclRvb2xVc2FnZSI6e30sInRvdGFsVG9rZW5zIjo0MzV9fU1tVJc=", + "bodyEncoding": "base64" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/bedrock-converse/streams-text.json b/packages/llm/test/fixtures/recordings/bedrock-converse/streams-text.json new file mode 100644 index 000000000000..7eaacec02baf --- /dev/null +++ b/packages/llm/test/fixtures/recordings/bedrock-converse/streams-text.json @@ -0,0 +1,29 @@ +{ + "version": 1, + "metadata": { + "name": "bedrock-converse/streams-text", + "recordedAt": "2026-04-28T21:18:46.553Z", + "tags": ["prefix:bedrock-converse", "provider:amazon-bedrock", "protocol:bedrock-converse"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream", + "headers": { + "content-type": "application/json" + }, + "body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Say hello.\"}]}],\"system\":[{\"text\":\"Reply with the single word 'Hello'.\"}],\"inferenceConfig\":{\"maxTokens\":16,\"temperature\":0}}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "application/vnd.amazon.eventstream" + }, + "body": 
"AAAAmQAAAFI8UarQCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUIiLCJyb2xlIjoiYXNzaXN0YW50In3SL1jNAAAAvQAAAFd4etebCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IkhlbGxvIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFIn2B0NR6AAAAxgAAAFf2eAZFCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTIn3XaHMvAAAAhwAAAFbk7EcqCzpldmVudC10eXBlBwAQY29udGVudEJsb2NrU3RvcA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjIn3Lqeu3AAAAjwAAAFFK+JlICzpldmVudC10eXBlBwALbWVzc2FnZVN0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJwIjoiYWJjZGVmZ2hpamtsbW4iLCJzdG9wUmVhc29uIjoiZW5kX3R1cm4ifZ+RQqEAAAECAAAATkXaMzsLOmV2ZW50LXR5cGUHAAhtZXRhZGF0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7Im1ldHJpY3MiOnsibGF0ZW5jeU1zIjozMDZ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVCIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjoxMiwib3V0cHV0VG9rZW5zIjoyLCJzZXJ2ZXJUb29sVXNhZ2UiOnt9LCJ0b3RhbFRva2VucyI6MTR9fSnnkUk=", + "bodyEncoding": "base64" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json b/packages/llm/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json new file mode 100644 index 000000000000..80ade53b9cf6 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call.json @@ -0,0 +1,38 @@ +{ + "version": 1, + "metadata": { + "name": "cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-gpt-oss-20b-tools-tool-call", + "recordedAt": "2026-05-08T17:20:08.287Z", + "provider": "cloudflare-ai-gateway", + "route": "cloudflare-ai-gateway", + "transport": "http", + "model": "workers-ai/@cf/openai/gpt-oss-20b", + "tags": [ + "prefix:cloudflare-ai-gateway", + "provider:cloudflare-ai-gateway", + "tool", + "tool-call", + "golden" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://gateway.ai.cloudflare.com/v1/{account}/{gateway}/compat/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"workers-ai/@cf/openai/gpt-oss-20b\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":120,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: 
{\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"We\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" need\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" to\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" call\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" the\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" function\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" get\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"_weather\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" with\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" city\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" \\\"\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: 
{\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"Paris\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"\\\".\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"id\":\"chatcmpl-tool-b975da5af1f843e095ba7062d8e108ba\",\"type\":\"function\",\"index\":0,\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"stop_reason\":200012,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260808196\",\"object\":\"chat.completion.chunk\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"tool_calls\"}],\"usage\":{\"prompt_tokens\":136,\"total_tokens\":173,\"completion_tokens\":37}}\n\ndata: {\"id\":\"id-1778260808196\",\"object\":\"chat.completion.chunk\",\"created\":1778260808,\"model\":\"@cf/openai/gpt-oss-20b\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"tool_calls\"}],\"usage\":{\"prompt_tokens\":136,\"completion_tokens\":37,\"total_tokens\":173,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json 
b/packages/llm/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json new file mode 100644 index 000000000000..ff535b578bf5 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text.json @@ -0,0 +1,37 @@ +{ + "version": 1, + "metadata": { + "name": "cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text", + "recordedAt": "2026-05-08T15:55:48.952Z", + "provider": "cloudflare-ai-gateway", + "route": "cloudflare-ai-gateway", + "transport": "http", + "model": "workers-ai/@cf/meta/llama-3.1-8b-instruct", + "tags": [ + "prefix:cloudflare-ai-gateway", + "provider:cloudflare-ai-gateway", + "text", + "golden" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://gateway.ai.cloudflare.com/v1/{account}/{gateway}/compat/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"workers-ai/@cf/meta/llama-3.1-8b-instruct\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply exactly with: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":40,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: {\"id\":\"id-1778255748911\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"}}]}\n\ndata: {\"id\":\"id-1778255748911\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"}}]}\n\ndata: {\"id\":\"id-1778255748911\",\"object\":\"chat.completion.chunk\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":2,\"total_tokens\":47}}\n\ndata: {\"id\":\"id-1778255748911\",\"object\":\"chat.completion.chunk\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json b/packages/llm/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json new file mode 100644 index 000000000000..2c973bffe166 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call.json @@ -0,0 +1,38 @@ +{ + "version": 1, + "metadata": { + "name": "cloudflare-workers-ai/cloudflare-workers-ai-gpt-oss-20b-tools-tool-call", + "recordedAt": "2026-05-08T17:20:14.106Z", + "provider": "cloudflare-workers-ai", + "route": "cloudflare-workers-ai", + "transport": "http", + "model": "@cf/openai/gpt-oss-20b", + "tags": [ + "prefix:cloudflare-workers-ai", + "provider:cloudflare-workers-ai", + "tool", + "tool-call", + "golden" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.cloudflare.com/client/v4/accounts/{account}/ai/v1/chat/completions", + "headers": { + "content-type": 
"application/json" + }, + "body": "{\"model\":\"@cf/openai/gpt-oss-20b\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":120,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"We\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" need\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" to\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" call\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" the\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" function\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" get\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"_weather\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: 
{\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" with\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" city\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\" \\\"\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"Paris\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"reasoning_content\":\"\\\".\"},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"id\":\"chatcmpl-tool-ed7127682c90443da222d0f8c607b5d5\",\"type\":\"function\",\"index\":0,\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null,\"token_ids\":null}]}\n\ndata: {\"id\":\"id-1778260814069\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":null,\"stop_reason\":200012,\"token_ids\":null}]}\n\ndata: 
{\"id\":\"id-1778260814069\",\"object\":\"chat.completion.chunk\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"tool_calls\"}],\"usage\":{\"prompt_tokens\":136,\"total_tokens\":173,\"completion_tokens\":37}}\n\ndata: {\"id\":\"id-1778260814069\",\"object\":\"chat.completion.chunk\",\"created\":1778260814,\"model\":\"@cf/openai/gpt-oss-20b\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"tool_calls\"}],\"usage\":{\"prompt_tokens\":136,\"completion_tokens\":37,\"total_tokens\":173,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json b/packages/llm/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json new file mode 100644 index 000000000000..4ed314e15f4f --- /dev/null +++ b/packages/llm/test/fixtures/recordings/cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text.json @@ -0,0 +1,37 @@ +{ + "version": 1, + "metadata": { + "name": "cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text", + "recordedAt": "2026-05-08T15:56:18.284Z", + "provider": "cloudflare-workers-ai", + "route": "cloudflare-workers-ai", + "transport": "http", + "model": "@cf/meta/llama-3.1-8b-instruct", + "tags": [ + "prefix:cloudflare-workers-ai", + "provider:cloudflare-workers-ai", + "text", + "golden" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.cloudflare.com/client/v4/accounts/{account}/ai/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply exactly with: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":40,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: {\"id\":\"id-1778255778230\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"}}]}\n\ndata: {\"id\":\"id-1778255778230\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"}}]}\n\ndata: {\"id\":\"id-1778255778230\",\"object\":\"chat.completion.chunk\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":2,\"total_tokens\":47}}\n\ndata: {\"id\":\"id-1778255778230\",\"object\":\"chat.completion.chunk\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/gemini/streams-text.json b/packages/llm/test/fixtures/recordings/gemini/streams-text.json new file mode 100644 index 000000000000..7f0e6b390e48 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/gemini/streams-text.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "gemini/streams-text", + "recordedAt": 
"2026-04-28T21:18:47.483Z", + "tags": ["prefix:gemini", "provider:google", "protocol:gemini"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse", + "headers": { + "content-type": "application/json" + }, + "body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Reply with exactly: Hello!\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"You are concise.\"}]},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"Hello!\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 11,\"candidatesTokenCount\": 2,\"totalTokenCount\": 29,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 11}],\"thoughtsTokenCount\": 16},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaczMAZ-b_uMP6u--iQg\"}\r\n\r\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/gemini/streams-tool-call.json b/packages/llm/test/fixtures/recordings/gemini/streams-tool-call.json new file mode 100644 index 000000000000..a526910f0daf --- /dev/null +++ b/packages/llm/test/fixtures/recordings/gemini/streams-tool-call.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "gemini/streams-tool-call", + "recordedAt": "2026-04-28T21:18:48.285Z", + "tags": ["prefix:gemini", "provider:google", "protocol:gemini", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse", + "headers": { + "content-type": "application/json" + }, + "body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"Call tools exactly as requested.\"}]},\"tools\":[{\"functionDeclarations\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"required\":[\"city\"],\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}]}],\"toolConfig\":{\"functionCallingConfig\":{\"mode\":\"ANY\",\"allowedFunctionNames\":[\"get_weather\"]}},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"functionCall\": {\"name\": \"get_weather\",\"args\": {\"city\": \"Paris\"}},\"thoughtSignature\": \"CiQBDDnWx5RcSsS1UMbykQ5HWlrMu6wrxXGUhmZ0uRKLaMhDZaEKXwEMOdbHVoJAlfbOQyKB378pDZ/gkjWr3HP+dWw1us1kMG22g4G3oJvuTq/SrWS+7KYtSlvOxCKhW2l/2/TczpyGyGmANmsusDcxF1SKOYA5/8Hg0nI24MAlT3+91V/MCoUBAQw51seClFLy3E71v2H44F1kpmjgz8FeTRZofrjbaazfrT+w8Yxgdr3UgGagLMY4OadZemQTWckq9IAqRum78hrBg6NGtQvn15SbtfTNqI4PcxX/+qPo4/g4/ZT5kVORDhVqO8BVP/RA5GQ3ce3sRK8hSkvQlXSoXIPpHh6x7hBezIGXzw==\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0,\"finishMessage\": \"Model generated function call(s).\"}],\"usageMetadata\": {\"promptTokenCount\": 55,\"candidatesTokenCount\": 15,\"totalTokenCount\": 115,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 55}],\"thoughtsTokenCount\": 45},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaYuTJ_OW_uMPgIPKgAg\"}\r\n\r\n" + } + } + ] +} 
diff --git a/packages/llm/test/fixtures/recordings/openai-chat/continues-after-tool-result.json b/packages/llm/test/fixtures/recordings/openai-chat/continues-after-tool-result.json new file mode 100644 index 000000000000..7c02a93f0b4e --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-chat/continues-after-tool-result.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-chat/continues-after-tool-result", + "recordedAt": "2026-05-06T01:33:31.878Z", + "tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Answer using only the provided tool result.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"},{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"id\":\"call_weather\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]},{\"role\":\"tool\",\"tool_call_id\":\"call_weather\",\"content\":\"{\\\"forecast\\\":\\\"sunny\\\",\\\"temperature_c\\\":22}\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":40,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "data: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\",\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"gJ6VDZ2ZE\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"The\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"B2pU6Neg\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" weather\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"sa2\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" in\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"ENFjAfta\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" Paris\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"E1Kbi\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" is\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"NWj8HasA\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" sunny\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"irmMg\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" with\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"3eCMq6\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" a\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"XKMqPUsnt\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" temperature\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"BFVrBA09z9Y3lAC\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" of\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"AwG4puOX\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" \"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"pKQU39KXN6\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"22\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"xeTNA1JuE\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"°C\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"kNilBK4Nm\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\".\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"BrXQlZOd1Q\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"lzLXy\"}\n\ndata: {\"id\":\"chatcmpl-DcLQhErGVsn8x3hNFmX5A0yM0T9Km\",\"object\":\"chat.completion.chunk\",\"created\":1778031211,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[],\"usage\":{\"prompt_tokens\":59,\"completion_tokens\":14,\"total_tokens\":73,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"5z1JJjgtey\"}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json b/packages/llm/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json new file mode 100644 index 000000000000..fdc5fa7916b0 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "metadata": { + "name": "openai-chat/drives-a-tool-loop-end-to-end", + "recordedAt": "2026-05-06T01:33:29.747Z", + "tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat", "tool", "tool-loop"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "data: {\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"index\":0,\"id\":\"call_tyZNHs2AudCbG4XJUEmX5Waw\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}],\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"ayQl\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"TWZNUL5mYYtjWu\"}\n\ndata: {\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"QidSCtgZRvDHL\"}\n\ndata: {\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"nupQO1L4GdWo\"}\n\ndata: {\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"3W5B3hzGrFvl\"}\n\ndata: {\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"JgscYuZR4Lmp5S\"}\n\ndata: {\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"tool_calls\"}],\"usage\":null,\"obfuscation\":\"BtZF5TaQjX3UwLN\"}\n\ndata: {\"id\":\"chatcmpl-DcLQeieQn9xQe2QqsLPi7rN15bnJF\",\"object\":\"chat.completion.chunk\",\"created\":1778031208,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[],\"usage\":{\"prompt_tokens\":64,\"completion_tokens\":14,\"total_tokens\":78,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"bZ51l7ptxM\"}\n\ndata: [DONE]\n\n" + } + }, + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in 
Paris?\"},{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"id\":\"call_tyZNHs2AudCbG4XJUEmX5Waw\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]},{\"role\":\"tool\",\"tool_call_id\":\"call_tyZNHs2AudCbG4XJUEmX5Waw\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "data: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\",\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"SCCu2B8Ri\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"The\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"vuE4h8te\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" weather\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"uzt\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" in\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"4vVdGuJc\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" Paris\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"hAfFt\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" is\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"uuNXNXne\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" sunny\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"HRMlI\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" with\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"Ii1R2u\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" a\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"ay3ddthfT\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" temperature\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"PtxyVsfiluBGiWj\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" of\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"WuI4V7O6\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" \"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"Z5wHwpykrS\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"22\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"Fi66TTzMb\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"°C\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"AFnwTAm2P\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\".\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"xW7U4YToVK\"}\n\ndata: {\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"O0Tks\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQfUuhXefq7QDmGNhpEN5IqEKMM\",\"object\":\"chat.completion.chunk\",\"created\":1778031209,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_99cf176092\",\"choices\":[],\"usage\":{\"prompt_tokens\":96,\"completion_tokens\":15,\"total_tokens\":111,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"advcu5qYJ\"}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-chat/streams-text.json b/packages/llm/test/fixtures/recordings/openai-chat/streams-text.json new file mode 100644 index 000000000000..c86a29a462bd --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-chat/streams-text.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-chat/streams-text", + "recordedAt": "2026-05-06T01:33:30.542Z", + "tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Say hello in one short sentence.\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "data: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\",\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"g9SWm2h6J\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"lVzwlh\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"onzhziaLGv\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"LzUj1\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[],\"usage\":{\"prompt_tokens\":22,\"completion_tokens\":2,\"total_tokens\":24,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"emMuPcvvOkI\"}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-chat/streams-tool-call.json b/packages/llm/test/fixtures/recordings/openai-chat/streams-tool-call.json new file mode 100644 index 000000000000..fef4d8cd14a2 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-chat/streams-tool-call.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-chat/streams-tool-call", + "recordedAt": "2026-05-06T01:33:31.127Z", + "tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "data: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"index\":0,\"id\":\"call_5wBV98AvGPwOyC6a2HtKh85w\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}],\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"hrw8\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"MzOlaTohF20Sbb\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"QuYBQ5vYEUVxR\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"spyXlsV2hl6l\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"Db1cjFKa6YAI\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"oPu35nrhXcjTL5\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"63TVy\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[],\"usage\":{\"prompt_tokens\":67,\"completion_tokens\":5,\"total_tokens\":72,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"NxJjur40z4H\"}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json new file mode 100644 index 000000000000..a71b1121cb01 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/deepseek-streams-text", + "recordedAt": "2026-04-28T21:18:49.498Z", + "tags": ["prefix:openai-compatible-chat", "protocol:openai-compatible-chat", "provider:deepseek"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.deepseek.com/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"deepseek-chat\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "data: 
{\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\"},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":14,\"completion_tokens\":2,\"total_tokens\":16,\"prompt_tokens_details\":{\"cached_tokens\":0},\"prompt_cache_hit_tokens\":0,\"prompt_cache_miss_tokens\":14}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json new file mode 100644 index 000000000000..403260b88b2e --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop.json @@ -0,0 +1,53 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/groq-llama-3-3-70b-drives-a-tool-loop", + "recordedAt": "2026-05-06T01:35:06.032Z", + "tags": [ + "prefix:openai-compatible-chat", + "protocol:openai-compatible-chat", + "provider:groq", + "tool", + "tool-loop", + "golden" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.groq.com/openai/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"llama-3.3-70b-versatile\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: 
{\"id\":\"chatcmpl-74a8ff95-296e-4c98-8e51-4b23d5d7f261\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null},\"logprobs\":null,\"finish_reason\":null}],\"x_groq\":{\"id\":\"req_01kqxes90afm8r12en80ez1vhw\",\"seed\":1587279809}}\n\ndata: {\"id\":\"chatcmpl-74a8ff95-296e-4c98-8e51-4b23d5d7f261\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"id\":\"4vgxtgdfg\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"},\"index\":0}]},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-74a8ff95-296e-4c98-8e51-4b23d5d7f261\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"tool_calls\"}],\"x_groq\":{\"id\":\"req_01kqxes90afm8r12en80ez1vhw\",\"usage\":{\"queue_time\":0.036768035,\"prompt_tokens\":237,\"prompt_time\":0.012356963,\"completion_tokens\":14,\"completion_time\":0.047052437,\"total_tokens\":251,\"total_time\":0.0594094}},\"usage\":{\"queue_time\":0.036768035,\"prompt_tokens\":237,\"prompt_time\":0.012356963,\"completion_tokens\":14,\"completion_time\":0.047052437,\"total_tokens\":251,\"total_time\":0.0594094}}\n\ndata: {\"id\":\"chatcmpl-74a8ff95-296e-4c98-8e51-4b23d5d7f261\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[],\"usage\":{\"queue_time\":0.036768035,\"prompt_tokens\":237,\"prompt_time\":0.012356963,\"completion_tokens\":14,\"completion_time\":0.047052437,\"total_tokens\":251,\"total_time\":0.0594094},\"service_tier\":\"on_demand\"}\n\ndata: [DONE]\n\n" + } + }, + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.groq.com/openai/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"llama-3.3-70b-versatile\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"},{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"id\":\"4vgxtgdfg\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]},{\"role\":\"tool\",\"tool_call_id\":\"4vgxtgdfg\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: 
{\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}],\"x_groq\":{\"id\":\"req_01kqxes966fm8r4q94e70a83gn\",\"seed\":524268521}}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"The\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" weather\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" in\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" Paris\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" is\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" sunny\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" with\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" a\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" temperature\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" of\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: 
{\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" \"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"22\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" degrees\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\".\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"x_groq\":{\"id\":\"req_01kqxes966fm8r4q94e70a83gn\",\"usage\":{\"queue_time\":0.036680462,\"prompt_tokens\":270,\"prompt_time\":0.014468555,\"completion_tokens\":15,\"completion_time\":0.057896947,\"total_tokens\":285,\"total_time\":0.072365502}},\"usage\":{\"queue_time\":0.036680462,\"prompt_tokens\":270,\"prompt_time\":0.014468555,\"completion_tokens\":15,\"completion_time\":0.057896947,\"total_tokens\":285,\"total_time\":0.072365502}}\n\ndata: {\"id\":\"chatcmpl-52c0acaf-3f4b-45c8-8aa5-93a3b6adb045\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_43d97c5965\",\"choices\":[],\"usage\":{\"queue_time\":0.036680462,\"prompt_tokens\":270,\"prompt_time\":0.014468555,\"completion_tokens\":15,\"completion_time\":0.057896947,\"total_tokens\":285,\"total_time\":0.072365502},\"service_tier\":\"on_demand\"}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json new file mode 100644 index 000000000000..561dbfda0629 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-streams-text.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/groq-streams-text", + "recordedAt": "2026-05-06T01:35:05.532Z", + "tags": ["prefix:openai-compatible-chat", "protocol:openai-compatible-chat", "provider:groq"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.groq.com/openai/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"llama-3.3-70b-versatile\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}" + }, + "response": { + "status": 200, + 
"headers": { + "content-type": "text/event-stream" + }, + "body": "data: {\"id\":\"chatcmpl-dd5aae9f-7032-44a7-aca8-01027903b4c9\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_d42c28f9ce\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}],\"x_groq\":{\"id\":\"req_01kqxes8r3fmja0yhxvt665m6h\",\"seed\":687314058}}\n\ndata: {\"id\":\"chatcmpl-dd5aae9f-7032-44a7-aca8-01027903b4c9\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_d42c28f9ce\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-dd5aae9f-7032-44a7-aca8-01027903b4c9\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_d42c28f9ce\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-dd5aae9f-7032-44a7-aca8-01027903b4c9\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_d42c28f9ce\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"x_groq\":{\"id\":\"req_01kqxes8r3fmja0yhxvt665m6h\",\"usage\":{\"queue_time\":0.0381395,\"prompt_tokens\":45,\"prompt_time\":0.003985297,\"completion_tokens\":3,\"completion_time\":0.014171875,\"total_tokens\":48,\"total_time\":0.018157172}},\"usage\":{\"queue_time\":0.0381395,\"prompt_tokens\":45,\"prompt_time\":0.003985297,\"completion_tokens\":3,\"completion_time\":0.014171875,\"total_tokens\":48,\"total_time\":0.018157172}}\n\ndata: {\"id\":\"chatcmpl-dd5aae9f-7032-44a7-aca8-01027903b4c9\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_d42c28f9ce\",\"choices\":[],\"usage\":{\"queue_time\":0.0381395,\"prompt_tokens\":45,\"prompt_time\":0.003985297,\"completion_tokens\":3,\"completion_time\":0.014171875,\"total_tokens\":48,\"total_time\":0.018157172},\"service_tier\":\"on_demand\"}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json new file mode 100644 index 000000000000..70e9a765d281 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/groq-streams-tool-call.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/groq-streams-tool-call", + "recordedAt": "2026-05-06T01:35:05.706Z", + "tags": ["prefix:openai-compatible-chat", "protocol:openai-compatible-chat", "provider:groq", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.groq.com/openai/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"llama-3.3-70b-versatile\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a 
city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": "data: {\"id\":\"chatcmpl-05380361-f8e4-444a-ae80-296b4d1d46f7\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_0761e44d7b\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null},\"logprobs\":null,\"finish_reason\":null}],\"x_groq\":{\"id\":\"req_01kqxes8v4fm7baf4smt42f0qn\",\"seed\":1846647562}}\n\ndata: {\"id\":\"chatcmpl-05380361-f8e4-444a-ae80-296b4d1d46f7\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_0761e44d7b\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"id\":\"mcf2d8nn1\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"},\"index\":0}]},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-05380361-f8e4-444a-ae80-296b4d1d46f7\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_0761e44d7b\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"tool_calls\"}],\"x_groq\":{\"id\":\"req_01kqxes8v4fm7baf4smt42f0qn\",\"usage\":{\"queue_time\":0.07684935,\"prompt_tokens\":249,\"prompt_time\":0.014815006,\"completion_tokens\":10,\"completion_time\":0.036435756,\"total_tokens\":259,\"total_time\":0.051250762}},\"usage\":{\"queue_time\":0.07684935,\"prompt_tokens\":249,\"prompt_time\":0.014815006,\"completion_tokens\":10,\"completion_time\":0.036435756,\"total_tokens\":259,\"total_time\":0.051250762}}\n\ndata: {\"id\":\"chatcmpl-05380361-f8e4-444a-ae80-296b4d1d46f7\",\"object\":\"chat.completion.chunk\",\"created\":1778031305,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_0761e44d7b\",\"choices\":[],\"usage\":{\"queue_time\":0.07684935,\"prompt_tokens\":249,\"prompt_time\":0.014815006,\"completion_tokens\":10,\"completion_time\":0.036435756,\"total_tokens\":259,\"total_time\":0.051250762},\"service_tier\":\"on_demand\"}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json new file mode 100644 index 000000000000..e67d280678c3 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop.json @@ -0,0 +1,54 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/openrouter-claude-opus-4-7-drives-a-tool-loop", + "recordedAt": "2026-05-06T01:35:14.282Z", + "tags": [ + "prefix:openai-compatible-chat", + "protocol:openai-compatible-chat", + "provider:openrouter", + "tool", + "tool-loop", + "golden", + "flagship" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": 
"{\"model\":\"anthropic/claude-opus-4.7\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool exactly once, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\n: OPENROUTER PROCESSING\n\n: OPENROUTER PROCESSING\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"id\":\"toolu_bdrk_01AVRkzbigpMbNJ3zjnuQ6ZE\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"city\\\": \\\"P\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"ari\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"s\\\"}\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon 
Bedrock\",\"service_tier\":\"standard\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"tool_use\"}]}\n\ndata: {\"id\":\"gen-1778031311-S3NlfYGRwAnOoPoNrThK\",\"object\":\"chat.completion.chunk\",\"created\":1778031311,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"service_tier\":\"standard\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"tool_use\"}],\"usage\":{\"prompt_tokens\":802,\"completion_tokens\":66,\"total_tokens\":868,\"cost\":0.00566,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.00566,\"upstream_inference_prompt_cost\":0.00401,\"upstream_inference_completions_cost\":0.00165},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + }, + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"anthropic/claude-opus-4.7\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool exactly once, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"},{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"id\":\"toolu_bdrk_01AVRkzbigpMbNJ3zjnuQ6ZE\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]},{\"role\":\"tool\",\"tool_call_id\":\"toolu_bdrk_01AVRkzbigpMbNJ3zjnuQ6ZE\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\ndata: {\"id\":\"gen-1778031313-XM4XZGmFyt6jg3GZ772w\",\"object\":\"chat.completion.chunk\",\"created\":1778031313,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"It\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031313-XM4XZGmFyt6jg3GZ772w\",\"object\":\"chat.completion.chunk\",\"created\":1778031313,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"'s sunny and\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031313-XM4XZGmFyt6jg3GZ772w\",\"object\":\"chat.completion.chunk\",\"created\":1778031313,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" 22°C in\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: 
{\"id\":\"gen-1778031313-XM4XZGmFyt6jg3GZ772w\",\"object\":\"chat.completion.chunk\",\"created\":1778031313,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" Paris.\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031313-XM4XZGmFyt6jg3GZ772w\",\"object\":\"chat.completion.chunk\",\"created\":1778031313,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"service_tier\":\"standard\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"end_turn\"}]}\n\ndata: {\"id\":\"gen-1778031313-XM4XZGmFyt6jg3GZ772w\",\"object\":\"chat.completion.chunk\",\"created\":1778031313,\"model\":\"anthropic/claude-4.7-opus-20260416\",\"provider\":\"Amazon Bedrock\",\"service_tier\":\"standard\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"end_turn\"}],\"usage\":{\"prompt_tokens\":899,\"completion_tokens\":19,\"total_tokens\":918,\"cost\":0.00497,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.00497,\"upstream_inference_prompt_cost\":0.004495,\"upstream_inference_completions_cost\":0.000475},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json new file mode 100644 index 000000000000..7883285e581a --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop.json @@ -0,0 +1,53 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/openrouter-gpt-4o-mini-drives-a-tool-loop", + "recordedAt": "2026-05-06T01:35:08.922Z", + "tags": [ + "prefix:openai-compatible-chat", + "protocol:openai-compatible-chat", + "provider:openrouter", + "tool", + "tool-loop", + "golden" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"openai/gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool exactly once, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\ndata: 
{\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"id\":\"call_S63bjYITINemSHZ4Uqns7PIu\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"tool_calls\"}]}\n\ndata: 
{\"id\":\"gen-1778031307-FcHCDYW9unDVyRRL841T\",\"object\":\"chat.completion.chunk\",\"created\":1778031307,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"tool_calls\"}],\"usage\":{\"prompt_tokens\":66,\"completion_tokens\":14,\"total_tokens\":80,\"cost\":0.0000183,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.0000183,\"upstream_inference_prompt_cost\":0.0000099,\"upstream_inference_completions_cost\":0.0000084},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + }, + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"openai/gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool exactly once, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"},{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"id\":\"call_S63bjYITINemSHZ4Uqns7PIu\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]},{\"role\":\"tool\",\"tool_call_id\":\"call_S63bjYITINemSHZ4Uqns7PIu\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"The\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" weather\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" in\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" Paris\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: 
{\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" is\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" sunny\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" with\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" a\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" temperature\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" of\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" \",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"22\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"°C\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\".\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: 
{\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"stop\"}]}\n\ndata: {\"id\":\"gen-1778031308-uNHYY6MdDXOs0BYMXXVb\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_7e69b4ef44\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":98,\"completion_tokens\":15,\"total_tokens\":113,\"cost\":0.0000237,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.0000237,\"upstream_inference_prompt_cost\":0.0000147,\"upstream_inference_completions_cost\":0.000009},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json new file mode 100644 index 000000000000..e1cbab70faac --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop.json @@ -0,0 +1,54 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/openrouter-gpt-5-5-drives-a-tool-loop", + "recordedAt": "2026-05-06T01:35:11.662Z", + "tags": [ + "prefix:openai-compatible-chat", + "protocol:openai-compatible-chat", + "provider:openrouter", + "tool", + "tool-loop", + "golden", + "flagship" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"openai/gpt-5.5\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool exactly once, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\n: OPENROUTER PROCESSING\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"id\":\"call_4A7V7UN36HXCUUn8qAOQaKGw\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: 
{\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"service_tier\":\"default\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"completed\"}]}\n\ndata: {\"id\":\"gen-1778031308-dVa9axcHcOlG9GcilZkz\",\"object\":\"chat.completion.chunk\",\"created\":1778031308,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"service_tier\":\"default\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"completed\"}],\"usage\":{\"prompt_tokens\":69,\"completion_tokens\":18,\"total_tokens\":87,\"cost\":0.000885,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.000885,\"upstream_inference_prompt_cost\":0.000345,\"upstream_inference_completions_cost\":0.00054},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + }, + { + "transport": "http", + "request": { + 
"method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"openai/gpt-5.5\",\"messages\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool exactly once, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":\"What is the weather in Paris?\"},{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"id\":\"call_4A7V7UN36HXCUUn8qAOQaKGw\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]},{\"role\":\"tool\",\"tool_call_id\":\"call_4A7V7UN36HXCUUn8qAOQaKGw\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\n: OPENROUTER PROCESSING\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Paris\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" is\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" sunny\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" and\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" \",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"22\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"°C\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: 
{\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\".\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"service_tier\":\"default\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"completed\"}]}\n\ndata: {\"id\":\"gen-1778031310-JUYfFzDbun699uUYoA4N\",\"object\":\"chat.completion.chunk\",\"created\":1778031310,\"model\":\"openai/gpt-5.5-20260423\",\"provider\":\"OpenAI\",\"service_tier\":\"default\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"completed\"}],\"usage\":{\"prompt_tokens\":108,\"completion_tokens\":12,\"total_tokens\":120,\"cost\":0.0009,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.0009,\"upstream_inference_prompt_cost\":0.00054,\"upstream_inference_completions_cost\":0.00036},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json new file mode 100644 index 000000000000..1a95146931ee --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-text.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/openrouter-streams-text", + "recordedAt": "2026-05-06T01:35:06.767Z", + "tags": ["prefix:openai-compatible-chat", "protocol:openai-compatible-chat", "provider:openrouter"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"openai/gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\ndata: {\"id\":\"gen-1778031306-UD7bR0I1JNCsPvVzlXat\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"Azure\",\"system_fingerprint\":\"fp_eb37e061ec\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031306-UD7bR0I1JNCsPvVzlXat\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"Azure\",\"system_fingerprint\":\"fp_eb37e061ec\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\",\"role\":\"assistant\"},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: 
{\"id\":\"gen-1778031306-UD7bR0I1JNCsPvVzlXat\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"Azure\",\"system_fingerprint\":\"fp_eb37e061ec\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"stop\"}]}\n\ndata: {\"id\":\"gen-1778031306-UD7bR0I1JNCsPvVzlXat\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"Azure\",\"system_fingerprint\":\"fp_eb37e061ec\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"stop\",\"native_finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":21,\"completion_tokens\":3,\"total_tokens\":24,\"cost\":0.00000495,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.00000495,\"upstream_inference_prompt_cost\":0.00000315,\"upstream_inference_completions_cost\":0.0000018},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json new file mode 100644 index 000000000000..36d0ad99c56b --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/openrouter-streams-tool-call.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/openrouter-streams-tool-call", + "recordedAt": "2026-05-06T01:35:07.466Z", + "tags": ["prefix:openai-compatible-chat", "protocol:openai-compatible-chat", "provider:openrouter", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://openrouter.ai/api/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"openai/gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream" + }, + "body": ": OPENROUTER PROCESSING\n\ndata: {\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"id\":\"call_L7mHMq49ZSUTBHjLJfBIP2eT\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: 
{\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":null,\"role\":\"assistant\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"finish_reason\":null,\"native_finish_reason\":null}]}\n\ndata: {\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"stop\"}]}\n\ndata: 
{\"id\":\"gen-1778031306-HYzOq04JIk1hZQ4iaNjD\",\"object\":\"chat.completion.chunk\",\"created\":1778031306,\"model\":\"openai/gpt-4o-mini\",\"provider\":\"OpenAI\",\"system_fingerprint\":\"fp_b6580bbee1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":67,\"completion_tokens\":5,\"total_tokens\":72,\"cost\":0.00001305,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"cache_write_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":0.00001305,\"upstream_inference_prompt_cost\":0.00001005,\"upstream_inference_completions_cost\":0.000003},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0,\"audio_tokens\":0}}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json new file mode 100644 index 000000000000..640565b14faa --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/togetherai-streams-text", + "recordedAt": "2026-04-28T21:18:55.266Z", + "tags": ["prefix:openai-compatible-chat", "protocol:openai-compatible-chat", "provider:togetherai"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.together.xyz/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream;charset=utf-8" + }, + "body": "data: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"Hello\",\"logprobs\":null,\"finish_reason\":null,\"seed\":null,\"delta\":{\"token_id\":9906,\"role\":\"assistant\",\"content\":\"Hello\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":null}\n\ndata: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"!\",\"logprobs\":null,\"finish_reason\":null,\"seed\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"!\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":null}\n\ndata: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"stop\",\"seed\":15924764223251450000,\"delta\":{\"token_id\":128009,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":3,\"total_tokens\":48,\"cached_tokens\":0}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json b/packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json new file mode 100644 index 
000000000000..6c1d9c1a7fc4 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-compatible-chat/togetherai-streams-tool-call", + "recordedAt": "2026-04-28T21:18:59.123Z", + "tags": ["prefix:openai-compatible-chat", "protocol:openai-compatible-chat", "provider:togetherai", "tool"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.together.xyz/v1/chat/completions", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream;charset=utf-8" + }, + "body": "data: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"role\":\"assistant\",\"text\":\"\",\"logprobs\":null,\"finish_reason\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\",\"tool_calls\":[{\"index\":0,\"id\":\"call_yu1mxtmex7x48nximi9c8jpo\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"seed\":9033012299842426000,\"delta\":{\"token_id\":128009,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":{\"prompt_tokens\":194,\"completion_tokens\":19,\"total_tokens\":213,\"cached_tokens\":0}}\n\ndata: [DONE]\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json b/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json new file mode 100644 index 000000000000..a3f2e014df4c --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-drives-a-tool-loop.json @@ -0,0 +1,54 @@ +{ + "version": 1, + "metadata": { + 
"name": "openai-responses/gpt-5-5-drives-a-tool-loop", + "recordedAt": "2026-05-06T00:26:15.209Z", + "tags": [ + "prefix:openai-responses", + "provider:openai", + "protocol:openai-responses", + "tool", + "tool-loop", + "golden", + "flagship" + ] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/responses", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-5.5\",\"input\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"What is the weather in Paris?\"}]}],\"tools\":[{\"type\":\"function\",\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_output_tokens\":80}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: response.created\ndata: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_01394305fdec6fdd0069fa8aa414cc81a1908662495e7c9bd9\",\"object\":\"response\",\"created_at\":1778027172,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":0}\n\nevent: response.in_progress\ndata: {\"type\":\"response.in_progress\",\"response\":{\"id\":\"resp_01394305fdec6fdd0069fa8aa414cc81a1908662495e7c9bd9\",\"object\":\"response\",\"created_at\":1778027172,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a 
city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":1}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"type\":\"function_call\",\"status\":\"in_progress\",\"arguments\":\"\",\"call_id\":\"call_JCuVTkQxVB3cCmFWx52adJKZ\",\"name\":\"get_weather\"},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"{\\\"\",\"item_id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"obfuscation\":\"5DTUG002eUNyAN\",\"output_index\":0,\"sequence_number\":3}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"city\",\"item_id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"obfuscation\":\"cbezJUlKOHJ8\",\"output_index\":0,\"sequence_number\":4}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"\\\":\\\"\",\"item_id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"obfuscation\":\"Du6y75R0eXTqj\",\"output_index\":0,\"sequence_number\":5}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"Paris\",\"item_id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"obfuscation\":\"dHUPwHp6aIB\",\"output_index\":0,\"sequence_number\":6}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"\\\"}\",\"item_id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"obfuscation\":\"4A6QSCyeBQa1fC\",\"output_index\":0,\"sequence_number\":7}\n\nevent: response.function_call_arguments.done\ndata: {\"type\":\"response.function_call_arguments.done\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\",\"item_id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"output_index\":0,\"sequence_number\":8}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"type\":\"function_call\",\"status\":\"completed\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\",\"call_id\":\"call_JCuVTkQxVB3cCmFWx52adJKZ\",\"name\":\"get_weather\"},\"output_index\":0,\"sequence_number\":9}\n\nevent: response.completed\ndata: 
{\"type\":\"response.completed\",\"response\":{\"id\":\"resp_01394305fdec6fdd0069fa8aa414cc81a1908662495e7c9bd9\",\"object\":\"response\",\"created_at\":1778027172,\"status\":\"completed\",\"background\":false,\"completed_at\":1778027173,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[{\"id\":\"fc_01394305fdec6fdd0069fa8aa51a3881a1a2e74c58f5c368d4\",\"type\":\"function_call\",\"status\":\"completed\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\",\"call_id\":\"call_JCuVTkQxVB3cCmFWx52adJKZ\",\"name\":\"get_weather\"}],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"default\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":{\"input_tokens\":67,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens\":18,\"output_tokens_details\":{\"reasoning_tokens\":0},\"total_tokens\":85},\"user\":null,\"metadata\":{}},\"sequence_number\":10}\n\n" + } + }, + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/responses", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-5.5\",\"input\":[{\"role\":\"system\",\"content\":\"Use the get_weather tool, then answer in one short sentence.\"},{\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"What is the weather in Paris?\"}]},{\"type\":\"function_call\",\"call_id\":\"call_JCuVTkQxVB3cCmFWx52adJKZ\",\"name\":\"get_weather\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"},{\"type\":\"function_call_output\",\"call_id\":\"call_JCuVTkQxVB3cCmFWx52adJKZ\",\"output\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}],\"tools\":[{\"type\":\"function\",\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_output_tokens\":80}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: response.created\ndata: 
{\"type\":\"response.created\",\"response\":{\"id\":\"resp_00daac70c40e5f4c0069fa8aa5a58c819db01baef7149e9043\",\"object\":\"response\",\"created_at\":1778027173,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":0}\n\nevent: response.in_progress\ndata: {\"type\":\"response.in_progress\",\"response\":{\"id\":\"resp_00daac70c40e5f4c0069fa8aa5a58c819db01baef7149e9043\",\"object\":\"response\",\"created_at\":1778027173,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":1}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"type\":\"message\",\"status\":\"in_progress\",\"content\":[],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.content_part.added\ndata: {\"type\":\"response.content_part.added\",\"content_index\":0,\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"output_index\":0,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"\"},\"sequence_number\":3}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"It\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"chiK1sgLg8rTyK\",\"output_index\":0,\"sequence_number\":4}\n\nevent: response.output_text.delta\ndata: 
{\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"’s\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"ltAaX7wDQM1X8W\",\"output_index\":0,\"sequence_number\":5}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\" sunny\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"a6nggmY4w0\",\"output_index\":0,\"sequence_number\":6}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\" and\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"Fm6HNREc68IM\",\"output_index\":0,\"sequence_number\":7}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\" \",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"AvKNavT4eKhSpud\",\"output_index\":0,\"sequence_number\":8}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"22\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"xfJpoPh3ZBNXow\",\"output_index\":0,\"sequence_number\":9}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"°C\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"PbrlZXftzmtJBV\",\"output_index\":0,\"sequence_number\":10}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\" in\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"PLrf8voVO2egp\",\"output_index\":0,\"sequence_number\":11}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\" Paris\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"U4wLv1H29b\",\"output_index\":0,\"sequence_number\":12}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\".\",\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"obfuscation\":\"1n14oh7kAoCuo4f\",\"output_index\":0,\"sequence_number\":13}\n\nevent: response.output_text.done\ndata: {\"type\":\"response.output_text.done\",\"content_index\":0,\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"logprobs\":[],\"output_index\":0,\"sequence_number\":14,\"text\":\"It’s sunny and 22°C in Paris.\"}\n\nevent: response.content_part.done\ndata: {\"type\":\"response.content_part.done\",\"content_index\":0,\"item_id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"output_index\":0,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"It’s sunny and 22°C in Paris.\"},\"sequence_number\":15}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"It’s sunny and 22°C in 
Paris.\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":0,\"sequence_number\":16}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_00daac70c40e5f4c0069fa8aa5a58c819db01baef7149e9043\",\"object\":\"response\",\"created_at\":1778027173,\"status\":\"completed\",\"background\":false,\"completed_at\":1778027174,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[{\"id\":\"msg_00daac70c40e5f4c0069fa8aa697a8819daf6660168cb19951\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"It’s sunny and 22°C in Paris.\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"}],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"default\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":{\"input_tokens\":106,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens\":14,\"output_tokens_details\":{\"reasoning_tokens\":0},\"total_tokens\":120},\"user\":null,\"metadata\":{}},\"sequence_number\":17}\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json b/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json new file mode 100644 index 000000000000..92c7b7e0f1a0 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-streams-text.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-responses/gpt-5-5-streams-text", + "recordedAt": "2026-05-06T00:26:10.447Z", + "tags": ["prefix:openai-responses", "provider:openai", "protocol:openai-responses", "flagship"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/responses", + "headers": { + "content-type": "application/json" + }, + "body": "{\"model\":\"gpt-5.5\",\"input\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"Reply with exactly: Hello!\"}]}],\"stream\":true,\"max_output_tokens\":80}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: response.created\ndata: 
{\"type\":\"response.created\",\"response\":{\"id\":\"resp_0ea948e2f42449980069fa8aa0e4b4819ca3395b74c53c13fa\",\"object\":\"response\",\"created_at\":1778027168,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":0}\n\nevent: response.in_progress\ndata: {\"type\":\"response.in_progress\",\"response\":{\"id\":\"resp_0ea948e2f42449980069fa8aa0e4b4819ca3395b74c53c13fa\",\"object\":\"response\",\"created_at\":1778027168,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":1}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"rs_0ea948e2f42449980069fa8aa1d588819cbbcb9b056624d27c\",\"type\":\"reasoning\",\"summary\":[]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"rs_0ea948e2f42449980069fa8aa1d588819cbbcb9b056624d27c\",\"type\":\"reasoning\",\"summary\":[]},\"output_index\":0,\"sequence_number\":3}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"type\":\"message\",\"status\":\"in_progress\",\"content\":[],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":1,\"sequence_number\":4}\n\nevent: response.content_part.added\ndata: {\"type\":\"response.content_part.added\",\"content_index\":0,\"item_id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"output_index\":1,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"\"},\"sequence_number\":5}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"Hello\",\"item_id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"logprobs\":[],\"obfuscation\":\"VTjmFwAGgIo\",\"output_index\":1,\"sequence_number\":6}\n\nevent: response.output_text.delta\ndata: 
{\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"!\",\"item_id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"logprobs\":[],\"obfuscation\":\"PfjFymS7MZa7aYf\",\"output_index\":1,\"sequence_number\":7}\n\nevent: response.output_text.done\ndata: {\"type\":\"response.output_text.done\",\"content_index\":0,\"item_id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"logprobs\":[],\"output_index\":1,\"sequence_number\":8,\"text\":\"Hello!\"}\n\nevent: response.content_part.done\ndata: {\"type\":\"response.content_part.done\",\"content_index\":0,\"item_id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"output_index\":1,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Hello!\"},\"sequence_number\":9}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Hello!\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":1,\"sequence_number\":10}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_0ea948e2f42449980069fa8aa0e4b4819ca3395b74c53c13fa\",\"object\":\"response\",\"created_at\":1778027168,\"status\":\"completed\",\"background\":false,\"completed_at\":1778027170,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[{\"id\":\"rs_0ea948e2f42449980069fa8aa1d588819cbbcb9b056624d27c\",\"type\":\"reasoning\",\"summary\":[]},{\"id\":\"msg_0ea948e2f42449980069fa8aa20e38819cbf5be70e4d02a1c7\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Hello!\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"}],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"default\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":{\"input_tokens\":20,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens\":18,\"output_tokens_details\":{\"reasoning_tokens\":10},\"total_tokens\":38},\"user\":null,\"metadata\":{}},\"sequence_number\":11}\n\n" + } + } + ] +} diff --git a/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json b/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json new file mode 100644 index 000000000000..172b8407e602 --- /dev/null +++ b/packages/llm/test/fixtures/recordings/openai-responses/gpt-5-5-streams-tool-call.json @@ -0,0 +1,28 @@ +{ + "version": 1, + "metadata": { + "name": "openai-responses/gpt-5-5-streams-tool-call", + "recordedAt": "2026-05-06T00:26:12.011Z", + "tags": ["prefix:openai-responses", "provider:openai", "protocol:openai-responses", "tool", "flagship"] + }, + "interactions": [ + { + "transport": "http", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/responses", + "headers": { + "content-type": "application/json" + }, + "body": 
"{\"model\":\"gpt-5.5\",\"input\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"tools\":[{\"type\":\"function\",\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"tool_choice\":{\"type\":\"function\",\"name\":\"get_weather\"},\"stream\":true,\"max_output_tokens\":80}" + }, + "response": { + "status": 200, + "headers": { + "content-type": "text/event-stream; charset=utf-8" + }, + "body": "event: response.created\ndata: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_05200a06f78f5b310069fa8aa28134819eba958e34eb1db6ae\",\"object\":\"response\",\"created_at\":1778027170,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":{\"type\":\"function\",\"name\":\"get_weather\"},\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":0}\n\nevent: response.in_progress\ndata: {\"type\":\"response.in_progress\",\"response\":{\"id\":\"resp_05200a06f78f5b310069fa8aa28134819eba958e34eb1db6ae\",\"object\":\"response\",\"created_at\":1778027170,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":{\"type\":\"function\",\"name\":\"get_weather\"},\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":1}\n\nevent: response.output_item.added\ndata: 
{\"type\":\"response.output_item.added\",\"item\":{\"id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"type\":\"function_call\",\"status\":\"in_progress\",\"arguments\":\"\",\"call_id\":\"call_ZAbAwsIFeJSyPqz3HaHRXBSn\",\"name\":\"get_weather\"},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"{\\\"\",\"item_id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"obfuscation\":\"X7dp3R85iTgHxP\",\"output_index\":0,\"sequence_number\":3}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"city\",\"item_id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"obfuscation\":\"ECfxJgedKWUn\",\"output_index\":0,\"sequence_number\":4}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"\\\":\\\"\",\"item_id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"obfuscation\":\"BYRjhhZxbw5AR\",\"output_index\":0,\"sequence_number\":5}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"Paris\",\"item_id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"obfuscation\":\"lmbnKOW4qyI\",\"output_index\":0,\"sequence_number\":6}\n\nevent: response.function_call_arguments.delta\ndata: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"\\\"}\",\"item_id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"obfuscation\":\"2PHhvsR2H0PNaP\",\"output_index\":0,\"sequence_number\":7}\n\nevent: response.function_call_arguments.done\ndata: {\"type\":\"response.function_call_arguments.done\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\",\"item_id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"output_index\":0,\"sequence_number\":8}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"type\":\"function_call\",\"status\":\"completed\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\",\"call_id\":\"call_ZAbAwsIFeJSyPqz3HaHRXBSn\",\"name\":\"get_weather\"},\"output_index\":0,\"sequence_number\":9}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_05200a06f78f5b310069fa8aa28134819eba958e34eb1db6ae\",\"object\":\"response\",\"created_at\":1778027170,\"status\":\"completed\",\"background\":false,\"completed_at\":1778027171,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":80,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[{\"id\":\"fc_05200a06f78f5b310069fa8aa37ca8819e9f131e85e47bcff9\",\"type\":\"function_call\",\"status\":\"completed\",\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\",\"call_id\":\"call_ZAbAwsIFeJSyPqz3HaHRXBSn\",\"name\":\"get_weather\"}],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"effort\":\"medium\",\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"default\",\"store\":true,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"tool_choice\":{\"type\":\"function\",\"name\":\"get_weather\"},\"tools\":[{\"type\":\"function\",\"description\":\"Get current weather for a 
city.\",\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false},\"strict\":true}],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":{\"input_tokens\":61,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens\":18,\"output_tokens_details\":{\"reasoning_tokens\":0},\"total_tokens\":79},\"user\":null,\"metadata\":{}},\"sequence_number\":10}\n\n" + } + } + ] +} diff --git a/packages/llm/test/generate-object.test.ts b/packages/llm/test/generate-object.test.ts new file mode 100644 index 000000000000..a9e6b5bf7ab5 --- /dev/null +++ b/packages/llm/test/generate-object.test.ts @@ -0,0 +1,182 @@ +import { describe, expect, test } from "bun:test" +import { Effect, Schema } from "effect" +import { LLM } from "../src" +import * as OpenAIChat from "../src/protocols/openai-chat" +import { Tool, toDefinitions } from "../src/tool" +import { it } from "./lib/effect" +import { dynamicResponse } from "./lib/http" +import { finishChunk, toolCallChunk } from "./lib/openai-chunks" +import { sseEvents } from "./lib/sse" + +type OpenAIChatBody = { + readonly tool_choice?: unknown + readonly tools?: ReadonlyArray<{ + readonly function: { + readonly parameters: unknown + } + }> +} + +const model = OpenAIChat.model({ + id: "gpt-4o-mini", + baseURL: "https://api.openai.test/v1/", + headers: { authorization: "Bearer test" }, +}) + +const Json = Schema.fromJsonString(Schema.Unknown) +const decodeJson = Schema.decodeUnknownSync(Json) +const decodeBody = (text: string): OpenAIChatBody => decodeJson(text) as OpenAIChatBody + +describe("Tool.make (dynamic JSON Schema)", () => { + test("forwards JSON Schema and description through toDefinitions", () => { + const jsonSchema = { + type: "object" as const, + properties: { city: { type: "string" } }, + required: ["city"], + } + const lookup = Tool.make({ + description: "Look up something", + jsonSchema, + execute: () => Effect.succeed({ ok: true }), + }) + const [definition] = toDefinitions({ lookup }) + expect(definition?.name).toBe("lookup") + expect(definition?.description).toBe("Look up something") + expect(definition?.inputSchema).toEqual(jsonSchema) + }) + + test("execute receives the raw input untouched", async () => { + const seen: unknown[] = [] + const tool = Tool.make({ + description: "echo", + jsonSchema: { type: "object" }, + execute: (params) => + Effect.sync(() => { + seen.push(params) + return { ok: true } + }), + }) + const result = await Effect.runPromise(tool.execute({ hello: "world" })) + expect(seen).toEqual([{ hello: "world" }]) + expect(result).toEqual({ ok: true }) + }) +}) + +describe("LLM.generateObject", () => { + it.effect("forces a synthetic tool call and decodes the input", () => + Effect.gen(function* () { + const bodies: OpenAIChatBody[] = [] + const layer = dynamicResponse((input) => + Effect.sync(() => { + bodies.push(decodeBody(input.text)) + return input.respond( + sseEvents( + toolCallChunk("call_1", "generate_object", '{"city":"Paris","temp":22}'), + finishChunk("tool_calls"), + ), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ) + + const response = yield* LLM.generateObject({ + model, + prompt: "Return a structured weather report.", + schema: Schema.Struct({ city: Schema.String, temp: Schema.Number }), + }).pipe(Effect.provide(layer)) + + expect(response.object).toEqual({ city: "Paris", temp: 22 }) + expect(response.response.toolCalls).toHaveLength(1) + 
expect(bodies).toHaveLength(1) + expect(bodies[0].tool_choice).toEqual({ type: "function", function: { name: "generate_object" } }) + const tool = bodies[0].tools?.[0] + expect(bodies[0].tools).toHaveLength(1) + expect(tool).toMatchObject({ + type: "function", + function: { name: "generate_object" }, + }) + const params = tool?.function.parameters as { + readonly type?: unknown + readonly required?: unknown + readonly properties?: Record<string, unknown> + } + expect(params.type).toBe("object") + expect(params.required).toEqual(["city", "temp"]) + expect(params.properties?.city).toMatchObject({ type: "string" }) + expect(params.properties?.temp).toBeDefined() + }), + ) + + it.effect("accepts a raw JSON Schema and returns the input untouched", () => + Effect.gen(function* () { + const bodies: OpenAIChatBody[] = [] + const layer = dynamicResponse((input) => + Effect.sync(() => { + bodies.push(decodeBody(input.text)) + return input.respond( + sseEvents(toolCallChunk("call_1", "generate_object", '{"name":"Ada","age":30}'), finishChunk("tool_calls")), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ) + + const response = yield* LLM.generateObject({ + model, + prompt: "Extract the user.", + jsonSchema: { + type: "object", + properties: { name: { type: "string" }, age: { type: "number" } }, + required: ["name", "age"], + }, + }).pipe(Effect.provide(layer)) + + expect(response.object).toEqual({ name: "Ada", age: 30 }) + expect(bodies[0].tools?.[0]?.function.parameters).toEqual({ + type: "object", + properties: { name: { type: "string" }, age: { type: "number" } }, + required: ["name", "age"], + }) + }), + ) + + it.effect("fails when the model does not call the synthetic tool", () => + Effect.gen(function* () { + const layer = dynamicResponse((input) => + Effect.sync(() => + input.respond(sseEvents({ id: "x", choices: [{ delta: { content: "no thanks" }, finish_reason: "stop" }] }), { + headers: { "content-type": "text/event-stream" }, + }), + ), + ) + + const exit = yield* LLM.generateObject({ + model, + prompt: "Return a structured value.", + schema: Schema.Struct({ value: Schema.Number }), + }).pipe(Effect.provide(layer), Effect.exit) + + expect(exit._tag).toBe("Failure") + }), + ) + + it.effect("fails with a decode error when the tool input does not match the schema", () => + Effect.gen(function* () { + const layer = dynamicResponse((input) => + Effect.sync(() => + input.respond( + sseEvents(toolCallChunk("call_1", "generate_object", '{"value":"not-a-number"}'), finishChunk("tool_calls")), + { headers: { "content-type": "text/event-stream" } }, + ), + ), + ) + + const exit = yield* LLM.generateObject({ + model, + prompt: "Return a structured value.", + schema: Schema.Struct({ value: Schema.Number }), + }).pipe(Effect.provide(layer), Effect.exit) + + expect(exit._tag).toBe("Failure") + }), + ) +}) diff --git a/packages/llm/test/lib/effect.ts b/packages/llm/test/lib/effect.ts new file mode 100644 index 000000000000..05cf017b2be5 --- /dev/null +++ b/packages/llm/test/lib/effect.ts @@ -0,0 +1,50 @@ +import { test, type TestOptions } from "bun:test" +import { Cause, Effect, Exit, Layer } from "effect" +import type * as Scope from "effect/Scope" +import * as TestClock from "effect/testing/TestClock" +import * as TestConsole from "effect/testing/TestConsole" + +type Body<R> = Effect.Effect<unknown, unknown, R | Scope.Scope> | (() => Effect.Effect<unknown, unknown, R | Scope.Scope>) + +const body = <R>(value: Body<R>) => Effect.suspend(() => (typeof value === "function" ?
value() : value)) + +const run = <R>(value: Body<R>, layer: Layer.Layer<R>) => + Effect.gen(function* () { + const exit = yield* body(value).pipe(Effect.scoped, Effect.provide(layer), Effect.exit) + if (Exit.isFailure(exit)) { + for (const err of Cause.prettyErrors(exit.cause)) { + yield* Effect.logError(err) + } + } + return yield* exit + }).pipe(Effect.runPromise) + +const make = <RTest, RLive>(testLayer: Layer.Layer<RTest>, liveLayer: Layer.Layer<RLive>) => { + const effect = (name: string, value: Body<RTest>, opts?: number | TestOptions) => + test(name, () => run(value, testLayer), opts) + + effect.only = (name: string, value: Body<RTest>, opts?: number | TestOptions) => + test.only(name, () => run(value, testLayer), opts) + + effect.skip = (name: string, value: Body<RTest>, opts?: number | TestOptions) => + test.skip(name, () => run(value, testLayer), opts) + + const live = (name: string, value: Body<RLive>, opts?: number | TestOptions) => + test(name, () => run(value, liveLayer), opts) + + live.only = (name: string, value: Body<RLive>, opts?: number | TestOptions) => + test.only(name, () => run(value, liveLayer), opts) + + live.skip = (name: string, value: Body<RLive>, opts?: number | TestOptions) => + test.skip(name, () => run(value, liveLayer), opts) + + return { effect, live } +} + +const testEnv = Layer.mergeAll(TestConsole.layer, TestClock.layer()) +const liveEnv = TestConsole.layer + +export const it = make(testEnv, liveEnv) + +export const testEffect = <R>(layer: Layer.Layer<R>) => + make(Layer.provideMerge(layer, testEnv), Layer.provideMerge(layer, liveEnv)) diff --git a/packages/llm/test/lib/http.ts b/packages/llm/test/lib/http.ts new file mode 100644 index 000000000000..cfe7e6883be1 --- /dev/null +++ b/packages/llm/test/lib/http.ts @@ -0,0 +1,96 @@ +import { Effect, Layer, Ref } from "effect" +import { HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http" +import { LLMClient, RequestExecutor } from "../../src/route" +import type { Service as LLMClientService } from "../../src/route/client" +import type { Service as RequestExecutorService } from "../../src/route/executor" + +export type HandlerInput = { + readonly request: HttpClientRequest.HttpClientRequest + readonly text: string + readonly respond: ( + body: ConstructorParameters<typeof Response>[0], + init?: ResponseInit, + ) => HttpClientResponse.HttpClientResponse +} + +export type Handler = (input: HandlerInput) => Effect.Effect<HttpClientResponse.HttpClientResponse> + +const handlerLayer = (handler: Handler): Layer.Layer<HttpClient.HttpClient> => + Layer.succeed( + HttpClient.HttpClient, + HttpClient.make((request) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie) + const text = yield* Effect.promise(() => web.text()) + return yield* handler({ + request, + text, + respond: (body, init) => HttpClientResponse.fromWeb(request, new Response(body, init)), + }) + }), + ), + ) + +export type RuntimeEnv = RequestExecutorService | LLMClientService + +export const runtimeLayer = (layer: Layer.Layer<HttpClient.HttpClient>): Layer.Layer<RuntimeEnv> => { + const requestExecutorLayer = RequestExecutor.layer.pipe(Layer.provide(layer)) + const llmClientLayer = LLMClient.layer.pipe(Layer.provide(requestExecutorLayer)) + return Layer.mergeAll(requestExecutorLayer, llmClientLayer) +} + +const SSE_HEADERS = { "content-type": "text/event-stream" } as const + +/** + * Layer that returns a single fixed response body. Use for stream-parser + * fixture tests where the request shape is irrelevant. The body type widens + * to whatever `Response` accepts so binary fixtures (`Uint8Array`, + * `ReadableStream`, etc.) flow through without casts.
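 + * + * A minimal usage sketch, assuming the chunk helpers from this same diff (test/lib/openai-chunks.ts, test/lib/sse.ts): + * + *   const layer = fixedResponse(sseEvents(deltaChunk({ content: "Hi" }), finishChunk("stop"))) + *   const response = yield* LLMClient.generate(request).pipe(Effect.provide(layer))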
+ */ +export const fixedResponse = ( + body: ConstructorParameters<typeof Response>[0], + init: ResponseInit = { headers: SSE_HEADERS }, +) => runtimeLayer(handlerLayer((input) => Effect.succeed(input.respond(body, init)))) + +/** + * Layer that builds a response per request. Useful for echo servers. + */ +export const dynamicResponse = (handler: Handler) => runtimeLayer(handlerLayer(handler)) + +/** + * Layer that emits the supplied SSE chunks and then aborts mid-stream. Used to + * exercise transport errors that surface during parsing. + */ +export const truncatedStream = (chunks: ReadonlyArray<string>) => + dynamicResponse((input) => + Effect.sync(() => { + const encoder = new TextEncoder() + const stream = new ReadableStream({ + start(controller) { + for (const chunk of chunks) controller.enqueue(encoder.encode(chunk)) + controller.error(new Error("connection reset")) + }, + }) + return input.respond(stream, { headers: SSE_HEADERS }) + }), + ) + +/** + * Layer that returns successive bodies on each request. Useful for scripting + * multi-step model exchanges (e.g. tool-call loops). The last body in the + * array is reused if the test makes more requests than scripted. + */ +export const scriptedResponses = (bodies: ReadonlyArray<ConstructorParameters<typeof Response>[0]>, init: ResponseInit = { headers: SSE_HEADERS }) => { + if (bodies.length === 0) throw new Error("scriptedResponses requires at least one body") + return Layer.unwrap( + Effect.gen(function* () { + const cursor = yield* Ref.make(0) + return dynamicResponse((input) => + Effect.gen(function* () { + const index = yield* Ref.getAndUpdate(cursor, (n) => n + 1) + return input.respond(bodies[index] ?? bodies[bodies.length - 1], init) + }), + ) + }), + ) +} diff --git a/packages/llm/test/lib/openai-chunks.ts b/packages/llm/test/lib/openai-chunks.ts new file mode 100644 index 000000000000..77a7c919e1a1 --- /dev/null +++ b/packages/llm/test/lib/openai-chunks.ts @@ -0,0 +1,27 @@ +/** + * Shared chunk shapes for OpenAI Chat / OpenAI-compatible Chat fixture tests. + * Multiple test files build the same `{ id, choices: [{ delta, finish_reason }], usage }` + * envelope; consolidating here keeps tool-call event shapes consistent. + */ + +const FIXTURE_ID = "chatcmpl_fixture" + +export const deltaChunk = (delta: object, finishReason: string | null = null) => ({ + id: FIXTURE_ID, + choices: [{ delta, finish_reason: finishReason }], + usage: null, +}) + +export const usageChunk = (usage: object) => ({ + id: FIXTURE_ID, + choices: [], + usage, +}) + +export const finishChunk = (reason: string) => deltaChunk({}, reason) + +export const toolCallChunk = (id: string, name: string, args: string, index = 0) => + deltaChunk({ + role: "assistant", + tool_calls: [{ index, id, function: { name, arguments: args } }], + }) diff --git a/packages/llm/test/lib/sse.ts b/packages/llm/test/lib/sse.ts new file mode 100644 index 000000000000..80b275d296e8 --- /dev/null +++ b/packages/llm/test/lib/sse.ts @@ -0,0 +1,17 @@ +/** + * Helpers for building deterministic SSE bodies in tests. + * + * Inline template-literal SSE strings are hard to write and review when chunks + * contain JSON; this helper accepts plain values and serializes them, so test + * authors only think about the chunk shapes, not the wire format. + */ +export const sseEvents = (...chunks: ReadonlyArray<unknown>): string => + `${chunks.map(formatChunk).join("")}data: [DONE]\n\n` + +const formatChunk = (chunk: unknown) => `data: ${typeof chunk === "string" ?
chunk : JSON.stringify(chunk)}\n\n` + +/** + * Build an SSE body from already-serialized strings (used when the chunk shape + * itself is part of what's being tested, e.g. malformed chunks). + */ +export const sseRaw = (...lines: ReadonlyArray<string>): string => lines.map((line) => `${line}\n\n`).join("") diff --git a/packages/llm/test/lib/tool-runtime.ts b/packages/llm/test/lib/tool-runtime.ts new file mode 100644 index 000000000000..a12941603a1b --- /dev/null +++ b/packages/llm/test/lib/tool-runtime.ts @@ -0,0 +1,9 @@ +import { Stream } from "effect" +import { LLMClient } from "../../src/route" +import type { Tools } from "../../src/tool" +import type { RunOptions } from "../../src/tool-runtime" + +type CompatRunOptions = RunOptions<Tools> & { readonly maxSteps?: number } + +export const runTools = (options: CompatRunOptions) => + LLMClient.stream({ ...options, stopWhen: options.stopWhen ?? LLMClient.stepCountIs(options.maxSteps ?? 10) }) diff --git a/packages/llm/test/llm.test.ts b/packages/llm/test/llm.test.ts new file mode 100644 index 000000000000..9380e554bf34 --- /dev/null +++ b/packages/llm/test/llm.test.ts @@ -0,0 +1,135 @@ +import { describe, expect, test } from "bun:test" +import { LLM, LLMResponse } from "../src" +import { LLMRequest, Message, ModelRef, ToolChoice, ToolDefinition } from "../src/schema" + +describe("llm constructors", () => { + test("builds canonical schema classes from ergonomic input", () => { + const request = LLM.request({ + id: "req_1", + model: LLM.model({ id: "fake-model", provider: "fake", route: "openai-chat", baseURL: "https://fake.local" }), + system: "You are concise.", + prompt: "Say hello.", + }) + + expect(request).toBeInstanceOf(LLMRequest) + expect(request.model).toBeInstanceOf(ModelRef) + expect(request.messages[0]).toBeInstanceOf(Message) + expect(request.system).toEqual([{ type: "text", text: "You are concise." }]) + expect(request.messages[0]?.content).toEqual([{ type: "text", text: "Say hello."
}]) + expect(request.generation).toBeUndefined() + expect(request.tools).toEqual([]) + }) + + test("updates requests without spreading schema class instances", () => { + const base = LLM.request({ + id: "req_1", + model: LLM.model({ id: "fake-model", provider: "fake", route: "openai-chat", baseURL: "https://fake.local" }), + prompt: "Say hello.", + }) + const updated = LLM.updateRequest(base, { + generation: { maxTokens: 20 }, + messages: [...base.messages, LLM.assistant("Hi.")], + }) + + expect(updated).toBeInstanceOf(LLMRequest) + expect(updated.id).toBe("req_1") + expect(updated.model).toEqual(base.model) + expect(updated.generation).toEqual({ maxTokens: 20 }) + expect(updated.messages.map((message) => message.role)).toEqual(["user", "assistant"]) + }) + + test("keeps request options separate from model defaults", () => { + const request = LLM.request({ + model: LLM.model({ + id: "fake-model", + provider: "fake", + route: "openai-chat", + baseURL: "https://fake.local", + generation: { maxTokens: 100, temperature: 1 }, + providerOptions: { openai: { store: false, metadata: { model: true } } }, + http: { body: { metadata: { model: true } }, headers: { "x-shared": "model" }, query: { model: "1" } }, + }), + prompt: "Say hello.", + generation: { temperature: 0 }, + providerOptions: { openai: { store: true, metadata: { request: true } } }, + http: { body: { metadata: { request: true } }, headers: { "x-shared": "request" }, query: { request: "1" } }, + }) + + expect(request.generation).toEqual({ temperature: 0 }) + expect(request.providerOptions).toEqual({ openai: { store: true, metadata: { request: true } } }) + expect(request.http).toEqual({ + body: { metadata: { request: true } }, + headers: { "x-shared": "request" }, + query: { request: "1" }, + }) + }) + + test("updates canonical requests from the request datatype", () => { + const base = LLM.request({ + id: "req_1", + model: LLM.model({ id: "fake-model", provider: "fake", route: "openai-chat", baseURL: "https://fake.local" }), + prompt: "Say hello.", + }) + const updated = LLMRequest.update(base, { messages: [...base.messages, LLM.assistant("Hi.")] }) + + expect(updated).toBeInstanceOf(LLMRequest) + expect(updated.id).toBe("req_1") + expect(LLMRequest.input(updated).id).toBe("req_1") + expect(updated.messages.map((message) => message.role)).toEqual(["user", "assistant"]) + expect(LLMRequest.update(updated, {})).toBe(updated) + }) + + test("updates canonical models from the model datatype", () => { + const base = LLM.model({ id: "fake-model", provider: "fake", route: "openai-chat", baseURL: "https://fake.local" }) + const updated = ModelRef.update(base, { route: "openai-responses" }) + + expect(updated).toBeInstanceOf(ModelRef) + expect(String(updated.id)).toBe("fake-model") + expect(updated.route).toBe("openai-responses") + expect(String(ModelRef.input(updated).provider)).toBe("fake") + expect(ModelRef.update(updated, {})).toBe(updated) + }) + + test("builds tool choices from names and tools", () => { + const tool = LLM.toolDefinition({ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }) + + expect(tool).toBeInstanceOf(ToolDefinition) + expect(LLM.toolChoice("lookup")).toEqual(new ToolChoice({ type: "tool", name: "lookup" })) + expect(LLM.toolChoiceName("required")).toEqual(new ToolChoice({ type: "tool", name: "required" })) + expect(LLM.toolChoice(tool)).toEqual(new ToolChoice({ type: "tool", name: "lookup" })) + }) + + test("builds tool choice modes from reserved strings", () => { + 
expect(LLM.toolChoice("auto")).toEqual(new ToolChoice({ type: "auto" })) + expect(LLM.toolChoice("none")).toEqual(new ToolChoice({ type: "none" })) + expect(LLM.toolChoice("required")).toEqual(new ToolChoice({ type: "required" })) + expect( + LLM.request({ + model: LLM.model({ id: "fake-model", provider: "fake", route: "openai-chat", baseURL: "https://fake.local" }), + prompt: "Use tools if needed.", + toolChoice: "required", + }).toolChoice, + ).toEqual(new ToolChoice({ type: "required" })) + }) + + test("builds assistant tool calls and tool result messages", () => { + const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } }) + const result = LLM.toolResult({ id: "call_1", name: "lookup", result: { temperature: 72 } }) + + expect(LLM.assistant([call]).content).toEqual([call]) + expect(LLM.toolMessage(result).content).toEqual([ + { type: "tool-result", id: "call_1", name: "lookup", result: { type: "json", value: { temperature: 72 } } }, + ]) + }) + + test("extracts output text from response events", () => { + expect( + LLMResponse.text({ + events: [ + { type: "text-delta", text: "hi" }, + { type: "request-finish", reason: "stop" }, + ], + }), + ).toBe("hi") + }) +}) diff --git a/packages/llm/test/provider.types.ts b/packages/llm/test/provider.types.ts new file mode 100644 index 000000000000..a04ce8bc609d --- /dev/null +++ b/packages/llm/test/provider.types.ts @@ -0,0 +1,39 @@ +import { Provider } from "../src/provider" +import { ProviderID, type ModelRef } from "../src/schema" + +declare const model: (id: string) => ModelRef +declare const requiredModel: (id: string, options: { readonly baseURL: string }) => ModelRef +declare const chat: (id: string, options: { readonly apiKey: string }) => ModelRef + +Provider.make({ + id: ProviderID.make("example"), + model, +}) + +Provider.make({ + id: ProviderID.make("bad"), + model, + // @ts-expect-error provider definitions should not grow accidental top-level fields. + routes: [], +}) + +const requiredProvider = Provider.make({ + id: ProviderID.make("required"), + model: requiredModel, +}) + +requiredProvider.model("custom", { baseURL: "https://example.com/v1" }) + +// @ts-expect-error Provider.make preserves required model options. +requiredProvider.model("custom") + +const multiApiProvider = Provider.make({ + id: ProviderID.make("multi-api"), + model, + apis: { chat }, +}) + +multiApiProvider.apis.chat("chat-model", { apiKey: "key" }) + +// @ts-expect-error Provider.make preserves API-specific option types. +multiApiProvider.apis.chat("chat-model") diff --git a/packages/llm/test/provider/anthropic-messages.recorded.test.ts b/packages/llm/test/provider/anthropic-messages.recorded.test.ts new file mode 100644 index 000000000000..a8d87c46ffa5 --- /dev/null +++ b/packages/llm/test/provider/anthropic-messages.recorded.test.ts @@ -0,0 +1,46 @@ +import { describe, expect } from "bun:test" +import { Effect } from "effect" +import { LLM, LLMError } from "../../src" +import { LLMClient } from "../../src/route" +import * as AnthropicMessages from "../../src/protocols/anthropic-messages" +import { weatherToolName } from "../recorded-scenarios" +import { recordedTests } from "../recorded-test" + +const model = AnthropicMessages.model({ + id: "claude-haiku-4-5-20251001", + apiKey: process.env.ANTHROPIC_API_KEY ?? 
"fixture", +}) + +const malformedToolOrderRequest = LLM.request({ + id: "recorded_anthropic_malformed_tool_order", + model, + messages: [ + LLM.assistant([ + LLM.toolCall({ id: "call_1", name: weatherToolName, input: { city: "Paris" } }), + { type: "text", text: "I will check the weather." }, + ]), + LLM.toolMessage({ id: "call_1", name: weatherToolName, result: { temperature: "72F" } }), + LLM.user("Use that result to answer briefly."), + ], + tools: [{ name: weatherToolName, description: "Get weather", inputSchema: { type: "object", properties: {} } }], +}) + +const recorded = recordedTests({ + prefix: "anthropic-messages", + provider: "anthropic", + protocol: "anthropic-messages", + requires: ["ANTHROPIC_API_KEY"], + options: { requestHeaders: ["content-type", "anthropic-version"] }, +}) + +describe("Anthropic Messages sad-path recorded", () => { + recorded.effect.with("rejects malformed assistant tool order", { tags: ["tool", "sad-path"] }, () => + Effect.gen(function* () { + const error = yield* LLMClient.generate(malformedToolOrderRequest).pipe(Effect.flip) + + expect(error).toBeInstanceOf(LLMError) + expect(error.reason).toMatchObject({ _tag: "InvalidRequest" }) + expect(error.message).toContain("HTTP 400") + }), + ) +}) diff --git a/packages/llm/test/provider/anthropic-messages.test.ts b/packages/llm/test/provider/anthropic-messages.test.ts new file mode 100644 index 000000000000..263828a0ade3 --- /dev/null +++ b/packages/llm/test/provider/anthropic-messages.test.ts @@ -0,0 +1,377 @@ +import { describe, expect } from "bun:test" +import { Effect } from "effect" +import { CacheHint, LLM, LLMError } from "../../src" +import { LLMClient } from "../../src/route" +import * as AnthropicMessages from "../../src/protocols/anthropic-messages" +import { it } from "../lib/effect" +import { fixedResponse } from "../lib/http" +import { sseEvents } from "../lib/sse" + +const model = AnthropicMessages.model({ + id: "claude-sonnet-4-5", + baseURL: "https://api.anthropic.test/v1/", + headers: { "x-api-key": "test" }, +}) + +const request = LLM.request({ + id: "req_1", + model, + system: { type: "text", text: "You are concise.", cache: new CacheHint({ type: "ephemeral" }) }, + prompt: "Say hello.", + generation: { maxTokens: 20, temperature: 0 }, +}) + +describe("Anthropic Messages route", () => { + it.effect("prepares Anthropic Messages target", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare(request) + + expect(prepared.body).toEqual({ + model: "claude-sonnet-4-5", + system: [{ type: "text", text: "You are concise.", cache_control: { type: "ephemeral" } }], + messages: [{ role: "user", content: [{ type: "text", text: "Say hello." }] }], + stream: true, + max_tokens: 20, + temperature: 0, + }) + }), + ) + + it.effect("prepares tool call and tool result messages", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_tool_result", + model, + messages: [ + LLM.user("What is the weather?"), + LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]), + LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }), + ], + }), + ) + + expect(prepared.body).toEqual({ + model: "claude-sonnet-4-5", + messages: [ + { role: "user", content: [{ type: "text", text: "What is the weather?" 
}] }, + { + role: "assistant", + content: [{ type: "tool_use", id: "call_1", name: "lookup", input: { query: "weather" } }], + }, + { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: '{"forecast":"sunny"}' }] }, + ], + stream: true, + max_tokens: 4096, + }) + }), + ) + + it.effect("lowers preserved Anthropic reasoning signature metadata", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + model, + messages: [ + LLM.assistant([ + { type: "reasoning", text: "thinking", providerMetadata: { anthropic: { signature: "sig_1" } } }, + ]), + ], + }), + ) + + expect(prepared.body).toMatchObject({ + messages: [{ role: "assistant", content: [{ type: "thinking", thinking: "thinking", signature: "sig_1" }] }], + }) + }), + ) + + it.effect("parses text, reasoning, and usage stream fixtures", () => + Effect.gen(function* () { + const body = sseEvents( + { type: "message_start", message: { usage: { input_tokens: 5, cache_read_input_tokens: 1 } } }, + { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } }, + { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } }, + { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "!" } }, + { type: "content_block_stop", index: 0 }, + { type: "content_block_start", index: 1, content_block: { type: "thinking", thinking: "" } }, + { type: "content_block_delta", index: 1, delta: { type: "thinking_delta", thinking: "thinking" } }, + { type: "content_block_delta", index: 1, delta: { type: "signature_delta", signature: "sig_1" } }, + { type: "content_block_stop", index: 1 }, + { + type: "message_delta", + delta: { stop_reason: "end_turn", stop_sequence: "\n\nHuman:" }, + usage: { output_tokens: 2 }, + }, + { type: "message_stop" }, + ) + const response = yield* LLMClient.generate(request).pipe(Effect.provide(fixedResponse(body))) + + expect(response.text).toBe("Hello!") + expect(response.reasoning).toBe("thinking") + expect(response.usage).toMatchObject({ + inputTokens: 5, + outputTokens: 2, + cacheReadInputTokens: 1, + totalTokens: 7, + }) + expect(response.events.find((event) => event.type === "reasoning-delta" && event.text === "")).toMatchObject({ + providerMetadata: { anthropic: { signature: "sig_1" } }, + }) + expect(response.events.at(-1)).toMatchObject({ + type: "request-finish", + reason: "stop", + providerMetadata: { anthropic: { stopSequence: "\n\nHuman:" } }, + }) + }), + ) + + it.effect("assembles streamed tool call input", () => + Effect.gen(function* () { + const body = sseEvents( + { type: "message_start", message: { usage: { input_tokens: 5 } } }, + { type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "call_1", name: "lookup" } }, + { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query"' } }, + { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: ':"weather"}' } }, + { type: "content_block_stop", index: 0 }, + { type: "message_delta", delta: { stop_reason: "tool_use" }, usage: { output_tokens: 1 } }, + ) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + expect(response.toolCalls).toEqual([ + { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } }, + ]) + expect(response.events).toEqual([ + { type: 
"tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' }, + { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' }, + { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } }, + { + type: "request-finish", + reason: "tool-calls", + usage: { inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } }, + }, + ]) + }), + ) + + it.effect("emits provider-error events for mid-stream provider errors", () => + Effect.gen(function* () { + const response = yield* LLMClient.generate(request).pipe( + Effect.provide( + fixedResponse(sseEvents({ type: "error", error: { type: "overloaded_error", message: "Overloaded" } })), + ), + ) + + expect(response.events).toEqual([{ type: "provider-error", message: "Overloaded" }]) + }), + ) + + it.effect("fails HTTP provider errors before stream parsing", () => + Effect.gen(function* () { + const error = yield* LLMClient.generate(request).pipe( + Effect.provide( + fixedResponse('{"type":"error","error":{"type":"invalid_request_error","message":"Bad request"}}', { + status: 400, + headers: { "content-type": "application/json" }, + }), + ), + Effect.flip, + ) + + expect(error).toBeInstanceOf(LLMError) + expect(error.reason).toMatchObject({ _tag: "InvalidRequest" }) + expect(error.message).toContain("HTTP 400") + }), + ) + + it.effect("decodes server_tool_use + web_search_tool_result as provider-executed events", () => + Effect.gen(function* () { + const body = sseEvents( + { type: "message_start", message: { usage: { input_tokens: 5 } } }, + { + type: "content_block_start", + index: 0, + content_block: { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search" }, + }, + { + type: "content_block_delta", + index: 0, + delta: { type: "input_json_delta", partial_json: '{"query":"effect 4"}' }, + }, + { type: "content_block_stop", index: 0 }, + { + type: "content_block_start", + index: 1, + content_block: { + type: "web_search_tool_result", + tool_use_id: "srvtoolu_abc", + content: [{ type: "web_search_result", url: "https://example.com", title: "Example" }], + }, + }, + { type: "content_block_stop", index: 1 }, + { type: "content_block_start", index: 2, content_block: { type: "text", text: "" } }, + { type: "content_block_delta", index: 2, delta: { type: "text_delta", text: "Found it." 
} }, + { type: "content_block_stop", index: 2 }, + { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 8 } }, + ) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + const toolCall = response.events.find((event) => event.type === "tool-call") + expect(toolCall).toEqual({ + type: "tool-call", + id: "srvtoolu_abc", + name: "web_search", + input: { query: "effect 4" }, + providerExecuted: true, + }) + const toolResult = response.events.find((event) => event.type === "tool-result") + expect(toolResult).toEqual({ + type: "tool-result", + id: "srvtoolu_abc", + name: "web_search", + result: { type: "json", value: [{ type: "web_search_result", url: "https://example.com", title: "Example" }] }, + providerExecuted: true, + providerMetadata: { anthropic: { blockType: "web_search_tool_result" } }, + }) + expect(response.text).toBe("Found it.") + expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "stop" }) + }), + ) + + it.effect("decodes web_search_tool_result_error as provider-executed error result", () => + Effect.gen(function* () { + const body = sseEvents( + { type: "message_start", message: { usage: { input_tokens: 5 } } }, + { + type: "content_block_start", + index: 0, + content_block: { type: "server_tool_use", id: "srvtoolu_x", name: "web_search" }, + }, + { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query":"q"}' } }, + { type: "content_block_stop", index: 0 }, + { + type: "content_block_start", + index: 1, + content_block: { + type: "web_search_tool_result", + tool_use_id: "srvtoolu_x", + content: { type: "web_search_tool_result_error", error_code: "max_uses_exceeded" }, + }, + }, + { type: "content_block_stop", index: 1 }, + { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 1 } }, + ) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + const toolResult = response.events.find((event) => event.type === "tool-result") + expect(toolResult).toMatchObject({ + type: "tool-result", + id: "srvtoolu_x", + name: "web_search", + result: { type: "error" }, + providerExecuted: true, + }) + }), + ) + + it.effect("round-trips provider-executed assistant content into server tool blocks", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_round_trip", + model, + messages: [ + LLM.user("Search for something."), + LLM.assistant([ + { + type: "tool-call", + id: "srvtoolu_abc", + name: "web_search", + input: { query: "effect 4" }, + providerExecuted: true, + }, + { + type: "tool-result", + id: "srvtoolu_abc", + name: "web_search", + result: { type: "json", value: [{ url: "https://example.com" }] }, + providerExecuted: true, + }, + { type: "text", text: "Found it." }, + ]), + LLM.user("Thanks."), + ], + }), + ) + + expect(prepared.body).toMatchObject({ + messages: [ + { role: "user", content: [{ type: "text", text: "Search for something." 
}] },
+          {
+            role: "assistant",
+            content: [
+              { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search", input: { query: "effect 4" } },
+              {
+                type: "web_search_tool_result",
+                tool_use_id: "srvtoolu_abc",
+                content: [{ url: "https://example.com" }],
+              },
+              { type: "text", text: "Found it." },
+            ],
+          },
+          { role: "user", content: [{ type: "text", text: "Thanks." }] },
+        ],
+      })
+    }),
+  )
+
+  it.effect("rejects round-trip for unknown server tool names", () =>
+    Effect.gen(function* () {
+      const error = yield* LLMClient.prepare(
+        LLM.request({
+          id: "req_unknown_server_tool",
+          model,
+          messages: [
+            LLM.assistant([
+              {
+                type: "tool-result",
+                id: "srvtoolu_abc",
+                name: "future_server_tool",
+                result: { type: "json", value: {} },
+                providerExecuted: true,
+              },
+            ]),
+          ],
+        }),
+      ).pipe(Effect.flip)
+
+      expect(error.message).toContain("future_server_tool")
+    }),
+  )
+
+  it.effect("rejects unsupported user media content", () =>
+    Effect.gen(function* () {
+      const error = yield* LLMClient.prepare(
+        LLM.request({
+          id: "req_media",
+          model,
+          messages: [LLM.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })],
+        }),
+      ).pipe(Effect.flip)
+
+      expect(error.message).toContain("Anthropic Messages user messages only support text content for now")
+    }),
+  )
+})
diff --git a/packages/llm/test/provider/bedrock-converse.test.ts b/packages/llm/test/provider/bedrock-converse.test.ts
new file mode 100644
index 000000000000..28be714bdf3b
--- /dev/null
+++ b/packages/llm/test/provider/bedrock-converse.test.ts
@@ -0,0 +1,533 @@
+import { EventStreamCodec } from "@smithy/eventstream-codec"
+import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
+import { describe, expect } from "bun:test"
+import { Effect } from "effect"
+import { CacheHint, LLM } from "../../src"
+import { LLMClient } from "../../src/route"
+import * as BedrockConverse from "../../src/protocols/bedrock-converse"
+import { it } from "../lib/effect"
+import { fixedResponse } from "../lib/http"
+import {
+  eventSummary,
+  expectWeatherToolLoop,
+  runWeatherToolLoop,
+  weatherTool,
+  weatherToolLoopRequest,
+  weatherToolName,
+} from "../recorded-scenarios"
+import { recordedTests } from "../recorded-test"
+
+const codec = new EventStreamCodec(toUtf8, fromUtf8)
+const utf8Encoder = new TextEncoder()
+
+// Build a single AWS event-stream frame for a Converse stream event. Each
+// frame carries `:message-type=event` + `:event-type=<type>` headers and a
+// JSON payload body.
+const eventFrame = (type: string, payload: object) =>
+  codec.encode({
+    headers: {
+      ":message-type": { type: "string", value: "event" },
+      ":event-type": { type: "string", value: type },
+      ":content-type": { type: "string", value: "application/json" },
+    },
+    body: utf8Encoder.encode(JSON.stringify(payload)),
+  })
+
+const concat = (frames: ReadonlyArray<Uint8Array>) => {
+  const total = frames.reduce((sum, frame) => sum + frame.length, 0)
+  const out = new Uint8Array(total)
+  let offset = 0
+  for (const frame of frames) {
+    out.set(frame, offset)
+    offset += frame.length
+  }
+  return out
+}
+
+const eventStreamBody = (...payloads: ReadonlyArray<readonly [string, object]>) =>
+  concat(payloads.map(([type, payload]) => eventFrame(type, payload)))
+
+// Override the default SSE content-type with the binary event-stream type so
+// the cassette layer treats the body as bytes when recording.
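+// Note: each encoded frame is length-prefixed binary (a 12-byte prelude with
+// the total and header lengths plus a CRC32, then the encoded headers, the
+// JSON body, and a trailing CRC32), so the body must round-trip as raw bytes
+// rather than as UTF-8 SSE text.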
+const fixedBytes = (bytes: Uint8Array) => + fixedResponse(bytes.slice().buffer, { headers: { "content-type": "application/vnd.amazon.eventstream" } }) + +const model = BedrockConverse.model({ + id: "anthropic.claude-3-5-sonnet-20240620-v1:0", + baseURL: "https://bedrock-runtime.test", + apiKey: "test-bearer", +}) + +const baseRequest = LLM.request({ + id: "req_1", + model, + system: "You are concise.", + prompt: "Say hello.", + generation: { maxTokens: 64, temperature: 0 }, +}) + +describe("Bedrock Converse route", () => { + it.effect("prepares Converse target with system, inference config, and messages", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare(baseRequest) + + expect(prepared.body).toEqual({ + modelId: "anthropic.claude-3-5-sonnet-20240620-v1:0", + system: [{ text: "You are concise." }], + messages: [{ role: "user", content: [{ text: "Say hello." }] }], + inferenceConfig: { maxTokens: 64, temperature: 0 }, + }) + }), + ) + + it.effect("prepares tool config with toolSpec and toolChoice", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.updateRequest(baseRequest, { + tools: [ + { + name: "lookup", + description: "Lookup data", + inputSchema: { type: "object", properties: { query: { type: "string" } }, required: ["query"] }, + }, + ], + toolChoice: LLM.toolChoice({ type: "required" }), + }), + ) + + expect(prepared.body).toMatchObject({ + toolConfig: { + tools: [ + { + toolSpec: { + name: "lookup", + description: "Lookup data", + inputSchema: { + json: { type: "object", properties: { query: { type: "string" } }, required: ["query"] }, + }, + }, + }, + ], + toolChoice: { any: {} }, + }, + }) + }), + ) + + it.effect("lowers assistant tool-call + tool-result message history", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_history", + model, + messages: [ + LLM.user("What is the weather?"), + LLM.assistant([LLM.toolCall({ id: "tool_1", name: "lookup", input: { query: "weather" } })]), + LLM.toolMessage({ id: "tool_1", name: "lookup", result: { forecast: "sunny" } }), + ], + }), + ) + + expect(prepared.body).toMatchObject({ + messages: [ + { role: "user", content: [{ text: "What is the weather?" }] }, + { + role: "assistant", + content: [{ toolUse: { toolUseId: "tool_1", name: "lookup", input: { query: "weather" } } }], + }, + { + role: "user", + content: [ + { + toolResult: { + toolUseId: "tool_1", + content: [{ json: { forecast: "sunny" } }], + status: "success", + }, + }, + ], + }, + ], + }) + }), + ) + + it.effect("decodes text-delta + messageStop + metadata usage from binary event stream", () => + Effect.gen(function* () { + const body = eventStreamBody( + ["messageStart", { role: "assistant" }], + ["contentBlockDelta", { contentBlockIndex: 0, delta: { text: "Hello" } }], + ["contentBlockDelta", { contentBlockIndex: 0, delta: { text: "!" } }], + ["contentBlockStop", { contentBlockIndex: 0 }], + ["messageStop", { stopReason: "end_turn" }], + ["metadata", { usage: { inputTokens: 5, outputTokens: 2, totalTokens: 7 } }], + ) + const response = yield* LLMClient.generate(baseRequest).pipe(Effect.provide(fixedBytes(body))) + + expect(response.text).toBe("Hello!") + const finishes = response.events.filter((event) => event.type === "request-finish") + // Bedrock splits the finish across `messageStop` (carries reason) and + // `metadata` (carries usage). We consolidate them into a single + // terminal `request-finish` event with both. 
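+      // The single finish checked below carries the stop reason, and the
+      // usage from the `metadata` frame surfaces on `response.usage` in the
+      // assertions that follow.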
+ expect(finishes).toHaveLength(1) + expect(finishes[0]).toMatchObject({ type: "request-finish", reason: "stop" }) + expect(response.usage).toMatchObject({ + inputTokens: 5, + outputTokens: 2, + totalTokens: 7, + }) + }), + ) + + it.effect("assembles streamed tool call input", () => + Effect.gen(function* () { + const body = eventStreamBody( + ["messageStart", { role: "assistant" }], + [ + "contentBlockStart", + { + contentBlockIndex: 0, + start: { toolUse: { toolUseId: "tool_1", name: "lookup" } }, + }, + ], + ["contentBlockDelta", { contentBlockIndex: 0, delta: { toolUse: { input: '{"query"' } } }], + ["contentBlockDelta", { contentBlockIndex: 0, delta: { toolUse: { input: ':"weather"}' } } }], + ["contentBlockStop", { contentBlockIndex: 0 }], + ["messageStop", { stopReason: "tool_use" }], + ) + const response = yield* LLMClient.generate( + LLM.updateRequest(baseRequest, { + tools: [{ name: "lookup", description: "Lookup", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedBytes(body))) + + expect(response.toolCalls).toEqual([ + { type: "tool-call", id: "tool_1", name: "lookup", input: { query: "weather" } }, + ]) + const events = response.events.filter((event) => event.type === "tool-input-delta") + expect(events).toEqual([ + { type: "tool-input-delta", id: "tool_1", name: "lookup", text: '{"query"' }, + { type: "tool-input-delta", id: "tool_1", name: "lookup", text: ':"weather"}' }, + ]) + expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "tool-calls" }) + }), + ) + + it.effect("decodes reasoning deltas", () => + Effect.gen(function* () { + const body = eventStreamBody( + ["messageStart", { role: "assistant" }], + ["contentBlockDelta", { contentBlockIndex: 0, delta: { reasoningContent: { text: "Let me think." 
} } }], + ["contentBlockStop", { contentBlockIndex: 0 }], + ["messageStop", { stopReason: "end_turn" }], + ) + const response = yield* LLMClient.generate(baseRequest).pipe(Effect.provide(fixedBytes(body))) + + expect(response.reasoning).toBe("Let me think.") + }), + ) + + it.effect("emits provider-error for throttlingException", () => + Effect.gen(function* () { + const body = eventStreamBody( + ["messageStart", { role: "assistant" }], + ["throttlingException", { message: "Slow down" }], + ) + const response = yield* LLMClient.generate(baseRequest).pipe(Effect.provide(fixedBytes(body))) + + expect(response.events.find((event) => event.type === "provider-error")).toEqual({ + type: "provider-error", + message: "Slow down", + retryable: true, + }) + }), + ) + + it.effect("rejects requests with no auth path", () => + Effect.gen(function* () { + const unsignedModel = BedrockConverse.model({ + id: "anthropic.claude-3-5-sonnet-20240620-v1:0", + baseURL: "https://bedrock-runtime.test", + }) + const error = yield* LLMClient.generate(LLM.updateRequest(baseRequest, { model: unsignedModel })).pipe( + Effect.provide(fixedBytes(eventStreamBody(["messageStop", { stopReason: "end_turn" }]))), + Effect.flip, + ) + + expect(error.message).toContain("Bedrock Converse requires either model.apiKey") + }), + ) + + it.effect("signs requests with SigV4 when AWS credentials are provided (deterministic plumbing check)", () => + Effect.gen(function* () { + const signed = BedrockConverse.model({ + id: "anthropic.claude-3-5-sonnet-20240620-v1:0", + baseURL: "https://bedrock-runtime.test", + credentials: { + region: "us-east-1", + accessKeyId: "AKIAIOSFODNN7EXAMPLE", + secretAccessKey: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + }, + }) + const prepared = yield* LLMClient.prepare(LLM.updateRequest(baseRequest, { model: signed })) + + expect(prepared.route).toBe("bedrock-converse") + // The prepare phase doesn't sign — toHttp does. We assert the credential + // is plumbed onto the model native field for the signer to find. + expect(prepared.model.native).toMatchObject({ + aws_credentials: { region: "us-east-1", accessKeyId: "AKIAIOSFODNN7EXAMPLE" }, + aws_region: "us-east-1", + }) + }), + ) + + it.effect("emits cachePoint markers after system, user-text, and assistant-text with cache hints", () => + Effect.gen(function* () { + const cache = new CacheHint({ type: "ephemeral" }) + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_cache", + model, + system: [{ type: "text", text: "System prefix.", cache }], + messages: [ + LLM.user([{ type: "text", text: "User prefix.", cache }]), + LLM.assistant([{ type: "text", text: "Assistant prefix.", cache }]), + ], + generation: { maxTokens: 16, temperature: 0 }, + }), + ) + + expect(prepared.body).toMatchObject({ + // System: text block followed by cachePoint marker. + system: [{ text: "System prefix." }, { cachePoint: { type: "default" } }], + messages: [ + { + role: "user", + content: [{ text: "User prefix." }, { cachePoint: { type: "default" } }], + }, + { + role: "assistant", + content: [{ text: "Assistant prefix." }, { cachePoint: { type: "default" } }], + }, + ], + }) + }), + ) + + it.effect("does not emit cachePoint when no cache hint is set", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare(baseRequest) + expect(prepared.body).toMatchObject({ + system: [{ text: "You are concise." }], + messages: [{ role: "user", content: [{ text: "Say hello." 
}] }], + }) + }), + ) + + it.effect("lowers image media into Bedrock image blocks", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_image", + model, + messages: [ + LLM.user([ + { type: "text", text: "What is in this image?" }, + { type: "media", mediaType: "image/png", data: "AAAA" }, + { type: "media", mediaType: "image/jpeg", data: "BBBB" }, + { type: "media", mediaType: "image/jpg", data: "CCCC" }, + { type: "media", mediaType: "image/webp", data: "DDDD" }, + ]), + ], + }), + ) + + expect(prepared.body).toMatchObject({ + messages: [ + { + role: "user", + content: [ + { text: "What is in this image?" }, + { image: { format: "png", source: { bytes: "AAAA" } } }, + { image: { format: "jpeg", source: { bytes: "BBBB" } } }, + // image/jpg is a non-standard alias; we map it to jpeg. + { image: { format: "jpeg", source: { bytes: "CCCC" } } }, + { image: { format: "webp", source: { bytes: "DDDD" } } }, + ], + }, + ], + }) + }), + ) + + it.effect("base64-encodes Uint8Array image bytes", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_image_bytes", + model, + messages: [LLM.user([{ type: "media", mediaType: "image/png", data: new Uint8Array([1, 2, 3, 4, 5]) }])], + }), + ) + + // Buffer.from([1,2,3,4,5]).toString("base64") === "AQIDBAU=" + expect(prepared.body).toMatchObject({ + messages: [ + { + role: "user", + content: [{ image: { format: "png", source: { bytes: "AQIDBAU=" } } }], + }, + ], + }) + }), + ) + + it.effect("lowers document media into Bedrock document blocks with format and name", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_doc", + model, + messages: [ + LLM.user([ + { type: "media", mediaType: "application/pdf", data: "PDFDATA", filename: "report.pdf" }, + { type: "media", mediaType: "text/csv", data: "CSVDATA" }, + ]), + ], + }), + ) + + expect(prepared.body).toMatchObject({ + messages: [ + { + role: "user", + content: [ + // Filename round-trips when supplied. + { document: { format: "pdf", name: "report.pdf", source: { bytes: "PDFDATA" } } }, + // Falls back to a stable placeholder when filename is missing. + { document: { format: "csv", name: "document.csv", source: { bytes: "CSVDATA" } } }, + ], + }, + ], + }) + }), + ) + + it.effect("rejects unsupported image media types", () => + Effect.gen(function* () { + const error = yield* LLMClient.prepare( + LLM.request({ + id: "req_bad_image", + model, + messages: [LLM.user([{ type: "media", mediaType: "image/svg+xml", data: "x" }])], + }), + ).pipe(Effect.flip) + + expect(error.message).toContain("Bedrock Converse does not support image media type image/svg+xml") + }), + ) + + it.effect("rejects unsupported document media types", () => + Effect.gen(function* () { + const error = yield* LLMClient.prepare( + LLM.request({ + id: "req_bad_doc", + model, + messages: [LLM.user([{ type: "media", mediaType: "application/x-tar", data: "x", filename: "a.tar" }])], + }), + ).pipe(Effect.flip) + + expect(error.message).toContain("Bedrock Converse does not support media type application/x-tar") + }), + ) +}) + +// Live recorded integration tests. Run with `RECORD=true AWS_ACCESS_KEY_ID=... +// AWS_SECRET_ACCESS_KEY=... [AWS_SESSION_TOKEN=...] bun run test ...` to refresh +// cassettes; replay is the default and works without credentials. +// +// Region is pinned to us-east-1 in tests so the request URL is stable across +// machines on replay. 
If you need to record from a different region (e.g. your +// account has access elsewhere), pass `BEDROCK_RECORDING_REGION=eu-west-1` — +// but then commit the resulting cassette and others should record from the +// same region too. +const RECORDING_REGION = process.env.BEDROCK_RECORDING_REGION ?? "us-east-1" + +const recordedModel = () => + BedrockConverse.model({ + // Most newer Anthropic models on Bedrock require a cross-region inference + // profile (`us.` prefix). Nova does not require an Anthropic use-case form + // and is on-demand-throughput accessible by default for most accounts. + id: process.env.BEDROCK_MODEL_ID ?? "us.amazon.nova-micro-v1:0", + credentials: { + region: RECORDING_REGION, + accessKeyId: process.env.AWS_ACCESS_KEY_ID ?? "fixture", + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY ?? "fixture", + sessionToken: process.env.AWS_SESSION_TOKEN, + }, + }) + +const recorded = recordedTests({ + prefix: "bedrock-converse", + provider: "amazon-bedrock", + protocol: "bedrock-converse", + requires: ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"], +}) + +describe("Bedrock Converse recorded", () => { + recorded.effect("streams text", () => + Effect.gen(function* () { + const llm = yield* LLMClient.Service + const response = yield* llm.generate( + LLM.request({ + id: "recorded_bedrock_text", + model: recordedModel(), + system: "Reply with the single word 'Hello'.", + prompt: "Say hello.", + generation: { maxTokens: 16, temperature: 0 }, + }), + ) + + expect(eventSummary(response.events)).toEqual([ + { type: "text", value: "Hello" }, + { type: "finish", reason: "stop", usage: { inputTokens: 12, outputTokens: 2, totalTokens: 14 } }, + ]) + }), + ) + + recorded.effect.with("streams a tool call", { tags: ["tool"] }, () => + Effect.gen(function* () { + const llm = yield* LLMClient.Service + const response = yield* llm.generate( + LLM.request({ + id: "recorded_bedrock_tool_call", + model: recordedModel(), + system: "Call tools exactly as requested.", + prompt: "Call get_weather with city exactly Paris.", + tools: [weatherTool], + toolChoice: LLM.toolChoice(weatherTool), + generation: { maxTokens: 80, temperature: 0 }, + }), + ) + + expect(eventSummary(response.events)).toEqual([ + { type: "tool-call", name: weatherToolName, input: { city: "Paris" } }, + { type: "finish", reason: "tool-calls", usage: { inputTokens: 419, outputTokens: 16, totalTokens: 435 } }, + ]) + }), + ) + + recorded.effect.with("drives a tool loop", { tags: ["tool", "tool-loop", "golden"] }, () => + Effect.gen(function* () { + const llm = yield* LLMClient.Service + expectWeatherToolLoop( + yield* runWeatherToolLoop( + weatherToolLoopRequest({ + id: "recorded_bedrock_tool_loop", + model: recordedModel(), + }), + ), + ) + }), + ) +}) diff --git a/packages/llm/test/provider/cloudflare.test.ts b/packages/llm/test/provider/cloudflare.test.ts new file mode 100644 index 000000000000..00b69fa18c4b --- /dev/null +++ b/packages/llm/test/provider/cloudflare.test.ts @@ -0,0 +1,232 @@ +import { describe, expect } from "bun:test" +import { ConfigProvider, Effect, Schema } from "effect" +import { HttpClientRequest } from "effect/unstable/http" +import { LLM } from "../../src" +import * as Cloudflare from "../../src/providers/cloudflare" +import { LLMClient } from "../../src/route" +import { it } from "../lib/effect" +import { dynamicResponse } from "../lib/http" +import { sseEvents } from "../lib/sse" + +const Json = Schema.fromJsonString(Schema.Unknown) +const decodeJson = Schema.decodeUnknownSync(Json) +const withEnv = 
(env: Record<string, string>) => Effect.provide(ConfigProvider.layer(ConfigProvider.fromEnv({ env })))
+
+const deltaChunk = (delta: object, finishReason: string | null = null) => ({
+  id: "chatcmpl_fixture",
+  choices: [{ delta, finish_reason: finishReason }],
+  usage: null,
+})
+
+describe("Cloudflare", () => {
+  it.effect("prepares AI Gateway models through the OpenAI-compatible Chat protocol", () =>
+    Effect.gen(function* () {
+      const model = Cloudflare.aiGateway("workers-ai/@cf/meta/llama-3.3-70b-instruct", {
+        accountId: "test-account",
+        gatewayId: "test-gateway",
+        apiKey: "test-token",
+      })
+
+      expect(model).toMatchObject({
+        id: "workers-ai/@cf/meta/llama-3.3-70b-instruct",
+        provider: "cloudflare-ai-gateway",
+        route: "cloudflare-ai-gateway",
+        baseURL: "https://gateway.ai.cloudflare.com/v1/test-account/test-gateway/compat",
+      })
+
+      const prepared = yield* LLMClient.prepare(LLM.request({ model, prompt: "Say hello." }))
+
+      expect(prepared.route).toBe("cloudflare-ai-gateway")
+      expect(prepared.body).toMatchObject({
+        model: "workers-ai/@cf/meta/llama-3.3-70b-instruct",
+        messages: [{ role: "user", content: "Say hello." }],
+        stream: true,
+      })
+    }),
+  )
+
+  it.effect("posts to the derived gateway endpoint with bearer auth", () =>
+    Effect.gen(function* () {
+      const response = yield* LLM.generate(
+        LLM.request({
+          model: Cloudflare.aiGateway("openai/gpt-4o-mini", {
+            accountId: "test-account",
+            gatewayId: "test-gateway",
+            apiKey: "test-token",
+          }),
+          prompt: "Say hello.",
+        }),
+      ).pipe(
+        Effect.provide(
+          dynamicResponse((input) =>
+            Effect.gen(function* () {
+              const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie)
+              expect(web.url).toBe(
+                "https://gateway.ai.cloudflare.com/v1/test-account/test-gateway/compat/chat/completions",
+              )
+              expect(web.headers.get("authorization")).toBe("Bearer test-token")
+              expect(decodeJson(input.text)).toMatchObject({
+                model: "openai/gpt-4o-mini",
+                stream: true,
+                messages: [{ role: "user", content: "Say hello."
}], + }) + return input.respond( + sseEvents(deltaChunk({ role: "assistant", content: "Hello" }), deltaChunk({}, "stop")), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ), + ), + ) + + expect(response.text).toBe("Hello") + }), + ) + + it.effect("defaults AI Gateway id to default when omitted or blank", () => + Effect.gen(function* () { + expect( + Cloudflare.aiGateway("workers-ai/@cf/meta/llama-3.3-70b-instruct", { + accountId: "test-account", + gatewayId: "", + gatewayApiKey: "test-token", + }).baseURL, + ).toBe("https://gateway.ai.cloudflare.com/v1/test-account/default/compat") + }), + ) + + it.effect("supports authenticated AI Gateway plus upstream provider auth", () => + Effect.gen(function* () { + yield* LLM.generate( + LLM.request({ + model: Cloudflare.aiGateway("openai/gpt-4o-mini", { + accountId: "test-account", + gatewayApiKey: "gateway-token", + apiKey: "provider-token", + }), + prompt: "Say hello.", + }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.url).toBe("https://gateway.ai.cloudflare.com/v1/test-account/default/compat/chat/completions") + expect(web.headers.get("cf-aig-authorization")).toBe("Bearer gateway-token") + expect(web.headers.get("authorization")).toBe("Bearer provider-token") + return input.respond( + sseEvents(deltaChunk({ role: "assistant", content: "Hello" }), deltaChunk({}, "stop")), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ), + ), + ) + }), + ) + + it.effect("allows a fully configured baseURL override", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + model: Cloudflare.aiGateway("openai/gpt-4o-mini", { + baseURL: "https://gateway.proxy.test/v1/custom/compat", + apiKey: "test-token", + }), + prompt: "Say hello.", + }), + ) + + expect(prepared.model.baseURL).toBe("https://gateway.proxy.test/v1/custom/compat") + }), + ) + + it.effect("prepares direct Workers AI models through the OpenAI-compatible Chat protocol", () => + Effect.gen(function* () { + const model = Cloudflare.workersAI("@cf/meta/llama-3.1-8b-instruct", { + accountId: "test-account", + apiKey: "test-token", + }) + + expect(model).toMatchObject({ + id: "@cf/meta/llama-3.1-8b-instruct", + provider: "cloudflare-workers-ai", + route: "cloudflare-workers-ai", + baseURL: "https://api.cloudflare.com/client/v4/accounts/test-account/ai/v1", + }) + + const prepared = yield* LLMClient.prepare(LLM.request({ model, prompt: "Say hello." })) + + expect(prepared.route).toBe("cloudflare-workers-ai") + expect(prepared.body).toMatchObject({ + model: "@cf/meta/llama-3.1-8b-instruct", + messages: [{ role: "user", content: "Say hello." 
}], + stream: true, + }) + }), + ) + + it.effect("posts direct Workers AI requests to the account endpoint with bearer auth", () => + Effect.gen(function* () { + const response = yield* LLM.generate( + LLM.request({ + model: Cloudflare.workersAI("@cf/meta/llama-3.1-8b-instruct", { + accountId: "test-account", + apiKey: "test-token", + }), + prompt: "Say hello.", + }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.url).toBe( + "https://api.cloudflare.com/client/v4/accounts/test-account/ai/v1/chat/completions", + ) + expect(web.headers.get("authorization")).toBe("Bearer test-token") + expect(decodeJson(input.text)).toMatchObject({ + model: "@cf/meta/llama-3.1-8b-instruct", + stream: true, + messages: [{ role: "user", content: "Say hello." }], + }) + return input.respond( + sseEvents(deltaChunk({ role: "assistant", content: "Hello" }), deltaChunk({}, "stop")), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ), + ), + ) + + expect(response.text).toBe("Hello") + }), + ) + + it.effect("supports direct Workers AI token aliases through auth config", () => + Effect.gen(function* () { + yield* LLM.generate( + LLM.request({ + model: Cloudflare.workersAI("@cf/meta/llama-3.1-8b-instruct", { + accountId: "test-account", + }), + prompt: "Say hello.", + }), + ).pipe( + withEnv({ CLOUDFLARE_WORKERS_AI_TOKEN: "test-token" }), + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.headers.get("authorization")).toBe("Bearer test-token") + return input.respond( + sseEvents(deltaChunk({ role: "assistant", content: "Hello" }), deltaChunk({}, "stop")), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ), + ), + ) + }), + ) +}) diff --git a/packages/llm/test/provider/gemini.test.ts b/packages/llm/test/provider/gemini.test.ts new file mode 100644 index 000000000000..a80ab740c3f7 --- /dev/null +++ b/packages/llm/test/provider/gemini.test.ts @@ -0,0 +1,360 @@ +import { describe, expect } from "bun:test" +import { Effect } from "effect" +import { LLM, LLMError } from "../../src" +import { LLMClient } from "../../src/route" +import * as Gemini from "../../src/protocols/gemini" +import { it } from "../lib/effect" +import { fixedResponse } from "../lib/http" +import { sseEvents, sseRaw } from "../lib/sse" + +const model = Gemini.model({ + id: "gemini-2.5-flash", + baseURL: "https://generativelanguage.test/v1beta/", + headers: { "x-goog-api-key": "test" }, +}) + +const request = LLM.request({ + id: "req_1", + model, + system: "You are concise.", + prompt: "Say hello.", + generation: { maxTokens: 20, temperature: 0 }, +}) + +describe("Gemini route", () => { + it.effect("prepares Gemini target", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare(request) + + expect(prepared.body).toEqual({ + contents: [{ role: "user", parts: [{ text: "Say hello." }] }], + systemInstruction: { parts: [{ text: "You are concise." 
}] }, + generationConfig: { maxOutputTokens: 20, temperature: 0 }, + }) + }), + ) + + it.effect("prepares multimodal user input and tool history", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_tool_result", + model, + tools: [ + { + name: "lookup", + description: "Lookup data", + inputSchema: { type: "object", properties: { query: { type: "string" } } }, + }, + ], + toolChoice: { type: "tool", name: "lookup" }, + messages: [ + LLM.user([ + { type: "text", text: "What is in this image?" }, + { type: "media", mediaType: "image/png", data: "AAECAw==" }, + ]), + LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]), + LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }), + ], + }), + ) + + expect(prepared.body).toEqual({ + contents: [ + { + role: "user", + parts: [{ text: "What is in this image?" }, { inlineData: { mimeType: "image/png", data: "AAECAw==" } }], + }, + { + role: "model", + parts: [{ functionCall: { name: "lookup", args: { query: "weather" } } }], + }, + { + role: "user", + parts: [ + { functionResponse: { name: "lookup", response: { name: "lookup", content: '{"forecast":"sunny"}' } } }, + ], + }, + ], + tools: [ + { + functionDeclarations: [ + { + name: "lookup", + description: "Lookup data", + parameters: { type: "object", properties: { query: { type: "string" } } }, + }, + ], + }, + ], + toolConfig: { functionCallingConfig: { mode: "ANY", allowedFunctionNames: ["lookup"] } }, + }) + }), + ) + + it.effect("omits tools when tool choice is none", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_no_tools", + model, + prompt: "Say hello.", + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + toolChoice: { type: "none" }, + }), + ) + + expect(prepared.body).toEqual({ + contents: [{ role: "user", parts: [{ text: "Say hello." }] }], + }) + }), + ) + + it.effect("sanitizes integer enums, dangling required, untyped arrays, and scalar object keys", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_schema_patch", + model, + prompt: "Use the tool.", + tools: [ + { + name: "lookup", + description: "Lookup data", + inputSchema: { + type: "object", + required: ["status", "missing"], + properties: { + status: { type: "integer", enum: [1, 2] }, + tags: { type: "array" }, + name: { type: "string", properties: { ignored: { type: "string" } }, required: ["ignored"] }, + }, + }, + }, + ], + }), + ) + + expect(prepared.body).toMatchObject({ + tools: [ + { + functionDeclarations: [ + { + parameters: { + type: "object", + required: ["status"], + properties: { + status: { type: "string", enum: ["1", "2"] }, + tags: { type: "array", items: { type: "string" } }, + name: { type: "string" }, + }, + }, + }, + ], + }, + ], + }) + }), + ) + + it.effect("parses text, reasoning, and usage stream fixtures", () => + Effect.gen(function* () { + const body = sseEvents( + { + candidates: [ + { + content: { role: "model", parts: [{ text: "thinking", thought: true }] }, + }, + ], + }, + { + candidates: [ + { + content: { role: "model", parts: [{ text: "Hello" }] }, + }, + ], + }, + { + candidates: [ + { + content: { role: "model", parts: [{ text: "!" 
}] }, + finishReason: "STOP", + }, + ], + }, + { + usageMetadata: { + promptTokenCount: 5, + candidatesTokenCount: 2, + totalTokenCount: 7, + thoughtsTokenCount: 1, + cachedContentTokenCount: 1, + }, + }, + ) + const response = yield* LLMClient.generate(request).pipe(Effect.provide(fixedResponse(body))) + + expect(response.text).toBe("Hello!") + expect(response.reasoning).toBe("thinking") + expect(response.usage).toMatchObject({ + inputTokens: 5, + outputTokens: 2, + reasoningTokens: 1, + cacheReadInputTokens: 1, + totalTokens: 7, + }) + expect(response.events).toEqual([ + { type: "reasoning-delta", text: "thinking" }, + { type: "text-delta", text: "Hello" }, + { type: "text-delta", text: "!" }, + { + type: "request-finish", + reason: "stop", + usage: { + inputTokens: 5, + outputTokens: 2, + reasoningTokens: 1, + cacheReadInputTokens: 1, + totalTokens: 7, + native: { + promptTokenCount: 5, + candidatesTokenCount: 2, + totalTokenCount: 7, + thoughtsTokenCount: 1, + cachedContentTokenCount: 1, + }, + }, + }, + ]) + }), + ) + + it.effect("emits streamed tool calls and maps finish reason", () => + Effect.gen(function* () { + const body = sseEvents({ + candidates: [ + { + content: { + role: "model", + parts: [{ functionCall: { name: "lookup", args: { query: "weather" } } }], + }, + finishReason: "STOP", + }, + ], + usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 1 }, + }) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + expect(response.toolCalls).toEqual([ + { type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } }, + ]) + expect(response.events).toEqual([ + { type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } }, + { + type: "request-finish", + reason: "tool-calls", + usage: { + inputTokens: 5, + outputTokens: 1, + totalTokens: 6, + native: { promptTokenCount: 5, candidatesTokenCount: 1 }, + }, + }, + ]) + }), + ) + + it.effect("assigns unique ids to multiple streamed tool calls", () => + Effect.gen(function* () { + const body = sseEvents({ + candidates: [ + { + content: { + role: "model", + parts: [ + { functionCall: { name: "lookup", args: { query: "weather" } } }, + { functionCall: { name: "lookup", args: { query: "news" } } }, + ], + }, + finishReason: "STOP", + }, + ], + }) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + expect(response.toolCalls).toEqual([ + { type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } }, + { type: "tool-call", id: "tool_1", name: "lookup", input: { query: "news" } }, + ]) + expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "tool-calls" }) + }), + ) + + it.effect("maps length and content-filter finish reasons", () => + Effect.gen(function* () { + const length = yield* LLMClient.generate(request).pipe( + Effect.provide( + fixedResponse( + sseEvents({ candidates: [{ content: { role: "model", parts: [] }, finishReason: "MAX_TOKENS" }] }), + ), + ), + ) + const filtered = yield* LLMClient.generate(request).pipe( + Effect.provide( + fixedResponse(sseEvents({ candidates: [{ content: { role: "model", parts: [] }, finishReason: "SAFETY" }] })), + ), + ) + + expect(length.events).toEqual([{ type: 
"request-finish", reason: "length" }]) + expect(filtered.events).toEqual([{ type: "request-finish", reason: "content-filter" }]) + }), + ) + + it.effect("leaves total usage undefined when component counts are missing", () => + Effect.gen(function* () { + const response = yield* LLMClient.generate(request).pipe( + Effect.provide(fixedResponse(sseEvents({ usageMetadata: { thoughtsTokenCount: 1 } }))), + ) + + expect(response.usage).toMatchObject({ reasoningTokens: 1 }) + expect(response.usage?.totalTokens).toBeUndefined() + }), + ) + + it.effect("fails invalid stream events", () => + Effect.gen(function* () { + const error = yield* LLMClient.generate(request).pipe( + Effect.provide(fixedResponse(sseRaw("data: {not json}"))), + Effect.flip, + ) + + expect(error).toBeInstanceOf(LLMError) + expect(error.reason).toMatchObject({ _tag: "InvalidProviderOutput" }) + expect(error.message).toContain("Invalid google/gemini stream event") + }), + ) + + it.effect("rejects unsupported assistant media content", () => + Effect.gen(function* () { + const error = yield* LLMClient.prepare( + LLM.request({ + id: "req_media", + model, + messages: [LLM.assistant({ type: "media", mediaType: "image/png", data: "AAECAw==" })], + }), + ).pipe(Effect.flip) + + expect(error.message).toContain( + "Gemini assistant messages only support text, reasoning, and tool-call content for now", + ) + }), + ) +}) diff --git a/packages/llm/test/provider/golden.recorded.test.ts b/packages/llm/test/provider/golden.recorded.test.ts new file mode 100644 index 000000000000..0e1151b7af06 --- /dev/null +++ b/packages/llm/test/provider/golden.recorded.test.ts @@ -0,0 +1,215 @@ +import * as AnthropicMessages from "../../src/protocols/anthropic-messages" +import * as Gemini from "../../src/protocols/gemini" +import * as OpenAIChat from "../../src/protocols/openai-chat" +import * as OpenAIResponses from "../../src/protocols/openai-responses" +import * as Cloudflare from "../../src/providers/cloudflare" +import * as OpenAI from "../../src/providers/openai" +import * as OpenAICompatible from "../../src/providers/openai-compatible" +import * as OpenRouter from "../../src/providers/openrouter" +import * as XAI from "../../src/providers/xai" +import { describeRecordedGoldenScenarios } from "../recorded-golden" + +const openAIChat = OpenAIChat.model({ id: "gpt-4o-mini", apiKey: process.env.OPENAI_API_KEY ?? "fixture" }) +const openAIResponses = OpenAIResponses.model({ id: "gpt-5.5", apiKey: process.env.OPENAI_API_KEY ?? "fixture" }) +const openAIResponsesWebSocket = OpenAI.responsesWebSocket("gpt-4.1-mini", { + apiKey: process.env.OPENAI_API_KEY ?? "fixture", +}) +const anthropicHaiku = AnthropicMessages.model({ + id: "claude-haiku-4-5-20251001", + apiKey: process.env.ANTHROPIC_API_KEY ?? "fixture", +}) +const anthropicOpus = AnthropicMessages.model({ + id: "claude-opus-4-7", + apiKey: process.env.ANTHROPIC_API_KEY ?? "fixture", +}) +const gemini = Gemini.model({ id: "gemini-2.5-flash", apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY ?? "fixture" }) +const xaiBasic = XAI.model("grok-3-mini", { apiKey: process.env.XAI_API_KEY ?? "fixture" }) +const xaiFlagship = XAI.model("grok-4.3", { apiKey: process.env.XAI_API_KEY ?? "fixture" }) +const cloudflareAIGatewayWorkers = Cloudflare.aiGateway("workers-ai/@cf/meta/llama-3.1-8b-instruct", { + accountId: process.env.CLOUDFLARE_ACCOUNT_ID ?? "fixture-account", + gatewayId: + process.env.CLOUDFLARE_GATEWAY_ID && process.env.CLOUDFLARE_GATEWAY_ID !== process.env.CLOUDFLARE_ACCOUNT_ID + ? 
process.env.CLOUDFLARE_GATEWAY_ID + : undefined, + gatewayApiKey: process.env.CLOUDFLARE_API_TOKEN ?? "fixture", +}) +const cloudflareAIGatewayWorkersTools = Cloudflare.aiGateway("workers-ai/@cf/openai/gpt-oss-20b", { + accountId: process.env.CLOUDFLARE_ACCOUNT_ID ?? "fixture-account", + gatewayId: + process.env.CLOUDFLARE_GATEWAY_ID && process.env.CLOUDFLARE_GATEWAY_ID !== process.env.CLOUDFLARE_ACCOUNT_ID + ? process.env.CLOUDFLARE_GATEWAY_ID + : undefined, + gatewayApiKey: process.env.CLOUDFLARE_API_TOKEN ?? "fixture", +}) +const cloudflareWorkersAI = Cloudflare.workersAI("@cf/meta/llama-3.1-8b-instruct", { + accountId: process.env.CLOUDFLARE_ACCOUNT_ID ?? "fixture-account", + apiKey: process.env.CLOUDFLARE_API_KEY ?? "fixture", +}) +const cloudflareWorkersAITools = Cloudflare.workersAI("@cf/openai/gpt-oss-20b", { + accountId: process.env.CLOUDFLARE_ACCOUNT_ID ?? "fixture-account", + apiKey: process.env.CLOUDFLARE_API_KEY ?? "fixture", +}) +const deepseek = OpenAICompatible.deepseek.model("deepseek-chat", { apiKey: process.env.DEEPSEEK_API_KEY ?? "fixture" }) +const together = OpenAICompatible.togetherai.model("meta-llama/Llama-3.3-70B-Instruct-Turbo", { + apiKey: process.env.TOGETHER_AI_API_KEY ?? "fixture", +}) +const groq = OpenAICompatible.groq.model("llama-3.3-70b-versatile", { apiKey: process.env.GROQ_API_KEY ?? "fixture" }) +const openrouter = OpenRouter.model("openai/gpt-4o-mini", { apiKey: process.env.OPENROUTER_API_KEY ?? "fixture" }) +const openrouterGpt55 = OpenRouter.model("openai/gpt-5.5", { apiKey: process.env.OPENROUTER_API_KEY ?? "fixture" }) +const openrouterOpus = OpenRouter.model("anthropic/claude-opus-4.7", { + apiKey: process.env.OPENROUTER_API_KEY ?? "fixture", +}) + +const redactCloudflareURL = (url: string) => + url + .replace(/\/client\/v4\/accounts\/[^/]+\/ai\/v1\//, "/client/v4/accounts/{account}/ai/v1/") + .replace(/\/v1\/[^/]+\/[^/]+\/compat\//, "/v1/{account}/{gateway}/compat/") + +const cloudflareOptions = { + redact: { url: redactCloudflareURL }, +} + +describeRecordedGoldenScenarios([ + { + name: "OpenAI Chat gpt-4o-mini", + prefix: "openai-chat", + model: openAIChat, + requires: ["OPENAI_API_KEY"], + scenarios: ["text", "tool-call", "tool-loop"], + }, + { + name: "OpenAI Responses gpt-5.5", + prefix: "openai-responses", + model: openAIResponses, + requires: ["OPENAI_API_KEY"], + tags: ["flagship"], + scenarios: [ + { id: "text", temperature: false }, + { id: "tool-call", temperature: false }, + { id: "tool-loop", temperature: false }, + ], + }, + { + name: "OpenAI Responses WebSocket gpt-4.1-mini", + prefix: "openai-responses-websocket", + model: openAIResponsesWebSocket, + transport: "websocket", + requires: ["OPENAI_API_KEY"], + scenarios: ["tool-loop"], + }, + { + name: "Anthropic Haiku 4.5", + prefix: "anthropic-messages", + model: anthropicHaiku, + requires: ["ANTHROPIC_API_KEY"], + options: { requestHeaders: ["content-type", "anthropic-version"] }, + scenarios: ["text", "tool-call"], + }, + { + name: "Anthropic Opus 4.7", + prefix: "anthropic-messages", + model: anthropicOpus, + requires: ["ANTHROPIC_API_KEY"], + tags: ["flagship"], + options: { requestHeaders: ["content-type", "anthropic-version"] }, + scenarios: [{ id: "tool-loop", temperature: false }], + }, + { + name: "Gemini 2.5 Flash", + prefix: "gemini", + model: gemini, + requires: ["GOOGLE_GENERATIVE_AI_API_KEY"], + scenarios: [{ id: "text", maxTokens: 80 }, "tool-call"], + }, + { + name: "xAI Grok 3 Mini", + prefix: "xai", + model: xaiBasic, + requires: ["XAI_API_KEY"], + scenarios: 
["text", "tool-call"], + }, + { + name: "xAI Grok 4.3", + prefix: "xai", + model: xaiFlagship, + requires: ["XAI_API_KEY"], + tags: ["flagship"], + scenarios: [{ id: "tool-loop", timeout: 30_000 }], + }, + { + name: "Cloudflare AI Gateway Workers AI Llama 3.1 8B", + prefix: "cloudflare-ai-gateway", + model: cloudflareAIGatewayWorkers, + requires: ["CLOUDFLARE_ACCOUNT_ID", "CLOUDFLARE_API_TOKEN"], + options: cloudflareOptions, + scenarios: ["text"], + }, + { + name: "Cloudflare AI Gateway Workers AI GPT OSS 20B Tools", + prefix: "cloudflare-ai-gateway", + model: cloudflareAIGatewayWorkersTools, + requires: ["CLOUDFLARE_ACCOUNT_ID", "CLOUDFLARE_API_TOKEN"], + options: cloudflareOptions, + scenarios: [{ id: "tool-call", maxTokens: 120 }], + }, + { + name: "Cloudflare Workers AI Llama 3.1 8B", + prefix: "cloudflare-workers-ai", + model: cloudflareWorkersAI, + requires: ["CLOUDFLARE_ACCOUNT_ID", "CLOUDFLARE_API_KEY"], + options: cloudflareOptions, + scenarios: ["text"], + }, + { + name: "Cloudflare Workers AI GPT OSS 20B Tools", + prefix: "cloudflare-workers-ai", + model: cloudflareWorkersAITools, + requires: ["CLOUDFLARE_ACCOUNT_ID", "CLOUDFLARE_API_KEY"], + options: cloudflareOptions, + scenarios: [{ id: "tool-call", maxTokens: 120 }], + }, + { + name: "DeepSeek Chat", + prefix: "openai-compatible-chat", + model: deepseek, + requires: ["DEEPSEEK_API_KEY"], + scenarios: ["text"], + }, + { + name: "TogetherAI Llama 3.3 70B", + prefix: "openai-compatible-chat", + model: together, + requires: ["TOGETHER_AI_API_KEY"], + scenarios: ["text", "tool-call"], + }, + { + name: "Groq Llama 3.3 70B", + prefix: "openai-compatible-chat", + model: groq, + requires: ["GROQ_API_KEY"], + scenarios: ["text", "tool-call", { id: "tool-loop", timeout: 30_000 }], + }, + { + name: "OpenRouter gpt-4o-mini", + prefix: "openai-compatible-chat", + model: openrouter, + requires: ["OPENROUTER_API_KEY"], + scenarios: ["text", "tool-call", "tool-loop"], + }, + { + name: "OpenRouter gpt-5.5", + prefix: "openai-compatible-chat", + model: openrouterGpt55, + requires: ["OPENROUTER_API_KEY"], + tags: ["flagship"], + scenarios: ["tool-loop"], + }, + { + name: "OpenRouter Claude Opus 4.7", + prefix: "openai-compatible-chat", + model: openrouterOpus, + requires: ["OPENROUTER_API_KEY"], + tags: ["flagship"], + scenarios: ["tool-loop"], + }, +]) diff --git a/packages/llm/test/provider/openai-chat.test.ts b/packages/llm/test/provider/openai-chat.test.ts new file mode 100644 index 000000000000..09984010947f --- /dev/null +++ b/packages/llm/test/provider/openai-chat.test.ts @@ -0,0 +1,355 @@ +import { describe, expect } from "bun:test" +import { Effect, Schema, Stream } from "effect" +import { HttpClientRequest } from "effect/unstable/http" +import { LLM, LLMError } from "../../src" +import * as Azure from "../../src/providers/azure" +import * as OpenAI from "../../src/providers/openai" +import * as OpenAIChat from "../../src/protocols/openai-chat" +import { LLMClient } from "../../src/route" +import { it } from "../lib/effect" +import { dynamicResponse, fixedResponse, truncatedStream } from "../lib/http" +import { deltaChunk, usageChunk } from "../lib/openai-chunks" +import { sseEvents } from "../lib/sse" + +const TargetJson = Schema.fromJsonString(Schema.Unknown) +const encodeJson = Schema.encodeSync(TargetJson) +const decodeJson = Schema.decodeUnknownSync(TargetJson) + +const model = OpenAIChat.model({ + id: "gpt-4o-mini", + baseURL: "https://api.openai.test/v1/", + headers: { authorization: "Bearer test" }, +}) + +const request = 
LLM.request({ + id: "req_1", + model, + system: "You are concise.", + prompt: "Say hello.", + generation: { maxTokens: 20, temperature: 0 }, +}) + +describe("OpenAI Chat route", () => { + it.effect("prepares OpenAI Chat payload", () => + Effect.gen(function* () { + // Pass the OpenAIChat payload type so `prepared.body` is statically + // typed to the route's native shape — the assertions below read field + // names without `unknown` casts. + const prepared = yield* LLMClient.prepare(request) + const _typed: { readonly model: string; readonly stream: true } = prepared.body + + expect(prepared.body).toEqual({ + model: "gpt-4o-mini", + messages: [ + { role: "system", content: "You are concise." }, + { role: "user", content: "Say hello." }, + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 20, + temperature: 0, + }) + }), + ) + + it.effect("maps OpenAI provider options to Chat options", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + model: OpenAI.chat("gpt-4o-mini", { baseURL: "https://api.openai.test/v1/" }), + prompt: "think", + providerOptions: { openai: { reasoningEffort: "low" } }, + }), + ) + + expect(prepared.body.store).toBe(false) + expect(prepared.body.reasoning_effort).toBe("low") + }), + ) + + it.effect("adds native query params to the Chat Completions URL", () => + LLMClient.generate( + LLM.updateRequest(request, { model: OpenAIChat.model({ ...model, queryParams: { "api-version": "v1" } }) }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.url).toBe("https://api.openai.test/v1/chat/completions?api-version=v1") + return input.respond(sseEvents(deltaChunk({}, "stop")), { + headers: { "content-type": "text/event-stream" }, + }) + }), + ), + ), + ), + ) + + it.effect("uses Azure api-key header for static OpenAI Chat keys", () => + LLMClient.generate( + LLM.updateRequest(request, { + model: Azure.chat("gpt-4o-mini", { + baseURL: "https://opencode-test.openai.azure.com/openai/v1/", + apiKey: "azure-key", + headers: { authorization: "Bearer stale" }, + }), + }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.headers.get("api-key")).toBe("azure-key") + expect(web.headers.get("authorization")).toBeNull() + return input.respond(sseEvents(deltaChunk({}, "stop")), { + headers: { "content-type": "text/event-stream" }, + }) + }), + ), + ), + ), + ) + + it.effect("applies serializable HTTP overlays after payload lowering", () => + LLMClient.generate( + LLM.updateRequest(request, { + model: OpenAIChat.model({ ...model, apiKey: "fresh-key", headers: { authorization: "Bearer stale" } }), + http: { + body: { metadata: { source: "test" } }, + headers: { authorization: "Bearer request", "x-custom": "yes" }, + query: { debug: "1" }, + }, + }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.url).toBe("https://api.openai.test/v1/chat/completions?debug=1") + expect(web.headers.get("authorization")).toBe("Bearer fresh-key") + expect(web.headers.get("x-custom")).toBe("yes") + expect(decodeJson(input.text)).toMatchObject({ + stream: true, + stream_options: { include_usage: true }, + metadata: { source: "test" }, + }) + return 
input.respond(sseEvents(deltaChunk({}, "stop")), { + headers: { "content-type": "text/event-stream" }, + }) + }), + ), + ), + ), + ) + + it.effect("prepares assistant tool-call and tool-result messages", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_tool_result", + model, + messages: [ + LLM.user("What is the weather?"), + LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]), + LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }), + ], + }), + ) + + expect(prepared.body).toEqual({ + model: "gpt-4o-mini", + messages: [ + { role: "user", content: "What is the weather?" }, + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "call_1", + type: "function", + function: { name: "lookup", arguments: encodeJson({ query: "weather" }) }, + }, + ], + }, + { role: "tool", tool_call_id: "call_1", content: encodeJson({ forecast: "sunny" }) }, + ], + stream: true, + stream_options: { include_usage: true }, + }) + }), + ) + + it.effect("rejects unsupported user media content", () => + Effect.gen(function* () { + const error = yield* LLMClient.prepare( + LLM.request({ + id: "req_media", + model, + messages: [LLM.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })], + }), + ).pipe(Effect.flip) + + expect(error.message).toContain("OpenAI Chat user messages only support text content for now") + }), + ) + + it.effect("rejects unsupported assistant reasoning content", () => + Effect.gen(function* () { + const error = yield* LLMClient.prepare( + LLM.request({ + id: "req_reasoning", + model, + messages: [LLM.assistant({ type: "reasoning", text: "hidden" })], + }), + ).pipe(Effect.flip) + + expect(error.message).toContain("OpenAI Chat assistant messages only support text and tool-call content for now") + }), + ) + + it.effect("parses text and usage stream fixtures", () => + Effect.gen(function* () { + const body = sseEvents( + deltaChunk({ role: "assistant", content: "Hello" }), + deltaChunk({ content: "!" }), + deltaChunk({}, "stop"), + usageChunk({ + prompt_tokens: 5, + completion_tokens: 2, + total_tokens: 7, + prompt_tokens_details: { cached_tokens: 1 }, + completion_tokens_details: { reasoning_tokens: 0 }, + }), + ) + const response = yield* LLMClient.generate(request).pipe(Effect.provide(fixedResponse(body))) + + expect(response.text).toBe("Hello!") + expect(response.events).toEqual([ + { type: "text-delta", text: "Hello" }, + { type: "text-delta", text: "!" 
}, + { + type: "request-finish", + reason: "stop", + usage: { + inputTokens: 5, + outputTokens: 2, + reasoningTokens: 0, + cacheReadInputTokens: 1, + totalTokens: 7, + native: { + prompt_tokens: 5, + completion_tokens: 2, + total_tokens: 7, + prompt_tokens_details: { cached_tokens: 1 }, + completion_tokens_details: { reasoning_tokens: 0 }, + }, + }, + }, + ]) + }), + ) + + it.effect("assembles streamed tool call input", () => + Effect.gen(function* () { + const body = sseEvents( + deltaChunk({ + role: "assistant", + tool_calls: [{ index: 0, id: "call_1", function: { name: "lookup", arguments: '{"query"' } }], + }), + deltaChunk({ tool_calls: [{ index: 0, function: { arguments: ':"weather"}' } }] }), + deltaChunk({}, "tool_calls"), + ) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + expect(response.events).toEqual([ + { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' }, + { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' }, + { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } }, + { type: "request-finish", reason: "tool-calls", usage: undefined }, + ]) + }), + ) + + it.effect("does not finalize streamed tool calls without a finish reason", () => + Effect.gen(function* () { + const body = sseEvents( + deltaChunk({ + role: "assistant", + tool_calls: [{ index: 0, id: "call_1", function: { name: "lookup", arguments: '{"query"' } }], + }), + deltaChunk({ tool_calls: [{ index: 0, function: { arguments: ':"weather"}' } }] }), + ) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + expect(response.events).toEqual([ + { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' }, + { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' }, + ]) + expect(response.toolCalls).toEqual([]) + }), + ) + + it.effect("fails on malformed stream events", () => + Effect.gen(function* () { + const body = sseEvents(deltaChunk({ content: 123 })) + const error = yield* LLMClient.generate(request).pipe(Effect.provide(fixedResponse(body)), Effect.flip) + + expect(error.message).toContain("Invalid openai/openai-chat stream event") + }), + ) + + it.effect("surfaces transport errors that occur mid-stream", () => + Effect.gen(function* () { + const layer = truncatedStream([ + `data: ${JSON.stringify(deltaChunk({ role: "assistant", content: "Hello" }))}\n\n`, + ]) + const error = yield* LLMClient.generate(request).pipe(Effect.provide(layer), Effect.flip) + + expect(error.message).toContain("Failed to read openai/openai-chat stream") + }), + ) + + it.effect("fails HTTP provider errors before stream parsing", () => + Effect.gen(function* () { + const error = yield* LLMClient.generate(request).pipe( + Effect.provide( + fixedResponse('{"error":{"message":"Bad request","type":"invalid_request_error"}}', { + status: 400, + headers: { "content-type": "application/json" }, + }), + ), + Effect.flip, + ) + + expect(error).toBeInstanceOf(LLMError) + expect(error.reason).toMatchObject({ _tag: "InvalidRequest" }) + expect(error.message).toContain("HTTP 400") + }), + ) + + it.effect("short-circuits the upstream stream when the consumer takes a prefix", () => + 
Effect.gen(function* () { + // The body has more chunks than we'll consume. If `Stream.take(1)` did + // not interrupt the upstream HTTP body the test would hang waiting for + // the rest of the stream to drain. + const body = sseEvents( + deltaChunk({ role: "assistant", content: "Hello" }), + deltaChunk({ content: " world" }), + deltaChunk({}, "stop"), + ) + + const events = Array.from( + yield* LLMClient.stream(request).pipe(Stream.take(1), Stream.runCollect, Effect.provide(fixedResponse(body))), + ) + expect(events.map((event) => event.type)).toEqual(["text-delta"]) + }), + ) +}) diff --git a/packages/llm/test/provider/openai-compatible-chat.test.ts b/packages/llm/test/provider/openai-compatible-chat.test.ts new file mode 100644 index 000000000000..627e6ef4a0f2 --- /dev/null +++ b/packages/llm/test/provider/openai-compatible-chat.test.ts @@ -0,0 +1,237 @@ +import { describe, expect } from "bun:test" +import { Effect, Schema } from "effect" +import { HttpClientRequest } from "effect/unstable/http" +import { LLM } from "../../src" +import { LLMClient } from "../../src/route" +import * as OpenAICompatible from "../../src/providers/openai-compatible" +import * as OpenAICompatibleChat from "../../src/protocols/openai-compatible-chat" +import { it } from "../lib/effect" +import { dynamicResponse } from "../lib/http" +import { sseEvents } from "../lib/sse" + +const Json = Schema.fromJsonString(Schema.Unknown) +const decodeJson = Schema.decodeUnknownSync(Json) + +const model = OpenAICompatibleChat.model({ + id: "deepseek-chat", + provider: "deepseek", + baseURL: "https://api.deepseek.test/v1/", + apiKey: "test-key", + queryParams: { "api-version": "2026-01-01" }, +}) + +const request = LLM.request({ + id: "req_1", + model, + system: "You are concise.", + prompt: "Say hello.", + generation: { maxTokens: 20, temperature: 0 }, +}) + +const deltaChunk = (delta: object, finishReason: string | null = null) => ({ + id: "chatcmpl_fixture", + choices: [{ delta, finish_reason: finishReason }], + usage: null, +}) + +const usageChunk = (usage: object) => ({ + id: "chatcmpl_fixture", + choices: [], + usage, +}) + +const providerFamilies = [ + ["baseten", OpenAICompatible.baseten, "https://inference.baseten.co/v1"], + ["cerebras", OpenAICompatible.cerebras, "https://api.cerebras.ai/v1"], + ["deepinfra", OpenAICompatible.deepinfra, "https://api.deepinfra.com/v1/openai"], + ["deepseek", OpenAICompatible.deepseek, "https://api.deepseek.com/v1"], + ["fireworks", OpenAICompatible.fireworks, "https://api.fireworks.ai/inference/v1"], + ["togetherai", OpenAICompatible.togetherai, "https://api.together.xyz/v1"], +] as const + +describe("OpenAI-compatible Chat route", () => { + it.effect("prepares generic Chat target", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.updateRequest(request, { + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + toolChoice: { type: "required" }, + }), + ) + + expect(prepared.route).toBe("openai-compatible-chat") + expect(prepared.model).toMatchObject({ + id: "deepseek-chat", + provider: "deepseek", + route: "openai-compatible-chat", + baseURL: "https://api.deepseek.test/v1/", + apiKey: "test-key", + queryParams: { "api-version": "2026-01-01" }, + }) + expect(prepared.body).toEqual({ + model: "deepseek-chat", + messages: [ + { role: "system", content: "You are concise." }, + { role: "user", content: "Say hello." 
}, + ], + tools: [ + { + type: "function", + function: { name: "lookup", description: "Lookup data", parameters: { type: "object" } }, + }, + ], + tool_choice: "required", + stream: true, + stream_options: { include_usage: true }, + max_tokens: 20, + temperature: 0, + }) + }), + ) + + it.effect("provides model helpers for compatible provider families", () => + Effect.gen(function* () { + expect( + providerFamilies.map(([provider, family]) => { + const model = family.model(`${provider}-model`, { apiKey: "test-key" }) + return { + id: String(model.id), + provider: String(model.provider), + route: model.route, + baseURL: model.baseURL, + apiKey: model.apiKey, + } + }), + ).toEqual( + providerFamilies.map(([provider, _, baseURL]) => ({ + id: `${provider}-model`, + provider, + route: "openai-compatible-chat", + baseURL, + apiKey: "test-key", + })), + ) + + const custom = OpenAICompatible.deepseek.model("deepseek-chat", { + apiKey: "test-key", + baseURL: "https://custom.deepseek.test/v1", + }) + expect(custom).toMatchObject({ + provider: "deepseek", + route: "openai-compatible-chat", + baseURL: "https://custom.deepseek.test/v1", + }) + }), + ) + + it.effect("matches AI SDK compatible basic request body fixture", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare(request) + + expect(prepared.body).toEqual({ + model: "deepseek-chat", + messages: [ + { role: "system", content: "You are concise." }, + { role: "user", content: "Say hello." }, + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 20, + temperature: 0, + }) + }), + ) + + it.effect("matches AI SDK compatible tool request body fixture", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_tool_parity", + model, + tools: [ + { + name: "lookup", + description: "Lookup data", + inputSchema: { type: "object", properties: { query: { type: "string" } }, required: ["query"] }, + }, + ], + toolChoice: "lookup", + messages: [ + LLM.user("What is the weather?"), + LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]), + LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }), + ], + }), + ) + + expect(prepared.body).toEqual({ + model: "deepseek-chat", + messages: [ + { role: "user", content: "What is the weather?" 
}, + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "call_1", + type: "function", + function: { name: "lookup", arguments: '{"query":"weather"}' }, + }, + ], + }, + { role: "tool", tool_call_id: "call_1", content: '{"forecast":"sunny"}' }, + ], + tools: [ + { + type: "function", + function: { + name: "lookup", + description: "Lookup data", + parameters: { type: "object", properties: { query: { type: "string" } }, required: ["query"] }, + }, + }, + ], + tool_choice: { type: "function", function: { name: "lookup" } }, + stream: true, + stream_options: { include_usage: true }, + }) + }), + ) + + it.effect("posts to the configured compatible endpoint and parses text usage", () => + Effect.gen(function* () { + const response = yield* LLMClient.generate(request).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.url).toBe("https://api.deepseek.test/v1/chat/completions?api-version=2026-01-01") + expect(web.headers.get("authorization")).toBe("Bearer test-key") + expect(decodeJson(input.text)).toMatchObject({ + model: "deepseek-chat", + stream: true, + messages: [ + { role: "system", content: "You are concise." }, + { role: "user", content: "Say hello." }, + ], + }) + return input.respond( + sseEvents( + deltaChunk({ role: "assistant", content: "Hello" }), + deltaChunk({ content: "!" }), + deltaChunk({}, "stop"), + usageChunk({ prompt_tokens: 5, completion_tokens: 2, total_tokens: 7 }), + ), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ), + ), + ) + + expect(response.text).toBe("Hello!") + expect(response.usage).toMatchObject({ inputTokens: 5, outputTokens: 2, totalTokens: 7 }) + expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "stop" }) + }), + ) +}) diff --git a/packages/llm/test/provider/openai-responses.test.ts b/packages/llm/test/provider/openai-responses.test.ts new file mode 100644 index 000000000000..30add06d83e5 --- /dev/null +++ b/packages/llm/test/provider/openai-responses.test.ts @@ -0,0 +1,549 @@ +import { describe, expect } from "bun:test" +import { ConfigProvider, Effect, Layer, Stream } from "effect" +import { Headers, HttpClientRequest } from "effect/unstable/http" +import { LLM, LLMError } from "../../src" +import { Auth, LLMClient, RequestExecutor, WebSocketExecutor } from "../../src/route" +import * as Azure from "../../src/providers/azure" +import * as OpenAI from "../../src/providers/openai" +import * as OpenAIResponses from "../../src/protocols/openai-responses" +import * as ProviderShared from "../../src/protocols/shared" +import { it } from "../lib/effect" +import { dynamicResponse, fixedResponse } from "../lib/http" +import { sseEvents } from "../lib/sse" + +const model = OpenAIResponses.model({ + id: "gpt-4.1-mini", + baseURL: "https://api.openai.test/v1/", + headers: { authorization: "Bearer test" }, +}) + +const request = LLM.request({ + id: "req_1", + model, + system: "You are concise.", + prompt: "Say hello.", + generation: { maxTokens: 20, temperature: 0 }, +}) + +const configEnv = (env: Record<string, string>) => Effect.provide(ConfigProvider.layer(ConfigProvider.fromEnv({ env }))) + +describe("OpenAI Responses route", () => { + it.effect("prepares OpenAI Responses target", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare(request) + + expect(prepared.body).toEqual({ + model: "gpt-4.1-mini", + input: [ + { role: "system", content: "You are concise."
}, + { role: "user", content: [{ type: "input_text", text: "Say hello." }] }, + ], + stream: true, + max_output_tokens: 20, + temperature: 0, + }) + }), + ) + + it.effect("prepares OpenAI Responses WebSocket target", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.updateRequest(request, { + model: OpenAI.responsesWebSocket("gpt-4.1-mini", { baseURL: "https://api.openai.test/v1/", apiKey: "test" }), + }), + ) + + expect(prepared.route).toBe("openai-responses-websocket") + expect(prepared.protocol).toBe("openai-responses") + expect(prepared.metadata).toEqual({ transport: "websocket-json" }) + expect(prepared.body).toMatchObject({ model: "gpt-4.1-mini", stream: true }) + }), + ) + + it.effect("streams OpenAI Responses over WebSocket", () => + Effect.gen(function* () { + const sent: string[] = [] + const opened: Array<{ readonly url: string; readonly authorization: string | undefined }> = [] + let closed = false + const deps = Layer.mergeAll( + Layer.succeed( + RequestExecutor.Service, + RequestExecutor.Service.of({ + execute: () => Effect.die("unexpected HTTP request"), + }), + ), + Layer.succeed( + WebSocketExecutor.Service, + WebSocketExecutor.Service.of({ + open: (input) => + Effect.succeed({ + sendText: (message) => + Effect.sync(() => { + opened.push({ url: input.url, authorization: input.headers.authorization }) + sent.push(message) + }), + messages: Stream.fromArray([ + ProviderShared.encodeJson({ type: "response.output_text.delta", item_id: "msg_1", delta: "Hi" }), + ProviderShared.encodeJson({ type: "response.completed", response: { id: "resp_ws" } }), + ]), + close: Effect.sync(() => { + closed = true + }), + }), + }), + ), + ) + const response = yield* LLMClient.generate( + LLM.request({ + model: OpenAI.responsesWebSocket("gpt-4.1-mini", { baseURL: "https://api.openai.test/v1/", apiKey: "test" }), + prompt: "Say hello.", + }), + ).pipe(Effect.provide(LLMClient.layerWithWebSocket.pipe(Layer.provide(deps)))) + + expect(response.text).toBe("Hi") + expect(opened).toEqual([{ url: "wss://api.openai.test/v1/responses", authorization: "Bearer test" }]) + expect(closed).toBe(true) + expect(sent).toHaveLength(1) + expect(JSON.parse(sent[0])).toEqual({ + type: "response.create", + model: "gpt-4.1-mini", + input: [{ role: "user", content: [{ type: "input_text", text: "Say hello." 
}] }], + store: false, + }) + }), + ) + + it.effect("requires WebSocket runtime for OpenAI Responses WebSocket", () => + Effect.gen(function* () { + const error = yield* LLMClient.generate( + LLM.request({ + model: OpenAI.responsesWebSocket("gpt-4.1-mini", { baseURL: "https://api.openai.test/v1/", apiKey: "test" }), + prompt: "Say hello.", + }), + ).pipe( + Effect.provide( + LLMClient.layer.pipe( + Layer.provide( + Layer.succeed( + RequestExecutor.Service, + RequestExecutor.Service.of({ + execute: () => Effect.die("unexpected HTTP request"), + }), + ), + ), + ), + ), + Effect.flip, + ) + + expect(error.message).toContain("requires WebSocketExecutor.Service") + }), + ) + + it.effect("fails immediately when WebSocket is already closed", () => + Effect.gen(function* () { + const error = yield* WebSocketExecutor.fromWebSocket( + { readyState: globalThis.WebSocket.CLOSED } as globalThis.WebSocket, + { url: "wss://api.openai.test/v1/responses", headers: Headers.empty }, + ).pipe(Effect.flip) + + expect(error.message).toContain("closed before opening") + }), + ) + + it.effect("adds native query params to the Responses URL", () => + Effect.gen(function* () { + yield* LLMClient.generate( + LLM.updateRequest(request, { + model: OpenAIResponses.model({ ...model, queryParams: { "api-version": "v1" } }), + }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.url).toBe("https://api.openai.test/v1/responses?api-version=v1") + return input.respond(sseEvents({ type: "response.completed", response: {} }), { + headers: { "content-type": "text/event-stream" }, + }) + }), + ), + ), + ) + }), + ) + + it.effect("uses Azure api-key header for static OpenAI Responses keys", () => + Effect.gen(function* () { + yield* LLMClient.generate( + LLM.updateRequest(request, { + model: Azure.responses("gpt-4.1-mini", { + baseURL: "https://opencode-test.openai.azure.com/openai/v1/", + apiKey: "azure-key", + headers: { authorization: "Bearer stale" }, + }), + }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.headers.get("api-key")).toBe("azure-key") + expect(web.headers.get("authorization")).toBeNull() + return input.respond(sseEvents({ type: "response.completed", response: {} }), { + headers: { "content-type": "text/event-stream" }, + }) + }), + ), + ), + ) + }), + ) + + it.effect("loads OpenAI default auth from Effect Config", () => + LLMClient.generate( + LLM.updateRequest(request, { + model: OpenAI.responses("gpt-4.1-mini", { baseURL: "https://api.openai.test/v1/" }), + }), + ).pipe( + configEnv({ OPENAI_API_KEY: "env-key" }), + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.headers.get("authorization")).toBe("Bearer env-key") + return input.respond(sseEvents({ type: "response.completed", response: {} }), { + headers: { "content-type": "text/event-stream" }, + }) + }), + ), + ), + ), + ) + + it.effect("lets explicit auth override OpenAI default API key auth", () => + LLMClient.generate( + LLM.updateRequest(request, { + model: OpenAI.responses("gpt-4.1-mini", { + baseURL: "https://api.openai.test/v1/", + auth: Auth.bearer("oauth-token"), + }), + }), + ).pipe( + Effect.provide( + dynamicResponse((input) => + Effect.gen(function* () { + const web = yield* 
HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) + expect(web.headers.get("authorization")).toBe("Bearer oauth-token") + return input.respond(sseEvents({ type: "response.completed", response: {} }), { + headers: { "content-type": "text/event-stream" }, + }) + }), + ), + ), + ), + ) + + it.effect("prepares function call and function output input items", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + id: "req_tool_result", + model, + messages: [ + LLM.user("What is the weather?"), + LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]), + LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }), + ], + }), + ) + + expect(prepared.body).toEqual({ + model: "gpt-4.1-mini", + input: [ + { role: "user", content: [{ type: "input_text", text: "What is the weather?" }] }, + { type: "function_call", call_id: "call_1", name: "lookup", arguments: '{"query":"weather"}' }, + { type: "function_call_output", call_id: "call_1", output: '{"forecast":"sunny"}' }, + ], + stream: true, + }) + }), + ) + + it.effect("maps OpenAI provider options to Responses options", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + model: OpenAI.model("gpt-5.2", { baseURL: "https://api.openai.test/v1/" }), + prompt: "think", + providerOptions: { + openai: { + promptCacheKey: "session_123", + reasoningEffort: "high", + reasoningSummary: "auto", + includeEncryptedReasoning: true, + }, + }, + }), + ) + + expect(prepared.body.store).toBe(false) + expect(prepared.body.prompt_cache_key).toBe("session_123") + expect(prepared.body.include).toEqual(["reasoning.encrypted_content"]) + expect(prepared.body.reasoning).toEqual({ effort: "high", summary: "auto" }) + expect(prepared.body.text).toEqual({ verbosity: "low" }) + }), + ) + + it.effect("request OpenAI provider options override model defaults", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + model: OpenAI.model("gpt-4.1-mini", { + baseURL: "https://api.openai.test/v1/", + providerOptions: { openai: { promptCacheKey: "model_cache" } }, + }), + prompt: "no cache", + providerOptions: { openai: { promptCacheKey: "request_cache" } }, + }), + ) + + expect(prepared.body.prompt_cache_key).toBe("request_cache") + }), + ) + + it.effect("parses text and usage stream fixtures", () => + Effect.gen(function* () { + const body = sseEvents( + { type: "response.output_text.delta", item_id: "msg_1", delta: "Hello" }, + { type: "response.output_text.delta", item_id: "msg_1", delta: "!" 
}, + { + type: "response.completed", + response: { + id: "resp_1", + service_tier: "default", + usage: { + input_tokens: 5, + output_tokens: 2, + total_tokens: 7, + input_tokens_details: { cached_tokens: 1 }, + output_tokens_details: { reasoning_tokens: 0 }, + }, + }, + }, + ) + const response = yield* LLMClient.generate(request).pipe(Effect.provide(fixedResponse(body))) + + expect(response.text).toBe("Hello!") + expect(response.events).toEqual([ + { type: "text-delta", id: "msg_1", text: "Hello", providerMetadata: { openai: { itemId: "msg_1" } } }, + { type: "text-delta", id: "msg_1", text: "!", providerMetadata: { openai: { itemId: "msg_1" } } }, + { + type: "request-finish", + reason: "stop", + providerMetadata: { openai: { responseId: "resp_1", serviceTier: "default" } }, + usage: { + inputTokens: 5, + outputTokens: 2, + reasoningTokens: 0, + cacheReadInputTokens: 1, + totalTokens: 7, + native: { + input_tokens: 5, + output_tokens: 2, + total_tokens: 7, + input_tokens_details: { cached_tokens: 1 }, + output_tokens_details: { reasoning_tokens: 0 }, + }, + }, + }, + ]) + }), + ) + + it.effect("assembles streamed function call input", () => + Effect.gen(function* () { + const body = sseEvents( + { + type: "response.output_item.added", + item: { type: "function_call", id: "item_1", call_id: "call_1", name: "lookup", arguments: "" }, + }, + { type: "response.function_call_arguments.delta", item_id: "item_1", delta: '{"query"' }, + { type: "response.function_call_arguments.delta", item_id: "item_1", delta: ':"weather"}' }, + { + type: "response.output_item.done", + item: { + type: "function_call", + id: "item_1", + call_id: "call_1", + name: "lookup", + arguments: '{"query":"weather"}', + }, + }, + { type: "response.completed", response: { usage: { input_tokens: 5, output_tokens: 1 } } }, + ) + const response = yield* LLMClient.generate( + LLM.updateRequest(request, { + tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }], + }), + ).pipe(Effect.provide(fixedResponse(body))) + + expect(response.events).toEqual([ + { + type: "tool-input-delta", + id: "call_1", + name: "lookup", + text: '{"query"', + providerMetadata: { openai: { itemId: "item_1" } }, + }, + { + type: "tool-input-delta", + id: "call_1", + name: "lookup", + text: ':"weather"}', + providerMetadata: { openai: { itemId: "item_1" } }, + }, + { + type: "tool-call", + id: "call_1", + name: "lookup", + input: { query: "weather" }, + providerMetadata: { openai: { itemId: "item_1" } }, + }, + { + type: "request-finish", + reason: "tool-calls", + usage: { inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } }, + }, + ]) + }), + ) + + it.effect("decodes web_search_call as provider-executed tool-call + tool-result", () => + Effect.gen(function* () { + const item = { + type: "web_search_call", + id: "ws_1", + status: "completed", + action: { type: "search", query: "effect 4" }, + } + const body = sseEvents( + { type: "response.output_item.added", item }, + { type: "response.output_item.done", item }, + { type: "response.completed", response: { usage: { input_tokens: 5, output_tokens: 1 } } }, + ) + const response = yield* LLMClient.generate(request).pipe(Effect.provide(fixedResponse(body))) + + const callsAndResults = response.events.filter( + (event) => event.type === "tool-call" || event.type === "tool-result", + ) + expect(callsAndResults).toEqual([ + { + type: "tool-call", + id: "ws_1", + name: "web_search", + input: { type: "search", query: "effect 4" }, + 
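// expected input mirrors the web_search_call "action" payload from the fixture item above +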
providerExecuted: true, + providerMetadata: { openai: { itemId: "ws_1" } }, + }, + { + type: "tool-result", + id: "ws_1", + name: "web_search", + result: { type: "json", value: item }, + providerExecuted: true, + providerMetadata: { openai: { itemId: "ws_1" } }, + }, + ]) + }), + ) + + it.effect("decodes code_interpreter_call as provider-executed events with code input", () => + Effect.gen(function* () { + const item = { + type: "code_interpreter_call", + id: "ci_1", + status: "completed", + code: "print(1+1)", + container_id: "cnt_xyz", + outputs: [{ type: "logs", logs: "2\n" }], + } + const body = sseEvents( + { type: "response.output_item.done", item }, + { type: "response.completed", response: { usage: { input_tokens: 5, output_tokens: 1 } } }, + ) + const response = yield* LLMClient.generate(request).pipe(Effect.provide(fixedResponse(body))) + + const toolCall = response.events.find((event) => event.type === "tool-call") + expect(toolCall).toEqual({ + type: "tool-call", + id: "ci_1", + name: "code_interpreter", + input: { code: "print(1+1)", container_id: "cnt_xyz" }, + providerExecuted: true, + providerMetadata: { openai: { itemId: "ci_1" } }, + }) + const toolResult = response.events.find((event) => event.type === "tool-result") + expect(toolResult).toEqual({ + type: "tool-result", + id: "ci_1", + name: "code_interpreter", + result: { type: "json", value: item }, + providerExecuted: true, + providerMetadata: { openai: { itemId: "ci_1" } }, + }) + }), + ) + + it.effect("rejects unsupported user media content", () => + Effect.gen(function* () { + const error = yield* LLMClient.prepare( + LLM.request({ + id: "req_media", + model, + messages: [LLM.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })], + }), + ).pipe(Effect.flip) + + expect(error.message).toContain("OpenAI Responses user messages only support text content for now") + }), + ) + + it.effect("emits provider-error events for mid-stream provider errors", () => + Effect.gen(function* () { + const response = yield* LLMClient.generate(request).pipe( + Effect.provide(fixedResponse(sseEvents({ type: "error", code: "rate_limit_exceeded", message: "Slow down" }))), + ) + + expect(response.events).toEqual([{ type: "provider-error", message: "Slow down" }]) + }), + ) + + it.effect("falls back to error code when no message is present", () => + Effect.gen(function* () { + const response = yield* LLMClient.generate(request).pipe( + Effect.provide(fixedResponse(sseEvents({ type: "error", code: "internal_error" }))), + ) + + expect(response.events).toEqual([{ type: "provider-error", message: "internal_error" }]) + }), + ) + + it.effect("fails HTTP provider errors before stream parsing", () => + Effect.gen(function* () { + const error = yield* LLMClient.generate(request).pipe( + Effect.provide( + fixedResponse('{"error":{"type":"invalid_request_error","message":"Bad request"}}', { + status: 400, + headers: { "content-type": "application/json" }, + }), + ), + Effect.flip, + ) + + expect(error).toBeInstanceOf(LLMError) + expect(error.reason).toMatchObject({ _tag: "InvalidRequest" }) + expect(error.message).toContain("HTTP 400") + }), + ) +}) diff --git a/packages/llm/test/provider/openrouter.test.ts b/packages/llm/test/provider/openrouter.test.ts new file mode 100644 index 000000000000..b3fb6bddc76a --- /dev/null +++ b/packages/llm/test/provider/openrouter.test.ts @@ -0,0 +1,56 @@ +import { describe, expect } from "bun:test" +import { Effect } from "effect" +import { LLM } from "../../src" +import { LLMClient } from 
"../../src/route" +import * as OpenRouter from "../../src/providers/openrouter" +import { it } from "../lib/effect" + +describe("OpenRouter", () => { + it.effect("prepares OpenRouter models through the OpenAI-compatible Chat route", () => + Effect.gen(function* () { + const model = OpenRouter.model("openai/gpt-4o-mini", { apiKey: "test-key" }) + + expect(model).toMatchObject({ + id: "openai/gpt-4o-mini", + provider: "openrouter", + route: "openrouter", + baseURL: "https://openrouter.ai/api/v1", + apiKey: "test-key", + }) + + const prepared = yield* LLMClient.prepare(LLM.request({ model, prompt: "Say hello." })) + + expect(prepared.route).toBe("openrouter") + expect(prepared.body).toMatchObject({ + model: "openai/gpt-4o-mini", + messages: [{ role: "user", content: "Say hello." }], + stream: true, + }) + }), + ) + + it.effect("applies OpenRouter payload options from the model helper", () => + Effect.gen(function* () { + const prepared = yield* LLMClient.prepare( + LLM.request({ + model: OpenRouter.model("anthropic/claude-3.7-sonnet:thinking", { + providerOptions: { + openrouter: { + usage: true, + reasoning: { effort: "high" }, + promptCacheKey: "session_123", + }, + }, + }), + prompt: "Think briefly.", + }), + ) + + expect(prepared.body).toMatchObject({ + usage: { include: true }, + reasoning: { effort: "high" }, + prompt_cache_key: "session_123", + }) + }), + ) +}) diff --git a/packages/llm/test/recorded-golden.ts b/packages/llm/test/recorded-golden.ts new file mode 100644 index 000000000000..6a6c8c7ac9d4 --- /dev/null +++ b/packages/llm/test/recorded-golden.ts @@ -0,0 +1,103 @@ +import type { HttpRecorder } from "@opencode-ai/http-recorder" +import { describe, type TestOptions } from "bun:test" +import { Effect } from "effect" +import type { ModelRef } from "../src" +import { goldenScenarioTags, runGoldenScenario, type GoldenScenarioID } from "./recorded-scenarios" +import { recordedTests } from "./recorded-test" +import { kebab } from "./recorded-utils" + +type Transport = "http" | "websocket" + +type ScenarioInput = + | GoldenScenarioID + | { + readonly id: GoldenScenarioID + readonly name?: string + readonly cassette?: string + readonly tags?: ReadonlyArray + readonly maxTokens?: number + readonly temperature?: number | false + readonly timeout?: number | TestOptions + } + +type TargetInput = { + readonly name: string + readonly model: ModelRef + readonly protocol?: string + readonly requires?: ReadonlyArray + readonly transport?: Transport + readonly prefix?: string + readonly tags?: ReadonlyArray + readonly metadata?: Record + readonly options?: HttpRecorder.RecordReplayOptions + readonly scenarios: ReadonlyArray +} + +const scenarioInput = (input: ScenarioInput) => (typeof input === "string" ? { id: input } : input) + +const scenarioTitle = (id: GoldenScenarioID) => { + if (id === "text") return "streams text" + if (id === "tool-call") return "streams tool call" + return "drives a tool loop" +} + +const defaultPrefix = (target: TargetInput) => { + if (target.prefix) return target.prefix + const transport = target.transport === "websocket" ? "-websocket" : "" + return `${target.model.provider}-${target.protocol ?? target.model.route}${transport}` +} + +const metadata = (target: TargetInput) => ({ + provider: target.model.provider, + protocol: target.protocol, + route: target.model.route, + transport: target.transport ?? "http", + model: target.model.id, + ...target.metadata, +}) + +const tags = (target: TargetInput) => [ + ...(target.transport === "websocket" ? 
["transport:websocket"] : []), + ...(target.tags ?? []), +] + +const runTarget = (target: TargetInput) => { + const recorded = recordedTests({ + prefix: defaultPrefix(target), + provider: target.model.provider, + protocol: target.protocol, + requires: target.requires, + tags: tags(target), + metadata: metadata(target), + options: target.options, + }) + + describe(`${target.name} recorded`, () => { + target.scenarios.forEach((raw) => { + const input = scenarioInput(raw) + const name = input.name ?? scenarioTitle(input.id) + recorded.effect.with( + name, + { + cassette: input.cassette, + id: `${kebab(target.name)}-${input.id}`, + tags: [...goldenScenarioTags(input.id), ...(input.tags ?? [])], + }, + () => + Effect.gen(function* () { + yield* runGoldenScenario(input.id, { + id: `recorded_${kebab(target.name).replaceAll("-", "_")}_${input.id.replaceAll("-", "_")}`, + model: target.model, + maxTokens: input.maxTokens, + temperature: input.temperature, + }) + }), + input.timeout, + ) + }) + }) +} + +export const describeRecordedGoldenScenarios = (targets: ReadonlyArray) => { + targets.forEach(runTarget) +} diff --git a/packages/llm/test/recorded-runner.ts b/packages/llm/test/recorded-runner.ts new file mode 100644 index 000000000000..97d9b03f5462 --- /dev/null +++ b/packages/llm/test/recorded-runner.ts @@ -0,0 +1,100 @@ +import { test, type TestOptions } from "bun:test" +import { Effect, type Layer } from "effect" +import { testEffect } from "./lib/effect" +import { cassetteName, classifiedTags, matchesSelected, missingEnv, unique } from "./recorded-utils" + +export type RecordedBody = Effect.Effect | (() => Effect.Effect) + +export type RecordedGroupOptions = { + readonly prefix: string + readonly provider?: string + readonly protocol?: string + readonly requires?: ReadonlyArray + readonly tags?: ReadonlyArray + readonly metadata?: Record +} + +export type RecordedCaseOptions = { + readonly cassette?: string + readonly id?: string + readonly provider?: string + readonly protocol?: string + readonly requires?: ReadonlyArray + readonly tags?: ReadonlyArray + readonly metadata?: Record +} + +export const recordedEffectGroup = < + R, + E, + Options extends RecordedGroupOptions, + CaseOptions extends RecordedCaseOptions, +>(input: { + readonly duplicateLabel: string + readonly options: Options + readonly cassetteExists: (cassette: string) => boolean + readonly layer: (input: { + readonly cassette: string + readonly tags: ReadonlyArray + readonly metadata: Record + readonly recording: boolean + readonly options: Options + readonly caseOptions: CaseOptions + }) => Layer.Layer +}) => { + const cassettes = new Set() + + const run = ( + name: string, + caseOptions: CaseOptions, + body: RecordedBody, + testOptions?: number | TestOptions, + ) => { + const cassette = cassetteName(input.options.prefix, name, caseOptions) + if (cassettes.has(cassette)) throw new Error(`Duplicate ${input.duplicateLabel} "${cassette}"`) + cassettes.add(cassette) + const tags = unique([ + ...classifiedTags(input.options), + ...classifiedTags({ + provider: caseOptions.provider, + protocol: caseOptions.protocol, + tags: caseOptions.tags, + }), + ]) + + if (!matchesSelected({ prefix: input.options.prefix, name, cassette, tags })) + return test.skip(name, () => {}, testOptions) + + const recording = process.env.RECORD === "true" + if (recording) { + if (missingEnv([...(input.options.requires ?? []), ...(caseOptions.requires ?? 
[])]).length > 0) { + return test.skip(name, () => {}, testOptions) + } + } else if (!input.cassetteExists(cassette)) { + return test.skip(name, () => {}, testOptions) + } + + return testEffect( + input.layer({ + cassette, + tags, + metadata: { ...input.options.metadata, ...caseOptions.metadata, tags }, + recording, + options: input.options, + caseOptions, + }), + ).live(name, body, testOptions) + } + + const effect = (name: string, body: RecordedBody, testOptions?: number | TestOptions) => + run(name, {} as CaseOptions, body, testOptions) + + effect.with = ( + name: string, + caseOptions: CaseOptions, + body: RecordedBody, + testOptions?: number | TestOptions, + ) => run(name, caseOptions, body, testOptions) + + return { effect } +} diff --git a/packages/llm/test/recorded-scenarios.ts b/packages/llm/test/recorded-scenarios.ts new file mode 100644 index 000000000000..3fb3e0b9a950 --- /dev/null +++ b/packages/llm/test/recorded-scenarios.ts @@ -0,0 +1,265 @@ +import { expect } from "bun:test" +import { Effect, Schema, Stream } from "effect" +import { LLM, LLMEvent, LLMResponse, type LLMRequest, type ModelRef } from "../src" +import { LLMClient } from "../src/route" +import { tool } from "../src/tool" + +export const weatherToolName = "get_weather" + +export const weatherTool = LLM.toolDefinition({ + name: weatherToolName, + description: "Get current weather for a city.", + inputSchema: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + additionalProperties: false, + }, +}) + +export const weatherRuntimeTool = tool({ + description: weatherTool.description, + parameters: Schema.Struct({ city: Schema.String }), + success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }), + execute: ({ city }) => + Effect.succeed( + city === "Paris" ? { temperature: 22, condition: "sunny" } : { temperature: 0, condition: "unknown" }, + ), +}) + +export const textRequest = (input: { + readonly id: string + readonly model: ModelRef + readonly prompt?: string + readonly maxTokens?: number + readonly temperature?: number | false +}) => + LLM.request({ + id: input.id, + model: input.model, + system: "You are concise.", + prompt: input.prompt ?? "Reply with exactly: Hello!", + generation: + input.temperature === false + ? { maxTokens: input.maxTokens ?? 20 } + : { maxTokens: input.maxTokens ?? 20, temperature: input.temperature ?? 0 }, + }) + +export const weatherToolRequest = (input: { + readonly id: string + readonly model: ModelRef + readonly maxTokens?: number + readonly temperature?: number | false +}) => + LLM.request({ + id: input.id, + model: input.model, + system: "Call tools exactly as requested.", + prompt: "Call get_weather with city exactly Paris.", + tools: [weatherTool], + toolChoice: LLM.toolChoice(weatherTool), + generation: + input.temperature === false + ? { maxTokens: input.maxTokens ?? 80 } + : { maxTokens: input.maxTokens ?? 80, temperature: input.temperature ?? 0 }, + }) + +export const weatherToolLoopRequest = (input: { + readonly id: string + readonly model: ModelRef + readonly system?: string + readonly maxTokens?: number + readonly temperature?: number | false +}) => + LLM.request({ + id: input.id, + model: input.model, + system: input.system ?? "Use the get_weather tool, then answer in one short sentence.", + prompt: "What is the weather in Paris?", + generation: + input.temperature === false + ? { maxTokens: input.maxTokens ?? 80 } + : { maxTokens: input.maxTokens ?? 80, temperature: input.temperature ?? 
0 }, + }) + +export const goldenWeatherToolLoopRequest = (input: { + readonly id: string + readonly model: ModelRef + readonly maxTokens?: number + readonly temperature?: number | false +}) => + weatherToolLoopRequest({ + ...input, + system: "Use the get_weather tool exactly once. After the tool result, reply exactly: Paris is sunny.", + }) + +export const runWeatherToolLoop = (request: LLMRequest) => + LLMClient.stream({ + request, + tools: { [weatherToolName]: weatherRuntimeTool }, + stopWhen: LLMClient.stepCountIs(10), + }).pipe( + Stream.runCollect, + Effect.map((events) => Array.from(events)), + ) + +export const expectFinish = ( + events: ReadonlyArray<LLMEvent>, + reason: Extract<LLMEvent, { type: "request-finish" }>["reason"], +) => expect(events.at(-1)).toMatchObject({ type: "request-finish", reason }) + +export const expectWeatherToolCall = (response: LLMResponse) => + expect(response.toolCalls).toMatchObject([ + { type: "tool-call", id: expect.any(String), name: weatherToolName, input: { city: "Paris" } }, + ]) + +export const expectWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => { + const finishes = events.filter(LLMEvent.is.requestFinish) + expect(finishes).toHaveLength(2) + expect(finishes[0]?.reason).toBe("tool-calls") + expect(finishes.at(-1)?.reason).toBe("stop") + + const toolCalls = events.filter(LLMEvent.is.toolCall) + expect(toolCalls).toHaveLength(1) + expect(toolCalls[0]).toMatchObject({ type: "tool-call", name: weatherToolName, input: { city: "Paris" } }) + + const toolResults = events.filter(LLMEvent.is.toolResult) + expect(toolResults).toHaveLength(1) + expect(toolResults[0]).toMatchObject({ + type: "tool-result", + name: weatherToolName, + result: { type: "json", value: { temperature: 22, condition: "sunny" } }, + }) + + const output = LLMResponse.text({ events }) + expect(output).toContain("Paris") + expect(output.trim().length).toBeGreaterThan(0) +} + +export const expectGoldenWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => { + expectWeatherToolLoop(events) + expect(LLMResponse.text({ events }).trim()).toMatch(/^Paris is sunny\.?$/) +} + +export type GoldenScenarioID = "text" | "tool-call" | "tool-loop" + +export interface GoldenScenarioContext { + readonly id: string + readonly model: ModelRef + readonly maxTokens?: number + readonly temperature?: number | false +} + +const generate = (request: LLMRequest) => LLMClient.generate(request) + +export const goldenScenarioTags = (id: GoldenScenarioID) => { + if (id === "text") return ["text", "golden"] + if (id === "tool-call") return ["tool", "tool-call", "golden"] + return ["tool", "tool-loop", "golden"] +} + +export const runGoldenScenario = (id: GoldenScenarioID, context: GoldenScenarioContext) => + Effect.gen(function* () { + if (id === "text") { + const response = yield* generate( + textRequest({ + id: context.id, + model: context.model, + prompt: "Reply exactly with: Hello!", + maxTokens: context.maxTokens ?? 40, + temperature: context.temperature, + }), + ) + expect(response.text.trim()).toMatch(/^Hello!?$/) + expectFinish(response.events, "stop") + return + } + + if (id === "tool-call") { + const response = yield* generate( + weatherToolRequest({ + id: context.id, + model: context.model, + maxTokens: context.maxTokens ?? 80, + temperature: context.temperature, + }), + ) + expectWeatherToolCall(response) + expectFinish(response.events, "tool-calls") + return + } + + expectGoldenWeatherToolLoop( + yield* runWeatherToolLoop( + goldenWeatherToolLoopRequest({ + id: context.id, + model: context.model, + maxTokens: context.maxTokens ??
80, + temperature: context.temperature, + }), + ), + ) + }) + +const usageSummary = (usage: LLMResponse["usage"] | undefined) => { + if (!usage) return undefined + return Object.fromEntries( + [ + ["inputTokens", usage.inputTokens], + ["outputTokens", usage.outputTokens], + ["reasoningTokens", usage.reasoningTokens], + ["cacheReadInputTokens", usage.cacheReadInputTokens], + ["cacheWriteInputTokens", usage.cacheWriteInputTokens], + ["totalTokens", usage.totalTokens], + ].filter((entry) => entry[1] !== undefined), + ) +} + +const pushText = (summary: Array<Record<string, unknown>>, type: "text" | "reasoning", value: string) => { + const last = summary.at(-1) + if (last?.type === type) { + last.value = `${last.value ?? ""}${value}` + return + } + summary.push({ type, value }) +} + +export const eventSummary = (events: ReadonlyArray<LLMEvent>) => { + const summary: Array<Record<string, unknown>> = [] + for (const event of events) { + if (event.type === "text-delta") { + pushText(summary, "text", event.text) + continue + } + if (event.type === "reasoning-delta") { + pushText(summary, "reasoning", event.text) + continue + } + if (event.type === "tool-call") { + summary.push({ + type: "tool-call", + name: event.name, + input: event.input, + providerExecuted: event.providerExecuted, + }) + continue + } + if (event.type === "tool-result") { + summary.push({ + type: "tool-result", + name: event.name, + result: event.result, + providerExecuted: event.providerExecuted, + }) + continue + } + if (event.type === "tool-error") { + summary.push({ type: "tool-error", name: event.name, message: event.message }) + continue + } + if (event.type === "request-finish") { + summary.push({ type: "finish", reason: event.reason, usage: usageSummary(event.usage) }) + } + } + return summary.map((item) => Object.fromEntries(Object.entries(item).filter((entry) => entry[1] !== undefined))) +} diff --git a/packages/llm/test/recorded-test.ts b/packages/llm/test/recorded-test.ts new file mode 100644 index 000000000000..6514f13dad2f --- /dev/null +++ b/packages/llm/test/recorded-test.ts @@ -0,0 +1,76 @@ +import { NodeFileSystem } from "@effect/platform-node" +import { HttpRecorder } from "@opencode-ai/http-recorder" +import { Layer } from "effect" +import { FetchHttpClient } from "effect/unstable/http" +import * as path from "node:path" +import { fileURLToPath } from "node:url" +import { LLMClient, RequestExecutor } from "../src/route" +import type { Service as LLMClientService } from "../src/route/client" +import type { Service as RequestExecutorService } from "../src/route/executor" +import type { Service as WebSocketExecutorService } from "../src/route/transport/websocket" +import { + recordedEffectGroup, + type RecordedCaseOptions as RunnerCaseOptions, + type RecordedGroupOptions, +} from "./recorded-runner" +import { webSocketCassetteLayer } from "./recorded-websocket" + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const FIXTURES_DIR = path.resolve(__dirname, "fixtures", "recordings") + +type RecordedEnv = RequestExecutorService | WebSocketExecutorService | LLMClientService + +type RecordedTestsOptions = RecordedGroupOptions & { + readonly options?: HttpRecorder.RecordReplayOptions +} + +type RecordedCaseOptions = RunnerCaseOptions & { + readonly options?: HttpRecorder.RecordReplayOptions +} + +const mergeOptions = ( + base: HttpRecorder.RecordReplayOptions | undefined, + override: HttpRecorder.RecordReplayOptions | undefined, +) => { + if (!base) return override + if (!override) return base + return { + ...base, + ...override, + metadata: base.metadata ||
override.metadata ? { ...base.metadata, ...override.metadata } : undefined, + } +} + +export const recordedTests = (options: RecordedTestsOptions) => + recordedEffectGroup({ + duplicateLabel: "recorded cassette", + options, + cassetteExists: (cassette) => HttpRecorder.hasCassetteSync(cassette, { directory: FIXTURES_DIR }), + layer: ({ cassette, metadata, options, caseOptions, recording }) => { + const recorderOptions = mergeOptions(options.options, caseOptions.options) + const recorderMetadata = { + ...recorderOptions?.metadata, + ...metadata, + } + const mode = recorderOptions?.mode ?? (recording ? "record" : "replay") + const cassetteService = HttpRecorder.Cassette.layer({ directory: FIXTURES_DIR }).pipe( + Layer.provide(NodeFileSystem.layer), + ) + const requestExecutor = RequestExecutor.layer.pipe( + Layer.provide( + HttpRecorder.recordingLayer(cassette, { + ...recorderOptions, + mode, + metadata: recorderMetadata, + }).pipe(Layer.provide(FetchHttpClient.layer)), + ), + ) + const deps = Layer.mergeAll( + requestExecutor, + webSocketCassetteLayer(cassette, { metadata: recorderMetadata, mode }), + ) + return Layer.mergeAll(deps, LLMClient.layerWithWebSocket.pipe(Layer.provide(deps))).pipe( + Layer.provide(cassetteService), + ) + }, + }) diff --git a/packages/llm/test/recorded-utils.ts b/packages/llm/test/recorded-utils.ts new file mode 100644 index 000000000000..513b2f819ce4 --- /dev/null +++ b/packages/llm/test/recorded-utils.ts @@ -0,0 +1,56 @@ +export const kebab = (value: string) => + value + .trim() + .replace(/['"]/g, "") + .replace(/[^a-zA-Z0-9]+/g, "-") + .replace(/^-|-$/g, "") + .toLowerCase() + +export const missingEnv = (names: ReadonlyArray<string>) => names.filter((name) => !process.env[name]) + +export const envList = (name: string) => + (process.env[name] ?? "") + .split(",") + .map((item) => item.trim().toLowerCase()) + .filter((item) => item !== "") + +export const unique = (items: ReadonlyArray<string>) => Array.from(new Set(items)) + +export const classifiedTags = (input: { + readonly prefix?: string + readonly provider?: string + readonly protocol?: string + readonly tags?: ReadonlyArray<string> +}) => + unique([ + ...(input.prefix ? [`prefix:${input.prefix}`] : []), + ...(input.provider ? [`provider:${input.provider}`] : []), + ...(input.protocol ? [`protocol:${input.protocol}`] : []), + ...(input.tags ?? []), + ]) + +export const matchesSelected = (input: { + readonly prefix: string + readonly name: string + readonly cassette: string + readonly tags: ReadonlyArray<string> +}) => { + const prefixes = envList("RECORDED_PREFIX") + const providers = envList("RECORDED_PROVIDER") + const requiredTags = envList("RECORDED_TAGS") + const tests = envList("RECORDED_TEST") + const tags = input.tags.map((tag) => tag.toLowerCase()) + const names = [input.name, kebab(input.name), input.cassette].map((item) => item.toLowerCase()) + + if (prefixes.length > 0 && !prefixes.includes(input.prefix.toLowerCase())) return false + if (providers.length > 0 && !providers.some((provider) => tags.includes(`provider:${provider}`))) return false + if (requiredTags.length > 0 && !requiredTags.every((tag) => tags.includes(tag))) return false + if (tests.length > 0 && !tests.some((test) => names.some((name) => name.includes(test)))) return false + return true +} + +export const cassetteName = ( + prefix: string, + name: string, + options: { readonly cassette?: string; readonly id?: string }, +) => options.cassette ?? `${prefix}/${options.id ??
kebab(name)}` diff --git a/packages/llm/test/recorded-websocket.ts b/packages/llm/test/recorded-websocket.ts new file mode 100644 index 000000000000..eeea9f1b780a --- /dev/null +++ b/packages/llm/test/recorded-websocket.ts @@ -0,0 +1,27 @@ +import { Cassette, makeWebSocketExecutor } from "@opencode-ai/http-recorder" +import { Effect, Layer } from "effect" +import { WebSocketExecutor } from "../src/route" +import type { Service as WebSocketExecutorService } from "../src/route/transport/websocket" + +const liveWebSocket = WebSocketExecutor.open +type Mode = "record" | "replay" | "passthrough" + +export const webSocketCassetteLayer = ( + cassette: string, + input: { readonly metadata?: Record<string, unknown>; readonly mode: Mode }, +): Layer.Layer<WebSocketExecutorService, Error, Cassette.Service> => + Layer.effect( + WebSocketExecutor.Service, + Effect.gen(function* () { + const cassetteService = yield* Cassette.Service + const executor = yield* makeWebSocketExecutor({ + name: cassette, + mode: input.mode, + metadata: input.metadata, + cassette: cassetteService, + live: { open: liveWebSocket }, + compareClientMessagesAsJson: true, + }) + return WebSocketExecutor.Service.of(executor) + }), + ) diff --git a/packages/llm/test/schema.test.ts b/packages/llm/test/schema.test.ts new file mode 100644 index 000000000000..1c9bbf1e09c1 --- /dev/null +++ b/packages/llm/test/schema.test.ts @@ -0,0 +1,58 @@ +import { describe, expect, test } from "bun:test" +import { Schema } from "effect" +import { + ContentPart, + LLMEvent, + LLMRequest, + ModelID, + ModelLimits, + ModelRef, + ProviderID, +} from "../src/schema" + +const model = new ModelRef({ + id: ModelID.make("fake-model"), + provider: ProviderID.make("fake-provider"), + route: "openai-chat", + baseURL: "https://fake.local", + limits: new ModelLimits({}), +}) + +describe("llm schema", () => { + test("decodes a minimal request", () => { + const input: unknown = { + id: "req_1", + model, + system: [{ type: "text", text: "You are terse."
}], + messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }], + tools: [], + generation: {}, + } + + const decoded = Schema.decodeUnknownSync(LLMRequest)(input) + + expect(decoded.id).toBe("req_1") + expect(decoded.messages[0]?.content[0]?.type).toBe("text") + }) + + test("accepts custom route ids", () => { + const decoded = Schema.decodeUnknownSync(LLMRequest)({ + model: { ...model, route: "custom-route" }, + system: [], + messages: [], + tools: [], + generation: {}, + }) + + expect(decoded.model.route).toBe("custom-route") + }) + + test("rejects invalid event type", () => { + expect(() => Schema.decodeUnknownSync(LLMEvent)({ type: "bogus" })).toThrow() + }) + + test("content part tagged union exposes guards", () => { + expect(ContentPart.guards.text({ type: "text", text: "hi" })).toBe(true) + expect(ContentPart.guards.media({ type: "text", text: "hi" })).toBe(false) + }) +}) diff --git a/packages/llm/test/tool-runtime.test.ts b/packages/llm/test/tool-runtime.test.ts new file mode 100644 index 000000000000..7251dee8af88 --- /dev/null +++ b/packages/llm/test/tool-runtime.test.ts @@ -0,0 +1,454 @@ +import { describe, expect } from "bun:test" +import { Effect, Schema, Stream } from "effect" +import { LLM, LLMEvent, LLMRequest, LLMResponse } from "../src" +import { LLMClient } from "../src/route" +import * as AnthropicMessages from "../src/protocols/anthropic-messages" +import * as OpenAIChat from "../src/protocols/openai-chat" +import { tool, ToolFailure } from "../src/tool" +import { it } from "./lib/effect" +import * as TestToolRuntime from "./lib/tool-runtime" +import { dynamicResponse, scriptedResponses } from "./lib/http" +import { deltaChunk, finishChunk, toolCallChunk } from "./lib/openai-chunks" +import { sseEvents } from "./lib/sse" + +const model = OpenAIChat.model({ + id: "gpt-4o-mini", + baseURL: "https://api.openai.test/v1/", + headers: { authorization: "Bearer test" }, +}) +const Json = Schema.fromJsonString(Schema.Unknown) +const decodeJson = Schema.decodeUnknownSync(Json) + +const baseRequest = LLM.request({ + id: "req_1", + model, + prompt: "Use the tool.", +}) + +const get_weather = tool({ + description: "Get current weather for a city.", + parameters: Schema.Struct({ city: Schema.String }), + success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }), + execute: ({ city }) => + Effect.gen(function* () { + if (city === "FAIL") return yield* new ToolFailure({ message: `Weather lookup failed for ${city}` }) + return { temperature: 22, condition: "sunny" } + }), +}) + +const schema_only_weather = tool({ + description: "Get current weather for a city.", + parameters: Schema.Struct({ city: Schema.String }), + success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }), +}) + +describe("LLMClient tools", () => { + it.effect("uses the registered model route when adding runtime tools", () => + Effect.gen(function* () { + const layer = scriptedResponses([ + sseEvents(deltaChunk({ role: "assistant", content: "Done." 
}), finishChunk("stop")), + ]) + + const events = Array.from( + yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe( + Stream.runCollect, + Effect.provide(layer), + ), + ) + + expect(LLMResponse.text({ events })).toBe("Done.") + }), + ) + + it.effect("sends tool-call history and request options on the follow-up request", () => + Effect.gen(function* () { + const bodies: unknown[] = [] + const responses = [ + sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")), + sseEvents(deltaChunk({ role: "assistant", content: "It's sunny in Paris." }), finishChunk("stop")), + ] + const layer = dynamicResponse((input) => + Effect.sync(() => { + bodies.push(decodeJson(input.text)) + return input.respond(responses[bodies.length - 1] ?? responses[responses.length - 1], { + headers: { "content-type": "text/event-stream" }, + }) + }), + ) + + yield* TestToolRuntime.runTools({ + request: LLMRequest.update(baseRequest, { + generation: LLM.generation({ maxTokens: 50 }), + toolChoice: LLM.toolChoice("auto"), + }), + tools: { get_weather }, + }).pipe(Stream.runCollect, Effect.provide(layer)) + + const second = bodies[1] as { + readonly messages?: ReadonlyArray<Record<string, unknown>> + readonly tools?: ReadonlyArray<unknown> + readonly tool_choice?: unknown + readonly max_tokens?: unknown + } + + expect(second.max_tokens).toBe(50) + expect(second.tool_choice).toBe("auto") + expect(second.tools).toHaveLength(1) + expect(second.messages?.map((message) => message.role)).toEqual(["user", "assistant", "tool"]) + expect(second.messages?.[1]).toMatchObject({ + role: "assistant", + content: null, + tool_calls: [{ id: "call_1", type: "function", function: { name: "get_weather" } }], + }) + expect(second.messages?.[2]).toMatchObject({ + role: "tool", + tool_call_id: "call_1", + content: '{"temperature":22,"condition":"sunny"}', + }) + }), + ) + + it.effect("dispatches a tool call, appends results, and resumes streaming", () => + Effect.gen(function* () { + const layer = scriptedResponses([ + sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")), + sseEvents(deltaChunk({ role: "assistant", content: "It's sunny in Paris." }), finishChunk("stop")), + ]) + + const events = Array.from( + yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe( + Stream.runCollect, + Effect.provide(layer), + ), + ) + + const result = events.find(LLMEvent.is.toolResult) + expect(result).toMatchObject({ + type: "tool-result", + id: "call_1", + name: "get_weather", + result: { type: "json", value: { temperature: 22, condition: "sunny" } }, + }) + expect(events.at(-1)?.type).toBe("request-finish") + expect(LLMResponse.text({ events })).toBe("It's sunny in Paris.") + }), + ) + + it.effect("executes tool calls for one step without looping by default", () => + Effect.gen(function* () { + const layer = scriptedResponses([ + sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")), + sseEvents(deltaChunk({ role: "assistant", content: "Should not run."
}), finishChunk("stop")), + ]) + + const events = Array.from( + yield* LLMClient.stream({ request: baseRequest, tools: { get_weather } }).pipe( + Stream.runCollect, + Effect.provide(layer), + ), + ) + + expect(events.filter(LLMEvent.is.requestFinish)).toHaveLength(1) + expect(events.find(LLMEvent.is.toolResult)).toMatchObject({ type: "tool-result", id: "call_1" }) + }), + ) + + it.effect("can expose tool schemas without executing tool calls", () => + Effect.gen(function* () { + const layer = scriptedResponses([ + sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")), + ]) + + const events = Array.from( + yield* LLMClient.stream({ + request: baseRequest, + tools: { get_weather: schema_only_weather }, + toolExecution: "none", + }).pipe(Stream.runCollect, Effect.provide(layer)), + ) + + expect(events.find(LLMEvent.is.toolCall)).toMatchObject({ type: "tool-call", id: "call_1" }) + expect(events.find(LLMEvent.is.toolResult)).toBeUndefined() + }), + ) + + it.effect("preserves provider metadata when folding streamed assistant content into follow-up history", () => + Effect.gen(function* () { + const bodies: unknown[] = [] + const layer = dynamicResponse((input) => + Effect.sync(() => { + bodies.push(decodeJson(input.text)) + return input.respond( + bodies.length === 1 + ? sseEvents( + { type: "message_start", message: { usage: { input_tokens: 5 } } }, + { type: "content_block_start", index: 0, content_block: { type: "thinking", thinking: "" } }, + { type: "content_block_delta", index: 0, delta: { type: "thinking_delta", thinking: "thinking" } }, + { type: "content_block_delta", index: 0, delta: { type: "signature_delta", signature: "sig_1" } }, + { type: "content_block_stop", index: 0 }, + { + type: "content_block_start", + index: 1, + content_block: { type: "tool_use", id: "call_1", name: "get_weather" }, + }, + { + type: "content_block_delta", + index: 1, + delta: { type: "input_json_delta", partial_json: '{"city":"Paris"}' }, + }, + { type: "content_block_stop", index: 1 }, + { type: "message_delta", delta: { stop_reason: "tool_use" }, usage: { output_tokens: 5 } }, + ) + : sseEvents( + { type: "message_start", message: { usage: { input_tokens: 5 } } }, + { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } }, + { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Done." } }, + { type: "content_block_stop", index: 0 }, + { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 1 } }, + ), + { headers: { "content-type": "text/event-stream" } }, + ) + }), + ) + + yield* TestToolRuntime.runTools({ + request: LLM.updateRequest(baseRequest, { + model: AnthropicMessages.model({ id: "claude-sonnet-4-5", apiKey: "test" }), + }), + tools: { get_weather }, + }).pipe(Stream.runCollect, Effect.provide(layer)) + + expect(bodies[1]).toMatchObject({ + messages: [ + { role: "user" }, + { + role: "assistant", + content: [ + { type: "thinking", thinking: "thinking", signature: "sig_1" }, + { type: "tool_use", id: "call_1", name: "get_weather", input: { city: "Paris" } }, + ], + }, + { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1" }] }, + ], + }) + }), + ) + + it.effect("emits tool-error for unknown tools so the model can self-correct", () => + Effect.gen(function* () { + const layer = scriptedResponses([ + sseEvents(toolCallChunk("call_1", "missing_tool", "{}"), finishChunk("tool_calls")), + sseEvents(deltaChunk({ role: "assistant", content: "Sorry." 
+      ])
+
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe(
+          Stream.runCollect,
+          Effect.provide(layer),
+        ),
+      )
+
+      const toolError = events.find(LLMEvent.is.toolError)
+      expect(toolError).toMatchObject({ type: "tool-error", id: "call_1", name: "missing_tool" })
+      expect(toolError?.message).toContain("Unknown tool")
+      expect(events.find(LLMEvent.is.toolResult)).toMatchObject({
+        type: "tool-result",
+        id: "call_1",
+        name: "missing_tool",
+        result: { type: "error", value: "Unknown tool: missing_tool" },
+      })
+    }),
+  )
+
+  it.effect("emits tool-error when the LLM input fails the parameters schema", () =>
+    Effect.gen(function* () {
+      const layer = scriptedResponses([
+        sseEvents(toolCallChunk("call_1", "get_weather", '{"city":42}'), finishChunk("tool_calls")),
+        sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")),
+      ])
+
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe(
+          Stream.runCollect,
+          Effect.provide(layer),
+        ),
+      )
+
+      const toolError = events.find(LLMEvent.is.toolError)
+      expect(toolError).toMatchObject({ type: "tool-error", id: "call_1", name: "get_weather" })
+      expect(toolError?.message).toContain("Invalid tool input")
+    }),
+  )
+
+  it.effect("emits tool-error when the handler returns a ToolFailure", () =>
+    Effect.gen(function* () {
+      const layer = scriptedResponses([
+        sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"FAIL"}'), finishChunk("tool_calls")),
+        sseEvents(deltaChunk({ role: "assistant", content: "Sorry." }), finishChunk("stop")),
+      ])
+
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe(
+          Stream.runCollect,
+          Effect.provide(layer),
+        ),
+      )
+
+      const toolError = events.find(LLMEvent.is.toolError)
+      expect(toolError).toMatchObject({ type: "tool-error", id: "call_1", name: "get_weather" })
+      expect(toolError?.message).toBe("Weather lookup failed for FAIL")
+    }),
+  )
+
+  it.effect("stops when the model finishes without requesting more tools", () =>
+    Effect.gen(function* () {
+      const layer = scriptedResponses([
+        sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")),
+      ])
+
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe(
+          Stream.runCollect,
+          Effect.provide(layer),
+        ),
+      )
+
+      expect(events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
+      expect(LLMResponse.text({ events })).toBe("Done.")
+    }),
+  )
+
+  it.effect("respects maxSteps and stops the loop", () =>
+    Effect.gen(function* () {
+      // Every script entry asks for another tool call. With maxSteps: 2 the
+      // runtime should run at most two model rounds and then exit even though
+      // the model still wants to keep going.
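+      // Each completed model round ends with exactly one request-finish event,
+      // which is what the assertion below counts to confirm only two rounds ran.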
+      const toolCallStep = sseEvents(
+        toolCallChunk("call_x", "get_weather", '{"city":"Paris"}'),
+        finishChunk("tool_calls"),
+      )
+      const layer = scriptedResponses([toolCallStep, toolCallStep, toolCallStep])
+
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather }, maxSteps: 2 }).pipe(
+          Stream.runCollect,
+          Effect.provide(layer),
+        ),
+      )
+
+      expect(events.filter(LLMEvent.is.requestFinish)).toHaveLength(2)
+    }),
+  )
+
+  it.effect("stops follow-up when stopWhen returns true after the first step", () =>
+    Effect.gen(function* () {
+      const layer = scriptedResponses([
+        sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")),
+        sseEvents(deltaChunk({ role: "assistant", content: "Should not run." }), finishChunk("stop")),
+      ])
+
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({
+          request: baseRequest,
+          tools: { get_weather },
+          stopWhen: (state) => state.step >= 0,
+        }).pipe(Stream.runCollect, Effect.provide(layer)),
+      )
+
+      expect(events.filter(LLMEvent.is.requestFinish)).toHaveLength(1)
+      expect(events.find(LLMEvent.is.toolResult)).toMatchObject({ type: "tool-result", id: "call_1" })
+    }),
+  )
+
+  it.effect("does not dispatch provider-executed tool calls", () =>
+    Effect.gen(function* () {
+      let streams = 0
+      const layer = dynamicResponse((input) =>
+        Effect.sync(() => {
+          streams++
+          return input.respond(
+            sseEvents(
+              { type: "message_start", message: { usage: { input_tokens: 5 } } },
+              {
+                type: "content_block_start",
+                index: 0,
+                content_block: { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search" },
+              },
+              {
+                type: "content_block_delta",
+                index: 0,
+                delta: { type: "input_json_delta", partial_json: '{"query":"x"}' },
+              },
+              { type: "content_block_stop", index: 0 },
+              {
+                type: "content_block_start",
+                index: 1,
+                content_block: {
+                  type: "web_search_tool_result",
+                  tool_use_id: "srvtoolu_abc",
+                  content: [{ type: "web_search_result", url: "https://example.com", title: "Example" }],
+                },
+              },
+              { type: "content_block_stop", index: 1 },
+              { type: "content_block_start", index: 2, content_block: { type: "text", text: "" } },
+              { type: "content_block_delta", index: 2, delta: { type: "text_delta", text: "Done." } },
+              { type: "content_block_stop", index: 2 },
+              { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 8 } },
+            ),
+            { headers: { "content-type": "text/event-stream" } },
+          )
+        }),
+      )
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({
+          request: LLM.updateRequest(baseRequest, {
+            model: AnthropicMessages.model({ id: "claude-sonnet-4-5", apiKey: "test" }),
+          }),
+          tools: {},
+        }).pipe(Stream.runCollect, Effect.provide(layer)),
+      )
+
+      expect(streams).toBe(1)
+      expect(events.find(LLMEvent.is.toolError)).toBeUndefined()
+      expect(events.filter(LLMEvent.is.toolCall)).toEqual([
+        {
+          type: "tool-call",
+          id: "srvtoolu_abc",
+          name: "web_search",
+          input: { query: "x" },
+          providerExecuted: true,
+        },
+      ])
+      expect(LLMResponse.text({ events })).toBe("Done.")
+    }),
+  )
+
+  it.effect("dispatches multiple tool calls in one step concurrently", () =>
+    Effect.gen(function* () {
+      const layer = scriptedResponses([
+        sseEvents(
+          deltaChunk({
+            role: "assistant",
+            tool_calls: [
+              { index: 0, id: "c1", function: { name: "get_weather", arguments: '{"city":"Paris"}' } },
+              { index: 1, id: "c2", function: { name: "get_weather", arguments: '{"city":"Tokyo"}' } },
+            ],
+          }),
+          finishChunk("tool_calls"),
+        ),
+        sseEvents(deltaChunk({ role: "assistant", content: "Both done." }), finishChunk("stop")),
+      ])
+
+      const events = Array.from(
+        yield* TestToolRuntime.runTools({ request: baseRequest, tools: { get_weather } }).pipe(
+          Stream.runCollect,
+          Effect.provide(layer),
+        ),
+      )
+
+      const results = events.filter(LLMEvent.is.toolResult)
+      expect(results).toHaveLength(2)
+      expect(results.map((event) => event.id).toSorted()).toEqual(["c1", "c2"])
+    }),
+  )
+})
diff --git a/packages/llm/test/tool-stream.test.ts b/packages/llm/test/tool-stream.test.ts
new file mode 100644
index 000000000000..04a0035c993f
--- /dev/null
+++ b/packages/llm/test/tool-stream.test.ts
@@ -0,0 +1,88 @@
+import { describe, expect } from "bun:test"
+import { Effect } from "effect"
+import { LLMError } from "../src/schema"
+import { ToolStream } from "../src/protocols/utils/tool-stream"
+import { it } from "./lib/effect"
+
+const ADAPTER = "test-route"
+
+describe("ToolStream", () => {
+  it.effect("starts from OpenAI-style deltas and finalizes parsed input", () =>
+    Effect.gen(function* () {
+      const first = ToolStream.appendOrStart(
+        ADAPTER,
+        ToolStream.empty(),
+        0,
+        { id: "call_1", name: "lookup", text: '{"query"' },
+        "missing tool",
+      )
+      if (ToolStream.isError(first)) return yield* first
+      const second = ToolStream.appendOrStart(ADAPTER, first.tools, 0, { text: ':"weather"}' }, "missing tool")
+      if (ToolStream.isError(second)) return yield* second
+      const finished = yield* ToolStream.finish(ADAPTER, second.tools, 0)
+
+      expect(first.event).toEqual({ type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' })
+      expect(second.event).toEqual({ type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' })
+      expect(finished).toEqual({
+        tools: {},
+        event: { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
+      })
+    }),
+  )
+
+  it.effect("fails appendExisting when the provider skipped the tool start", () =>
+    Effect.gen(function* () {
+      const error = ToolStream.appendExisting(ADAPTER, ToolStream.empty(), 0, "{}", "missing tool")
+
+      expect(error).toBeInstanceOf(LLMError)
+      if (ToolStream.isError(error)) expect(error.reason.message).toBe("missing tool")
+    }),
+  )
+
+  it.effect("uses final input override without losing accumulated deltas", () =>
deltas", () => + Effect.gen(function* () { + const tools = ToolStream.start(ToolStream.empty(), "item_1", { + id: "call_1", + name: "lookup", + input: '{"query":"partial"}', + }) + const finished = yield* ToolStream.finishWithInput(ADAPTER, tools, "item_1", '{"query":"final"}') + + expect(finished).toEqual({ + tools: {}, + event: { type: "tool-call", id: "call_1", name: "lookup", input: { query: "final" } }, + }) + }), + ) + + it.effect("preserves providerExecuted and clears all tools", () => + Effect.gen(function* () { + const first: ToolStream.State = ToolStream.start(ToolStream.empty(), 0, { + id: "call_1", + name: "lookup", + input: "{}", + }) + const tools = ToolStream.start(first, 1, { + id: "call_2", + name: "web_search", + input: '{"query":"docs"}', + providerExecuted: true, + }) + const finished = yield* ToolStream.finishAll(ADAPTER, tools) + + expect(finished).toEqual({ + tools: {}, + events: [ + { type: "tool-call", id: "call_1", name: "lookup", input: {} }, + { + type: "tool-call", + id: "call_2", + name: "web_search", + input: { query: "docs" }, + providerExecuted: true, + }, + ], + }) + }), + ) +}) diff --git a/packages/llm/test/tool.types.ts b/packages/llm/test/tool.types.ts new file mode 100644 index 000000000000..4ffc30c986cf --- /dev/null +++ b/packages/llm/test/tool.types.ts @@ -0,0 +1,29 @@ +import { Effect, Schema } from "effect" +import { LLM } from "../src" +import * as OpenAIChat from "../src/protocols/openai-chat" +import { tool } from "../src/tool" + +const request = LLM.request({ + model: OpenAIChat.model({ id: "gpt-4o-mini", apiKey: "fixture" }), + prompt: "Use the tool.", +}) + +const executable = tool({ + description: "Get weather.", + parameters: Schema.Struct({ city: Schema.String }), + success: Schema.Struct({ forecast: Schema.String }), + execute: (input) => Effect.succeed({ forecast: input.city }), +}) + +const schemaOnly = tool({ + description: "Get weather.", + parameters: Schema.Struct({ city: Schema.String }), + success: Schema.Struct({ forecast: Schema.String }), +}) + +LLM.stream({ request, tools: { executable } }) +LLM.generate({ request, tools: { executable }, stopWhen: LLM.stepCountIs(2) }) +LLM.stream({ request, tools: { schemaOnly }, toolExecution: "none" }) + +// @ts-expect-error Handler-less tools can only be passed with toolExecution: "none". +LLM.stream({ request, tools: { schemaOnly } }) diff --git a/packages/llm/tsconfig.json b/packages/llm/tsconfig.json new file mode 100644 index 000000000000..2bc480ffbb60 --- /dev/null +++ b/packages/llm/tsconfig.json @@ -0,0 +1,15 @@ +{ + "$schema": "https://json.schemastore.org/tsconfig", + "extends": "@tsconfig/bun/tsconfig.json", + "compilerOptions": { + "lib": ["ESNext", "DOM", "DOM.Iterable"], + "noUncheckedIndexedAccess": false, + "plugins": [ + { + "name": "@effect/language-service", + "transform": "@effect/language-service/transform", + "namespaceImportPackages": ["effect", "@effect/*"] + } + ] + } +} diff --git a/turbo.json b/turbo.json index 0183fabca411..6c65881b857d 100644 --- a/turbo.json +++ b/turbo.json @@ -13,6 +13,10 @@ "outputs": [], "passThroughEnv": ["*"] }, + "test:ci": { + "outputs": [".artifacts/unit/junit.xml"], + "passThroughEnv": ["*"] + }, "opencode#test:ci": { "dependsOn": ["^build"], "outputs": [".artifacts/unit/junit.xml"],