diff --git a/src/agent/llm.ts b/src/agent/llm.ts index 840346bd..b7cc474a 100644 --- a/src/agent/llm.ts +++ b/src/agent/llm.ts @@ -444,6 +444,9 @@ export class ModelClient { const isAnthropic = request.model.startsWith('anthropic/'); const isGLM = request.model.startsWith('zai/') || request.model.includes('glm'); + const isGeminiThinkingRequired = + request.model.startsWith('google/gemini-3.1') || + request.model.startsWith('google/gemini-2.5-pro'); // Build the request payload, injecting model-specific optimizations let requestPayload: Record = { ...request, stream: true }; @@ -482,6 +485,30 @@ export class ModelClient { } } + // Gemini Pro reasoning models reject a missing/zero thinking budget. Normalize + // the gateway default so fallback routing doesn't fail with "Budget 0 is invalid." + if (isGeminiThinkingRequired) { + // The gateway's streaming path currently drops Gemini's thinking budget; + // non-streaming preserves it. We convert the JSON response back into the + // same internal chunks below so callers keep one code path. + requestPayload['stream'] = false; + const maxOut = request.max_tokens ?? 16_384; + const budgetTokens = Math.min(maxOut, 8_192); + const thinking = requestPayload['thinking']; + if (thinking && typeof thinking === 'object' && !Array.isArray(thinking)) { + requestPayload['thinking'] = { + ...thinking, + type: 'enabled', + budget_tokens: budgetTokens, + }; + } else { + requestPayload['thinking'] = { + type: 'enabled', + budget_tokens: budgetTokens, + }; + } + } + if (isAnthropic) { // ─ Anthropic extended thinking ────────────────────────────────────── // Enable the `thinking` API block only for models that accept it. 
@@ -677,6 +704,11 @@ export class ModelClient {
         }
       }
 
+      if (requestPayload['stream'] === false) {
+        yield* this.parseNonStreamingMessage(response, request.model);
+        return;
+      }
+
       // Parse SSE stream
       yield* this.parseSSEStream(response, requestController, streamTimeoutMs, request.model);
     } finally {
@@ -684,6 +716,89 @@ export class ModelClient {
     }
   }
 
+  /**
+   * Replays a non-streaming JSON message as the chunk sequence emitted for an
+   * SSE stream, so callers consume both transports through one code path.
+   *
+   * Order: `message_start`; per content block `content_block_start`, its
+   * delta(s), `content_block_stop`; then `message_delta` and `message_stop`.
+   * Fields replayed as deltas (text, thinking, signature, tool input) are
+   * blanked in the start events, mirroring Anthropic-style streaming, so a
+   * consumer that seeds its accumulator from a start event never sees the
+   * same content twice.
+   *
+   * @param response HTTP response whose body is one JSON message object.
+   * @param model Model identifier, used in error and debug messages only.
+   * @throws Whatever `response.json()` rejects with, or an `Error` when the
+   *   decoded body is not a JSON object.
+   */
+  private async *parseNonStreamingMessage(
+    response: Response,
+    model: string,
+  ): AsyncGenerator {
+    const isRecord = (value: unknown): value is Record<string, unknown> =>
+      typeof value === 'object' && value !== null && !Array.isArray(value);
+
+    const parsed: unknown = await response.json();
+    if (!isRecord(parsed)) {
+      throw new Error(`Non-streaming response for ${model} is not a JSON object`);
+    }
+
+    // Log before the first yield: a consumer that stops iterating once it sees
+    // `message_stop` closes the generator before any statement placed after
+    // the final yield can run.
+    if (this.debug) {
+      console.error(`[franklin] Parsed non-streaming response for ${model}`);
+    }
+
+    const rawContent = parsed['content'];
+    const content: unknown[] = Array.isArray(rawContent) ? rawContent : [];
+
+    yield { kind: 'message_start', payload: { message: { ...parsed, content: [] } } };
+
+    for (const [index, block] of content.entries()) {
+      // Tolerate malformed entries rather than throwing on `block.type`;
+      // `index` still reflects the position in the original array.
+      if (!isRecord(block)) continue;
+
+      const startBlock: Record<string, unknown> = { ...block };
+      const deltas: Record<string, unknown>[] = [];
+
+      if (block['type'] === 'text' && typeof block['text'] === 'string') {
+        startBlock['text'] = '';
+        deltas.push({ type: 'text_delta', text: block['text'] });
+      } else if (block['type'] === 'thinking' && typeof block['thinking'] === 'string') {
+        startBlock['thinking'] = '';
+        deltas.push({ type: 'thinking_delta', thinking: block['thinking'] });
+        if (typeof block['signature'] === 'string') {
+          startBlock['signature'] = '';
+          deltas.push({ type: 'signature_delta', signature: block['signature'] });
+        }
+      } else if (block['type'] === 'tool_use') {
+        startBlock['input'] = {};
+        deltas.push({
+          type: 'input_json_delta',
+          partial_json: JSON.stringify(block['input'] ?? {}),
+        });
+      }
+
+      yield { kind: 'content_block_start', payload: { index, content_block: startBlock } };
+      for (const delta of deltas) {
+        yield { kind: 'content_block_delta', payload: { index, delta } };
+      }
+      yield { kind: 'content_block_stop', payload: { index } };
+    }
+
+    yield {
+      kind: 'message_delta',
+      payload: {
+        delta: { stop_reason: parsed['stop_reason'] ?? 'end_turn' },
+        usage: parsed['usage'] ?? {},
+      },
+    };
+    yield { kind: 'message_stop', payload: {} };
+  }
+
   /**
    * Non-streaming completion for simple requests.
    */