1 change: 1 addition & 0 deletions packages/types/src/provider-settings.ts
@@ -367,6 +367,7 @@ const litellmSchema = baseProviderSettingsSchema.extend({
litellmApiKey: z.string().optional(),
litellmModelId: z.string().optional(),
litellmUsePromptCache: z.boolean().optional(),
litellmFlattenContent: z.boolean().optional(),
})

const cerebrasSchema = apiModelIdProviderModelSchema.extend({
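For reference, a hypothetical configuration exercising the extended schema might look like the sketch below. The values are illustrative only, and it assumes the base schema's fields are all optional:

import { z } from "zod"

// Illustrative only; omitting litellmFlattenContent (undefined) behaves
// the same as setting it to true.
const settings: z.infer<typeof litellmSchema> = {
	litellmApiKey: "sk-example",
	litellmModelId: "auto_router",
	litellmUsePromptCache: false,
	litellmFlattenContent: true, // set to false only when sending images
}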
202 changes: 202 additions & 0 deletions src/api/providers/__tests__/lite-llm.spec.ts
@@ -920,4 +920,206 @@ describe("LiteLLMHandler", () => {
expect(id1).not.toBe(id2)
})
})

describe("content flattening for auto_router compatibility", () => {
it("should flatten array content to string by default (when litellmFlattenContent is undefined)", async () => {
const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{ type: "text", text: "Hello" },
{ type: "text", text: "World" },
],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Hi!" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

// Content should be flattened to a string
expect(typeof userMessage.content).toBe("string")
expect(userMessage.content).toBe("Hello\n\nWorld")
})

it("should flatten array content to string when litellmFlattenContent is true", async () => {
const optionsWithFlatten: ApiHandlerOptions = {
...mockOptions,
litellmFlattenContent: true,
}
handler = new LiteLLMHandler(optionsWithFlatten)

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [{ type: "text", text: "Single block" }],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Response" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

expect(typeof userMessage.content).toBe("string")
expect(userMessage.content).toBe("Single block")
})

it("should NOT flatten array content when litellmFlattenContent is false", async () => {
const optionsWithoutFlatten: ApiHandlerOptions = {
...mockOptions,
litellmFlattenContent: false,
}
handler = new LiteLLMHandler(optionsWithoutFlatten)

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{ type: "text", text: "Hello" },
{ type: "text", text: "World" },
],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Hi!" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

// Content should remain as array
expect(Array.isArray(userMessage.content)).toBe(true)
expect(userMessage.content).toHaveLength(2)
})

it("should preserve string content unchanged regardless of flatten setting", async () => {
const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Simple string message",
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Response" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

expect(typeof userMessage.content).toBe("string")
expect(userMessage.content).toBe("Simple string message")
})

it("should NOT flatten array content if it contains non-text blocks (images)", async () => {
const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{ type: "text", text: "Look at this image:" },
{
type: "image",
source: {
type: "base64",
media_type: "image/png",
data: "iVBORw0KGgo=",
},
},
],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "I see..." } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

// Content should remain as array since it contains image
expect(Array.isArray(userMessage.content)).toBe(true)
})
})
})
47 changes: 46 additions & 1 deletion src/api/providers/lite-llm.ts
@@ -109,17 +109,62 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
})
}

/**
* Flatten array content in messages to simple string format.
*
* LiteLLM's auto_router feature passes user content to embedding models for semantic routing.
* However, embedding models expect plain strings, not arrays of content blocks.
* When Roo Code sends messages with array content like:
* {"role": "user", "content": [{"type": "text", "text": "..."}]}
*
* The auto_router fails because it passes this array directly to embeddings.
* This method flattens such content to simple strings:
* {"role": "user", "content": "..."}
*
* Flattening is enabled by default; users who need multimodal content
* (images) can disable it via the litellmFlattenContent option.
*/
private flattenMessageContent(
messages: OpenAI.Chat.ChatCompletionMessageParam[],
): OpenAI.Chat.ChatCompletionMessageParam[] {
return messages.map((msg) => {
// Only flatten user and system messages with array content
if ((msg.role === "user" || msg.role === "system") && Array.isArray(msg.content)) {
// Check if all content blocks are text type
const allText = msg.content.every(
(part) => typeof part === "object" && "type" in part && part.type === "text",
)

// Only flatten if all content is text (no images)
if (allText) {
const textParts = msg.content.map((part) => (part as { type: "text"; text: string }).text)
return {
...msg,
content: textParts.join("\n\n"),
}
}
}
return msg
})
}

override async *createMessage(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const { id: modelId, info } = await this.fetchModel()

- const openAiMessages = convertToOpenAiMessages(messages, {
+ let openAiMessages = convertToOpenAiMessages(messages, {
normalizeToolCallId: sanitizeOpenAiCallId,
})

// Flatten array content to string for compatibility with LiteLLM's auto_router
// This is enabled by default (when litellmFlattenContent is undefined or true)
if (this.options.litellmFlattenContent !== false) {
openAiMessages = this.flattenMessageContent(openAiMessages)
}
Comment on lines +162 to +166 (Contributor Author):

When both litellmFlattenContent (enabled by default) and litellmUsePromptCache are enabled with a model that supports prompt caching, the flattening is undone. The flattening converts array content to strings here, but lines 194-207 below wrap string content back into arrays to add cache_control. This means auto_router users who also enable prompt caching will still encounter the embedding model failure this PR intends to fix. Consider either moving the flattening after the prompt caching logic, or having the prompt caching logic skip re-wrapping when flattening is enabled.
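A minimal sketch of the reviewer's second option (skipping the re-wrap when flattening is enabled), assuming the cache-control wrapping is factored into a helper. Both `applyPromptCache` and `addCacheControl` are hypothetical names for illustration, not code from this PR:

import OpenAI from "openai"

type ChatMessage = OpenAI.Chat.ChatCompletionMessageParam

// Hypothetical helper: `addCacheControl` stands in for the existing
// wrapping on lines 194-207 of the actual file.
function applyPromptCache(
	messages: ChatMessage[],
	flattenEnabled: boolean,
	addCacheControl: (msg: ChatMessage) => ChatMessage,
): ChatMessage[] {
	return messages.map((msg) => {
		if (flattenEnabled && typeof msg.content === "string") {
			// Leave flattened strings alone so auto_router's embedding step
			// still receives plain text; this message simply goes uncached.
			return msg
		}
		return addCacheControl(msg)
	})
}

The trade-off is that flattened messages forgo prompt caching; moving the flattening after the caching logic instead would need care not to strip cache_control annotations when joining text blocks.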


// Prepare messages with cache control if enabled and supported
let systemMessage: OpenAI.Chat.ChatCompletionMessageParam
let enhancedMessages: OpenAI.Chat.ChatCompletionMessageParam[]
14 changes: 14 additions & 0 deletions webview-ui/src/components/settings/providers/LiteLLM.tsx
@@ -182,6 +182,20 @@
}
return null
})()}

{/* Flatten content option for auto_router compatibility */}
<div className="mt-4">
<VSCodeCheckbox
checked={apiConfiguration.litellmFlattenContent !== false}
onChange={(e: any) => {
setApiConfigurationField("litellmFlattenContent", e.target.checked)
}}>
<span className="font-medium">{t("settings:providers.litellmFlattenContent")}</span>
</VSCodeCheckbox>
<div className="text-sm text-vscode-descriptionForeground ml-6 mt-1">
{t("settings:providers.litellmFlattenContentDescription")}
</div>
</div>
</>
)
}
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ca/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/de/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/en/settings.json
@@ -456,6 +456,8 @@
"getXaiApiKey": "Get xAI API Key",
"litellmApiKey": "LiteLLM API Key",
"litellmBaseUrl": "LiteLLM Base URL",
"litellmFlattenContent": "Flatten message content",
"litellmFlattenContentDescription": "Convert array-formatted message content to plain strings. Enable this for compatibility with LiteLLM's auto_router and embedding-based routing features. Disable only if you need multimodal content (images).",
"awsCredentials": "AWS Credentials",
"awsProfile": "AWS Profile",
"awsApiKey": "Amazon Bedrock API Key",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/es/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/fr/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/hi/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/id/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/it/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ja/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ko/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/nl/settings.json