1 change: 1 addition & 0 deletions packages/types/src/provider-settings.ts
@@ -367,6 +367,7 @@ const litellmSchema = baseProviderSettingsSchema.extend({
litellmApiKey: z.string().optional(),
litellmModelId: z.string().optional(),
litellmUsePromptCache: z.boolean().optional(),
litellmFlattenContent: z.boolean().optional(),
})

const cerebrasSchema = apiModelIdProviderModelSchema.extend({
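For reference, a hypothetical configuration exercising the extended schema might look like the sketch below. The values are illustrative only, and it assumes the base schema's fields are all optional:

import { z } from "zod"

// Illustrative only; omitting litellmFlattenContent (undefined) behaves
// the same as setting it to true.
const settings: z.infer<typeof litellmSchema> = {
	litellmApiKey: "sk-example",
	litellmModelId: "auto_router",
	litellmUsePromptCache: false,
	litellmFlattenContent: true, // set to false only when sending images
}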
202 changes: 202 additions & 0 deletions src/api/providers/__tests__/lite-llm.spec.ts
@@ -920,4 +920,206 @@ describe("LiteLLMHandler", () => {
expect(id1).not.toBe(id2)
})
})

describe("content flattening for auto_router compatibility", () => {
it("should flatten array content to string by default (when litellmFlattenContent is undefined)", async () => {
const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{ type: "text", text: "Hello" },
{ type: "text", text: "World" },
],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Hi!" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

// Content should be flattened to a string
expect(typeof userMessage.content).toBe("string")
expect(userMessage.content).toBe("Hello\n\nWorld")
})

it("should flatten array content to string when litellmFlattenContent is true", async () => {
const optionsWithFlatten: ApiHandlerOptions = {
...mockOptions,
litellmFlattenContent: true,
}
handler = new LiteLLMHandler(optionsWithFlatten)

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [{ type: "text", text: "Single block" }],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Response" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

expect(typeof userMessage.content).toBe("string")
expect(userMessage.content).toBe("Single block")
})

it("should NOT flatten array content when litellmFlattenContent is false", async () => {
const optionsWithoutFlatten: ApiHandlerOptions = {
...mockOptions,
litellmFlattenContent: false,
}
handler = new LiteLLMHandler(optionsWithoutFlatten)

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{ type: "text", text: "Hello" },
{ type: "text", text: "World" },
],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Hi!" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

// Content should remain as array
expect(Array.isArray(userMessage.content)).toBe(true)
expect(userMessage.content).toHaveLength(2)
})

it("should preserve string content unchanged regardless of flatten setting", async () => {
const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Simple string message",
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "Response" } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

expect(typeof userMessage.content).toBe("string")
expect(userMessage.content).toBe("Simple string message")
})

it("should NOT flatten array content if it contains non-text blocks (images)", async () => {
const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{ type: "text", text: "Look at this image:" },
{
type: "image",
source: {
type: "base64",
media_type: "image/png",
data: "iVBORw0KGgo=",
},
},
],
},
]

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
choices: [{ delta: { content: "I see..." } }],
usage: { prompt_tokens: 10, completion_tokens: 5 },
}
},
}

mockCreate.mockReturnValue({
withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
})

const generator = handler.createMessage(systemPrompt, messages)
for await (const _chunk of generator) {
// Consume
}

const createCall = mockCreate.mock.calls[0][0]
const userMessage = createCall.messages.find((msg: any) => msg.role === "user")

// Content should remain as array since it contains image
expect(Array.isArray(userMessage.content)).toBe(true)
})
})
})
47 changes: 46 additions & 1 deletion src/api/providers/lite-llm.ts
@@ -109,17 +109,62 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
})
}

/**
* Flatten array content in messages to simple string format.
*
* LiteLLM's auto_router feature passes user content to embedding models for semantic routing.
* However, embedding models expect plain strings, not arrays of content blocks.
* When Roo Code sends messages with array content like:
* {"role": "user", "content": [{"type": "text", "text": "..."}]}
*
* The auto_router fails because it passes this array directly to embeddings.
* This method flattens such content to simple strings:
* {"role": "user", "content": "..."}
*
* Flattening is enabled by default; users who need multimodal content
* (images) can disable it via the litellmFlattenContent option.
*/
private flattenMessageContent(
messages: OpenAI.Chat.ChatCompletionMessageParam[],
): OpenAI.Chat.ChatCompletionMessageParam[] {
return messages.map((msg) => {
// Only flatten user and system messages with array content
if ((msg.role === "user" || msg.role === "system") && Array.isArray(msg.content)) {
// Check if all content blocks are text type
const allText = msg.content.every(
(part) => typeof part === "object" && "type" in part && part.type === "text",
)

// Only flatten if all content is text (no images)
if (allText) {
const textParts = msg.content.map((part) => (part as { type: "text"; text: string }).text)
return {
...msg,
content: textParts.join("\n\n"),
}
}
}
return msg
})
}

override async *createMessage(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const { id: modelId, info } = await this.fetchModel()

- const openAiMessages = convertToOpenAiMessages(messages, {
+ let openAiMessages = convertToOpenAiMessages(messages, {
normalizeToolCallId: sanitizeOpenAiCallId,
})

// Flatten array content to string for compatibility with LiteLLM's auto_router
// This is enabled by default (when litellmFlattenContent is undefined or true)
if (this.options.litellmFlattenContent !== false) {
openAiMessages = this.flattenMessageContent(openAiMessages)
}
Comment on lines +162 to +166 (Contributor Author):

When both litellmFlattenContent (enabled by default) and litellmUsePromptCache are enabled with a model that supports prompt caching, the flattening is undone. The flattening converts array content to strings here, but lines 194-207 below wrap string content back into arrays to add cache_control. This means auto_router users who also enable prompt caching will still encounter the embedding model failure this PR intends to fix. Consider either moving the flattening after the prompt caching logic, or having the prompt caching logic skip re-wrapping when flattening is enabled.
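A minimal sketch of the reviewer's second option (skipping the re-wrap when flattening is enabled), assuming the cache-control wrapping is factored into a helper. Both `applyPromptCache` and `addCacheControl` are hypothetical names for illustration, not code from this PR:

import OpenAI from "openai"

type ChatMessage = OpenAI.Chat.ChatCompletionMessageParam

// Hypothetical helper: `addCacheControl` stands in for the existing
// wrapping on lines 194-207 of the actual file.
function applyPromptCache(
	messages: ChatMessage[],
	flattenEnabled: boolean,
	addCacheControl: (msg: ChatMessage) => ChatMessage,
): ChatMessage[] {
	return messages.map((msg) => {
		if (flattenEnabled && typeof msg.content === "string") {
			// Leave flattened strings alone so auto_router's embedding step
			// still receives plain text; this message simply goes uncached.
			return msg
		}
		return addCacheControl(msg)
	})
}

The trade-off is that flattened messages forgo prompt caching; moving the flattening after the caching logic instead would need care not to strip cache_control annotations when joining text blocks.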


// Prepare messages with cache control if enabled and supported
let systemMessage: OpenAI.Chat.ChatCompletionMessageParam
let enhancedMessages: OpenAI.Chat.ChatCompletionMessageParam[]
14 changes: 14 additions & 0 deletions webview-ui/src/components/settings/providers/LiteLLM.tsx
@@ -182,6 +182,20 @@
}
return null
})()}

{/* Flatten content option for auto_router compatibility */}
<div className="mt-4">
<VSCodeCheckbox
checked={apiConfiguration.litellmFlattenContent !== false}
onChange={(e: any) => {
setApiConfigurationField("litellmFlattenContent", e.target.checked)
}}>
<span className="font-medium">{t("settings:providers.litellmFlattenContent")}</span>
</VSCodeCheckbox>
<div className="text-sm text-vscode-descriptionForeground ml-6 mt-1">
{t("settings:providers.litellmFlattenContentDescription")}
</div>
</div>
</>
)
}
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ca/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/de/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/en/settings.json
@@ -456,6 +456,8 @@
"getXaiApiKey": "Get xAI API Key",
"litellmApiKey": "LiteLLM API Key",
"litellmBaseUrl": "LiteLLM Base URL",
"litellmFlattenContent": "Flatten message content",
"litellmFlattenContentDescription": "Convert array-formatted message content to plain strings. Enable this for compatibility with LiteLLM's auto_router and embedding-based routing features. Disable only if you need multimodal content (images).",
"awsCredentials": "AWS Credentials",
"awsProfile": "AWS Profile",
"awsApiKey": "Amazon Bedrock API Key",
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/es/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/fr/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/hi/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/id/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/it/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ja/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/ko/settings.json
2 changes: 2 additions & 0 deletions webview-ui/src/i18n/locales/nl/settings.json