diff --git a/src/core/mentions/__tests__/folder-limits.spec.ts b/src/core/mentions/__tests__/folder-limits.spec.ts
new file mode 100644
index 00000000000..a3ae8341e22
--- /dev/null
+++ b/src/core/mentions/__tests__/folder-limits.spec.ts
@@ -0,0 +1,251 @@
+// npx vitest src/core/mentions/__tests__/folder-limits.spec.ts
+
+import * as path from "path"
+import { MAX_FOLDER_FILES_TO_READ, MAX_FOLDER_CONTENT_SIZE } from "../index"
+
+// Mock vscode
+vi.mock("vscode", () => ({
+	window: {
+		showErrorMessage: vi.fn(),
+	},
+	languages: {
+		getDiagnostics: vi.fn().mockReturnValue([]),
+	},
+}))
+
+// Mock i18n
+vi.mock("../../../i18n", () => ({
+	t: vi.fn((key: string) => key),
+}))
+
+// Mock isbinaryfile
+vi.mock("isbinaryfile", () => ({
+	isBinaryFile: vi.fn().mockResolvedValue(false),
+}))
+
+// Mock fs/promises
+const mockReaddir = vi.fn()
+const mockStat = vi.fn()
+const mockReadFile = vi.fn()
+
+vi.mock("fs/promises", () => ({
+	default: {
+		readdir: (...args: any[]) => mockReaddir(...args),
+		stat: (...args: any[]) => mockStat(...args),
+		readFile: (...args: any[]) => mockReadFile(...args),
+		access: vi.fn().mockResolvedValue(undefined),
+	},
+	readdir: (...args: any[]) => mockReaddir(...args),
+	stat: (...args: any[]) => mockStat(...args),
+	readFile: (...args: any[]) => mockReadFile(...args),
+	access: vi.fn().mockResolvedValue(undefined),
+}))
+
+// Mock extract-text module
+vi.mock("../../../integrations/misc/extract-text", () => ({
+	extractTextFromFileWithMetadata: vi.fn().mockImplementation(async (filePath: string) => {
+		// Return a predictable result for testing
+		return {
+			content: `1 | // Content of ${path.basename(filePath)}\n2 | const x = 1;`,
+			totalLines: 2,
+			returnedLines: 2,
+			wasTruncated: false,
+		}
+	}),
+}))
+
+describe("Folder Mention Content Limits", () => {
+	beforeEach(() => {
+		vi.clearAllMocks()
+	})
+
+	describe("Constants", () => {
+		it("should export MAX_FOLDER_FILES_TO_READ constant", () => {
+			expect(MAX_FOLDER_FILES_TO_READ).toBeDefined()
+			expect(typeof MAX_FOLDER_FILES_TO_READ).toBe("number")
+			expect(MAX_FOLDER_FILES_TO_READ).toBeGreaterThan(0)
+		})
+
+		it("should export MAX_FOLDER_CONTENT_SIZE constant", () => {
+			expect(MAX_FOLDER_CONTENT_SIZE).toBeDefined()
+			expect(typeof MAX_FOLDER_CONTENT_SIZE).toBe("number")
+			expect(MAX_FOLDER_CONTENT_SIZE).toBeGreaterThan(0)
+		})
+
+		it("should have reasonable default values", () => {
+			// MAX_FOLDER_FILES_TO_READ should be reasonable (e.g., 10)
+			expect(MAX_FOLDER_FILES_TO_READ).toBeLessThanOrEqual(50)
+			expect(MAX_FOLDER_FILES_TO_READ).toBeGreaterThanOrEqual(5)
+
+			// MAX_FOLDER_CONTENT_SIZE should be reasonable (e.g., 100KB)
+			expect(MAX_FOLDER_CONTENT_SIZE).toBeGreaterThanOrEqual(50_000)
+			expect(MAX_FOLDER_CONTENT_SIZE).toBeLessThanOrEqual(500_000)
+		})
+	})
+
+	describe("parseMentions with folder limits", () => {
+		// These tests import parseMentions dynamically to ensure mocks are applied
+		it("should limit the number of files read from a folder", async () => {
+			// Create many file entries to exceed the limit
+			const numFiles = MAX_FOLDER_FILES_TO_READ + 5
+			const entries = Array.from({ length: numFiles }, (_, i) => ({
+				name: `file${i}.ts`,
+				isFile: () => true,
+				isDirectory: () => false,
+			}))
+
+			mockStat.mockResolvedValue({
+				isFile: () => false,
+				isDirectory: () => true,
+			})
+			mockReaddir.mockResolvedValue(entries)
+
+			// Import the module after mocks are set up
+			const { parseMentions } = await import("../index")
+			const mockUrlContentFetcher = {
+				launchBrowser: vi.fn(),
+				urlToMarkdown: vi.fn(),
+				closeBrowser: vi.fn(),
+			} as any
+
+			const result = await parseMentions("Check @/test-folder/", "/workspace", mockUrlContentFetcher)
+
+			// Should have content blocks with folder content
+			expect(result.contentBlocks.length).toBeGreaterThan(0)
+			const folderBlock = result.contentBlocks.find((b) => b.type === "folder")
+			expect(folderBlock).toBeDefined()
+
+			// Should contain truncation notice
+			expect(folderBlock?.content).toContain("Content Truncated")
+			expect(folderBlock?.content).toContain(`Only ${MAX_FOLDER_FILES_TO_READ} files were read`)
+		})
+
+		it("should limit total content size", async () => {
+			// Create a few files that together exceed the content size limit
+			const numFiles = 3
+			const entries = Array.from({ length: numFiles }, (_, i) => ({
+				name: `file${i}.ts`,
+				isFile: () => true,
+				isDirectory: () => false,
+			}))
+
+			mockStat.mockResolvedValue({
+				isFile: () => false,
+				isDirectory: () => true,
+			})
+			mockReaddir.mockResolvedValue(entries)
+
+			// Mock extractTextFromFileWithMetadata to return large content
+			const { extractTextFromFileWithMetadata } = await import("../../../integrations/misc/extract-text")
+			vi.mocked(extractTextFromFileWithMetadata).mockImplementation(async () => {
+				// Return content that's about half of MAX_FOLDER_CONTENT_SIZE
+				const largeContent = "x".repeat(Math.ceil(MAX_FOLDER_CONTENT_SIZE / 2))
+				return {
+					content: largeContent,
+					totalLines: 1000,
+					returnedLines: 1000,
+					wasTruncated: false,
+				}
+			})
+
+			// Import the module after mocks are set up
+			const { parseMentions } = await import("../index")
+			const mockUrlContentFetcher = {
+				launchBrowser: vi.fn(),
+				urlToMarkdown: vi.fn(),
+				closeBrowser: vi.fn(),
+			} as any
+
+			const result = await parseMentions("Check @/test-folder/", "/workspace", mockUrlContentFetcher)
+
+			// Should have content blocks with folder content
+			expect(result.contentBlocks.length).toBeGreaterThan(0)
+			const folderBlock = result.contentBlocks.find((b) => b.type === "folder")
+			expect(folderBlock).toBeDefined()
+
+			// Should contain truncation notice due to size
+			expect(folderBlock?.content).toContain("Content Truncated")
+			expect(folderBlock?.content).toContain("KB to prevent context window overflow")
+		})
+
+		it("should not add truncation notice when within limits", async () => {
+			// Create a few small files within limits
+			const numFiles = 3
+			const entries = Array.from({ length: numFiles }, (_, i) => ({
+				name: `file${i}.ts`,
+				isFile: () => true,
+				isDirectory: () => false,
+			}))
+
+			mockStat.mockResolvedValue({
+				isFile: () => false,
+				isDirectory: () => true,
+			})
+			mockReaddir.mockResolvedValue(entries)
+
+			// Mock extractTextFromFileWithMetadata to return small content
+			const { extractTextFromFileWithMetadata } = await import("../../../integrations/misc/extract-text")
+			vi.mocked(extractTextFromFileWithMetadata).mockImplementation(async (filePath: string) => {
+				return {
+					content: `1 | // Small file ${path.basename(filePath)}`,
+					totalLines: 1,
+					returnedLines: 1,
+					wasTruncated: false,
+				}
+			})
+
+			// Import the module after mocks are set up
+			const { parseMentions } = await import("../index")
+			const mockUrlContentFetcher = {
+				launchBrowser: vi.fn(),
+				urlToMarkdown: vi.fn(),
+				closeBrowser: vi.fn(),
+			} as any
+
+			const result = await parseMentions("Check @/small-folder/", "/workspace", mockUrlContentFetcher)
+
+			// Should have content blocks with folder content
+			expect(result.contentBlocks.length).toBeGreaterThan(0)
+			const folderBlock = result.contentBlocks.find((b) => b.type === "folder")
+			expect(folderBlock).toBeDefined()
+
+			// Should NOT contain truncation notice
+			expect(folderBlock?.content).not.toContain("Content Truncated")
+		})
+
+		it("should still show folder listing even when files are skipped", async () => {
+			// Create many file entries
+			const numFiles = MAX_FOLDER_FILES_TO_READ + 10
+			const entries = Array.from({ length: numFiles }, (_, i) => ({
+				name: `file${i}.ts`,
+				isFile: () => true,
+				isDirectory: () => false,
+			}))
+
+			mockStat.mockResolvedValue({
+				isFile: () => false,
+				isDirectory: () => true,
+			})
+			mockReaddir.mockResolvedValue(entries)
+
+			// Import the module after mocks are set up
+			const { parseMentions } = await import("../index")
+			const mockUrlContentFetcher = {
+				launchBrowser: vi.fn(),
+				urlToMarkdown: vi.fn(),
+				closeBrowser: vi.fn(),
+			} as any
+
+			const result = await parseMentions("Check @/large-folder/", "/workspace", mockUrlContentFetcher)
+
+			const folderBlock = result.contentBlocks.find((b) => b.type === "folder")
+			expect(folderBlock).toBeDefined()
+
+			// Should contain the folder listing with all files listed
+			// (the listing shows all files, only content reading is limited)
+			expect(folderBlock?.content).toContain("Folder listing:")
+			expect(folderBlock?.content).toContain("file0.ts")
+			expect(folderBlock?.content).toContain(`file${numFiles - 1}.ts`)
+		})
+	})
+})
diff --git a/src/core/mentions/index.ts b/src/core/mentions/index.ts
index faa7236e67c..4104e8b7428 100644
--- a/src/core/mentions/index.ts
+++ b/src/core/mentions/index.ts
@@ -13,6 +13,18 @@ import { extractTextFromFileWithMetadata, type ExtractTextResult } from "../../i
 import { diagnosticsToProblemsString } from "../../integrations/diagnostics"
 import { DEFAULT_LINE_LIMIT } from "../prompts/tools/native-tools/read_file"
 
+/**
+ * Maximum number of files to read from a folder mention.
+ * This prevents context window explosion when mentioning large directories.
+ */
+export const MAX_FOLDER_FILES_TO_READ = 10
+
+/**
+ * Maximum total content size (in characters) to read from a folder mention.
+ * This is approximately 100KB which should be safe for most context windows.
+ */
+export const MAX_FOLDER_CONTENT_SIZE = 100_000
+
 import { UrlContentFetcher } from "../../services/browser/UrlContentFetcher"
 import { FileContextTracker } from "../context-tracking/FileContextTracker"
 
@@ -390,6 +402,12 @@ async function getFileOrFolderContentWithMetadata(
 		const fileReadResults: string[] = []
 		const LOCK_SYMBOL = "🔒"
 
+		// Track limits to prevent context window explosion
+		let filesRead = 0
+		let totalContentSize = 0
+		let limitReached: "files" | "size" | null = null
+		let skippedFilesCount = 0
+
 		for (let index = 0; index < entries.length; index++) {
 			const entry = entries[index]
 			const isLast = index === entries.length - 1
@@ -410,13 +428,44 @@
 			if (entry.isFile()) {
 				folderListing += `${linePrefix}${displayName}\n`
 				if (!isIgnored) {
+					// Check if we've hit the file limit
+					if (filesRead >= MAX_FOLDER_FILES_TO_READ) {
+						if (!limitReached) {
+							limitReached = "files"
+						}
+						skippedFilesCount++
+						continue
+					}
+
+					// Check if we've hit the content size limit
+					if (totalContentSize >= MAX_FOLDER_CONTENT_SIZE) {
+						if (!limitReached) {
+							limitReached = "size"
+						}
+						skippedFilesCount++
+						continue
+					}
+
 					const filePath = path.join(mentionPath, entry.name)
 					const absoluteFilePath = path.resolve(absPath, entry.name)
 					try {
 						const isBinary = await isBinaryFile(absoluteFilePath).catch(() => false)
 						if (!isBinary) {
 							const result = await extractTextFromFileWithMetadata(absoluteFilePath)
-							fileReadResults.push(formatFileReadResult(filePath.toPosix(), result))
+							const fileContent = formatFileReadResult(filePath.toPosix(), result)
+
+							// Check if adding this file would exceed the size limit
+							if (totalContentSize + fileContent.length > MAX_FOLDER_CONTENT_SIZE) {
+								if (!limitReached) {
+									limitReached = "size"
+								}
+								skippedFilesCount++
+								continue
+							}
+
+							fileReadResults.push(fileContent)
+							filesRead++
+							totalContentSize += fileContent.length
 						}
 					} catch (error) {
 						// Skip files that can't be read
@@ -435,6 +484,15 @@
 			content += `\n\n--- File Contents ---\n\n${fileReadResults.join("\n\n")}`
 		}
 
+		// Add truncation notice if limits were hit
+		if (limitReached) {
+			const limitMessage =
+				limitReached === "files"
+					? `\n\n--- Content Truncated ---\nNote: Only ${MAX_FOLDER_FILES_TO_READ} files were read to prevent context window overflow. ${skippedFilesCount} additional file(s) were skipped.\nTo read specific files, use individual @file mentions instead of @folder.`
+					: `\n\n--- Content Truncated ---\nNote: Content was limited to approximately ${Math.round(MAX_FOLDER_CONTENT_SIZE / 1000)}KB to prevent context window overflow. ${skippedFilesCount} additional file(s) were skipped.\nTo read specific files, use individual @file mentions instead of @folder.`
+			content += limitMessage
+		}
+
 		return {
 			type: "folder",
 			path: mentionPath,
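For context on how these limits surface to a caller, a minimal TypeScript usage sketch follows. It reuses only the shapes visible in this diff (the three-argument parseMentions call, result.contentBlocks, the "folder" block type, the truncation marker text, and the two exported constants); the import path, the stubbed UrlContentFetcher, and the mention text are illustrative assumptions, not part of the change.

// Usage sketch only: the import path, stub fetcher, and mention text are assumptions.
import { parseMentions, MAX_FOLDER_FILES_TO_READ, MAX_FOLDER_CONTENT_SIZE } from "./src/core/mentions"

// Minimal stand-in for UrlContentFetcher (URL mentions are not exercised here).
const urlContentFetcher = {
	launchBrowser: async () => {},
	urlToMarkdown: async (_url: string) => "",
	closeBrowser: async () => {},
} as any

async function inspectFolderMention(): Promise<void> {
	const result = await parseMentions("Summarize @/src/", "/workspace", urlContentFetcher)
	const folderBlock = result.contentBlocks.find((b: any) => b.type === "folder")

	// When a folder holds more than MAX_FOLDER_FILES_TO_READ files, or its combined
	// content exceeds MAX_FOLDER_CONTENT_SIZE characters, the block ends with a
	// "--- Content Truncated ---" notice instead of the remaining file contents.
	if (folderBlock?.content?.includes("Content Truncated")) {
		console.log(`Folder read capped at ${MAX_FOLDER_FILES_TO_READ} files / ~${MAX_FOLDER_CONTENT_SIZE / 1000}KB`)
	}
}

Running inspectFolderMention() against a sufficiently large folder should hit one of the two caps and print the capped-limits line, while small folders should come back with full contents and no truncation notice.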