-
Notifications
You must be signed in to change notification settings - Fork 238
fix: use RE2-compatible escaper for repo/file filter queries #1004
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a9252bd
890f6c4
c1bea08
ac166d6
6b97af2
79c3147
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -10,7 +10,9 @@ import { useBrowseParams } from "./hooks/useBrowseParams"; | |||||||||||||
| import { FileSearchCommandDialog } from "./components/fileSearchCommandDialog"; | ||||||||||||||
| import { useDomain } from "@/hooks/useDomain"; | ||||||||||||||
| import { SearchBar } from "../components/searchBar"; | ||||||||||||||
| import escapeStringRegexp from "escape-string-regexp"; | ||||||||||||||
| // Escapes special RE2 regex characters using backslash (compatible with Zoekt/Go RE2). | ||||||||||||||
| // escape-string-regexp v5 uses \xNN hex escapes which RE2 does not support. | ||||||||||||||
| const escapeRE2 = (s: string) => s.replace(/[.+*?^${}[\]|(\\]/g, '\\$&'); | ||||||||||||||
|
Comment on lines
+13
to
+15
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
file="packages/web/src/app/[domain]/browse/layoutClient.tsx"
rg -n "const escapeRE2" "$file"
python - <<'PY'
from pathlib import Path
import sys
file = Path("packages/web/src/app/[domain]/browse/layoutClient.tsx")
line = next((l for l in file.read_text().splitlines() if "const escapeRE2" in l), None)
if not line:
print("escapeRE2 declaration not found")
sys.exit(1)
# Pull content inside /[ ... ]/g for quick static validation
try:
char_class = line.split("/[", 1)[1].split("]/g", 1)[0]
except Exception:
print("Could not parse regex char class from:", line)
sys.exit(1)
print("escapeRE2 char class:", char_class)
required = [".", "+", "*", "?", "^", "$", "{", "}", "[", "]", "|", "(", ")", "\\\\"]
missing = [c for c in required if c not in char_class]
print("missing:", missing)
sys.exit(1 if missing else 0)
PYRepository: sourcebot-dev/sourcebot Length of output: 196 🏁 Script executed: # Check the file structure and verify the patch suggestion
cat -n packages/web/src/app/[domain]/browse/layoutClient.tsx | head -50
# Also check if escapeRE2 is used anywhere else in the codebase
rg "escapeRE2" --type ts --type tsxRepository: sourcebot-dev/sourcebot Length of output: 2431 🏁 Script executed: # Search for escapeRE2 usage with a simpler approach
rg "escapeRE2"
# Verify the suggested patch - check if all RE2 metacharacters are covered
python - <<'PY'
import re
# Current regex pattern from line 15
current = r"[.+*?^${}[\]|(\\]"
# Suggested patch
suggested = r"[\\.^$|?*+()[\]{}]"
# All RE2 regex metacharacters that need escaping
re2_metacharacters = {'.', '+', '*', '?', '^', '$', '{', '}', '[', ']', '|', '(', ')', '\\'}
print("=== Current Pattern Analysis ===")
print(f"Pattern: {current}")
current_chars = set()
# Parse the character class manually since it's complex
for c in '.+*?^${}[|(\\':
if c in current:
current_chars.add(c)
current_chars.add(']') # ] is in the class as \]
print(f"Escapes: {sorted(current_chars)}")
current_missing = re2_metacharacters - current_chars
print(f"Missing: {current_missing}")
print("\n=== Suggested Pattern Analysis ===")
print(f"Pattern: {suggested}")
suggested_chars = set()
for c in '.^$|?*+()[]{\\':
if c in suggested:
suggested_chars.add(c)
print(f"Escapes: {sorted(suggested_chars)}")
suggested_missing = re2_metacharacters - suggested_chars
print(f"Missing: {suggested_missing}")
print(f"\n=== Verdict ===")
print(f"Current pattern missing: {current_missing}")
print(f"Suggested patch fixes issue: {')' in suggested_chars}")
PYRepository: sourcebot-dev/sourcebot Length of output: 1838 Escape set is incomplete: The character class on line 15 escapes
All instances must be fixed consistently. Apply the corrected pattern to all locations: Suggested patch-const escapeRE2 = (s: string) => s.replace(/[.+*?^${}[\]|(\\]/g, '\\$&');
+const escapeRE2 = (s: string) => s.replace(/[\\.^$|?*+()[\]{}]/g, '\\$&');Consider extracting this function to a shared utility module to prevent duplication and ensure consistency across the codebase. 📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||
| import { Session } from "next-auth"; | ||||||||||||||
|
|
||||||||||||||
| interface LayoutProps { | ||||||||||||||
|
|
@@ -37,7 +39,7 @@ export function LayoutClient({ | |||||||||||||
| <SearchBar | ||||||||||||||
| size="sm" | ||||||||||||||
| defaults={{ | ||||||||||||||
| query: `repo:^${escapeStringRegexp(repoName)}$${revisionName ? ` rev:${revisionName}` : ''} `, | ||||||||||||||
| query: `repo:^${escapeRE2(repoName)}$${revisionName ? ` rev:${revisionName}` : ''} `, | ||||||||||||||
| }} | ||||||||||||||
| className="w-full" | ||||||||||||||
| isSearchAssistSupported={isSearchAssistSupported} | ||||||||||||||
|
|
||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -9,7 +9,9 @@ import { toolNames } from "./constants"; | |||||
| import { listReposQueryParamsSchema } from "@/lib/schemas"; | ||||||
| import { ListReposQueryParams } from "@/lib/types"; | ||||||
| import { listRepos } from "@/app/api/(server)/repos/listReposApi"; | ||||||
| import escapeStringRegexp from "escape-string-regexp"; | ||||||
| // Escapes special RE2 regex characters using backslash (compatible with Zoekt/Go RE2). | ||||||
| // escape-string-regexp v5 uses \xNN hex escapes which RE2 does not support. | ||||||
| const escapeRE2 = (s: string) => s.replace(/[.+*?^${}[\]|(\\]/g, '\\$&'); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: Outside character classes, RE2 treats these as metacharacters (i.e., not literal unless escaped): Yes—this includes both Sources: [1] RE2 syntax (google/re2 wiki). [2] RE2 syntax reference (CRAN mirror).
The regex character class escapes Suggested fix-const escapeRE2 = (s: string) => s.replace(/[.+*?^${}[\]|(\\]/g, '\\$&');
+const escapeRE2 = (s: string) => s.replace(/[.+*?^${}()|[\]\\]/g, '\\$&');📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||
|
|
||||||
| // @NOTE: When adding a new tool, follow these steps: | ||||||
| // 1. Add the tool to the `toolNames` constant in `constants.ts`. | ||||||
|
|
@@ -198,15 +200,15 @@ export const createCodeSearchTool = (selectedRepos: string[]) => tool({ | |||||
| } | ||||||
|
|
||||||
| if (repos.length > 0) { | ||||||
| query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; | ||||||
| query += ` (repo:${repos.map(id => escapeRE2(id)).join(' or repo:')})`; | ||||||
| } | ||||||
|
|
||||||
| if (languages.length > 0) { | ||||||
| query += ` (lang:${languages.join(' or lang:')})`; | ||||||
| } | ||||||
|
|
||||||
| if (filepaths.length > 0) { | ||||||
| query += ` (file:${filepaths.map(filepath => escapeStringRegexp(filepath)).join(' or file:')})`; | ||||||
| query += ` (file:${filepaths.map(filepath => escapeRE2(filepath)).join(' or file:')})`; | ||||||
| } | ||||||
|
|
||||||
| if (ref) { | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -11,7 +11,9 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; | |||||||||||||
| import { ChatVisibility } from '@sourcebot/db'; | ||||||||||||||
| import { SOURCEBOT_VERSION } from '@sourcebot/shared'; | ||||||||||||||
| import _dedent from 'dedent'; | ||||||||||||||
| import escapeStringRegexp from 'escape-string-regexp'; | ||||||||||||||
| // Escapes special RE2 regex characters using backslash (compatible with Zoekt/Go RE2). | ||||||||||||||
| // escape-string-regexp v5 uses \xNN hex escapes which RE2 does not support. | ||||||||||||||
| const escapeRE2 = (s: string) => s.replace(/[.+*?^${}[\]|(\\]/g, '\\$&'); | ||||||||||||||
|
Comment on lines
+14
to
+16
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: cat -n packages/web/src/features/mcp/server.ts | head -30Repository: sourcebot-dev/sourcebot Length of output: 1532 🏁 Script executed: cat -n packages/web/src/features/mcp/server.ts | sed -n '140,160p'Repository: sourcebot-dev/sourcebot Length of output: 958 🏁 Script executed: rg "escapeRE2" packages/web/src/features/mcp/server.ts -B 2 -A 2Repository: sourcebot-dev/sourcebot Length of output: 722 🌐 Web query:
💡 Result: In Go, the Because Go string literals also treat regexp.MustCompile("\\(\\d+,\\d+\\)") // interpreted string literalOr avoid double escaping by using a raw string literal (backticks): regexp.MustCompile(`\(\d+,\d+\)`) // raw string literalIf you need to safely insert arbitrary text (including parentheses) into a regex, use Sources: Go [1] (pkg.go.dev) Citations:
🏁 Script executed: rg "escapeRE2|filterByRepos|filterByFilepaths" packages/web/src/features/mcp/ -B 1 -A 1Repository: sourcebot-dev/sourcebot Length of output: 2149 🏁 Script executed: find packages -name "*.test.ts" -o -name "*.test.tsx" -o -name "*.spec.ts" | xargs grep -l "escapeRE2\|server.ts" 2>/dev/null | head -5Repository: sourcebot-dev/sourcebot Length of output: 49 Escape The current pattern at line 16 escapes Proposed fix-const escapeRE2 = (s: string) => s.replace(/[.+*?^${}[\]|(\\]/g, '\\$&');
+const escapeRE2 = (s: string) => s.replace(/[\\.^$|?*+()[\]{}]/g, '\\$&');📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||
| import { z } from 'zod'; | ||||||||||||||
| import { | ||||||||||||||
| ListTreeEntry, | ||||||||||||||
|
|
@@ -141,13 +143,13 @@ export function createMcpServer(): McpServer { | |||||||||||||
| maxTokens?: number; | ||||||||||||||
| }) => { | ||||||||||||||
| if (repos.length > 0) { | ||||||||||||||
| query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`; | ||||||||||||||
| query += ` (repo:${repos.map(id => escapeRE2(id)).join(' or repo:')})`; | ||||||||||||||
| } | ||||||||||||||
| if (languages.length > 0) { | ||||||||||||||
| query += ` (lang:${languages.join(' or lang:')})`; | ||||||||||||||
| } | ||||||||||||||
| if (filepaths.length > 0) { | ||||||||||||||
| query += ` (file:${filepaths.map(fp => escapeStringRegexp(fp)).join(' or file:')})`; | ||||||||||||||
| query += ` (file:${filepaths.map(fp => escapeRE2(fp)).join(' or file:')})`; | ||||||||||||||
| } | ||||||||||||||
| if (ref) { | ||||||||||||||
| query += ` ( rev:${ref} )`; | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
cat -n packages/mcp/src/index.ts | head -20Repository: sourcebot-dev/sourcebot
Length of output: 1496
🏁 Script executed:
# Check if the file exists and examine the context around lines 7-9 wc -l packages/mcp/src/index.tsRepository: sourcebot-dev/sourcebot
Length of output: 96
🏁 Script executed:
Repository: sourcebot-dev/sourcebot
Length of output: 4830
🏁 Script executed:
Repository: sourcebot-dev/sourcebot
Length of output: 94
🏁 Script executed:
Repository: sourcebot-dev/sourcebot
Length of output: 94
🌐 Web query:
RE2 regular expression special characters that need escaping💡 Result:
In RE2, you escape a metacharacter by prefixing it with a backslash (e.g.,
\+to match a literal+). [1]Metacharacters you typically need to escape (to match literally)
.^$*+?()[{\|(and, in general, other punctuation when used as operators) [2]RE2 also supports escaping “any punctuation character” with
\(e.g.,\*), and you can escape a whole literal chunk with\Q...\E. [3]Inside a character class (
[...])You generally only need to escape (or place carefully):
\]-^(when^would otherwise negate the class) [2]Sources: [1] [2] [3]
escapeRE2is missing closing-parenthesis escaping.Line 9's regex pattern fails to escape
), which is a special character in RE2. This can cause query filters to fail or parse incorrectly when repo IDs or filepaths contain closing parentheses.Proposed fix
📝 Committable suggestion
🤖 Prompt for AI Agents