Skip to content

Commit d9e99a4

Browse files
feat(knowledge) (#449)
* feat(knowledge): setup knowledge and base UI * improvement(knowledge): empty state UI * feat(knowledge): created schema * added s3 bucket for kbs * fix: remove dummy values * feat(knowledge): embedding view; schema adjustments; migration history; navigation * feat(knowledge): block/tool for vector search --------- Co-authored-by: Waleed Latif <[email protected]>
1 parent 7d64082 commit d9e99a4

File tree

49 files changed

+11889
-38
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+11889
-38
lines changed

apps/sim/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,6 @@ next-env.d.ts
4747

4848
# Sentry Config File
4949
.env.sentry-build-plugin
50+
51+
# Uploads
52+
/uploads
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
import { and, eq, isNull } from 'drizzle-orm'
2+
import { type NextRequest, NextResponse } from 'next/server'
3+
import { z } from 'zod'
4+
import { getSession } from '@/lib/auth'
5+
import { createLogger } from '@/lib/logs/console-logger'
6+
import { db } from '@/db'
7+
import { document, embedding, knowledgeBase } from '@/db/schema'
8+
9+
const logger = createLogger('ChunkByIdAPI')
10+
11+
// Schema for chunk updates
12+
const UpdateChunkSchema = z.object({
13+
content: z.string().min(1, 'Content is required').optional(),
14+
enabled: z.boolean().optional(),
15+
searchRank: z.number().min(0).optional(),
16+
qualityScore: z.number().min(0).max(1).optional(),
17+
})
18+
19+
/**
 * Resolves whether `userId` may operate on a chunk by walking the chain
 * knowledge base -> document -> chunk, one query per level.
 *
 * @param knowledgeBaseId - Id of the knowledge base the chunk is expected to live in.
 * @param documentId - Id of the document that should own the chunk.
 * @param chunkId - Id of the chunk (a row in the `embedding` table).
 * @param userId - Id of the user making the request.
 * @returns On success `{ hasAccess: true, chunk, document, knowledgeBase }`.
 *   On failure `{ hasAccess: false, reason }`, with `notFound: true` added when
 *   the knowledge base, document, or chunk could not be found.
 */
async function checkChunkAccess(
  knowledgeBaseId: string,
  documentId: string,
  chunkId: string,
  userId: string
) {
  // Level 1: the knowledge base must exist and must not be soft-deleted.
  const kbRows = await db
    .select({ id: knowledgeBase.id, userId: knowledgeBase.userId })
    .from(knowledgeBase)
    .where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt)))
    .limit(1)

  const kbRecord = kbRows[0]
  if (!kbRecord) {
    return { hasAccess: false, notFound: true, reason: 'Knowledge base not found' }
  }

  // Only the owner of the knowledge base is allowed through.
  const isOwner = kbRecord.userId === userId
  if (!isOwner) {
    return { hasAccess: false, reason: 'Unauthorized knowledge base access' }
  }

  // Level 2: the document must be a live member of that knowledge base.
  const documentFilter = and(
    eq(document.id, documentId),
    eq(document.knowledgeBaseId, knowledgeBaseId),
    isNull(document.deletedAt)
  )
  const documentRows = await db.select().from(document).where(documentFilter).limit(1)

  const documentRecord = documentRows[0]
  if (!documentRecord) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  // Level 3: the chunk must belong to that document.
  const chunkFilter = and(eq(embedding.id, chunkId), eq(embedding.documentId, documentId))
  const chunkRows = await db.select().from(embedding).where(chunkFilter).limit(1)

  const chunkRecord = chunkRows[0]
  if (!chunkRecord) {
    return { hasAccess: false, notFound: true, reason: 'Chunk not found' }
  }

  return {
    hasAccess: true,
    chunk: chunkRecord,
    document: documentRecord,
    knowledgeBase: kbRecord,
  }
}
76+
77+
/**
 * GET handler: returns a single chunk of a document in a knowledge base.
 *
 * Responses:
 * - 200 `{ success: true, data: <chunk row> }`
 * - 401 when there is no session user, or the access check denies access
 * - 404 when the knowledge base, document, or chunk is not found
 * - 500 on any unexpected error
 *
 * @param req - Incoming request (not read by this handler).
 * @param params - Route params promise yielding `id` (knowledge base id),
 *   `documentId` and `chunkId`.
 */
export async function GET(
  req: NextRequest,
  { params }: { params: Promise<{ id: string; documentId: string; chunkId: string }> }
) {
  // Short id used to correlate the log lines of one request.
  const requestId = crypto.randomUUID().slice(0, 8)
  const routeParams = await params
  const knowledgeBaseId = routeParams.id
  const documentId = routeParams.documentId
  const chunkId = routeParams.chunkId

  try {
    const currentSession = await getSession()
    const userId = currentSession?.user?.id
    if (!userId) {
      logger.warn(`[${requestId}] Unauthorized chunk access attempt`)
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const access = await checkChunkAccess(knowledgeBaseId, documentId, chunkId, userId)

    // "Not found" is tested before the generic denial so missing resources yield 404.
    if (access.notFound) {
      logger.warn(
        `[${requestId}] ${access.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
      )
      return NextResponse.json({ error: access.reason }, { status: 404 })
    }

    if (!access.hasAccess) {
      logger.warn(
        `[${requestId}] User ${userId} attempted unauthorized chunk access: ${access.reason}`
      )
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    logger.info(
      `[${requestId}] Retrieved chunk: ${chunkId} from document ${documentId} in knowledge base ${knowledgeBaseId}`
    )

    const payload = { success: true, data: access.chunk }
    return NextResponse.json(payload)
  } catch (error) {
    logger.error(`[${requestId}] Error fetching chunk`, error)
    return NextResponse.json({ error: 'Failed to fetch chunk' }, { status: 500 })
  }
}
125+
126+
/**
 * PUT handler: partially updates a chunk of a document in a knowledge base.
 *
 * The JSON body is validated against `UpdateChunkSchema`; only the fields that
 * are present are written, and `updatedAt` is always refreshed.
 *
 * Responses:
 * - 200 `{ success: true, data: <updated chunk row> }`
 * - 400 when the body is not valid JSON or fails schema validation
 * - 401 when there is no session user, or the access check denies access
 * - 404 when the knowledge base, document, or chunk is not found
 * - 500 on any unexpected error
 *
 * @param req - Incoming request carrying the JSON update payload.
 * @param params - Route params promise yielding `id` (knowledge base id),
 *   `documentId` and `chunkId`.
 */
export async function PUT(
  req: NextRequest,
  { params }: { params: Promise<{ id: string; documentId: string; chunkId: string }> }
) {
  // Short id used to correlate the log lines of one request.
  const requestId = crypto.randomUUID().slice(0, 8)
  const { id: knowledgeBaseId, documentId, chunkId } = await params

  try {
    const session = await getSession()
    if (!session?.user?.id) {
      logger.warn(`[${requestId}] Unauthorized chunk update attempt`)
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const accessCheck = await checkChunkAccess(
      knowledgeBaseId,
      documentId,
      chunkId,
      session.user.id
    )

    if (accessCheck.notFound) {
      logger.warn(
        `[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
      )
      return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
    }

    if (!accessCheck.hasAccess) {
      logger.warn(
        `[${requestId}] User ${session.user.id} attempted unauthorized chunk update: ${accessCheck.reason}`
      )
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    // A malformed or empty JSON body is a client error: answer 400 instead of
    // letting the parse failure fall through to the generic 500 handler.
    let body: unknown
    try {
      body = await req.json()
    } catch (parseError) {
      if (parseError instanceof SyntaxError) {
        logger.warn(`[${requestId}] Malformed JSON in chunk update request`)
        return NextResponse.json({ error: 'Invalid request data' }, { status: 400 })
      }
      throw parseError
    }

    const parsed = UpdateChunkSchema.safeParse(body)
    if (!parsed.success) {
      logger.warn(`[${requestId}] Invalid chunk update data`, {
        errors: parsed.error.errors,
      })
      return NextResponse.json(
        { error: 'Invalid request data', details: parsed.error.errors },
        { status: 400 }
      )
    }

    const { content, enabled, searchRank, qualityScore } = parsed.data

    // Explicitly typed patch (instead of `any`) so a mistyped column is a compile error.
    const updateData: {
      updatedAt: Date
      content?: string
      contentLength?: number
      tokenCount?: number
      enabled?: boolean
      searchRank?: string
      qualityScore?: string
    } = { updatedAt: new Date() }

    if (content !== undefined) {
      updateData.content = content
      updateData.contentLength = content.length
      // Rough token estimate: about 4 characters per token.
      updateData.tokenCount = Math.ceil(content.length / 4)
      // NOTE(review): only the three columns above are derived from the new
      // content here; confirm whether anything else stored on the row (e.g. an
      // embedding vector) must be recomputed when content changes.
    }
    if (enabled !== undefined) updateData.enabled = enabled
    // The two scores are written as strings, as in the previous implementation.
    if (searchRank !== undefined) updateData.searchRank = searchRank.toString()
    if (qualityScore !== undefined) updateData.qualityScore = qualityScore.toString()

    // Scope the write to the (chunk, document) pair that was authorized, and get
    // the updated row back from the same statement instead of re-selecting it.
    const [updatedChunk] = await db
      .update(embedding)
      .set(updateData)
      .where(and(eq(embedding.id, chunkId), eq(embedding.documentId, documentId)))
      .returning()

    // The row can disappear between the access check and the update.
    if (!updatedChunk) {
      logger.warn(
        `[${requestId}] Chunk not found: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
      )
      return NextResponse.json({ error: 'Chunk not found' }, { status: 404 })
    }

    logger.info(
      `[${requestId}] Chunk updated: ${chunkId} in document ${documentId} in knowledge base ${knowledgeBaseId}`
    )

    return NextResponse.json({
      success: true,
      data: updatedChunk,
    })
  } catch (error) {
    logger.error(`[${requestId}] Error updating chunk`, error)
    return NextResponse.json({ error: 'Failed to update chunk' }, { status: 500 })
  }
}
216+
217+
/**
 * DELETE handler: removes a single chunk of a document in a knowledge base.
 *
 * Responses:
 * - 200 `{ success: true, data: { message: 'Chunk deleted successfully' } }`
 * - 401 when there is no session user, or the access check denies access
 * - 404 when the knowledge base, document, or chunk is not found
 * - 500 on any unexpected error
 *
 * @param req - Incoming request (not read by this handler).
 * @param params - Route params promise yielding `id` (knowledge base id),
 *   `documentId` and `chunkId`.
 */
export async function DELETE(
  req: NextRequest,
  { params }: { params: Promise<{ id: string; documentId: string; chunkId: string }> }
) {
  // Short id used to correlate the log lines of one request.
  const requestId = crypto.randomUUID().slice(0, 8)
  const routeParams = await params
  const knowledgeBaseId = routeParams.id
  const documentId = routeParams.documentId
  const chunkId = routeParams.chunkId

  try {
    const currentSession = await getSession()
    const userId = currentSession?.user?.id
    if (!userId) {
      logger.warn(`[${requestId}] Unauthorized chunk delete attempt`)
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const access = await checkChunkAccess(knowledgeBaseId, documentId, chunkId, userId)

    // "Not found" is tested before the generic denial so missing resources yield 404.
    if (access.notFound) {
      logger.warn(
        `[${requestId}] ${access.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
      )
      return NextResponse.json({ error: access.reason }, { status: 404 })
    }

    if (!access.hasAccess) {
      logger.warn(
        `[${requestId}] User ${userId} attempted unauthorized chunk deletion: ${access.reason}`
      )
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    // Remove the embedding row identified by the chunk id.
    const chunkFilter = eq(embedding.id, chunkId)
    await db.delete(embedding).where(chunkFilter)

    logger.info(
      `[${requestId}] Chunk deleted: ${chunkId} from document ${documentId} in knowledge base ${knowledgeBaseId}`
    )

    const payload = { success: true, data: { message: 'Chunk deleted successfully' } }
    return NextResponse.json(payload)
  } catch (error) {
    logger.error(`[${requestId}] Error deleting chunk`, error)
    return NextResponse.json({ error: 'Failed to delete chunk' }, { status: 500 })
  }
}

0 commit comments

Comments
 (0)