Skip to content

Commit 3dfc9cc

Browse files
committed
feat(file): add Decompress operation to extract .zip archives
Adds the inbound half of the archive pair: extracts a .zip back into the workspace with zip-slip path sanitization, symlink skipping, and entry/size caps to bound zip-bomb expansion. Extracted files are returned in the files output, ready to chain downstream.
1 parent f35278a commit 3dfc9cc

7 files changed

Lines changed: 348 additions & 5 deletions

File tree

apps/sim/app/api/tools/file/manage/route.ts

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,47 @@ const uniqueZipEntryName = (name: string, usedNames: Set<string>): string => {
172172
return candidate
173173
}
174174

175+
/** Largest archive, in bytes, that the decompress operation will download (100 MiB). */
const MAX_DECOMPRESS_ARCHIVE_BYTES = 100 * 1024 ** 2
/** Upper bound on the number of entries extracted from one archive. */
const MAX_DECOMPRESS_ENTRIES = 1000
/** Largest uncompressed size, in bytes, allowed for a single archive entry (100 MiB). */
const MAX_DECOMPRESS_ENTRY_BYTES = 100 * 1024 ** 2
/** Largest combined uncompressed size, in bytes, across all entries (200 MiB); bounds zip-bomb expansion. */
const MAX_DECOMPRESS_TOTAL_BYTES = 200 * 1024 ** 2

/** Bit mask that isolates the file-type bits of a unix mode value. */
const S_IFMT = 0o170000
/** File-type bits that mark a unix mode value as a symbolic link. */
const S_IFLNK = 0o120000
186+
187+
/**
 * Returns the uncompressed size recorded for a zip entry, taken from the entry's
 * internal `_data` record so the content never has to be materialized (zip-bomb
 * pre-check). Yields `undefined` when that record is missing or the recorded size
 * is not a finite number.
 */
const readEntryUncompressedSize = (entry: JSZip.JSZipObject): number | undefined => {
  const { _data: internal } = entry as JSZip.JSZipObject & {
    _data?: { uncompressedSize?: number }
  }
  const declared = internal?.uncompressedSize
  if (typeof declared !== 'number' || !Number.isFinite(declared)) return undefined
  return declared
}
193+
194+
/**
 * Reports whether a zip entry is a symbolic link, judged by the file-type bits of
 * its unix mode. Entries that carry no numeric mode are treated as non-symlinks.
 * Symlink entries are never extracted.
 */
const isSymlinkEntry = (entry: JSZip.JSZipObject): boolean => {
  const { unixPermissions } = entry as JSZip.JSZipObject & { unixPermissions?: number | null }
  if (typeof unixPermissions !== 'number') return false
  return (unixPermissions & S_IFMT) === S_IFLNK
}
199+
200+
/**
 * Normalize a zip entry path into safe workspace folder segments, guarding against
 * zip-slip.
 *
 * Backslashes are treated as separators, every segment is trimmed, and empty or `.`
 * segments are dropped, so leading slashes and doubled separators collapse away.
 *
 * @param rawPath - Entry name exactly as stored in the archive (untrusted input).
 * @returns The cleaned segments (folders first, file name last), or `null` when the
 *   entry must be skipped rather than written: nothing is left after cleaning, a
 *   segment is `..` (traversal), or the path contains a NUL character.
 */
const sanitizeArchiveEntryPath = (rawPath: string): string[] | null => {
  // A NUL can never be part of a legitimate file or folder name; skip the entry
  // instead of handing such a name on to folder creation and upload.
  if (rawPath.includes('\0')) return null

  const segments = rawPath
    .replace(/\\/g, '/')
    .split('/')
    .map((segment) => segment.trim())
    .filter((segment) => segment.length > 0 && segment !== '.')

  if (segments.length === 0 || segments.includes('..')) return null
  return segments
}
215+
175216
const isLikelyTextBuffer = (buffer: Buffer): boolean => isUtf8(buffer) && !buffer.includes(0)
176217

177218
/**
@@ -610,6 +651,143 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
610651
},
611652
})
612653
}
654+
655+
case 'decompress': {
656+
const { fileId, fileInput } = body
657+
const requestId = generateRequestId()
658+
659+
const selectedFileIds = fileId ? [fileId] : extractFileIdsFromInput(fileInput)
660+
const selectedInputFiles = fileId ? [] : extractUserFilesFromInput(fileInput)
661+
662+
const workspaceFiles = await Promise.all(
663+
selectedFileIds.map((id) => getWorkspaceFile(workspaceId, id))
664+
)
665+
const missingFileId = selectedFileIds.find((_, index) => !workspaceFiles[index])
666+
if (missingFileId) {
667+
return NextResponse.json(
668+
{ success: false, error: `File not found: "${missingFileId}"` },
669+
{ status: 404 }
670+
)
671+
}
672+
673+
const archive = workspaceFiles
674+
.map((file) => workspaceFileToUserFile(file))
675+
.filter((file): file is NonNullable<ReturnType<typeof workspaceFileToUserFile>> =>
676+
Boolean(file)
677+
)
678+
.concat(selectedInputFiles)[0]
679+
680+
if (!archive) {
681+
return NextResponse.json({ success: false, error: 'File is required' }, { status: 400 })
682+
}
683+
684+
const denied = await assertToolFileAccess(archive.key, userId, requestId, logger)
685+
if (denied) return denied
686+
687+
const archiveBuffer = await downloadFileFromStorage(archive, requestId, logger, {
688+
maxBytes: MAX_DECOMPRESS_ARCHIVE_BYTES,
689+
})
690+
691+
let zip: JSZip
692+
try {
693+
zip = await JSZip.loadAsync(archiveBuffer)
694+
} catch {
695+
return NextResponse.json(
696+
{ success: false, error: `"${archive.name}" is not a valid .zip archive` },
697+
{ status: 400 }
698+
)
699+
}
700+
701+
const entries = Object.values(zip.files).filter(
702+
(entry) => !entry.dir && !isSymlinkEntry(entry)
703+
)
704+
if (entries.length > MAX_DECOMPRESS_ENTRIES) {
705+
return NextResponse.json(
706+
{
707+
success: false,
708+
error: `Archive has too many entries to extract. Maximum is ${MAX_DECOMPRESS_ENTRIES}.`,
709+
},
710+
{ status: 413 }
711+
)
712+
}
713+
714+
const folderIdCache = new Map<string, string | null>()
715+
const extractedFiles: UserFile[] = []
716+
let totalBytes = 0
717+
718+
for (const entry of entries) {
719+
const declaredSize = readEntryUncompressedSize(entry)
720+
if (declaredSize !== undefined && declaredSize > MAX_DECOMPRESS_ENTRY_BYTES) {
721+
return NextResponse.json(
722+
{
723+
success: false,
724+
error: `Archive entry "${entry.name}" is too large to extract. Maximum is ${
725+
MAX_DECOMPRESS_ENTRY_BYTES / (1024 * 1024)
726+
} MB per file.`,
727+
},
728+
{ status: 413 }
729+
)
730+
}
731+
732+
const segments = sanitizeArchiveEntryPath(entry.name)
733+
if (!segments) {
734+
logger.warn('Skipping unsafe archive entry', { name: entry.name })
735+
continue
736+
}
737+
738+
const buffer = await entry.async('nodebuffer')
739+
totalBytes += buffer.length
740+
if (totalBytes > MAX_DECOMPRESS_TOTAL_BYTES) {
741+
return NextResponse.json(
742+
{
743+
success: false,
744+
error: `Archive expands to more than the ${
745+
MAX_DECOMPRESS_TOTAL_BYTES / (1024 * 1024)
746+
} MB extraction limit.`,
747+
},
748+
{ status: 413 }
749+
)
750+
}
751+
752+
const leafName = segments[segments.length - 1]
753+
const folderSegments = segments.slice(0, -1)
754+
const folderKey = folderSegments.join('/')
755+
let folderId = folderIdCache.get(folderKey)
756+
if (folderId === undefined) {
757+
folderId = await ensureWorkspaceFileFolderPath({
758+
workspaceId,
759+
userId,
760+
pathSegments: folderSegments,
761+
})
762+
folderIdCache.set(folderKey, folderId)
763+
}
764+
765+
const mimeType = getMimeTypeFromExtension(getFileExtension(leafName))
766+
const uploaded = await uploadWorkspaceFile(
767+
workspaceId,
768+
userId,
769+
buffer,
770+
leafName,
771+
mimeType,
772+
{ folderId }
773+
)
774+
extractedFiles.push({ ...uploaded, url: ensureAbsoluteUrl(uploaded.url) })
775+
}
776+
777+
logger.info('Archive decompressed', {
778+
fileId: archive.id,
779+
name: archive.name,
780+
extractedCount: extractedFiles.length,
781+
})
782+
783+
return NextResponse.json({
784+
success: true,
785+
data: {
786+
file: extractedFiles[0],
787+
files: extractedFiles,
788+
},
789+
})
790+
}
613791
}
614792
} catch (error) {
615793
if (isWorkspaceAccessDeniedError(error)) {

apps/sim/blocks/blocks/file.ts

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -822,9 +822,9 @@ export const FileV5Block: BlockConfig<FileParserV3Output> = {
822822
...FileV4Block,
823823
type: 'file_v5',
824824
name: 'File',
825-
description: 'Read, get content, fetch, write, append, and compress files',
825+
description: 'Read, get content, fetch, write, append, compress, and decompress files',
826826
longDescription:
827-
'Read workspace file objects, extract the text content of files, fetch and parse files from URLs with optional headers, write new workspace files, append content to existing files, or compress files into a .zip archive.',
827+
'Read workspace file objects, extract the text content of files, fetch and parse files from URLs with optional headers, write new workspace files, append content to existing files, compress files into a .zip archive, or extract a .zip archive into the workspace.',
828828
hideFromToolbar: false,
829829
bestPractices: `
830830
- Read returns workspace file objects in the "files" output and does NOT include their text. Use it to pick files or pass file references downstream (e.g. as attachments).
@@ -833,7 +833,8 @@ export const FileV5Block: BlockConfig<FileParserV3Output> = {
833833
- Get Content's "contents" can be large; it is persisted through the execution large-value system automatically, so prefer it over inlining file text any other way.
834834
- Use Fetch for external file URLs. Add headers for authenticated downloads, for example Slack private file URLs require an Authorization Bearer token.
835835
- Use Write to create a new workspace file and Append to add content to an existing one.
836-
- Use Compress to bundle one or more files into a single .zip archive stored in the workspace. The new archive is returned in the "file"/"files" outputs, which is handy for getting large attachments under provider upload limits.
836+
- Use Compress to bundle one or more files into a single .zip archive stored in the workspace. The new archive is returned in the "file"/"files" outputs.
837+
- Use Decompress to extract a .zip archive back into the workspace; the extracted files are returned in the "files" output, ready to chain into Get Content or downstream blocks.
837838
`,
838839
subBlocks: [
839840
{
@@ -847,6 +848,7 @@ export const FileV5Block: BlockConfig<FileParserV3Output> = {
847848
{ label: 'Write', id: 'file_write' },
848849
{ label: 'Append', id: 'file_append' },
849850
{ label: 'Compress', id: 'file_compress' },
851+
{ label: 'Decompress', id: 'file_decompress' },
850852
],
851853
value: () => 'file_read',
852854
},
@@ -993,6 +995,27 @@ export const FileV5Block: BlockConfig<FileParserV3Output> = {
993995
placeholder: 'archive.zip (auto-named from source if omitted)',
994996
condition: { field: 'operation', value: 'file_compress' },
995997
},
998+
{
999+
id: 'decompressFile',
1000+
title: 'Archive',
1001+
type: 'file-upload' as SubBlockType,
1002+
canonicalParamId: 'decompressInput',
1003+
acceptedTypes: '.zip',
1004+
placeholder: 'Select a .zip archive',
1005+
mode: 'basic',
1006+
condition: { field: 'operation', value: 'file_decompress' },
1007+
required: { field: 'operation', value: 'file_decompress' },
1008+
},
1009+
{
1010+
id: 'decompressFileId',
1011+
title: 'File ID',
1012+
type: 'short-input' as SubBlockType,
1013+
canonicalParamId: 'decompressInput',
1014+
placeholder: 'Workspace file ID of the .zip archive',
1015+
mode: 'advanced',
1016+
condition: { field: 'operation', value: 'file_decompress' },
1017+
required: { field: 'operation', value: 'file_decompress' },
1018+
},
9961019
],
9971020
tools: {
9981021
access: [
@@ -1002,6 +1025,7 @@ export const FileV5Block: BlockConfig<FileParserV3Output> = {
10021025
'file_write',
10031026
'file_append',
10041027
'file_compress',
1028+
'file_decompress',
10051029
],
10061030
config: {
10071031
tool: (params) => params.operation || 'file_read',
@@ -1075,6 +1099,31 @@ export const FileV5Block: BlockConfig<FileParserV3Output> = {
10751099
}
10761100
}
10771101

1102+
if (operation === 'file_decompress') {
1103+
const decompressInput = params.decompressInput
1104+
if (!decompressInput) {
1105+
throw new Error('File is required for decompress')
1106+
}
1107+
1108+
const fileIds = parseReadFileIds(decompressInput)
1109+
if (fileIds) {
1110+
return {
1111+
fileId: Array.isArray(fileIds) ? fileIds[0] : fileIds,
1112+
workspaceId: params._context?.workspaceId,
1113+
}
1114+
}
1115+
1116+
const normalized = normalizeFileInput(decompressInput, { single: true })
1117+
if (!normalized) {
1118+
throw new Error('File is required for decompress')
1119+
}
1120+
1121+
return {
1122+
fileInput: normalized,
1123+
workspaceId: params._context?.workspaceId,
1124+
}
1125+
}
1126+
10781127
if (operation === 'file_fetch') {
10791128
const fileUrl = resolveHttpFileUrl(params.fileUrl)
10801129

@@ -1164,6 +1213,10 @@ export const FileV5Block: BlockConfig<FileParserV3Output> = {
11641213
description: 'Selected workspace files or canonical file IDs to compress',
11651214
},
11661215
archiveName: { type: 'string', description: 'Name for the compressed .zip archive' },
1216+
decompressInput: {
1217+
type: 'json',
1218+
description: 'Selected .zip archive or canonical file ID to extract',
1219+
},
11671220
},
11681221
outputs: {
11691222
files: {

apps/sim/lib/api/contracts/tools/file.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,17 @@ export const fileManageCompressBodySchema = z
7676
message: 'Either fileId or fileInput is required for compress operation',
7777
})
7878

79+
/**
 * Request body for the `decompress` file-manage operation. The archive is identified
 * by `fileId` or by `fileInput`; at least one of the two must be present.
 */
export const fileManageDecompressBodySchema = z
  .object({
    operation: z.literal('decompress'),
    workspaceId: z.string().min(1).optional(),
    fileId: z.string().min(1).optional(),
    fileInput: z.unknown().optional(),
  })
  .refine(({ fileId, fileInput }) => !(fileId === undefined && fileInput === undefined), {
    message: 'Either fileId or fileInput is required for decompress operation',
  })
89+
7990
export const fileManageBodySchema = z.union([
8091
fileManageWriteBodySchema,
8192
fileManageAppendBodySchema,
@@ -84,6 +95,7 @@ export const fileManageBodySchema = z.union([
8495
fileManageReadBodySchema,
8596
fileManageContentBodySchema,
8697
fileManageCompressBodySchema,
98+
fileManageDecompressBodySchema,
8799
])
88100

89101
export const fileManageContract = defineRouteContract({

apps/sim/tools/file/compress.test.ts

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* @vitest-environment node
33
*/
44
import { describe, expect, it } from 'vitest'
5-
import { fileCompressTool } from '@/tools/file/compress'
5+
import { fileCompressTool, fileDecompressTool } from '@/tools/file/compress'
66

77
describe('fileCompressTool', () => {
88
it('builds a compress request body from file IDs and archive name', () => {
@@ -76,3 +76,46 @@ describe('fileCompressTool', () => {
7676
})
7777
})
7878
})
79+
80+
/** Covers request-body construction and response mapping for the decompress tool. */
describe('fileDecompressTool', () => {
  type DecompressParams = Parameters<NonNullable<typeof fileDecompressTool.request.body>>[0]

  it('builds a decompress request body from a file ID', () => {
    const params = {
      fileId: 'wf_zip',
      _context: { workspaceId: 'ws_1' },
    } as DecompressParams

    const requestBody = fileDecompressTool.request.body?.(params)

    expect(requestBody).toMatchObject({
      operation: 'decompress',
      fileId: 'wf_zip',
      workspaceId: 'ws_1',
    })
  })

  it('returns the extracted files on success', async () => {
    const extractedFiles = [
      { id: 'wf_a', name: 'a.txt', url: 'https://example.com/a.txt', key: 'k/a.txt' },
      { id: 'wf_b', name: 'b.txt', url: 'https://example.com/b.txt', key: 'k/b.txt' },
    ]
    const [firstFile] = extractedFiles
    // Shape the route would send back for a successful extraction.
    const routeResponse = Response.json({
      success: true,
      data: { file: firstFile, files: extractedFiles },
    })

    const outcome = await fileDecompressTool.transformResponse?.(routeResponse)

    expect(outcome).toMatchObject({
      success: true,
      output: { file: firstFile, files: extractedFiles },
    })
  })

  it('propagates route failures as tool failures', async () => {
    const failureMessage = '"data.txt" is not a valid .zip archive'
    const routeResponse = Response.json({ success: false, error: failureMessage })

    const outcome = await fileDecompressTool.transformResponse?.(routeResponse)

    expect(outcome).toMatchObject({
      success: false,
      error: failureMessage,
      output: {},
    })
  })
})

0 commit comments

Comments
 (0)