Skip to content

Commit 05c4538

Browse files
committed
progress
1 parent 69614d2 commit 05c4538

File tree

21 files changed

+870
-105
lines changed

21 files changed

+870
-105
lines changed

apps/sim/app/api/files/parse/route.ts

Lines changed: 143 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@ import { createLogger } from '@sim/logger'
66
import binaryExtensionsList from 'binary-extensions'
77
import { type NextRequest, NextResponse } from 'next/server'
88
import { checkHybridAuth } from '@/lib/auth/hybrid'
9-
import { createPinnedUrl, validateUrlWithDNS } from '@/lib/core/security/input-validation'
9+
import { validateUrlWithDNS } from '@/lib/core/security/input-validation'
1010
import { isSupportedFileType, parseFile } from '@/lib/file-parsers'
1111
import { isUsingCloudStorage, type StorageContext, StorageService } from '@/lib/uploads'
12+
import { uploadExecutionFile } from '@/lib/uploads/contexts/execution'
1213
import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server'
1314
import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
1415
import {
@@ -21,6 +22,7 @@ import {
2122
} from '@/lib/uploads/utils/file-utils'
2223
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
2324
import { verifyFileAccess } from '@/app/api/files/authorization'
25+
import type { UserFile } from '@/executor/types'
2426
import '@/lib/uploads/core/setup.server'
2527

2628
export const dynamic = 'force-dynamic'
@@ -30,13 +32,20 @@ const logger = createLogger('FilesParseAPI')
3032
const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB
3133
const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds
3234

35+
interface ExecutionContext {
36+
workspaceId: string
37+
workflowId: string
38+
executionId: string
39+
}
40+
3341
interface ParseResult {
3442
success: boolean
3543
content?: string
3644
error?: string
3745
filePath: string
3846
originalName?: string // Original filename from database (for workspace files)
3947
viewerUrl?: string | null // Viewer URL for the file if available
48+
userFile?: UserFile // UserFile object for the raw file
4049
metadata?: {
4150
fileType: string
4251
size: number
@@ -70,27 +79,45 @@ export async function POST(request: NextRequest) {
7079

7180
const userId = authResult.userId
7281
const requestData = await request.json()
73-
const { filePath, fileType, workspaceId } = requestData
82+
const { filePath, fileType, workspaceId, workflowId, executionId } = requestData
7483

7584
if (!filePath || (typeof filePath === 'string' && filePath.trim() === '')) {
7685
return NextResponse.json({ success: false, error: 'No file path provided' }, { status: 400 })
7786
}
7887

79-
logger.info('File parse request received:', { filePath, fileType, workspaceId, userId })
88+
// Build execution context if all required fields are present
89+
const executionContext: ExecutionContext | undefined =
90+
workspaceId && workflowId && executionId
91+
? { workspaceId, workflowId, executionId }
92+
: undefined
93+
94+
logger.info('File parse request received:', {
95+
filePath,
96+
fileType,
97+
workspaceId,
98+
userId,
99+
hasExecutionContext: !!executionContext,
100+
})
80101

81102
if (Array.isArray(filePath)) {
82103
const results = []
83-
for (const path of filePath) {
84-
if (!path || (typeof path === 'string' && path.trim() === '')) {
104+
for (const singlePath of filePath) {
105+
if (!singlePath || (typeof singlePath === 'string' && singlePath.trim() === '')) {
85106
results.push({
86107
success: false,
87108
error: 'Empty file path in array',
88-
filePath: path || '',
109+
filePath: singlePath || '',
89110
})
90111
continue
91112
}
92113

93-
const result = await parseFileSingle(path, fileType, workspaceId, userId)
114+
const result = await parseFileSingle(
115+
singlePath,
116+
fileType,
117+
workspaceId,
118+
userId,
119+
executionContext
120+
)
94121
if (result.metadata) {
95122
result.metadata.processingTime = Date.now() - startTime
96123
}
@@ -106,6 +133,7 @@ export async function POST(request: NextRequest) {
106133
fileType: result.metadata?.fileType || 'application/octet-stream',
107134
size: result.metadata?.size || 0,
108135
binary: false,
136+
file: result.userFile,
109137
},
110138
filePath: result.filePath,
111139
viewerUrl: result.viewerUrl,
@@ -121,7 +149,7 @@ export async function POST(request: NextRequest) {
121149
})
122150
}
123151

124-
const result = await parseFileSingle(filePath, fileType, workspaceId, userId)
152+
const result = await parseFileSingle(filePath, fileType, workspaceId, userId, executionContext)
125153

126154
if (result.metadata) {
127155
result.metadata.processingTime = Date.now() - startTime
@@ -137,6 +165,7 @@ export async function POST(request: NextRequest) {
137165
fileType: result.metadata?.fileType || 'application/octet-stream',
138166
size: result.metadata?.size || 0,
139167
binary: false,
168+
file: result.userFile,
140169
},
141170
filePath: result.filePath,
142171
viewerUrl: result.viewerUrl,
@@ -164,7 +193,8 @@ async function parseFileSingle(
164193
filePath: string,
165194
fileType: string,
166195
workspaceId: string,
167-
userId: string
196+
userId: string,
197+
executionContext?: ExecutionContext
168198
): Promise<ParseResult> {
169199
logger.info('Parsing file:', filePath)
170200

@@ -186,18 +216,18 @@ async function parseFileSingle(
186216
}
187217

188218
if (filePath.includes('/api/files/serve/')) {
189-
return handleCloudFile(filePath, fileType, undefined, userId)
219+
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
190220
}
191221

192222
if (filePath.startsWith('http://') || filePath.startsWith('https://')) {
193-
return handleExternalUrl(filePath, fileType, workspaceId, userId)
223+
return handleExternalUrl(filePath, fileType, workspaceId, userId, executionContext)
194224
}
195225

196226
if (isUsingCloudStorage()) {
197-
return handleCloudFile(filePath, fileType, undefined, userId)
227+
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
198228
}
199229

200-
return handleLocalFile(filePath, fileType, userId)
230+
return handleLocalFile(filePath, fileType, userId, executionContext)
201231
}
202232

203233
/**
@@ -230,12 +260,14 @@ function validateFilePath(filePath: string): { isValid: boolean; error?: string
230260
/**
231261
* Handle external URL
232262
* If workspaceId is provided, checks if file already exists and saves to workspace if not
263+
* If executionContext is provided, also stores the file in execution storage and returns UserFile
233264
*/
234265
async function handleExternalUrl(
235266
url: string,
236267
fileType: string,
237268
workspaceId: string,
238-
userId: string
269+
userId: string,
270+
executionContext?: ExecutionContext
239271
): Promise<ParseResult> {
240272
try {
241273
logger.info('Fetching external URL:', url)
@@ -312,17 +344,16 @@ async function handleExternalUrl(
312344

313345
if (existingFile) {
314346
const storageFilePath = `/api/files/serve/${existingFile.key}`
315-
return handleCloudFile(storageFilePath, fileType, 'workspace', userId)
347+
return handleCloudFile(storageFilePath, fileType, 'workspace', userId, executionContext)
316348
}
317349
}
318350
}
319351

320-
const pinnedUrl = createPinnedUrl(url, urlValidation.resolvedIP!)
321-
const response = await fetch(pinnedUrl, {
352+
// Use the original URL after DNS validation passes.
353+
// DNS pinning (connecting to IP directly) breaks TLS SNI for HTTPS.
354+
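// NOTE(review): validateUrlWithDNS checked the IP resolved at validation time, but fetch() resolves the hostname again and follows redirects by default, so the host actually contacted is not guaranteed to be the validated one (DNS rebinding / redirect to an internal address) — confirm this trade-off is acceptable.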
355+
const response = await fetch(url, {
322356
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
323-
headers: {
324-
Host: urlValidation.originalHostname!,
325-
},
326357
})
327358
if (!response.ok) {
328359
throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`)
@@ -341,6 +372,20 @@ async function handleExternalUrl(
341372

342373
logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`)
343374

375+
// Store file in execution storage if execution context is provided
376+
let userFile: UserFile | undefined
377+
const mimeType = response.headers.get('content-type') || getMimeTypeFromExtension(extension)
378+
379+
if (executionContext) {
380+
try {
381+
userFile = await uploadExecutionFile(executionContext, buffer, filename, mimeType, userId)
382+
logger.info(`Stored file in execution storage: ${filename}`, { key: userFile.key })
383+
} catch (uploadError) {
384+
logger.warn(`Failed to store file in execution storage:`, uploadError)
385+
// Continue without userFile - parsing can still work
386+
}
387+
}
388+
344389
if (shouldCheckWorkspace) {
345390
try {
346391
const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId)
@@ -353,8 +398,6 @@ async function handleExternalUrl(
353398
})
354399
} else {
355400
const { uploadWorkspaceFile } = await import('@/lib/uploads/contexts/workspace')
356-
const mimeType =
357-
response.headers.get('content-type') || getMimeTypeFromExtension(extension)
358401
await uploadWorkspaceFile(workspaceId, userId, buffer, filename, mimeType)
359402
logger.info(`Saved URL file to workspace storage: ${filename}`)
360403
}
@@ -363,17 +406,23 @@ async function handleExternalUrl(
363406
}
364407
}
365408

409+
let parseResult: ParseResult
366410
if (extension === 'pdf') {
367-
return await handlePdfBuffer(buffer, filename, fileType, url)
368-
}
369-
if (extension === 'csv') {
370-
return await handleCsvBuffer(buffer, filename, fileType, url)
411+
parseResult = await handlePdfBuffer(buffer, filename, fileType, url)
412+
} else if (extension === 'csv') {
413+
parseResult = await handleCsvBuffer(buffer, filename, fileType, url)
414+
} else if (isSupportedFileType(extension)) {
415+
parseResult = await handleGenericTextBuffer(buffer, filename, extension, fileType, url)
416+
} else {
417+
parseResult = handleGenericBuffer(buffer, filename, extension, fileType)
371418
}
372-
if (isSupportedFileType(extension)) {
373-
return await handleGenericTextBuffer(buffer, filename, extension, fileType, url)
419+
420+
// Attach userFile to the result
421+
if (userFile) {
422+
parseResult.userFile = userFile
374423
}
375424

376-
return handleGenericBuffer(buffer, filename, extension, fileType)
425+
return parseResult
377426
} catch (error) {
378427
logger.error(`Error handling external URL ${url}:`, error)
379428
return {
@@ -386,12 +435,15 @@ async function handleExternalUrl(
386435

387436
/**
388437
* Handle file stored in cloud storage
438+
* If executionContext is provided and file is not already from execution storage,
439+
* copies the file to execution storage and returns UserFile
389440
*/
390441
async function handleCloudFile(
391442
filePath: string,
392443
fileType: string,
393444
explicitContext: string | undefined,
394-
userId: string
445+
userId: string,
446+
executionContext?: ExecutionContext
395447
): Promise<ParseResult> {
396448
try {
397449
const cloudKey = extractStorageKey(filePath)
@@ -438,6 +490,7 @@ async function handleCloudFile(
438490

439491
const filename = originalFilename || cloudKey.split('/').pop() || cloudKey
440492
const extension = path.extname(filename).toLowerCase().substring(1)
493+
const mimeType = getMimeTypeFromExtension(extension)
441494

442495
const normalizedFilePath = `/api/files/serve/${encodeURIComponent(cloudKey)}?context=${context}`
443496
let workspaceIdFromKey: string | undefined
@@ -453,6 +506,39 @@ async function handleCloudFile(
453506

454507
const viewerUrl = getViewerUrl(cloudKey, workspaceIdFromKey)
455508

509+
// Store file in execution storage if executionContext is provided
510+
let userFile: UserFile | undefined
511+
512+
if (executionContext) {
513+
// If file is already from execution context, create UserFile reference without re-uploading
514+
if (context === 'execution') {
515+
userFile = {
516+
id: `file_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
517+
name: filename,
518+
url: normalizedFilePath,
519+
size: fileBuffer.length,
520+
type: mimeType,
521+
key: cloudKey,
522+
context: 'execution',
523+
}
524+
logger.info(`Created UserFile reference for existing execution file: ${filename}`)
525+
} else {
526+
// Copy from workspace/other storage to execution storage
527+
try {
528+
userFile = await uploadExecutionFile(
529+
executionContext,
530+
fileBuffer,
531+
filename,
532+
mimeType,
533+
userId
534+
)
535+
logger.info(`Copied file to execution storage: ${filename}`, { key: userFile.key })
536+
} catch (uploadError) {
537+
logger.warn(`Failed to copy file to execution storage:`, uploadError)
538+
}
539+
}
540+
}
541+
456542
let parseResult: ParseResult
457543
if (extension === 'pdf') {
458544
parseResult = await handlePdfBuffer(fileBuffer, filename, fileType, normalizedFilePath)
@@ -477,6 +563,11 @@ async function handleCloudFile(
477563

478564
parseResult.viewerUrl = viewerUrl
479565

566+
// Attach userFile to the result
567+
if (userFile) {
568+
parseResult.userFile = userFile
569+
}
570+
480571
return parseResult
481572
} catch (error) {
482573
logger.error(`Error handling cloud file ${filePath}:`, error)
@@ -500,7 +591,8 @@ async function handleCloudFile(
500591
async function handleLocalFile(
501592
filePath: string,
502593
fileType: string,
503-
userId: string
594+
userId: string,
595+
executionContext?: ExecutionContext
504596
): Promise<ParseResult> {
505597
try {
506598
const filename = filePath.split('/').pop() || filePath
@@ -540,13 +632,32 @@ async function handleLocalFile(
540632
const hash = createHash('md5').update(fileBuffer).digest('hex')
541633

542634
const extension = path.extname(filename).toLowerCase().substring(1)
635+
const mimeType = fileType || getMimeTypeFromExtension(extension)
636+
637+
// Store file in execution storage if executionContext is provided
638+
let userFile: UserFile | undefined
639+
if (executionContext) {
640+
try {
641+
userFile = await uploadExecutionFile(
642+
executionContext,
643+
fileBuffer,
644+
filename,
645+
mimeType,
646+
userId
647+
)
648+
logger.info(`Stored local file in execution storage: ${filename}`, { key: userFile.key })
649+
} catch (uploadError) {
650+
logger.warn(`Failed to store local file in execution storage:`, uploadError)
651+
}
652+
}
543653

544654
return {
545655
success: true,
546656
content: result.content,
547657
filePath,
658+
userFile,
548659
metadata: {
549-
fileType: fileType || getMimeTypeFromExtension(extension),
660+
fileType: mimeType,
550661
size: stats.size,
551662
hash,
552663
processingTime: 0,

apps/sim/app/api/proxy/route.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { z } from 'zod'
55
import { checkHybridAuth } from '@/lib/auth/hybrid'
66
import { generateInternalToken } from '@/lib/auth/internal'
77
import { isDev } from '@/lib/core/config/feature-flags'
8-
import { createPinnedUrl, validateUrlWithDNS } from '@/lib/core/security/input-validation'
8+
import { validateUrlWithDNS } from '@/lib/core/security/input-validation'
99
import { generateRequestId } from '@/lib/core/utils/request'
1010
import { getBaseUrl } from '@/lib/core/utils/urls'
1111
import { executeTool } from '@/tools'
@@ -211,13 +211,13 @@ export async function GET(request: Request) {
211211
logger.info(`[${requestId}] Proxying ${method} request to: ${targetUrl}`)
212212

213213
try {
214-
const pinnedUrl = createPinnedUrl(targetUrl, urlValidation.resolvedIP!)
215-
const response = await fetch(pinnedUrl, {
214+
// Use the original URL after DNS validation passes.
215+
// DNS pinning breaks TLS SNI for HTTPS; validation already ensures IP is safe.
216+
const response = await fetch(targetUrl, {
216217
method: method,
217218
headers: {
218219
...getProxyHeaders(),
219220
...customHeaders,
220-
Host: urlValidation.originalHostname!,
221221
},
222222
body: body || undefined,
223223
})

0 commit comments

Comments
 (0)