@@ -6,9 +6,10 @@ import { createLogger } from '@sim/logger'
66import binaryExtensionsList from 'binary-extensions'
77import { type NextRequest , NextResponse } from 'next/server'
88import { checkHybridAuth } from '@/lib/auth/hybrid'
9- import { createPinnedUrl , validateUrlWithDNS } from '@/lib/core/security/input-validation'
9+ import { validateUrlWithDNS } from '@/lib/core/security/input-validation'
1010import { isSupportedFileType , parseFile } from '@/lib/file-parsers'
1111import { isUsingCloudStorage , type StorageContext , StorageService } from '@/lib/uploads'
12+ import { uploadExecutionFile } from '@/lib/uploads/contexts/execution'
1213import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server'
1314import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
1415import {
@@ -21,6 +22,7 @@ import {
2122} from '@/lib/uploads/utils/file-utils'
2223import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
2324import { verifyFileAccess } from '@/app/api/files/authorization'
25+ import type { UserFile } from '@/executor/types'
2426import '@/lib/uploads/core/setup.server'
2527
2628export const dynamic = 'force-dynamic'
@@ -30,13 +32,20 @@ const logger = createLogger('FilesParseAPI')
3032const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB
3133const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds
3234
35+ interface ExecutionContext { // Identifiers handed to uploadExecutionFile; POST only builds this when all three request fields are present
36+ workspaceId : string // `workspaceId` field from the request body
37+ workflowId : string // `workflowId` field from the request body
38+ executionId : string // `executionId` field from the request body
39+ }
40+
3341interface ParseResult {
3442 success : boolean
3543 content ?: string
3644 error ?: string
3745 filePath : string
3846 originalName ?: string // Original filename from database (for workspace files)
3947 viewerUrl ?: string | null // Viewer URL for the file if available
48+ userFile ?: UserFile // UserFile object for the raw file
4049 metadata ?: {
4150 fileType : string
4251 size : number
@@ -70,27 +79,45 @@ export async function POST(request: NextRequest) {
7079
7180 const userId = authResult . userId
7281 const requestData = await request . json ( )
73- const { filePath, fileType, workspaceId } = requestData
82+ const { filePath, fileType, workspaceId, workflowId , executionId } = requestData
7483
7584 if ( ! filePath || ( typeof filePath === 'string' && filePath . trim ( ) === '' ) ) {
7685 return NextResponse . json ( { success : false , error : 'No file path provided' } , { status : 400 } )
7786 }
7887
79- logger . info ( 'File parse request received:' , { filePath, fileType, workspaceId, userId } )
88+ // Build execution context if all required fields are present
89+ const executionContext : ExecutionContext | undefined =
90+ workspaceId && workflowId && executionId
91+ ? { workspaceId, workflowId, executionId }
92+ : undefined
93+
94+ logger . info ( 'File parse request received:' , {
95+ filePath,
96+ fileType,
97+ workspaceId,
98+ userId,
99+ hasExecutionContext : ! ! executionContext ,
100+ } )
80101
81102 if ( Array . isArray ( filePath ) ) {
82103 const results = [ ]
83- for ( const path of filePath ) {
84- if ( ! path || ( typeof path === 'string' && path . trim ( ) === '' ) ) {
104+ for ( const singlePath of filePath ) {
105+ if ( ! singlePath || ( typeof singlePath === 'string' && singlePath . trim ( ) === '' ) ) {
85106 results . push ( {
86107 success : false ,
87108 error : 'Empty file path in array' ,
88- filePath : path || '' ,
109+ filePath : singlePath || '' ,
89110 } )
90111 continue
91112 }
92113
93- const result = await parseFileSingle ( path , fileType , workspaceId , userId )
114+ const result = await parseFileSingle (
115+ singlePath ,
116+ fileType ,
117+ workspaceId ,
118+ userId ,
119+ executionContext
120+ )
94121 if ( result . metadata ) {
95122 result . metadata . processingTime = Date . now ( ) - startTime
96123 }
@@ -106,6 +133,7 @@ export async function POST(request: NextRequest) {
106133 fileType : result . metadata ?. fileType || 'application/octet-stream' ,
107134 size : result . metadata ?. size || 0 ,
108135 binary : false ,
136+ file : result . userFile ,
109137 } ,
110138 filePath : result . filePath ,
111139 viewerUrl : result . viewerUrl ,
@@ -121,7 +149,7 @@ export async function POST(request: NextRequest) {
121149 } )
122150 }
123151
124- const result = await parseFileSingle ( filePath , fileType , workspaceId , userId )
152+ const result = await parseFileSingle ( filePath , fileType , workspaceId , userId , executionContext )
125153
126154 if ( result . metadata ) {
127155 result . metadata . processingTime = Date . now ( ) - startTime
@@ -137,6 +165,7 @@ export async function POST(request: NextRequest) {
137165 fileType : result . metadata ?. fileType || 'application/octet-stream' ,
138166 size : result . metadata ?. size || 0 ,
139167 binary : false ,
168+ file : result . userFile ,
140169 } ,
141170 filePath : result . filePath ,
142171 viewerUrl : result . viewerUrl ,
@@ -164,7 +193,8 @@ async function parseFileSingle(
164193 filePath : string ,
165194 fileType : string ,
166195 workspaceId : string ,
167- userId : string
196+ userId : string ,
197+ executionContext ?: ExecutionContext
168198) : Promise < ParseResult > {
169199 logger . info ( 'Parsing file:' , filePath )
170200
@@ -186,18 +216,18 @@ async function parseFileSingle(
186216 }
187217
188218 if ( filePath . includes ( '/api/files/serve/' ) ) {
189- return handleCloudFile ( filePath , fileType , undefined , userId )
219+ return handleCloudFile ( filePath , fileType , undefined , userId , executionContext )
190220 }
191221
192222 if ( filePath . startsWith ( 'http://' ) || filePath . startsWith ( 'https://' ) ) {
193- return handleExternalUrl ( filePath , fileType , workspaceId , userId )
223+ return handleExternalUrl ( filePath , fileType , workspaceId , userId , executionContext )
194224 }
195225
196226 if ( isUsingCloudStorage ( ) ) {
197- return handleCloudFile ( filePath , fileType , undefined , userId )
227+ return handleCloudFile ( filePath , fileType , undefined , userId , executionContext )
198228 }
199229
200- return handleLocalFile ( filePath , fileType , userId )
230+ return handleLocalFile ( filePath , fileType , userId , executionContext )
201231}
202232
203233/**
@@ -230,12 +260,14 @@ function validateFilePath(filePath: string): { isValid: boolean; error?: string
230260/**
231261 * Handle external URL
232262 * If workspaceId is provided, checks if file already exists and saves to workspace if not
263+ * If executionContext is provided, also stores the file in execution storage and returns UserFile
233264 */
234265async function handleExternalUrl (
235266 url : string ,
236267 fileType : string ,
237268 workspaceId : string ,
238- userId : string
269+ userId : string ,
270+ executionContext ?: ExecutionContext
239271) : Promise < ParseResult > {
240272 try {
241273 logger . info ( 'Fetching external URL:' , url )
@@ -312,17 +344,16 @@ async function handleExternalUrl(
312344
313345 if ( existingFile ) {
314346 const storageFilePath = `/api/files/serve/${ existingFile . key } `
315- return handleCloudFile ( storageFilePath , fileType , 'workspace' , userId )
347+ return handleCloudFile ( storageFilePath , fileType , 'workspace' , userId , executionContext )
316348 }
317349 }
318350 }
319351
320- const pinnedUrl = createPinnedUrl ( url , urlValidation . resolvedIP ! )
321- const response = await fetch ( pinnedUrl , {
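352+ // Use the original URL after DNS validation passes.
353+ // DNS pinning (connecting to IP directly) breaks TLS SNI for HTTPS.
354+ // NOTE(review): validation and fetch resolve DNS separately, so the host can re-resolve to a private/reserved IP between the two (DNS rebinding); this is not equivalent to pinning - confirm the SSRF risk is acceptable.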
355+ const response = await fetch ( url , {
322356 signal : AbortSignal . timeout ( DOWNLOAD_TIMEOUT_MS ) ,
323- headers : {
324- Host : urlValidation . originalHostname ! ,
325- } ,
326357 } )
327358 if ( ! response . ok ) {
328359 throw new Error ( `Failed to fetch URL: ${ response . status } ${ response . statusText } ` )
@@ -341,6 +372,20 @@ async function handleExternalUrl(
341372
342373 logger . info ( `Downloaded file from URL: ${ url } , size: ${ buffer . length } bytes` )
343374
375+ // Store file in execution storage if execution context is provided
376+ let userFile : UserFile | undefined
377+ const mimeType = response . headers . get ( 'content-type' ) || getMimeTypeFromExtension ( extension )
378+
379+ if ( executionContext ) {
380+ try {
381+ userFile = await uploadExecutionFile ( executionContext , buffer , filename , mimeType , userId )
382+ logger . info ( `Stored file in execution storage: ${ filename } ` , { key : userFile . key } )
383+ } catch ( uploadError ) {
384+ logger . warn ( `Failed to store file in execution storage:` , uploadError )
385+ // Continue without userFile - parsing can still work
386+ }
387+ }
388+
344389 if ( shouldCheckWorkspace ) {
345390 try {
346391 const permission = await getUserEntityPermissions ( userId , 'workspace' , workspaceId )
@@ -353,8 +398,6 @@ async function handleExternalUrl(
353398 } )
354399 } else {
355400 const { uploadWorkspaceFile } = await import ( '@/lib/uploads/contexts/workspace' )
356- const mimeType =
357- response . headers . get ( 'content-type' ) || getMimeTypeFromExtension ( extension )
358401 await uploadWorkspaceFile ( workspaceId , userId , buffer , filename , mimeType )
359402 logger . info ( `Saved URL file to workspace storage: ${ filename } ` )
360403 }
@@ -363,17 +406,23 @@ async function handleExternalUrl(
363406 }
364407 }
365408
409+ let parseResult : ParseResult
366410 if ( extension === 'pdf' ) {
367- return await handlePdfBuffer ( buffer , filename , fileType , url )
368- }
369- if ( extension === 'csv' ) {
370- return await handleCsvBuffer ( buffer , filename , fileType , url )
411+ parseResult = await handlePdfBuffer ( buffer , filename , fileType , url )
412+ } else if ( extension === 'csv' ) {
413+ parseResult = await handleCsvBuffer ( buffer , filename , fileType , url )
414+ } else if ( isSupportedFileType ( extension ) ) {
415+ parseResult = await handleGenericTextBuffer ( buffer , filename , extension , fileType , url )
416+ } else {
417+ parseResult = handleGenericBuffer ( buffer , filename , extension , fileType )
371418 }
372- if ( isSupportedFileType ( extension ) ) {
373- return await handleGenericTextBuffer ( buffer , filename , extension , fileType , url )
419+
420+ // Attach userFile to the result
421+ if ( userFile ) {
422+ parseResult . userFile = userFile
374423 }
375424
376- return handleGenericBuffer ( buffer , filename , extension , fileType )
425+ return parseResult
377426 } catch ( error ) {
378427 logger . error ( `Error handling external URL ${ url } :` , error )
379428 return {
@@ -386,12 +435,15 @@ async function handleExternalUrl(
386435
387436/**
388437 * Handle file stored in cloud storage
438+ * If executionContext is provided and file is not already from execution storage,
439+ * copies the file to execution storage and returns UserFile
389440 */
390441async function handleCloudFile (
391442 filePath : string ,
392443 fileType : string ,
393444 explicitContext : string | undefined ,
394- userId : string
445+ userId : string ,
446+ executionContext ?: ExecutionContext
395447) : Promise < ParseResult > {
396448 try {
397449 const cloudKey = extractStorageKey ( filePath )
@@ -438,6 +490,7 @@ async function handleCloudFile(
438490
439491 const filename = originalFilename || cloudKey . split ( '/' ) . pop ( ) || cloudKey
440492 const extension = path . extname ( filename ) . toLowerCase ( ) . substring ( 1 )
493+ const mimeType = getMimeTypeFromExtension ( extension )
441494
442495 const normalizedFilePath = `/api/files/serve/${ encodeURIComponent ( cloudKey ) } ?context=${ context } `
443496 let workspaceIdFromKey : string | undefined
@@ -453,6 +506,39 @@ async function handleCloudFile(
453506
454507 const viewerUrl = getViewerUrl ( cloudKey , workspaceIdFromKey )
455508
509+ // Store file in execution storage if executionContext is provided
510+ let userFile : UserFile | undefined
511+
512+ if ( executionContext ) {
513+ // If file is already from execution context, create UserFile reference without re-uploading
514+ if ( context === 'execution' ) {
515+ userFile = {
516+ id : `file_${ Date . now ( ) } _${ Math . random ( ) . toString ( 36 ) . substring ( 2 , 9 ) } ` ,
517+ name : filename ,
518+ url : normalizedFilePath ,
519+ size : fileBuffer . length ,
520+ type : mimeType ,
521+ key : cloudKey ,
522+ context : 'execution' ,
523+ }
524+ logger . info ( `Created UserFile reference for existing execution file: ${ filename } ` )
525+ } else {
526+ // Copy from workspace/other storage to execution storage
527+ try {
528+ userFile = await uploadExecutionFile (
529+ executionContext ,
530+ fileBuffer ,
531+ filename ,
532+ mimeType ,
533+ userId
534+ )
535+ logger . info ( `Copied file to execution storage: ${ filename } ` , { key : userFile . key } )
536+ } catch ( uploadError ) {
537+ logger . warn ( `Failed to copy file to execution storage:` , uploadError )
538+ }
539+ }
540+ }
541+
456542 let parseResult : ParseResult
457543 if ( extension === 'pdf' ) {
458544 parseResult = await handlePdfBuffer ( fileBuffer , filename , fileType , normalizedFilePath )
@@ -477,6 +563,11 @@ async function handleCloudFile(
477563
478564 parseResult . viewerUrl = viewerUrl
479565
566+ // Attach userFile to the result
567+ if ( userFile ) {
568+ parseResult . userFile = userFile
569+ }
570+
480571 return parseResult
481572 } catch ( error ) {
482573 logger . error ( `Error handling cloud file ${ filePath } :` , error )
@@ -500,7 +591,8 @@ async function handleCloudFile(
500591async function handleLocalFile (
501592 filePath : string ,
502593 fileType : string ,
503- userId : string
594+ userId : string ,
595+ executionContext ?: ExecutionContext
504596) : Promise < ParseResult > {
505597 try {
506598 const filename = filePath . split ( '/' ) . pop ( ) || filePath
@@ -540,13 +632,32 @@ async function handleLocalFile(
540632 const hash = createHash ( 'md5' ) . update ( fileBuffer ) . digest ( 'hex' )
541633
542634 const extension = path . extname ( filename ) . toLowerCase ( ) . substring ( 1 )
635+ const mimeType = fileType || getMimeTypeFromExtension ( extension )
636+
637+ // Store file in execution storage if executionContext is provided
638+ let userFile : UserFile | undefined
639+ if ( executionContext ) {
640+ try {
641+ userFile = await uploadExecutionFile (
642+ executionContext ,
643+ fileBuffer ,
644+ filename ,
645+ mimeType ,
646+ userId
647+ )
648+ logger . info ( `Stored local file in execution storage: ${ filename } ` , { key : userFile . key } )
649+ } catch ( uploadError ) {
650+ logger . warn ( `Failed to store local file in execution storage:` , uploadError )
651+ }
652+ }
543653
544654 return {
545655 success : true ,
546656 content : result . content ,
547657 filePath,
658+ userFile,
548659 metadata : {
549- fileType : fileType || getMimeTypeFromExtension ( extension ) ,
660+ fileType : mimeType ,
550661 size : stats . size ,
551662 hash,
552663 processingTime : 0 ,
0 commit comments