Skip to content

Commit c5d3405

Browse files
committed
removed upload for textract async version
1 parent 0ac6fec commit c5d3405

File tree

10 files changed

+109
-84
lines changed

10 files changed

+109
-84
lines changed

apps/docs/content/docs/en/tools/s3.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ Upload a file to an AWS S3 bucket
5858
| Parameter | Type | Description |
5959
| --------- | ---- | ----------- |
6060
| `url` | string | URL of the uploaded S3 object |
61+
| `uri` | string | S3 URI of the uploaded object \(s3://bucket/key\) |
6162
| `metadata` | object | Upload metadata including ETag and location |
6263

6364
### `s3_get_object`
@@ -149,6 +150,7 @@ Copy an object within or between AWS S3 buckets
149150
| Parameter | Type | Description |
150151
| --------- | ---- | ----------- |
151152
| `url` | string | URL of the copied S3 object |
153+
| `uri` | string | S3 URI of the copied object \(s3://bucket/key\) |
152154
| `metadata` | object | Copy operation metadata |
153155

154156

apps/sim/app/api/tools/s3/copy-object/route.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,13 @@ export async function POST(request: NextRequest) {
7979
// Generate public URL for destination (properly encode the destination key)
8080
const encodedDestKey = validatedData.destinationKey.split('/').map(encodeURIComponent).join('/')
8181
const url = `https://${validatedData.destinationBucket}.s3.${validatedData.region}.amazonaws.com/${encodedDestKey}`
82+
const uri = `s3://${validatedData.destinationBucket}/${validatedData.destinationKey}`
8283

8384
return NextResponse.json({
8485
success: true,
8586
output: {
8687
url,
88+
uri,
8789
copySourceVersionId: result.CopySourceVersionId,
8890
versionId: result.VersionId,
8991
etag: result.CopyObjectResult?.ETag,

apps/sim/app/api/tools/s3/put-object/route.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,13 @@ export async function POST(request: NextRequest) {
117117

118118
const encodedKey = validatedData.objectKey.split('/').map(encodeURIComponent).join('/')
119119
const url = `https://${validatedData.bucketName}.s3.${validatedData.region}.amazonaws.com/${encodedKey}`
120+
const uri = `s3://${validatedData.bucketName}/${validatedData.objectKey}`
120121

121122
return NextResponse.json({
122123
success: true,
123124
output: {
124125
url,
126+
uri,
125127
etag: result.ETag,
126128
location: url,
127129
key: validatedData.objectKey,

apps/sim/app/api/tools/textract/parse/route.ts

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -322,44 +322,17 @@ export async function POST(request: NextRequest) {
322322
})
323323

324324
if (processingMode === 'async') {
325-
if (!validatedData.s3Uri && !validatedData.filePath) {
325+
if (!validatedData.s3Uri) {
326326
return NextResponse.json(
327327
{
328328
success: false,
329-
error: 'S3 URI or file path is required for multi-page processing',
329+
error: 'S3 URI is required for multi-page processing (s3://bucket/key)',
330330
},
331331
{ status: 400 }
332332
)
333333
}
334334

335-
let s3Bucket: string
336-
let s3Key: string
337-
338-
if (validatedData.s3Uri) {
339-
const parsed = parseS3Uri(validatedData.s3Uri)
340-
s3Bucket = parsed.bucket
341-
s3Key = parsed.key
342-
} else if (validatedData.filePath?.includes('/api/files/serve/')) {
343-
const storageKey = extractStorageKey(validatedData.filePath)
344-
const context = inferContextFromKey(storageKey)
345-
346-
const hasAccess = await verifyFileAccess(storageKey, userId, undefined, context, false)
347-
if (!hasAccess) {
348-
return NextResponse.json({ success: false, error: 'File not found' }, { status: 404 })
349-
}
350-
351-
const s3Info = StorageService.getS3InfoForKey(storageKey, context)
352-
s3Bucket = s3Info.bucket
353-
s3Key = s3Info.key
354-
} else {
355-
return NextResponse.json(
356-
{
357-
success: false,
358-
error: 'Multi-page mode requires an S3 URI (s3://bucket/key) or an uploaded file',
359-
},
360-
{ status: 400 }
361-
)
362-
}
335+
const { bucket: s3Bucket, key: s3Key } = parseS3Uri(validatedData.s3Uri)
363336

364337
logger.info(`[${requestId}] Starting async Textract job`, { s3Bucket, s3Key })
365338

apps/sim/blocks/blocks/s3.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,10 @@ export const S3Block: BlockConfig<S3Response> = {
414414
},
415415
outputs: {
416416
url: { type: 'string', description: 'URL of S3 object' },
417+
uri: {
418+
type: 'string',
419+
description: 'S3 URI (s3://bucket/key) for use with other AWS services',
420+
},
417421
objects: { type: 'json', description: 'List of objects (for list operation)' },
418422
deleted: { type: 'boolean', description: 'Deletion status' },
419423
metadata: { type: 'json', description: 'Operation metadata' },

apps/sim/blocks/blocks/textract.ts

Lines changed: 5 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
3636
not: true,
3737
},
3838
},
39-
{
40-
id: 'asyncInputMethod',
41-
title: 'Select Input Method',
42-
type: 'dropdown' as SubBlockType,
43-
options: [
44-
{ id: 's3', label: 'S3 URI' },
45-
{ id: 'upload', label: 'Upload Document' },
46-
],
47-
condition: {
48-
field: 'processingMode',
49-
value: 'async',
50-
},
51-
},
5239
{
5340
id: 'filePath',
5441
title: 'Document URL',
@@ -70,12 +57,8 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
7057
type: 'short-input' as SubBlockType,
7158
placeholder: 's3://bucket-name/path/to/document.pdf',
7259
condition: {
73-
field: 'asyncInputMethod',
74-
value: 's3',
75-
and: {
76-
field: 'processingMode',
77-
value: 'async',
78-
},
60+
field: 'processingMode',
61+
value: 'async',
7962
},
8063
},
8164
{
@@ -94,21 +77,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
9477
},
9578
maxSize: 10,
9679
},
97-
{
98-
id: 'asyncFileUpload',
99-
title: 'Upload Document',
100-
type: 'file-upload' as SubBlockType,
101-
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
102-
condition: {
103-
field: 'asyncInputMethod',
104-
value: 'upload',
105-
and: {
106-
field: 'processingMode',
107-
value: 'async',
108-
},
109-
},
110-
maxSize: 50,
111-
},
11280
{
11381
id: 'region',
11482
title: 'AWS Region',
@@ -177,18 +145,10 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
177145
}
178146

179147
if (processingMode === 'async') {
180-
const asyncInputMethod = params.asyncInputMethod || 's3'
181-
if (asyncInputMethod === 's3') {
182-
if (!params.s3Uri || params.s3Uri.trim() === '') {
183-
throw new Error('S3 URI is required for multi-page processing')
184-
}
185-
parameters.s3Uri = params.s3Uri.trim()
186-
} else if (asyncInputMethod === 'upload') {
187-
if (!params.asyncFileUpload) {
188-
throw new Error('Please upload a document')
189-
}
190-
parameters.fileUpload = params.asyncFileUpload
148+
if (!params.s3Uri || params.s3Uri.trim() === '') {
149+
throw new Error('S3 URI is required for multi-page processing')
191150
}
151+
parameters.s3Uri = params.s3Uri.trim()
192152
} else {
193153
const inputMethod = params.inputMethod || 'url'
194154
if (inputMethod === 'url') {
@@ -221,11 +181,9 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
221181
inputs: {
222182
processingMode: { type: 'string', description: 'Document type: single-page or multi-page' },
223183
inputMethod: { type: 'string', description: 'Input method selection for single-page mode' },
224-
asyncInputMethod: { type: 'string', description: 'Input method selection for multi-page mode' },
225184
filePath: { type: 'string', description: 'Document URL' },
226185
s3Uri: { type: 'string', description: 'S3 URI for multi-page processing (s3://bucket/key)' },
227186
fileUpload: { type: 'json', description: 'Uploaded document file for single-page mode' },
228-
asyncFileUpload: { type: 'json', description: 'Uploaded document file for multi-page mode' },
229187
extractTables: { type: 'boolean', description: 'Extract tables from document' },
230188
extractForms: { type: 'boolean', description: 'Extract form key-value pairs' },
231189
detectSignatures: { type: 'boolean', description: 'Detect signatures' },

apps/sim/lib/core/security/input-validation.test.ts

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1196,7 +1196,7 @@ describe('validateAirtableId', () => {
11961196
})
11971197

11981198
describe('validateAwsRegion', () => {
1199-
describe('valid regions', () => {
1199+
describe('valid standard regions', () => {
12001200
it.concurrent('should accept us-east-1', () => {
12011201
const result = validateAwsRegion('us-east-1')
12021202
expect(result.isValid).toBe(true)
@@ -1213,11 +1213,21 @@ describe('validateAwsRegion', () => {
12131213
expect(result.isValid).toBe(true)
12141214
})
12151215

1216+
it.concurrent('should accept eu-central-1', () => {
1217+
const result = validateAwsRegion('eu-central-1')
1218+
expect(result.isValid).toBe(true)
1219+
})
1220+
12161221
it.concurrent('should accept ap-southeast-1', () => {
12171222
const result = validateAwsRegion('ap-southeast-1')
12181223
expect(result.isValid).toBe(true)
12191224
})
12201225

1226+
it.concurrent('should accept ap-northeast-1', () => {
1227+
const result = validateAwsRegion('ap-northeast-1')
1228+
expect(result.isValid).toBe(true)
1229+
})
1230+
12211231
it.concurrent('should accept sa-east-1', () => {
12221232
const result = validateAwsRegion('sa-east-1')
12231233
expect(result.isValid).toBe(true)
@@ -1233,12 +1243,58 @@ describe('validateAwsRegion', () => {
12331243
expect(result.isValid).toBe(true)
12341244
})
12351245

1246+
it.concurrent('should accept ca-central-1', () => {
1247+
const result = validateAwsRegion('ca-central-1')
1248+
expect(result.isValid).toBe(true)
1249+
})
1250+
1251+
it.concurrent('should accept il-central-1', () => {
1252+
const result = validateAwsRegion('il-central-1')
1253+
expect(result.isValid).toBe(true)
1254+
})
1255+
12361256
it.concurrent('should accept regions with double-digit numbers', () => {
12371257
const result = validateAwsRegion('ap-northeast-12')
12381258
expect(result.isValid).toBe(true)
12391259
})
12401260
})
12411261

1262+
describe('valid GovCloud regions', () => {
1263+
it.concurrent('should accept us-gov-west-1', () => {
1264+
const result = validateAwsRegion('us-gov-west-1')
1265+
expect(result.isValid).toBe(true)
1266+
})
1267+
1268+
it.concurrent('should accept us-gov-east-1', () => {
1269+
const result = validateAwsRegion('us-gov-east-1')
1270+
expect(result.isValid).toBe(true)
1271+
})
1272+
})
1273+
1274+
describe('valid China regions', () => {
1275+
it.concurrent('should accept cn-north-1', () => {
1276+
const result = validateAwsRegion('cn-north-1')
1277+
expect(result.isValid).toBe(true)
1278+
})
1279+
1280+
it.concurrent('should accept cn-northwest-1', () => {
1281+
const result = validateAwsRegion('cn-northwest-1')
1282+
expect(result.isValid).toBe(true)
1283+
})
1284+
})
1285+
1286+
describe('valid ISO regions', () => {
1287+
it.concurrent('should accept us-iso-east-1', () => {
1288+
const result = validateAwsRegion('us-iso-east-1')
1289+
expect(result.isValid).toBe(true)
1290+
})
1291+
1292+
it.concurrent('should accept us-isob-east-1', () => {
1293+
const result = validateAwsRegion('us-isob-east-1')
1294+
expect(result.isValid).toBe(true)
1295+
})
1296+
})
1297+
12421298
describe('invalid regions', () => {
12431299
it.concurrent('should reject null', () => {
12441300
const result = validateAwsRegion(null)
@@ -1282,6 +1338,16 @@ describe('validateAwsRegion', () => {
12821338
expect(result.isValid).toBe(false)
12831339
})
12841340

1341+
it.concurrent('should reject invalid prefix', () => {
1342+
const result = validateAwsRegion('xx-east-1')
1343+
expect(result.isValid).toBe(false)
1344+
})
1345+
1346+
it.concurrent('should reject invalid direction', () => {
1347+
const result = validateAwsRegion('us-middle-1')
1348+
expect(result.isValid).toBe(false)
1349+
})
1350+
12851351
it.concurrent('should use custom param name in errors', () => {
12861352
const result = validateAwsRegion('', 'awsRegion')
12871353
expect(result.error).toContain('awsRegion')

apps/sim/lib/core/security/input-validation.ts

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -937,8 +937,12 @@ export function validateAirtableId(
937937
/**
938938
* Validates an AWS region identifier
939939
*
940-
* AWS regions follow the pattern: {area}-{sub-area}-{number}
941-
* Examples: us-east-1, eu-west-2, ap-southeast-1, sa-east-1
940+
* Supported region formats:
941+
* - Standard: us-east-1, eu-west-2, ap-southeast-1, sa-east-1, af-south-1
942+
* - GovCloud: us-gov-east-1, us-gov-west-1
943+
* - China: cn-north-1, cn-northwest-1
944+
* - Israel: il-central-1
945+
* - ISO partitions: us-iso-east-1, us-isob-east-1
942946
*
943947
* @param value - The AWS region to validate
944948
* @param paramName - Name of the parameter for error messages
@@ -963,9 +967,13 @@ export function validateAwsRegion(
963967
}
964968
}
965969

966-
// AWS region format: {area}-{sub-area}-{number}
967-
// Examples: us-east-1, eu-west-2, ap-southeast-1, me-south-1, af-south-1
968-
const awsRegionPattern = /^[a-z]{2}-[a-z]+-\d{1,2}$/
970+
// AWS region patterns:
971+
// - Standard: af|ap|ca|eu|il|me|sa|us, then a direction or "central", then a 1-2 digit number
972+
// - GovCloud: us-gov-east-1, us-gov-west-1
973+
// - China: cn-north-1, cn-northwest-1
974+
// - ISO: us-iso-east-1, us-iso-west-1, us-isob-east-1
975+
const awsRegionPattern =
976+
/^(af|ap|ca|cn|eu|il|me|sa|us|us-gov|us-iso|us-isob)-(central|north|northeast|northwest|south|southeast|southwest|east|west)-\d{1,2}$/
969977

970978
if (!awsRegionPattern.test(value)) {
971979
logger.warn('Invalid AWS region format', {
@@ -974,7 +982,7 @@ export function validateAwsRegion(
974982
})
975983
return {
976984
isValid: false,
977-
error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2)`,
985+
error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2, us-gov-west-1)`,
978986
}
979987
}
980988

apps/sim/tools/s3/copy_object.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ export const s3CopyObjectTool: ToolConfig = {
9595
success: true,
9696
output: {
9797
url: data.output.url,
98+
uri: data.output.uri,
9899
metadata: {
99100
copySourceVersionId: data.output.copySourceVersionId,
100101
versionId: data.output.versionId,
@@ -109,6 +110,10 @@ export const s3CopyObjectTool: ToolConfig = {
109110
type: 'string',
110111
description: 'URL of the copied S3 object',
111112
},
113+
uri: {
114+
type: 'string',
115+
description: 'S3 URI of the copied object (s3://bucket/key)',
116+
},
112117
metadata: {
113118
type: 'object',
114119
description: 'Copy operation metadata',

apps/sim/tools/s3/put_object.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ export const s3PutObjectTool: ToolConfig = {
102102
success: true,
103103
output: {
104104
url: data.output.url,
105+
uri: data.output.uri,
105106
metadata: {
106107
etag: data.output.etag,
107108
location: data.output.location,
@@ -117,6 +118,10 @@ export const s3PutObjectTool: ToolConfig = {
117118
type: 'string',
118119
description: 'URL of the uploaded S3 object',
119120
},
121+
uri: {
122+
type: 'string',
123+
description: 'S3 URI of the uploaded object (s3://bucket/key)',
124+
},
120125
metadata: {
121126
type: 'object',
122127
description: 'Upload metadata including ETag and location',

0 commit comments

Comments
 (0)