|
1 | 1 | import { createLogger } from '@sim/logger' |
2 | | -import { getBaseUrl } from '@/lib/core/utils/urls' |
3 | 2 | import type { TextractParserInput, TextractParserOutput } from '@/tools/textract/types' |
4 | 3 | import type { ToolConfig } from '@/tools/types' |
5 | 4 |
|
@@ -92,158 +91,35 @@ export const textractParserTool: ToolConfig<TextractParserInput, TextractParserO |
92 | 91 | } |
93 | 92 | }, |
94 | 93 | body: (params) => { |
95 | | - if (!params || typeof params !== 'object') { |
96 | | - throw new Error('Invalid parameters: Parameters must be provided as an object') |
97 | | - } |
98 | | - |
99 | | - if ( |
100 | | - !params.accessKeyId || |
101 | | - typeof params.accessKeyId !== 'string' || |
102 | | - params.accessKeyId.trim() === '' |
103 | | - ) { |
104 | | - throw new Error('Missing or invalid AWS Access Key ID') |
105 | | - } |
106 | | - |
107 | | - if ( |
108 | | - !params.secretAccessKey || |
109 | | - typeof params.secretAccessKey !== 'string' || |
110 | | - params.secretAccessKey.trim() === '' |
111 | | - ) { |
112 | | - throw new Error('Missing or invalid AWS Secret Access Key') |
113 | | - } |
114 | | - |
115 | | - if (!params.region || typeof params.region !== 'string' || params.region.trim() === '') { |
116 | | - throw new Error('Missing or invalid AWS region') |
117 | | - } |
118 | | - |
119 | 94 | const processingMode = params.processingMode || 'sync' |
120 | 95 |
|
121 | 96 | const requestBody: Record<string, unknown> = { |
122 | | - accessKeyId: params.accessKeyId.trim(), |
123 | | - secretAccessKey: params.secretAccessKey.trim(), |
124 | | - region: params.region.trim(), |
| 97 | + accessKeyId: params.accessKeyId?.trim(), |
| 98 | + secretAccessKey: params.secretAccessKey?.trim(), |
| 99 | + region: params.region?.trim(), |
125 | 100 | processingMode, |
126 | 101 | } |
127 | 102 |
|
128 | 103 | if (processingMode === 'async') { |
129 | | - if (params.s3Uri && typeof params.s3Uri === 'string' && params.s3Uri.trim() !== '') { |
130 | | - const s3UriTrimmed = params.s3Uri.trim() |
131 | | - if (!s3UriTrimmed.match(/^s3:\/\/[^/]+\/.+$/)) { |
132 | | - throw new Error('Invalid S3 URI format. Expected: s3://bucket-name/path/to/object') |
133 | | - } |
134 | | - requestBody.s3Uri = s3UriTrimmed |
135 | | - } else if (params.fileUpload) { |
136 | | - if ( |
137 | | - typeof params.fileUpload === 'object' && |
138 | | - params.fileUpload !== null && |
139 | | - (params.fileUpload.url || params.fileUpload.path) |
140 | | - ) { |
141 | | - const uploadedFilePath = (params.fileUpload.path || params.fileUpload.url) as string |
142 | | - if (uploadedFilePath.startsWith('/api/files/serve/')) { |
143 | | - requestBody.filePath = uploadedFilePath |
144 | | - } else { |
145 | | - throw new Error('Multi-page mode with upload requires files stored in S3') |
146 | | - } |
147 | | - } else { |
148 | | - throw new Error('Invalid file upload: Upload data is missing or invalid') |
149 | | - } |
150 | | - } else { |
151 | | - throw new Error('Multi-page mode requires either an S3 URI or an uploaded file') |
152 | | - } |
| 104 | + requestBody.s3Uri = params.s3Uri?.trim() |
153 | 105 | } else { |
154 | | - if ( |
155 | | - params.fileUpload && |
156 | | - (!params.filePath || params.filePath === 'null' || params.filePath === '') |
157 | | - ) { |
158 | | - if ( |
159 | | - typeof params.fileUpload === 'object' && |
160 | | - params.fileUpload !== null && |
161 | | - (params.fileUpload.url || params.fileUpload.path) |
162 | | - ) { |
163 | | - let uploadedFilePath = (params.fileUpload.url || params.fileUpload.path) as string |
164 | | - |
165 | | - if (uploadedFilePath.startsWith('/')) { |
166 | | - const baseUrl = getBaseUrl() |
167 | | - if (!baseUrl) throw new Error('Failed to get base URL for file path conversion') |
168 | | - uploadedFilePath = `${baseUrl}${uploadedFilePath}` |
169 | | - } |
170 | | - |
171 | | - params.filePath = uploadedFilePath |
172 | | - logger.info('Using uploaded file:', uploadedFilePath) |
173 | | - } else { |
174 | | - throw new Error('Invalid file upload: Upload data is missing or invalid') |
175 | | - } |
176 | | - } |
177 | | - |
178 | | - if ( |
179 | | - !params.filePath || |
180 | | - typeof params.filePath !== 'string' || |
181 | | - params.filePath.trim() === '' |
182 | | - ) { |
183 | | - throw new Error('Missing or invalid file path: Please provide a URL to a document') |
184 | | - } |
185 | | - |
186 | | - let filePathToValidate = params.filePath.trim() |
187 | | - if (filePathToValidate.startsWith('/')) { |
188 | | - const baseUrl = getBaseUrl() |
189 | | - if (!baseUrl) throw new Error('Failed to get base URL for file path conversion') |
190 | | - filePathToValidate = `${baseUrl}${filePathToValidate}` |
191 | | - } |
192 | | - |
193 | | - let url |
194 | | - try { |
195 | | - url = new URL(filePathToValidate) |
196 | | - |
197 | | - if (!['http:', 'https:'].includes(url.protocol)) { |
198 | | - throw new Error( |
199 | | - `Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol` |
200 | | - ) |
| 106 | + // Handle file upload by extracting the path |
| 107 | + if (params.fileUpload && !params.filePath) { |
| 108 | + const uploadPath = params.fileUpload.path || params.fileUpload.url |
| 109 | + if (uploadPath) { |
| 110 | + requestBody.filePath = uploadPath |
201 | 111 | } |
202 | | - } catch (error) { |
203 | | - const errorMessage = error instanceof Error ? error.message : String(error) |
204 | | - throw new Error( |
205 | | - `Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a document.` |
206 | | - ) |
207 | | - } |
208 | | - |
209 | | - requestBody.filePath = url.toString() |
210 | | - |
211 | | - if (params.fileUpload?.path?.startsWith('/api/files/serve/')) { |
212 | | - requestBody.filePath = params.fileUpload.path |
| 112 | + } else { |
| 113 | + requestBody.filePath = params.filePath?.trim() |
213 | 114 | } |
214 | 115 | } |
215 | 116 |
|
216 | 117 | if (params.featureTypes && Array.isArray(params.featureTypes)) { |
217 | | - const validFeatures = ['TABLES', 'FORMS', 'QUERIES', 'SIGNATURES', 'LAYOUT'] |
218 | | - const filteredFeatures = params.featureTypes.filter((f) => |
219 | | - validFeatures.includes(f as string) |
220 | | - ) |
221 | | - if (filteredFeatures.length > 0) { |
222 | | - requestBody.featureTypes = filteredFeatures |
223 | | - } |
| 118 | + requestBody.featureTypes = params.featureTypes |
224 | 119 | } |
225 | 120 |
|
226 | | - if (params.queries && Array.isArray(params.queries) && params.queries.length > 0) { |
227 | | - const validQueries = params.queries |
228 | | - .filter((q) => q && typeof q === 'object' && typeof q.Text === 'string' && q.Text.trim()) |
229 | | - .map((q) => ({ |
230 | | - Text: q.Text.trim(), |
231 | | - Alias: q.Alias?.trim() || undefined, |
232 | | - Pages: q.Pages || undefined, |
233 | | - })) |
234 | | - |
235 | | - if (validQueries.length > 0) { |
236 | | - requestBody.queries = validQueries |
237 | | - |
238 | | - if (!requestBody.featureTypes) { |
239 | | - requestBody.featureTypes = ['QUERIES'] |
240 | | - } else if ( |
241 | | - Array.isArray(requestBody.featureTypes) && |
242 | | - !requestBody.featureTypes.includes('QUERIES') |
243 | | - ) { |
244 | | - ;(requestBody.featureTypes as string[]).push('QUERIES') |
245 | | - } |
246 | | - } |
| 121 | + if (params.queries && Array.isArray(params.queries)) { |
| 122 | + requestBody.queries = params.queries |
247 | 123 | } |
248 | 124 |
|
249 | 125 | return requestBody |
|
0 commit comments