|
| 1 | +import { randomUUID } from 'crypto' |
| 2 | +import { db } from '@sim/db' |
| 3 | +import { document } from '@sim/db/schema' |
| 4 | +import { createLogger } from '@sim/logger' |
| 5 | +import { and, eq, isNull } from 'drizzle-orm' |
| 6 | +import { type NextRequest, NextResponse } from 'next/server' |
| 7 | +import { z } from 'zod' |
| 8 | +import { AuditAction, AuditResourceType, recordAudit } from '@/lib/audit/log' |
| 9 | +import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' |
| 10 | +import { |
| 11 | + createDocumentRecords, |
| 12 | + deleteDocument, |
| 13 | + getProcessingConfig, |
| 14 | + processDocumentsWithQueue, |
| 15 | +} from '@/lib/knowledge/documents/service' |
| 16 | +import { authorizeWorkflowByWorkspacePermission } from '@/lib/workflows/utils' |
| 17 | +import { checkKnowledgeBaseWriteAccess } from '@/app/api/knowledge/utils' |
| 18 | + |
| 19 | +const logger = createLogger('DocumentUpsertAPI') |
| 20 | + |
| 21 | +const UpsertDocumentSchema = z.object({ |
| 22 | + documentId: z.string().optional(), |
| 23 | + filename: z.string().min(1, 'Filename is required'), |
| 24 | + fileUrl: z.string().min(1, 'File URL is required'), |
| 25 | + fileSize: z.number().min(1, 'File size must be greater than 0'), |
| 26 | + mimeType: z.string().min(1, 'MIME type is required'), |
| 27 | + documentTagsData: z.string().optional(), |
| 28 | + processingOptions: z.object({ |
| 29 | + chunkSize: z.number().min(100).max(4000), |
| 30 | + minCharactersPerChunk: z.number().min(1).max(2000), |
| 31 | + recipe: z.string(), |
| 32 | + lang: z.string(), |
| 33 | + chunkOverlap: z.number().min(0).max(500), |
| 34 | + }), |
| 35 | + workflowId: z.string().optional(), |
| 36 | +}) |
| 37 | + |
| 38 | +export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) { |
| 39 | + const requestId = randomUUID().slice(0, 8) |
| 40 | + const { id: knowledgeBaseId } = await params |
| 41 | + |
| 42 | + try { |
| 43 | + const body = await req.json() |
| 44 | + |
| 45 | + logger.info(`[${requestId}] Knowledge base document upsert request`, { |
| 46 | + knowledgeBaseId, |
| 47 | + hasDocumentId: !!body.documentId, |
| 48 | + filename: body.filename, |
| 49 | + }) |
| 50 | + |
| 51 | + const auth = await checkSessionOrInternalAuth(req, { requireWorkflowId: false }) |
| 52 | + if (!auth.success || !auth.userId) { |
| 53 | + logger.warn(`[${requestId}] Authentication failed: ${auth.error || 'Unauthorized'}`) |
| 54 | + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) |
| 55 | + } |
| 56 | + const userId = auth.userId |
| 57 | + |
| 58 | + if (body.workflowId) { |
| 59 | + const authorization = await authorizeWorkflowByWorkspacePermission({ |
| 60 | + workflowId: body.workflowId, |
| 61 | + userId, |
| 62 | + action: 'write', |
| 63 | + }) |
| 64 | + if (!authorization.allowed) { |
| 65 | + return NextResponse.json( |
| 66 | + { error: authorization.message || 'Access denied' }, |
| 67 | + { status: authorization.status } |
| 68 | + ) |
| 69 | + } |
| 70 | + } |
| 71 | + |
| 72 | + const accessCheck = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, userId) |
| 73 | + |
| 74 | + if (!accessCheck.hasAccess) { |
| 75 | + if ('notFound' in accessCheck && accessCheck.notFound) { |
| 76 | + logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`) |
| 77 | + return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 }) |
| 78 | + } |
| 79 | + logger.warn( |
| 80 | + `[${requestId}] User ${userId} attempted to upsert document in unauthorized knowledge base ${knowledgeBaseId}` |
| 81 | + ) |
| 82 | + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) |
| 83 | + } |
| 84 | + |
| 85 | + const validatedData = UpsertDocumentSchema.parse(body) |
| 86 | + |
| 87 | + let existingDocumentId: string | null = null |
| 88 | + let isUpdate = false |
| 89 | + |
| 90 | + if (validatedData.documentId) { |
| 91 | + const existingDoc = await db |
| 92 | + .select({ id: document.id }) |
| 93 | + .from(document) |
| 94 | + .where( |
| 95 | + and( |
| 96 | + eq(document.id, validatedData.documentId), |
| 97 | + eq(document.knowledgeBaseId, knowledgeBaseId), |
| 98 | + isNull(document.deletedAt) |
| 99 | + ) |
| 100 | + ) |
| 101 | + .limit(1) |
| 102 | + |
| 103 | + if (existingDoc.length > 0) { |
| 104 | + existingDocumentId = existingDoc[0].id |
| 105 | + } |
| 106 | + } |
| 107 | + |
| 108 | + if (!existingDocumentId) { |
| 109 | + const docsByFilename = await db |
| 110 | + .select({ id: document.id }) |
| 111 | + .from(document) |
| 112 | + .where( |
| 113 | + and( |
| 114 | + eq(document.filename, validatedData.filename), |
| 115 | + eq(document.knowledgeBaseId, knowledgeBaseId), |
| 116 | + isNull(document.deletedAt) |
| 117 | + ) |
| 118 | + ) |
| 119 | + .limit(1) |
| 120 | + |
| 121 | + if (docsByFilename.length > 0) { |
| 122 | + existingDocumentId = docsByFilename[0].id |
| 123 | + } |
| 124 | + } |
| 125 | + |
| 126 | + if (existingDocumentId) { |
| 127 | + isUpdate = true |
| 128 | + logger.info( |
| 129 | + `[${requestId}] Found existing document ${existingDocumentId}, deleting before re-creation` |
| 130 | + ) |
| 131 | + await deleteDocument(existingDocumentId, requestId) |
| 132 | + } |
| 133 | + |
| 134 | + const createdDocuments = await createDocumentRecords( |
| 135 | + [ |
| 136 | + { |
| 137 | + filename: validatedData.filename, |
| 138 | + fileUrl: validatedData.fileUrl, |
| 139 | + fileSize: validatedData.fileSize, |
| 140 | + mimeType: validatedData.mimeType, |
| 141 | + ...(validatedData.documentTagsData && { |
| 142 | + documentTagsData: validatedData.documentTagsData, |
| 143 | + }), |
| 144 | + }, |
| 145 | + ], |
| 146 | + knowledgeBaseId, |
| 147 | + requestId |
| 148 | + ) |
| 149 | + |
| 150 | + const firstDocument = createdDocuments[0] |
| 151 | + |
| 152 | + processDocumentsWithQueue( |
| 153 | + createdDocuments, |
| 154 | + knowledgeBaseId, |
| 155 | + validatedData.processingOptions, |
| 156 | + requestId |
| 157 | + ).catch((error: unknown) => { |
| 158 | + logger.error(`[${requestId}] Critical error in document processing pipeline:`, error) |
| 159 | + }) |
| 160 | + |
| 161 | + try { |
| 162 | + const { PlatformEvents } = await import('@/lib/core/telemetry') |
| 163 | + PlatformEvents.knowledgeBaseDocumentsUploaded({ |
| 164 | + knowledgeBaseId, |
| 165 | + documentsCount: 1, |
| 166 | + uploadType: 'single', |
| 167 | + chunkSize: validatedData.processingOptions.chunkSize, |
| 168 | + recipe: validatedData.processingOptions.recipe, |
| 169 | + }) |
| 170 | + } catch (_e) { |
| 171 | + // Silently fail |
| 172 | + } |
| 173 | + |
| 174 | + recordAudit({ |
| 175 | + workspaceId: accessCheck.knowledgeBase?.workspaceId ?? null, |
| 176 | + actorId: userId, |
| 177 | + actorName: auth.userName, |
| 178 | + actorEmail: auth.userEmail, |
| 179 | + action: isUpdate ? AuditAction.DOCUMENT_UPDATED : AuditAction.DOCUMENT_UPLOADED, |
| 180 | + resourceType: AuditResourceType.DOCUMENT, |
| 181 | + resourceId: knowledgeBaseId, |
| 182 | + resourceName: validatedData.filename, |
| 183 | + description: isUpdate |
| 184 | + ? `Upserted (replaced) document "${validatedData.filename}" in knowledge base "${knowledgeBaseId}"` |
| 185 | + : `Upserted (created) document "${validatedData.filename}" in knowledge base "${knowledgeBaseId}"`, |
| 186 | + metadata: { |
| 187 | + fileName: validatedData.filename, |
| 188 | + previousDocumentId: existingDocumentId, |
| 189 | + isUpdate, |
| 190 | + }, |
| 191 | + request: req, |
| 192 | + }) |
| 193 | + |
| 194 | + return NextResponse.json({ |
| 195 | + success: true, |
| 196 | + data: { |
| 197 | + documentsCreated: [ |
| 198 | + { |
| 199 | + documentId: firstDocument.documentId, |
| 200 | + filename: firstDocument.filename, |
| 201 | + status: 'pending', |
| 202 | + }, |
| 203 | + ], |
| 204 | + isUpdate, |
| 205 | + previousDocumentId: existingDocumentId, |
| 206 | + processingMethod: 'background', |
| 207 | + processingConfig: { |
| 208 | + maxConcurrentDocuments: getProcessingConfig().maxConcurrentDocuments, |
| 209 | + batchSize: getProcessingConfig().batchSize, |
| 210 | + }, |
| 211 | + }, |
| 212 | + }) |
| 213 | + } catch (error) { |
| 214 | + if (error instanceof z.ZodError) { |
| 215 | + logger.warn(`[${requestId}] Invalid upsert request data`, { errors: error.errors }) |
| 216 | + return NextResponse.json( |
| 217 | + { error: 'Invalid request data', details: error.errors }, |
| 218 | + { status: 400 } |
| 219 | + ) |
| 220 | + } |
| 221 | + |
| 222 | + logger.error(`[${requestId}] Error upserting document`, error) |
| 223 | + |
| 224 | + const errorMessage = error instanceof Error ? error.message : 'Failed to upsert document' |
| 225 | + const isStorageLimitError = |
| 226 | + errorMessage.includes('Storage limit exceeded') || errorMessage.includes('storage limit') |
| 227 | + const isMissingKnowledgeBase = errorMessage === 'Knowledge base not found' |
| 228 | + |
| 229 | + return NextResponse.json( |
| 230 | + { error: errorMessage }, |
| 231 | + { status: isMissingKnowledgeBase ? 404 : isStorageLimitError ? 413 : 500 } |
| 232 | + ) |
| 233 | + } |
| 234 | +} |
0 commit comments