Skip to content

Commit 8bf63fe

Browse files
waleedlatif1claude
andcommitted
feat(knowledge): add upsert document operation to Knowledge block
Add a "Create or Update" (upsert) document capability that finds an existing document by ID or filename, deletes it if found, then creates a new document and queues re-processing. Includes new tool, API route, block wiring, and typed interfaces. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 75a3e2c commit 8bf63fe

File tree

6 files changed

+488
-1
lines changed

6 files changed

+488
-1
lines changed
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
import { randomUUID } from 'crypto'
2+
import { db } from '@sim/db'
3+
import { document } from '@sim/db/schema'
4+
import { createLogger } from '@sim/logger'
5+
import { and, eq, isNull } from 'drizzle-orm'
6+
import { type NextRequest, NextResponse } from 'next/server'
7+
import { z } from 'zod'
8+
import { AuditAction, AuditResourceType, recordAudit } from '@/lib/audit/log'
9+
import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
10+
import {
11+
createDocumentRecords,
12+
deleteDocument,
13+
getProcessingConfig,
14+
processDocumentsWithQueue,
15+
} from '@/lib/knowledge/documents/service'
16+
import { authorizeWorkflowByWorkspacePermission } from '@/lib/workflows/utils'
17+
import { checkKnowledgeBaseWriteAccess } from '@/app/api/knowledge/utils'
18+
19+
const logger = createLogger('DocumentUpsertAPI')
20+
21+
const UpsertDocumentSchema = z.object({
22+
documentId: z.string().optional(),
23+
filename: z.string().min(1, 'Filename is required'),
24+
fileUrl: z.string().min(1, 'File URL is required'),
25+
fileSize: z.number().min(1, 'File size must be greater than 0'),
26+
mimeType: z.string().min(1, 'MIME type is required'),
27+
documentTagsData: z.string().optional(),
28+
processingOptions: z.object({
29+
chunkSize: z.number().min(100).max(4000),
30+
minCharactersPerChunk: z.number().min(1).max(2000),
31+
recipe: z.string(),
32+
lang: z.string(),
33+
chunkOverlap: z.number().min(0).max(500),
34+
}),
35+
workflowId: z.string().optional(),
36+
})
37+
38+
export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) {
39+
const requestId = randomUUID().slice(0, 8)
40+
const { id: knowledgeBaseId } = await params
41+
42+
try {
43+
const body = await req.json()
44+
45+
logger.info(`[${requestId}] Knowledge base document upsert request`, {
46+
knowledgeBaseId,
47+
hasDocumentId: !!body.documentId,
48+
filename: body.filename,
49+
})
50+
51+
const auth = await checkSessionOrInternalAuth(req, { requireWorkflowId: false })
52+
if (!auth.success || !auth.userId) {
53+
logger.warn(`[${requestId}] Authentication failed: ${auth.error || 'Unauthorized'}`)
54+
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
55+
}
56+
const userId = auth.userId
57+
58+
if (body.workflowId) {
59+
const authorization = await authorizeWorkflowByWorkspacePermission({
60+
workflowId: body.workflowId,
61+
userId,
62+
action: 'write',
63+
})
64+
if (!authorization.allowed) {
65+
return NextResponse.json(
66+
{ error: authorization.message || 'Access denied' },
67+
{ status: authorization.status }
68+
)
69+
}
70+
}
71+
72+
const accessCheck = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, userId)
73+
74+
if (!accessCheck.hasAccess) {
75+
if ('notFound' in accessCheck && accessCheck.notFound) {
76+
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
77+
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
78+
}
79+
logger.warn(
80+
`[${requestId}] User ${userId} attempted to upsert document in unauthorized knowledge base ${knowledgeBaseId}`
81+
)
82+
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
83+
}
84+
85+
const validatedData = UpsertDocumentSchema.parse(body)
86+
87+
let existingDocumentId: string | null = null
88+
let isUpdate = false
89+
90+
if (validatedData.documentId) {
91+
const existingDoc = await db
92+
.select({ id: document.id })
93+
.from(document)
94+
.where(
95+
and(
96+
eq(document.id, validatedData.documentId),
97+
eq(document.knowledgeBaseId, knowledgeBaseId),
98+
isNull(document.deletedAt)
99+
)
100+
)
101+
.limit(1)
102+
103+
if (existingDoc.length > 0) {
104+
existingDocumentId = existingDoc[0].id
105+
}
106+
}
107+
108+
if (!existingDocumentId) {
109+
const docsByFilename = await db
110+
.select({ id: document.id })
111+
.from(document)
112+
.where(
113+
and(
114+
eq(document.filename, validatedData.filename),
115+
eq(document.knowledgeBaseId, knowledgeBaseId),
116+
isNull(document.deletedAt)
117+
)
118+
)
119+
.limit(1)
120+
121+
if (docsByFilename.length > 0) {
122+
existingDocumentId = docsByFilename[0].id
123+
}
124+
}
125+
126+
if (existingDocumentId) {
127+
isUpdate = true
128+
logger.info(
129+
`[${requestId}] Found existing document ${existingDocumentId}, deleting before re-creation`
130+
)
131+
await deleteDocument(existingDocumentId, requestId)
132+
}
133+
134+
const createdDocuments = await createDocumentRecords(
135+
[
136+
{
137+
filename: validatedData.filename,
138+
fileUrl: validatedData.fileUrl,
139+
fileSize: validatedData.fileSize,
140+
mimeType: validatedData.mimeType,
141+
...(validatedData.documentTagsData && {
142+
documentTagsData: validatedData.documentTagsData,
143+
}),
144+
},
145+
],
146+
knowledgeBaseId,
147+
requestId
148+
)
149+
150+
const firstDocument = createdDocuments[0]
151+
152+
processDocumentsWithQueue(
153+
createdDocuments,
154+
knowledgeBaseId,
155+
validatedData.processingOptions,
156+
requestId
157+
).catch((error: unknown) => {
158+
logger.error(`[${requestId}] Critical error in document processing pipeline:`, error)
159+
})
160+
161+
try {
162+
const { PlatformEvents } = await import('@/lib/core/telemetry')
163+
PlatformEvents.knowledgeBaseDocumentsUploaded({
164+
knowledgeBaseId,
165+
documentsCount: 1,
166+
uploadType: 'single',
167+
chunkSize: validatedData.processingOptions.chunkSize,
168+
recipe: validatedData.processingOptions.recipe,
169+
})
170+
} catch (_e) {
171+
// Silently fail
172+
}
173+
174+
recordAudit({
175+
workspaceId: accessCheck.knowledgeBase?.workspaceId ?? null,
176+
actorId: userId,
177+
actorName: auth.userName,
178+
actorEmail: auth.userEmail,
179+
action: isUpdate ? AuditAction.DOCUMENT_UPDATED : AuditAction.DOCUMENT_UPLOADED,
180+
resourceType: AuditResourceType.DOCUMENT,
181+
resourceId: knowledgeBaseId,
182+
resourceName: validatedData.filename,
183+
description: isUpdate
184+
? `Upserted (replaced) document "${validatedData.filename}" in knowledge base "${knowledgeBaseId}"`
185+
: `Upserted (created) document "${validatedData.filename}" in knowledge base "${knowledgeBaseId}"`,
186+
metadata: {
187+
fileName: validatedData.filename,
188+
previousDocumentId: existingDocumentId,
189+
isUpdate,
190+
},
191+
request: req,
192+
})
193+
194+
return NextResponse.json({
195+
success: true,
196+
data: {
197+
documentsCreated: [
198+
{
199+
documentId: firstDocument.documentId,
200+
filename: firstDocument.filename,
201+
status: 'pending',
202+
},
203+
],
204+
isUpdate,
205+
previousDocumentId: existingDocumentId,
206+
processingMethod: 'background',
207+
processingConfig: {
208+
maxConcurrentDocuments: getProcessingConfig().maxConcurrentDocuments,
209+
batchSize: getProcessingConfig().batchSize,
210+
},
211+
},
212+
})
213+
} catch (error) {
214+
if (error instanceof z.ZodError) {
215+
logger.warn(`[${requestId}] Invalid upsert request data`, { errors: error.errors })
216+
return NextResponse.json(
217+
{ error: 'Invalid request data', details: error.errors },
218+
{ status: 400 }
219+
)
220+
}
221+
222+
logger.error(`[${requestId}] Error upserting document`, error)
223+
224+
const errorMessage = error instanceof Error ? error.message : 'Failed to upsert document'
225+
const isStorageLimitError =
226+
errorMessage.includes('Storage limit exceeded') || errorMessage.includes('storage limit')
227+
const isMissingKnowledgeBase = errorMessage === 'Knowledge base not found'
228+
229+
return NextResponse.json(
230+
{ error: errorMessage },
231+
{ status: isMissingKnowledgeBase ? 404 : isStorageLimitError ? 413 : 500 }
232+
)
233+
}
234+
}

apps/sim/blocks/blocks/knowledge.ts

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export const KnowledgeBlock: BlockConfig = {
2929
{ label: 'List Documents', id: 'list_documents' },
3030
{ label: 'Get Document', id: 'get_document' },
3131
{ label: 'Create Document', id: 'create_document' },
32+
{ label: 'Upsert Document', id: 'upsert_document' },
3233
{ label: 'Delete Document', id: 'delete_document' },
3334
{ label: 'List Chunks', id: 'list_chunks' },
3435
{ label: 'Upload Chunk', id: 'upload_chunk' },
@@ -198,7 +199,33 @@ export const KnowledgeBlock: BlockConfig = {
198199
title: 'Document Tags',
199200
type: 'document-tag-entry',
200201
dependsOn: ['knowledgeBaseSelector'],
201-
condition: { field: 'operation', value: 'create_document' },
202+
condition: { field: 'operation', value: ['create_document', 'upsert_document'] },
203+
},
204+
205+
// --- Upsert Document ---
206+
{
207+
id: 'name',
208+
title: 'Document Name',
209+
type: 'short-input',
210+
placeholder: 'Enter document name',
211+
required: true,
212+
condition: { field: 'operation', value: 'upsert_document' },
213+
},
214+
{
215+
id: 'content',
216+
title: 'Document Content',
217+
type: 'long-input',
218+
placeholder: 'Enter the document content',
219+
rows: 6,
220+
required: true,
221+
condition: { field: 'operation', value: 'upsert_document' },
222+
},
223+
{
224+
id: 'upsertDocumentId',
225+
title: 'Document ID (Optional)',
226+
type: 'short-input',
227+
placeholder: 'Enter existing document ID to update (or leave empty to match by name)',
228+
condition: { field: 'operation', value: 'upsert_document' },
202229
},
203230

204231
// --- Update Chunk / Delete Chunk ---
@@ -264,6 +291,7 @@ export const KnowledgeBlock: BlockConfig = {
264291
'knowledge_search',
265292
'knowledge_upload_chunk',
266293
'knowledge_create_document',
294+
'knowledge_upsert_document',
267295
'knowledge_list_tags',
268296
'knowledge_list_documents',
269297
'knowledge_get_document',
@@ -284,6 +312,8 @@ export const KnowledgeBlock: BlockConfig = {
284312
return 'knowledge_upload_chunk'
285313
case 'create_document':
286314
return 'knowledge_create_document'
315+
case 'upsert_document':
316+
return 'knowledge_upsert_document'
287317
case 'list_tags':
288318
return 'knowledge_list_tags'
289319
case 'list_documents':
@@ -355,6 +385,11 @@ export const KnowledgeBlock: BlockConfig = {
355385
if (params.chunkEnabledFilter) params.enabled = params.chunkEnabledFilter
356386
}
357387

388+
// Map upsert sub-block field to tool param
389+
if (params.operation === 'upsert_document' && params.upsertDocumentId) {
390+
params.documentId = String(params.upsertDocumentId).trim()
391+
}
392+
358393
// Convert enabled dropdown string to boolean for update_chunk
359394
if (params.operation === 'update_chunk' && typeof params.enabled === 'string') {
360395
params.enabled = params.enabled === 'true'
@@ -382,6 +417,7 @@ export const KnowledgeBlock: BlockConfig = {
382417
documentTags: { type: 'string', description: 'Document tags' },
383418
chunkSearch: { type: 'string', description: 'Search filter for chunks' },
384419
chunkEnabledFilter: { type: 'string', description: 'Filter chunks by enabled status' },
420+
upsertDocumentId: { type: 'string', description: 'Document ID for upsert operation' },
385421
connectorId: { type: 'string', description: 'Connector identifier' },
386422
},
387423
outputs: {

apps/sim/tools/knowledge/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { knowledgeSearchTool } from '@/tools/knowledge/search'
1111
import { knowledgeTriggerSyncTool } from '@/tools/knowledge/trigger_sync'
1212
import { knowledgeUpdateChunkTool } from '@/tools/knowledge/update_chunk'
1313
import { knowledgeUploadChunkTool } from '@/tools/knowledge/upload_chunk'
14+
import { knowledgeUpsertDocumentTool } from '@/tools/knowledge/upsert_document'
1415

1516
export {
1617
knowledgeSearchTool,
@@ -26,4 +27,5 @@ export {
2627
knowledgeListConnectorsTool,
2728
knowledgeGetConnectorTool,
2829
knowledgeTriggerSyncTool,
30+
knowledgeUpsertDocumentTool,
2931
}

apps/sim/tools/knowledge/types.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,33 @@ export interface KnowledgeTriggerSyncResponse {
286286
}
287287
error?: string
288288
}
289+
290+
export interface KnowledgeUpsertDocumentParams {
291+
knowledgeBaseId: string
292+
name: string
293+
content: string
294+
documentId?: string
295+
documentTags?: Record<string, unknown>
296+
_context?: { workflowId?: string }
297+
}
298+
299+
export interface KnowledgeUpsertDocumentResult {
300+
documentId: string
301+
documentName: string
302+
type: string
303+
enabled: boolean
304+
isUpdate: boolean
305+
previousDocumentId: string | null
306+
createdAt: string
307+
updatedAt: string
308+
}
309+
310+
export interface KnowledgeUpsertDocumentResponse {
311+
success: boolean
312+
output: {
313+
data: KnowledgeUpsertDocumentResult
314+
message: string
315+
documentId: string
316+
}
317+
error?: string
318+
}

0 commit comments

Comments
 (0)