1+ import crypto from 'crypto'
12import { and , asc , eq , ilike , sql } from 'drizzle-orm'
23import { type NextRequest , NextResponse } from 'next/server'
34import { z } from 'zod'
45import { getSession } from '@/lib/auth'
56import { createLogger } from '@/lib/logs/console-logger'
67import { db } from '@/db'
7- import { embedding } from '@/db/schema'
8- import { checkDocumentAccess } from '../../../../utils'
8+ import { document , embedding } from '@/db/schema'
9+ import { checkDocumentAccess , generateEmbeddings } from '../../../../utils'
910
1011const logger = createLogger ( 'DocumentChunksAPI' )
1112
@@ -17,6 +18,12 @@ const GetChunksQuerySchema = z.object({
1718 offset : z . coerce . number ( ) . min ( 0 ) . optional ( ) . default ( 0 ) ,
1819} )
1920
/** Smallest accepted length (in characters) for manual chunk content. */
const MIN_CHUNK_CONTENT_LENGTH = 1
/** Largest accepted length (in characters) for manual chunk content. */
const MAX_CHUNK_CONTENT_LENGTH = 10000

/**
 * Request-body schema for creating a chunk by hand.
 *
 * - `content`: required string, between 1 and 10000 characters.
 * - `enabled`: optional boolean; resolves to `true` when omitted.
 */
const CreateChunkSchema = z.object({
  content: z
    .string()
    .min(MIN_CHUNK_CONTENT_LENGTH, 'Content is required')
    .max(MAX_CHUNK_CONTENT_LENGTH, 'Content too long'),
  enabled: z.boolean().optional().default(true),
})
26+
2027export async function GET (
2128 req : NextRequest ,
2229 { params } : { params : Promise < { id : string ; documentId : string } > }
@@ -142,3 +149,135 @@ export async function GET(
142149 return NextResponse . json ( { error : 'Failed to fetch chunks' } , { status : 500 } )
143150 }
144151}
152+
/**
 * Creates a manually authored chunk for a document in a knowledge base.
 *
 * Flow: authenticate the session, verify access to the document, reject
 * documents whose processing failed, validate the JSON body against
 * `CreateChunkSchema`, generate an embedding for the content, then insert the
 * chunk and increment the document's counters inside one transaction.
 *
 * Responses:
 * - 200 `{ success: true, data }` where `data` holds the inserted chunk values
 * - 400 for a malformed JSON body, a schema violation, or a failed document
 * - 401 when there is no session or the access check denies access
 * - 404 when the access check reports not-found or returns no document
 * - 500 for any other error (logged with the request id)
 *
 * @param req - Incoming request; its JSON body must satisfy `CreateChunkSchema`.
 * @param params - Route params: `id` is the knowledge base id, `documentId` the document id.
 */
export async function POST(
  req: NextRequest,
  { params }: { params: Promise<{ id: string; documentId: string }> }
) {
  // Short id used to correlate the log lines of a single request.
  const requestId = crypto.randomUUID().slice(0, 8)
  const { id: knowledgeBaseId, documentId } = await params

  try {
    const session = await getSession()
    if (!session?.user?.id) {
      logger.warn(`[${requestId}] Unauthorized chunk creation attempt`)
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const accessCheck = await checkDocumentAccess(knowledgeBaseId, documentId, session.user.id)

    if (!accessCheck.hasAccess) {
      if (accessCheck.notFound) {
        logger.warn(
          `[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
        )
        return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
      }
      logger.warn(
        `[${requestId}] User ${session.user.id} attempted unauthorized chunk creation: ${accessCheck.reason}`
      )
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const doc = accessCheck.document
    if (!doc) {
      logger.warn(
        `[${requestId}] Document data not available: KB=${knowledgeBaseId}, Doc=${documentId}`
      )
      return NextResponse.json({ error: 'Document not found' }, { status: 404 })
    }

    // Manual chunks are allowed while a document is still being processed,
    // but not once processing has failed.
    if (doc.processingStatus === 'failed') {
      logger.warn(`[${requestId}] Document ${documentId} is in failed state, cannot add chunks`)
      return NextResponse.json({ error: 'Cannot add chunks to failed document' }, { status: 400 })
    }

    // Parse the body on its own so malformed JSON is reported as a client
    // error (400) instead of falling through to the generic 500 handler.
    let body: unknown
    try {
      body = await req.json()
    } catch {
      logger.warn(`[${requestId}] Malformed JSON body in chunk creation request`)
      return NextResponse.json({ error: 'Invalid request data' }, { status: 400 })
    }

    // safeParse keeps validation failures separate from embedding/database
    // errors, so only genuine schema violations are answered with 400.
    const parsed = CreateChunkSchema.safeParse(body)
    if (!parsed.success) {
      logger.warn(`[${requestId}] Invalid chunk creation data`, {
        errors: parsed.error.errors,
      })
      return NextResponse.json(
        { error: 'Invalid request data', details: parsed.error.errors },
        { status: 400 }
      )
    }
    const { content, enabled } = parsed.data

    // Generate the embedding before opening the transaction so the
    // transaction is not held open during the embedding call.
    logger.info(`[${requestId}] Generating embedding for manual chunk`)
    const [vector] = await generateEmbeddings([content])
    if (!vector) {
      // Refuse to store a chunk without a vector; handled as a 500 below.
      throw new Error('Embedding generation returned no result')
    }

    const chunkId = crypto.randomUUID()
    const now = new Date()

    // Read the highest chunk index, insert the chunk, and update the document
    // counters in one transaction so the three statements commit or roll back
    // together.
    // NOTE(review): the read-then-insert is not serialized by this code; two
    // concurrent requests could compute the same index unless the isolation
    // level or a unique constraint prevents it — confirm against the schema.
    const newChunk = await db.transaction(async (tx) => {
      const lastChunk = await tx
        .select({ chunkIndex: embedding.chunkIndex })
        .from(embedding)
        .where(eq(embedding.documentId, documentId))
        .orderBy(sql`${embedding.chunkIndex} DESC`)
        .limit(1)

      // First chunk of a document gets index 0.
      const nextChunkIndex = lastChunk.length > 0 ? lastChunk[0].chunkIndex + 1 : 0

      const chunkData = {
        id: chunkId,
        knowledgeBaseId,
        documentId,
        chunkIndex: nextChunkIndex,
        chunkHash: crypto.createHash('sha256').update(content).digest('hex'),
        content,
        contentLength: content.length,
        tokenCount: Math.ceil(content.length / 4), // Rough approximation: ~4 characters per token
        embedding: vector,
        embeddingModel: 'text-embedding-3-small',
        startOffset: 0, // Manual chunks don't have document offsets
        endOffset: content.length,
        overlapTokens: 0,
        metadata: { manual: true }, // Mark as manually created
        searchRank: '1.0',
        accessCount: 0,
        lastAccessedAt: null,
        qualityScore: null,
        enabled,
        createdAt: now,
        updatedAt: now,
      }

      await tx.insert(embedding).values(chunkData)

      // Keep the document's aggregate statistics in step with the new chunk.
      await tx
        .update(document)
        .set({
          chunkCount: sql`${document.chunkCount} + 1`,
          tokenCount: sql`${document.tokenCount} + ${chunkData.tokenCount}`,
          characterCount: sql`${document.characterCount} + ${chunkData.contentLength}`,
        })
        .where(eq(document.id, documentId))

      return chunkData
    })

    logger.info(`[${requestId}] Manual chunk created: ${chunkId} in document ${documentId}`)

    return NextResponse.json({
      success: true,
      data: newChunk,
    })
  } catch (error) {
    logger.error(`[${requestId}] Error creating chunk`, error)
    return NextResponse.json({ error: 'Failed to create chunk' }, { status: 500 })
  }
}
0 commit comments