Skip to content

Commit 3902e64

Browse files
authored
refactor(kb): use chonkie locally (#475)
1 parent aa2577b commit 3902e64

File tree

2 files changed

+8
-13
lines changed

2 files changed

+8
-13
lines changed

apps/sim/lib/documents/document-processor.ts

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { RecursiveChunker } from 'chonkie/cloud'
1+
import { RecursiveChunker } from 'chonkie'
22
import type { RecursiveChunk } from 'chonkie/types'
33
import { env } from '@/lib/env'
44
import { isSupportedFileType, parseBuffer, parseFile } from '@/lib/file-parsers'
@@ -78,7 +78,11 @@ async function parseDocument(
7878
fileUrl: string,
7979
filename: string,
8080
mimeType: string
81-
): Promise<{ content: string; processingMethod: 'file-parser' | 'mistral-ocr'; s3Url?: string }> {
81+
): Promise<{
82+
content: string
83+
processingMethod: 'file-parser' | 'mistral-ocr'
84+
s3Url?: string
85+
}> {
8286
const processingMethod = determineProcessingMethod(mimeType, filename)
8387

8488
logger.info(`Processing document "${filename}" using ${processingMethod}`)
@@ -237,15 +241,8 @@ async function chunkContent(
237241
content: string,
238242
options: DocumentProcessingOptions
239243
): Promise<RecursiveChunk[]> {
240-
const apiKey = env.CHONKIE_API_KEY
241-
if (!apiKey) {
242-
throw new Error('CHONKIE_API_KEY not configured')
243-
}
244-
245-
const chunker = new RecursiveChunker(apiKey, {
244+
const chunker = await RecursiveChunker.create({
246245
chunkSize: options.chunkSize || 512,
247-
recipe: options.recipe || 'default',
248-
lang: options.lang || 'en',
249246
minCharactersPerChunk: options.minCharactersPerChunk || 24,
250247
})
251248

@@ -255,7 +252,7 @@ async function chunkContent(
255252
chunkSize: options.chunkSize || 512,
256253
})
257254

258-
const chunks = await chunker.chunk({ text: content })
255+
const chunks = await chunker.chunk(content)
259256

260257
logger.info(`Successfully created ${chunks.length} chunks`)
261258
return chunks as RecursiveChunk[]
@@ -266,7 +263,6 @@ async function chunkContent(
266263
)
267264
}
268265
}
269-
270266
/**
271267
* Calculate token count estimation (rough approximation: 4 chars per token)
272268
*/

apps/sim/lib/env.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,6 @@ export const env = createEnv({
7272
FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(),
7373
NODE_ENV: z.string().optional(),
7474
GITHUB_TOKEN: z.string().optional(),
75-
CHONKIE_API_KEY: z.string().min(1).optional(),
7675
ELEVENLABS_API_KEY: z.string().min(1).optional(),
7776

7877
// OAuth blocks (all optional)

0 commit comments

Comments
 (0)