1+ import { NextRequest , NextResponse } from 'next/server'
2+ import { z } from 'zod'
3+ import { db } from '@/db'
4+ import { docsEmbeddings } from '@/db/schema'
5+ import { generateEmbeddings } from '@/app/api/knowledge/utils'
6+ import { createLogger } from '@/lib/logs/console-logger'
7+ import { sql } from 'drizzle-orm'
8+ import { env } from '@/lib/env'
9+ import { executeProviderRequest } from '@/providers'
10+ import { getProviderDefaultModel } from '@/providers/models'
11+ import { getRotatingApiKey } from '@/lib/utils'
12+
13+ const logger = createLogger ( 'DocsRAG' )
14+
15+ // Configuration for docs RAG
16+ const DOCS_RAG_CONFIG = {
17+ // Default provider for docs RAG - change this constant to switch providers
18+ defaultProvider : 'anthropic' , // Options: 'openai', 'anthropic', 'deepseek', 'google', 'xai', etc.
19+ // Default model for docs RAG - will use provider's default if not specified
20+ defaultModel : 'claude-3-7-sonnet-latest' , // e.g., 'gpt-4o-mini', 'claude-3-5-sonnet-latest', 'deepseek-chat'
21+ // Temperature for response generation
22+ temperature : 0.1 ,
23+ // Max tokens for response
24+ maxTokens : 1000 ,
25+ } as const
26+
27+ const DocsQuerySchema = z . object ( {
28+ query : z . string ( ) . min ( 1 , 'Query is required' ) ,
29+ topK : z . number ( ) . min ( 1 ) . max ( 20 ) . default ( 10 ) ,
30+ provider : z . string ( ) . optional ( ) , // Allow override of provider per request
31+ model : z . string ( ) . optional ( ) , // Allow override of model per request
32+ stream : z . boolean ( ) . optional ( ) . default ( false ) , // Enable streaming responses
33+ } )
34+
35+ /**
36+ * Generate embedding for search query
37+ */
38+ async function generateSearchEmbedding ( query : string ) : Promise < number [ ] > {
39+ try {
40+ const embeddings = await generateEmbeddings ( [ query ] )
41+ return embeddings [ 0 ] || [ ]
42+ } catch ( error ) {
43+ logger . error ( 'Failed to generate search embedding:' , error )
44+ throw new Error ( 'Failed to generate search embedding' )
45+ }
46+ }
47+
48+ /**
49+ * Search docs embeddings using vector similarity
50+ */
51+ async function searchDocs ( queryEmbedding : number [ ] , topK : number ) {
52+ try {
53+ const results = await db
54+ . select ( {
55+ chunkId : docsEmbeddings . chunkId ,
56+ chunkText : docsEmbeddings . chunkText ,
57+ sourceDocument : docsEmbeddings . sourceDocument ,
58+ sourceLink : docsEmbeddings . sourceLink ,
59+ headerText : docsEmbeddings . headerText ,
60+ headerLevel : docsEmbeddings . headerLevel ,
61+ similarity : sql < number > `1 - (${ docsEmbeddings . embedding } <=> ${ JSON . stringify ( queryEmbedding ) } ::vector)` ,
62+ } )
63+ . from ( docsEmbeddings )
64+ . orderBy ( sql `${ docsEmbeddings . embedding } <=> ${ JSON . stringify ( queryEmbedding ) } ::vector` )
65+ . limit ( topK )
66+
67+ return results
68+ } catch ( error ) {
69+ logger . error ( 'Failed to search docs:' , error )
70+ throw new Error ( 'Failed to search docs' )
71+ }
72+ }
73+
74+ /**
75+ * Generate response using LLM with retrieved context
76+ */
77+ async function generateResponse ( query : string , chunks : any [ ] , provider ?: string , model ?: string , stream : boolean = false ) : Promise < string | ReadableStream > {
78+ // Determine which provider and model to use
79+ const selectedProvider = provider || DOCS_RAG_CONFIG . defaultProvider
80+ const selectedModel = model || DOCS_RAG_CONFIG . defaultModel || getProviderDefaultModel ( selectedProvider )
81+
82+ // Get API key for the selected provider
83+ let apiKey : string
84+ try {
85+ if ( selectedProvider === 'openai' || selectedProvider === 'azure-openai' ) {
86+ apiKey = getRotatingApiKey ( 'openai' )
87+ } else if ( selectedProvider === 'anthropic' ) {
88+ apiKey = getRotatingApiKey ( 'anthropic' )
89+ } else {
90+ // For other providers, try to get from environment
91+ const envKey = `${ selectedProvider . toUpperCase ( ) . replace ( '-' , '_' ) } _API_KEY`
92+ apiKey = process . env [ envKey ] || ''
93+ if ( ! apiKey ) {
94+ throw new Error ( `API key not configured for provider: ${ selectedProvider } ` )
95+ }
96+ }
97+ } catch ( error ) {
98+ logger . error ( `Failed to get API key for provider ${ selectedProvider } :` , error )
99+ throw new Error ( `API key not configured for provider: ${ selectedProvider } ` )
100+ }
101+
102+ // Format chunks as context with numbered sources
103+ const context = chunks
104+ . map ( ( chunk , index ) => {
105+ // Ensure all chunk properties are strings to avoid object serialization
106+ const headerText = typeof chunk . headerText === 'string' ? chunk . headerText : String ( chunk . headerText || 'Untitled Section' )
107+ const sourceDocument = typeof chunk . sourceDocument === 'string' ? chunk . sourceDocument : String ( chunk . sourceDocument || 'Unknown Document' )
108+ const sourceLink = typeof chunk . sourceLink === 'string' ? chunk . sourceLink : String ( chunk . sourceLink || '#' )
109+ const chunkText = typeof chunk . chunkText === 'string' ? chunk . chunkText : String ( chunk . chunkText || '' )
110+
111+ return `[${ index + 1 } ] ${ headerText }
112+ Document: ${ sourceDocument }
113+ URL: ${ sourceLink }
114+ Content: ${ chunkText } `
115+ } )
116+ . join ( '\n\n' )
117+
118+ const systemPrompt = `You are a helpful assistant that answers questions about Sim Studio documentation.
119+
120+ IMPORTANT: Use inline citations throughout your response. When referencing information from the sources, include the citation number in square brackets like [1], [2], etc.
121+
122+ Guidelines:
123+ - Answer the user's question accurately using the provided documentation
124+ - Include inline citations [1], [2], etc. when referencing specific information
125+ - Use multiple citations for comprehensive answers
126+ - Format your response in clean, readable markdown
127+ - Use bullet points, code blocks, and headers where appropriate
128+ - If information spans multiple sources, cite all relevant ones
129+ - If the question cannot be answered from the context, say so clearly
130+ - Be conversational but precise
131+ - NEVER include object representations like "[object Object]" - always use proper text
132+ - When mentioning tool names, use their actual names from the documentation
133+
134+ The sources are numbered [1] through [${ chunks . length } ] in the context below.`
135+
136+ const userPrompt = `Question: ${ query }
137+
138+ Documentation Context:
139+ ${ context } `
140+
141+ try {
142+ logger . info ( `Generating response using provider: ${ selectedProvider } , model: ${ selectedModel } ` )
143+
144+ const providerRequest = {
145+ model : selectedModel ,
146+ systemPrompt,
147+ context : userPrompt ,
148+ temperature : DOCS_RAG_CONFIG . temperature ,
149+ maxTokens : DOCS_RAG_CONFIG . maxTokens ,
150+ apiKey,
151+ stream,
152+ // Azure OpenAI specific parameters if needed
153+ ...( selectedProvider === 'azure-openai' && {
154+ azureEndpoint : env . AZURE_OPENAI_ENDPOINT ,
155+ azureApiVersion : env . AZURE_OPENAI_API_VERSION ,
156+ } ) ,
157+ }
158+
159+ const response = await executeProviderRequest ( selectedProvider , providerRequest )
160+
161+ // Handle different response types
162+ if ( response instanceof ReadableStream ) {
163+ if ( stream ) {
164+ return response // Return the stream directly for streaming requests
165+ } else {
166+ throw new Error ( 'Unexpected streaming response when non-streaming was requested' )
167+ }
168+ }
169+
170+ if ( 'stream' in response && 'execution' in response ) {
171+ // Handle StreamingExecution for providers like Anthropic
172+ if ( stream ) {
173+ return response . stream // Return the stream from StreamingExecution
174+ } else {
175+ throw new Error ( 'Unexpected streaming execution response when non-streaming was requested' )
176+ }
177+ }
178+
179+ // At this point, we have a ProviderResponse
180+ const content = response . content || 'Sorry, I could not generate a response.'
181+
182+ // Clean up any object serialization artifacts
183+ const cleanedContent = content
184+ . replace ( / \[ o b j e c t O b j e c t \] , ? / g, '' ) // Remove [object Object] artifacts
185+ . replace ( / \s + / g, ' ' ) // Normalize whitespace
186+ . trim ( )
187+
188+ return cleanedContent
189+ } catch ( error ) {
190+ logger . error ( 'Failed to generate LLM response:' , error )
191+ throw new Error ( `Failed to generate response using ${ selectedProvider } : ${ error instanceof Error ? error . message : 'Unknown error' } ` )
192+ }
193+ }
194+
195+ /**
196+ * POST /api/docs/ask
197+ * Ask questions about Sim Studio documentation using RAG
198+ */
199+ export async function POST ( req : NextRequest ) {
200+ const requestId = crypto . randomUUID ( )
201+
202+ try {
203+ const body = await req . json ( )
204+ const { query, topK, provider, model, stream } = DocsQuerySchema . parse ( body )
205+
206+ logger . info ( `[${ requestId } ] Docs RAG query: "${ query } "` , {
207+ provider : provider || DOCS_RAG_CONFIG . defaultProvider ,
208+ model : model || DOCS_RAG_CONFIG . defaultModel || getProviderDefaultModel ( provider || DOCS_RAG_CONFIG . defaultProvider ) ,
209+ topK,
210+ } )
211+
212+ // Step 1: Generate embedding for the query
213+ logger . info ( `[${ requestId } ] Generating query embedding...` )
214+ const queryEmbedding = await generateSearchEmbedding ( query )
215+
216+ if ( queryEmbedding . length === 0 ) {
217+ return NextResponse . json (
218+ { error : 'Failed to generate query embedding' } ,
219+ { status : 500 }
220+ )
221+ }
222+
223+ // Step 2: Search for relevant docs chunks
224+ logger . info ( `[${ requestId } ] Searching docs for top ${ topK } chunks...` )
225+ const chunks = await searchDocs ( queryEmbedding , topK )
226+
227+ if ( chunks . length === 0 ) {
228+ return NextResponse . json ( {
229+ success : true ,
230+ response : "I couldn't find any relevant documentation for your question. Please try rephrasing your query or check if you're asking about a feature that exists in Sim Studio." ,
231+ sources : [ ] ,
232+ metadata : {
233+ requestId,
234+ chunksFound : 0 ,
235+ query,
236+ provider : provider || DOCS_RAG_CONFIG . defaultProvider ,
237+ model : model || DOCS_RAG_CONFIG . defaultModel || getProviderDefaultModel ( provider || DOCS_RAG_CONFIG . defaultProvider ) ,
238+ } ,
239+ } )
240+ }
241+
242+ // Step 3: Generate response using LLM
243+ logger . info ( `[${ requestId } ] Generating LLM response with ${ chunks . length } chunks...` )
244+ const response = await generateResponse ( query , chunks , provider , model , stream )
245+
246+ // Step 4: Format sources for response
247+ const sources = chunks . map ( ( chunk ) => ( {
248+ title : chunk . headerText ,
249+ document : chunk . sourceDocument ,
250+ link : chunk . sourceLink ,
251+ similarity : Math . round ( chunk . similarity * 100 ) / 100 ,
252+ } ) )
253+
254+ // Handle streaming response
255+ if ( response instanceof ReadableStream ) {
256+ logger . info ( `[${ requestId } ] Returning streaming response` )
257+
258+ // Create a new stream that includes metadata
259+ const encoder = new TextEncoder ( )
260+ const decoder = new TextDecoder ( )
261+
262+ return new Response (
263+ new ReadableStream ( {
264+ async start ( controller ) {
265+ const reader = response . getReader ( )
266+
267+ // Send initial metadata
268+ const metadata = {
269+ type : 'metadata' ,
270+ sources,
271+ metadata : {
272+ requestId,
273+ chunksFound : chunks . length ,
274+ query,
275+ topSimilarity : sources [ 0 ] ?. similarity ,
276+ provider : provider || DOCS_RAG_CONFIG . defaultProvider ,
277+ model : model || DOCS_RAG_CONFIG . defaultModel || getProviderDefaultModel ( provider || DOCS_RAG_CONFIG . defaultProvider ) ,
278+ } ,
279+ }
280+ controller . enqueue ( encoder . encode ( `data: ${ JSON . stringify ( metadata ) } \n\n` ) )
281+
282+ try {
283+ while ( true ) {
284+ const { done, value } = await reader . read ( )
285+ if ( done ) break
286+
287+ // Forward the chunk with content type
288+ const chunkText = decoder . decode ( value )
289+ // Clean up any object serialization artifacts in streaming content
290+ const cleanedChunk = chunkText . replace ( / \[ o b j e c t O b j e c t \] , ? / g, '' )
291+ const contentChunk = {
292+ type : 'content' ,
293+ content : cleanedChunk ,
294+ }
295+ controller . enqueue ( encoder . encode ( `data: ${ JSON . stringify ( contentChunk ) } \n\n` ) )
296+ }
297+
298+ // Send end marker
299+ controller . enqueue ( encoder . encode ( `data: {"type":"done"}\n\n` ) )
300+ } catch ( error ) {
301+ logger . error ( `[${ requestId } ] Streaming error:` , error )
302+ const errorChunk = {
303+ type : 'error' ,
304+ error : 'Streaming failed' ,
305+ }
306+ controller . enqueue ( encoder . encode ( `data: ${ JSON . stringify ( errorChunk ) } \n\n` ) )
307+ } finally {
308+ controller . close ( )
309+ }
310+ } ,
311+ } ) ,
312+ {
313+ headers : {
314+ 'Content-Type' : 'text/event-stream' ,
315+ 'Cache-Control' : 'no-cache' ,
316+ 'Connection' : 'keep-alive' ,
317+ } ,
318+ }
319+ )
320+ }
321+
322+ logger . info ( `[${ requestId } ] RAG response generated successfully` )
323+
324+ return NextResponse . json ( {
325+ success : true ,
326+ response,
327+ sources,
328+ metadata : {
329+ requestId,
330+ chunksFound : chunks . length ,
331+ query,
332+ topSimilarity : sources [ 0 ] ?. similarity ,
333+ provider : provider || DOCS_RAG_CONFIG . defaultProvider ,
334+ model : model || DOCS_RAG_CONFIG . defaultModel || getProviderDefaultModel ( provider || DOCS_RAG_CONFIG . defaultProvider ) ,
335+ } ,
336+ } )
337+
338+ } catch ( error ) {
339+ if ( error instanceof z . ZodError ) {
340+ return NextResponse . json (
341+ { error : 'Invalid request data' , details : error . errors } ,
342+ { status : 400 }
343+ )
344+ }
345+
346+ logger . error ( `[${ requestId } ] RAG error:` , error )
347+ return NextResponse . json (
348+ { error : 'Internal server error' } ,
349+ { status : 500 }
350+ )
351+ }
352+ }