@@ -3,7 +3,6 @@ import { userStats, workflow } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import { eq, sql } from 'drizzle-orm'
 import { type NextRequest, NextResponse } from 'next/server'
-import OpenAI, { AzureOpenAI } from 'openai'
 import { getBYOKKey } from '@/lib/api-key/byok'
 import { getSession } from '@/lib/auth'
 import { logModelUsage } from '@/lib/billing/core/usage-log'
@@ -12,6 +11,7 @@ import { env } from '@/lib/core/config/env'
 import { getCostMultiplier, isBillingEnabled } from '@/lib/core/config/feature-flags'
 import { generateRequestId } from '@/lib/core/utils/request'
 import { verifyWorkspaceMembership } from '@/app/api/workflows/utils'
+import { extractResponseText, parseResponsesUsage } from '@/providers/openai/utils'
 import { getModelPricing } from '@/providers/utils'

 export const dynamic = 'force-dynamic'
@@ -28,18 +28,6 @@ const openaiApiKey = env.OPENAI_API_KEY

 const useWandAzure = azureApiKey && azureEndpoint && azureApiVersion

-const client = useWandAzure
-  ? new AzureOpenAI({
-      apiKey: azureApiKey,
-      apiVersion: azureApiVersion,
-      endpoint: azureEndpoint,
-    })
-  : openaiApiKey
-    ? new OpenAI({
-        apiKey: openaiApiKey,
-      })
-    : null
-
 if (!useWandAzure && !openaiApiKey) {
   logger.warn(
     'Neither Azure OpenAI nor OpenAI API key found. Wand generation API will not function.'
@@ -202,20 +190,18 @@ export async function POST(req: NextRequest) {
     }

     let isBYOK = false
-    let activeClient = client
-    let byokApiKey: string | null = null
+    let activeOpenAIKey = openaiApiKey

     if (workspaceId && !useWandAzure) {
       const byokResult = await getBYOKKey(workspaceId, 'openai')
       if (byokResult) {
         isBYOK = true
-        byokApiKey = byokResult.apiKey
-        activeClient = new OpenAI({ apiKey: byokResult.apiKey })
+        activeOpenAIKey = byokResult.apiKey
         logger.info(`[${requestId}] Using BYOK OpenAI key for wand generation`)
       }
     }

-    if (!activeClient) {
+    if (!useWandAzure && !activeOpenAIKey) {
       logger.error(`[${requestId}] AI client not initialized. Missing API key.`)
       return NextResponse.json(
         { success: false, error: 'Wand generation service is not configured.' },
@@ -276,17 +262,18 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
       )

       const apiUrl = useWandAzure
-        ? `${azureEndpoint}/openai/deployments/${wandModelName}/chat/completions?api-version=${azureApiVersion}`
-        : 'https://api.openai.com/v1/chat/completions'
+        ? `${azureEndpoint?.replace(/\/$/, '')}/openai/v1/responses?api-version=${azureApiVersion}`
+        : 'https://api.openai.com/v1/responses'

       const headers: Record<string, string> = {
         'Content-Type': 'application/json',
+        'OpenAI-Beta': 'responses=v1',
       }

       if (useWandAzure) {
         headers['api-key'] = azureApiKey!
       } else {
-        headers.Authorization = `Bearer ${byokApiKey || openaiApiKey}`
+        headers.Authorization = `Bearer ${activeOpenAIKey}`
       }

       logger.debug(`[${requestId}] Making streaming request to: ${apiUrl}`)
@@ -296,11 +283,10 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
         headers,
         body: JSON.stringify({
           model: useWandAzure ? wandModelName : 'gpt-4o',
-          messages: messages,
+          input: messages,
           temperature: 0.2,
-          max_tokens: 10000,
+          max_output_tokens: 10000,
           stream: true,
-          stream_options: { include_usage: true },
         }),
       })

@@ -327,16 +313,29 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
             return
           }

+          let finalUsage: any = null
+          let usageRecorded = false
+
+          const recordUsage = async () => {
+            if (usageRecorded || !finalUsage) {
+              return
+            }
+
+            usageRecorded = true
+            await updateUserStatsForWand(session.user.id, finalUsage, requestId, isBYOK)
+          }
+
           try {
             let buffer = ''
             let chunkCount = 0
-            let finalUsage: any = null
+            let activeEventType: string | undefined

             while (true) {
               const { done, value } = await reader.read()

               if (done) {
                 logger.info(`[${requestId}] Stream completed. Total chunks: ${chunkCount}`)
+                await recordUsage()
                 controller.enqueue(encoder.encode(`data: ${JSON.stringify({ done: true })}\n\n`))
                 controller.close()
                 break
@@ -348,47 +347,90 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
               buffer = lines.pop() || ''

               for (const line of lines) {
-                if (line.startsWith('data: ')) {
-                  const data = line.slice(6).trim()
+                const trimmed = line.trim()
+                if (!trimmed) {
+                  continue
+                }

-                  if (data === '[DONE]') {
-                    logger.info(`[${requestId}] Received [DONE] signal`)
+                if (trimmed.startsWith('event:')) {
+                  activeEventType = trimmed.slice(6).trim()
+                  continue
+                }

-                    if (finalUsage) {
-                      await updateUserStatsForWand(session.user.id, finalUsage, requestId, isBYOK)
-                    }
+                if (!trimmed.startsWith('data:')) {
+                  continue
+                }

-                    controller.enqueue(
-                      encoder.encode(`data: ${JSON.stringify({ done: true })}\n\n`)
-                    )
-                    controller.close()
-                    return
-                  }
+                const data = trimmed.slice(5).trim()
+                if (data === '[DONE]') {
+                  logger.info(`[${requestId}] Received [DONE] signal`)

-                  try {
-                    const parsed = JSON.parse(data)
-                    const content = parsed.choices?.[0]?.delta?.content
+                  await recordUsage()

-                    if (content) {
-                      chunkCount++
-                      if (chunkCount === 1) {
-                        logger.info(`[${requestId}] Received first content chunk`)
-                      }
+                  controller.enqueue(
+                    encoder.encode(`data: ${JSON.stringify({ done: true })}\n\n`)
+                  )
+                  controller.close()
+                  return
+                }
+
+                let parsed: any
+                try {
+                  parsed = JSON.parse(data)
+                } catch (parseError) {
+                  logger.debug(`[${requestId}] Skipped non-JSON line: ${data.substring(0, 100)}`)
+                  continue
+                }
+
+                const eventType = parsed?.type ?? activeEventType
+
+                if (
+                  eventType === 'response.error' ||
+                  eventType === 'error' ||
+                  eventType === 'response.failed'
+                ) {
+                  throw new Error(parsed?.error?.message || 'Responses stream error')
+                }
+
+                if (
+                  eventType === 'response.output_text.delta' ||
+                  eventType === 'response.output_json.delta'
+                ) {
+                  let content = ''
+                  if (typeof parsed.delta === 'string') {
+                    content = parsed.delta
+                  } else if (parsed.delta && typeof parsed.delta.text === 'string') {
+                    content = parsed.delta.text
+                  } else if (parsed.delta && parsed.delta.json !== undefined) {
+                    content = JSON.stringify(parsed.delta.json)
+                  } else if (parsed.json !== undefined) {
+                    content = JSON.stringify(parsed.json)
+                  } else if (typeof parsed.text === 'string') {
+                    content = parsed.text
+                  }

-                      controller.enqueue(
-                        encoder.encode(`data: ${JSON.stringify({ chunk: content })}\n\n`)
-                      )
+                  if (content) {
+                    chunkCount++
+                    if (chunkCount === 1) {
+                      logger.info(`[${requestId}] Received first content chunk`)
                     }

-                    if (parsed.usage) {
-                      finalUsage = parsed.usage
-                      logger.info(
-                        `[${requestId}] Received usage data: ${JSON.stringify(parsed.usage)}`
-                      )
+                    controller.enqueue(
+                      encoder.encode(`data: ${JSON.stringify({ chunk: content })}\n\n`)
+                    )
+                  }
+                }
+
+                if (eventType === 'response.completed') {
+                  const usage = parseResponsesUsage(parsed?.response?.usage ?? parsed?.usage)
+                  if (usage) {
+                    finalUsage = {
+                      prompt_tokens: usage.promptTokens,
+                      completion_tokens: usage.completionTokens,
+                      total_tokens: usage.totalTokens,
                     }
-                  } catch (parseError) {
-                    logger.debug(
-                      `[${requestId}] Skipped non-JSON line: ${data.substring(0, 100)}`
+                    logger.info(
+                      `[${requestId}] Received usage data: ${JSON.stringify(finalUsage)}`
                     )
                   }
                 }
@@ -401,6 +443,12 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
               stack: streamError?.stack,
             })

+            try {
+              await recordUsage()
+            } catch (usageError) {
+              logger.warn(`[${requestId}] Failed to record usage after stream error`, usageError)
+            }
+
             const errorData = `data: ${JSON.stringify({ error: 'Streaming failed', done: true })}\n\n`
             controller.enqueue(encoder.encode(errorData))
             controller.close()
@@ -424,8 +472,6 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
           message: error?.message || 'Unknown error',
           code: error?.code,
           status: error?.status,
-          responseStatus: error?.response?.status,
-          responseData: error?.response?.data ? safeStringify(error.response.data) : undefined,
           stack: error?.stack,
           useWandAzure,
           model: useWandAzure ? wandModelName : 'gpt-4o',
@@ -440,14 +486,43 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
       }
     }

-    const completion = await activeClient.chat.completions.create({
-      model: useWandAzure ? wandModelName : 'gpt-4o',
-      messages: messages,
-      temperature: 0.3,
-      max_tokens: 10000,
+    const apiUrl = useWandAzure
+      ? `${azureEndpoint?.replace(/\/$/, '')}/openai/v1/responses?api-version=${azureApiVersion}`
+      : 'https://api.openai.com/v1/responses'
+
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+      'OpenAI-Beta': 'responses=v1',
+    }
+
+    if (useWandAzure) {
+      headers['api-key'] = azureApiKey!
+    } else {
+      headers.Authorization = `Bearer ${activeOpenAIKey}`
+    }
+
+    const response = await fetch(apiUrl, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        model: useWandAzure ? wandModelName : 'gpt-4o',
+        input: messages,
+        temperature: 0.2,
+        max_output_tokens: 10000,
+      }),
     })

-    const generatedContent = completion.choices[0]?.message?.content?.trim()
+    if (!response.ok) {
+      const errorText = await response.text()
+      const apiError = new Error(
+        `API request failed: ${response.status} ${response.statusText} - ${errorText}`
+      )
+      ;(apiError as any).status = response.status
+      throw apiError
+    }
+
+    const completion = await response.json()
+    const generatedContent = extractResponseText(completion.output)?.trim()

     if (!generatedContent) {
       logger.error(
@@ -461,8 +536,18 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg

     logger.info(`[${requestId}] Wand generation successful`)

-    if (completion.usage) {
-      await updateUserStatsForWand(session.user.id, completion.usage, requestId, isBYOK)
+    const usage = parseResponsesUsage(completion.usage)
+    if (usage) {
+      await updateUserStatsForWand(
+        session.user.id,
+        {
+          prompt_tokens: usage.promptTokens,
+          completion_tokens: usage.completionTokens,
+          total_tokens: usage.totalTokens,
+        },
+        requestId,
+        isBYOK
+      )
     }

     return NextResponse.json({ success: true, content: generatedContent })
@@ -472,10 +557,6 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
       message: error?.message || 'Unknown error',
       code: error?.code,
       status: error?.status,
-      responseStatus: error instanceof OpenAI.APIError ? error.status : error?.response?.status,
-      responseData: (error as any)?.response?.data
-        ? safeStringify((error as any).response.data)
-        : undefined,
       stack: error?.stack,
       useWandAzure,
       model: useWandAzure ? wandModelName : 'gpt-4o',
@@ -484,26 +565,19 @@ Use this context to calculate relative dates like "yesterday", "last week", "beg
     })

     let clientErrorMessage = 'Wand generation failed. Please try again later.'
-    let status = 500
+    let status = typeof (error as any)?.status === 'number' ? (error as any).status : 500

-    if (error instanceof OpenAI.APIError) {
-      status = error.status || 500
-      logger.error(
-        `[${requestId}] ${useWandAzure ? 'Azure OpenAI' : 'OpenAI'} API Error: ${status} - ${error.message}`
-      )
-
-      if (status === 401) {
-        clientErrorMessage = 'Authentication failed. Please check your API key configuration.'
-      } else if (status === 429) {
-        clientErrorMessage = 'Rate limit exceeded. Please try again later.'
-      } else if (status >= 500) {
-        clientErrorMessage =
-          'The wand generation service is currently unavailable. Please try again later.'
-      }
-    } else if (useWandAzure && error.message?.includes('DeploymentNotFound')) {
+    if (useWandAzure && error?.message?.includes('DeploymentNotFound')) {
       clientErrorMessage =
         'Azure OpenAI deployment not found. Please check your model deployment configuration.'
       status = 404
+    } else if (status === 401) {
+      clientErrorMessage = 'Authentication failed. Please check your API key configuration.'
+    } else if (status === 429) {
+      clientErrorMessage = 'Rate limit exceeded. Please try again later.'
+    } else if (status >= 500) {
+      clientErrorMessage =
+        'The wand generation service is currently unavailable. Please try again later.'
     }

     return NextResponse.json(
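The diff leans on two helpers imported from `@/providers/openai/utils` that are not shown here: `parseResponsesUsage`, which normalizes a Responses API usage object into the `prompt_tokens`/`completion_tokens` shape the billing code expects, and `extractResponseText`, which pulls the generated text out of the Responses API `output` array. The following is a minimal sketch of what those helpers might look like, inferred only from how the route calls them; the actual implementations live in that module and may differ.

```ts
// Hypothetical sketch of the helpers imported from '@/providers/openai/utils'.
// Shapes are inferred from the call sites in this diff, not copied from the real module.

interface ResponsesUsage {
  promptTokens: number
  completionTokens: number
  totalTokens: number
}

// Maps the Responses API usage fields (input_tokens/output_tokens/total_tokens)
// onto the prompt/completion naming used by updateUserStatsForWand.
function parseResponsesUsage(usage: any): ResponsesUsage | null {
  if (!usage) return null
  const promptTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
  const completionTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
  return {
    promptTokens,
    completionTokens,
    totalTokens: usage.total_tokens ?? promptTokens + completionTokens,
  }
}

// Concatenates the output_text parts from a Responses API `output` array,
// returning undefined when no text content is present.
function extractResponseText(output: any[] | undefined): string | undefined {
  if (!Array.isArray(output)) return undefined
  const text = output
    .filter((item) => item?.type === 'message')
    .flatMap((item) => item.content ?? [])
    .filter((part: any) => part?.type === 'output_text' && typeof part.text === 'string')
    .map((part: any) => part.text)
    .join('')
  return text || undefined
}
```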