Skip to content

Commit 95aba16

Browse files
committed
feat(chat): add chat.defer(), preload toggle, TTFB measurement, and fix ChatTaskWirePayload export
1 parent fb4a699 commit 95aba16

File tree

7 files changed

+136
-26
lines changed

7 files changed

+136
-26
lines changed

docs/guides/ai-chat.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,7 @@ See [onTurnComplete](#onturncomplete) for the full field reference.
13871387
| `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. `"2h"`) |
13881388
| `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) |
13891389
| `chat.setWarmTimeoutInSeconds(seconds)` | Override warm timeout at runtime |
1390+
| `chat.defer(promise)` | Run background work in parallel with streaming, awaited (up to 5 s) before `onTurnComplete` |
13901391
| `chat.isStopped()` | Check if the current turn was stopped by the user (works anywhere during a turn) |
13911392
| `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message |
13921393
| `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` |

packages/trigger-sdk/src/v3/ai.ts

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -309,9 +309,8 @@ const chatStream = streams.define<UIMessageChunk>({ id: _CHAT_STREAM_KEY });
309309
/**
310310
* The wire payload shape sent by `TriggerChatTransport`.
311311
* Uses `metadata` to match the AI SDK's `ChatRequestOptions` field name.
312-
* @internal
313312
*/
314-
type ChatTaskWirePayload<TMessage extends UIMessage = UIMessage, TMetadata = unknown> = {
313+
export type ChatTaskWirePayload<TMessage extends UIMessage = UIMessage, TMetadata = unknown> = {
315314
messages: TMessage[];
316315
chatId: string;
317316
trigger: "submit-message" | "regenerate-message" | "preload";
@@ -384,6 +383,13 @@ export type ChatTaskRunPayload<TClientData = unknown> = ChatTaskPayload<TClientD
384383
const messagesInput = streams.input<ChatTaskWirePayload>({ id: CHAT_MESSAGES_STREAM_ID });
385384
const stopInput = streams.input<{ stop: true; message?: string }>({ id: CHAT_STOP_STREAM_ID });
386385

386+
/**
387+
* Per-turn deferred promises. Registered via `chat.defer()`, awaited
388+
* before `onTurnComplete` fires. Reset each turn.
389+
* @internal
390+
*/
391+
const chatDeferKey = locals.create<Set<Promise<unknown>>>("chat.defer");
392+
387393
/**
388394
* Run-scoped pipe counter. Stored in locals so concurrent runs in the
389395
* same worker don't share state.
@@ -1016,6 +1022,7 @@ function chatTask<
10161022
`chat turn ${turn + 1}`,
10171023
async () => {
10181024
locals.set(chatPipeCountKey, 0);
1025+
locals.set(chatDeferKey, new Set());
10191026

10201027
// Store chat context for auto-detection by ai.tool subtasks
10211028
locals.set(chatTurnContextKey, {
@@ -1270,6 +1277,16 @@ function chatTask<
12701277
turnAccessToken
12711278
);
12721279

1280+
// Await deferred background work (e.g. DB writes from onTurnStart)
1281+
// before firing onTurnComplete so hooks can rely on the work being done.
1282+
const deferredWork = locals.get(chatDeferKey);
1283+
if (deferredWork && deferredWork.size > 0) {
1284+
await Promise.race([
1285+
Promise.allSettled(deferredWork),
1286+
new Promise<void>((r) => setTimeout(r, 5_000)),
1287+
]);
1288+
}
1289+
12731290
// Fire onTurnComplete after response capture
12741291
if (onTurnComplete) {
12751292
await tracer.startActiveSpan(
@@ -1487,6 +1504,32 @@ function isStopped(): boolean {
14871504
return controller?.signal.aborted ?? false;
14881505
}
14891506

1507+
// ---------------------------------------------------------------------------
1508+
// Per-turn deferred work
1509+
// ---------------------------------------------------------------------------
1510+
1511+
/**
1512+
* Register a promise that runs in the background during the current turn.
1513+
*
1514+
* Use this to move non-blocking work (DB writes, analytics, etc.) out of
1515+
* the critical path. The promise runs in parallel with streaming and is
1516+
* awaited (with a 5 s timeout) before `onTurnComplete` fires.
1517+
*
1518+
* @example
1519+
* ```ts
1520+
* onTurnStart: async ({ chatId, uiMessages }) => {
1521+
* // Persist messages without blocking the LLM call
1522+
* chat.defer(db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }));
1523+
* },
1524+
* ```
1525+
*/
1526+
function chatDefer(promise: Promise<unknown>): void {
1527+
const promises = locals.get(chatDeferKey);
1528+
if (promises) {
1529+
promises.add(promise);
1530+
}
1531+
}
1532+
14901533
// ---------------------------------------------------------------------------
14911534
// Aborted message cleanup
14921535
// ---------------------------------------------------------------------------
@@ -1806,6 +1849,8 @@ export const chat = {
18061849
isStopped,
18071850
/** Clean up aborted parts from a UIMessage. See {@link cleanupAbortedParts}. */
18081851
cleanupAbortedParts,
1852+
/** Register background work that runs in parallel with streaming. See {@link chatDefer}. */
1853+
defer: chatDefer,
18091854
/** Typed chat output stream for writing custom chunks or piping from subtasks. */
18101855
stream: chatStream,
18111856
};

references/ai-chat/src/app/page.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
type ChatMeta = {
1313
id: string;
1414
title: string;
15+
model: string;
1516
createdAt: number;
1617
updatedAt: number;
1718
};

references/ai-chat/src/components/chat-app.tsx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ export function ChatApp({
5151

5252
// Model for new chats (before first message is sent)
5353
const [newChatModel, setNewChatModel] = useState(DEFAULT_MODEL);
54+
const [preloadEnabled, setPreloadEnabled] = useState(true);
5455

5556
const handleSessionChange = useCallback(
5657
(chatId: string, session: SessionInfo | null) => {
@@ -98,8 +99,10 @@ export function ChatApp({
9899
setActiveChatId(id);
99100
setMessages([]);
100101
setNewChatModel(DEFAULT_MODEL);
101-
// Eagerly start the run — onPreload fires immediately for initialization
102-
transport.preload(id);
102+
if (preloadEnabled) {
103+
// Eagerly start the run — onPreload fires immediately for initialization
104+
transport.preload(id);
105+
}
103106
}
104107

105108
function handleSelectChat(id: string) {
@@ -149,6 +152,8 @@ export function ChatApp({
149152
onSelectChat={handleSelectChat}
150153
onNewChat={handleNewChat}
151154
onDeleteChat={handleDeleteChat}
155+
preloadEnabled={preloadEnabled}
156+
onPreloadChange={setPreloadEnabled}
152157
/>
153158
<div className="flex-1">
154159
{activeChatId ? (

references/ai-chat/src/components/chat-sidebar.tsx

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ type ChatSidebarProps = {
2424
onSelectChat: (id: string) => void;
2525
onNewChat: () => void;
2626
onDeleteChat: (id: string) => void;
27+
preloadEnabled: boolean;
28+
onPreloadChange: (enabled: boolean) => void;
2729
};
2830

2931
export function ChatSidebar({
@@ -32,6 +34,8 @@ export function ChatSidebar({
3234
onSelectChat,
3335
onNewChat,
3436
onDeleteChat,
37+
preloadEnabled,
38+
onPreloadChange,
3539
}: ChatSidebarProps) {
3640
const sorted = [...chats].sort((a, b) => b.updatedAt - a.updatedAt);
3741

@@ -77,6 +81,18 @@ export function ChatSidebar({
7781
</button>
7882
))}
7983
</div>
84+
85+
<div className="shrink-0 border-t border-gray-200 px-3 py-2.5">
86+
<label className="flex items-center gap-2 text-xs text-gray-500 cursor-pointer select-none">
87+
<input
88+
type="checkbox"
89+
checked={preloadEnabled}
90+
onChange={(e) => onPreloadChange(e.target.checked)}
91+
className="rounded border-gray-300"
92+
/>
93+
Preload new chats
94+
</label>
95+
</div>
8096
</div>
8197
);
8298
}

references/ai-chat/src/components/chat.tsx

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,20 +110,24 @@ function ResearchProgress({ part }: { part: any }) {
110110
);
111111
}
112112

113+
type TtfbEntry = { turn: number; ttfbMs: number };
114+
113115
function DebugPanel({
114116
chatId,
115117
model,
116118
status,
117119
session,
118120
dashboardUrl,
119121
messageCount,
122+
ttfbHistory,
120123
}: {
121124
chatId: string;
122125
model: string;
123126
status: string;
124127
session?: { runId: string; publicAccessToken: string; lastEventId?: string };
125128
dashboardUrl?: string;
126129
messageCount: number;
130+
ttfbHistory: TtfbEntry[];
127131
}) {
128132
const [open, setOpen] = useState(false);
129133

@@ -132,6 +136,12 @@ function DebugPanel({
132136
? `${dashboardUrl}/runs/${session.runId}`
133137
: undefined;
134138

139+
const latestTtfb = ttfbHistory.length > 0 ? ttfbHistory[ttfbHistory.length - 1]! : undefined;
140+
const avgTtfb =
141+
ttfbHistory.length > 0
142+
? Math.round(ttfbHistory.reduce((sum, e) => sum + e.ttfbMs, 0) / ttfbHistory.length)
143+
: undefined;
144+
135145
return (
136146
<div className="shrink-0 border-t border-gray-200 bg-gray-50 text-xs text-gray-500">
137147
<button
@@ -150,6 +160,9 @@ function DebugPanel({
150160
}`}
151161
/>
152162
<span>{status}</span>
163+
{latestTtfb && (
164+
<span className="font-mono text-blue-600">TTFB {latestTtfb.ttfbMs.toLocaleString()}ms</span>
165+
)}
153166
{session?.runId && (
154167
<span className="font-mono">{session.runId.slice(0, 16)}...</span>
155168
)}
@@ -170,6 +183,22 @@ function DebugPanel({
170183
) : (
171184
<Row label="Session" value="none" />
172185
)}
186+
{ttfbHistory.length > 0 && (
187+
<>
188+
<div className="mt-2 border-t border-gray-200 pt-2">
189+
<span className="font-medium text-gray-600">TTFB</span>
190+
{avgTtfb !== undefined && (
191+
<span className="ml-2 text-gray-400">avg {avgTtfb.toLocaleString()}ms</span>
192+
)}
193+
</div>
194+
{ttfbHistory.map((entry) => (
195+
<div key={entry.turn} className="flex items-center gap-2">
196+
<span className="w-24 shrink-0 text-gray-400">Turn {entry.turn}</span>
197+
<span className="font-mono">{entry.ttfbMs.toLocaleString()}ms</span>
198+
</div>
199+
))}
200+
</>
201+
)}
173202
</div>
174203
)}
175204
</div>
@@ -236,6 +265,11 @@ export function Chat({
236265
const [input, setInput] = useState("");
237266
const hasCalledFirstMessage = useRef(false);
238267

268+
// TTFB tracking
269+
const sendTimestamp = useRef<number | null>(null);
270+
const turnCounter = useRef(0);
271+
const [ttfbHistory, setTtfbHistory] = useState<TtfbEntry[]>([]);
272+
239273
const { messages, sendMessage, stop, status, error } = useChat({
240274
id: chatId,
241275
messages: initialMessages,
@@ -257,6 +291,19 @@ export function Chat({
257291
}
258292
}, [messages, chatId, onFirstMessage]);
259293

294+
// TTFB detection: record when first assistant content appears after send
295+
useEffect(() => {
296+
if (status !== "streaming") return;
297+
if (sendTimestamp.current === null) return;
298+
const lastMsg = messages[messages.length - 1];
299+
if (lastMsg?.role === "assistant") {
300+
const ttfbMs = Date.now() - sendTimestamp.current;
301+
const turn = turnCounter.current;
302+
sendTimestamp.current = null;
303+
setTtfbHistory((prev) => [...prev, { turn, ttfbMs }]);
304+
}
305+
}, [status, messages]);
306+
260307
// Pending message to send after the current turn completes
261308
const [pendingMessage, setPendingMessage] = useState<string | null>(null);
262309

@@ -277,6 +324,8 @@ export function Chat({
277324
if (pendingMessage) {
278325
const text = pendingMessage;
279326
setPendingMessage(null);
327+
turnCounter.current++;
328+
sendTimestamp.current = Date.now();
280329
sendMessage({ text }, { metadata: { model } });
281330
}
282331
}, [status, messages, chatId, onMessagesChange, sendMessage, pendingMessage, model]);
@@ -423,6 +472,7 @@ export function Chat({
423472
session={session}
424473
dashboardUrl={dashboardUrl}
425474
messageCount={messages.length}
475+
ttfbHistory={ttfbHistory}
426476
/>
427477

428478
<form
@@ -432,6 +482,8 @@ export function Chat({
432482
if (status === "streaming") {
433483
setPendingMessage(input);
434484
} else {
485+
turnCounter.current++;
486+
sendTimestamp.current = Date.now();
435487
sendMessage({ text: input }, { metadata: { model } });
436488
}
437489
setInput("");

references/ai-chat/src/trigger/chat.ts

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,9 @@ const userContext = chat.local<{
137137
}>({ id: "userContext" });
138138

139139
// Per-run dynamic tools — loaded from DB in onPreload/onChatStart
140-
const userToolDefs = chat.local<
141-
Array<{ name: string; description: string; responseTemplate: string }>
142-
>({ id: "userToolDefs" });
140+
const userToolDefs = chat.local<{
141+
value: Array<{ name: string; description: string; responseTemplate: string }>;
142+
}>({ id: "userToolDefs" });
143143

144144
// --------------------------------------------------------------------------
145145
// Subtask: deep research — fetches multiple URLs and streams progress
@@ -250,6 +250,7 @@ export const aiChat = chat.task({
250250
warmTimeoutInSeconds: 60,
251251
chatAccessTokenTTL: "2h",
252252
onPreload: async ({ chatId, runId, chatAccessToken, clientData }) => {
253+
if (!clientData) return;
253254
// Eagerly initialize before the user's first message arrives
254255
const user = await prisma.user.upsert({
255256
where: { id: clientData.userId },
@@ -266,7 +267,7 @@ export const aiChat = chat.task({
266267

267268
// Load user-specific dynamic tools
268269
const tools = await prisma.userTool.findMany({ where: { userId: clientData.userId } });
269-
userToolDefs.init(tools);
270+
userToolDefs.init({ value: tools });
270271

271272
// Create chat record and session
272273
await prisma.chat.upsert({
@@ -287,12 +288,8 @@ export const aiChat = chat.task({
287288
},
288289
onChatStart: async ({ chatId, runId, chatAccessToken, clientData, continuation, preloaded }) => {
289290
if (preloaded) {
290-
// Already initialized in onPreload — just update session
291-
await prisma.chatSession.upsert({
292-
where: { id: chatId },
293-
create: { id: chatId, runId, publicAccessToken: chatAccessToken },
294-
update: { runId, publicAccessToken: chatAccessToken },
295-
});
291+
// Everything was already initialized in onPreload — skip entirely.
292+
// The session, chat record, user context, and tools are all set up.
296293
return;
297294
}
298295

@@ -312,7 +309,7 @@ export const aiChat = chat.task({
312309

313310
// Load user-specific dynamic tools
314311
const tools = await prisma.userTool.findMany({ where: { userId: clientData.userId } });
315-
userToolDefs.init(tools);
312+
userToolDefs.init({ value: tools });
316313

317314
if (!continuation) {
318315
await prisma.chat.upsert({
@@ -333,17 +330,10 @@ export const aiChat = chat.task({
333330
update: { runId, publicAccessToken: chatAccessToken },
334331
});
335332
},
336-
onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => {
337-
// Persist messages BEFORE streaming so mid-stream refresh has the user message
338-
await prisma.chat.update({
339-
where: { id: chatId },
340-
data: { messages: uiMessages as any },
341-
});
342-
await prisma.chatSession.upsert({
343-
where: { id: chatId },
344-
create: { id: chatId, runId, publicAccessToken: chatAccessToken },
345-
update: { runId, publicAccessToken: chatAccessToken },
346-
});
333+
onTurnStart: async ({ chatId, uiMessages }) => {
334+
// Persist messages so mid-stream refresh still shows the user message.
335+
// Deferred — runs in parallel with streaming, awaited before onTurnComplete.
336+
chat.defer(prisma.chat.update({ where: { id: chatId }, data: { messages: uiMessages as any } }));
347337
},
348338
onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId, clientData, stopped }) => {
349339
// Persist final messages + assistant response + stream position

0 commit comments

Comments (0)