Commit 6052e63

fix: update visibility manager release/reclaim to use dispatch indexes
The releaseMessage and releaseMessageBatch Lua scripts were still writing to the old master queue shards. Updated them to write to the new dispatch indexes instead, so released/reclaimed messages go into the new two-level index atomically. Also removed the legacyDrainComplete flag in favor of checking ZCARD on each iteration (O(1)), and removed the redundant legacyDrainComplete otel metric since master_queue.length already shows drain status.
1 parent e1604f7 commit 6052e63

File tree

3 files changed: +302 −93 lines

.scratch/ai-chat-overview.md

Lines changed: 207 additions & 0 deletions (new file)
# AI SDK Chat Transport & Chat Task System

Run AI chat completions as durable Trigger.dev tasks — with built-in realtime streaming, multi-turn conversations in a single run, typed per-run state, cancellation from the frontend, and tool support. No API routes needed.

## How it works

1. Frontend sends messages via AI SDK's `useChat` hook through `TriggerChatTransport`
2. Transport triggers a Trigger.dev task with the conversation as payload
3. Task streams `UIMessageChunk` events back via realtime streams
4. AI SDK processes the stream natively — text, tool calls, reasoning, everything
5. Frontend can cancel generation mid-stream — the transport sends a cancel signal via input streams and `chat.task` aborts `streamText` automatically

```
useChat → TriggerChatTransport → Trigger.dev Task → streamText → realtime stream → useChat
   ↑ cancel                                              ↓ abort
   └──────────── input stream ("cancel") ────────────────┘
```

## Backend: `chat.task`

Define a chat task in one function. Return a `streamText` result and it's automatically piped to the frontend.

```ts
import { chat } from "@trigger.dev/sdk/ai";
import { streamText } from "ai";
import { openai } from "@ai-sdk/openai";

export const myChat = chat.task({
  id: "my-chat",
  run: async ({ modelMessages, signal }) => {
    return streamText({
      model: openai("gpt-4o"),
      messages: modelMessages,
      abortSignal: signal, // enables frontend cancellation
    });
  },
});
```

No `convertToModelMessages` call needed — `chat.task` handles the conversion and passes both `modelMessages` (for the model) and `messages` (raw `UIMessage[]`) in the payload. The `signal` is an `AbortSignal` that fires when the frontend cancels generation.

## Frontend: `useTriggerChatTransport`

A React hook that creates a type-safe transport for `useChat`. No `useMemo` needed — the hook handles memoization internally.

```tsx
import { useChat } from "@ai-sdk/react";
import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react";
import type { myChat } from "@/trigger/chat";

function Chat() {
  const transport = useTriggerChatTransport<typeof myChat>({
    task: "my-chat",
    accessToken: getChatToken, // async function for token refresh
  });

  const { messages, sendMessage, stop, status } = useChat({ transport });

  // stop() cancels the in-flight generation — chat.task aborts streamText automatically
}
```

The `<typeof myChat>` generic gives compile-time validation of the task ID string.

Cancellation just works — calling `stop()` from `useChat` sends a cancel signal via an input stream to the running task. `chat.task` listens for it and aborts the `streamText` call. No extra wiring needed.

## Single-run mode (multi-turn conversations)

`chat.task` keeps the entire conversation inside a single run using waitpoint tokens. After each AI response, the run pauses until the next message arrives — then resumes in the same process.

- All turns share the same run ID, logs, and metadata
- In-memory state persists across turns without external storage
- The full conversation is observable as one run in the dashboard

```ts
export const myChat = chat.task({
  id: "my-chat",
  maxTurns: 50, // default: 100
  turnTimeout: "30m", // default: "1h"
  run: async ({ modelMessages, signal }) => { ... },
});
```
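The pause/resume behavior can be modeled with a generator: the run suspends after each response (like a waitpoint) and resumes when the next message is sent in. This is a conceptual sketch only — names and mechanics here are illustrative, not the SDK's implementation:

```typescript
// Conceptual model: a generator stands in for the waitpoint-based run.
// Sending a value resumes the "run"; running out of messages ends it.
function* chatRun(maxTurns = 100): Generator<string, number, string | undefined> {
  let turn = 0;
  let reply = "ready";
  while (turn < maxTurns) {
    const next = yield reply;      // suspend: wait for the next message
    if (next === undefined) break; // turn timeout / conversation ended
    reply = `echo: ${next}`;       // stand-in for the AI response
    turn++;
  }
  return turn; // total turns, all observable as one "run"
}

const run = chatRun(50);
run.next(); // start: suspends at the first waitpoint
const r1 = run.next("hello").value; // "echo: hello"
const r2 = run.next("again").value; // "echo: again"
const done = run.next();            // no next message, so the run completes
console.log(r1, r2, done.value);    // echo: hello echo: again 2
```

The key property the sketch illustrates: all turns execute inside one call frame, so local variables (like `turn` above) survive between messages without external storage.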
83+
84+
## Per-run state with `chat.state`
85+
86+
Define typed, per-run state that's accessible from anywhere during task execution — tools, the run function, nested helpers. Each conversation gets its own isolated copy.
87+
88+
```ts
89+
import { chat } from "@trigger.dev/sdk/ai";
90+
import { streamText, tool } from "ai";
91+
import { openai } from "@ai-sdk/openai";
92+
import { z } from "zod";
93+
94+
const state = chat.state({
95+
init: () => ({ score: 0, questionsAsked: 0, streak: 0 }),
96+
});
97+
98+
// Tools at module level — access state directly
99+
const checkAnswer = tool({
100+
description: "Check the user's answer",
101+
inputSchema: z.object({ correct: z.boolean() }),
102+
execute: async ({ correct }) => {
103+
state.questionsAsked++;
104+
if (correct) { state.score++; state.streak++; }
105+
else { state.streak = 0; }
106+
return { score: state.score, total: state.questionsAsked };
107+
},
108+
});
109+
110+
export const quiz = chat.task({
111+
id: "quiz-bot",
112+
state,
113+
run: async ({ modelMessages, signal }) => {
114+
return streamText({
115+
model: openai("gpt-4o-mini"),
116+
system: `Score: ${state.score}/${state.questionsAsked}`,
117+
messages: modelMessages,
118+
tools: { checkAnswer },
119+
maxSteps: 5,
120+
abortSignal: signal,
121+
});
122+
},
123+
});
124+
```
125+
126+
State is backed by a Proxy over locals — no globals, fully isolated per run. Supports optional `persist` callback for external storage:
127+
128+
```ts
129+
const state = chat.state({
130+
init: () => ({ preferences: [] }),
131+
persist: async ({ state, chatId }) => {
132+
await db.sessions.upsert({ where: { chatId }, data: state });
133+
},
134+
persistDebounceMs: 1000, // debounce rapid mutations
135+
});
136+
```
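The "Proxy over locals" idea can be illustrated with a minimal sketch built on Node's `AsyncLocalStorage`. Everything below is an assumption for illustration — the SDK's actual internals may differ:

```typescript
import { AsyncLocalStorage } from "node:async_hooks";

// Sketch: a module-level `state` object whose property reads/writes are
// forwarded by a Proxy to whichever store is active in the current run.
function createRunState<T extends object>(init: () => T) {
  const storage = new AsyncLocalStorage<T>();
  const state = new Proxy({} as T, {
    get: (_, prop) => Reflect.get(storage.getStore()!, prop),
    set: (_, prop, value) => Reflect.set(storage.getStore()!, prop, value),
  });
  // Each run() enters a fresh context with a fresh copy of the state
  const run = <R>(fn: () => R): R => storage.run(init(), fn);
  return { state, run };
}

const { state, run } = createRunState(() => ({ score: 0 }));

// Two separate "runs" mutate state without seeing each other's values
const a = run(() => { state.score += 2; return state.score; });
const b = run(() => { state.score += 5; return state.score; });
console.log(a, b); // 2 5
```

Because the store lives in the async context rather than a global, isolation holds even when runs interleave — which is why module-level tools can read `state` safely.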
137+
138+
## AI SDK tools
139+
140+
Tools work through the full pipeline. Tool calls and results stream to the frontend and appear in `message.parts`.
141+
142+
```ts
143+
export const myChat = chat.task({
144+
id: "my-chat",
145+
run: async ({ modelMessages, signal }) => {
146+
return streamText({
147+
model: openai("gpt-4o"),
148+
messages: modelMessages,
149+
tools: { weather: weatherTool },
150+
maxSteps: 5,
151+
abortSignal: signal,
152+
});
153+
},
154+
});
155+
```
156+
157+
## `chat.pipe` for complex flows
158+
159+
For agent loops where `streamText` is called deep in your code, use `chat.pipe` instead of returning the result:
160+
161+
```ts
162+
import { chat } from "@trigger.dev/sdk/ai";
163+
164+
export const agent = chat.task({
165+
id: "agent-chat",
166+
run: async ({ modelMessages, signal }) => {
167+
await runAgentLoop(modelMessages, signal);
168+
// Don't return — chat.pipe handles streaming
169+
},
170+
});
171+
172+
async function runAgentLoop(messages: ModelMessage[], signal: AbortSignal) {
173+
const result = streamText({ model, messages, abortSignal: signal });
174+
await chat.pipe(result); // works from anywhere inside the task
175+
}
176+
```
177+
178+
## Cancellation
179+
180+
Frontend cancellation flows through input streams to an `AbortSignal` provided in the run payload:
181+
182+
1. User clicks stop (or calls `stop()` from `useChat`)
183+
2. `TriggerChatTransport` sends a cancel signal via an input stream to the running task
184+
3. `chat.task` receives the signal and aborts the `signal` passed to your `run` function
185+
4. `streamText` stops generating — `useChat` shows the partial response
186+
187+
Just pass `signal` to `abortSignal` on `streamText` and cancellation works end-to-end. No manual abort controller wiring.
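Because `signal` is a standard `AbortSignal`, the usual Web API patterns also apply if you need extra cleanup alongside `streamText`. A small sketch — the `onCancel` helper is hypothetical, not part of the SDK:

```typescript
// Hypothetical helper: run cleanup exactly once when the run is cancelled.
function onCancel(signal: AbortSignal, cleanup: () => void) {
  if (signal.aborted) cleanup(); // already cancelled before we subscribed
  else signal.addEventListener("abort", cleanup, { once: true });
}

// Simulate the flow: the frontend's stop() ultimately aborts the run's signal
const controller = new AbortController();
let cleanups = 0;
onCancel(controller.signal, () => { cleanups++; });
controller.abort(); // stand-in for the transport's cancel signal
console.log(controller.signal.aborted, cleanups); // true 1
```

Inside a real `run` function you would call something like `onCancel(signal, flushPartialResults)` next to the `streamText` call; the `{ once: true }` option keeps the cleanup from firing twice.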
## Type-safe access tokens

```ts
// Server action
import { chat } from "@trigger.dev/sdk/ai";
import type { myChat } from "@/trigger/chat";

export const getChatToken = () => chat.createAccessToken<typeof myChat>("my-chat");
```

## Package imports

| Import | Package |
|--------|---------|
| `chat.task`, `chat.state`, `chat.pipe`, `chat.createAccessToken` | `@trigger.dev/sdk/ai` |
| `TriggerChatTransport` | `@trigger.dev/sdk/chat` |
| `useTriggerChatTransport` | `@trigger.dev/sdk/chat/react` |

Requires `ai` package v6+.

packages/redis-worker/src/fair-queue/index.ts

Lines changed: 29 additions & 62 deletions

```diff
@@ -1094,7 +1094,6 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
   ): Promise<number> {
     const queueKey = this.keys.queueKey(queueId);
     const queueItemsKey = this.keys.queueItemsKey(queueId);
-    const masterQueueKey = this.keys.masterQueueKey(shardId);
     const descriptor = this.queueDescriptorCache.get(queueId) ?? {
       id: queueId,
       tenantId,
@@ -1153,12 +1152,16 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
       if (!reserved) {
         // Release ALL remaining messages (from index i onward) back to queue
         // This prevents messages from being stranded in the in-flight set
+        const tenantQueueIndexKey = this.keys.tenantQueueIndexKey(tenantId);
+        const dispatchKey = this.keys.dispatchKey(shardId);
         await this.visibilityManager.releaseBatch(
           claimedMessages.slice(i),
           queueId,
           queueKey,
           queueItemsKey,
-          masterQueueKey
+          tenantQueueIndexKey,
+          dispatchKey,
+          tenantId
         );
         // Stop processing more messages from this queue since we're at capacity
         break;
@@ -1293,7 +1296,6 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
     const shardId = this.masterQueue.getShardForQueue(queueId);
     const queueKey = this.keys.queueKey(queueId);
     const queueItemsKey = this.keys.queueItemsKey(queueId);
-    const masterQueueKey = this.keys.masterQueueKey(shardId);
     const inflightDataKey = this.keys.inflightDataKey(shardId);

     // Get stored message for concurrency release
@@ -1315,13 +1317,17 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
         }
       : { id: queueId, tenantId: "", metadata: {} };

-    // Release back to queue (visibility manager updates old master queue internally)
+    // Release back to queue (visibility manager updates dispatch indexes atomically)
+    const tenantQueueIndexKey = this.keys.tenantQueueIndexKey(descriptor.tenantId);
+    const dispatchKey = this.keys.dispatchKey(shardId);
     await this.visibilityManager.release(
       messageId,
       queueId,
       queueKey,
       queueItemsKey,
-      masterQueueKey,
+      tenantQueueIndexKey,
+      dispatchKey,
+      descriptor.tenantId,
       Date.now() // Put at back of queue
     );

@@ -1330,17 +1336,6 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
       await this.concurrencyManager.release(descriptor, messageId);
     }

-    // Update new dispatch indexes (message is back in queue, update scores)
-    const tenantQueueIndexKey = this.keys.tenantQueueIndexKey(descriptor.tenantId);
-    const dispatchKey = this.keys.dispatchKey(shardId);
-    await this.redis.updateDispatchIndexes(
-      queueKey,
-      tenantQueueIndexKey,
-      dispatchKey,
-      queueId,
-      descriptor.tenantId
-    );
-
     this.logger.debug("Message released", {
       messageId,
       queueId,
@@ -1359,7 +1354,6 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
     const shardId = this.masterQueue.getShardForQueue(queueId);
     const queueKey = this.keys.queueKey(queueId);
     const queueItemsKey = this.keys.queueItemsKey(queueId);
-    const masterQueueKey = this.keys.masterQueueKey(shardId);
     const inflightDataKey = this.keys.inflightDataKey(shardId);

     // Get stored message
@@ -1391,7 +1385,6 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
       queueId,
       queueKey,
       queueItemsKey,
-      masterQueueKey,
       shardId,
       descriptor,
       error
@@ -1407,7 +1400,6 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
     queueId: string,
     queueKey: string,
     queueItemsKey: string,
-    masterQueueKey: string,
     shardId: number,
     descriptor: QueueDescriptor,
     error?: Error
@@ -1427,12 +1419,16 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {

     // Release with delay, passing the updated message data so the Lua script
     // atomically writes the incremented attempt count when re-queuing.
+    const tenantQueueIndexKey = this.keys.tenantQueueIndexKey(descriptor.tenantId);
+    const dispatchKey = this.keys.dispatchKey(shardId);
     await this.visibilityManager.release(
       storedMessage.id,
       queueId,
       queueKey,
       queueItemsKey,
-      masterQueueKey,
+      tenantQueueIndexKey,
+      dispatchKey,
+      descriptor.tenantId,
       Date.now() + nextDelay,
       JSON.stringify(updatedMessage)
     );
@@ -1442,17 +1438,6 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
       await this.concurrencyManager.release(descriptor, storedMessage.id);
     }

-    // Update dispatch indexes (message is back in queue with delay)
-    const tenantQueueIndexKey = this.keys.tenantQueueIndexKey(descriptor.tenantId);
-    const dispatchKey = this.keys.dispatchKey(shardId);
-    await this.redis.updateDispatchIndexes(
-      queueKey,
-      tenantQueueIndexKey,
-      dispatchKey,
-      queueId,
-      descriptor.tenantId
-    );
-
     this.telemetry.recordRetry();

     this.logger.debug("Message scheduled for retry", {
@@ -1550,11 +1535,17 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
     let totalReclaimed = 0;

     for (let shardId = 0; shardId < this.shardCount; shardId++) {
-      const reclaimedMessages = await this.visibilityManager.reclaimTimedOut(shardId, (queueId) => ({
-        queueKey: this.keys.queueKey(queueId),
-        queueItemsKey: this.keys.queueItemsKey(queueId),
-        masterQueueKey: this.keys.masterQueueKey(this.masterQueue.getShardForQueue(queueId)),
-      }));
+      const reclaimedMessages = await this.visibilityManager.reclaimTimedOut(shardId, (queueId) => {
+        const tenantId = this.keys.extractTenantId(queueId);
+        const queueShardId = this.masterQueue.getShardForQueue(queueId);
+        return {
+          queueKey: this.keys.queueKey(queueId),
+          queueItemsKey: this.keys.queueItemsKey(queueId),
+          tenantQueueIndexKey: this.keys.tenantQueueIndexKey(tenantId),
+          dispatchKey: this.keys.dispatchKey(queueShardId),
+          tenantId,
+        };
+      });

       if (reclaimedMessages.length > 0) {
         // Release concurrency for all reclaimed messages in a single batch
@@ -1580,32 +1571,8 @@ export class FairQueue<TPayloadSchema extends z.ZodTypeAny = z.ZodUnknown> {
         }
       }

-      // Update dispatch indexes for reclaimed queues (messages are back in queue)
-      const updatedQueues = new Set<string>();
-      for (const msg of reclaimedMessages) {
-        const key = `${msg.tenantId}:${msg.queueId}`;
-        if (updatedQueues.has(key)) continue;
-        updatedQueues.add(key);
-
-        try {
-          const queueKey = this.keys.queueKey(msg.queueId);
-          const tenantQueueIndexKey = this.keys.tenantQueueIndexKey(msg.tenantId);
-          const dispatchKey = this.keys.dispatchKey(shardId);
-          await this.redis.updateDispatchIndexes(
-            queueKey,
-            tenantQueueIndexKey,
-            dispatchKey,
-            msg.queueId,
-            msg.tenantId
-          );
-        } catch (error) {
-          this.logger.error("Failed to update dispatch indexes for reclaimed message", {
-            queueId: msg.queueId,
-            tenantId: msg.tenantId,
-            error: error instanceof Error ? error.message : String(error),
-          });
-        }
-      }
+      // Dispatch indexes are updated atomically by the releaseMessage Lua script
+      // inside reclaimTimedOut, so no separate index update needed here.
     }

     totalReclaimed += reclaimedMessages.length;
```
