From e0389baaa41f02d18ac550c4c960692261edb7b9 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 20:19:04 -0700 Subject: [PATCH 01/11] feat(concurrency): bullmq based queueing system --- apps/sim/app/api/jobs/[jobId]/route.test.ts | 160 +++++ apps/sim/app/api/jobs/[jobId]/route.ts | 56 +- .../app/api/schedules/execute/route.test.ts | 85 ++- apps/sim/app/api/schedules/execute/route.ts | 63 +- .../app/api/webhooks/trigger/[path]/route.ts | 50 +- .../[id]/execute/route.async.test.ts | 35 +- .../app/api/workflows/[id]/execute/route.ts | 279 ++++++++- apps/sim/background/schedule-execution.ts | 1 + apps/sim/background/workflow-execution.ts | 1 + .../workspace-notification-delivery.ts | 218 ++++++- apps/sim/lib/billing/subscriptions/utils.ts | 20 +- apps/sim/lib/billing/types/index.ts | 40 +- apps/sim/lib/billing/webhooks/enterprise.ts | 35 +- .../lib/billing/workspace-concurrency.test.ts | 146 +++++ apps/sim/lib/billing/workspace-concurrency.ts | 170 ++++++ apps/sim/lib/core/admission/gate.ts | 60 ++ .../lib/core/async-jobs/backends/bullmq.ts | 106 ++++ .../sim/lib/core/async-jobs/backends/index.ts | 1 + apps/sim/lib/core/async-jobs/config.ts | 46 +- apps/sim/lib/core/async-jobs/index.ts | 1 + apps/sim/lib/core/async-jobs/types.ts | 6 +- apps/sim/lib/core/bullmq/connection.ts | 29 + apps/sim/lib/core/bullmq/index.ts | 16 + apps/sim/lib/core/bullmq/queues.ts | 196 ++++++ apps/sim/lib/core/config/env.ts | 9 + .../lib/core/workspace-dispatch/adapter.ts | 80 +++ .../workspace-dispatch/dispatcher.test.ts | 175 ++++++ .../lib/core/workspace-dispatch/dispatcher.ts | 156 +++++ .../lib/core/workspace-dispatch/factory.ts | 42 ++ apps/sim/lib/core/workspace-dispatch/index.ts | 32 + .../workspace-dispatch/memory-store.test.ts | 65 ++ .../core/workspace-dispatch/memory-store.ts | 478 +++++++++++++++ .../lib/core/workspace-dispatch/planner.ts | 154 +++++ .../workspace-dispatch/reconciler.test.ts | 225 +++++++ .../lib/core/workspace-dispatch/reconciler.ts | 196 
++++++ .../core/workspace-dispatch/redis-store.ts | 574 ++++++++++++++++++ .../core/workspace-dispatch/status.test.ts | 102 ++++ .../sim/lib/core/workspace-dispatch/status.ts | 110 ++++ apps/sim/lib/core/workspace-dispatch/store.ts | 193 ++++++ apps/sim/lib/core/workspace-dispatch/types.ts | 107 ++++ .../core/workspace-dispatch/worker.test.ts | 98 +++ .../sim/lib/core/workspace-dispatch/worker.ts | 104 ++++ apps/sim/lib/execution/buffered-stream.ts | 111 ++++ .../lib/knowledge/connectors/sync-engine.ts | 83 ++- .../knowledge/documents/document-processor.ts | 14 +- .../documents/parser-extension.test.ts | 27 + .../knowledge/documents/parser-extension.ts | 48 ++ apps/sim/lib/knowledge/documents/queue.ts | 227 ------- apps/sim/lib/knowledge/documents/service.ts | 178 +++--- apps/sim/lib/logs/events.ts | 6 + .../lib/notifications/inactivity-polling.ts | 3 + .../uploads/utils/user-file-base64.server.ts | 8 +- apps/sim/lib/webhooks/processor.test.ts | 26 +- apps/sim/lib/webhooks/processor.ts | 112 ++-- .../workflows/executor/execution-events.ts | 61 +- .../executor/queued-workflow-execution.ts | 339 +++++++++++ apps/sim/lib/workflows/utils.ts | 8 +- apps/sim/package.json | 5 +- apps/sim/worker/health.ts | 77 +++ apps/sim/worker/index.ts | 190 ++++++ .../processors/knowledge-connector-sync.ts | 22 + .../knowledge-document-processing.ts | 26 + .../processors/mothership-job-execution.ts | 20 + apps/sim/worker/processors/schedule.ts | 21 + apps/sim/worker/processors/webhook.ts | 21 + apps/sim/worker/processors/workflow.ts | 51 ++ .../workspace-notification-delivery.ts | 32 + bun.lock | 30 +- docker-compose.prod.yml | 40 ++ helm/sim/values.yaml | 5 + 70 files changed, 5871 insertions(+), 640 deletions(-) create mode 100644 apps/sim/app/api/jobs/[jobId]/route.test.ts create mode 100644 apps/sim/lib/billing/workspace-concurrency.test.ts create mode 100644 apps/sim/lib/billing/workspace-concurrency.ts create mode 100644 apps/sim/lib/core/admission/gate.ts create mode 100644 
apps/sim/lib/core/async-jobs/backends/bullmq.ts create mode 100644 apps/sim/lib/core/bullmq/connection.ts create mode 100644 apps/sim/lib/core/bullmq/index.ts create mode 100644 apps/sim/lib/core/bullmq/queues.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/adapter.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/dispatcher.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/factory.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/index.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/memory-store.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/memory-store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/planner.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/reconciler.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/reconciler.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/redis-store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/status.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/status.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/types.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/worker.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/worker.ts create mode 100644 apps/sim/lib/execution/buffered-stream.ts create mode 100644 apps/sim/lib/knowledge/documents/parser-extension.test.ts create mode 100644 apps/sim/lib/knowledge/documents/parser-extension.ts delete mode 100644 apps/sim/lib/knowledge/documents/queue.ts create mode 100644 apps/sim/lib/workflows/executor/queued-workflow-execution.ts create mode 100644 apps/sim/worker/health.ts create mode 100644 apps/sim/worker/index.ts create mode 100644 apps/sim/worker/processors/knowledge-connector-sync.ts create mode 100644 apps/sim/worker/processors/knowledge-document-processing.ts create mode 
100644 apps/sim/worker/processors/mothership-job-execution.ts create mode 100644 apps/sim/worker/processors/schedule.ts create mode 100644 apps/sim/worker/processors/webhook.ts create mode 100644 apps/sim/worker/processors/workflow.ts create mode 100644 apps/sim/worker/processors/workspace-notification-delivery.ts diff --git a/apps/sim/app/api/jobs/[jobId]/route.test.ts b/apps/sim/app/api/jobs/[jobId]/route.test.ts new file mode 100644 index 00000000000..050c0bee2c5 --- /dev/null +++ b/apps/sim/app/api/jobs/[jobId]/route.test.ts @@ -0,0 +1,160 @@ +/** + * @vitest-environment node + */ +import type { NextRequest } from 'next/server' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockCheckHybridAuth, + mockGetDispatchJobRecord, + mockGetJobQueue, + mockVerifyWorkflowAccess, + mockGetWorkflowById, +} = vi.hoisted(() => ({ + mockCheckHybridAuth: vi.fn(), + mockGetDispatchJobRecord: vi.fn(), + mockGetJobQueue: vi.fn(), + mockVerifyWorkflowAccess: vi.fn(), + mockGetWorkflowById: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/auth/hybrid', () => ({ + checkHybridAuth: mockCheckHybridAuth, +})) + +vi.mock('@/lib/core/async-jobs', () => ({ + JOB_STATUS: { + PENDING: 'pending', + PROCESSING: 'processing', + COMPLETED: 'completed', + FAILED: 'failed', + }, + getJobQueue: mockGetJobQueue, +})) + +vi.mock('@/lib/core/workspace-dispatch/store', () => ({ + getDispatchJobRecord: mockGetDispatchJobRecord, +})) + +vi.mock('@/lib/core/utils/request', () => ({ + generateRequestId: vi.fn().mockReturnValue('request-1'), +})) + +vi.mock('@/socket/middleware/permissions', () => ({ + verifyWorkflowAccess: mockVerifyWorkflowAccess, +})) + +vi.mock('@/lib/workflows/utils', () => ({ + getWorkflowById: mockGetWorkflowById, +})) + +import { GET } from './route' + +function createMockRequest(): NextRequest { + return { + headers: { + get: () => 
null, + }, + } as NextRequest +} + +describe('GET /api/jobs/[jobId]', () => { + beforeEach(() => { + vi.clearAllMocks() + + mockCheckHybridAuth.mockResolvedValue({ + success: true, + userId: 'user-1', + apiKeyType: undefined, + workspaceId: undefined, + }) + + mockVerifyWorkflowAccess.mockResolvedValue({ hasAccess: true }) + mockGetWorkflowById.mockResolvedValue({ + id: 'workflow-1', + workspaceId: 'workspace-1', + }) + + mockGetJobQueue.mockResolvedValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + }) + + it('returns dispatcher-aware waiting status with metadata', async () => { + mockGetDispatchJobRecord.mockResolvedValue({ + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { + workflowId: 'workflow-1', + }, + priority: 10, + status: 'waiting', + createdAt: 1000, + admittedAt: 2000, + }) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'dispatch-1' }), + }) + const body = await response.json() + + expect(response.status).toBe(200) + expect(body.status).toBe('waiting') + expect(body.metadata.queueName).toBe('workflow-execution') + expect(body.metadata.lane).toBe('runtime') + expect(body.metadata.workspaceId).toBe('workspace-1') + }) + + it('returns completed output from dispatch state', async () => { + mockGetDispatchJobRecord.mockResolvedValue({ + id: 'dispatch-2', + workspaceId: 'workspace-1', + lane: 'interactive', + queueName: 'workflow-execution', + bullmqJobName: 'direct-workflow-execution', + bullmqPayload: {}, + metadata: { + workflowId: 'workflow-1', + }, + priority: 1, + status: 'completed', + createdAt: 1000, + startedAt: 2000, + completedAt: 7000, + output: { success: true }, + }) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'dispatch-2' }), + }) + const body = await response.json() + + expect(response.status).toBe(200) + 
expect(body.status).toBe('completed') + expect(body.output).toEqual({ success: true }) + expect(body.metadata.duration).toBe(5000) + }) + + it('returns 404 when neither dispatch nor BullMQ job exists', async () => { + mockGetDispatchJobRecord.mockResolvedValue(null) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'missing-job' }), + }) + + expect(response.status).toBe(404) + }) +}) diff --git a/apps/sim/app/api/jobs/[jobId]/route.ts b/apps/sim/app/api/jobs/[jobId]/route.ts index cb8a43a80de..aed0a106f8c 100644 --- a/apps/sim/app/api/jobs/[jobId]/route.ts +++ b/apps/sim/app/api/jobs/[jobId]/route.ts @@ -1,8 +1,10 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { checkHybridAuth } from '@/lib/auth/hybrid' -import { getJobQueue, JOB_STATUS } from '@/lib/core/async-jobs' +import { getJobQueue } from '@/lib/core/async-jobs' import { generateRequestId } from '@/lib/core/utils/request' +import { presentDispatchOrJobStatus } from '@/lib/core/workspace-dispatch/status' +import { getDispatchJobRecord } from '@/lib/core/workspace-dispatch/store' import { createErrorResponse } from '@/app/api/workflows/utils' const logger = createLogger('TaskStatusAPI') @@ -23,68 +25,54 @@ export async function GET( const authenticatedUserId = authResult.userId + const dispatchJob = await getDispatchJobRecord(taskId) const jobQueue = await getJobQueue() - const job = await jobQueue.getJob(taskId) + const job = dispatchJob ? await jobQueue.getJob(taskId) : await jobQueue.getJob(taskId) - if (!job) { + if (!job && !dispatchJob) { return createErrorResponse('Task not found', 404) } - if (job.metadata?.workflowId) { + const metadataToCheck = dispatchJob?.metadata ?? 
job?.metadata + + if (metadataToCheck?.workflowId) { const { verifyWorkflowAccess } = await import('@/socket/middleware/permissions') const accessCheck = await verifyWorkflowAccess( authenticatedUserId, - job.metadata.workflowId as string + metadataToCheck.workflowId as string ) if (!accessCheck.hasAccess) { - logger.warn(`[${requestId}] Access denied to workflow ${job.metadata.workflowId}`) + logger.warn(`[${requestId}] Access denied to workflow ${metadataToCheck.workflowId}`) return createErrorResponse('Access denied', 403) } if (authResult.apiKeyType === 'workspace' && authResult.workspaceId) { const { getWorkflowById } = await import('@/lib/workflows/utils') - const workflow = await getWorkflowById(job.metadata.workflowId as string) + const workflow = await getWorkflowById(metadataToCheck.workflowId as string) if (!workflow?.workspaceId || workflow.workspaceId !== authResult.workspaceId) { return createErrorResponse('API key is not authorized for this workspace', 403) } } - } else if (job.metadata?.userId && job.metadata.userId !== authenticatedUserId) { - logger.warn(`[${requestId}] Access denied to user ${job.metadata.userId}`) + } else if (metadataToCheck?.userId && metadataToCheck.userId !== authenticatedUserId) { + logger.warn(`[${requestId}] Access denied to user ${metadataToCheck.userId}`) return createErrorResponse('Access denied', 403) - } else if (!job.metadata?.userId && !job.metadata?.workflowId) { + } else if (!metadataToCheck?.userId && !metadataToCheck?.workflowId) { logger.warn(`[${requestId}] Access denied to job ${taskId}`) return createErrorResponse('Access denied', 403) } - const mappedStatus = job.status === JOB_STATUS.PENDING ? 
'queued' : job.status - + const presented = presentDispatchOrJobStatus(dispatchJob, job) const response: any = { success: true, taskId, - status: mappedStatus, - metadata: { - startedAt: job.startedAt, - }, - } - - if (job.status === JOB_STATUS.COMPLETED) { - response.output = job.output - response.metadata.completedAt = job.completedAt - if (job.startedAt && job.completedAt) { - response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() - } - } - - if (job.status === JOB_STATUS.FAILED) { - response.error = job.error - response.metadata.completedAt = job.completedAt - if (job.startedAt && job.completedAt) { - response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() - } + status: presented.status, + metadata: presented.metadata, } - if (job.status === JOB_STATUS.PROCESSING || job.status === JOB_STATUS.PENDING) { - response.estimatedDuration = 300000 + if (presented.output !== undefined) response.output = presented.output + if (presented.error !== undefined) response.error = presented.error + if (presented.estimatedDuration !== undefined) { + response.estimatedDuration = presented.estimatedDuration } return NextResponse.json(response) diff --git a/apps/sim/app/api/schedules/execute/route.test.ts b/apps/sim/app/api/schedules/execute/route.test.ts index cfdf6c3877b..80c59e537d1 100644 --- a/apps/sim/app/api/schedules/execute/route.test.ts +++ b/apps/sim/app/api/schedules/execute/route.test.ts @@ -9,10 +9,12 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' const { mockVerifyCronAuth, mockExecuteScheduleJob, + mockExecuteJobInline, mockFeatureFlags, mockDbReturning, mockDbUpdate, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockStartJob, mockCompleteJob, mockMarkJobFailed, @@ -22,6 +24,7 @@ const { const mockDbSet = vi.fn().mockReturnValue({ where: mockDbWhere }) const mockDbUpdate = vi.fn().mockReturnValue({ set: mockDbSet }) const mockEnqueue = vi.fn().mockResolvedValue('job-id-1') + const 
mockEnqueueWorkspaceDispatch = vi.fn().mockResolvedValue('job-id-1') const mockStartJob = vi.fn().mockResolvedValue(undefined) const mockCompleteJob = vi.fn().mockResolvedValue(undefined) const mockMarkJobFailed = vi.fn().mockResolvedValue(undefined) @@ -29,6 +32,7 @@ const { return { mockVerifyCronAuth: vi.fn().mockReturnValue(null), mockExecuteScheduleJob: vi.fn().mockResolvedValue(undefined), + mockExecuteJobInline: vi.fn().mockResolvedValue(undefined), mockFeatureFlags: { isTriggerDevEnabled: false, isHosted: false, @@ -38,6 +42,7 @@ const { mockDbReturning, mockDbUpdate, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockStartJob, mockCompleteJob, mockMarkJobFailed, @@ -50,6 +55,8 @@ vi.mock('@/lib/auth/internal', () => ({ vi.mock('@/background/schedule-execution', () => ({ executeScheduleJob: mockExecuteScheduleJob, + executeJobInline: mockExecuteJobInline, + releaseScheduleLock: vi.fn().mockResolvedValue(undefined), })) vi.mock('@/lib/core/config/feature-flags', () => mockFeatureFlags) @@ -68,6 +75,22 @@ vi.mock('@/lib/core/async-jobs', () => ({ shouldExecuteInline: vi.fn().mockReturnValue(false), })) +vi.mock('@/lib/core/bullmq', () => ({ + isBullMQEnabled: vi.fn().mockReturnValue(true), + createBullMQJobData: vi.fn((payload: unknown) => ({ payload })), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, +})) + +vi.mock('@/lib/workflows/utils', () => ({ + getWorkflowById: vi.fn().mockResolvedValue({ + id: 'workflow-1', + workspaceId: 'workspace-1', + }), +})) + vi.mock('drizzle-orm', () => ({ and: vi.fn((...conditions: unknown[]) => ({ type: 'and', conditions })), eq: vi.fn((field: unknown, value: unknown) => ({ field, value, type: 'eq' })), @@ -142,6 +165,18 @@ const MULTIPLE_SCHEDULES = [ }, ] +const SINGLE_JOB = [ + { + id: 'job-1', + cronExpression: '0 * * * *', + failedCount: 0, + lastQueuedAt: undefined, + sourceUserId: 'user-1', + sourceWorkspaceId: 'workspace-1', + sourceType: 'job', + }, 
+] + function createMockRequest(): NextRequest { const mockHeaders = new Map([ ['authorization', 'Bearer test-cron-secret'], @@ -211,30 +246,44 @@ describe('Scheduled Workflow Execution API Route', () => { expect(data).toHaveProperty('executedCount', 2) }) + it('should queue mothership jobs to BullMQ when available', async () => { + mockDbReturning.mockReturnValueOnce([]).mockReturnValueOnce(SINGLE_JOB) + + const response = await GET(createMockRequest()) + + expect(response.status).toBe(200) + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( + expect.objectContaining({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'mothership-job-execution', + bullmqJobName: 'mothership-job-execution', + bullmqPayload: { + payload: { + scheduleId: 'job-1', + cronExpression: '0 * * * *', + failedCount: 0, + now: expect.any(String), + }, + }, + }) + ) + expect(mockExecuteJobInline).not.toHaveBeenCalled() + }) + it('should enqueue preassigned correlation metadata for schedules', async () => { mockDbReturning.mockReturnValue(SINGLE_SCHEDULE) const response = await GET(createMockRequest()) expect(response.status).toBe(200) - expect(mockEnqueue).toHaveBeenCalledWith( - 'schedule-execution', + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ - scheduleId: 'schedule-1', - workflowId: 'workflow-1', - executionId: 'schedule-execution-1', - requestId: 'test-request-id', - correlation: { - executionId: 'schedule-execution-1', - requestId: 'test-request-id', - source: 'schedule', - workflowId: 'workflow-1', - scheduleId: 'schedule-1', - triggerType: 'schedule', - scheduledFor: '2025-01-01T00:00:00.000Z', - }, - }), - { + id: 'schedule-execution-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'schedule-execution', + bullmqJobName: 'schedule-execution', metadata: { workflowId: 'workflow-1', correlation: { @@ -247,7 +296,7 @@ describe('Scheduled Workflow Execution API Route', () => { scheduledFor: 
'2025-01-01T00:00:00.000Z', }, }, - } + }) ) }) }) diff --git a/apps/sim/app/api/schedules/execute/route.ts b/apps/sim/app/api/schedules/execute/route.ts index cef36bfb25b..1744e5743ff 100644 --- a/apps/sim/app/api/schedules/execute/route.ts +++ b/apps/sim/app/api/schedules/execute/route.ts @@ -5,7 +5,9 @@ import { type NextRequest, NextResponse } from 'next/server' import { v4 as uuidv4 } from 'uuid' import { verifyCronAuth } from '@/lib/auth/internal' import { getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { generateRequestId } from '@/lib/core/utils/request' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { executeJobInline, executeScheduleJob, @@ -73,6 +75,8 @@ export async function GET(request: NextRequest) { cronExpression: workflowSchedule.cronExpression, failedCount: workflowSchedule.failedCount, lastQueuedAt: workflowSchedule.lastQueuedAt, + sourceWorkspaceId: workflowSchedule.sourceWorkspaceId, + sourceUserId: workflowSchedule.sourceUserId, sourceType: workflowSchedule.sourceType, }) @@ -111,9 +115,41 @@ export async function GET(request: NextRequest) { } try { - const jobId = await jobQueue.enqueue('schedule-execution', payload, { - metadata: { workflowId: schedule.workflowId ?? undefined, correlation }, - }) + const workspaceId = schedule.workflowId + ? (await import('@/lib/workflows/utils')).getWorkflowById + : null + const resolvedWorkflow = + schedule.workflowId && workspaceId ? 
await workspaceId(schedule.workflowId) : null + const resolvedWorkspaceId = resolvedWorkflow?.workspaceId + + let jobId: string + if (isBullMQEnabled()) { + if (!resolvedWorkspaceId) { + throw new Error( + `Missing workspace for scheduled workflow ${schedule.workflowId}; refusing to bypass workspace admission` + ) + } + + jobId = await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: resolvedWorkspaceId, + lane: 'runtime', + queueName: 'schedule-execution', + bullmqJobName: 'schedule-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: schedule.workflowId ?? undefined, + correlation, + }), + metadata: { + workflowId: schedule.workflowId ?? undefined, + correlation, + }, + }) + } else { + jobId = await jobQueue.enqueue('schedule-execution', payload, { + metadata: { workflowId: schedule.workflowId ?? undefined, correlation }, + }) + } logger.info( `[${requestId}] Queued schedule execution task ${jobId} for workflow ${schedule.workflowId}` ) @@ -165,7 +201,7 @@ export async function GET(request: NextRequest) { } }) - // Jobs always execute inline (no TriggerDev) + // Mothership jobs use BullMQ when available, otherwise direct inline execution. const jobPromises = dueJobs.map(async (job) => { const queueTime = job.lastQueuedAt ?? 
queuedAt const payload = { @@ -176,7 +212,24 @@ export async function GET(request: NextRequest) { } try { - await executeJobInline(payload) + if (isBullMQEnabled()) { + if (!job.sourceWorkspaceId || !job.sourceUserId) { + throw new Error(`Mothership job ${job.id} is missing workspace/user ownership`) + } + + await enqueueWorkspaceDispatch({ + workspaceId: job.sourceWorkspaceId!, + lane: 'runtime', + queueName: 'mothership-job-execution', + bullmqJobName: 'mothership-job-execution', + bullmqPayload: createBullMQJobData(payload), + metadata: { + userId: job.sourceUserId, + }, + }) + } else { + await executeJobInline(payload) + } } catch (error) { logger.error(`[${requestId}] Job execution failed for ${job.id}`, { error: error instanceof Error ? error.message : String(error), diff --git a/apps/sim/app/api/webhooks/trigger/[path]/route.ts b/apps/sim/app/api/webhooks/trigger/[path]/route.ts index 56304c3e850..2c283b72fdb 100644 --- a/apps/sim/app/api/webhooks/trigger/[path]/route.ts +++ b/apps/sim/app/api/webhooks/trigger/[path]/route.ts @@ -1,6 +1,8 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' +import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' import { generateRequestId } from '@/lib/core/utils/request' +import { DispatchQueueFullError } from '@/lib/core/workspace-dispatch' import { checkWebhookPreprocessing, findAllWebhooksForPath, @@ -41,10 +43,25 @@ export async function POST( request: NextRequest, { params }: { params: Promise<{ path: string }> } ) { + const ticket = tryAdmit() + if (!ticket) { + return admissionRejectedResponse() + } + + try { + return await handleWebhookPost(request, params) + } finally { + ticket.release() + } +} + +async function handleWebhookPost( + request: NextRequest, + params: Promise<{ path: string }> +): Promise { const requestId = generateRequestId() const { path } = await params - // Handle provider challenges before body parsing (Microsoft 
Graph validationToken, etc.) const earlyChallenge = await handleProviderChallenges({}, request, requestId, path) if (earlyChallenge) { return earlyChallenge @@ -140,17 +157,30 @@ export async function POST( continue } - const response = await queueWebhookExecution(foundWebhook, foundWorkflow, body, request, { - requestId, - path, - actorUserId: preprocessResult.actorUserId, - executionId: preprocessResult.executionId, - correlation: preprocessResult.correlation, - }) - responses.push(response) + try { + const response = await queueWebhookExecution(foundWebhook, foundWorkflow, body, request, { + requestId, + path, + actorUserId: preprocessResult.actorUserId, + executionId: preprocessResult.executionId, + correlation: preprocessResult.correlation, + }) + responses.push(response) + } catch (error) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + throw error + } } - // Return the last successful response, or a combined response for multiple webhooks if (responses.length === 0) { return new NextResponse('No webhooks processed successfully', { status: 500 }) } diff --git a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts index 7d6c599dcfd..1a4e0bd980f 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts @@ -10,11 +10,13 @@ const { mockAuthorizeWorkflowByWorkspacePermission, mockPreprocessExecution, mockEnqueue, + mockEnqueueWorkspaceDispatch, } = vi.hoisted(() => ({ mockCheckHybridAuth: vi.fn(), mockAuthorizeWorkflowByWorkspacePermission: vi.fn(), mockPreprocessExecution: vi.fn(), mockEnqueue: vi.fn().mockResolvedValue('job-123'), + mockEnqueueWorkspaceDispatch: vi.fn().mockResolvedValue('job-123'), })) 
vi.mock('@/lib/auth/hybrid', () => ({ @@ -44,6 +46,16 @@ vi.mock('@/lib/core/async-jobs', () => ({ markJobFailed: vi.fn(), }), shouldExecuteInline: vi.fn().mockReturnValue(false), + shouldUseBullMQ: vi.fn().mockReturnValue(true), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + createBullMQJobData: vi.fn((payload: unknown, metadata?: unknown) => ({ payload, metadata })), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, + waitForDispatchJob: vi.fn(), })) vi.mock('@/lib/core/utils/request', () => ({ @@ -132,22 +144,13 @@ describe('workflow execute async route', () => { expect(response.status).toBe(202) expect(body.executionId).toBe('execution-123') expect(body.jobId).toBe('job-123') - expect(mockEnqueue).toHaveBeenCalledWith( - 'workflow-execution', + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ - workflowId: 'workflow-1', - userId: 'actor-1', - executionId: 'execution-123', - requestId: 'req-12345678', - correlation: { - executionId: 'execution-123', - requestId: 'req-12345678', - source: 'workflow', - workflowId: 'workflow-1', - triggerType: 'manual', - }, - }), - { + id: 'execution-123', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', metadata: { workflowId: 'workflow-1', userId: 'actor-1', @@ -159,7 +162,7 @@ describe('workflow execute async route', () => { triggerType: 'manual', }, }, - } + }) ) }) }) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 8cee947272f..0b92df8aece 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -3,7 +3,9 @@ import { type NextRequest, NextResponse } from 'next/server' import { validate as uuidValidate, v4 as uuidv4 } from 'uuid' import { z } from 'zod' import { AuthType, checkHybridAuth } from '@/lib/auth/hybrid' 
-import { getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' +import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' +import { getJobQueue, shouldExecuteInline, shouldUseBullMQ } from '@/lib/core/async-jobs' +import { createBullMQJobData } from '@/lib/core/bullmq' import { createTimeoutAbortController, getTimeoutErrorMessage, @@ -12,6 +14,13 @@ import { import { generateRequestId } from '@/lib/core/utils/request' import { SSE_HEADERS } from '@/lib/core/utils/sse' import { getBaseUrl } from '@/lib/core/utils/urls' +import { + DispatchQueueFullError, + enqueueWorkspaceDispatch, + type WorkspaceDispatchLane, + waitForDispatchJob, +} from '@/lib/core/workspace-dispatch' +import { createBufferedExecutionStream } from '@/lib/execution/buffered-stream' import { buildNextCallChain, parseCallChain, @@ -33,6 +42,11 @@ import { import { executeWorkflowCore } from '@/lib/workflows/executor/execution-core' import { type ExecutionEvent, encodeSSEEvent } from '@/lib/workflows/executor/execution-events' import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' +import { + DIRECT_WORKFLOW_JOB_NAME, + type QueuedWorkflowExecutionPayload, + type QueuedWorkflowExecutionResult, +} from '@/lib/workflows/executor/queued-workflow-execution' import { loadDeployedWorkflowState, loadWorkflowFromNormalizedTables, @@ -161,6 +175,7 @@ type AsyncExecutionParams = { requestId: string workflowId: string userId: string + workspaceId: string input: any triggerType: CoreTriggerType executionId: string @@ -168,7 +183,8 @@ type AsyncExecutionParams = { } async function handleAsyncExecution(params: AsyncExecutionParams): Promise { - const { requestId, workflowId, userId, input, triggerType, executionId, callChain } = params + const { requestId, workflowId, userId, workspaceId, input, triggerType, executionId, callChain } = + params const correlation = { executionId, @@ -181,6 +197,7 @@ async function handleAsyncExecution(params: 
AsyncExecutionParams): Promise { try { - await jobQueue.startJob(jobId) + await inlineJobQueue.startJob(jobId) const output = await executeWorkflowJob(payload) - await jobQueue.completeJob(jobId, output) + await inlineJobQueue.completeJob(jobId, output) } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error) logger.error(`[${requestId}] Async workflow execution failed`, { @@ -213,7 +249,7 @@ async function handleAsyncExecution(params: AsyncExecutionParams): Promise }) { + const ticket = tryAdmit() + if (!ticket) { + return admissionRejectedResponse() + } + + try { + return await handleExecutePost(req, params) + } finally { + ticket.release() + } +} + +async function handleExecutePost( + req: NextRequest, + params: Promise<{ id: string }> +): Promise { const requestId = generateRequestId() const { id: workflowId } = await params @@ -584,6 +672,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: requestId, workflowId, userId: actorUserId, + workspaceId, input, triggerType: loggingTriggerType, executionId, @@ -676,30 +765,105 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: if (!enableSSE) { logger.info(`[${requestId}] Using non-SSE execution (direct JSON response)`) + const metadata: ExecutionMetadata = { + requestId, + executionId, + workflowId, + workspaceId, + userId: actorUserId, + sessionUserId: isClientSession ? userId : undefined, + workflowUserId: workflow.userId, + triggerType, + useDraftState: shouldUseDraftState, + startTime: new Date().toISOString(), + isClientSession, + enforceCredentialAccess: useAuthenticatedUserAsActor, + workflowStateOverride: effectiveWorkflowStateOverride, + callChain, + } + + const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? 
{} + + if (shouldUseBullMQ()) { + try { + const dispatchJobId = await enqueueDirectWorkflowExecution( + { + workflow, + metadata, + input: processedInput, + variables: executionVariables, + selectedOutputs, + includeFileBase64, + base64MaxBytes, + stopAfterBlockId, + timeoutMs: preprocessResult.executionTimeout?.sync, + runFromBlock: resolvedRunFromBlock, + }, + triggerType === 'manual' ? 1 : 5, + 'interactive' + ) + + const resultRecord = await waitForDispatchJob( + dispatchJobId, + (preprocessResult.executionTimeout?.sync ?? 300000) + 30000 + ) + + const result = resultRecord.output as QueuedWorkflowExecutionResult + + const resultForResponseBlock = { + success: result.success, + logs: result.logs, + output: result.output, + } + + if ( + auth.authType !== AuthType.INTERNAL_JWT && + workflowHasResponseBlock(resultForResponseBlock) + ) { + return createHttpResponseFromBlock(resultForResponseBlock) + } + + return NextResponse.json( + { + success: result.success, + executionId, + output: result.output, + error: result.error, + metadata: result.metadata, + }, + { status: result.statusCode ?? 200 } + ) + } catch (error: unknown) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + + const errorMessage = error instanceof Error ? error.message : 'Unknown error' + + logger.error(`[${requestId}] Queued non-SSE execution failed: ${errorMessage}`) + + return NextResponse.json( + { + success: false, + error: errorMessage, + }, + { status: 500 } + ) + } + } + const timeoutController = createTimeoutAbortController( preprocessResult.executionTimeout?.sync ) try { - const metadata: ExecutionMetadata = { - requestId, - executionId, - workflowId, - workspaceId, - userId: actorUserId, - sessionUserId: isClientSession ? 
userId : undefined, - workflowUserId: workflow.userId, - triggerType, - useDraftState: shouldUseDraftState, - startTime: new Date().toISOString(), - isClientSession, - enforceCredentialAccess: useAuthenticatedUserAsActor, - workflowStateOverride: effectiveWorkflowStateOverride, - callChain, - } - - const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? {} - const snapshot = new ExecutionSnapshot( metadata, workflow, @@ -809,6 +973,52 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: } if (shouldUseDraftState) { + if (shouldUseBullMQ()) { + const metadata: ExecutionMetadata = { + requestId, + executionId, + workflowId, + workspaceId, + userId: actorUserId, + sessionUserId: isClientSession ? userId : undefined, + workflowUserId: workflow.userId, + triggerType, + useDraftState: shouldUseDraftState, + startTime: new Date().toISOString(), + isClientSession, + enforceCredentialAccess: useAuthenticatedUserAsActor, + workflowStateOverride: effectiveWorkflowStateOverride, + callChain, + } + + const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? 
{} + + await enqueueDirectWorkflowExecution( + { + workflow, + metadata, + input: processedInput, + variables: executionVariables, + selectedOutputs, + includeFileBase64, + base64MaxBytes, + stopAfterBlockId, + timeoutMs: preprocessResult.executionTimeout?.sync, + runFromBlock: resolvedRunFromBlock, + streamEvents: true, + }, + 1, + 'interactive' + ) + + return new NextResponse(createBufferedExecutionStream(executionId), { + headers: { + ...SSE_HEADERS, + 'X-Execution-Id': executionId, + }, + }) + } + logger.info(`[${requestId}] Using SSE console log streaming (manual execution)`) } else { logger.info(`[${requestId}] Using streaming API response`) @@ -1271,6 +1481,17 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: }, }) } catch (error: any) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + logger.error(`[${requestId}] Failed to start workflow execution:`, error) return NextResponse.json( { error: error.message || 'Failed to start workflow execution' }, diff --git a/apps/sim/background/schedule-execution.ts b/apps/sim/background/schedule-execution.ts index d1231e16a61..1136e716d9a 100644 --- a/apps/sim/background/schedule-execution.ts +++ b/apps/sim/background/schedule-execution.ts @@ -303,6 +303,7 @@ async function runWorkflowExecution({ export type ScheduleExecutionPayload = { scheduleId: string workflowId: string + workspaceId?: string executionId?: string requestId?: string correlation?: AsyncExecutionCorrelation diff --git a/apps/sim/background/workflow-execution.ts b/apps/sim/background/workflow-execution.ts index 49756d82c27..aa2411a2580 100644 --- a/apps/sim/background/workflow-execution.ts +++ b/apps/sim/background/workflow-execution.ts @@ -36,6 +36,7 @@ export function buildWorkflowCorrelation( export type 
WorkflowExecutionPayload = { workflowId: string userId: string + workspaceId?: string input?: any triggerType?: CoreTriggerType executionId?: string diff --git a/apps/sim/background/workspace-notification-delivery.ts b/apps/sim/background/workspace-notification-delivery.ts index 1886d5462e3..230d33dae67 100644 --- a/apps/sim/background/workspace-notification-delivery.ts +++ b/apps/sim/background/workspace-notification-delivery.ts @@ -1,5 +1,5 @@ import { createHmac } from 'crypto' -import { db } from '@sim/db' +import { db, workflowExecutionLogs } from '@sim/db' import { account, workspaceNotificationDelivery, @@ -17,11 +17,14 @@ import { import { checkUsageStatus } from '@/lib/billing/calculations/usage-monitor' import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription' import { dollarsToCredits } from '@/lib/billing/credits/conversion' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' +import { acquireLock } from '@/lib/core/config/redis' import { RateLimiter } from '@/lib/core/rate-limiter' import { decryptSecret } from '@/lib/core/security/encryption' import { secureFetchWithValidation } from '@/lib/core/security/input-validation.server' import { formatDuration } from '@/lib/core/utils/formatting' import { getBaseUrl } from '@/lib/core/utils/urls' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import type { TraceSpan, WorkflowExecutionLog } from '@/lib/logs/types' import { sendEmail } from '@/lib/messaging/email/mailer' import type { AlertConfig } from '@/lib/notifications/alert-rules' @@ -32,6 +35,7 @@ const logger = createLogger('WorkspaceNotificationDelivery') const MAX_ATTEMPTS = 5 const RETRY_DELAYS = [5 * 1000, 15 * 1000, 60 * 1000, 3 * 60 * 1000, 10 * 60 * 1000] +const NOTIFICATION_DISPATCH_LOCK_TTL_SECONDS = 3 function getRetryDelayWithJitter(baseDelay: number): number { const jitter = Math.random() * 0.1 * baseDelay @@ -486,12 +490,170 @@ async function updateDeliveryStatus( 
export interface NotificationDeliveryParams { deliveryId: string subscriptionId: string + workspaceId: string notificationType: 'webhook' | 'email' | 'slack' log: WorkflowExecutionLog alertConfig?: AlertConfig } -export async function executeNotificationDelivery(params: NotificationDeliveryParams) { +export type NotificationDeliveryResult = + | { status: 'success' | 'skipped' | 'failed' } + | { status: 'retry'; retryDelayMs: number } + +async function buildRetryLog(params: NotificationDeliveryParams): Promise { + const [storedLog] = await db + .select() + .from(workflowExecutionLogs) + .where( + and( + eq(workflowExecutionLogs.executionId, params.log.executionId), + eq(workflowExecutionLogs.workflowId, params.log.workflowId!) + ) + ) + .limit(1) + + if (storedLog) { + return storedLog as unknown as WorkflowExecutionLog + } + + const now = new Date().toISOString() + return { + id: `retry_log_${params.deliveryId}`, + workflowId: params.log.workflowId, + executionId: params.log.executionId, + stateSnapshotId: '', + level: 'info', + trigger: 'system', + startedAt: now, + endedAt: now, + totalDurationMs: 0, + executionData: {}, + cost: { total: 0 }, + createdAt: now, + } +} + +export async function enqueueNotificationDeliveryDispatch( + params: NotificationDeliveryParams +): Promise { + if (!isBullMQEnabled()) { + return false + } + + const lockAcquired = await acquireLock( + `workspace-notification-dispatch:${params.deliveryId}`, + params.deliveryId, + NOTIFICATION_DISPATCH_LOCK_TTL_SECONDS + ) + if (!lockAcquired) { + return false + } + + await enqueueWorkspaceDispatch({ + workspaceId: params.workspaceId, + lane: 'lightweight', + queueName: 'workspace-notification-delivery', + bullmqJobName: 'workspace-notification-delivery', + bullmqPayload: createBullMQJobData(params), + metadata: { + workflowId: params.log.workflowId ?? 
undefined, + }, + }) + + return true +} + +const STUCK_IN_PROGRESS_THRESHOLD_MS = 5 * 60 * 1000 + +export async function sweepPendingNotificationDeliveries(limit = 50): Promise { + if (!isBullMQEnabled()) { + return 0 + } + + const stuckThreshold = new Date(Date.now() - STUCK_IN_PROGRESS_THRESHOLD_MS) + + await db + .update(workspaceNotificationDelivery) + .set({ + status: 'pending', + updatedAt: new Date(), + }) + .where( + and( + eq(workspaceNotificationDelivery.status, 'in_progress'), + lte(workspaceNotificationDelivery.lastAttemptAt, stuckThreshold) + ) + ) + + const dueDeliveries = await db + .select({ + deliveryId: workspaceNotificationDelivery.id, + subscriptionId: workspaceNotificationDelivery.subscriptionId, + workflowId: workspaceNotificationDelivery.workflowId, + executionId: workspaceNotificationDelivery.executionId, + workspaceId: workspaceNotificationSubscription.workspaceId, + alertConfig: workspaceNotificationSubscription.alertConfig, + notificationType: workspaceNotificationSubscription.notificationType, + }) + .from(workspaceNotificationDelivery) + .innerJoin( + workspaceNotificationSubscription, + eq(workspaceNotificationDelivery.subscriptionId, workspaceNotificationSubscription.id) + ) + .where( + and( + eq(workspaceNotificationDelivery.status, 'pending'), + or( + isNull(workspaceNotificationDelivery.nextAttemptAt), + lte(workspaceNotificationDelivery.nextAttemptAt, new Date()) + ) + ) + ) + .limit(limit) + + let enqueued = 0 + + for (const delivery of dueDeliveries) { + const params: NotificationDeliveryParams = { + deliveryId: delivery.deliveryId, + subscriptionId: delivery.subscriptionId, + workspaceId: delivery.workspaceId, + notificationType: delivery.notificationType, + log: await buildRetryLog({ + deliveryId: delivery.deliveryId, + subscriptionId: delivery.subscriptionId, + workspaceId: delivery.workspaceId, + notificationType: delivery.notificationType, + log: { + id: '', + workflowId: delivery.workflowId, + executionId: 
delivery.executionId, + stateSnapshotId: '', + level: 'info', + trigger: 'system', + startedAt: '', + endedAt: '', + totalDurationMs: 0, + executionData: {}, + cost: { total: 0 }, + createdAt: '', + }, + alertConfig: (delivery.alertConfig as AlertConfig | null) ?? undefined, + }), + alertConfig: (delivery.alertConfig as AlertConfig | null) ?? undefined, + } + + if (await enqueueNotificationDeliveryDispatch(params)) { + enqueued += 1 + } + } + + return enqueued +} + +export async function executeNotificationDelivery( + params: NotificationDeliveryParams +): Promise { const { deliveryId, subscriptionId, notificationType, log, alertConfig } = params try { @@ -504,7 +666,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (!subscription || !subscription.active) { logger.warn(`Subscription ${subscriptionId} not found or inactive`) await updateDeliveryStatus(deliveryId, 'failed', 'Subscription not found or inactive') - return + return { status: 'failed' } } const claimed = await db @@ -529,7 +691,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (claimed.length === 0) { logger.info(`Delivery ${deliveryId} not claimable`) - return + return { status: 'skipped' } } const attempts = claimed[0].attempts @@ -539,7 +701,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (!payload) { await updateDeliveryStatus(deliveryId, 'failed', 'Workflow was archived or deleted') logger.info(`Skipping delivery ${deliveryId} - workflow was archived or deleted`) - return + return { status: 'failed' } } let result: { success: boolean; status?: number; error?: string } @@ -561,39 +723,35 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (result.success) { await updateDeliveryStatus(deliveryId, 'success', undefined, result.status) logger.info(`${notificationType} notification delivered successfully`, { deliveryId }) - } else { - if (attempts < 
MAX_ATTEMPTS) { - const retryDelay = getRetryDelayWithJitter( - RETRY_DELAYS[attempts - 1] || RETRY_DELAYS[RETRY_DELAYS.length - 1] - ) - const nextAttemptAt = new Date(Date.now() + retryDelay) + return { status: 'success' } + } + if (attempts < MAX_ATTEMPTS) { + const retryDelay = getRetryDelayWithJitter( + RETRY_DELAYS[attempts - 1] || RETRY_DELAYS[RETRY_DELAYS.length - 1] + ) + const nextAttemptAt = new Date(Date.now() + retryDelay) - await updateDeliveryStatus( - deliveryId, - 'pending', - result.error, - result.status, - nextAttemptAt - ) + await updateDeliveryStatus(deliveryId, 'pending', result.error, result.status, nextAttemptAt) - logger.info( - `${notificationType} notification failed, scheduled retry ${attempts}/${MAX_ATTEMPTS}`, - { - deliveryId, - error: result.error, - } - ) - } else { - await updateDeliveryStatus(deliveryId, 'failed', result.error, result.status) - logger.error(`${notificationType} notification failed after ${MAX_ATTEMPTS} attempts`, { + logger.info( + `${notificationType} notification failed, scheduled retry ${attempts}/${MAX_ATTEMPTS}`, + { deliveryId, error: result.error, - }) - } + } + ) + return { status: 'retry', retryDelayMs: retryDelay } } + await updateDeliveryStatus(deliveryId, 'failed', result.error, result.status) + logger.error(`${notificationType} notification failed after ${MAX_ATTEMPTS} attempts`, { + deliveryId, + error: result.error, + }) + return { status: 'failed' } } catch (error) { logger.error('Notification delivery failed', { deliveryId, error }) await updateDeliveryStatus(deliveryId, 'failed', 'Internal error') + return { status: 'failed' } } } diff --git a/apps/sim/lib/billing/subscriptions/utils.ts b/apps/sim/lib/billing/subscriptions/utils.ts index d5ddbe33223..b8095cd732a 100644 --- a/apps/sim/lib/billing/subscriptions/utils.ts +++ b/apps/sim/lib/billing/subscriptions/utils.ts @@ -13,7 +13,7 @@ import { isPro, isTeam, } from '@/lib/billing/plan-helpers' -import type { EnterpriseSubscriptionMetadata } from 
'@/lib/billing/types' +import { parseEnterpriseSubscriptionMetadata } from '@/lib/billing/types' import { env } from '@/lib/core/config/env' /** @@ -48,27 +48,15 @@ export function checkEnterprisePlan(subscription: any): boolean { return isEnterprise(subscription?.plan) && subscription?.status === 'active' } -/** - * Type guard to check if metadata is valid EnterpriseSubscriptionMetadata - */ -function isEnterpriseMetadata(metadata: unknown): metadata is EnterpriseSubscriptionMetadata { - return ( - !!metadata && - typeof metadata === 'object' && - 'seats' in metadata && - typeof (metadata as EnterpriseSubscriptionMetadata).seats === 'string' - ) -} - export function getEffectiveSeats(subscription: any): number { if (!subscription) { return 0 } if (isEnterprise(subscription.plan)) { - const metadata = subscription.metadata as EnterpriseSubscriptionMetadata | null - if (isEnterpriseMetadata(metadata)) { - return Number.parseInt(metadata.seats, 10) + const metadata = parseEnterpriseSubscriptionMetadata(subscription.metadata) + if (metadata) { + return metadata.seats } return 0 } diff --git a/apps/sim/lib/billing/types/index.ts b/apps/sim/lib/billing/types/index.ts index e3c3f2de559..cd81abda7f2 100644 --- a/apps/sim/lib/billing/types/index.ts +++ b/apps/sim/lib/billing/types/index.ts @@ -2,18 +2,44 @@ * Billing System Types * Centralized type definitions for the billing system */ +import { z } from 'zod' -export interface EnterpriseSubscriptionMetadata { - plan: 'enterprise' +export const enterpriseSubscriptionMetadataSchema = z.object({ + plan: z.literal('enterprise'), // The referenceId must be provided in Stripe metadata to link to the organization // This gets stored in the subscription.referenceId column - referenceId: string + referenceId: z.string().min(1), // The fixed monthly price for this enterprise customer (as string from Stripe metadata) // This will be used to set the organization's usage limit - monthlyPrice: string - // Number of seats for invitation 
limits (not for billing) (as string from Stripe metadata) - // We set Stripe quantity to 1 and use this for actual seat count - seats: string + monthlyPrice: z.coerce.number().positive(), + // Number of seats for invitation limits (not for billing) + seats: z.coerce.number().int().positive(), + // Optional custom workspace concurrency limit for enterprise workspaces + workspaceConcurrencyLimit: z.coerce.number().int().positive().optional(), +}) + +export type EnterpriseSubscriptionMetadata = z.infer + +const enterpriseWorkspaceConcurrencyMetadataSchema = z.object({ + workspaceConcurrencyLimit: z.coerce.number().int().positive().optional(), +}) + +export type EnterpriseWorkspaceConcurrencyMetadata = z.infer< + typeof enterpriseWorkspaceConcurrencyMetadataSchema +> + +export function parseEnterpriseSubscriptionMetadata( + value: unknown +): EnterpriseSubscriptionMetadata | null { + const result = enterpriseSubscriptionMetadataSchema.safeParse(value) + return result.success ? result.data : null +} + +export function parseEnterpriseWorkspaceConcurrencyMetadata( + value: unknown +): EnterpriseWorkspaceConcurrencyMetadata | null { + const result = enterpriseWorkspaceConcurrencyMetadataSchema.safeParse(value) + return result.success ? 
result.data : null } export interface UsageData { diff --git a/apps/sim/lib/billing/webhooks/enterprise.ts b/apps/sim/lib/billing/webhooks/enterprise.ts index cf20b52b395..c4bc6a19f22 100644 --- a/apps/sim/lib/billing/webhooks/enterprise.ts +++ b/apps/sim/lib/billing/webhooks/enterprise.ts @@ -6,26 +6,10 @@ import type Stripe from 'stripe' import { getEmailSubject, renderEnterpriseSubscriptionEmail } from '@/components/emails' import { sendEmail } from '@/lib/messaging/email/mailer' import { getFromEmailAddress } from '@/lib/messaging/email/utils' -import type { EnterpriseSubscriptionMetadata } from '../types' +import { parseEnterpriseSubscriptionMetadata } from '../types' const logger = createLogger('BillingEnterprise') -function isEnterpriseMetadata(value: unknown): value is EnterpriseSubscriptionMetadata { - return ( - !!value && - typeof value === 'object' && - 'plan' in value && - 'referenceId' in value && - 'monthlyPrice' in value && - 'seats' in value && - typeof value.plan === 'string' && - value.plan.toLowerCase() === 'enterprise' && - typeof value.referenceId === 'string' && - typeof value.monthlyPrice === 'string' && - typeof value.seats === 'string' - ) -} - export async function handleManualEnterpriseSubscription(event: Stripe.Event) { const stripeSubscription = event.data.object as Stripe.Subscription @@ -63,19 +47,24 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { throw new Error('Unable to resolve referenceId for subscription') } - if (!isEnterpriseMetadata(metadata)) { + const enterpriseMetadata = parseEnterpriseSubscriptionMetadata(metadata) + if (!enterpriseMetadata) { logger.error('[subscription.created] Invalid enterprise metadata shape', { subscriptionId: stripeSubscription.id, metadata, }) throw new Error('Invalid enterprise metadata for subscription') } - const enterpriseMetadata = metadata - const metadataJson: Record = { ...enterpriseMetadata } + const metadataJson: Record = { + ...metadata, + 
workspaceConcurrencyLimit: + typeof metadata.workspaceConcurrencyLimit === 'string' + ? Number.parseInt(metadata.workspaceConcurrencyLimit, 10) + : metadata.workspaceConcurrencyLimit, + } - // Extract and parse seats and monthly price from metadata (they come as strings from Stripe) - const seats = Number.parseInt(enterpriseMetadata.seats, 10) - const monthlyPrice = Number.parseFloat(enterpriseMetadata.monthlyPrice) + const seats = enterpriseMetadata.seats + const monthlyPrice = enterpriseMetadata.monthlyPrice if (!seats || seats <= 0 || Number.isNaN(seats)) { logger.error('[subscription.created] Invalid or missing seats in enterprise metadata', { diff --git a/apps/sim/lib/billing/workspace-concurrency.test.ts b/apps/sim/lib/billing/workspace-concurrency.test.ts new file mode 100644 index 00000000000..462e24a8e06 --- /dev/null +++ b/apps/sim/lib/billing/workspace-concurrency.test.ts @@ -0,0 +1,146 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockGetHighestPrioritySubscription, + mockGetWorkspaceBilledAccountUserId, + mockFeatureFlags, + mockRedisGet, + mockRedisSet, + mockRedisDel, + mockRedisKeys, + mockGetRedisClient, +} = vi.hoisted(() => ({ + mockGetHighestPrioritySubscription: vi.fn(), + mockGetWorkspaceBilledAccountUserId: vi.fn(), + mockFeatureFlags: { + isBillingEnabled: true, + }, + mockRedisGet: vi.fn(), + mockRedisSet: vi.fn(), + mockRedisDel: vi.fn(), + mockRedisKeys: vi.fn(), + mockGetRedisClient: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/billing/core/plan', () => ({ + getHighestPrioritySubscription: mockGetHighestPrioritySubscription, +})) + +vi.mock('@/lib/workspaces/utils', () => ({ + getWorkspaceBilledAccountUserId: mockGetWorkspaceBilledAccountUserId, +})) + +vi.mock('@/lib/core/config/redis', () => ({ + getRedisClient: mockGetRedisClient, +})) + 
+vi.mock('@/lib/core/config/feature-flags', () => mockFeatureFlags) + +import { + getWorkspaceConcurrencyLimit, + resetWorkspaceConcurrencyLimitCache, +} from '@/lib/billing/workspace-concurrency' + +describe('workspace concurrency billing', () => { + beforeEach(() => { + vi.clearAllMocks() + mockFeatureFlags.isBillingEnabled = true + + mockRedisGet.mockResolvedValue(null) + mockRedisSet.mockResolvedValue('OK') + mockRedisDel.mockResolvedValue(1) + mockRedisKeys.mockResolvedValue([]) + mockGetRedisClient.mockReturnValue({ + get: mockRedisGet, + set: mockRedisSet, + del: mockRedisDel, + keys: mockRedisKeys, + }) + }) + + it('returns free tier when no billed account exists', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue(null) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(5) + }) + + it('returns pro limit for pro billing accounts', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_6000', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(50) + }) + + it('returns max limit for max plan tiers', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_25000', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(200) + }) + + it('returns max limit for legacy team plans', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'team', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(200) + }) + + it('returns enterprise metadata override when present', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 
'enterprise', + metadata: { + workspaceConcurrencyLimit: '350', + }, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(350) + }) + + it('uses free-tier limit when billing is disabled', async () => { + mockFeatureFlags.isBillingEnabled = false + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_25000', + metadata: { + workspaceConcurrencyLimit: 999, + }, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(5) + }) + + it('uses redis cache when available', async () => { + mockRedisGet.mockResolvedValueOnce('123') + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(123) + expect(mockGetWorkspaceBilledAccountUserId).not.toHaveBeenCalled() + }) + + it('can clear a specific workspace cache entry', async () => { + await resetWorkspaceConcurrencyLimitCache('workspace-1') + + expect(mockRedisDel).toHaveBeenCalledWith('workspace-concurrency-limit:workspace-1') + }) +}) diff --git a/apps/sim/lib/billing/workspace-concurrency.ts b/apps/sim/lib/billing/workspace-concurrency.ts new file mode 100644 index 00000000000..e164bdb2ccd --- /dev/null +++ b/apps/sim/lib/billing/workspace-concurrency.ts @@ -0,0 +1,170 @@ +import { createLogger } from '@sim/logger' +import { getHighestPrioritySubscription } from '@/lib/billing/core/plan' +import { getPlanTierCredits, isEnterprise, isPro, isTeam } from '@/lib/billing/plan-helpers' +import { parseEnterpriseWorkspaceConcurrencyMetadata } from '@/lib/billing/types' +import { env } from '@/lib/core/config/env' +import { isBillingEnabled } from '@/lib/core/config/feature-flags' +import { getRedisClient } from '@/lib/core/config/redis' +import { getWorkspaceBilledAccountUserId } from '@/lib/workspaces/utils' + +const logger = createLogger('WorkspaceConcurrencyBilling') + +const CACHE_TTL_MS = 60_000 +const CACHE_TTL_SECONDS = Math.floor(CACHE_TTL_MS / 1000) + +interface CacheEntry { 
+ value: number + expiresAt: number +} + +const inMemoryConcurrencyCache = new Map() + +function cacheKey(workspaceId: string): string { + return `workspace-concurrency-limit:${workspaceId}` +} + +function parsePositiveLimit(value: unknown): number | null { + if (typeof value === 'number' && Number.isFinite(value) && value > 0) { + return Math.floor(value) + } + + if (typeof value === 'string') { + const parsed = Number.parseInt(value, 10) + if (Number.isFinite(parsed) && parsed > 0) { + return parsed + } + } + + return null +} + +function getFreeConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_FREE, 10) || 5 +} + +function getProConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_PRO, 10) || 50 +} + +function getTeamConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_TEAM, 10) || 200 +} + +function getEnterpriseDefaultConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_ENTERPRISE, 10) || 200 +} + +function getEnterpriseConcurrencyLimit(metadata: unknown): number { + const enterpriseMetadata = parseEnterpriseWorkspaceConcurrencyMetadata(metadata) + return enterpriseMetadata?.workspaceConcurrencyLimit ?? 
getEnterpriseDefaultConcurrencyLimit() +} + +function getPlanConcurrencyLimit(plan: string | null | undefined, metadata: unknown): number { + if (!isBillingEnabled) { + return getFreeConcurrencyLimit() + } + + if (!plan) { + return getFreeConcurrencyLimit() + } + + if (isEnterprise(plan)) { + return getEnterpriseConcurrencyLimit(metadata) + } + + if (plan === 'team') { + return getTeamConcurrencyLimit() + } + + const credits = getPlanTierCredits(plan) + if (credits >= 25_000) { + return getTeamConcurrencyLimit() + } + + if (isPro(plan) || isTeam(plan)) { + return getProConcurrencyLimit() + } + + return getFreeConcurrencyLimit() +} + +export async function getWorkspaceConcurrencyLimit(workspaceId: string): Promise { + const redis = getRedisClient() + + if (redis) { + const cached = await redis.get(cacheKey(workspaceId)) + const cachedValue = parsePositiveLimit(cached) + if (cachedValue !== null) { + return cachedValue + } + } else { + const cached = inMemoryConcurrencyCache.get(workspaceId) + if (cached && cached.expiresAt > Date.now()) { + return cached.value + } + } + + try { + const billedAccountUserId = await getWorkspaceBilledAccountUserId(workspaceId) + if (!billedAccountUserId) { + if (redis) { + await redis.set( + cacheKey(workspaceId), + String(getFreeConcurrencyLimit()), + 'EX', + CACHE_TTL_SECONDS + ) + } else { + inMemoryConcurrencyCache.set(workspaceId, { + value: getFreeConcurrencyLimit(), + expiresAt: Date.now() + CACHE_TTL_MS, + }) + } + return getFreeConcurrencyLimit() + } + + const subscription = await getHighestPrioritySubscription(billedAccountUserId) + const limit = getPlanConcurrencyLimit(subscription?.plan, subscription?.metadata) + + if (redis) { + await redis.set(cacheKey(workspaceId), String(limit), 'EX', CACHE_TTL_SECONDS) + } else { + inMemoryConcurrencyCache.set(workspaceId, { + value: limit, + expiresAt: Date.now() + CACHE_TTL_MS, + }) + } + + return limit + } catch (error) { + logger.error('Failed to resolve workspace concurrency 
limit, using free tier', { + workspaceId, + error, + }) + + return getFreeConcurrencyLimit() + } +} + +export async function resetWorkspaceConcurrencyLimitCache(workspaceId?: string): Promise { + if (!workspaceId) { + inMemoryConcurrencyCache.clear() + } else { + inMemoryConcurrencyCache.delete(workspaceId) + } + + const redis = getRedisClient() + if (!redis) { + return + } + + if (workspaceId) { + await redis.del(cacheKey(workspaceId)) + return + } + + const keys = await redis.keys('workspace-concurrency-limit:*') + if (keys.length > 0) { + await redis.del(...keys) + } +} diff --git a/apps/sim/lib/core/admission/gate.ts b/apps/sim/lib/core/admission/gate.ts new file mode 100644 index 00000000000..a1dc7e0dce9 --- /dev/null +++ b/apps/sim/lib/core/admission/gate.ts @@ -0,0 +1,60 @@ +import { createLogger } from '@sim/logger' +import { NextResponse } from 'next/server' +import { env } from '@/lib/core/config/env' + +const logger = createLogger('AdmissionGate') + +const MAX_INFLIGHT = Number.parseInt(env.ADMISSION_GATE_MAX_INFLIGHT ?? '') || 500 + +let inflight = 0 + +export interface AdmissionTicket { + release: () => void +} + +/** + * Attempts to admit a request through the in-process gate. + * Returns a ticket with a release() handle on success, or null if at capacity. + * Zero external calls — purely in-process atomic counter. + */ +export function tryAdmit(): AdmissionTicket | null { + if (inflight >= MAX_INFLIGHT) { + return null + } + + inflight++ + let released = false + + return { + release() { + if (released) return + released = true + inflight-- + }, + } +} + +/** + * Returns a 429 response for requests rejected by the admission gate. + */ +export function admissionRejectedResponse(): NextResponse { + logger.warn('Admission gate rejecting request', { inflight, maxInflight: MAX_INFLIGHT }) + return NextResponse.json( + { + error: 'Too many requests', + message: 'Server is at capacity. 
Please retry shortly.', + retryAfterSeconds: 5, + }, + { + status: 429, + headers: { 'Retry-After': '5' }, + } + ) +} + +/** + * Returns the current gate metrics for observability. + */ +export function getAdmissionGateStatus(): { inflight: number; maxInflight: number } { + return { inflight, maxInflight: MAX_INFLIGHT } +} diff --git a/apps/sim/lib/core/async-jobs/backends/bullmq.ts b/apps/sim/lib/core/async-jobs/backends/bullmq.ts new file mode 100644 index 00000000000..a7bb4647ef4 --- /dev/null +++ b/apps/sim/lib/core/async-jobs/backends/bullmq.ts @@ -0,0 +1,106 @@ +import { createLogger } from '@sim/logger' +import type { Job as BullMQJob } from 'bullmq' +import { + type EnqueueOptions, + JOB_STATUS, + type Job, + type JobQueueBackend, + type JobStatus, + type JobType, +} from '@/lib/core/async-jobs/types' +import { type BullMQJobData, createBullMQJobData, getBullMQQueue } from '@/lib/core/bullmq' + +const logger = createLogger('BullMQJobQueue') + +function mapBullMQStatus(status: string): JobStatus { + switch (status) { + case 'active': + return JOB_STATUS.PROCESSING + case 'completed': + return JOB_STATUS.COMPLETED + case 'failed': + return JOB_STATUS.FAILED + default: + return JOB_STATUS.PENDING + } +} + +async function toJob( + queueType: JobType, + bullJob: BullMQJob> | null +): Promise { + if (!bullJob) { + return null + } + + const status = mapBullMQStatus(await bullJob.getState()) + + return { + id: bullJob.id ?? '', + type: queueType, + payload: bullJob.data.payload, + status, + createdAt: new Date(bullJob.timestamp), + startedAt: bullJob.processedOn ? new Date(bullJob.processedOn) : undefined, + completedAt: bullJob.finishedOn ? new Date(bullJob.finishedOn) : undefined, + attempts: bullJob.attemptsMade, + maxAttempts: bullJob.opts.attempts ?? 1, + error: bullJob.failedReason || undefined, + output: bullJob.returnvalue, + metadata: bullJob.data.metadata ?? 
{}, + } +} + +export class BullMQJobQueue implements JobQueueBackend { + async enqueue( + type: JobType, + payload: TPayload, + options?: EnqueueOptions + ): Promise { + const queue = getBullMQQueue(type) + + const job = await queue.add( + options?.name ?? type, + createBullMQJobData(payload, options?.metadata), + { + jobId: options?.jobId, + attempts: options?.maxAttempts, + priority: options?.priority, + delay: options?.delayMs, + } + ) + + logger.debug('Enqueued job via BullMQ', { + jobId: job.id, + type, + name: options?.name ?? type, + }) + + return String(job.id) + } + + async getJob(jobId: string): Promise { + const workflowJob = await getBullMQQueue('workflow-execution').getJob(jobId) + if (workflowJob) { + return toJob('workflow-execution', workflowJob) + } + + const webhookJob = await getBullMQQueue('webhook-execution').getJob(jobId) + if (webhookJob) { + return toJob('webhook-execution', webhookJob) + } + + const scheduleJob = await getBullMQQueue('schedule-execution').getJob(jobId) + if (scheduleJob) { + return toJob('schedule-execution', scheduleJob) + } + + return null + } + + async startJob(_jobId: string): Promise {} + + async completeJob(_jobId: string, _output: unknown): Promise {} + + async markJobFailed(_jobId: string, _error: string): Promise {} +} diff --git a/apps/sim/lib/core/async-jobs/backends/index.ts b/apps/sim/lib/core/async-jobs/backends/index.ts index 144094e6407..ef84a232233 100644 --- a/apps/sim/lib/core/async-jobs/backends/index.ts +++ b/apps/sim/lib/core/async-jobs/backends/index.ts @@ -1,3 +1,4 @@ +export { BullMQJobQueue } from './bullmq' export { DatabaseJobQueue } from './database' export { RedisJobQueue } from './redis' export { TriggerDevJobQueue } from './trigger-dev' diff --git a/apps/sim/lib/core/async-jobs/config.ts b/apps/sim/lib/core/async-jobs/config.ts index 0537a6a8ef9..c4f0a4dcf83 100644 --- a/apps/sim/lib/core/async-jobs/config.ts +++ b/apps/sim/lib/core/async-jobs/config.ts @@ -1,7 +1,7 @@ import { createLogger } 
from '@sim/logger' import type { AsyncBackendType, JobQueueBackend } from '@/lib/core/async-jobs/types' +import { isBullMQEnabled } from '@/lib/core/bullmq' import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' -import { getRedisClient } from '@/lib/core/config/redis' const logger = createLogger('AsyncJobsConfig') @@ -11,16 +11,15 @@ let cachedInlineBackend: JobQueueBackend | null = null /** * Determines which async backend to use based on environment configuration. - * Follows the fallback chain: trigger.dev → redis → database + * Follows the fallback chain: trigger.dev → bullmq → database */ export function getAsyncBackendType(): AsyncBackendType { if (isTriggerDevEnabled) { return 'trigger-dev' } - const redis = getRedisClient() - if (redis) { - return 'redis' + if (isBullMQEnabled()) { + return 'bullmq' } return 'database' @@ -43,13 +42,9 @@ export async function getJobQueue(): Promise { cachedBackend = new TriggerDevJobQueue() break } - case 'redis': { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis client not available but redis backend was selected') - } - const { RedisJobQueue } = await import('@/lib/core/async-jobs/backends/redis') - cachedBackend = new RedisJobQueue(redis) + case 'bullmq': { + const { BullMQJobQueue } = await import('@/lib/core/async-jobs/backends/bullmq') + cachedBackend = new BullMQJobQueue() break } case 'database': { @@ -62,6 +57,10 @@ export async function getJobQueue(): Promise { cachedBackendType = type logger.info(`Async job backend initialized: ${type}`) + if (!cachedBackend) { + throw new Error(`Failed to initialize async backend: ${type}`) + } + return cachedBackend } @@ -73,20 +72,19 @@ export function getCurrentBackendType(): AsyncBackendType | null { } /** - * Gets a job queue backend that bypasses Trigger.dev (Redis -> Database). - * Used for non-polling webhooks that should always execute inline. + * Gets a job queue backend that bypasses Trigger.dev (BullMQ -> Database). 
+ * Used for execution paths that must avoid Trigger.dev cold starts. */ export async function getInlineJobQueue(): Promise { if (cachedInlineBackend) { return cachedInlineBackend } - const redis = getRedisClient() let type: string - if (redis) { - const { RedisJobQueue } = await import('@/lib/core/async-jobs/backends/redis') - cachedInlineBackend = new RedisJobQueue(redis) - type = 'redis' + if (isBullMQEnabled()) { + const { BullMQJobQueue } = await import('@/lib/core/async-jobs/backends/bullmq') + cachedInlineBackend = new BullMQJobQueue() + type = 'bullmq' } else { const { DatabaseJobQueue } = await import('@/lib/core/async-jobs/backends/database') cachedInlineBackend = new DatabaseJobQueue() @@ -98,11 +96,15 @@ export async function getInlineJobQueue(): Promise { } /** - * Checks if jobs should be executed inline (fire-and-forget). - * For Redis/DB backends, we execute inline. Trigger.dev handles execution itself. + * Checks if jobs should be executed inline in-process. + * Database fallback is the only mode that still relies on inline execution. 
*/ export function shouldExecuteInline(): boolean { - return getAsyncBackendType() !== 'trigger-dev' + return getAsyncBackendType() === 'database' +} + +export function shouldUseBullMQ(): boolean { + return isBullMQEnabled() } /** diff --git a/apps/sim/lib/core/async-jobs/index.ts b/apps/sim/lib/core/async-jobs/index.ts index 24e6f1e526f..76ec7072207 100644 --- a/apps/sim/lib/core/async-jobs/index.ts +++ b/apps/sim/lib/core/async-jobs/index.ts @@ -5,6 +5,7 @@ export { getJobQueue, resetJobQueueCache, shouldExecuteInline, + shouldUseBullMQ, } from './config' export type { AsyncBackendType, diff --git a/apps/sim/lib/core/async-jobs/types.ts b/apps/sim/lib/core/async-jobs/types.ts index 27137ddadc3..a2ccf1d680c 100644 --- a/apps/sim/lib/core/async-jobs/types.ts +++ b/apps/sim/lib/core/async-jobs/types.ts @@ -62,6 +62,10 @@ export interface JobMetadata { export interface EnqueueOptions { maxAttempts?: number metadata?: JobMetadata + jobId?: string + priority?: number + name?: string + delayMs?: number } /** @@ -95,4 +99,4 @@ export interface JobQueueBackend { markJobFailed(jobId: string, error: string): Promise } -export type AsyncBackendType = 'trigger-dev' | 'redis' | 'database' +export type AsyncBackendType = 'trigger-dev' | 'bullmq' | 'redis' | 'database' diff --git a/apps/sim/lib/core/bullmq/connection.ts b/apps/sim/lib/core/bullmq/connection.ts new file mode 100644 index 00000000000..80def9d5cb5 --- /dev/null +++ b/apps/sim/lib/core/bullmq/connection.ts @@ -0,0 +1,29 @@ +import type { ConnectionOptions } from 'bullmq' +import { env } from '@/lib/core/config/env' + +export function isBullMQEnabled(): boolean { + return Boolean(env.REDIS_URL) +} + +export function getBullMQConnectionOptions(): ConnectionOptions { + if (!env.REDIS_URL) { + throw new Error('BullMQ requires REDIS_URL') + } + + const redisUrl = new URL(env.REDIS_URL) + const isTls = redisUrl.protocol === 'rediss:' + const port = redisUrl.port ? 
Number.parseInt(redisUrl.port, 10) : 6379 + const dbPath = redisUrl.pathname.replace('/', '') + const db = dbPath ? Number.parseInt(dbPath, 10) : undefined + + return { + host: redisUrl.hostname, + port, + username: redisUrl.username || undefined, + password: redisUrl.password || undefined, + db: Number.isFinite(db) ? db : undefined, + maxRetriesPerRequest: null, + enableReadyCheck: false, + ...(isTls ? { tls: {} } : {}), + } +} diff --git a/apps/sim/lib/core/bullmq/index.ts b/apps/sim/lib/core/bullmq/index.ts new file mode 100644 index 00000000000..efe937aa476 --- /dev/null +++ b/apps/sim/lib/core/bullmq/index.ts @@ -0,0 +1,16 @@ +export { getBullMQConnectionOptions, isBullMQEnabled } from './connection' +export { + type BullMQJobData, + createBullMQJobData, + getBullMQQueue, + getBullMQQueueByName, + getKnowledgeConnectorSyncQueue, + getKnowledgeDocumentProcessingQueue, + getMothershipJobExecutionQueue, + getWorkflowQueueEvents, + getWorkspaceNotificationDeliveryQueue, + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from './queues' diff --git a/apps/sim/lib/core/bullmq/queues.ts b/apps/sim/lib/core/bullmq/queues.ts new file mode 100644 index 00000000000..0e526030d61 --- /dev/null +++ b/apps/sim/lib/core/bullmq/queues.ts @@ -0,0 +1,196 @@ +import { Queue, QueueEvents } from 'bullmq' +import type { JobMetadata, JobType } from '@/lib/core/async-jobs/types' +import { getBullMQConnectionOptions } from '@/lib/core/bullmq/connection' +import type { WorkspaceDispatchQueueName } from '@/lib/core/workspace-dispatch/types' + +export const KNOWLEDGE_CONNECTOR_SYNC_QUEUE = 'knowledge-connector-sync' as const +export const KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE = 'knowledge-process-document' as const +export const MOTHERSHIP_JOB_EXECUTION_QUEUE = 'mothership-job-execution' as const +export const WORKSPACE_NOTIFICATION_DELIVERY_QUEUE = 'workspace-notification-delivery' as const + 
+export interface BullMQJobData { + payload: TPayload + metadata?: JobMetadata +} + +let workflowQueueInstance: Queue | null = null +let webhookQueueInstance: Queue | null = null +let scheduleQueueInstance: Queue | null = null +let knowledgeConnectorSyncQueueInstance: Queue | null = null +let knowledgeDocumentProcessingQueueInstance: Queue | null = null +let mothershipJobExecutionQueueInstance: Queue | null = null +let workspaceNotificationDeliveryQueueInstance: Queue | null = null +let workflowQueueEventsInstance: QueueEvents | null = null + +function getQueueDefaultOptions(type: JobType) { + switch (type) { + case 'workflow-execution': + return { + attempts: 3, + backoff: { type: 'exponential' as const, delay: 1000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + } + case 'webhook-execution': + return { + attempts: 2, + backoff: { type: 'exponential' as const, delay: 2000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 3 * 24 * 60 * 60 }, + } + case 'schedule-execution': + return { + attempts: 2, + backoff: { type: 'exponential' as const, delay: 5000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 3 * 24 * 60 * 60 }, + } + } +} + +function createQueue(type: JobType): Queue { + return new Queue(type, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: getQueueDefaultOptions(type), + }) +} + +function createNamedQueue( + name: + | typeof KNOWLEDGE_CONNECTOR_SYNC_QUEUE + | typeof KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE + | typeof MOTHERSHIP_JOB_EXECUTION_QUEUE + | typeof WORKSPACE_NOTIFICATION_DELIVERY_QUEUE +): Queue { + switch (name) { + case KNOWLEDGE_CONNECTOR_SYNC_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 3, + backoff: { type: 'exponential', delay: 5000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE: + 
return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 3, + backoff: { type: 'exponential', delay: 1000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case MOTHERSHIP_JOB_EXECUTION_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 1, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case WORKSPACE_NOTIFICATION_DELIVERY_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 1, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + } +} + +export function getBullMQQueue(type: JobType): Queue { + switch (type) { + case 'workflow-execution': + if (!workflowQueueInstance) { + workflowQueueInstance = createQueue(type) + } + return workflowQueueInstance + case 'webhook-execution': + if (!webhookQueueInstance) { + webhookQueueInstance = createQueue(type) + } + return webhookQueueInstance + case 'schedule-execution': + if (!scheduleQueueInstance) { + scheduleQueueInstance = createQueue(type) + } + return scheduleQueueInstance + } +} + +export function getBullMQQueueByName(queueName: WorkspaceDispatchQueueName): Queue { + switch (queueName) { + case 'workflow-execution': + case 'webhook-execution': + case 'schedule-execution': + return getBullMQQueue(queueName) + case KNOWLEDGE_CONNECTOR_SYNC_QUEUE: + return getKnowledgeConnectorSyncQueue() + case KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE: + return getKnowledgeDocumentProcessingQueue() + case MOTHERSHIP_JOB_EXECUTION_QUEUE: + return getMothershipJobExecutionQueue() + case WORKSPACE_NOTIFICATION_DELIVERY_QUEUE: + return getWorkspaceNotificationDeliveryQueue() + } +} + +export function getWorkflowQueueEvents(): QueueEvents { + if (!workflowQueueEventsInstance) { + workflowQueueEventsInstance = new 
QueueEvents('workflow-execution', { + connection: getBullMQConnectionOptions(), + }) + } + + return workflowQueueEventsInstance +} + +export function getKnowledgeConnectorSyncQueue(): Queue { + if (!knowledgeConnectorSyncQueueInstance) { + knowledgeConnectorSyncQueueInstance = createNamedQueue(KNOWLEDGE_CONNECTOR_SYNC_QUEUE) + } + + return knowledgeConnectorSyncQueueInstance +} + +export function getKnowledgeDocumentProcessingQueue(): Queue { + if (!knowledgeDocumentProcessingQueueInstance) { + knowledgeDocumentProcessingQueueInstance = createNamedQueue(KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE) + } + + return knowledgeDocumentProcessingQueueInstance +} + +export function getMothershipJobExecutionQueue(): Queue { + if (!mothershipJobExecutionQueueInstance) { + mothershipJobExecutionQueueInstance = createNamedQueue(MOTHERSHIP_JOB_EXECUTION_QUEUE) + } + + return mothershipJobExecutionQueueInstance +} + +export function getWorkspaceNotificationDeliveryQueue(): Queue { + if (!workspaceNotificationDeliveryQueueInstance) { + workspaceNotificationDeliveryQueueInstance = createNamedQueue( + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE + ) + } + + return workspaceNotificationDeliveryQueueInstance +} + +export function createBullMQJobData( + payload: TPayload, + metadata?: JobMetadata +): BullMQJobData { + return { + payload, + metadata, + } +} diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts index 65492cccb67..f858a09905b 100644 --- a/apps/sim/lib/core/config/env.ts +++ b/apps/sim/lib/core/config/env.ts @@ -180,6 +180,11 @@ export const env = createEnv({ // Data Retention FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(), // Log retention days for free plan users + // Admission & Burst Protection + ADMISSION_GATE_MAX_INFLIGHT: z.string().optional().default('500'), // Max concurrent in-flight execution requests per pod + DISPATCH_MAX_QUEUE_PER_WORKSPACE: z.string().optional().default('1000'), // Max queued dispatch jobs per workspace + 
DISPATCH_MAX_QUEUE_GLOBAL: z.string().optional().default('50000'), // Max queued dispatch jobs globally + // Rate Limiting Configuration RATE_LIMIT_WINDOW_MS: z.string().optional().default('60000'), // Rate limit window duration in milliseconds (default: 1 minute) MANUAL_EXECUTION_LIMIT: z.string().optional().default('999999'),// Manual execution bypass value (effectively unlimited) @@ -191,6 +196,10 @@ export const env = createEnv({ RATE_LIMIT_TEAM_ASYNC: z.string().optional().default('2500'), // Team tier async API executions per minute RATE_LIMIT_ENTERPRISE_SYNC: z.string().optional().default('600'), // Enterprise tier sync API executions per minute RATE_LIMIT_ENTERPRISE_ASYNC: z.string().optional().default('5000'), // Enterprise tier async API executions per minute + WORKSPACE_CONCURRENCY_FREE: z.string().optional().default('5'), // Free tier concurrent workspace executions + WORKSPACE_CONCURRENCY_PRO: z.string().optional().default('50'), // Pro tier concurrent workspace executions + WORKSPACE_CONCURRENCY_TEAM: z.string().optional().default('200'), // Team/Max tier concurrent workspace executions + WORKSPACE_CONCURRENCY_ENTERPRISE: z.string().optional().default('200'), // Enterprise default concurrent workspace executions // Timeout Configuration EXECUTION_TIMEOUT_FREE: z.string().optional().default('300'), // 5 minutes diff --git a/apps/sim/lib/core/workspace-dispatch/adapter.ts b/apps/sim/lib/core/workspace-dispatch/adapter.ts new file mode 100644 index 00000000000..637688d1117 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/adapter.ts @@ -0,0 +1,80 @@ +import type { + WorkspaceDispatchClaimResult, + WorkspaceDispatchEnqueueInput, + WorkspaceDispatchJobRecord, + WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +export interface WorkspaceDispatchStorageAdapter { + saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise + getDispatchJobRecord(jobId: string): Promise + listDispatchJobsByStatuses( + statuses: readonly 
WorkspaceDispatchJobRecord['status'][] + ): Promise + updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise + enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise + restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise + claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise + getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + getGlobalQueueDepth(): Promise + reconcileGlobalQueueDepth(): Promise + popNextWorkspaceId(): Promise + getQueuedWorkspaceCount(): Promise + hasActiveWorkspace(workspaceId: string): Promise + ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise + requeueWorkspaceId(workspaceId: string): Promise + workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise + cleanupExpiredWorkspaceLeases(workspaceId: string): Promise + countActiveWorkspaceLeases(workspaceId: string): Promise + hasWorkspaceLease(workspaceId: string, leaseId: string): Promise + createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise + refreshWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise + releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise + removeWorkspaceIfIdle(workspaceId: string, lanes: readonly WorkspaceDispatchLane[]): Promise + markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise + markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + 
leaseId: string, + leaseExpiresAt: number + ): Promise + markDispatchJobRunning(jobId: string): Promise + markDispatchJobCompleted(jobId: string, output: unknown): Promise + markDispatchJobFailed(jobId: string, error: string): Promise + clear(): Promise + dispose(): void +} diff --git a/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts b/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts new file mode 100644 index 00000000000..6daa485f918 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts @@ -0,0 +1,175 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockGetWorkspaceConcurrencyLimit, mockAcquireLock, mockReleaseLock } = vi.hoisted(() => ({ + mockGetWorkspaceConcurrencyLimit: vi.fn(), + mockAcquireLock: vi.fn(), + mockReleaseLock: vi.fn(), +})) + +vi.mock('@/lib/billing/workspace-concurrency', () => ({ + getWorkspaceConcurrencyLimit: mockGetWorkspaceConcurrencyLimit, +})) + +vi.mock('@/lib/core/config/redis', () => ({ + acquireLock: mockAcquireLock, + releaseLock: mockReleaseLock, + getRedisClient: vi.fn().mockReturnValue(null), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + getBullMQQueueByName: vi.fn().mockReturnValue({ + add: vi.fn().mockResolvedValue({ id: 'bullmq-1' }), + }), +})) + +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' +import { + DISPATCH_SCAN_RESULTS, + dispatchNextAdmissibleWorkspaceJob, +} from '@/lib/core/workspace-dispatch/planner' +import { + enqueueWorkspaceDispatchJob, + setWorkspaceDispatchStorageAdapter, +} from '@/lib/core/workspace-dispatch/store' + +describe('workspace dispatch integration (memory-backed)', () => { + let store: MemoryWorkspaceDispatchStorage + + beforeEach(async () => { + vi.clearAllMocks() + store = new MemoryWorkspaceDispatchStorage() + setWorkspaceDispatchStorageAdapter(store) + + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(5) + 
mockAcquireLock.mockResolvedValue(true) + mockReleaseLock.mockResolvedValue(true) + }) + + async function enqueue( + workspaceId: string, + overrides: { lane?: string; delayMs?: number; priority?: number } = {} + ) { + return enqueueWorkspaceDispatchJob({ + workspaceId, + lane: (overrides.lane ?? 'runtime') as 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'wf-1' } }, + metadata: { workflowId: 'wf-1' }, + delayMs: overrides.delayMs, + priority: overrides.priority, + }) + } + + it('admits jobs round-robin across workspaces', async () => { + await enqueue('ws-a') + await enqueue('ws-b') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + const r2 = await dispatchNextAdmissibleWorkspaceJob() + const r3 = await dispatchNextAdmissibleWorkspaceJob() + + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + expect(r3).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('respects workspace concurrency limits', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + await enqueue('ws-a') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.NO_PROGRESS) + }) + + it('skips delayed jobs and admits ready ones in same lane', async () => { + await enqueue('ws-a', { delayMs: 60_000 }) + await enqueue('ws-a', { delayMs: 0 }) + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('returns delayed when all jobs are delayed', async () => { + await enqueue('ws-a', { delayMs: 60_000 }) + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.NO_PROGRESS) + }) + + it('returns no_workspace when queue is empty', async () => { + const result = 
await dispatchNextAdmissibleWorkspaceJob() + expect(result).toBe(DISPATCH_SCAN_RESULTS.NO_WORKSPACE) + }) + + it('lease cleanup frees capacity for new admissions', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + const record = await enqueue('ws-a') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + + const updated = await store.getDispatchJobRecord(record.id) + if (updated?.lease) { + await store.releaseWorkspaceLease('ws-a', updated.lease.leaseId) + } + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('expired leases are cleaned up during claim', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + await enqueue('ws-a') + await enqueue('ws-a') + + const claimResult = await store.claimWorkspaceJob('ws-a', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'old-lease', + now: Date.now(), + leaseTtlMs: 1, + }) + expect(claimResult.type).toBe('admitted') + + await new Promise((resolve) => setTimeout(resolve, 10)) + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('recovers job to waiting via restoreWorkspaceDispatchJob', async () => { + const record = await enqueue('ws-a') + + await store.claimWorkspaceJob('ws-a', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-1', + now: Date.now(), + leaseTtlMs: 1000, + }) + + await store.markDispatchJobAdmitted(record.id, 'ws-a', 'lease-1', Date.now() + 10000) + + const admitted = await store.getDispatchJobRecord(record.id) + expect(admitted).toBeDefined() + const resetRecord = { ...admitted!, status: 'waiting' as const, lease: undefined } + await store.restoreWorkspaceDispatchJob(resetRecord) + + const restored = await store.getDispatchJobRecord(record.id) + expect(restored?.status).toBe('waiting') + expect(restored?.lease).toBeUndefined() + }) 
+}) diff --git a/apps/sim/lib/core/workspace-dispatch/dispatcher.ts b/apps/sim/lib/core/workspace-dispatch/dispatcher.ts new file mode 100644 index 00000000000..1122107ea4b --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/dispatcher.ts @@ -0,0 +1,156 @@ +import { createLogger } from '@sim/logger' +import { env } from '@/lib/core/config/env' +import { + enqueueWorkspaceDispatchJob, + getDispatchJobRecord, + getGlobalQueueDepth, + getQueuedWorkspaceCount, + getWorkspaceQueueDepth, +} from '@/lib/core/workspace-dispatch/store' +import { + WORKSPACE_DISPATCH_LANES, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, +} from '@/lib/core/workspace-dispatch/types' +import { DISPATCH_SCAN_RESULTS, dispatchNextAdmissibleWorkspaceJob } from './planner' +import { reconcileWorkspaceDispatchState } from './reconciler' + +const logger = createLogger('WorkspaceDispatcher') +const WAIT_POLL_INTERVAL_MS = 250 +const RECONCILE_INTERVAL_MS = 30_000 +const MAX_QUEUE_PER_WORKSPACE = Number.parseInt(env.DISPATCH_MAX_QUEUE_PER_WORKSPACE ?? '') || 1000 +const MAX_QUEUE_GLOBAL = Number.parseInt(env.DISPATCH_MAX_QUEUE_GLOBAL ?? 
'') || 50_000 + +let dispatcherRunning = false +let dispatcherWakePending = false +let lastReconcileAt = 0 + +async function runDispatcherLoop(): Promise { + if (dispatcherRunning) { + dispatcherWakePending = true + return + } + + dispatcherRunning = true + + try { + const now = Date.now() + if (now - lastReconcileAt >= RECONCILE_INTERVAL_MS) { + await reconcileWorkspaceDispatchState() + lastReconcileAt = now + } + + do { + dispatcherWakePending = false + const queuedWorkspaces = await getQueuedWorkspaceCount() + if (queuedWorkspaces === 0) { + continue + } + + let admitted = 0 + let scanned = 0 + const loopStartMs = Date.now() + + for (let index = 0; index < queuedWorkspaces; index++) { + scanned++ + const result = await dispatchNextAdmissibleWorkspaceJob() + if (result === DISPATCH_SCAN_RESULTS.ADMITTED) { + admitted++ + } + if (result === DISPATCH_SCAN_RESULTS.NO_WORKSPACE) { + break + } + } + + if (admitted > 0) { + dispatcherWakePending = true + } + + if (admitted > 0 || scanned > 0) { + logger.info('Dispatcher pass', { + admitted, + scanned, + queuedWorkspaces, + durationMs: Date.now() - loopStartMs, + }) + } + } while (dispatcherWakePending) + } catch (error) { + logger.error('Workspace dispatcher loop failed', { error }) + } finally { + dispatcherRunning = false + } +} + +export class DispatchQueueFullError extends Error { + readonly statusCode = 503 + + constructor( + readonly scope: 'workspace' | 'global', + readonly depth: number, + readonly limit: number + ) { + super( + scope === 'workspace' + ? 
`Workspace queue is at capacity (${depth}/${limit})` + : `Global dispatch queue is at capacity (${depth}/${limit})` + ) + this.name = 'DispatchQueueFullError' + } +} + +export async function enqueueWorkspaceDispatch( + input: WorkspaceDispatchEnqueueInput +): Promise { + const [workspaceDepth, globalDepth] = await Promise.all([ + getWorkspaceQueueDepth(input.workspaceId, WORKSPACE_DISPATCH_LANES), + getGlobalQueueDepth(), + ]) + + if (workspaceDepth >= MAX_QUEUE_PER_WORKSPACE) { + logger.warn('Workspace dispatch queue at capacity', { + workspaceId: input.workspaceId, + depth: workspaceDepth, + limit: MAX_QUEUE_PER_WORKSPACE, + }) + throw new DispatchQueueFullError('workspace', workspaceDepth, MAX_QUEUE_PER_WORKSPACE) + } + + if (globalDepth >= MAX_QUEUE_GLOBAL) { + logger.warn('Global dispatch queue at capacity', { + depth: globalDepth, + limit: MAX_QUEUE_GLOBAL, + }) + throw new DispatchQueueFullError('global', globalDepth, MAX_QUEUE_GLOBAL) + } + + const record = await enqueueWorkspaceDispatchJob(input) + void runDispatcherLoop() + return record.id +} + +export async function wakeWorkspaceDispatcher(): Promise { + await runDispatcherLoop() +} + +export async function waitForDispatchJob( + dispatchJobId: string, + timeoutMs: number +): Promise { + const deadline = Date.now() + timeoutMs + + while (Date.now() < deadline) { + const record = await getDispatchJobRecord(dispatchJobId) + if (!record) { + throw new Error(`Dispatch job not found: ${dispatchJobId}`) + } + + if (record.status === 'completed' || record.status === 'failed') { + return record + } + + await new Promise((resolve) => setTimeout(resolve, WAIT_POLL_INTERVAL_MS)) + } + + throw new Error(`Timed out waiting for dispatch job ${dispatchJobId}`) +} diff --git a/apps/sim/lib/core/workspace-dispatch/factory.ts b/apps/sim/lib/core/workspace-dispatch/factory.ts new file mode 100644 index 00000000000..3a07c68cf01 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/factory.ts @@ -0,0 +1,42 @@ +import { 
createLogger } from '@sim/logger' +import { getRedisClient } from '@/lib/core/config/redis' +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' +import { RedisWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/redis-store' + +const logger = createLogger('WorkspaceDispatchFactory') + +let cachedAdapter: WorkspaceDispatchStorageAdapter | null = null + +export function createWorkspaceDispatchStorageAdapter(): WorkspaceDispatchStorageAdapter { + if (cachedAdapter) { + return cachedAdapter + } + + const redis = getRedisClient() + + if (redis) { + logger.info('Workspace dispatcher: Using Redis storage') + const adapter = new RedisWorkspaceDispatchStorage(redis) + cachedAdapter = adapter + return adapter + } + + logger.warn( + 'Workspace dispatcher: Using in-memory storage; distributed fairness is disabled in multi-process deployments' + ) + const adapter = new MemoryWorkspaceDispatchStorage() + cachedAdapter = adapter + return adapter +} + +export function setWorkspaceDispatchStorageAdapter(adapter: WorkspaceDispatchStorageAdapter): void { + cachedAdapter = adapter +} + +export function resetWorkspaceDispatchStorageAdapter(): void { + if (cachedAdapter) { + cachedAdapter.dispose() + cachedAdapter = null + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/index.ts b/apps/sim/lib/core/workspace-dispatch/index.ts new file mode 100644 index 00000000000..74645372c9f --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/index.ts @@ -0,0 +1,32 @@ +export type { WorkspaceDispatchStorageAdapter } from './adapter' +export { + DispatchQueueFullError, + enqueueWorkspaceDispatch, + waitForDispatchJob, + wakeWorkspaceDispatcher, +} from './dispatcher' +export { + createWorkspaceDispatchStorageAdapter, + resetWorkspaceDispatchStorageAdapter, +} from './factory' +export { + markDispatchJobAdmitted, + markDispatchJobAdmitting, + 
markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + refreshWorkspaceLease, + releaseWorkspaceLease, +} from './store' +export { + WORKSPACE_DISPATCH_LANES, + WORKSPACE_DISPATCH_STATUSES, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobContext, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, + type WorkspaceDispatchLeaseInfo, + type WorkspaceDispatchQueueName, + type WorkspaceDispatchStatus, +} from './types' +export { getDispatchRuntimeMetadata, runDispatchedJob } from './worker' diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts new file mode 100644 index 00000000000..87a54de26d1 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts @@ -0,0 +1,65 @@ +/** + * @vitest-environment node + */ +import { afterEach, describe, expect, it } from 'vitest' +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' + +describe('memory workspace dispatch storage', () => { + const store = new MemoryWorkspaceDispatchStorage() + + afterEach(async () => { + await store.clear() + }) + + it('claims a runnable job and marks it admitting with a lease', async () => { + const record = await store.enqueueWorkspaceDispatchJob({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'workflow-1' } }, + metadata: { + workflowId: 'workflow-1', + }, + }) + + const result = await store.claimWorkspaceJob('workspace-1', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-1', + now: Date.now(), + leaseTtlMs: 1000, + }) + + expect(result.type).toBe('admitted') + if (result.type === 'admitted') { + expect(result.record.id).toBe(record.id) + expect(result.record.status).toBe('admitting') + expect(result.record.lease?.leaseId).toBe('lease-1') + } + }) + + it('returns delayed when only 
delayed jobs exist', async () => { + await store.enqueueWorkspaceDispatchJob({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'workflow-1' } }, + metadata: { + workflowId: 'workflow-1', + }, + delayMs: 5000, + }) + + const result = await store.claimWorkspaceJob('workspace-1', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-2', + now: Date.now(), + leaseTtlMs: 1000, + }) + + expect(result.type).toBe('delayed') + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts new file mode 100644 index 00000000000..2f4e0966bbd --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -0,0 +1,478 @@ +import { createLogger } from '@sim/logger' +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + type WorkspaceDispatchClaimResult, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchMemoryStore') +const JOB_TTL_MS = 48 * 60 * 60 * 1000 + +export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageAdapter { + private jobs = new Map() + private workspaceOrder: string[] = [] + private laneQueues = new Map() + private leases = new Map>() + private sequence = 0 + private cleanupInterval: NodeJS.Timeout | null = null + + constructor() { + this.cleanupInterval = setInterval(() => { + void this.clearExpiredState() + }, 60_000) + this.cleanupInterval.unref() + } + + private queueKey(workspaceId: string, lane: WorkspaceDispatchLane): string { + return `${workspaceId}:${lane}` + } + + private ensureWorkspaceQueued(workspaceId: string): void { + if (!this.workspaceOrder.includes(workspaceId)) { + 
this.workspaceOrder.push(workspaceId) + } + } + + private getLaneQueue(workspaceId: string, lane: WorkspaceDispatchLane): string[] { + const key = this.queueKey(workspaceId, lane) + const existing = this.laneQueues.get(key) + if (existing) { + return existing + } + + const queue: string[] = [] + this.laneQueues.set(key, queue) + return queue + } + + private sortQueue(queue: string[]): void { + queue.sort((leftId, rightId) => { + const left = this.jobs.get(leftId) + const right = this.jobs.get(rightId) + if (!left || !right) { + return 0 + } + + if (left.priority !== right.priority) { + return left.priority - right.priority + } + + return left.createdAt - right.createdAt + }) + } + + private getLeaseMap(workspaceId: string): Map { + const existing = this.leases.get(workspaceId) + if (existing) { + return existing + } + + const leaseMap = new Map() + this.leases.set(workspaceId, leaseMap) + return leaseMap + } + + private async clearExpiredState(): Promise { + const now = Date.now() + + for (const [jobId, record] of this.jobs.entries()) { + if ( + (record.status === 'completed' || record.status === 'failed') && + record.completedAt && + now - record.completedAt > JOB_TTL_MS + ) { + this.jobs.delete(jobId) + } + } + + for (const [workspaceId, leaseMap] of this.leases.entries()) { + for (const [leaseId, expiresAt] of leaseMap.entries()) { + if (expiresAt <= now) { + leaseMap.delete(leaseId) + } + } + if (leaseMap.size === 0) { + this.leases.delete(workspaceId) + } + } + } + + async saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + this.jobs.set(record.id, record) + } + + async getDispatchJobRecord(jobId: string): Promise { + return this.jobs.get(jobId) ?? 
null + } + + async listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] + ): Promise { + return Array.from(this.jobs.values()).filter((record) => statuses.includes(record.status)) + } + + async updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise { + const current = this.jobs.get(jobId) + if (!current) { + return null + } + + const updated = updater(current) + this.jobs.set(jobId, updated) + return updated + } + + async enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise { + const id = input.id ?? `dispatch_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` + const createdAt = Date.now() + + const record: WorkspaceDispatchJobRecord = { + id, + workspaceId: input.workspaceId, + lane: input.lane, + queueName: input.queueName, + bullmqJobName: input.bullmqJobName, + bullmqPayload: input.bullmqPayload, + metadata: input.metadata, + priority: input.priority ?? 
100, + maxAttempts: input.maxAttempts, + delayMs: input.delayMs, + status: 'waiting', + createdAt, + } + + this.jobs.set(id, record) + const queue = this.getLaneQueue(record.workspaceId, record.lane) + queue.push(id) + this.sortQueue(queue) + this.ensureWorkspaceQueued(record.workspaceId) + return record + } + + async restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + this.jobs.set(record.id, record) + const queue = this.getLaneQueue(record.workspaceId, record.lane) + if (!queue.includes(record.id)) { + queue.push(record.id) + this.sortQueue(queue) + } + this.ensureWorkspaceQueued(record.workspaceId) + } + + async claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + if (this.getLeaseMap(workspaceId).size >= options.concurrencyLimit) { + this.ensureWorkspaceQueued(workspaceId) + return { type: WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED } + } + + let selectedRecord: WorkspaceDispatchJobRecord | null = null + let selectedLane: WorkspaceDispatchLane | null = null + let nextReadyAt: number | null = null + + for (const lane of options.lanes) { + const queue = this.getLaneQueue(workspaceId, lane) + for (let scanIndex = 0; scanIndex < queue.length && scanIndex < 20; ) { + const jobId = queue[scanIndex] + const record = this.jobs.get(jobId) + if (!record) { + queue.splice(scanIndex, 1) + continue + } + + const readyAt = record.createdAt + (record.delayMs ?? 0) + if (readyAt <= options.now) { + selectedRecord = record + selectedLane = lane + queue.splice(scanIndex, 1) + break + } + + nextReadyAt = nextReadyAt ? 
Math.min(nextReadyAt, readyAt) : readyAt + scanIndex++ + } + + if (selectedRecord) { + break + } + } + + if (!selectedRecord || !selectedLane) { + const hasPending = await this.workspaceHasPendingJobs(workspaceId, options.lanes) + if (!hasPending) { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + return { type: WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY } + } + + this.ensureWorkspaceQueued(workspaceId) + return { + type: WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED, + nextReadyAt: nextReadyAt ?? options.now, + } + } + + const leaseExpiresAt = options.now + options.leaseTtlMs + this.getLeaseMap(workspaceId).set(options.leaseId, leaseExpiresAt) + + const updatedRecord: WorkspaceDispatchJobRecord = { + ...selectedRecord, + status: 'admitting', + lease: { + workspaceId, + leaseId: options.leaseId, + }, + metadata: { + ...selectedRecord.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } + this.jobs.set(updatedRecord.id, updatedRecord) + + const hasPending = await this.workspaceHasPendingJobs(workspaceId, options.lanes) + if (hasPending) { + this.ensureWorkspaceQueued(workspaceId) + } else { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + } + + return { + type: WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED, + record: updatedRecord, + leaseId: options.leaseId, + leaseExpiresAt, + } + } + + async getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + let depth = 0 + for (const lane of lanes) { + depth += this.getLaneQueue(workspaceId, lane).length + } + return depth + } + + async getGlobalQueueDepth(): Promise { + const terminalStatuses = new Set(['completed', 'failed']) + let count = 0 + for (const job of this.jobs.values()) { + if (!terminalStatuses.has(job.status)) { + count++ + } + } + return count + } + + async reconcileGlobalQueueDepth(): Promise { + // no-op: memory store computes depth on the fly + } + + async popNextWorkspaceId(): Promise 
{ + return this.workspaceOrder.shift() ?? null + } + + async getQueuedWorkspaceCount(): Promise { + return this.workspaceOrder.length + } + + async hasActiveWorkspace(workspaceId: string): Promise { + return this.workspaceOrder.includes(workspaceId) + } + + async ensureWorkspaceActive(workspaceId: string): Promise { + this.ensureWorkspaceQueued(workspaceId) + } + + async requeueWorkspaceId(workspaceId: string): Promise { + this.ensureWorkspaceQueued(workspaceId) + } + + async workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + return lanes.some((lane) => this.getLaneQueue(workspaceId, lane).length > 0) + } + + async getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + for (const lane of lanes) { + const queue = this.getLaneQueue(workspaceId, lane) + while (queue.length > 0) { + const jobId = queue[0] + const job = this.jobs.get(jobId) + if (job) { + return job + } + queue.shift() + } + } + + return null + } + + async removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise { + const queue = this.getLaneQueue(workspaceId, lane) + const index = queue.indexOf(jobId) + if (index >= 0) { + queue.splice(index, 1) + } + } + + async cleanupExpiredWorkspaceLeases(workspaceId: string): Promise { + const leaseMap = this.getLeaseMap(workspaceId) + const now = Date.now() + for (const [leaseId, expiresAt] of leaseMap.entries()) { + if (expiresAt <= now) { + leaseMap.delete(leaseId) + } + } + } + + async countActiveWorkspaceLeases(workspaceId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.getLeaseMap(workspaceId).size + } + + async hasWorkspaceLease(workspaceId: string, leaseId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.getLeaseMap(workspaceId).has(leaseId) + } + + async createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: 
number): Promise { + const expiresAt = Date.now() + ttlMs + this.getLeaseMap(workspaceId).set(leaseId, expiresAt) + return expiresAt + } + + async refreshWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number + ): Promise { + return this.createWorkspaceLease(workspaceId, leaseId, ttlMs) + } + + async releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise { + this.getLeaseMap(workspaceId).delete(leaseId) + } + + async removeWorkspaceIfIdle( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + const hasPending = await this.workspaceHasPendingJobs(workspaceId, lanes) + if (!hasPending) { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + } + } + + async markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitted', + admittedAt: Date.now(), + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + async markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitting', + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + async markDispatchJobRunning(jobId: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'running', + startedAt: record.startedAt ?? 
Date.now(), + })) + } + + async markDispatchJobCompleted(jobId: string, output: unknown): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'completed', + completedAt: Date.now(), + output, + })) + } + + async markDispatchJobFailed(jobId: string, error: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'failed', + completedAt: Date.now(), + error, + })) + } + + async clear(): Promise { + this.jobs.clear() + this.workspaceOrder = [] + this.laneQueues.clear() + this.leases.clear() + } + + dispose(): void { + if (this.cleanupInterval) { + clearInterval(this.cleanupInterval) + this.cleanupInterval = null + } + void this.clear().catch((error) => { + logger.error('Failed to clear memory workspace dispatch storage', { error }) + }) + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/planner.ts b/apps/sim/lib/core/workspace-dispatch/planner.ts new file mode 100644 index 00000000000..8ba42f83a0a --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/planner.ts @@ -0,0 +1,154 @@ +import { createLogger } from '@sim/logger' +import { getWorkspaceConcurrencyLimit } from '@/lib/billing/workspace-concurrency' +import { type BullMQJobData, getBullMQQueueByName } from '@/lib/core/bullmq' +import { acquireLock, releaseLock } from '@/lib/core/config/redis' +import { + claimWorkspaceJob, + markDispatchJobAdmitted, + popNextWorkspaceId, + releaseWorkspaceLease, + removeWorkspaceIfIdle, + requeueWorkspaceId, +} from '@/lib/core/workspace-dispatch/store' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + WORKSPACE_DISPATCH_LANES, + type WorkspaceDispatchJobRecord, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchPlanner') + +const LEASE_TTL_MS = 15 * 60 * 1000 +const WORKSPACE_CLAIM_LOCK_TTL_SECONDS = 10 + +export const DISPATCH_SCAN_RESULTS = { + NO_WORKSPACE: 'no_workspace', + NO_PROGRESS: 'no_progress', + ADMITTED: 'admitted', +} as const + 
+export type DispatchScanResult = (typeof DISPATCH_SCAN_RESULTS)[keyof typeof DISPATCH_SCAN_RESULTS] + +function attachDispatchMetadata( + bullmqPayload: unknown, + record: WorkspaceDispatchJobRecord, + leaseId: string, + leaseExpiresAt: number +): BullMQJobData { + if ( + bullmqPayload && + typeof bullmqPayload === 'object' && + 'payload' in bullmqPayload && + 'metadata' in bullmqPayload + ) { + const data = bullmqPayload as BullMQJobData + return { + payload: data.payload, + metadata: { + ...(data.metadata ?? {}), + dispatchJobId: record.id, + dispatchWorkspaceId: record.workspaceId, + dispatchLeaseId: leaseId, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } + } + + return { + payload: bullmqPayload, + metadata: { + ...record.metadata, + dispatchJobId: record.id, + dispatchWorkspaceId: record.workspaceId, + dispatchLeaseId: leaseId, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } +} + +async function finalizeAdmittedJob( + record: WorkspaceDispatchJobRecord, + leaseId: string, + leaseExpiresAt: number +): Promise { + try { + await getBullMQQueueByName(record.queueName).add( + record.bullmqJobName, + attachDispatchMetadata(record.bullmqPayload, record, leaseId, leaseExpiresAt), + { + jobId: record.id, + attempts: record.maxAttempts, + priority: record.priority, + } + ) + + await markDispatchJobAdmitted(record.id, record.workspaceId, leaseId, leaseExpiresAt) + } catch (error) { + await releaseWorkspaceLease(record.workspaceId, leaseId).catch(() => undefined) + throw error + } +} + +export async function dispatchNextAdmissibleWorkspaceJob(): Promise { + const workspaceId = await popNextWorkspaceId() + if (!workspaceId) { + return DISPATCH_SCAN_RESULTS.NO_WORKSPACE + } + + const lockValue = `lock_${crypto.randomUUID()}` + try { + const lockKey = `workspace-dispatch:claim-lock:${workspaceId}` + const acquired = await acquireLock(lockKey, lockValue, WORKSPACE_CLAIM_LOCK_TTL_SECONDS) + if (!acquired) { + await requeueWorkspaceId(workspaceId) + return 
DISPATCH_SCAN_RESULTS.NO_PROGRESS + } + + const limit = await getWorkspaceConcurrencyLimit(workspaceId) + const leaseId = `lease_${crypto.randomUUID()}` + const claimResult = await claimWorkspaceJob(workspaceId, { + lanes: WORKSPACE_DISPATCH_LANES, + concurrencyLimit: limit, + leaseId, + now: Date.now(), + leaseTtlMs: LEASE_TTL_MS, + }) + + switch (claimResult.type) { + case WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED: + logger.debug('Workspace concurrency limit reached', { workspaceId, limit }) + await requeueWorkspaceId(workspaceId) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED: + logger.debug('Workspace has only delayed jobs', { + workspaceId, + nextReadyAt: claimResult.nextReadyAt, + }) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY: + await removeWorkspaceIfIdle(workspaceId, WORKSPACE_DISPATCH_LANES) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED: + logger.info('Admitting workspace job', { + workspaceId, + dispatchJobId: claimResult.record.id, + lane: claimResult.record.lane, + queueName: claimResult.record.queueName, + }) + await finalizeAdmittedJob( + claimResult.record, + claimResult.leaseId, + claimResult.leaseExpiresAt + ) + return DISPATCH_SCAN_RESULTS.ADMITTED + } + } catch (error) { + logger.error('Failed to dispatch workspace job', { workspaceId, error }) + await requeueWorkspaceId(workspaceId) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + } finally { + await releaseLock(`workspace-dispatch:claim-lock:${workspaceId}`, lockValue).catch( + () => undefined + ) + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts b/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts new file mode 100644 index 00000000000..a61d0dc4d1b --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts @@ -0,0 +1,225 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } 
from 'vitest' + +const { + mockGetBullMQQueueByName, + mockHasActiveWorkspace, + mockEnsureWorkspaceActive, + mockHasWorkspaceLease, + mockListDispatchJobsByStatuses, + mockMarkDispatchJobAdmitted, + mockMarkDispatchJobCompleted, + mockMarkDispatchJobFailed, + mockRefreshWorkspaceLease, + mockReleaseWorkspaceLease, + mockRemoveWorkspaceJobFromLane, + mockRestoreWorkspaceDispatchJob, + mockWakeWorkspaceDispatcher, +} = vi.hoisted(() => ({ + mockGetBullMQQueueByName: vi.fn(), + mockHasActiveWorkspace: vi.fn(), + mockEnsureWorkspaceActive: vi.fn(), + mockHasWorkspaceLease: vi.fn(), + mockListDispatchJobsByStatuses: vi.fn(), + mockMarkDispatchJobAdmitted: vi.fn(), + mockMarkDispatchJobCompleted: vi.fn(), + mockMarkDispatchJobFailed: vi.fn(), + mockRefreshWorkspaceLease: vi.fn(), + mockReleaseWorkspaceLease: vi.fn(), + mockRemoveWorkspaceJobFromLane: vi.fn(), + mockRestoreWorkspaceDispatchJob: vi.fn(), + mockWakeWorkspaceDispatcher: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + getBullMQQueueByName: mockGetBullMQQueueByName, +})) + +vi.mock('@/lib/core/workspace-dispatch/store', () => ({ + ensureWorkspaceActive: mockEnsureWorkspaceActive, + hasActiveWorkspace: mockHasActiveWorkspace, + hasWorkspaceLease: mockHasWorkspaceLease, + listDispatchJobsByStatuses: mockListDispatchJobsByStatuses, + markDispatchJobAdmitted: mockMarkDispatchJobAdmitted, + markDispatchJobCompleted: mockMarkDispatchJobCompleted, + markDispatchJobFailed: mockMarkDispatchJobFailed, + reconcileGlobalQueueDepth: vi.fn().mockResolvedValue(undefined), + refreshWorkspaceLease: mockRefreshWorkspaceLease, + releaseWorkspaceLease: mockReleaseWorkspaceLease, + removeWorkspaceJobFromLane: mockRemoveWorkspaceJobFromLane, + restoreWorkspaceDispatchJob: mockRestoreWorkspaceDispatchJob, +})) + +vi.mock('@/lib/core/workspace-dispatch/dispatcher', () => ({ + 
wakeWorkspaceDispatcher: mockWakeWorkspaceDispatcher, +})) + +import { reconcileWorkspaceDispatchState } from '@/lib/core/workspace-dispatch/reconciler' + +describe('workspace dispatch reconciler', () => { + beforeEach(() => { + vi.clearAllMocks() + mockHasActiveWorkspace.mockResolvedValue(true) + mockRemoveWorkspaceJobFromLane.mockResolvedValue(undefined) + }) + + it('marks dispatch job completed when BullMQ job is completed', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'running', + createdAt: 1, + lease: { + workspaceId: 'workspace-1', + leaseId: 'lease-1', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue({ + getState: vi.fn().mockResolvedValue('completed'), + returnvalue: { ok: true }, + }), + }) + + await reconcileWorkspaceDispatchState() + + expect(mockMarkDispatchJobCompleted).toHaveBeenCalledWith('dispatch-1', { ok: true }) + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-1', 'lease-1') + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('restores admitted jobs to waiting when lease and BullMQ job are gone', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-2', + workspaceId: 'workspace-2', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'admitted', + createdAt: 1, + admittedAt: 2, + lease: { + workspaceId: 'workspace-2', + leaseId: 'lease-2', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + mockHasWorkspaceLease.mockResolvedValue(false) + + await reconcileWorkspaceDispatchState() + + expect(mockRestoreWorkspaceDispatchJob).toHaveBeenCalledWith( 
+ expect.objectContaining({ + id: 'dispatch-2', + status: 'waiting', + lease: undefined, + }) + ) + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('reacquires the lease for a live admitting BullMQ job', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-3', + workspaceId: 'workspace-3', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { + dispatchLeaseExpiresAt: 12345, + }, + priority: 10, + status: 'admitting', + createdAt: 1, + lease: { + workspaceId: 'workspace-3', + leaseId: 'lease-3', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue({ + getState: vi.fn().mockResolvedValue('active'), + }), + }) + mockHasWorkspaceLease.mockResolvedValue(false) + + await reconcileWorkspaceDispatchState() + + expect(mockRefreshWorkspaceLease).toHaveBeenCalledWith('workspace-3', 'lease-3', 15 * 60 * 1000) + expect(mockMarkDispatchJobAdmitted).toHaveBeenCalledWith( + 'dispatch-3', + 'workspace-3', + 'lease-3', + 12345 + ) + expect(mockRemoveWorkspaceJobFromLane).toHaveBeenCalledWith( + 'workspace-3', + 'runtime', + 'dispatch-3' + ) + }) + + it('releases leaked lease and restores waiting when BullMQ job is gone but lease remains', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-4', + workspaceId: 'workspace-4', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'running', + createdAt: 1, + lease: { + workspaceId: 'workspace-4', + leaseId: 'lease-4', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + mockHasWorkspaceLease.mockResolvedValue(true) + + await reconcileWorkspaceDispatchState() + + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-4', 'lease-4') + 
expect(mockRestoreWorkspaceDispatchJob).toHaveBeenCalledWith( + expect.objectContaining({ + id: 'dispatch-4', + status: 'waiting', + }) + ) + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.ts b/apps/sim/lib/core/workspace-dispatch/reconciler.ts new file mode 100644 index 00000000000..af67edb522a --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/reconciler.ts @@ -0,0 +1,196 @@ +import { createLogger } from '@sim/logger' +import { getBullMQQueueByName } from '@/lib/core/bullmq' +import { + ensureWorkspaceActive, + hasActiveWorkspace, + hasWorkspaceLease, + listDispatchJobsByStatuses, + markDispatchJobAdmitted, + markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + reconcileGlobalQueueDepth, + refreshWorkspaceLease, + releaseWorkspaceLease, + removeWorkspaceJobFromLane, + restoreWorkspaceDispatchJob, +} from '@/lib/core/workspace-dispatch/store' +import type { WorkspaceDispatchJobRecord } from '@/lib/core/workspace-dispatch/types' +import { wakeWorkspaceDispatcher } from './dispatcher' + +const logger = createLogger('WorkspaceDispatchReconciler') +const LEASE_TTL_MS = 15 * 60 * 1000 + +function resetToWaiting(record: WorkspaceDispatchJobRecord): WorkspaceDispatchJobRecord { + return { + ...record, + status: 'waiting', + admittedAt: undefined, + startedAt: undefined, + completedAt: undefined, + output: undefined, + error: undefined, + lease: undefined, + } +} + +async function reconcileTerminalBullMQState(record: WorkspaceDispatchJobRecord): Promise { + const queue = getBullMQQueueByName(record.queueName) + const job = await queue.getJob(record.id) + if (!job) { + return false + } + + const state = await job.getState() + if (state === 'completed') { + await markDispatchJobCompleted(record.id, job.returnvalue) + if (record.lease) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + } + return true + } + + if (state === 'failed' && job.attemptsMade >= (job.opts.attempts ?? 
1)) { + await markDispatchJobFailed(record.id, job.failedReason || 'Job failed') + if (record.lease) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + } + return true + } + + return false +} + +async function reconcileStrandedDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + if (!record.lease && record.status !== 'waiting') { + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true + } + + if (!record.lease) { + return false + } + + const hasLease = await hasWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + const queue = getBullMQQueueByName(record.queueName) + const job = await queue.getJob(record.id) + if (hasLease) { + if (!job) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true + } + + return false + } + + if (job) { + if (record.status === 'admitting') { + await refreshWorkspaceLease(record.lease.workspaceId, record.lease.leaseId, LEASE_TTL_MS) + await markDispatchJobAdmitted( + record.id, + record.lease.workspaceId, + record.lease.leaseId, + (record.metadata as { dispatchLeaseExpiresAt?: number }).dispatchLeaseExpiresAt ?? 
+ Date.now() + ) + await removeWorkspaceJobFromLane(record.workspaceId, record.lane, record.id).catch( + () => undefined + ) + return true + } + await refreshWorkspaceLease(record.lease.workspaceId, record.lease.leaseId, LEASE_TTL_MS) + if (record.status === 'admitted') { + await markDispatchJobRunning(record.id) + return true + } + return false + } + + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true +} + +async function reconcileTerminalDispatchLease( + record: WorkspaceDispatchJobRecord +): Promise { + if ((record.status !== 'completed' && record.status !== 'failed') || !record.lease) { + return false + } + + const hasLease = await hasWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + if (!hasLease) { + return false + } + + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + return true +} + +async function reconcileWaitingWorkspaceTracking( + waitingJobs: WorkspaceDispatchJobRecord[] +): Promise { + let changed = false + const earliestByWorkspace = new Map() + + for (const record of waitingJobs) { + const readyAt = record.createdAt + (record.delayMs ?? 
0) + const current = earliestByWorkspace.get(record.workspaceId) + if (current === undefined || readyAt < current) { + earliestByWorkspace.set(record.workspaceId, readyAt) + } + } + + for (const [workspaceId, nextReadyAt] of earliestByWorkspace.entries()) { + const active = await hasActiveWorkspace(workspaceId) + if (!active) { + await ensureWorkspaceActive(workspaceId, nextReadyAt) + changed = true + } + } + + return changed +} + +export async function reconcileWorkspaceDispatchState(): Promise { + const activeJobs = await listDispatchJobsByStatuses(['admitting', 'admitted', 'running']) + const waitingJobs = await listDispatchJobsByStatuses(['waiting']) + const terminalJobs = await listDispatchJobsByStatuses(['completed', 'failed']) + let changed = false + + for (const record of activeJobs) { + const terminal = await reconcileTerminalBullMQState(record) + if (terminal) { + changed = true + continue + } + + const restored = await reconcileStrandedDispatchJob(record) + if (restored) { + changed = true + } + } + + if (await reconcileWaitingWorkspaceTracking(waitingJobs)) { + changed = true + } + + for (const record of terminalJobs) { + if (await reconcileTerminalDispatchLease(record)) { + changed = true + } + } + + await reconcileGlobalQueueDepth().catch((error) => { + logger.error('Failed to reconcile global queue depth', { error }) + }) + + if (changed) { + logger.info('Workspace dispatch reconciliation updated state', { + activeJobsInspected: activeJobs.length, + waitingJobsInspected: waitingJobs.length, + terminalJobsInspected: terminalJobs.length, + }) + await wakeWorkspaceDispatcher() + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/redis-store.ts b/apps/sim/lib/core/workspace-dispatch/redis-store.ts new file mode 100644 index 00000000000..82ac3202803 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/redis-store.ts @@ -0,0 +1,574 @@ +import { createLogger } from '@sim/logger' +import type Redis from 'ioredis' +import type { 
WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + type WorkspaceDispatchClaimResult, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchRedisStore') + +const DISPATCH_PREFIX = 'workspace-dispatch:v1' +const JOB_TTL_SECONDS = 48 * 60 * 60 +const SEQUENCE_KEY = `${DISPATCH_PREFIX}:sequence` +const ACTIVE_WORKSPACES_KEY = `${DISPATCH_PREFIX}:workspaces` +const GLOBAL_DEPTH_KEY = `${DISPATCH_PREFIX}:global-depth` +const CLAIM_JOB_SCRIPT = ` +local workspaceId = ARGV[1] +local now = tonumber(ARGV[2]) +local concurrencyLimit = tonumber(ARGV[3]) +local leaseId = ARGV[4] +local leaseExpiresAt = tonumber(ARGV[5]) +local lanes = cjson.decode(ARGV[6]) +local sequenceKey = ARGV[7] +local activeWorkspacesKey = ARGV[8] +local jobPrefix = ARGV[9] +local workspacePrefix = ARGV[10] +local jobTtlSeconds = tonumber(ARGV[11]) + +local function laneKey(lane) + return workspacePrefix .. workspaceId .. ':lane:' .. lane +end + +local function leaseKey() + return workspacePrefix .. workspaceId .. ':leases' +end + +local function workspaceHasPending() + local minReadyAt = nil + local hasPending = false + + for _, lane in ipairs(lanes) do + local ids = redis.call('ZRANGE', laneKey(lane), 0, 0) + if #ids > 0 then + local raw = redis.call('GET', jobPrefix .. 
ids[1]) + if raw then + hasPending = true + local record = cjson.decode(raw) + local readyAt = (record.createdAt or 0) + (record.delayMs or 0) + if (minReadyAt == nil) or (readyAt < minReadyAt) then + minReadyAt = readyAt + end + else + redis.call('ZREM', laneKey(lane), ids[1]) + end + end + end + + return hasPending, minReadyAt +end + +redis.call('ZREMRANGEBYSCORE', leaseKey(), 0, now) +local activeLeaseCount = redis.call('ZCARD', leaseKey()) +if activeLeaseCount >= concurrencyLimit then + return cjson.encode({ type = 'limit_reached' }) +end + +local selectedId = nil +local selectedLane = nil +local selectedRecord = nil +local delayedNextReadyAt = nil + +local maxScanPerLane = 20 + +for _, lane in ipairs(lanes) do + local ids = redis.call('ZRANGE', laneKey(lane), 0, maxScanPerLane - 1) + for _, candidateId in ipairs(ids) do + local raw = redis.call('GET', jobPrefix .. candidateId) + if raw then + local record = cjson.decode(raw) + local readyAt = (record.createdAt or 0) + (record.delayMs or 0) + if readyAt <= now then + selectedId = candidateId + selectedLane = lane + selectedRecord = record + break + end + + if (delayedNextReadyAt == nil) or (readyAt < delayedNextReadyAt) then + delayedNextReadyAt = readyAt + end + else + redis.call('ZREM', laneKey(lane), candidateId) + end + end + + if selectedRecord then + break + end +end + +if selectedRecord == nil then + local hasPending, minReadyAt = workspaceHasPending() + if not hasPending then + return cjson.encode({ type = 'empty' }) + end + + local sequence = redis.call('INCR', sequenceKey) + local score = sequence + if minReadyAt ~= nil and minReadyAt > now then + score = minReadyAt * 1000000 + sequence + end + redis.call('ZADD', activeWorkspacesKey, score, workspaceId) + + return cjson.encode({ + type = 'delayed', + nextReadyAt = delayedNextReadyAt or minReadyAt or now + }) +end + +redis.call('ZADD', leaseKey(), leaseExpiresAt, leaseId) +selectedRecord.status = 'admitting' +selectedRecord.lease = { + workspaceId = 
workspaceId, + leaseId = leaseId +} +if selectedRecord.metadata == nil then + selectedRecord.metadata = {} +end +selectedRecord.metadata.dispatchLeaseExpiresAt = leaseExpiresAt + +redis.call('SET', jobPrefix .. selectedId, cjson.encode(selectedRecord), 'EX', jobTtlSeconds) +redis.call('ZREM', laneKey(selectedLane), selectedId) + +local hasPending, minReadyAt = workspaceHasPending() +if hasPending then + local sequence = redis.call('INCR', sequenceKey) + local score = sequence + if minReadyAt ~= nil and minReadyAt > now then + score = minReadyAt * 1000000 + sequence + end + redis.call('ZADD', activeWorkspacesKey, score, workspaceId) +end + +return cjson.encode({ + type = 'admitted', + record = selectedRecord, + leaseId = leaseId, + leaseExpiresAt = leaseExpiresAt +}) +` + +function jobKey(jobId: string): string { + return `${DISPATCH_PREFIX}:job:${jobId}` +} + +function workspaceLaneKey(workspaceId: string, lane: WorkspaceDispatchLane): string { + return `${DISPATCH_PREFIX}:workspace:${workspaceId}:lane:${lane}` +} + +function workspaceLeaseKey(workspaceId: string): string { + return `${DISPATCH_PREFIX}:workspace:${workspaceId}:leases` +} + +function createPriorityScore(priority: number, sequence: number): number { + return priority * 1_000_000_000_000 + sequence +} + +export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAdapter { + constructor(private redis: Redis) {} + + private async nextSequence(): Promise { + return this.redis.incr(SEQUENCE_KEY) + } + + async saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + await this.redis.set(jobKey(record.id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS) + } + + async getDispatchJobRecord(jobId: string): Promise { + const raw = await this.redis.get(jobKey(jobId)) + if (!raw) { + return null + } + + try { + return JSON.parse(raw) as WorkspaceDispatchJobRecord + } catch (error) { + logger.warn('Corrupted dispatch job record, deleting', { jobId, error }) + await 
this.redis.del(jobKey(jobId)) + return null + } + } + + /** Walks every dispatch job key with SCAN, loads each batch with MGET, and returns the records whose status is listed in `statuses`; empty or unparseable values are skipped. */ async listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] + ): Promise { + let cursor = '0' + const jobs: WorkspaceDispatchJobRecord[] = [] + + do { + const [nextCursor, keys] = await this.redis.scan( + cursor, + 'MATCH', + `${DISPATCH_PREFIX}:job:*`, + 'COUNT', + 100 + ) + cursor = nextCursor + + if (keys.length === 0) { + continue + } + + const values = await this.redis.mget(...keys) + for (const value of values) { + if (!value) { + continue + } + try { + const record = JSON.parse(value) as WorkspaceDispatchJobRecord + if (statuses.includes(record.status)) { + jobs.push(record) + } + } catch { + // Best effort during reconciliation scans. + } + } + } while (cursor !== '0') + + return jobs + } + + /** Read-modify-write of a single job record: loads it, applies `updater`, saves the result through saveDispatchJob, and returns it; returns null without calling `updater` when no record is stored. The read and the write are separate Redis commands, so concurrent updaters can overwrite each other. */ async updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise { + const current = await this.getDispatchJobRecord(jobId) + if (!current) { + return null + } + + const updated = updater(current) + await this.saveDispatchJob(updated) + return updated + } + + /** Builds a new dispatch job record from `input` (generated `dispatch_…` id when `input.id` is absent, creation time set to now) and queues it for its workspace. */ async enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise { + const id = input.id ?? `dispatch_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` + const createdAt = Date.now() + const sequence = await this.nextSequence() + + const record: WorkspaceDispatchJobRecord = { + id, + workspaceId: input.workspaceId, + lane: input.lane, + queueName: input.queueName, + bullmqJobName: input.bullmqJobName, + bullmqPayload: input.bullmqPayload, + metadata: input.metadata, + priority: input.priority ??
100, + maxAttempts: input.maxAttempts, + delayMs: input.delayMs, + status: 'waiting', + createdAt, + } + + const score = createPriorityScore(record.priority, sequence) + const pipeline = this.redis.pipeline() + pipeline.set(jobKey(id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS) + pipeline.zadd(workspaceLaneKey(record.workspaceId, record.lane), score, id) + pipeline.zadd(ACTIVE_WORKSPACES_KEY, 'NX', sequence, record.workspaceId) + pipeline.incr(GLOBAL_DEPTH_KEY) + /* exec() reports command failures in its result pairs instead of rejecting, so check them rather than returning a record that was not fully written. */ + this.throwOnPipelineError(await pipeline.exec(), 'enqueue') + + return record + } + + /** + * Throws when any command in an ioredis pipeline `exec()` result carries an error. + * + * @param results - `[error, reply]` pairs returned by `exec()`; null is treated as "no failure". + * @param operation - Short label included in the thrown message. + * @throws Error naming the operation and the first failing command's message. + */ + private throwOnPipelineError(results: [Error | null, unknown][] | null, operation: string): void { + for (const [commandError] of results ?? []) { + if (commandError) { + throw new Error(`Dispatch ${operation} pipeline failed: ${commandError.message}`) + } + } + } + + /** + * Writes `record` back under its job key and re-adds it to its workspace lane with a fresh + * sequence-based score; the workspace is added to the active set only if not already present (NX). + * Unlike enqueue, the global depth counter is not incremented. + * + * @throws Error when any command of the pipeline fails. + */ + async restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + const sequence = await this.nextSequence() + const score = createPriorityScore(record.priority, sequence) + const pipeline = this.redis.pipeline() + pipeline.set(jobKey(record.id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS) + pipeline.zadd(workspaceLaneKey(record.workspaceId, record.lane), score, record.id) + pipeline.zadd(ACTIVE_WORKSPACES_KEY, 'NX', sequence, record.workspaceId) + this.throwOnPipelineError(await pipeline.exec(), 'restore') + } + + /** + * Evaluates CLAIM_JOB_SCRIPT for one workspace, passing the lane order, concurrency limit, + * lease id and lease expiry (`now + leaseTtlMs`), and returns the script's JSON result. + * + * @throws Error when the script returns a `type` that is not a known claim result. + */ + async claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise { + const raw = await this.redis.eval( + CLAIM_JOB_SCRIPT, + 0, + workspaceId, + String(options.now), + String(options.concurrencyLimit), + options.leaseId, + String(options.now + options.leaseTtlMs), + JSON.stringify(options.lanes), + SEQUENCE_KEY, + ACTIVE_WORKSPACES_KEY, + `${DISPATCH_PREFIX}:job:`, + `${DISPATCH_PREFIX}:workspace:`, + String(JOB_TTL_SECONDS) + ) + + const parsed = JSON.parse(String(raw)) as WorkspaceDispatchClaimResult + switch (parsed.type) { + case WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED: + case WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED: + case WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED: + case WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY: + return parsed + default: + throw new Error( + `Unknown dispatch claim result: ${String((parsed as {
type?: string }).type)}` + ) + } + } + + /** + * Sums the sorted-set sizes of the given lanes for one workspace using a single pipeline. + * Lanes whose ZCARD reply carries an error are left out of the sum. + */ + async getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + if (lanes.length === 0) return 0 + const pipeline = this.redis.pipeline() + for (const lane of lanes) { + pipeline.zcard(workspaceLaneKey(workspaceId, lane)) + } + const results = await pipeline.exec() + let depth = 0 + for (const result of results ?? []) { + if (result && !result[0]) { + depth += (result[1] as number) ?? 0 + } + } + return depth + } + + /** + * Reads the global depth counter. + * + * @returns The stored count clamped to zero or above; 0 when the key is missing or does not + * hold an integer, so callers never receive NaN. + */ + async getGlobalQueueDepth(): Promise { + const count = await this.redis.get(GLOBAL_DEPTH_KEY) + if (!count) { + return 0 + } + + const depth = Number.parseInt(count, 10) + /* Math.max(0, NaN) is NaN, and NaN makes every limit comparison false. */ + return Number.isFinite(depth) ? Math.max(0, depth) : 0 + } + + /** + * Overwrites the global depth counter with the number of stored records that are waiting, + * admitting, admitted or running. The scan and the SET are separate steps, so jobs enqueued + * or finished in between are not reflected. + */ + async reconcileGlobalQueueDepth(): Promise { + const allJobs = await this.listDispatchJobsByStatuses([ + 'waiting', + 'admitting', + 'admitted', + 'running', + ]) + await this.redis.set(GLOBAL_DEPTH_KEY, allJobs.length) + } + + /** + * Pops the lowest-scored member of the active-workspaces set and returns its id, or null when the set is empty. + */ + async popNextWorkspaceId(): Promise { + const result = await this.redis.zpopmin(ACTIVE_WORKSPACES_KEY) + if (!result || result.length === 0) { + return null + } + + return result[0] ?? null + } + + /** + * Returns how many workspaces are currently in the active-workspaces set. + */ + async getQueuedWorkspaceCount(): Promise { + return this.redis.zcard(ACTIVE_WORKSPACES_KEY) + } + + /** + * Reports whether `workspaceId` is a member of the active-workspaces set. + */ + async hasActiveWorkspace(workspaceId: string): Promise { + return (await this.redis.zscore(ACTIVE_WORKSPACES_KEY, workspaceId)) !== null + } + + /** + * Adds the workspace to the active-workspaces set if it is not already there (NX). A future + * `readyAt` yields a score derived from that time; otherwise the next sequence number is used. + */ + async ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise { + const sequence = await this.nextSequence() + const score = readyAt && readyAt > Date.now() ?
readyAt * 1_000_000 + sequence : sequence + await this.redis.zadd(ACTIVE_WORKSPACES_KEY, 'NX', score, workspaceId) + } + + /** + * Re-scores the workspace in the active-workspaces set with a fresh sequence number, + * adding it when absent and overwriting any existing score. + */ + async requeueWorkspaceId(workspaceId: string): Promise { + const sequence = await this.nextSequence() + await this.redis.zadd(ACTIVE_WORKSPACES_KEY, sequence, workspaceId) + } + + /** + * Returns true as soon as one of `lanes` has at least one queued id for the workspace. + */ + async workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + for (const lane of lanes) { + const count = await this.redis.zcard(workspaceLaneKey(workspaceId, lane)) + if (count > 0) { + return true + } + } + + return false + } + + /** + * Returns the head job of the first lane, in `lanes` order, that has a readable record, or + * null when every lane is empty. Lane entries whose record no longer exists are removed and + * the same lane is read again, so one stale id cannot let a later lane win over jobs still + * queued behind it. + */ + async getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + for (const lane of lanes) { + const laneKey = workspaceLaneKey(workspaceId, lane) + + /* Each pass either returns or removes the current head, so the lane shrinks until it is empty. */ + for (;;) { + const [headId] = await this.redis.zrange(laneKey, 0, 0) + if (headId === undefined) { + break + } + + const record = await this.getDispatchJobRecord(headId) + if (record) { + return record + } + + await this.redis.zrem(laneKey, headId) + } + } + + return null + } + + /** + * Removes `jobId` from the given workspace lane. + */ + async removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise { + await this.redis.zrem(workspaceLaneKey(workspaceId, lane), jobId) + } + + /** + * Deletes every lease of the workspace whose expiry score is at or before the current time. + */ + async cleanupExpiredWorkspaceLeases(workspaceId: string): Promise { + await this.redis.zremrangebyscore(workspaceLeaseKey(workspaceId), 0, Date.now()) + } + + /** + * Purges expired leases, then returns how many leases remain for the workspace. + */ + async countActiveWorkspaceLeases(workspaceId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.redis.zcard(workspaceLeaseKey(workspaceId)) + } + + /** + * Purges expired leases, then reports whether `leaseId` is still held for the workspace. + */ + async hasWorkspaceLease(workspaceId: string, leaseId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return (await this.redis.zscore(workspaceLeaseKey(workspaceId), leaseId)) !== null + } + + /** + * Records `leaseId` for the workspace with an expiry of now + `ttlMs` and returns that expiry. + */ + async createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise { + const expiresAt = Date.now() + ttlMs + await this.redis.zadd(workspaceLeaseKey(workspaceId),
expiresAt, leaseId) + return expiresAt + } + + /** Extends a lease by delegating to createWorkspaceLease, which stores `leaseId` with an expiry of now + `ttlMs` and returns that expiry. NOTE(review): because this is the same write as creation, it presumably also re-creates a lease that was already released or purged - confirm that is intended. */ async refreshWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number + ): Promise { + return this.createWorkspaceLease(workspaceId, leaseId, ttlMs) + } + + /** Removes `leaseId` from the workspace's lease set. */ async releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise { + await this.redis.zrem(workspaceLeaseKey(workspaceId), leaseId) + } + + /** Removes the workspace from the active-workspaces set when none of `lanes` has queued jobs. The check and the removal are separate commands. */ async removeWorkspaceIfIdle( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + const hasPendingJobs = await this.workspaceHasPendingJobs(workspaceId, lanes) + if (!hasPendingJobs) { + await this.redis.zrem(ACTIVE_WORKSPACES_KEY, workspaceId) + } + } + + /** Sets the job's status to 'admitted', stamps `admittedAt` with the current time, and stores the lease reference plus `dispatchLeaseExpiresAt` in its metadata. Nothing is written when the record does not exist. */ async markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitted', + admittedAt: Date.now(), + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + /** Sets the job's status to 'admitting' and stores the lease reference plus `dispatchLeaseExpiresAt` in its metadata. Nothing is written when the record does not exist. */ async markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitting', + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + /** Sets the job's status to 'running'; `startedAt` is only stamped when the record does not already have one. */ async markDispatchJobRunning(jobId: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'running', + startedAt: record.startedAt ??
Date.now(), + })) + } + + /** + * Sets the job's status to 'completed', stamps `completedAt`, stores `output`, and then + * decrements the global depth counter. A failed decrement is ignored. + */ + async markDispatchJobCompleted(jobId: string, output: unknown): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'completed', + completedAt: Date.now(), + output, + })) + await this.redis.decr(GLOBAL_DEPTH_KEY).catch(() => undefined) + } + + /** + * Sets the job's status to 'failed', stamps `completedAt`, stores the error message, and then + * decrements the global depth counter. A failed decrement is ignored. + */ + async markDispatchJobFailed(jobId: string, error: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'failed', + completedAt: Date.now(), + error, + })) + await this.redis.decr(GLOBAL_DEPTH_KEY).catch(() => undefined) + } + + /** + * Deletes every key under the dispatch prefix. Keys are removed one SCAN batch at a time + * rather than collected first, so memory use and the DEL argument count stay bounded no + * matter how many keys exist. + */ + async clear(): Promise { + let cursor = '0' + + do { + const [nextCursor, foundKeys] = await this.redis.scan( + cursor, + 'MATCH', + `${DISPATCH_PREFIX}:*`, + 'COUNT', + 100 + ) + cursor = nextCursor + + if (foundKeys.length > 0) { + await this.redis.del(...foundKeys) + } + } while (cursor !== '0') + } + + /** + * Logs that the storage was disposed; the Redis client passed to the constructor is not closed here. + */ + dispose(): void { + logger.info('Redis workspace dispatch storage disposed') + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/status.test.ts b/apps/sim/lib/core/workspace-dispatch/status.test.ts new file mode 100644 index 00000000000..e72e210b18d --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/status.test.ts @@ -0,0 +1,102 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { presentDispatchOrJobStatus } from '@/lib/core/workspace-dispatch/status' + +describe('workspace dispatch status presentation', () => { + it('presents waiting dispatch jobs with queue metadata', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 10, + status: 'waiting', + createdAt: 1000, + }, + null + ) + + expect(result).toEqual({ + status: 'waiting', + metadata: { + createdAt: new
Date(1000), + admittedAt: undefined, + startedAt: undefined, + completedAt: undefined, + queueName: 'workflow-execution', + lane: 'runtime', + workspaceId: 'workspace-1', + }, + estimatedDuration: 300000, + }) + }) + + it('presents admitting dispatch jobs distinctly', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-1a', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 10, + status: 'admitting', + createdAt: 1000, + }, + null + ) + + expect(result.status).toBe('admitting') + expect(result.estimatedDuration).toBe(300000) + }) + + it('presents completed dispatch jobs with output and duration', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-2', + workspaceId: 'workspace-1', + lane: 'interactive', + queueName: 'workflow-execution', + bullmqJobName: 'direct-workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 1, + status: 'completed', + createdAt: 1000, + admittedAt: 1500, + startedAt: 2000, + completedAt: 7000, + output: { success: true }, + }, + null + ) + + expect(result.status).toBe('completed') + expect(result.output).toEqual({ success: true }) + expect(result.metadata.duration).toBe(5000) + }) + + it('falls back to legacy job status when no dispatch record exists', () => { + const result = presentDispatchOrJobStatus(null, { + id: 'job-1', + type: 'workflow-execution', + payload: {}, + status: 'pending', + createdAt: new Date(1000), + attempts: 0, + maxAttempts: 3, + metadata: {}, + }) + + expect(result.status).toBe('queued') + expect(result.estimatedDuration).toBe(300000) + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/status.ts b/apps/sim/lib/core/workspace-dispatch/status.ts new file mode 100644 index 00000000000..fc5d934434c --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/status.ts @@ -0,0 +1,110 @@ 
+import type { Job, JobStatus } from '@/lib/core/async-jobs/types' +import type { WorkspaceDispatchJobRecord } from '@/lib/core/workspace-dispatch/types' + +export type DispatchPresentedStatus = + | 'waiting' + | 'admitting' + | 'admitted' + | 'running' + | 'completed' + | 'failed' + | 'queued' + | JobStatus + +export interface DispatchStatusPresentation { + status: DispatchPresentedStatus + metadata: { + createdAt?: Date + admittedAt?: Date + startedAt?: Date + completedAt?: Date + queueName?: string + lane?: string + workspaceId?: string + duration?: number + } + output?: unknown + error?: string + estimatedDuration?: number +} + +export function presentDispatchOrJobStatus( + dispatchJob: WorkspaceDispatchJobRecord | null, + job: Job | null +): DispatchStatusPresentation { + if (dispatchJob) { + const startedAt = dispatchJob.startedAt ? new Date(dispatchJob.startedAt) : undefined + const completedAt = dispatchJob.completedAt ? new Date(dispatchJob.completedAt) : undefined + + const response: DispatchStatusPresentation = { + status: dispatchJob.status, + metadata: { + createdAt: new Date(dispatchJob.createdAt), + admittedAt: dispatchJob.admittedAt ? new Date(dispatchJob.admittedAt) : undefined, + startedAt, + completedAt, + queueName: dispatchJob.queueName, + lane: dispatchJob.lane, + workspaceId: dispatchJob.workspaceId, + }, + } + + if (startedAt && completedAt) { + response.metadata.duration = completedAt.getTime() - startedAt.getTime() + } + + if (dispatchJob.status === 'completed') { + response.output = dispatchJob.output + } + + if (dispatchJob.status === 'failed') { + response.error = dispatchJob.error + } + + if ( + dispatchJob.status === 'waiting' || + dispatchJob.status === 'admitting' || + dispatchJob.status === 'admitted' || + dispatchJob.status === 'running' + ) { + response.estimatedDuration = 300000 + } + + return response + } + + if (!job) { + return { + status: 'queued', + metadata: {}, + } + } + + const mappedStatus = job.status === 'pending' ? 
'queued' : job.status + const response: DispatchStatusPresentation = { + status: mappedStatus, + metadata: { + createdAt: job.createdAt, + startedAt: job.startedAt, + completedAt: job.completedAt, + }, + } + + if (job.startedAt && job.completedAt) { + response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() + } + + if (job.status === 'completed') { + response.output = job.output + } + + if (job.status === 'failed') { + response.error = job.error + } + + if (job.status === 'processing' || job.status === 'pending') { + response.estimatedDuration = 300000 + } + + return response +} diff --git a/apps/sim/lib/core/workspace-dispatch/store.ts b/apps/sim/lib/core/workspace-dispatch/store.ts new file mode 100644 index 00000000000..9bc7f0bebe9 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/store.ts @@ -0,0 +1,193 @@ +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + setWorkspaceDispatchStorageAdapter as _setAdapter, + createWorkspaceDispatchStorageAdapter, +} from '@/lib/core/workspace-dispatch/factory' +import type { + WorkspaceDispatchClaimResult, + WorkspaceDispatchEnqueueInput, + WorkspaceDispatchJobRecord, + WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +function getAdapter() { + return createWorkspaceDispatchStorageAdapter() +} + +export function setWorkspaceDispatchStorageAdapter(adapter: WorkspaceDispatchStorageAdapter): void { + _setAdapter(adapter) +} + +export async function saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + return getAdapter().saveDispatchJob(record) +} + +export async function getDispatchJobRecord( + jobId: string +): Promise { + return getAdapter().getDispatchJobRecord(jobId) +} + +export async function listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] +): Promise { + return getAdapter().listDispatchJobsByStatuses(statuses) +} + +export async function updateDispatchJobRecord( + 
jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord +): Promise { + return getAdapter().updateDispatchJobRecord(jobId, updater) +} + +export async function enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput +): Promise { + return getAdapter().enqueueWorkspaceDispatchJob(input) +} + +export async function restoreWorkspaceDispatchJob( + record: WorkspaceDispatchJobRecord +): Promise { + return getAdapter().restoreWorkspaceDispatchJob(record) +} + +export async function claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } +): Promise { + return getAdapter().claimWorkspaceJob(workspaceId, options) +} + +export async function getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return getAdapter().getWorkspaceQueueDepth(workspaceId, lanes) +} + +export async function getGlobalQueueDepth(): Promise { + return getAdapter().getGlobalQueueDepth() +} + +export async function reconcileGlobalQueueDepth(): Promise { + return getAdapter().reconcileGlobalQueueDepth() +} + +export async function popNextWorkspaceId(): Promise { + return getAdapter().popNextWorkspaceId() +} + +export async function getQueuedWorkspaceCount(): Promise { + return getAdapter().getQueuedWorkspaceCount() +} + +export async function hasActiveWorkspace(workspaceId: string): Promise { + return getAdapter().hasActiveWorkspace(workspaceId) +} + +export async function ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise { + return getAdapter().ensureWorkspaceActive(workspaceId, readyAt) +} + +export async function requeueWorkspaceId(workspaceId: string): Promise { + return getAdapter().requeueWorkspaceId(workspaceId) +} + +export async function workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return 
getAdapter().workspaceHasPendingJobs(workspaceId, lanes) +} + +export async function getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return getAdapter().getNextWorkspaceJob(workspaceId, lanes) +} + +export async function removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string +): Promise { + return getAdapter().removeWorkspaceJobFromLane(workspaceId, lane, jobId) +} + +export async function cleanupExpiredWorkspaceLeases(workspaceId: string): Promise { + return getAdapter().cleanupExpiredWorkspaceLeases(workspaceId) +} + +export async function countActiveWorkspaceLeases(workspaceId: string): Promise { + return getAdapter().countActiveWorkspaceLeases(workspaceId) +} + +export async function hasWorkspaceLease(workspaceId: string, leaseId: string): Promise { + return getAdapter().hasWorkspaceLease(workspaceId, leaseId) +} + +export async function createWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number +): Promise { + return getAdapter().createWorkspaceLease(workspaceId, leaseId, ttlMs) +} + +export async function refreshWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number +): Promise { + return getAdapter().refreshWorkspaceLease(workspaceId, leaseId, ttlMs) +} + +export async function releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise { + return getAdapter().releaseWorkspaceLease(workspaceId, leaseId) +} + +export async function removeWorkspaceIfIdle( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] +): Promise { + return getAdapter().removeWorkspaceIfIdle(workspaceId, lanes) +} + +export async function markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number +): Promise { + return getAdapter().markDispatchJobAdmitted(jobId, workspaceId, leaseId, leaseExpiresAt) +} + +export async function markDispatchJobAdmitting( + jobId: string, + workspaceId: 
string, + leaseId: string, + leaseExpiresAt: number +): Promise { + return getAdapter().markDispatchJobAdmitting(jobId, workspaceId, leaseId, leaseExpiresAt) +} + +export async function markDispatchJobRunning(jobId: string): Promise { + return getAdapter().markDispatchJobRunning(jobId) +} + +export async function markDispatchJobCompleted(jobId: string, output: unknown): Promise { + return getAdapter().markDispatchJobCompleted(jobId, output) +} + +export async function markDispatchJobFailed(jobId: string, error: string): Promise { + return getAdapter().markDispatchJobFailed(jobId, error) +} diff --git a/apps/sim/lib/core/workspace-dispatch/types.ts b/apps/sim/lib/core/workspace-dispatch/types.ts new file mode 100644 index 00000000000..87218956644 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/types.ts @@ -0,0 +1,107 @@ +import type { JobMetadata, JobType } from '@/lib/core/async-jobs/types' +import type { + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from '@/lib/core/bullmq/queues' + +export const WORKSPACE_DISPATCH_LANES = [ + 'interactive', + 'runtime', + 'knowledge', + 'lightweight', +] as const + +export type WorkspaceDispatchLane = (typeof WORKSPACE_DISPATCH_LANES)[number] + +export type WorkspaceDispatchQueueName = + | JobType + | typeof KNOWLEDGE_CONNECTOR_SYNC_QUEUE + | typeof KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE + | typeof MOTHERSHIP_JOB_EXECUTION_QUEUE + | typeof WORKSPACE_NOTIFICATION_DELIVERY_QUEUE + +export const WORKSPACE_DISPATCH_STATUSES = { + WAITING: 'waiting', + ADMITTING: 'admitting', + ADMITTED: 'admitted', + RUNNING: 'running', + COMPLETED: 'completed', + FAILED: 'failed', +} as const + +export type WorkspaceDispatchStatus = + (typeof WORKSPACE_DISPATCH_STATUSES)[keyof typeof WORKSPACE_DISPATCH_STATUSES] + +export interface WorkspaceDispatchLeaseInfo { + workspaceId: string + leaseId: string +} + +export interface 
WorkspaceDispatchJobContext { + dispatchJobId: string + workspaceId: string + lane: WorkspaceDispatchLane + queueName: WorkspaceDispatchQueueName + bullmqJobName: string + priority: number +} + +export interface WorkspaceDispatchJobRecord { + id: string + workspaceId: string + lane: WorkspaceDispatchLane + queueName: WorkspaceDispatchQueueName + bullmqJobName: string + bullmqPayload: unknown + metadata: JobMetadata + priority: number + maxAttempts?: number + delayMs?: number + status: WorkspaceDispatchStatus + createdAt: number + admittedAt?: number + startedAt?: number + completedAt?: number + output?: unknown + error?: string + lease?: WorkspaceDispatchLeaseInfo +} + +export interface WorkspaceDispatchEnqueueInput { + id?: string + workspaceId: string + lane: WorkspaceDispatchLane + queueName: WorkspaceDispatchQueueName + bullmqJobName: string + bullmqPayload: unknown + metadata: JobMetadata + priority?: number + maxAttempts?: number + delayMs?: number +} + +export const WORKSPACE_DISPATCH_CLAIM_RESULTS = { + ADMITTED: 'admitted', + LIMIT_REACHED: 'limit_reached', + DELAYED: 'delayed', + EMPTY: 'empty', +} as const + +export type WorkspaceDispatchClaimResult = + | { + type: typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED + record: WorkspaceDispatchJobRecord + leaseId: string + leaseExpiresAt: number + } + | { + type: + | typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED + | typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY + } + | { + type: typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED + nextReadyAt: number + } diff --git a/apps/sim/lib/core/workspace-dispatch/worker.test.ts b/apps/sim/lib/core/workspace-dispatch/worker.test.ts new file mode 100644 index 00000000000..1833b128cdd --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/worker.test.ts @@ -0,0 +1,98 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockMarkDispatchJobCompleted, + mockMarkDispatchJobFailed, + 
mockMarkDispatchJobRunning, + mockReleaseWorkspaceLease, + mockWakeWorkspaceDispatcher, +} = vi.hoisted(() => ({ + mockMarkDispatchJobCompleted: vi.fn(), + mockMarkDispatchJobFailed: vi.fn(), + mockMarkDispatchJobRunning: vi.fn(), + mockReleaseWorkspaceLease: vi.fn(), + mockWakeWorkspaceDispatcher: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + markDispatchJobCompleted: mockMarkDispatchJobCompleted, + markDispatchJobFailed: mockMarkDispatchJobFailed, + markDispatchJobRunning: mockMarkDispatchJobRunning, + releaseWorkspaceLease: mockReleaseWorkspaceLease, + wakeWorkspaceDispatcher: mockWakeWorkspaceDispatcher, +})) + +import { getDispatchRuntimeMetadata, runDispatchedJob } from '@/lib/core/workspace-dispatch/worker' + +describe('workspace dispatch worker lifecycle', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('returns null for missing metadata', () => { + expect(getDispatchRuntimeMetadata(undefined)).toBeNull() + }) + + it('extracts dispatch runtime metadata when all fields are present', () => { + expect( + getDispatchRuntimeMetadata({ + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }) + ).toEqual({ + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }) + }) + + it('marks running, completed, releases lease, and wakes dispatcher on success', async () => { + const result = await runDispatchedJob( + { + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }, + async () => ({ success: true }) + ) + + expect(result).toEqual({ success: true }) + expect(mockMarkDispatchJobRunning).toHaveBeenCalledWith('dispatch-1') + expect(mockMarkDispatchJobCompleted).toHaveBeenCalledWith('dispatch-1', { success: true }) + 
expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-1', 'lease-1') + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('marks failed and still releases lease on error', async () => { + await expect( + runDispatchedJob( + { + dispatchJobId: 'dispatch-2', + dispatchWorkspaceId: 'workspace-2', + dispatchLeaseId: 'lease-2', + }, + async () => { + throw new Error('boom') + } + ) + ).rejects.toThrow('boom') + + expect(mockMarkDispatchJobRunning).toHaveBeenCalledWith('dispatch-2') + expect(mockMarkDispatchJobFailed).toHaveBeenCalledWith('dispatch-2', 'boom') + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-2', 'lease-2') + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/worker.ts b/apps/sim/lib/core/workspace-dispatch/worker.ts new file mode 100644 index 00000000000..ced31a599a3 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/worker.ts @@ -0,0 +1,104 @@ +import { createLogger } from '@sim/logger' +import { + markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + refreshWorkspaceLease, + releaseWorkspaceLease, + wakeWorkspaceDispatcher, +} from '@/lib/core/workspace-dispatch' + +const logger = createLogger('WorkspaceDispatchWorker') + +interface DispatchRuntimeMetadata { + dispatchJobId: string + dispatchWorkspaceId: string + dispatchLeaseId: string +} + +interface RunDispatchedJobOptions { + isFinalAttempt?: boolean + leaseTtlMs?: number +} + +const DEFAULT_LEASE_TTL_MS = 15 * 60 * 1000 +const LEASE_HEARTBEAT_INTERVAL_MS = 60_000 + +export function getDispatchRuntimeMetadata(metadata: unknown): DispatchRuntimeMetadata | null { + if (!metadata || typeof metadata !== 'object') { + return null + } + + const value = metadata as Partial + if (!value.dispatchJobId || !value.dispatchWorkspaceId || !value.dispatchLeaseId) { + return null + } + + return { + dispatchJobId: value.dispatchJobId, + dispatchWorkspaceId: 
value.dispatchWorkspaceId, + dispatchLeaseId: value.dispatchLeaseId, + } +} + +/** + * Runs `run` on behalf of a dispatched job and keeps the dispatch bookkeeping in step with it. + * + * When `metadata` carries no dispatch ids, `run` is simply invoked. Otherwise the job is marked + * running, the workspace lease is refreshed on a timer while `run` executes, and the job is + * marked completed or (on the final attempt) failed. The lease is released and the dispatcher + * woken when the run succeeded or this was the final attempt. + * + * @param metadata - Job metadata; must contain dispatchJobId, dispatchWorkspaceId and dispatchLeaseId to opt in. + * @param run - The work to execute. + * @param options - `isFinalAttempt` (default true) and `leaseTtlMs` (default 15 minutes). + * @returns Whatever `run` resolves to. + * @throws Rethrows any error raised by `run` or by the status updates. + */ +export async function runDispatchedJob( + metadata: unknown, + run: () => Promise, + options: RunDispatchedJobOptions = {} +): Promise { + const dispatchMetadata = getDispatchRuntimeMetadata(metadata) + + if (!dispatchMetadata) { + return run() + } + + const leaseTtlMs = options.leaseTtlMs ?? DEFAULT_LEASE_TTL_MS + const isFinalAttempt = options.isFinalAttempt ?? true + + let heartbeatTimer: NodeJS.Timeout | null = setInterval(() => { + void refreshWorkspaceLease( + dispatchMetadata.dispatchWorkspaceId, + dispatchMetadata.dispatchLeaseId, + leaseTtlMs + ).catch((error) => { + logger.error('Failed to refresh dispatch lease', { error, dispatchMetadata }) + }) + }, LEASE_HEARTBEAT_INTERVAL_MS) + heartbeatTimer.unref() + + let succeeded = false + try { + /* Kept inside the try: if marking the job as running throws, the finally block must still stop the heartbeat and release the lease on a final attempt instead of holding it until its TTL. */ + await markDispatchJobRunning(dispatchMetadata.dispatchJobId) + + const result = await run() + succeeded = true + await markDispatchJobCompleted(dispatchMetadata.dispatchJobId, result) + return result + } catch (error) { + if (isFinalAttempt && !succeeded) { + await markDispatchJobFailed( + dispatchMetadata.dispatchJobId, + error instanceof Error ?
error.message : String(error) + ) + } + throw error + } finally { + if (heartbeatTimer) { + clearInterval(heartbeatTimer) + heartbeatTimer = null + } + + const shouldReleaseLease = succeeded || isFinalAttempt + if (shouldReleaseLease) { + try { + await releaseWorkspaceLease( + dispatchMetadata.dispatchWorkspaceId, + dispatchMetadata.dispatchLeaseId + ) + await wakeWorkspaceDispatcher() + } catch (error) { + logger.error('Failed to release dispatch lease', { error, dispatchMetadata }) + } + } + } +} diff --git a/apps/sim/lib/execution/buffered-stream.ts b/apps/sim/lib/execution/buffered-stream.ts new file mode 100644 index 00000000000..f1b413b6f96 --- /dev/null +++ b/apps/sim/lib/execution/buffered-stream.ts @@ -0,0 +1,111 @@ +import { createLogger } from '@sim/logger' +import { + type ExecutionStreamStatus, + getExecutionMeta, + readExecutionEvents, +} from '@/lib/execution/event-buffer' +import { formatSSEEvent } from '@/lib/workflows/executor/execution-events' + +const logger = createLogger('BufferedExecutionStream') + +const POLL_INTERVAL_MS = 500 +const MAX_POLL_DURATION_MS = 10 * 60 * 1000 + +function isTerminalStatus(status: ExecutionStreamStatus): boolean { + return status === 'complete' || status === 'error' || status === 'cancelled' +} + +export function createBufferedExecutionStream( + executionId: string, + initialEventId = 0 +): ReadableStream { + const encoder = new TextEncoder() + let closed = false + + return new ReadableStream({ + async start(controller) { + let lastEventId = initialEventId + const pollDeadline = Date.now() + MAX_POLL_DURATION_MS + + const enqueue = (text: string) => { + if (closed) { + return + } + + try { + controller.enqueue(encoder.encode(text)) + } catch { + closed = true + } + } + + try { + const initialEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of initialEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + + while (!closed && 
Date.now() < pollDeadline) { + const meta = await getExecutionMeta(executionId) + + if (meta && isTerminalStatus(meta.status)) { + const finalEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of finalEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + + enqueue('data: [DONE]\n\n') + controller.close() + return + } + + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) + if (closed) { + return + } + + const newEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of newEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + } + + if (!closed) { + logger.warn('Buffered execution stream deadline reached', { executionId }) + enqueue('data: [DONE]\n\n') + controller.close() + } + } catch (error) { + logger.error('Buffered execution stream failed', { + executionId, + error: error instanceof Error ? 
error.message : String(error), + }) + + if (!closed) { + try { + controller.close() + } catch {} + } + } + }, + cancel() { + closed = true + logger.info('Client disconnected from buffered execution stream', { executionId }) + }, + }) +} diff --git a/apps/sim/lib/knowledge/connectors/sync-engine.ts b/apps/sim/lib/knowledge/connectors/sync-engine.ts index 3ec619e723a..b388f4066de 100644 --- a/apps/sim/lib/knowledge/connectors/sync-engine.ts +++ b/apps/sim/lib/knowledge/connectors/sync-engine.ts @@ -8,11 +8,13 @@ import { import { createLogger } from '@sim/logger' import { and, eq, inArray, isNull, ne, sql } from 'drizzle-orm' import { decryptApiKey } from '@/lib/api-key/crypto' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { getInternalApiBaseUrl } from '@/lib/core/utils/urls' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { + dispatchDocumentProcessingJob, hardDeleteDocuments, isTriggerAvailable, - processDocumentAsync, } from '@/lib/knowledge/documents/service' import { StorageService } from '@/lib/uploads' import { deleteFile } from '@/lib/uploads/core/storage-service' @@ -131,8 +133,7 @@ export function resolveTagMapping( } /** - * Dispatch a connector sync — uses Trigger.dev when available, - * otherwise falls back to direct executeSync. + * Dispatch a connector sync using the configured background execution backend. 
*/ export async function dispatchSync( connectorId: string, @@ -147,6 +148,38 @@ export async function dispatchSync( requestId, }) logger.info(`Dispatched connector sync to Trigger.dev`, { connectorId, requestId }) + } else if (isBullMQEnabled()) { + const connectorRows = await db + .select({ + workspaceId: knowledgeBase.workspaceId, + userId: knowledgeBase.userId, + }) + .from(knowledgeConnector) + .innerJoin(knowledgeBase, eq(knowledgeBase.id, knowledgeConnector.knowledgeBaseId)) + .where(eq(knowledgeConnector.id, connectorId)) + .limit(1) + + const workspaceId = connectorRows[0]?.workspaceId + const userId = connectorRows[0]?.userId + if (!workspaceId || !userId) { + throw new Error(`No workspace found for connector ${connectorId}`) + } + + await enqueueWorkspaceDispatch({ + workspaceId, + lane: 'knowledge', + queueName: 'knowledge-connector-sync', + bullmqJobName: 'knowledge-connector-sync', + bullmqPayload: createBullMQJobData({ + connectorId, + fullSync: options?.fullSync, + requestId, + }), + metadata: { + userId, + }, + }) + logger.info(`Dispatched connector sync to BullMQ`, { connectorId, requestId }) } else { executeSync(connectorId, { fullSync: options?.fullSync }).catch((error) => { logger.error(`Sync failed for connector ${connectorId}`, { @@ -498,21 +531,17 @@ export async function executeSync( if (stuckDocs.length > 0) { logger.info(`Retrying ${stuckDocs.length} stuck documents`, { connectorId }) for (const doc of stuckDocs) { - processDocumentAsync( - connector.knowledgeBaseId, - doc.id, - { + await dispatchDocumentProcessingJob({ + knowledgeBaseId: connector.knowledgeBaseId, + documentId: doc.id, + docData: { filename: doc.filename ?? 'document.txt', fileUrl: doc.fileUrl ?? '', fileSize: doc.fileSize ?? 0, mimeType: 'text/plain', }, - {} - ).catch((error) => { - logger.warn('Failed to retry stuck document', { - documentId: doc.id, - error: error instanceof Error ? 
error.message : String(error), - }) + processingOptions: {}, + requestId: `connector-retry-${connectorId}`, }) } } @@ -686,22 +715,17 @@ async function addDocument( throw error } - processDocumentAsync( + await dispatchDocumentProcessingJob({ knowledgeBaseId, documentId, - { + docData: { filename: processingFilename, fileUrl, fileSize: contentBuffer.length, mimeType: 'text/plain', }, - {} - ).catch((error) => { - logger.error('Failed to process connector document', { - documentId, - connectorId, - error: error instanceof Error ? error.message : String(error), - }) + processingOptions: {}, + requestId: `connector-sync-${connectorId}`, }) } @@ -807,21 +831,16 @@ async function updateDocument( } } - processDocumentAsync( + await dispatchDocumentProcessingJob({ knowledgeBaseId, - existingDocId, - { + documentId: existingDocId, + docData: { filename: processingFilename, fileUrl, fileSize: contentBuffer.length, mimeType: 'text/plain', }, - {} - ).catch((error) => { - logger.error('Failed to re-process updated connector document', { - documentId: existingDocId, - connectorId, - error: error instanceof Error ? 
error.message : String(error), - }) + processingOptions: {}, + requestId: `connector-sync-${connectorId}`, }) } diff --git a/apps/sim/lib/knowledge/documents/document-processor.ts b/apps/sim/lib/knowledge/documents/document-processor.ts index 0185de495b1..0a64dbf547c 100644 --- a/apps/sim/lib/knowledge/documents/document-processor.ts +++ b/apps/sim/lib/knowledge/documents/document-processor.ts @@ -5,6 +5,7 @@ import { type Chunk, JsonYamlChunker, StructuredDataChunker, TextChunker } from import { env } from '@/lib/core/config/env' import { parseBuffer, parseFile } from '@/lib/file-parsers' import type { FileParseMetadata } from '@/lib/file-parsers/types' +import { resolveParserExtension } from '@/lib/knowledge/documents/parser-extension' import { retryWithExponentialBackoff } from '@/lib/knowledge/documents/utils' import { StorageService } from '@/lib/uploads' import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils' @@ -727,7 +728,7 @@ async function parseWithFileParser(fileUrl: string, filename: string, mimeType: if (fileUrl.startsWith('data:')) { content = await parseDataURI(fileUrl, filename, mimeType) } else if (fileUrl.startsWith('http')) { - const result = await parseHttpFile(fileUrl, filename) + const result = await parseHttpFile(fileUrl, filename, mimeType) content = result.content metadata = result.metadata || {} } else { @@ -759,7 +760,7 @@ async function parseDataURI(fileUrl: string, filename: string, mimeType: string) : decodeURIComponent(base64Data) } - const extension = filename.split('.').pop()?.toLowerCase() || 'txt' + const extension = resolveParserExtension(filename, mimeType) const buffer = Buffer.from(base64Data, 'base64') const result = await parseBuffer(buffer, extension) return result.content @@ -767,15 +768,12 @@ async function parseDataURI(fileUrl: string, filename: string, mimeType: string) async function parseHttpFile( fileUrl: string, - filename: string + filename: string, + mimeType: string ): Promise<{ content: string; 
metadata?: FileParseMetadata }> { const buffer = await downloadFileWithTimeout(fileUrl) - const extension = filename.split('.').pop()?.toLowerCase() - if (!extension) { - throw new Error(`Could not determine file extension: ${filename}`) - } - + const extension = resolveParserExtension(filename, mimeType) const result = await parseBuffer(buffer, extension) return result } diff --git a/apps/sim/lib/knowledge/documents/parser-extension.test.ts b/apps/sim/lib/knowledge/documents/parser-extension.test.ts new file mode 100644 index 00000000000..4d65abdfef5 --- /dev/null +++ b/apps/sim/lib/knowledge/documents/parser-extension.test.ts @@ -0,0 +1,27 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { resolveParserExtension } from '@/lib/knowledge/documents/parser-extension' + +describe('resolveParserExtension', () => { + it('uses a supported filename extension when present', () => { + expect(resolveParserExtension('report.pdf', 'application/pdf')).toBe('pdf') + }) + + it('falls back to mime type when filename has no extension', () => { + expect( + resolveParserExtension('[Business] Your Thursday morning trip with Uber', 'text/plain') + ).toBe('txt') + }) + + it('falls back to mime type when filename extension is unsupported', () => { + expect(resolveParserExtension('uber-message.business', 'text/plain')).toBe('txt') + }) + + it('throws when neither filename nor mime type resolves to a supported parser', () => { + expect(() => + resolveParserExtension('uber-message.unknown', 'application/octet-stream') + ).toThrow('Unsupported file type') + }) +}) diff --git a/apps/sim/lib/knowledge/documents/parser-extension.ts b/apps/sim/lib/knowledge/documents/parser-extension.ts new file mode 100644 index 00000000000..7260ee318c0 --- /dev/null +++ b/apps/sim/lib/knowledge/documents/parser-extension.ts @@ -0,0 +1,48 @@ +import { getExtensionFromMimeType } from '@/lib/uploads/utils/file-utils' + +const SUPPORTED_FILE_TYPES = [ + 'pdf', + 
'csv', + 'docx', + 'doc', + 'txt', + 'md', + 'xlsx', + 'xls', + 'pptx', + 'ppt', + 'html', + 'htm', + 'json', + 'yaml', + 'yml', +] as const + +const SUPPORTED_FILE_TYPES_TEXT = SUPPORTED_FILE_TYPES.join(', ') + +function isSupportedParserExtension(extension: string): boolean { + return SUPPORTED_FILE_TYPES.includes(extension as (typeof SUPPORTED_FILE_TYPES)[number]) +} + +export function resolveParserExtension(filename: string, mimeType: string): string { + const filenameExtension = filename.includes('.') + ? filename.split('.').pop()?.toLowerCase() + : undefined + + if (filenameExtension && isSupportedParserExtension(filenameExtension)) { + return filenameExtension + } + + const mimeExtension = getExtensionFromMimeType(mimeType) + if (mimeExtension && isSupportedParserExtension(mimeExtension)) { + return mimeExtension + } + + if (filenameExtension) { + throw new Error( + `Unsupported file type: ${filenameExtension}. Supported types are: ${SUPPORTED_FILE_TYPES_TEXT}` + ) + } + + throw new Error(`Could not determine file type for ${filename || 'document'}`) +} diff --git a/apps/sim/lib/knowledge/documents/queue.ts b/apps/sim/lib/knowledge/documents/queue.ts deleted file mode 100644 index 31dd0879c70..00000000000 --- a/apps/sim/lib/knowledge/documents/queue.ts +++ /dev/null @@ -1,227 +0,0 @@ -import { createLogger } from '@sim/logger' -import { getRedisClient } from '@/lib/core/config/redis' -import { getStorageMethod, type StorageMethod } from '@/lib/core/storage' - -const logger = createLogger('DocumentQueue') - -interface QueueJob { - id: string - type: string - data: T - timestamp: number - attempts: number - maxAttempts: number -} - -interface QueueConfig { - maxConcurrent: number - retryDelay: number - maxRetries: number -} - -/** - * Document processing queue that uses either Redis or in-memory storage. - * Storage method is determined once at construction based on configuration. - * No switching on transient errors. 
- */ -export class DocumentProcessingQueue { - private config: QueueConfig - private storageMethod: StorageMethod - private processing = new Map>() - private inMemoryQueue: QueueJob[] = [] - private inMemoryProcessing = 0 - private processingStarted = false - - constructor(config: QueueConfig) { - this.config = config - this.storageMethod = getStorageMethod() - logger.info(`DocumentProcessingQueue using ${this.storageMethod} storage`) - } - - async addJob(type: string, data: T, options: { maxAttempts?: number } = {}): Promise { - const job: QueueJob = { - id: `${type}-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, - type, - data, - timestamp: Date.now(), - attempts: 0, - maxAttempts: options.maxAttempts || this.config.maxRetries, - } - - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis configured but client unavailable') - } - await redis.lpush('document-queue', JSON.stringify(job)) - logger.info(`Job ${job.id} added to Redis queue`) - } else { - this.inMemoryQueue.push(job) - logger.info(`Job ${job.id} added to in-memory queue`) - } - - return job.id - } - - async processJobs(processor: (job: QueueJob) => Promise): Promise { - if (this.processingStarted) { - logger.info('Queue processing already started, skipping') - return - } - - this.processingStarted = true - logger.info(`Starting queue processing (${this.storageMethod})`) - - if (this.storageMethod === 'redis') { - await this.processRedisJobs(processor) - } else { - await this.processInMemoryJobs(processor) - } - } - - private async processRedisJobs(processor: (job: QueueJob) => Promise) { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis configured but client unavailable') - } - - const processJobsContinuously = async () => { - while (true) { - if (this.processing.size >= this.config.maxConcurrent) { - await new Promise((resolve) => setTimeout(resolve, 100)) - continue - } - - try { - const result = await 
redis.rpop('document-queue') - if (!result) { - await new Promise((resolve) => setTimeout(resolve, 500)) - continue - } - - const job: QueueJob = JSON.parse(result) - const promise = this.executeJob(job, processor) - this.processing.set(job.id, promise) - - promise.finally(() => { - this.processing.delete(job.id) - }) - } catch (error: any) { - logger.error('Error processing Redis job:', error) - await new Promise((resolve) => setTimeout(resolve, 1000)) - } - } - } - - const processors = Array(this.config.maxConcurrent) - .fill(null) - .map(() => processJobsContinuously()) - - Promise.allSettled(processors).catch((error) => { - logger.error('Error in Redis queue processors:', error) - }) - } - - private async processInMemoryJobs(processor: (job: QueueJob) => Promise) { - const processInMemoryContinuously = async () => { - while (true) { - if (this.inMemoryProcessing >= this.config.maxConcurrent) { - await new Promise((resolve) => setTimeout(resolve, 100)) - continue - } - - const job = this.inMemoryQueue.shift() - if (!job) { - await new Promise((resolve) => setTimeout(resolve, 500)) - continue - } - - this.inMemoryProcessing++ - - this.executeJob(job, processor).finally(() => { - this.inMemoryProcessing-- - }) - } - } - - const processors = Array(this.config.maxConcurrent) - .fill(null) - .map(() => processInMemoryContinuously()) - - Promise.allSettled(processors).catch((error) => { - logger.error('Error in in-memory queue processors:', error) - }) - } - - private async executeJob( - job: QueueJob, - processor: (job: QueueJob) => Promise - ): Promise { - try { - job.attempts++ - logger.info(`Processing job ${job.id} (attempt ${job.attempts}/${job.maxAttempts})`) - - await processor(job) - logger.info(`Job ${job.id} completed successfully`) - } catch (error) { - logger.error(`Job ${job.id} failed (attempt ${job.attempts}):`, error) - - if (job.attempts < job.maxAttempts) { - const delay = this.config.retryDelay * 2 ** (job.attempts - 1) - - setTimeout(async () => { 
- if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (!redis) { - logger.error('Redis unavailable for retry, job lost:', job.id) - return - } - await redis.lpush('document-queue', JSON.stringify(job)) - } else { - this.inMemoryQueue.push(job) - } - }, delay) - - logger.info(`Job ${job.id} will retry in ${delay}ms`) - } else { - logger.error(`Job ${job.id} failed permanently after ${job.attempts} attempts`) - } - } - } - - async getQueueStats(): Promise<{ - pending: number - processing: number - storageMethod: StorageMethod - }> { - let pending = 0 - - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (redis) { - pending = await redis.llen('document-queue') - } - } else { - pending = this.inMemoryQueue.length - } - - return { - pending, - processing: this.storageMethod === 'redis' ? this.processing.size : this.inMemoryProcessing, - storageMethod: this.storageMethod, - } - } - - async clearQueue(): Promise { - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (redis) { - await redis.del('document-queue') - logger.info('Redis queue cleared') - } - } - - this.inMemoryQueue.length = 0 - logger.info('In-memory queue cleared') - } -} diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts index 2d607c41da5..ccb00f5633c 100644 --- a/apps/sim/lib/knowledge/documents/service.ts +++ b/apps/sim/lib/knowledge/documents/service.ts @@ -25,10 +25,11 @@ import { type SQL, sql, } from 'drizzle-orm' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { env } from '@/lib/core/config/env' -import { getStorageMethod, isRedisStorage } from '@/lib/core/storage' +import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { processDocument } from '@/lib/knowledge/documents/document-processor' -import { DocumentProcessingQueue } from 
'@/lib/knowledge/documents/queue' import type { DocumentSortField, SortOrder } from '@/lib/knowledge/documents/types' import { generateEmbeddings } from '@/lib/knowledge/embeddings' import { @@ -88,22 +89,8 @@ const REDIS_PROCESSING_CONFIG = { delayBetweenDocuments: env.KB_CONFIG_DELAY_BETWEEN_DOCUMENTS || 50, } -let documentQueue: DocumentProcessingQueue | null = null - -export function getDocumentQueue(): DocumentProcessingQueue { - if (!documentQueue) { - const config = isRedisStorage() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG - documentQueue = new DocumentProcessingQueue({ - maxConcurrent: config.maxConcurrentDocuments, - retryDelay: env.KB_CONFIG_MIN_TIMEOUT || 1000, - maxRetries: env.KB_CONFIG_MAX_ATTEMPTS || 3, - }) - } - return documentQueue -} - export function getProcessingConfig() { - return isRedisStorage() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG + return isBullMQEnabled() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG } export interface DocumentData { @@ -115,11 +102,11 @@ export interface DocumentData { } export interface ProcessingOptions { - chunkSize: number - minCharactersPerChunk: number - recipe: string - lang: string - chunkOverlap: number + chunkSize?: number + minCharactersPerChunk?: number + recipe?: string + lang?: string + chunkOverlap?: number } export interface DocumentJobData { @@ -135,6 +122,54 @@ export interface DocumentJobData { requestId: string } +export async function dispatchDocumentProcessingJob(payload: DocumentJobData): Promise { + if (isTriggerAvailable()) { + await tasks.trigger('knowledge-process-document', payload) + return + } + + if (isBullMQEnabled()) { + const workspaceRows = await db + .select({ + workspaceId: knowledgeBase.workspaceId, + userId: knowledgeBase.userId, + }) + .from(knowledgeBase) + .where(and(eq(knowledgeBase.id, payload.knowledgeBaseId), isNull(knowledgeBase.deletedAt))) + .limit(1) + + const workspaceId = workspaceRows[0]?.workspaceId + const userId = workspaceRows[0]?.userId + if 
(!workspaceId || !userId) { + throw new Error(`Knowledge base not found: ${payload.knowledgeBaseId}`) + } + + await enqueueWorkspaceDispatch({ + workspaceId, + lane: 'knowledge', + queueName: 'knowledge-process-document', + bullmqJobName: 'knowledge-process-document', + bullmqPayload: createBullMQJobData(payload), + metadata: { + userId, + }, + }) + return + } + + void processDocumentAsync( + payload.knowledgeBaseId, + payload.documentId, + payload.docData, + payload.processingOptions + ).catch((error) => { + logger.error(`[${payload.requestId}] Direct document processing failed`, { + documentId: payload.documentId, + error: error instanceof Error ? error.message : String(error), + }) + }) +} + export interface DocumentTagData { tagName: string fieldType: string @@ -323,7 +358,7 @@ export async function processDocumentTags( } /** - * Process documents with best available method: Trigger.dev > Redis queue > in-memory concurrency control + * Process documents with the configured background execution backend. 
*/ export async function processDocumentsWithQueue( createdDocuments: DocumentData[], @@ -331,76 +366,29 @@ export async function processDocumentsWithQueue( processingOptions: ProcessingOptions, requestId: string ): Promise { - // Priority 1: Trigger.dev - if (isTriggerAvailable()) { - try { - logger.info( - `[${requestId}] Using Trigger.dev background processing for ${createdDocuments.length} documents` - ) - - const triggerPayloads = createdDocuments.map((doc) => ({ - knowledgeBaseId, - documentId: doc.documentId, - docData: { - filename: doc.filename, - fileUrl: doc.fileUrl, - fileSize: doc.fileSize, - mimeType: doc.mimeType, - }, - processingOptions, - requestId, - })) - - const result = await processDocumentsWithTrigger(triggerPayloads, requestId) - - if (result.success) { - logger.info( - `[${requestId}] Successfully triggered background processing: ${result.message}` - ) - return - } - logger.warn(`[${requestId}] Trigger.dev failed: ${result.message}, falling back to Redis`) - } catch (error) { - logger.warn(`[${requestId}] Trigger.dev processing failed, falling back to Redis:`, error) - } - } - - // Priority 2: Queue-based processing (Redis or in-memory based on storage method) - const queue = getDocumentQueue() - const storageMethod = getStorageMethod() + const jobPayloads = createdDocuments.map((doc) => ({ + knowledgeBaseId, + documentId: doc.documentId, + docData: { + filename: doc.filename, + fileUrl: doc.fileUrl, + fileSize: doc.fileSize, + mimeType: doc.mimeType, + }, + processingOptions, + requestId, + })) logger.info( - `[${requestId}] Using ${storageMethod} queue for ${createdDocuments.length} documents` - ) - - const jobPromises = createdDocuments.map((doc) => - queue.addJob('process-document', { - knowledgeBaseId, - documentId: doc.documentId, - docData: { - filename: doc.filename, - fileUrl: doc.fileUrl, - fileSize: doc.fileSize, - mimeType: doc.mimeType, - }, - processingOptions, - requestId, - }) + `[${requestId}] Dispatching background 
processing for ${jobPayloads.length} documents`, + { + backend: isTriggerAvailable() ? 'trigger-dev' : isBullMQEnabled() ? 'bullmq' : 'direct', + } ) - await Promise.all(jobPromises) + await Promise.all(jobPayloads.map((payload) => dispatchDocumentProcessingJob(payload))) - queue - .processJobs(async (job) => { - const data = job.data as DocumentJobData - const { knowledgeBaseId, documentId, docData, processingOptions } = data - await processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions) - }) - .catch((error) => { - logger.error(`[${requestId}] Error in queue processing:`, error) - }) - - logger.info(`[${requestId}] All documents queued for processing`) + logger.info(`[${requestId}] All documents dispatched for processing`) return } @@ -660,7 +648,7 @@ export async function processDocumentAsync( * Check if Trigger.dev is available and configured */ export function isTriggerAvailable(): boolean { - return !!(env.TRIGGER_SECRET_KEY && env.TRIGGER_DEV_ENABLED !== false) + return Boolean(env.TRIGGER_SECRET_KEY) && isTriggerDevEnabled } /** @@ -1591,11 +1579,13 @@ export async function retryDocumentProcessing( chunkOverlap: kbConfig.overlap, } - processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions).catch( - (error: unknown) => { - logger.error(`[${requestId}] Background retry processing error:`, error) - } - ) + await dispatchDocumentProcessingJob({ + knowledgeBaseId, + documentId, + docData, + processingOptions, + requestId, + }) logger.info(`[${requestId}] Document retry initiated: ${documentId}`) diff --git a/apps/sim/lib/logs/events.ts b/apps/sim/lib/logs/events.ts index bbf17b2320e..54392e2ebbf 100644 --- a/apps/sim/lib/logs/events.ts +++ b/apps/sim/lib/logs/events.ts @@ -12,6 +12,7 @@ import { } from '@/lib/notifications/alert-rules' import { getActiveWorkflowContext } from '@/lib/workflows/active-context' import { + enqueueNotificationDeliveryDispatch, executeNotificationDelivery, workspaceNotificationDeliveryTask, 
} from '@/background/workspace-notification-delivery' @@ -131,6 +132,7 @@ export async function emitWorkflowExecutionCompleted(log: WorkflowExecutionLog): const payload = { deliveryId, subscriptionId: subscription.id, + workspaceId, notificationType: subscription.notificationType, log: notificationLog, alertConfig: alertConfig || undefined, @@ -141,6 +143,10 @@ export async function emitWorkflowExecutionCompleted(log: WorkflowExecutionLog): logger.info( `Enqueued ${subscription.notificationType} notification ${deliveryId} via Trigger.dev` ) + } else if (await enqueueNotificationDeliveryDispatch(payload)) { + logger.info( + `Enqueued ${subscription.notificationType} notification ${deliveryId} via BullMQ` + ) } else { void executeNotificationDelivery(payload).catch((error) => { logger.error(`Direct notification delivery failed for ${deliveryId}`, { error }) diff --git a/apps/sim/lib/notifications/inactivity-polling.ts b/apps/sim/lib/notifications/inactivity-polling.ts index 3a4505346d8..81aa0692dba 100644 --- a/apps/sim/lib/notifications/inactivity-polling.ts +++ b/apps/sim/lib/notifications/inactivity-polling.ts @@ -12,6 +12,7 @@ import { v4 as uuidv4 } from 'uuid' import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' import { TRIGGER_TYPES } from '@/lib/workflows/triggers/triggers' import { + enqueueNotificationDeliveryDispatch, executeNotificationDelivery, workspaceNotificationDeliveryTask, } from '@/background/workspace-notification-delivery' @@ -181,6 +182,7 @@ async function checkWorkflowInactivity( const payload = { deliveryId, subscriptionId: subscription.id, + workspaceId: workflowData.workspaceId, notificationType: subscription.notificationType, log: mockLog, alertConfig, @@ -188,6 +190,7 @@ async function checkWorkflowInactivity( if (isTriggerDevEnabled) { await workspaceNotificationDeliveryTask.trigger(payload) + } else if (await enqueueNotificationDeliveryDispatch(payload)) { } else { void 
executeNotificationDelivery(payload).catch((error) => { logger.error(`Direct notification delivery failed for ${deliveryId}`, { error }) diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index f3abdf5acde..3aa2f219eb1 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -244,13 +244,13 @@ async function hydrateValue( * Hydrates UserFile objects within a value to include base64 content. * Returns the original structure with UserFile.base64 set where available. */ -export async function hydrateUserFilesWithBase64( - value: unknown, +export async function hydrateUserFilesWithBase64( + value: T, options: Base64HydrationOptions -): Promise { +): Promise { const logger = getHydrationLogger(options) const state = createHydrationState(options, logger) - return hydrateValue(value, options, state, logger) + return (await hydrateValue(value, options, state, logger)) as T } function isPlainObject(value: unknown): value is Record { diff --git a/apps/sim/lib/webhooks/processor.test.ts b/apps/sim/lib/webhooks/processor.test.ts index 20ae4408cd8..86876fda02b 100644 --- a/apps/sim/lib/webhooks/processor.test.ts +++ b/apps/sim/lib/webhooks/processor.test.ts @@ -9,12 +9,14 @@ const { mockUuidV4, mockPreprocessExecution, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockGetJobQueue, mockShouldExecuteInline, } = vi.hoisted(() => ({ mockUuidV4: vi.fn(), mockPreprocessExecution: vi.fn(), mockEnqueue: vi.fn(), + mockEnqueueWorkspaceDispatch: vi.fn(), mockGetJobQueue: vi.fn(), mockShouldExecuteInline: vi.fn(), })) @@ -62,6 +64,15 @@ vi.mock('@/lib/core/async-jobs', () => ({ shouldExecuteInline: mockShouldExecuteInline, })) +vi.mock('@/lib/core/bullmq', () => ({ + isBullMQEnabled: vi.fn().mockReturnValue(true), + createBullMQJobData: vi.fn((payload: unknown, metadata?: unknown) => ({ payload, metadata })), +})) + 
+vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, +})) + vi.mock('@/lib/core/config/feature-flags', () => ({ isProd: false, })) @@ -142,6 +153,7 @@ describe('webhook processor execution identity', () => { actorUserId: 'actor-user-1', }) mockEnqueue.mockResolvedValue('job-1') + mockEnqueueWorkspaceDispatch.mockResolvedValue('job-1') mockGetJobQueue.mockResolvedValue({ enqueue: mockEnqueue }) mockShouldExecuteInline.mockReturnValue(false) mockUuidV4.mockReturnValue('generated-execution-id') @@ -202,15 +214,15 @@ describe('webhook processor execution identity', () => { ) expect(mockUuidV4).toHaveBeenCalledTimes(1) - expect(mockEnqueue).toHaveBeenCalledWith( - 'webhook-execution', - expect.objectContaining({ - executionId: 'generated-execution-id', - requestId: 'request-1', - correlation: preprocessingResult.correlation, - }), + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ + id: 'generated-execution-id', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'webhook-execution', metadata: expect.objectContaining({ + workflowId: 'workflow-1', + userId: 'actor-user-1', correlation: preprocessingResult.correlation, }), }) diff --git a/apps/sim/lib/webhooks/processor.ts b/apps/sim/lib/webhooks/processor.ts index 48604026691..5b537944337 100644 --- a/apps/sim/lib/webhooks/processor.ts +++ b/apps/sim/lib/webhooks/processor.ts @@ -7,8 +7,10 @@ import { v4 as uuidv4 } from 'uuid' import { checkEnterprisePlan, checkTeamPlan } from '@/lib/billing/subscriptions/utils' import { getInlineJobQueue, getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' import type { AsyncExecutionCorrelation } from '@/lib/core/async-jobs/types' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { isProd } from '@/lib/core/config/feature-flags' import { safeCompare } from '@/lib/core/security/encryption' +import { enqueueWorkspaceDispatch } from 
'@/lib/core/workspace-dispatch' import { getEffectiveDecryptedEnv } from '@/lib/environment/utils' import { preprocessExecution } from '@/lib/execution/preprocessing' import { @@ -1277,53 +1279,91 @@ export async function queueWebhookExecution( const isPolling = isPollingWebhookProvider(payload.provider) if (isPolling && !shouldExecuteInline()) { - const jobQueue = await getJobQueue() - const jobId = await jobQueue.enqueue('webhook-execution', payload, { - metadata: { - workflowId: foundWorkflow.id, - userId: actorUserId, - correlation, - }, - }) + const jobId = isBullMQEnabled() + ? await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: foundWorkflow.workspaceId, + lane: 'runtime', + queueName: 'webhook-execution', + bullmqJobName: 'webhook-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }), + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) + : await (await getJobQueue()).enqueue('webhook-execution', payload, { + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) logger.info( `[${options.requestId}] Queued polling webhook execution task ${jobId} for ${foundWebhook.provider} webhook via job queue` ) } else { const jobQueue = await getInlineJobQueue() - const jobId = await jobQueue.enqueue('webhook-execution', payload, { - metadata: { - workflowId: foundWorkflow.id, - userId: actorUserId, - correlation, - }, - }) + const jobId = isBullMQEnabled() + ? 
await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: foundWorkflow.workspaceId, + lane: 'runtime', + queueName: 'webhook-execution', + bullmqJobName: 'webhook-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }), + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) + : await jobQueue.enqueue('webhook-execution', payload, { + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) logger.info( - `[${options.requestId}] Executing ${foundWebhook.provider} webhook ${jobId} inline` + `[${options.requestId}] Queued ${foundWebhook.provider} webhook execution ${jobId} via inline backend` ) - void (async () => { - try { - await jobQueue.startJob(jobId) - const output = await executeWebhookJob(payload) - await jobQueue.completeJob(jobId, output) - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error) - logger.error(`[${options.requestId}] Webhook execution failed`, { - jobId, - error: errorMessage, - }) + + if (shouldExecuteInline()) { + void (async () => { try { - await jobQueue.markJobFailed(jobId, errorMessage) - } catch (markFailedError) { - logger.error(`[${options.requestId}] Failed to mark job as failed`, { + await jobQueue.startJob(jobId) + const output = await executeWebhookJob(payload) + await jobQueue.completeJob(jobId, output) + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + logger.error(`[${options.requestId}] Webhook execution failed`, { jobId, - error: - markFailedError instanceof Error - ? markFailedError.message - : String(markFailedError), + error: errorMessage, }) + try { + await jobQueue.markJobFailed(jobId, errorMessage) + } catch (markFailedError) { + logger.error(`[${options.requestId}] Failed to mark job as failed`, { + jobId, + error: + markFailedError instanceof Error + ? 
markFailedError.message + : String(markFailedError), + }) + } } - } - })() + })() + } } if (foundWebhook.provider === 'microsoft-teams') { diff --git a/apps/sim/lib/workflows/executor/execution-events.ts b/apps/sim/lib/workflows/executor/execution-events.ts index 2a2c06d4016..5872c1db5cc 100644 --- a/apps/sim/lib/workflows/executor/execution-events.ts +++ b/apps/sim/lib/workflows/executor/execution-events.ts @@ -241,18 +241,17 @@ export interface SSECallbackOptions { } /** - * Creates SSE callbacks for workflow execution streaming + * Creates execution callbacks using a provided event sink. */ -export function createSSECallbacks(options: SSECallbackOptions) { - const { executionId, workflowId, controller, isStreamClosed, setStreamClosed } = options +export function createExecutionCallbacks(options: { + executionId: string + workflowId: string + sendEvent: (event: ExecutionEvent) => void | Promise +}) { + const { executionId, workflowId, sendEvent } = options - const sendEvent = (event: ExecutionEvent) => { - if (isStreamClosed()) return - try { - controller.enqueue(encodeSSEEvent(event)) - } catch { - setStreamClosed() - } + const sendBufferedEvent = async (event: ExecutionEvent) => { + await sendEvent(event) } const onBlockStart = async ( @@ -263,7 +262,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { iterationContext?: IterationContext, childWorkflowContext?: ChildWorkflowContext ) => { - sendEvent({ + await sendBufferedEvent({ type: 'block:started', timestamp: new Date().toISOString(), executionId, @@ -330,7 +329,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { : {} if (hasError) { - sendEvent({ + await sendBufferedEvent({ type: 'block:error', timestamp: new Date().toISOString(), executionId, @@ -351,7 +350,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { }, }) } else { - sendEvent({ + await sendBufferedEvent({ type: 'block:completed', timestamp: new Date().toISOString(), executionId, @@ -385,7 
+384,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { const { done, value } = await reader.read() if (done) break const chunk = decoder.decode(value, { stream: true }) - sendEvent({ + await sendBufferedEvent({ type: 'stream:chunk', timestamp: new Date().toISOString(), executionId, @@ -393,7 +392,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { data: { blockId, chunk }, }) } - sendEvent({ + await sendBufferedEvent({ type: 'stream:done', timestamp: new Date().toISOString(), executionId, @@ -413,7 +412,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { iterationContext?: IterationContext, executionOrder?: number ) => { - sendEvent({ + void sendBufferedEvent({ type: 'block:childWorkflowStarted', timestamp: new Date().toISOString(), executionId, @@ -430,5 +429,33 @@ export function createSSECallbacks(options: SSECallbackOptions) { }) } - return { sendEvent, onBlockStart, onBlockComplete, onStream, onChildWorkflowInstanceReady } + return { + sendEvent: sendBufferedEvent, + onBlockStart, + onBlockComplete, + onStream, + onChildWorkflowInstanceReady, + } +} + +/** + * Creates SSE callbacks for workflow execution streaming + */ +export function createSSECallbacks(options: SSECallbackOptions) { + const { executionId, workflowId, controller, isStreamClosed, setStreamClosed } = options + + const sendEvent = (event: ExecutionEvent) => { + if (isStreamClosed()) return + try { + controller.enqueue(encodeSSEEvent(event)) + } catch { + setStreamClosed() + } + } + + return createExecutionCallbacks({ + executionId, + workflowId, + sendEvent, + }) } diff --git a/apps/sim/lib/workflows/executor/queued-workflow-execution.ts b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts new file mode 100644 index 00000000000..c60ba860a11 --- /dev/null +++ b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts @@ -0,0 +1,339 @@ +import { createLogger } from '@sim/logger' +import { createTimeoutAbortController, 
getTimeoutErrorMessage } from '@/lib/core/execution-limits' +import { createExecutionEventWriter, setExecutionMeta } from '@/lib/execution/event-buffer' +import { LoggingSession } from '@/lib/logs/execution/logging-session' +import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' +import { + cleanupExecutionBase64Cache, + hydrateUserFilesWithBase64, +} from '@/lib/uploads/utils/user-file-base64.server' +import { + executeWorkflowCore, + wasExecutionFinalizedByCore, +} from '@/lib/workflows/executor/execution-core' +import { + createExecutionCallbacks, + type ExecutionEvent, +} from '@/lib/workflows/executor/execution-events' +import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' +import { ExecutionSnapshot } from '@/executor/execution/snapshot' +import type { ExecutionMetadata, SerializableExecutionState } from '@/executor/execution/types' +import type { BlockLog, NormalizedBlockOutput } from '@/executor/types' +import { hasExecutionResult } from '@/executor/utils/errors' + +const logger = createLogger('QueuedWorkflowExecution') + +export const DIRECT_WORKFLOW_JOB_NAME = 'direct-workflow-execution' + +export interface QueuedWorkflowExecutionPayload { + workflow: Record + metadata: ExecutionMetadata + input: unknown + variables: Record + selectedOutputs?: string[] + includeFileBase64?: boolean + base64MaxBytes?: number + stopAfterBlockId?: string + timeoutMs?: number + runFromBlock?: { + startBlockId: string + sourceSnapshot: SerializableExecutionState + } + streamEvents?: boolean +} + +export interface QueuedWorkflowExecutionResult { + success: boolean + executionId: string + output: NormalizedBlockOutput + error?: string + logs?: BlockLog[] + status: 'success' | 'cancelled' | 'paused' | 'failed' + statusCode?: number + metadata?: { + duration?: number + startTime?: string + endTime?: string + } +} + +function buildResult( + status: QueuedWorkflowExecutionResult['status'], + result: { + success: boolean + 
output: NormalizedBlockOutput + error?: string + logs?: BlockLog[] + metadata?: { + duration?: number + startTime?: string + endTime?: string + } + }, + executionId: string, + statusCode?: number +): QueuedWorkflowExecutionResult { + return { + success: result.success, + executionId, + output: result.output, + error: result.error, + logs: result.logs, + status, + statusCode, + metadata: result.metadata, + } +} + +export async function executeQueuedWorkflowJob( + payload: QueuedWorkflowExecutionPayload +): Promise { + const { metadata } = payload + const { executionId, requestId, workflowId, triggerType } = metadata + const loggingSession = new LoggingSession(workflowId, executionId, triggerType, requestId) + const timeoutController = createTimeoutAbortController(payload.timeoutMs) + const eventWriter = payload.streamEvents ? createExecutionEventWriter(executionId) : null + + if (payload.streamEvents) { + await setExecutionMeta(executionId, { + status: 'active', + userId: metadata.userId, + workflowId, + }) + } + + try { + const snapshot = new ExecutionSnapshot( + metadata, + payload.workflow, + payload.input, + payload.variables, + payload.selectedOutputs ?? 
[] + ) + + let callbacks = {} + + if (eventWriter) { + const executionCallbacks = createExecutionCallbacks({ + executionId, + workflowId, + sendEvent: async (event: ExecutionEvent) => { + await eventWriter.write(event) + }, + }) + + callbacks = { + onBlockStart: executionCallbacks.onBlockStart, + onBlockComplete: executionCallbacks.onBlockComplete, + onStream: executionCallbacks.onStream, + onChildWorkflowInstanceReady: executionCallbacks.onChildWorkflowInstanceReady, + } + + await executionCallbacks.sendEvent({ + type: 'execution:started', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + startTime: metadata.startTime, + }, + }) + } + + const result = await executeWorkflowCore({ + snapshot, + callbacks, + loggingSession, + includeFileBase64: payload.includeFileBase64, + base64MaxBytes: payload.base64MaxBytes, + stopAfterBlockId: payload.stopAfterBlockId, + runFromBlock: payload.runFromBlock, + abortSignal: timeoutController.signal, + }) + + if ( + result.status === 'cancelled' && + timeoutController.isTimedOut() && + timeoutController.timeoutMs + ) { + const timeoutErrorMessage = getTimeoutErrorMessage(null, timeoutController.timeoutMs) + await loggingSession.markAsFailed(timeoutErrorMessage) + + if (eventWriter) { + await eventWriter.write({ + type: 'execution:error', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + error: timeoutErrorMessage, + duration: result.metadata?.duration || 0, + }, + }) + + await setExecutionMeta(executionId, { status: 'error' }) + } + + return buildResult( + 'cancelled', + { + success: false, + output: result.output, + error: timeoutErrorMessage, + logs: result.logs, + metadata: result.metadata + ? 
{ + duration: result.metadata.duration, + startTime: result.metadata.startTime, + endTime: result.metadata.endTime, + } + : undefined, + }, + executionId, + 408 + ) + } + + if (result.status === 'paused') { + if (!result.snapshotSeed) { + await loggingSession.markAsFailed('Missing snapshot seed for paused execution') + } else { + await PauseResumeManager.persistPauseResult({ + workflowId, + executionId, + pausePoints: result.pausePoints || [], + snapshotSeed: result.snapshotSeed, + executorUserId: result.metadata?.userId, + }) + } + } else { + await PauseResumeManager.processQueuedResumes(executionId) + } + + const outputWithBase64 = payload.includeFileBase64 + ? await hydrateUserFilesWithBase64(result.output, { + requestId, + executionId, + maxBytes: payload.base64MaxBytes, + }) + : result.output + + if (eventWriter) { + if (result.status === 'cancelled') { + await eventWriter.write({ + type: 'execution:cancelled', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + duration: result.metadata?.duration || 0, + }, + }) + await setExecutionMeta(executionId, { status: 'cancelled' }) + } else { + await eventWriter.write({ + type: 'execution:completed', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + success: result.success, + output: outputWithBase64, + duration: result.metadata?.duration || 0, + startTime: result.metadata?.startTime || metadata.startTime, + endTime: result.metadata?.endTime || new Date().toISOString(), + }, + }) + await setExecutionMeta(executionId, { status: 'complete' }) + } + } + + return buildResult( + result.status === 'paused' + ? 'paused' + : result.status === 'cancelled' + ? 'cancelled' + : 'success', + { + success: result.success, + output: outputWithBase64, + error: result.error, + logs: result.logs, + metadata: result.metadata + ? 
{ + duration: result.metadata.duration, + startTime: result.metadata.startTime, + endTime: result.metadata.endTime, + } + : undefined, + }, + executionId + ) + } catch (error) { + logger.error('Queued workflow execution failed', { + workflowId, + executionId, + error: error instanceof Error ? error.message : String(error), + }) + + if (!wasExecutionFinalizedByCore(error, executionId)) { + const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + const { traceSpans } = executionResult ? buildTraceSpans(executionResult) : { traceSpans: [] } + await loggingSession.safeCompleteWithError({ + error: { + message: error instanceof Error ? error.message : String(error), + stackTrace: error instanceof Error ? error.stack : undefined, + }, + traceSpans, + }) + } + + if (eventWriter) { + await eventWriter.write({ + type: 'execution:error', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + error: error instanceof Error ? error.message : String(error), + duration: 0, + }, + }) + await setExecutionMeta(executionId, { status: 'error' }) + } + + const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + + return buildResult( + 'failed', + { + success: false, + output: executionResult?.output ?? {}, + error: executionResult?.error || (error instanceof Error ? error.message : String(error)), + logs: executionResult?.logs, + metadata: executionResult?.metadata + ? { + duration: executionResult.metadata.duration, + startTime: executionResult.metadata.startTime, + endTime: executionResult.metadata.endTime, + } + : undefined, + }, + executionId, + 500 + ) + } finally { + timeoutController.cleanup() + + if (eventWriter) { + await eventWriter.close() + } + + await cleanupExecutionBase64Cache(executionId).catch((error) => { + logger.error('Failed to cleanup queued workflow base64 cache', { + executionId, + error: error instanceof Error ? 
error.message : String(error), + }) + }) + } +} diff --git a/apps/sim/lib/workflows/utils.ts b/apps/sim/lib/workflows/utils.ts index d5c50b47ee6..c77521f3b1b 100644 --- a/apps/sim/lib/workflows/utils.ts +++ b/apps/sim/lib/workflows/utils.ts @@ -249,7 +249,9 @@ export async function updateWorkflowRunCounts(workflowId: string, runs = 1) { } } -export const workflowHasResponseBlock = (executionResult: ExecutionResult): boolean => { +export const workflowHasResponseBlock = ( + executionResult: Pick +): boolean => { if (!executionResult?.logs || !Array.isArray(executionResult.logs) || !executionResult.success) { return false } @@ -261,7 +263,9 @@ export const workflowHasResponseBlock = (executionResult: ExecutionResult): bool return responseBlock !== undefined } -export const createHttpResponseFromBlock = (executionResult: ExecutionResult): NextResponse => { +export const createHttpResponseFromBlock = ( + executionResult: Pick +): NextResponse => { const { data = {}, status = 200, headers = {} } = executionResult.output const responseHeaders = new Headers({ diff --git a/apps/sim/package.json b/apps/sim/package.json index 8e9fee648e5..f2f3b050f32 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -11,9 +11,11 @@ "dev": "next dev --port 3000", "dev:webpack": "next dev --webpack", "dev:sockets": "bun run socket/index.ts", - "dev:full": "bunx concurrently -n \"App,Realtime\" -c \"cyan,magenta\" \"bun run dev\" \"bun run dev:sockets\"", + "dev:worker": "bun run worker/index.ts", + "dev:full": "bunx concurrently -n \"App,Realtime,Worker\" -c \"cyan,magenta,yellow\" \"bun run dev\" \"bun run dev:sockets\" \"bun run dev:worker\"", "build": "next build", "start": "next start", + "worker": "NODE_ENV=production bun run worker/index.ts", "prepare": "cd ../.. 
&& bun husky", "test": "vitest run", "test:watch": "vitest", @@ -89,6 +91,7 @@ "better-auth": "1.3.12", "binary-extensions": "^2.0.0", "browser-image-compression": "^2.0.2", + "bullmq": "5.71.0", "chalk": "5.6.2", "cheerio": "1.1.2", "class-variance-authority": "^0.7.1", diff --git a/apps/sim/worker/health.ts b/apps/sim/worker/health.ts new file mode 100644 index 00000000000..f2a5fea3c11 --- /dev/null +++ b/apps/sim/worker/health.ts @@ -0,0 +1,77 @@ +import { createServer } from 'http' +import { createLogger } from '@sim/logger' +import { getRedisClient } from '@/lib/core/config/redis' + +const logger = createLogger('BullMQWorkerHealth') + +export interface WorkerHealthServer { + close: () => Promise +} + +interface WorkerHealthCheck { + redisConnected: boolean + dispatcherLastWakeAt: number +} + +let healthState: WorkerHealthCheck = { + redisConnected: false, + dispatcherLastWakeAt: 0, +} + +export function updateWorkerHealthState(update: Partial): void { + healthState = { ...healthState, ...update } +} + +export function startWorkerHealthServer(port: number): WorkerHealthServer { + const server = createServer((req, res) => { + if (req.method === 'GET' && req.url === '/health') { + const redis = getRedisClient() + const redisConnected = redis !== null + const dispatcherActive = + healthState.dispatcherLastWakeAt > 0 && + Date.now() - healthState.dispatcherLastWakeAt < 30_000 + + const healthy = redisConnected && dispatcherActive + + res.writeHead(healthy ? 200 : 503, { 'Content-Type': 'application/json' }) + res.end( + JSON.stringify({ + ok: healthy, + redis: redisConnected, + dispatcher: dispatcherActive, + lastWakeAgoMs: healthState.dispatcherLastWakeAt + ? 
Date.now() - healthState.dispatcherLastWakeAt + : null, + }) + ) + return + } + + if (req.method === 'GET' && req.url === '/health/live') { + res.writeHead(200, { 'Content-Type': 'application/json' }) + res.end(JSON.stringify({ ok: true })) + return + } + + res.writeHead(404, { 'Content-Type': 'application/json' }) + res.end(JSON.stringify({ error: 'Not found' })) + }) + + server.listen(port, '0.0.0.0', () => { + logger.info(`Worker health server listening on port ${port}`) + }) + + return { + close: () => + new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error) + return + } + + resolve() + }) + }), + } +} diff --git a/apps/sim/worker/index.ts b/apps/sim/worker/index.ts new file mode 100644 index 00000000000..aaf71dd5aab --- /dev/null +++ b/apps/sim/worker/index.ts @@ -0,0 +1,190 @@ +import { createLogger } from '@sim/logger' +import { Worker } from 'bullmq' +import { + getBullMQConnectionOptions, + isBullMQEnabled, + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from '@/lib/core/bullmq' +import { wakeWorkspaceDispatcher } from '@/lib/core/workspace-dispatch' +import { sweepPendingNotificationDeliveries } from '@/background/workspace-notification-delivery' +import { startWorkerHealthServer, updateWorkerHealthState } from '@/worker/health' +import { processKnowledgeConnectorSync } from '@/worker/processors/knowledge-connector-sync' +import { processKnowledgeDocument } from '@/worker/processors/knowledge-document-processing' +import { processMothershipJobExecution } from '@/worker/processors/mothership-job-execution' +import { processSchedule } from '@/worker/processors/schedule' +import { processWebhook } from '@/worker/processors/webhook' +import { processWorkflow } from '@/worker/processors/workflow' +import { processWorkspaceNotificationDelivery } from '@/worker/processors/workspace-notification-delivery' + +const logger = 
createLogger('BullMQWorker') + +const DEFAULT_WORKER_PORT = 3001 +const DEFAULT_WORKFLOW_CONCURRENCY = 50 +const DEFAULT_WEBHOOK_CONCURRENCY = 30 +const DEFAULT_SCHEDULE_CONCURRENCY = 20 +const DEFAULT_MOTHERSHIP_JOB_CONCURRENCY = 10 +const DEFAULT_CONNECTOR_SYNC_CONCURRENCY = 5 +const DEFAULT_DOCUMENT_PROCESSING_CONCURRENCY = 20 +const DEFAULT_NOTIFICATION_DELIVERY_CONCURRENCY = 10 +const DISPATCHER_WAKE_INTERVAL_MS = 5_000 +const NOTIFICATION_SWEEPER_INTERVAL_MS = 10_000 + +function parseWorkerNumber(value: string | undefined, fallback: number): number { + const parsed = Number.parseInt(value ?? '', 10) + return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback +} + +async function main() { + const workerPort = parseWorkerNumber(process.env.WORKER_PORT, DEFAULT_WORKER_PORT) + const healthServer = startWorkerHealthServer(workerPort) + + if (!isBullMQEnabled()) { + logger.warn('BullMQ worker started without REDIS_URL; worker will remain idle') + + const shutdownWithoutRedis = async () => { + await healthServer.close() + process.exit(0) + } + + process.on('SIGINT', shutdownWithoutRedis) + process.on('SIGTERM', shutdownWithoutRedis) + return + } + + const connection = getBullMQConnectionOptions() + + const workflowWorker = new Worker('workflow-execution', processWorkflow, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_WORKFLOW, + DEFAULT_WORKFLOW_CONCURRENCY + ), + }) + + const webhookWorker = new Worker('webhook-execution', processWebhook, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_WEBHOOK, + DEFAULT_WEBHOOK_CONCURRENCY + ), + }) + + const scheduleWorker = new Worker('schedule-execution', processSchedule, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_SCHEDULE, + DEFAULT_SCHEDULE_CONCURRENCY + ), + }) + + const mothershipJobWorker = new Worker( + MOTHERSHIP_JOB_EXECUTION_QUEUE, + processMothershipJobExecution, + { + connection, + concurrency: 
parseWorkerNumber( + process.env.WORKER_CONCURRENCY_MOTHERSHIP_JOB, + DEFAULT_MOTHERSHIP_JOB_CONCURRENCY + ), + } + ) + + const connectorSyncWorker = new Worker( + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + processKnowledgeConnectorSync, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_CONNECTOR_SYNC, + DEFAULT_CONNECTOR_SYNC_CONCURRENCY + ), + } + ) + + const documentProcessingWorker = new Worker( + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + processKnowledgeDocument, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_DOCUMENT_PROCESSING, + DEFAULT_DOCUMENT_PROCESSING_CONCURRENCY + ), + } + ) + + const notificationDeliveryWorker = new Worker( + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, + processWorkspaceNotificationDelivery, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_NOTIFICATION_DELIVERY, + DEFAULT_NOTIFICATION_DELIVERY_CONCURRENCY + ), + } + ) + + const workers = [ + workflowWorker, + webhookWorker, + scheduleWorker, + mothershipJobWorker, + connectorSyncWorker, + documentProcessingWorker, + notificationDeliveryWorker, + ] + + for (const worker of workers) { + worker.on('failed', (job, error) => { + logger.error('BullMQ job failed', { + queue: worker.name, + jobId: job?.id, + name: job?.name, + error: error.message, + }) + }) + } + + const dispatcherWakeInterval = setInterval(() => { + void wakeWorkspaceDispatcher() + .then(() => { + updateWorkerHealthState({ dispatcherLastWakeAt: Date.now() }) + }) + .catch((error) => { + logger.error('Periodic workspace dispatcher wake failed', { error }) + }) + }, DISPATCHER_WAKE_INTERVAL_MS) + dispatcherWakeInterval.unref() + + const notificationSweeperInterval = setInterval(() => { + void sweepPendingNotificationDeliveries().catch((error) => { + logger.error('Pending notification sweeper failed', { error }) + }) + }, NOTIFICATION_SWEEPER_INTERVAL_MS) + notificationSweeperInterval.unref() + + const shutdown = async () => { + 
logger.info('Shutting down BullMQ worker') + + clearInterval(dispatcherWakeInterval) + clearInterval(notificationSweeperInterval) + await Promise.allSettled(workers.map((worker) => worker.close())) + await healthServer.close() + + process.exit(0) + } + + process.on('SIGINT', shutdown) + process.on('SIGTERM', shutdown) +} + +main().catch((error) => { + logger.error('Failed to start BullMQ worker', { + error: error instanceof Error ? error.message : String(error), + }) + process.exit(1) +}) diff --git a/apps/sim/worker/processors/knowledge-connector-sync.ts b/apps/sim/worker/processors/knowledge-connector-sync.ts new file mode 100644 index 00000000000..9a504ebde0f --- /dev/null +++ b/apps/sim/worker/processors/knowledge-connector-sync.ts @@ -0,0 +1,22 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeSync } from '@/lib/knowledge/connectors/sync-engine' +import type { ConnectorSyncPayload } from '@/background/knowledge-connector-sync' + +const logger = createLogger('BullMQKnowledgeConnectorSync') + +export async function processKnowledgeConnectorSync(job: Job>) { + const { connectorId, fullSync } = job.data.payload + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing connector sync job', { + jobId: job.id, + connectorId, + }) + + return runDispatchedJob(job.data.metadata, () => executeSync(connectorId, { fullSync }), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/knowledge-document-processing.ts b/apps/sim/worker/processors/knowledge-document-processing.ts new file mode 100644 index 00000000000..74fff94fb7b --- /dev/null +++ b/apps/sim/worker/processors/knowledge-document-processing.ts @@ -0,0 +1,26 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { type DocumentJobData, processDocumentAsync } from '@/lib/knowledge/documents/service' + +const logger = createLogger('BullMQKnowledgeDocumentProcessing') + +export async function processKnowledgeDocument(job: Job>) { + const { knowledgeBaseId, documentId, docData, processingOptions } = job.data.payload + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing knowledge document job', { + jobId: job.id, + knowledgeBaseId, + documentId, + }) + + await runDispatchedJob( + job.data.metadata, + () => processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions), + { + isFinalAttempt, + } + ) +} diff --git a/apps/sim/worker/processors/mothership-job-execution.ts b/apps/sim/worker/processors/mothership-job-execution.ts new file mode 100644 index 00000000000..d5980394388 --- /dev/null +++ b/apps/sim/worker/processors/mothership-job-execution.ts @@ -0,0 +1,20 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeJobInline, type JobExecutionPayload } from '@/background/schedule-execution' + +const logger = createLogger('BullMQMothershipJobExecution') + +export async function processMothershipJobExecution(job: Job>) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing mothership scheduled job', { + jobId: job.id, + scheduleId: job.data.payload.scheduleId, + }) + + await runDispatchedJob(job.data.metadata, () => executeJobInline(job.data.payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/schedule.ts b/apps/sim/worker/processors/schedule.ts new file mode 100644 index 00000000000..78f4cde7d79 --- /dev/null +++ b/apps/sim/worker/processors/schedule.ts @@ -0,0 +1,21 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeScheduleJob, type ScheduleExecutionPayload } from '@/background/schedule-execution' + +const logger = createLogger('BullMQScheduleProcessor') + +export async function processSchedule(job: Job>) { + const { payload } = job.data + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 1) + + logger.info('Processing schedule job', { + jobId: job.id, + name: job.name, + }) + + return runDispatchedJob(job.data.metadata, () => executeScheduleJob(payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/webhook.ts b/apps/sim/worker/processors/webhook.ts new file mode 100644 index 00000000000..da61aede1c8 --- /dev/null +++ b/apps/sim/worker/processors/webhook.ts @@ -0,0 +1,21 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeWebhookJob, type WebhookExecutionPayload } from '@/background/webhook-execution' + +const logger = createLogger('BullMQWebhookProcessor') + +export async function processWebhook(job: Job>) { + const { payload } = job.data + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing webhook job', { + jobId: job.id, + name: job.name, + }) + + return runDispatchedJob(job.data.metadata, () => executeWebhookJob(payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/workflow.ts b/apps/sim/worker/processors/workflow.ts new file mode 100644 index 00000000000..8648e76b556 --- /dev/null +++ b/apps/sim/worker/processors/workflow.ts @@ -0,0 +1,51 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { + DIRECT_WORKFLOW_JOB_NAME, + executeQueuedWorkflowJob, + type QueuedWorkflowExecutionPayload, +} from '@/lib/workflows/executor/queued-workflow-execution' +import { executeWorkflowJob, type WorkflowExecutionPayload } from '@/background/workflow-execution' + +const logger = createLogger('BullMQWorkflowProcessor') + +type WorkflowQueueJobData = + | BullMQJobData + | BullMQJobData + +function isDirectWorkflowJob( + job: Job +): job is Job> { + return job.name === DIRECT_WORKFLOW_JOB_NAME +} + +function isBackgroundWorkflowJob( + job: Job +): job is Job> { + return job.name !== DIRECT_WORKFLOW_JOB_NAME +} + +export async function processWorkflow(job: Job) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing workflow job', { + jobId: job.id, + name: job.name, + }) + + if (isDirectWorkflowJob(job)) { + return runDispatchedJob(job.data.metadata, () => executeQueuedWorkflowJob(job.data.payload), { + isFinalAttempt, + }) + } + + if (isBackgroundWorkflowJob(job)) { + return runDispatchedJob(job.data.metadata, () => executeWorkflowJob(job.data.payload), { + isFinalAttempt, + }) + } + + throw new Error(`Unsupported workflow job type: ${job.name}`) +} diff --git a/apps/sim/worker/processors/workspace-notification-delivery.ts b/apps/sim/worker/processors/workspace-notification-delivery.ts new file mode 100644 index 00000000000..7b6dbbc6da1 --- /dev/null +++ b/apps/sim/worker/processors/workspace-notification-delivery.ts @@ -0,0 +1,32 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { + executeNotificationDelivery, + type NotificationDeliveryParams, +} from '@/background/workspace-notification-delivery' + +const logger = createLogger('BullMQWorkspaceNotificationDelivery') + +export async function processWorkspaceNotificationDelivery( + job: Job> +) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 1) + + logger.info('Processing workspace notification delivery job', { + jobId: job.id, + deliveryId: job.data.payload.deliveryId, + }) + + const result = await runDispatchedJob( + job.data.metadata, + () => executeNotificationDelivery(job.data.payload), + { + isFinalAttempt, + } + ) + + // Retry scheduling is persisted in the notification delivery row and + // rehydrated by the periodic sweeper, which makes retries crash-safe. 
+} diff --git a/bun.lock b/bun.lock index 61df0c93763..9fc8ccc58a3 100644 --- a/bun.lock +++ b/bun.lock @@ -1,6 +1,5 @@ { "lockfileVersion": 1, - "configVersion": 0, "workspaces": { "": { "name": "simstudio", @@ -115,6 +114,7 @@ "better-auth": "1.3.12", "binary-extensions": "^2.0.0", "browser-image-compression": "^2.0.2", + "bullmq": "5.71.0", "chalk": "5.6.2", "cheerio": "1.1.2", "class-variance-authority": "^0.7.1", @@ -816,6 +816,18 @@ "@mongodb-js/saslprep": ["@mongodb-js/saslprep@1.4.5", "", { "dependencies": { "sparse-bitfield": "^3.0.3" } }, "sha512-k64Lbyb7ycCSXHSLzxVdb2xsKGPMvYZfCICXvDsI8Z65CeWQzTEKS4YmGbnqw+U9RBvLPTsB6UCmwkgsDTGWIw=="], + "@msgpackr-extract/msgpackr-extract-darwin-arm64": ["@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3", "", { "os": "darwin", "cpu": "arm64" }, "sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw=="], + + "@msgpackr-extract/msgpackr-extract-darwin-x64": ["@msgpackr-extract/msgpackr-extract-darwin-x64@3.0.3", "", { "os": "darwin", "cpu": "x64" }, "sha512-mdzd3AVzYKuUmiWOQ8GNhl64/IoFGol569zNRdkLReh6LRLHOXxU4U8eq0JwaD8iFHdVGqSy4IjFL4reoWCDFw=="], + + "@msgpackr-extract/msgpackr-extract-linux-arm": ["@msgpackr-extract/msgpackr-extract-linux-arm@3.0.3", "", { "os": "linux", "cpu": "arm" }, "sha512-fg0uy/dG/nZEXfYilKoRe7yALaNmHoYeIoJuJ7KJ+YyU2bvY8vPv27f7UKhGRpY6euFYqEVhxCFZgAUNQBM3nw=="], + + "@msgpackr-extract/msgpackr-extract-linux-arm64": ["@msgpackr-extract/msgpackr-extract-linux-arm64@3.0.3", "", { "os": "linux", "cpu": "arm64" }, "sha512-YxQL+ax0XqBJDZiKimS2XQaf+2wDGVa1enVRGzEvLLVFeqa5kx2bWbtcSXgsxjQB7nRqqIGFIcLteF/sHeVtQg=="], + + "@msgpackr-extract/msgpackr-extract-linux-x64": ["@msgpackr-extract/msgpackr-extract-linux-x64@3.0.3", "", { "os": "linux", "cpu": "x64" }, "sha512-cvwNfbP07pKUfq1uH+S6KJ7dT9K8WOE4ZiAcsrSes+UY55E/0jLYc+vq+DO7jlmqRb5zAggExKm0H7O/CBaesg=="], + + "@msgpackr-extract/msgpackr-extract-win32-x64": ["@msgpackr-extract/msgpackr-extract-win32-x64@3.0.3", 
"", { "os": "win32", "cpu": "x64" }, "sha512-x0fWaQtYp4E6sktbsdAqnehxDgEc/VwM7uLsRCYWaiGu0ykYdZPiS8zCWdnjHwyiumousxfBm4SO31eXqwEZhQ=="], + "@napi-rs/canvas": ["@napi-rs/canvas@0.1.91", "", { "optionalDependencies": { "@napi-rs/canvas-android-arm64": "0.1.91", "@napi-rs/canvas-darwin-arm64": "0.1.91", "@napi-rs/canvas-darwin-x64": "0.1.91", "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.91", "@napi-rs/canvas-linux-arm64-gnu": "0.1.91", "@napi-rs/canvas-linux-arm64-musl": "0.1.91", "@napi-rs/canvas-linux-riscv64-gnu": "0.1.91", "@napi-rs/canvas-linux-x64-gnu": "0.1.91", "@napi-rs/canvas-linux-x64-musl": "0.1.91", "@napi-rs/canvas-win32-arm64-msvc": "0.1.91", "@napi-rs/canvas-win32-x64-msvc": "0.1.91" } }, "sha512-eeIe1GoB74P1B0Nkw6pV8BCQ3hfCfvyYr4BntzlCsnFXzVJiPMDnLeIx3gVB0xQMblHYnjK/0nCLvirEhOjr5g=="], "@napi-rs/canvas-android-arm64": ["@napi-rs/canvas-android-arm64@0.1.91", "", { "os": "android", "cpu": "arm64" }, "sha512-SLLzXXgSnfct4zy/BVAfweZQkYkPJsNsJ2e5DOE8DFEHC6PufyUrwb12yqeu2So2IOIDpWJJaDAxKY/xpy6MYQ=="], @@ -1790,6 +1802,8 @@ "buildcheck": ["buildcheck@0.0.7", "", {}, "sha512-lHblz4ahamxpTmnsk+MNTRWsjYKv965MwOrSJyeD588rR3Jcu7swE+0wN5F+PbL5cjgu/9ObkhfzEPuofEMwLA=="], + "bullmq": ["bullmq@5.71.0", "", { "dependencies": { "cron-parser": "4.9.0", "ioredis": "5.9.3", "msgpackr": "1.11.5", "node-abort-controller": "3.1.1", "semver": "7.7.4", "tslib": "2.8.1", "uuid": "11.1.0" } }, "sha512-aeNWh4drsafSKnAJeiNH/nZP/5O8ZdtdMbnOPZmpjXj7NZUP5YC901U3bIH41iZValm7d1i3c34ojv7q31m30w=="], + "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], "c12": ["c12@3.1.0", "", { "dependencies": { "chokidar": "^4.0.3", "confbox": "^0.2.2", "defu": "^6.1.4", "dotenv": "^16.6.1", "exsolve": "^1.0.7", "giget": "^2.0.0", "jiti": "^2.4.2", "ohash": "^2.0.11", "pathe": "^2.0.3", "perfect-debounce": "^1.0.0", "pkg-types": "^2.2.0", "rc9": "^2.1.2" }, "peerDependencies": { "magicast": "^0.3.5" }, "optionalPeers": 
["magicast"] }, "sha512-uWoS8OU1MEIsOv8p/5a82c3H31LsWVR5qiyXVfBNOzfffjUWtPnhAb4BYI2uG2HfGmZmFjCtui5XNWaps+iFuw=="], @@ -1928,6 +1942,8 @@ "critters": ["critters@0.0.25", "", { "dependencies": { "chalk": "^4.1.0", "css-select": "^5.1.0", "dom-serializer": "^2.0.0", "domhandler": "^5.0.2", "htmlparser2": "^8.0.2", "postcss": "^8.4.23", "postcss-media-query-parser": "^0.2.3" } }, "sha512-ROF/tjJyyRdM8/6W0VqoN5Ql05xAGnkf5b7f3sTEl1bI5jTQQf8O918RD/V9tEb9pRY/TKcvJekDbJtniHyPtQ=="], + "cron-parser": ["cron-parser@4.9.0", "", { "dependencies": { "luxon": "^3.2.1" } }, "sha512-p0SaNjrHOnQeR8/VnfGbmg9te2kfyYSQ7Sc/j/6DtPL3JQvKxmjO9TSjNFpujqV3vEYYBvNNvXSxzyksBWAx1Q=="], + "croner": ["croner@9.1.0", "", {}, "sha512-p9nwwR4qyT5W996vBZhdvBCnMhicY5ytZkR4D1Xj0wuTDEiMnjwR57Q3RXYY/s0EpX6Ay3vgIcfaR+ewGHsi+g=="], "cronstrue": ["cronstrue@3.3.0", "", { "bin": { "cronstrue": "bin/cli.js" } }, "sha512-iwJytzJph1hosXC09zY8F5ACDJKerr0h3/2mOxg9+5uuFObYlgK0m35uUPk4GCvhHc2abK7NfnR9oMqY0qZFAg=="], @@ -2630,6 +2646,8 @@ "lucide-react": ["lucide-react@0.511.0", "", { "peerDependencies": { "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-VK5a2ydJ7xm8GvBeKLS9mu1pVK6ucef9780JVUjw6bAjJL/QXnd4Y0p7SPeOUMC27YhzNCZvm5d/QX0Tp3rc0w=="], + "luxon": ["luxon@3.7.2", "", {}, "sha512-vtEhXh/gNjI9Yg1u4jX/0YVPMvxzHuGgCm6tC5kZyb08yjGWGnqAjGJvcXbqQR2P3MyMEFnRbpcdFS6PBcLqew=="], + "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], "magicast": ["magicast@0.3.5", "", { "dependencies": { "@babel/parser": "^7.25.4", "@babel/types": "^7.25.4", "source-map-js": "^1.2.0" } }, "sha512-L0WhttDl+2BOsybvEOLK7fW3UA0OQ0IQ2d6Zl2x/a6vVRs3bAY0ECOSHHeL5jD+SbOpOCUEi0y1DgHEn9Qn1AQ=="], @@ -2816,6 +2834,10 @@ "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], + "msgpackr": 
["msgpackr@1.11.5", "", { "optionalDependencies": { "msgpackr-extract": "^3.0.2" } }, "sha512-UjkUHN0yqp9RWKy0Lplhh+wlpdt9oQBYgULZOiFhV3VclSF1JnSQWZ5r9gORQlNYaUKQoR8itv7g7z1xDDuACA=="], + + "msgpackr-extract": ["msgpackr-extract@3.0.3", "", { "dependencies": { "node-gyp-build-optional-packages": "5.2.2" }, "optionalDependencies": { "@msgpackr-extract/msgpackr-extract-darwin-arm64": "3.0.3", "@msgpackr-extract/msgpackr-extract-darwin-x64": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-arm": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-arm64": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-x64": "3.0.3", "@msgpackr-extract/msgpackr-extract-win32-x64": "3.0.3" }, "bin": { "download-msgpackr-prebuilds": "bin/download-prebuilds.js" } }, "sha512-P0efT1C9jIdVRefqjzOQ9Xml57zpOXnIuS+csaB4MdZbTdmGDLo8XhzBG1N7aO11gKDDkJvBLULeFTo46wwreA=="], + "mustache": ["mustache@4.2.0", "", { "bin": { "mustache": "bin/mustache" } }, "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ=="], "mute-stream": ["mute-stream@0.0.8", "", {}, "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA=="], @@ -2856,6 +2878,8 @@ "node-abi": ["node-abi@3.87.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ=="], + "node-abort-controller": ["node-abort-controller@3.1.1", "", {}, "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ=="], + "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], "node-ensure": ["node-ensure@0.0.0", "", {}, "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw=="], @@ -2868,6 +2892,8 @@ "node-gyp-build": ["node-gyp-build@4.8.4", "", { "bin": { "node-gyp-build": "bin.js", "node-gyp-build-optional": 
"optional.js", "node-gyp-build-test": "build-test.js" } }, "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ=="], + "node-gyp-build-optional-packages": ["node-gyp-build-optional-packages@5.2.2", "", { "dependencies": { "detect-libc": "^2.0.1" }, "bin": { "node-gyp-build-optional-packages": "bin.js", "node-gyp-build-optional-packages-optional": "optional.js", "node-gyp-build-optional-packages-test": "build-test.js" } }, "sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw=="], + "node-int64": ["node-int64@0.4.0", "", {}, "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw=="], "node-readable-to-web-readable-stream": ["node-readable-to-web-readable-stream@0.4.2", "", {}, "sha512-/cMZNI34v//jUTrI+UIo4ieHAB5EZRY/+7OmXZgBxaWBMcW2tGdceIw06RFxWxrKZ5Jp3sI2i5TsRo+CBhtVLQ=="], @@ -4052,6 +4078,8 @@ "body-parser/iconv-lite": ["iconv-lite@0.7.1", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-2Tth85cXwGFHfvRgZWszZSvdo+0Xsqmw8k8ZwxScfcBneNUraK+dxRxRm24nszx80Y0TVio8kKLt5sLE7ZCLlw=="], + "bullmq/ioredis": ["ioredis@5.9.3", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-VI5tMCdeoxZWU5vjHWsiE/Su76JGhBvWF1MJnV9ZtGltHk9BmD48oDq8Tj8haZ85aceXZMxLNDQZRVo5QKNgXA=="], + "c12/chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], "c12/confbox": ["confbox@0.2.4", "", {}, "sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ=="], diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 527c8d86b37..4c03862e35b 100644 --- 
a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -23,6 +23,9 @@ services: - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434} - SOCKET_SERVER_URL=${SOCKET_SERVER_URL:-http://realtime:3002} - NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002} + - ADMISSION_GATE_MAX_INFLIGHT=${ADMISSION_GATE_MAX_INFLIGHT:-500} + - DISPATCH_MAX_QUEUE_PER_WORKSPACE=${DISPATCH_MAX_QUEUE_PER_WORKSPACE:-1000} + - DISPATCH_MAX_QUEUE_GLOBAL=${DISPATCH_MAX_QUEUE_GLOBAL:-50000} depends_on: db: condition: service_healthy @@ -37,6 +40,43 @@ services: retries: 3 start_period: 10s + sim-worker: + image: ghcr.io/simstudioai/simstudio:latest + command: ['bun', 'run', 'worker'] + restart: unless-stopped + deploy: + resources: + limits: + memory: 4G + environment: + - NODE_ENV=production + - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} + - REDIS_URL=${REDIS_URL:-} + - ENCRYPTION_KEY=${ENCRYPTION_KEY} + - API_ENCRYPTION_KEY=${API_ENCRYPTION_KEY:-} + - INTERNAL_API_SECRET=${INTERNAL_API_SECRET} + - WORKER_PORT=${WORKER_PORT:-3001} + - WORKER_CONCURRENCY_WORKFLOW=${WORKER_CONCURRENCY_WORKFLOW:-50} + - WORKER_CONCURRENCY_WEBHOOK=${WORKER_CONCURRENCY_WEBHOOK:-30} + - WORKER_CONCURRENCY_SCHEDULE=${WORKER_CONCURRENCY_SCHEDULE:-20} + - WORKER_CONCURRENCY_MOTHERSHIP_JOB=${WORKER_CONCURRENCY_MOTHERSHIP_JOB:-10} + - WORKER_CONCURRENCY_CONNECTOR_SYNC=${WORKER_CONCURRENCY_CONNECTOR_SYNC:-5} + - WORKER_CONCURRENCY_DOCUMENT_PROCESSING=${WORKER_CONCURRENCY_DOCUMENT_PROCESSING:-20} + - WORKER_CONCURRENCY_NOTIFICATION_DELIVERY=${WORKER_CONCURRENCY_NOTIFICATION_DELIVERY:-10} + - DISPATCH_MAX_QUEUE_PER_WORKSPACE=${DISPATCH_MAX_QUEUE_PER_WORKSPACE:-1000} + - DISPATCH_MAX_QUEUE_GLOBAL=${DISPATCH_MAX_QUEUE_GLOBAL:-50000} + depends_on: + db: + condition: service_healthy + migrations: + condition: service_completed_successfully + healthcheck: + test: ['CMD', 'wget', '--spider', '--quiet', 
'http://127.0.0.1:${WORKER_PORT:-3001}/health'] + interval: 90s + timeout: 5s + retries: 3 + start_period: 10s + realtime: image: ghcr.io/simstudioai/realtime:latest restart: unless-stopped diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index af6bbc10c3b..6f985f1cd2a 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -124,6 +124,11 @@ app: OLLAMA_URL: "" # Ollama local LLM server URL ELEVENLABS_API_KEY: "" # ElevenLabs API key for text-to-speech in deployed chat + # Admission & Dispatch Queue Configuration + ADMISSION_GATE_MAX_INFLIGHT: "500" # Max concurrent in-flight execution requests per pod + DISPATCH_MAX_QUEUE_PER_WORKSPACE: "1000" # Max queued dispatch jobs per workspace + DISPATCH_MAX_QUEUE_GLOBAL: "50000" # Max queued dispatch jobs globally + # Rate Limiting Configuration (per minute) RATE_LIMIT_WINDOW_MS: "60000" # Rate limit window duration (1 minute) RATE_LIMIT_FREE_SYNC: "50" # Sync API executions per minute From 75eac74b99e64edbc46ec28b821ed21639536ff1 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 20:26:25 -0700 Subject: [PATCH 02/11] fix bun lock --- bun.lock | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bun.lock b/bun.lock index 9fc8ccc58a3..4ed891bb0b7 100644 --- a/bun.lock +++ b/bun.lock @@ -2438,7 +2438,7 @@ "inquirer": ["inquirer@8.2.7", "", { "dependencies": { "@inquirer/external-editor": "^1.0.0", "ansi-escapes": "^4.2.1", "chalk": "^4.1.1", "cli-cursor": "^3.1.0", "cli-width": "^3.0.0", "figures": "^3.0.0", "lodash": "^4.17.21", "mute-stream": "0.0.8", "ora": "^5.4.1", "run-async": "^2.4.0", "rxjs": "^7.5.5", "string-width": "^4.1.0", "strip-ansi": "^6.0.0", "through": "^2.3.6", "wrap-ansi": "^6.0.1" } }, "sha512-UjOaSel/iddGZJ5xP/Eixh6dY1XghiBw4XK13rCCIJcJfyhhoul/7KhLLUGtebEj6GDYM6Vnx/mVsjx2L/mFIA=="], - "ioredis": ["ioredis@5.9.2", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", 
"lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-tAAg/72/VxOUW7RQSX1pIxJVucYKcjFjfvj60L57jrZpYCHC3XN0WCQ3sNYL4Gmvv+7GPvTAjc+KSdeNuE8oWQ=="], + "ioredis": ["ioredis@5.9.3", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-VI5tMCdeoxZWU5vjHWsiE/Su76JGhBvWF1MJnV9ZtGltHk9BmD48oDq8Tj8haZ85aceXZMxLNDQZRVo5QKNgXA=="], "ip-address": ["ip-address@10.1.0", "", {}, "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q=="], @@ -4078,8 +4078,6 @@ "body-parser/iconv-lite": ["iconv-lite@0.7.1", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-2Tth85cXwGFHfvRgZWszZSvdo+0Xsqmw8k8ZwxScfcBneNUraK+dxRxRm24nszx80Y0TVio8kKLt5sLE7ZCLlw=="], - "bullmq/ioredis": ["ioredis@5.9.3", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-VI5tMCdeoxZWU5vjHWsiE/Su76JGhBvWF1MJnV9ZtGltHk9BmD48oDq8Tj8haZ85aceXZMxLNDQZRVo5QKNgXA=="], - "c12/chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], "c12/confbox": ["confbox@0.2.4", "", {}, "sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ=="], From be83c97b83237e447a4218bb77c6069be7502a21 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 20:50:43 -0700 Subject: [PATCH 03/11] remove manual execs off queues --- 
.../api/workflows/[id]/execute/route.async.test.ts | 1 + apps/sim/app/api/workflows/[id]/execute/route.ts | 13 +++++++++---- apps/sim/lib/auth/hybrid.ts | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts index 1a4e0bd980f..355ae6ddf06 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts @@ -21,6 +21,7 @@ const { vi.mock('@/lib/auth/hybrid', () => ({ checkHybridAuth: mockCheckHybridAuth, + hasExternalApiCredentials: vi.fn().mockReturnValue(true), AuthType: { SESSION: 'session', API_KEY: 'api_key', diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 0b92df8aece..30e5b79f5c1 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { validate as uuidValidate, v4 as uuidv4 } from 'uuid' import { z } from 'zod' -import { AuthType, checkHybridAuth } from '@/lib/auth/hybrid' +import { AuthType, checkHybridAuth, hasExternalApiCredentials } from '@/lib/auth/hybrid' import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' import { getJobQueue, shouldExecuteInline, shouldUseBullMQ } from '@/lib/core/async-jobs' import { createBullMQJobData } from '@/lib/core/bullmq' @@ -326,6 +326,10 @@ async function enqueueDirectWorkflowExecution( * Supports both SSE streaming (for interactive/manual runs) and direct JSON responses (for background jobs). 
*/ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) { + if (!hasExternalApiCredentials(req.headers)) { + return handleExecutePost(req, params) + } + const ticket = tryAdmit() if (!ticket) { return admissionRejectedResponse() @@ -784,7 +788,7 @@ async function handleExecutePost( const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? {} - if (shouldUseBullMQ()) { + if (shouldUseBullMQ() && triggerType !== 'manual') { try { const dispatchJobId = await enqueueDirectWorkflowExecution( { @@ -799,7 +803,7 @@ async function handleExecutePost( timeoutMs: preprocessResult.executionTimeout?.sync, runFromBlock: resolvedRunFromBlock, }, - triggerType === 'manual' ? 1 : 5, + 5, 'interactive' ) @@ -973,7 +977,8 @@ async function handleExecutePost( } if (shouldUseDraftState) { - if (shouldUseBullMQ()) { + const useDispatchForManual = shouldUseBullMQ() && triggerType !== 'manual' + if (useDispatchForManual) { const metadata: ExecutionMetadata = { requestId, executionId, diff --git a/apps/sim/lib/auth/hybrid.ts b/apps/sim/lib/auth/hybrid.ts index af1e64da011..c461b12fcde 100644 --- a/apps/sim/lib/auth/hybrid.ts +++ b/apps/sim/lib/auth/hybrid.ts @@ -14,6 +14,20 @@ export const AuthType = { export type AuthTypeValue = (typeof AuthType)[keyof typeof AuthType] +const API_KEY_HEADER = 'x-api-key' +const BEARER_PREFIX = 'Bearer ' + +/** + * Lightweight header-only check for whether a request carries external API credentials. + * Does NOT validate the credentials — only inspects headers to classify the request + * as programmatic API traffic vs interactive session traffic. 
+ */ +export function hasExternalApiCredentials(headers: Headers): boolean { + if (headers.has(API_KEY_HEADER)) return true + const auth = headers.get('authorization') + return auth !== null && auth.startsWith(BEARER_PREFIX) +} + export interface AuthResult { success: boolean userId?: string From 74de3319c6597952f1175aed0785577e53d27e52 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Sun, 15 Mar 2026 21:14:33 -0700 Subject: [PATCH 04/11] address comments --- apps/sim/app/api/jobs/[jobId]/route.ts | 2 +- apps/sim/app/api/schedules/execute/route.ts | 7 +++---- apps/sim/app/api/workflows/[id]/execute/route.ts | 15 +++++++++++++-- apps/sim/lib/core/admission/gate.ts | 4 +++- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/apps/sim/app/api/jobs/[jobId]/route.ts b/apps/sim/app/api/jobs/[jobId]/route.ts index aed0a106f8c..0ce749fa828 100644 --- a/apps/sim/app/api/jobs/[jobId]/route.ts +++ b/apps/sim/app/api/jobs/[jobId]/route.ts @@ -27,7 +27,7 @@ export async function GET( const dispatchJob = await getDispatchJobRecord(taskId) const jobQueue = await getJobQueue() - const job = dispatchJob ? await jobQueue.getJob(taskId) : await jobQueue.getJob(taskId) + const job = dispatchJob ? null : await jobQueue.getJob(taskId) if (!job && !dispatchJob) { return createErrorResponse('Task not found', 404) diff --git a/apps/sim/app/api/schedules/execute/route.ts b/apps/sim/app/api/schedules/execute/route.ts index 1744e5743ff..d739f3aa67b 100644 --- a/apps/sim/app/api/schedules/execute/route.ts +++ b/apps/sim/app/api/schedules/execute/route.ts @@ -115,11 +115,10 @@ export async function GET(request: NextRequest) { } try { - const workspaceId = schedule.workflowId - ? (await import('@/lib/workflows/utils')).getWorkflowById + const { getWorkflowById } = await import('@/lib/workflows/utils') + const resolvedWorkflow = schedule.workflowId + ? await getWorkflowById(schedule.workflowId) : null - const resolvedWorkflow = - schedule.workflowId && workspaceId ? 
await workspaceId(schedule.workflowId) : null const resolvedWorkspaceId = resolvedWorkflow?.workspaceId let jobId: string diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 30e5b79f5c1..90b5a682d66 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -812,6 +812,17 @@ async function handleExecutePost( (preprocessResult.executionTimeout?.sync ?? 300000) + 30000 ) + if (resultRecord.status === 'failed') { + return NextResponse.json( + { + success: false, + executionId, + error: resultRecord.error ?? 'Workflow execution failed', + }, + { status: 500 } + ) + } + const result = resultRecord.output as QueuedWorkflowExecutionResult const resultForResponseBlock = { @@ -977,8 +988,8 @@ async function handleExecutePost( } if (shouldUseDraftState) { - const useDispatchForManual = shouldUseBullMQ() && triggerType !== 'manual' - if (useDispatchForManual) { + const shouldDispatchViaQueue = shouldUseBullMQ() && triggerType !== 'manual' + if (shouldDispatchViaQueue) { const metadata: ExecutionMetadata = { requestId, executionId, diff --git a/apps/sim/lib/core/admission/gate.ts b/apps/sim/lib/core/admission/gate.ts index a1dc7e0dce9..0e6b7552df3 100644 --- a/apps/sim/lib/core/admission/gate.ts +++ b/apps/sim/lib/core/admission/gate.ts @@ -15,7 +15,9 @@ export interface AdmissionTicket { /** * Attempts to admit a request through the in-process gate. * Returns a ticket with a release() handle on success, or null if at capacity. - * Zero external calls — purely in-process atomic counter. + * Zero external calls — purely in-process atomic counter. Each pod maintains its + * own counter, so the effective aggregate limit across N pods is N × MAX_INFLIGHT. + * Configure ADMISSION_GATE_MAX_INFLIGHT per pod based on what each pod can sustain. 
*/ export function tryAdmit(): AdmissionTicket | null { if (inflight >= MAX_INFLIGHT) { From 7eab00b9335dc85f38b156203a89ab52f86b6641 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 09:51:56 -0700 Subject: [PATCH 05/11] fix legacy team limits --- apps/sim/lib/billing/workspace-concurrency.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sim/lib/billing/workspace-concurrency.ts b/apps/sim/lib/billing/workspace-concurrency.ts index e164bdb2ccd..acb07169f91 100644 --- a/apps/sim/lib/billing/workspace-concurrency.ts +++ b/apps/sim/lib/billing/workspace-concurrency.ts @@ -72,7 +72,7 @@ function getPlanConcurrencyLimit(plan: string | null | undefined, metadata: unkn return getEnterpriseConcurrencyLimit(metadata) } - if (plan === 'team') { + if (isTeam(plan)) { return getTeamConcurrencyLimit() } @@ -81,7 +81,7 @@ function getPlanConcurrencyLimit(plan: string | null | undefined, metadata: unkn return getTeamConcurrencyLimit() } - if (isPro(plan) || isTeam(plan)) { + if (isPro(plan)) { return getProConcurrencyLimit() } From d5fbc3c12d177d06cc62cee6c972921276dac249 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 11:27:58 -0700 Subject: [PATCH 06/11] cleanup enterprise typing code --- .../app/api/workflows/[id]/execute/route.ts | 5 ++-- apps/sim/lib/billing/types/index.ts | 5 +++- apps/sim/lib/billing/webhooks/enterprise.ts | 30 ++----------------- 3 files changed, 9 insertions(+), 31 deletions(-) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 90b5a682d66..71de6ecccf6 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -207,8 +207,9 @@ async function handleAsyncExecution(params: AsyncExecutionParams): Promise v.toLowerCase()) + .pipe(z.literal('enterprise')), // The referenceId must be provided in Stripe metadata to link to the organization // This gets stored in 
the subscription.referenceId column referenceId: z.string().min(1), diff --git a/apps/sim/lib/billing/webhooks/enterprise.ts b/apps/sim/lib/billing/webhooks/enterprise.ts index c4bc6a19f22..92c3bcf459f 100644 --- a/apps/sim/lib/billing/webhooks/enterprise.ts +++ b/apps/sim/lib/billing/webhooks/enterprise.ts @@ -55,34 +55,8 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { }) throw new Error('Invalid enterprise metadata for subscription') } - const metadataJson: Record = { - ...metadata, - workspaceConcurrencyLimit: - typeof metadata.workspaceConcurrencyLimit === 'string' - ? Number.parseInt(metadata.workspaceConcurrencyLimit, 10) - : metadata.workspaceConcurrencyLimit, - } - - const seats = enterpriseMetadata.seats - const monthlyPrice = enterpriseMetadata.monthlyPrice - if (!seats || seats <= 0 || Number.isNaN(seats)) { - logger.error('[subscription.created] Invalid or missing seats in enterprise metadata', { - subscriptionId: stripeSubscription.id, - seatsRaw: enterpriseMetadata.seats, - seatsParsed: seats, - }) - throw new Error('Enterprise subscription must include valid seats in metadata') - } - - if (!monthlyPrice || monthlyPrice <= 0 || Number.isNaN(monthlyPrice)) { - logger.error('[subscription.created] Invalid or missing monthlyPrice in enterprise metadata', { - subscriptionId: stripeSubscription.id, - monthlyPriceRaw: enterpriseMetadata.monthlyPrice, - monthlyPriceParsed: monthlyPrice, - }) - throw new Error('Enterprise subscription must include valid monthlyPrice in metadata') - } + const { seats, monthlyPrice } = enterpriseMetadata // Get the first subscription item which contains the period information const referenceItem = stripeSubscription.items?.data?.[0] @@ -106,7 +80,7 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { ? new Date(stripeSubscription.trial_start * 1000) : null, trialEnd: stripeSubscription.trial_end ? 
new Date(stripeSubscription.trial_end * 1000) : null, - metadata: metadataJson, + metadata: metadata as Record, } const existing = await db From 8ee4c5913ed010cb54021a8ccb879c2c19fdd1ab Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 14:58:49 -0700 Subject: [PATCH 07/11] inline child triggers --- apps/sim/app/api/workflows/[id]/execute/route.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 71de6ecccf6..4aad41a70ec 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -118,6 +118,8 @@ const ExecuteWorkflowSchema = z.object({ export const runtime = 'nodejs' export const dynamic = 'force-dynamic' +const INLINE_TRIGGER_TYPES = new Set(['manual', 'workflow']) + function resolveOutputIds( selectedOutputs: string[] | undefined, blocks: Record @@ -789,7 +791,7 @@ async function handleExecutePost( const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? 
{} - if (shouldUseBullMQ() && triggerType !== 'manual') { + if (shouldUseBullMQ() && !INLINE_TRIGGER_TYPES.has(triggerType)) { try { const dispatchJobId = await enqueueDirectWorkflowExecution( { @@ -989,7 +991,7 @@ async function handleExecutePost( } if (shouldUseDraftState) { - const shouldDispatchViaQueue = shouldUseBullMQ() && triggerType !== 'manual' + const shouldDispatchViaQueue = shouldUseBullMQ() && !INLINE_TRIGGER_TYPES.has(triggerType) if (shouldDispatchViaQueue) { const metadata: ExecutionMetadata = { requestId, From 41e1c9cb1f2078fc92be49986ffe5b3d1cd2ad5c Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 15:06:50 -0700 Subject: [PATCH 08/11] fix status check --- apps/sim/lib/core/workspace-dispatch/memory-store.ts | 9 +++++++++ apps/sim/lib/core/workspace-dispatch/redis-store.ts | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts index 2f4e0966bbd..a00d025e8a0 100644 --- a/apps/sim/lib/core/workspace-dispatch/memory-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -114,6 +114,8 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA return Array.from(this.jobs.values()).filter((record) => statuses.includes(record.status)) } + private static readonly TERMINAL_STATUSES = new Set(['completed', 'failed']) + async updateDispatchJobRecord( jobId: string, updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord @@ -124,6 +126,13 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA } const updated = updater(current) + if ( + MemoryWorkspaceDispatchStorage.TERMINAL_STATUSES.has(current.status) && + !MemoryWorkspaceDispatchStorage.TERMINAL_STATUSES.has(updated.status) + ) { + return current + } + this.jobs.set(jobId, updated) return updated } diff --git a/apps/sim/lib/core/workspace-dispatch/redis-store.ts 
b/apps/sim/lib/core/workspace-dispatch/redis-store.ts index 82ac3202803..9ea1ea4caff 100644 --- a/apps/sim/lib/core/workspace-dispatch/redis-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/redis-store.ts @@ -233,6 +233,8 @@ export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAd return jobs } + private static readonly TERMINAL_STATUSES = new Set(['completed', 'failed']) + async updateDispatchJobRecord( jobId: string, updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord @@ -243,6 +245,13 @@ export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAd } const updated = updater(current) + if ( + RedisWorkspaceDispatchStorage.TERMINAL_STATUSES.has(current.status) && + !RedisWorkspaceDispatchStorage.TERMINAL_STATUSES.has(updated.status) + ) { + return current + } + await this.saveDispatchJob(updated) return updated } From 7bf9526578846010f4a0df9444ae3809447855db Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 15:30:54 -0700 Subject: [PATCH 09/11] address more comments --- .../workspace-notification-delivery.ts | 12 +++++----- .../core/workspace-dispatch/memory-store.ts | 24 ++++++++++++++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/apps/sim/background/workspace-notification-delivery.ts b/apps/sim/background/workspace-notification-delivery.ts index 230d33dae67..41d08f80cbd 100644 --- a/apps/sim/background/workspace-notification-delivery.ts +++ b/apps/sim/background/workspace-notification-delivery.ts @@ -501,15 +501,15 @@ export type NotificationDeliveryResult = | { status: 'retry'; retryDelayMs: number } async function buildRetryLog(params: NotificationDeliveryParams): Promise { + const conditions = [eq(workflowExecutionLogs.executionId, params.log.executionId)] + if (params.log.workflowId) { + conditions.push(eq(workflowExecutionLogs.workflowId, params.log.workflowId)) + } + const [storedLog] = await db .select() .from(workflowExecutionLogs) - .where( 
- and( - eq(workflowExecutionLogs.executionId, params.log.executionId), - eq(workflowExecutionLogs.workflowId, params.log.workflowId!) - ) - ) + .where(and(...conditions)) .limit(1) if (storedLog) { diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts index a00d025e8a0..0cae504b607 100644 --- a/apps/sim/lib/core/workspace-dispatch/memory-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -16,7 +16,6 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA private workspaceOrder: string[] = [] private laneQueues = new Map() private leases = new Map>() - private sequence = 0 private cleanupInterval: NodeJS.Timeout | null = null constructor() { @@ -296,7 +295,20 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA } async popNextWorkspaceId(): Promise { - return this.workspaceOrder.shift() ?? null + const now = Date.now() + const maxScans = this.workspaceOrder.length + for (let i = 0; i < maxScans; i++) { + const id = this.workspaceOrder.shift() + if (!id) return null + const readyAt = this.workspaceReadyAt.get(id) + if (readyAt && readyAt > now) { + this.workspaceOrder.push(id) + continue + } + this.workspaceReadyAt.delete(id) + return id + } + return null } async getQueuedWorkspaceCount(): Promise { @@ -307,7 +319,12 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA return this.workspaceOrder.includes(workspaceId) } - async ensureWorkspaceActive(workspaceId: string): Promise { + private workspaceReadyAt = new Map() + + async ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise { + if (readyAt && readyAt > Date.now()) { + this.workspaceReadyAt.set(workspaceId, readyAt) + } this.ensureWorkspaceQueued(workspaceId) } @@ -473,6 +490,7 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA this.workspaceOrder = [] this.laneQueues.clear() 
this.leases.clear() + this.workspaceReadyAt.clear() } dispose(): void { From 9a6886c39b991f8183375355d8fb4dbd3f070f34 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 15:52:52 -0700 Subject: [PATCH 10/11] optimize reconciler scan --- .../lib/core/workspace-dispatch/adapter.ts | 2 +- .../core/workspace-dispatch/memory-store.ts | 2 +- .../lib/core/workspace-dispatch/reconciler.ts | 38 +++++++++++++++++-- .../core/workspace-dispatch/redis-store.ts | 10 +---- apps/sim/lib/core/workspace-dispatch/store.ts | 4 +- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/apps/sim/lib/core/workspace-dispatch/adapter.ts b/apps/sim/lib/core/workspace-dispatch/adapter.ts index 637688d1117..6d55a70ff3c 100644 --- a/apps/sim/lib/core/workspace-dispatch/adapter.ts +++ b/apps/sim/lib/core/workspace-dispatch/adapter.ts @@ -34,7 +34,7 @@ export interface WorkspaceDispatchStorageAdapter { lanes: readonly WorkspaceDispatchLane[] ): Promise getGlobalQueueDepth(): Promise - reconcileGlobalQueueDepth(): Promise + reconcileGlobalQueueDepth(knownCount: number): Promise popNextWorkspaceId(): Promise getQueuedWorkspaceCount(): Promise hasActiveWorkspace(workspaceId: string): Promise diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts index 0cae504b607..1c874d091be 100644 --- a/apps/sim/lib/core/workspace-dispatch/memory-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -290,7 +290,7 @@ export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageA return count } - async reconcileGlobalQueueDepth(): Promise { + async reconcileGlobalQueueDepth(_knownCount: number): Promise { // no-op: memory store computes depth on the fly } diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.ts b/apps/sim/lib/core/workspace-dispatch/reconciler.ts index af67edb522a..739d75533b1 100644 --- a/apps/sim/lib/core/workspace-dispatch/reconciler.ts +++ 
b/apps/sim/lib/core/workspace-dispatch/reconciler.ts @@ -153,9 +153,39 @@ async function reconcileWaitingWorkspaceTracking( } export async function reconcileWorkspaceDispatchState(): Promise { - const activeJobs = await listDispatchJobsByStatuses(['admitting', 'admitted', 'running']) - const waitingJobs = await listDispatchJobsByStatuses(['waiting']) - const terminalJobs = await listDispatchJobsByStatuses(['completed', 'failed']) + const allJobs = await listDispatchJobsByStatuses([ + 'waiting', + 'admitting', + 'admitted', + 'running', + 'completed', + 'failed', + ]) + + const activeJobs: WorkspaceDispatchJobRecord[] = [] + const waitingJobs: WorkspaceDispatchJobRecord[] = [] + const terminalJobs: WorkspaceDispatchJobRecord[] = [] + let nonTerminalCount = 0 + + for (const job of allJobs) { + switch (job.status) { + case 'admitting': + case 'admitted': + case 'running': + activeJobs.push(job) + nonTerminalCount++ + break + case 'waiting': + waitingJobs.push(job) + nonTerminalCount++ + break + case 'completed': + case 'failed': + terminalJobs.push(job) + break + } + } + let changed = false for (const record of activeJobs) { @@ -181,7 +211,7 @@ export async function reconcileWorkspaceDispatchState(): Promise { } } - await reconcileGlobalQueueDepth().catch((error) => { + await reconcileGlobalQueueDepth(nonTerminalCount).catch((error) => { logger.error('Failed to reconcile global queue depth', { error }) }) diff --git a/apps/sim/lib/core/workspace-dispatch/redis-store.ts b/apps/sim/lib/core/workspace-dispatch/redis-store.ts index 9ea1ea4caff..8fbf8dfee4f 100644 --- a/apps/sim/lib/core/workspace-dispatch/redis-store.ts +++ b/apps/sim/lib/core/workspace-dispatch/redis-store.ts @@ -363,14 +363,8 @@ export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAd return count ? 
Math.max(0, Number.parseInt(count, 10)) : 0 } - async reconcileGlobalQueueDepth(): Promise { - const allJobs = await this.listDispatchJobsByStatuses([ - 'waiting', - 'admitting', - 'admitted', - 'running', - ]) - await this.redis.set(GLOBAL_DEPTH_KEY, allJobs.length) + async reconcileGlobalQueueDepth(knownCount: number): Promise { + await this.redis.set(GLOBAL_DEPTH_KEY, knownCount) } async popNextWorkspaceId(): Promise { diff --git a/apps/sim/lib/core/workspace-dispatch/store.ts b/apps/sim/lib/core/workspace-dispatch/store.ts index 9bc7f0bebe9..86c1c3951a9 100644 --- a/apps/sim/lib/core/workspace-dispatch/store.ts +++ b/apps/sim/lib/core/workspace-dispatch/store.ts @@ -77,8 +77,8 @@ export async function getGlobalQueueDepth(): Promise { return getAdapter().getGlobalQueueDepth() } -export async function reconcileGlobalQueueDepth(): Promise { - return getAdapter().reconcileGlobalQueueDepth() +export async function reconcileGlobalQueueDepth(knownCount: number): Promise { + return getAdapter().reconcileGlobalQueueDepth(knownCount) } export async function popNextWorkspaceId(): Promise { From 2bf1feb7b86eb782607192c9e9653a7071832fd2 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 16 Mar 2026 16:40:21 -0700 Subject: [PATCH 11/11] remove dead code --- .../app/api/workflows/[id]/execute/route.ts | 3 +- .../sim/lib/core/async-jobs/backends/index.ts | 1 - .../core/async-jobs/backends/redis.test.ts | 176 ------------------ .../sim/lib/core/async-jobs/backends/redis.ts | 146 --------------- apps/sim/lib/core/async-jobs/types.ts | 2 +- 5 files changed, 3 insertions(+), 325 deletions(-) delete mode 100644 apps/sim/lib/core/async-jobs/backends/redis.test.ts delete mode 100644 apps/sim/lib/core/async-jobs/backends/redis.ts diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 4aad41a70ec..0a277f16e79 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ 
b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -329,7 +329,8 @@ async function enqueueDirectWorkflowExecution( * Supports both SSE streaming (for interactive/manual runs) and direct JSON responses (for background jobs). */ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) { - if (!hasExternalApiCredentials(req.headers)) { + const isSessionRequest = req.headers.has('cookie') && !hasExternalApiCredentials(req.headers) + if (isSessionRequest) { return handleExecutePost(req, params) } diff --git a/apps/sim/lib/core/async-jobs/backends/index.ts b/apps/sim/lib/core/async-jobs/backends/index.ts index ef84a232233..0abb55d6af5 100644 --- a/apps/sim/lib/core/async-jobs/backends/index.ts +++ b/apps/sim/lib/core/async-jobs/backends/index.ts @@ -1,4 +1,3 @@ export { BullMQJobQueue } from './bullmq' export { DatabaseJobQueue } from './database' -export { RedisJobQueue } from './redis' export { TriggerDevJobQueue } from './trigger-dev' diff --git a/apps/sim/lib/core/async-jobs/backends/redis.test.ts b/apps/sim/lib/core/async-jobs/backends/redis.test.ts deleted file mode 100644 index ea0a5df6078..00000000000 --- a/apps/sim/lib/core/async-jobs/backends/redis.test.ts +++ /dev/null @@ -1,176 +0,0 @@ -/** - * @vitest-environment node - */ -import { createMockRedis, loggerMock, type MockRedis } from '@sim/testing' -import { beforeEach, describe, expect, it, vi } from 'vitest' - -vi.mock('@sim/logger', () => loggerMock) - -import { - JOB_MAX_LIFETIME_SECONDS, - JOB_RETENTION_SECONDS, - JOB_STATUS, -} from '@/lib/core/async-jobs/types' -import { RedisJobQueue } from './redis' - -describe('RedisJobQueue', () => { - let mockRedis: MockRedis - let queue: RedisJobQueue - - beforeEach(() => { - vi.clearAllMocks() - mockRedis = createMockRedis() - queue = new RedisJobQueue(mockRedis as never) - }) - - describe('enqueue', () => { - it.concurrent('should create a job with pending status', async () => { - const localRedis = createMockRedis() - 
const localQueue = new RedisJobQueue(localRedis as never) - - const jobId = await localQueue.enqueue('workflow-execution', { test: 'data' }) - - expect(jobId).toMatch(/^run_/) - expect(localRedis.hset).toHaveBeenCalledTimes(1) - - const [key, data] = localRedis.hset.mock.calls[0] - expect(key).toBe(`async-jobs:job:${jobId}`) - expect(data.status).toBe(JOB_STATUS.PENDING) - expect(data.type).toBe('workflow-execution') - }) - - it.concurrent('should set max lifetime TTL on enqueue', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - const jobId = await localQueue.enqueue('workflow-execution', { test: 'data' }) - - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_MAX_LIFETIME_SECONDS - ) - }) - }) - - describe('completeJob', () => { - it.concurrent('should set status to completed and set TTL', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const jobId = 'run_test123' - - await localQueue.completeJob(jobId, { result: 'success' }) - - expect(localRedis.hset).toHaveBeenCalledWith(`async-jobs:job:${jobId}`, { - status: JOB_STATUS.COMPLETED, - completedAt: expect.any(String), - output: JSON.stringify({ result: 'success' }), - updatedAt: expect.any(String), - }) - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_RETENTION_SECONDS - ) - }) - - it.concurrent('should set TTL to 24 hours (86400 seconds)', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.completeJob('run_test123', {}) - - expect(localRedis.expire).toHaveBeenCalledWith(expect.any(String), 86400) - }) - }) - - describe('markJobFailed', () => { - it.concurrent('should set status to failed and set TTL', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const jobId = 
'run_test456' - const error = 'Something went wrong' - - await localQueue.markJobFailed(jobId, error) - - expect(localRedis.hset).toHaveBeenCalledWith(`async-jobs:job:${jobId}`, { - status: JOB_STATUS.FAILED, - completedAt: expect.any(String), - error, - updatedAt: expect.any(String), - }) - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_RETENTION_SECONDS - ) - }) - - it.concurrent('should set TTL to 24 hours (86400 seconds)', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.markJobFailed('run_test456', 'error') - - expect(localRedis.expire).toHaveBeenCalledWith(expect.any(String), 86400) - }) - }) - - describe('startJob', () => { - it.concurrent('should not set TTL when starting a job', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.startJob('run_test789') - - expect(localRedis.hset).toHaveBeenCalled() - expect(localRedis.expire).not.toHaveBeenCalled() - }) - }) - - describe('getJob', () => { - it.concurrent('should return null for non-existent job', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - localRedis.hgetall.mockResolvedValue({}) - - const job = await localQueue.getJob('run_nonexistent') - - expect(job).toBeNull() - }) - - it.concurrent('should deserialize job data correctly', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const now = new Date() - localRedis.hgetall.mockResolvedValue({ - id: 'run_test', - type: 'workflow-execution', - payload: JSON.stringify({ foo: 'bar' }), - status: JOB_STATUS.COMPLETED, - createdAt: now.toISOString(), - startedAt: now.toISOString(), - completedAt: now.toISOString(), - attempts: '1', - maxAttempts: '3', - error: '', - output: JSON.stringify({ result: 'ok' }), - metadata: 
JSON.stringify({ workflowId: 'wf_123' }), - }) - - const job = await localQueue.getJob('run_test') - - expect(job).not.toBeNull() - expect(job?.id).toBe('run_test') - expect(job?.type).toBe('workflow-execution') - expect(job?.payload).toEqual({ foo: 'bar' }) - expect(job?.status).toBe(JOB_STATUS.COMPLETED) - expect(job?.output).toEqual({ result: 'ok' }) - expect(job?.metadata.workflowId).toBe('wf_123') - }) - }) -}) - -describe('JOB_RETENTION_SECONDS', () => { - it.concurrent('should be 24 hours in seconds', async () => { - expect(JOB_RETENTION_SECONDS).toBe(24 * 60 * 60) - expect(JOB_RETENTION_SECONDS).toBe(86400) - }) -}) diff --git a/apps/sim/lib/core/async-jobs/backends/redis.ts b/apps/sim/lib/core/async-jobs/backends/redis.ts deleted file mode 100644 index 6a361d0e9c0..00000000000 --- a/apps/sim/lib/core/async-jobs/backends/redis.ts +++ /dev/null @@ -1,146 +0,0 @@ -import { createLogger } from '@sim/logger' -import type Redis from 'ioredis' -import { - type EnqueueOptions, - JOB_MAX_LIFETIME_SECONDS, - JOB_RETENTION_SECONDS, - JOB_STATUS, - type Job, - type JobMetadata, - type JobQueueBackend, - type JobStatus, - type JobType, -} from '@/lib/core/async-jobs/types' - -const logger = createLogger('RedisJobQueue') - -const KEYS = { - job: (id: string) => `async-jobs:job:${id}`, -} as const - -function serializeJob(job: Job): Record { - return { - id: job.id, - type: job.type, - payload: JSON.stringify(job.payload), - status: job.status, - createdAt: job.createdAt.toISOString(), - startedAt: job.startedAt?.toISOString() ?? '', - completedAt: job.completedAt?.toISOString() ?? '', - attempts: job.attempts.toString(), - maxAttempts: job.maxAttempts.toString(), - error: job.error ?? '', - output: job.output !== undefined ? 
JSON.stringify(job.output) : '', - metadata: JSON.stringify(job.metadata), - updatedAt: new Date().toISOString(), - } -} - -function deserializeJob(data: Record): Job | null { - if (!data || !data.id) return null - - try { - return { - id: data.id, - type: data.type as JobType, - payload: JSON.parse(data.payload), - status: data.status as JobStatus, - createdAt: new Date(data.createdAt), - startedAt: data.startedAt ? new Date(data.startedAt) : undefined, - completedAt: data.completedAt ? new Date(data.completedAt) : undefined, - attempts: Number.parseInt(data.attempts, 10), - maxAttempts: Number.parseInt(data.maxAttempts, 10), - error: data.error || undefined, - output: data.output ? JSON.parse(data.output) : undefined, - metadata: JSON.parse(data.metadata) as JobMetadata, - } - } catch (error) { - logger.error('Failed to deserialize job', { error, data }) - return null - } -} - -export class RedisJobQueue implements JobQueueBackend { - private redis: Redis - - constructor(redis: Redis) { - this.redis = redis - } - - async enqueue( - type: JobType, - payload: TPayload, - options?: EnqueueOptions - ): Promise { - const jobId = `run_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` - const now = new Date() - - const job: Job = { - id: jobId, - type, - payload, - status: JOB_STATUS.PENDING, - createdAt: now, - attempts: 0, - maxAttempts: options?.maxAttempts ?? 3, - metadata: options?.metadata ?? 
{}, - } - - const key = KEYS.job(jobId) - const serialized = serializeJob(job as Job) - await this.redis.hset(key, serialized) - await this.redis.expire(key, JOB_MAX_LIFETIME_SECONDS) - - logger.debug('Enqueued job', { jobId, type }) - return jobId - } - - async getJob(jobId: string): Promise { - const data = await this.redis.hgetall(KEYS.job(jobId)) - return deserializeJob(data) - } - - async startJob(jobId: string): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.PROCESSING, - startedAt: now.toISOString(), - updatedAt: now.toISOString(), - }) - await this.redis.hincrby(key, 'attempts', 1) - - logger.debug('Started job', { jobId }) - } - - async completeJob(jobId: string, output: unknown): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.COMPLETED, - completedAt: now.toISOString(), - output: JSON.stringify(output), - updatedAt: now.toISOString(), - }) - await this.redis.expire(key, JOB_RETENTION_SECONDS) - - logger.debug('Completed job', { jobId }) - } - - async markJobFailed(jobId: string, error: string): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.FAILED, - completedAt: now.toISOString(), - error, - updatedAt: now.toISOString(), - }) - await this.redis.expire(key, JOB_RETENTION_SECONDS) - - logger.debug('Marked job as failed', { jobId }) - } -} diff --git a/apps/sim/lib/core/async-jobs/types.ts b/apps/sim/lib/core/async-jobs/types.ts index a2ccf1d680c..c4bdc27c5ef 100644 --- a/apps/sim/lib/core/async-jobs/types.ts +++ b/apps/sim/lib/core/async-jobs/types.ts @@ -99,4 +99,4 @@ export interface JobQueueBackend { markJobFailed(jobId: string, error: string): Promise } -export type AsyncBackendType = 'trigger-dev' | 'bullmq' | 'redis' | 'database' +export type AsyncBackendType = 'trigger-dev' | 'bullmq' | 'database'