From 3983e223271707fd6aa5522bc6d745b77b685e63 Mon Sep 17 00:00:00 2001 From: FishEnjoyer2025 Date: Tue, 2 Jun 2026 17:40:40 -0500 Subject: [PATCH] fix(export): stream database dump in batches to support large DBs (#59) --- src/export/dump.test.ts | 23 +++++++ src/export/dump.ts | 134 ++++++++++++++++++++++++++++------------ 2 files changed, 118 insertions(+), 39 deletions(-) diff --git a/src/export/dump.test.ts b/src/export/dump.test.ts index ca65b43..35dcce3 100644 --- a/src/export/dump.test.ts +++ b/src/export/dump.test.ts @@ -128,6 +128,29 @@ describe('Database Dump Module', () => { ) }) + it('streams large tables in bounded batches (constant memory)', async () => { + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'logs' }]) // tables + .mockResolvedValueOnce([{ sql: 'CREATE TABLE logs (id INTEGER);' }]) // schema + .mockResolvedValueOnce([{ id: 1 }, { id: 2 }]) // batch 1 (full) + .mockResolvedValueOnce([{ id: 3 }]) // batch 2 (partial -> stop) + + const response = await dumpDatabaseRoute(mockDataSource, mockConfig, 2) + const dumpText = await response.text() + + expect(dumpText).toContain('INSERT INTO logs VALUES (1);') + expect(dumpText).toContain('INSERT INTO logs VALUES (2);') + expect(dumpText).toContain('INSERT INTO logs VALUES (3);') + + // A second batch must have been requested with OFFSET advanced by batchSize. + const issuedOffsetQuery = vi + .mocked(executeOperation) + .mock.calls.some((args: any[]) => + args[0]?.[0]?.sql?.includes('OFFSET 2') + ) + expect(issuedOffsetQuery).toBe(true) + }) + it('should return a 500 response when an error occurs', async () => { const consoleErrorMock = vi .spyOn(console, 'error') diff --git a/src/export/dump.ts b/src/export/dump.ts index 91a2e89..84db713 100644 --- a/src/export/dump.ts +++ b/src/export/dump.ts @@ -3,67 +3,123 @@ import { StarbaseDBConfiguration } from '../handler' import { DataSource } from '../types' import { createResponse } from '../utils' +/** + * Default number of rows fetched per batch while streaming table data. Keeping the + * working set bounded is what lets arbitrarily-large tables be dumped without loading + * the whole database into memory. + */ +export const DEFAULT_DUMP_BATCH_SIZE = 1000 + +/** + * Streams a SQL dump of the database. + * + * Previously the whole dump was assembled in a single in-memory string and every table + * was read with one unbounded `SELECT *`, so large databases ran out of memory and/or + * exceeded the 30s request window (#59). This version instead: + * + * - Streams the response via a `ReadableStream`, so bytes flow to the client as they + * are produced (the connection stays active instead of waiting for one giant body). + * - Reads each table's rows in bounded batches (`LIMIT`/`OFFSET`), so memory usage stays + * roughly constant regardless of table size. + * + * The initial table-list query is performed up front so that an early failure still + * returns a clean 500 (rather than a half-streamed 200). + */ export async function dumpDatabaseRoute( dataSource: DataSource, - config: StarbaseDBConfiguration + config: StarbaseDBConfiguration, + batchSize: number = DEFAULT_DUMP_BATCH_SIZE ): Promise { try { - // Get all table names + // Resolve the table list up front so early errors surface as a 500. const tablesResult = await executeOperation( [{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }], dataSource, config ) - const tables = tablesResult.map((row: any) => row.name) - let dumpContent = 'SQLite format 3\0' // SQLite file header - // Iterate through all tables - for (const table of tables) { - // Get table schema - const schemaResult = await executeOperation( - [ - { - sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`, - }, - ], - dataSource, - config - ) + const encoder = new TextEncoder() + + const stream = new ReadableStream({ + async start(controller) { + try { + controller.enqueue(encoder.encode('SQLite format 3\0')) // SQLite file header + + for (const table of tables) { + // Table schema + const schemaResult = await executeOperation( + [ + { + sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name = ?;`, + params: [table], + }, + ], + dataSource, + config + ) - if (schemaResult.length) { - const schema = schemaResult[0].sql - dumpContent += `\n-- Table: ${table}\n${schema};\n\n` - } + if (schemaResult.length) { + const schema = schemaResult[0].sql + controller.enqueue( + encoder.encode( + `\n-- Table: ${table}\n${schema};\n\n` + ) + ) + } - // Get table data - const dataResult = await executeOperation( - [{ sql: `SELECT * FROM ${table};` }], - dataSource, - config - ) + // Table data, streamed in bounded batches. + let offset = 0 + while (true) { + const dataResult = await executeOperation( + [ + { + sql: `SELECT * FROM "${table}" LIMIT ${batchSize} OFFSET ${offset};`, + }, + ], + dataSource, + config + ) - for (const row of dataResult) { - const values = Object.values(row).map((value) => - typeof value === 'string' - ? `'${value.replace(/'/g, "''")}'` - : value - ) - dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n` - } + if (!dataResult.length) { + break + } - dumpContent += '\n' - } + let chunk = '' + for (const row of dataResult) { + const values = Object.values(row).map( + (value) => + typeof value === 'string' + ? `'${value.replace(/'/g, "''")}'` + : value + ) + chunk += `INSERT INTO ${table} VALUES (${values.join(', ')});\n` + } + controller.enqueue(encoder.encode(chunk)) - // Create a Blob from the dump content - const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' }) + if (dataResult.length < batchSize) { + break + } + offset += batchSize + } + + controller.enqueue(encoder.encode('\n')) + } + + controller.close() + } catch (error: any) { + console.error('Database Dump Error (stream):', error) + controller.error(error) + } + }, + }) const headers = new Headers({ 'Content-Type': 'application/x-sqlite3', 'Content-Disposition': 'attachment; filename="database_dump.sql"', }) - return new Response(blob, { headers }) + return new Response(stream, { headers }) } catch (error: any) { console.error('Database Dump Error:', error) return createResponse(undefined, 'Failed to create database dump', 500)