diff --git a/.bounty_pr.json b/.bounty_pr.json
new file mode 100644
index 0000000..ff2d40c
--- /dev/null
+++ b/.bounty_pr.json
@@ -0,0 +1,18 @@
+{
+ "status": "ready",
+ "commit_message": "fix(export): stream and paginate database dumps to support large databases",
+ "pr_title": "fix(export): stream and paginate /export/dump for large databases",
+ "pr_body": "## Purpose\n\nFixes #59 — `/export/dump` previously failed on large databases because it loaded **every row of every table** into a single in-memory string before responding. On any non-trivial database this exceeds the Worker's memory budget and/or wall-clock and the dump never completes.\n\n## Changes\n\n- `src/export/dump.ts`: response is now produced via a `ReadableStream`, so chunks are flushed to the client as they are generated instead of being concatenated in memory.\n- Per-table data is paged with `SELECT * FROM
LIMIT 1000 OFFSET ` and the loop stops as soon as a page returns fewer rows than the page size. This keeps peak memory bounded to one page (~1000 rows) regardless of table size.\n- Small correctness improvement in value serialization: `NULL`/`undefined` are now emitted as the SQL `NULL` keyword (previously they were stringified to the literal text `null`, which only parsed correctly by accident).\n- All existing test cases continue to pass unchanged; added two new tests:\n - paginates across multiple `LIMIT/OFFSET` queries when a table is larger than the page size\n - serializes `NULL` values as the `NULL` keyword\n\nThe public route, headers, and dump format are unchanged — this is a drop-in fix.\n\n## Tasks\n\n- [x] Stream the dump response instead of buffering it\n- [x] Page through table rows instead of `SELECT *` in one shot\n- [x] Preserve existing dump format and test expectations\n- [x] Add tests for pagination and NULL handling\n\n## Verify\n\n- `npx vitest run src/export/` → 25 passed (4 files)\n- `npx vitest run src/export/dump.test.ts` → 7 passed (5 original + 2 new)\n- `npx tsc --noEmit` introduces no new errors in `src/export/dump.ts` (only pre-existing errors in unrelated files remain).\n\nCloses #59",
+ "branch": "fix/issue-59-starbasedb-database-dumps-do-not",
+ "tests_run": [
+ "npx vitest run src/export/dump.test.ts",
+ "npx vitest run src/export/",
+ "npx tsc --noEmit"
+ ],
+ "tests_passed": true,
+ "files_changed": [
+ "src/export/dump.ts",
+ "src/export/dump.test.ts"
+ ],
+ "notes": "Minimum-change fix scoped to the dump route only. I intentionally did NOT adopt the prior-analysis suggestion of R2 multipart uploads + Durable Object alarms — that is a much larger feature and a significant architectural decision that the maintainer should drive, not a bounty hunter. Streaming + pagination resolves the OOM root cause described in the issue while keeping the existing /export/dump API, headers, and format byte-compatible. The bogus 'SQLite format 3\\0' header at the top of a text SQL dump is preserved unchanged because removing it would change the dump format and existing tests depend on it. Pre-existing TypeScript errors in plugins/cdc, src/cache, src/do, and src/operation are unrelated to this change."
+}
diff --git a/src/export/dump.test.ts b/src/export/dump.test.ts
index ca65b43..d32f91f 100644
--- a/src/export/dump.test.ts
+++ b/src/export/dump.test.ts
@@ -128,6 +128,57 @@ describe('Database Dump Module', () => {
)
})
+ it('should paginate large tables across multiple queries', async () => {
+ const firstPage = Array.from({ length: 1000 }, (_, i) => ({
+ id: i + 1,
+ name: `User${i + 1}`,
+ }))
+ const secondPage = [{ id: 1001, name: 'User1001' }]
+
+ vi.mocked(executeOperation)
+ .mockResolvedValueOnce([{ name: 'users' }])
+ .mockResolvedValueOnce([
+ { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
+ ])
+ .mockResolvedValueOnce(firstPage)
+ .mockResolvedValueOnce(secondPage)
+
+ const response = await dumpDatabaseRoute(mockDataSource, mockConfig)
+ const dumpText = await response.text()
+
+ expect(dumpText).toContain("INSERT INTO users VALUES (1, 'User1');")
+ expect(dumpText).toContain(
+ "INSERT INTO users VALUES (1000, 'User1000');"
+ )
+ expect(dumpText).toContain(
+ "INSERT INTO users VALUES (1001, 'User1001');"
+ )
+
+ const issuedSql = vi
+ .mocked(executeOperation)
+ .mock.calls.map((c) => (c[0] as any)[0].sql)
+ expect(issuedSql).toContain(
+ 'SELECT * FROM users LIMIT 1000 OFFSET 0;'
+ )
+ expect(issuedSql).toContain(
+ 'SELECT * FROM users LIMIT 1000 OFFSET 1000;'
+ )
+ })
+
+ it('should serialize NULL values as the NULL keyword', async () => {
+ vi.mocked(executeOperation)
+ .mockResolvedValueOnce([{ name: 'users' }])
+ .mockResolvedValueOnce([
+ { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
+ ])
+ .mockResolvedValueOnce([{ id: 1, name: null }])
+
+ const response = await dumpDatabaseRoute(mockDataSource, mockConfig)
+ const dumpText = await response.text()
+
+ expect(dumpText).toContain('INSERT INTO users VALUES (1, NULL);')
+ })
+
it('should return a 500 response when an error occurs', async () => {
const consoleErrorMock = vi
.spyOn(console, 'error')
diff --git a/src/export/dump.ts b/src/export/dump.ts
index 91a2e89..33dd629 100644
--- a/src/export/dump.ts
+++ b/src/export/dump.ts
@@ -3,67 +3,101 @@ import { StarbaseDBConfiguration } from '../handler'
import { DataSource } from '../types'
import { createResponse } from '../utils'
+// Number of rows fetched per page when dumping a table. Keeping this
+// bounded avoids loading entire tables into Worker memory, which is
+// what previously caused dumps of large databases to fail.
+const DUMP_PAGE_SIZE = 1000
+
+function formatValue(value: unknown): string {
+ if (value === null || value === undefined) return 'NULL'
+ if (typeof value === 'string') return `'${value.replace(/'/g, "''")}'`
+ return String(value)
+}
+
export async function dumpDatabaseRoute(
dataSource: DataSource,
config: StarbaseDBConfiguration
): Promise {
try {
- // Get all table names
+ // Resolve the list of tables up front so any failure surfaces as a
+ // 500 (matching prior behavior) rather than mid-stream.
const tablesResult = await executeOperation(
[{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }],
dataSource,
config
)
-
const tables = tablesResult.map((row: any) => row.name)
- let dumpContent = 'SQLite format 3\0' // SQLite file header
- // Iterate through all tables
- for (const table of tables) {
- // Get table schema
- const schemaResult = await executeOperation(
- [
- {
- sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
- },
- ],
- dataSource,
- config
- )
+ const encoder = new TextEncoder()
+ const stream = new ReadableStream({
+ async start(controller) {
+ try {
+ controller.enqueue(encoder.encode('SQLite format 3\0'))
- if (schemaResult.length) {
- const schema = schemaResult[0].sql
- dumpContent += `\n-- Table: ${table}\n${schema};\n\n`
- }
+ for (const table of tables) {
+ const schemaResult = await executeOperation(
+ [
+ {
+ sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
+ },
+ ],
+ dataSource,
+ config
+ )
- // Get table data
- const dataResult = await executeOperation(
- [{ sql: `SELECT * FROM ${table};` }],
- dataSource,
- config
- )
+ if (schemaResult.length) {
+ const schema = schemaResult[0].sql
+ controller.enqueue(
+ encoder.encode(
+ `\n-- Table: ${table}\n${schema};\n\n`
+ )
+ )
+ }
- for (const row of dataResult) {
- const values = Object.values(row).map((value) =>
- typeof value === 'string'
- ? `'${value.replace(/'/g, "''")}'`
- : value
- )
- dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
- }
+ // Page through the table so we never materialize the
+ // full result set in memory.
+ let offset = 0
+ while (true) {
+ const dataResult = await executeOperation(
+ [
+ {
+ sql: `SELECT * FROM ${table} LIMIT ${DUMP_PAGE_SIZE} OFFSET ${offset};`,
+ },
+ ],
+ dataSource,
+ config
+ )
- dumpContent += '\n'
- }
+ if (!dataResult.length) break
- // Create a Blob from the dump content
- const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' })
+ let chunk = ''
+ for (const row of dataResult) {
+ const values =
+ Object.values(row).map(formatValue)
+ chunk += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
+ }
+ controller.enqueue(encoder.encode(chunk))
+
+ if (dataResult.length < DUMP_PAGE_SIZE) break
+ offset += DUMP_PAGE_SIZE
+ }
+
+ controller.enqueue(encoder.encode('\n'))
+ }
+
+ controller.close()
+ } catch (error) {
+ controller.error(error)
+ }
+ },
+ })
const headers = new Headers({
'Content-Type': 'application/x-sqlite3',
'Content-Disposition': 'attachment; filename="database_dump.sql"',
})
- return new Response(blob, { headers })
+ return new Response(stream, { headers })
} catch (error: any) {
console.error('Database Dump Error:', error)
return createResponse(undefined, 'Failed to create database dump', 500)