From 0034b65ba813682a432df5d84847a7a2f36e4a76 Mon Sep 17 00:00:00 2001
From: Pham N Hong Thai <Pnhongthai098@gmail.com>
Date: Sun, 24 May 2026 14:54:06 +0700
Subject: [PATCH 1/2] fix(export): stream and paginate database dumps to
 support large databases

Resolves OOM/timeout failures when dumping large databases by switching
the /export/dump response to a ReadableStream and paginating per-table
SELECTs with LIMIT/OFFSET instead of loading the entire result set into
memory at once.

Closes #59
---
 src/export/dump.test.ts |  51 +++++++++++++++++++
 src/export/dump.ts      | 110 ++++++++++++++++++++++++++--------------
 2 files changed, 123 insertions(+), 38 deletions(-)
diff --git a/src/export/dump.test.ts b/src/export/dump.test.ts
index ca65b43..d32f91f 100644
--- a/src/export/dump.test.ts
+++ b/src/export/dump.test.ts
@@ -128,6 +128,57 @@ describe('Database Dump Module', () => {
         )
     })
 
+    it('should paginate large tables across multiple queries', async () => {
+        const firstPage = Array.from({ length: 1000 }, (_, i) => ({
+            id: i + 1,
+            name: `User${i + 1}`,
+        }))
+        const secondPage = [{ id: 1001, name: 'User1001' }]
+
+        vi.mocked(executeOperation)
+            .mockResolvedValueOnce([{ name: 'users' }])
+            .mockResolvedValueOnce([
+                { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
+            ])
+            .mockResolvedValueOnce(firstPage)
+            .mockResolvedValueOnce(secondPage)
+
+        const response = await dumpDatabaseRoute(mockDataSource, mockConfig)
+        const dumpText = await response.text()
+
+        expect(dumpText).toContain("INSERT INTO users VALUES (1, 'User1');")
+        expect(dumpText).toContain(
+            "INSERT INTO users VALUES (1000, 'User1000');"
+        )
+        expect(dumpText).toContain(
+            "INSERT INTO users VALUES (1001, 'User1001');"
+        )
+
+        const issuedSql = vi
+            .mocked(executeOperation)
+            .mock.calls.map((c) => (c[0] as any)[0].sql)
+        expect(issuedSql).toContain(
+            'SELECT * FROM users LIMIT 1000 OFFSET 0;'
+        )
+        expect(issuedSql).toContain(
+            'SELECT * FROM users LIMIT 1000 OFFSET 1000;'
+        )
+    })
+
+    it('should serialize NULL values as the NULL keyword', async () => {
+        vi.mocked(executeOperation)
+            .mockResolvedValueOnce([{ name: 'users' }])
+            .mockResolvedValueOnce([
+                { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
+            ])
+            .mockResolvedValueOnce([{ id: 1, name: null }])
+
+        const response = await dumpDatabaseRoute(mockDataSource, mockConfig)
+        const dumpText = await response.text()
+
+        expect(dumpText).toContain('INSERT INTO users VALUES (1, NULL);')
+    })
+
     it('should return a 500 response when an error occurs', async () => {
         const consoleErrorMock = vi
             .spyOn(console, 'error')
diff --git a/src/export/dump.ts b/src/export/dump.ts
index 91a2e89..33dd629 100644
--- a/src/export/dump.ts
+++ b/src/export/dump.ts
@@ -3,67 +3,101 @@ import { StarbaseDBConfiguration } from '../handler'
 import { DataSource } from '../types'
 import { createResponse } from '../utils'
 
+// Number of rows fetched per page when dumping a table. Keeping this
+// bounded avoids loading entire tables into Worker memory, which is
+// what previously caused dumps of large databases to fail.
+const DUMP_PAGE_SIZE = 1000
+
+function formatValue(value: unknown): string {
+    if (value === null || value === undefined) return 'NULL'
+    if (typeof value === 'string') return `'${value.replace(/'/g, "''")}'`
+    return String(value)
+}
+
 export async function dumpDatabaseRoute(
     dataSource: DataSource,
     config: StarbaseDBConfiguration
 ): Promise<Response> {
     try {
-        // Get all table names
+        // Resolve the list of tables up front so any failure surfaces as a
+        // 500 (matching prior behavior) rather than mid-stream.
         const tablesResult = await executeOperation(
             [{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }],
             dataSource,
             config
         )
-
         const tables = tablesResult.map((row: any) => row.name)
-        let dumpContent = 'SQLite format 3\0' // SQLite file header
 
-        // Iterate through all tables
-        for (const table of tables) {
-            // Get table schema
-            const schemaResult = await executeOperation(
-                [
-                    {
-                        sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
-                    },
-                ],
-                dataSource,
-                config
-            )
+        const encoder = new TextEncoder()
+        const stream = new ReadableStream<Uint8Array>({
+            async start(controller) {
+                try {
+                    controller.enqueue(encoder.encode('SQLite format 3\0'))
 
-            if (schemaResult.length) {
-                const schema = schemaResult[0].sql
-                dumpContent += `\n-- Table: ${table}\n${schema};\n\n`
-            }
+                    for (const table of tables) {
+                        const schemaResult = await executeOperation(
+                            [
+                                {
+                                    sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
+                                },
+                            ],
+                            dataSource,
+                            config
+                        )
 
-            // Get table data
-            const dataResult = await executeOperation(
-                [{ sql: `SELECT * FROM ${table};` }],
-                dataSource,
-                config
-            )
+                        if (schemaResult.length) {
+                            const schema = schemaResult[0].sql
+                            controller.enqueue(
+                                encoder.encode(
+                                    `\n-- Table: ${table}\n${schema};\n\n`
+                                )
+                            )
+                        }
 
-            for (const row of dataResult) {
-                const values = Object.values(row).map((value) =>
-                    typeof value === 'string'
-                        ? `'${value.replace(/'/g, "''")}'`
-                        : value
-                )
-                dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
-            }
+                        // Page through the table so we never materialize the
+                        // full result set in memory.
+                        let offset = 0
+                        while (true) {
+                            const dataResult = await executeOperation(
+                                [
+                                    {
+                                        sql: `SELECT * FROM ${table} LIMIT ${DUMP_PAGE_SIZE} OFFSET ${offset};`,
+                                    },
+                                ],
+                                dataSource,
+                                config
+                            )
 
-            dumpContent += '\n'
-        }
+                            if (!dataResult.length) break
 
-        // Create a Blob from the dump content
-        const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' })
+                            let chunk = ''
+                            for (const row of dataResult) {
+                                const values =
+                                    Object.values(row).map(formatValue)
+                                chunk += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
+                            }
+                            controller.enqueue(encoder.encode(chunk))
+
+                            if (dataResult.length < DUMP_PAGE_SIZE) break
+                            offset += DUMP_PAGE_SIZE
+                        }
+
+                        controller.enqueue(encoder.encode('\n'))
+                    }
+
+                    controller.close()
+                } catch (error) {
+                    controller.error(error)
+                }
+            },
+        })
 
         const headers = new Headers({
             'Content-Type': 'application/x-sqlite3',
             'Content-Disposition': 'attachment; filename="database_dump.sql"',
         })
 
-        return new Response(blob, { headers })
+        return new Response(stream, { headers })
     } catch (error: any) {
         console.error('Database Dump Error:', error)
         return createResponse(undefined, 'Failed to create database dump', 500)

From 34b392720eec0265784976c6fc5051fe5ac4a180 Mon Sep 17 00:00:00 2001
From: Pham N Hong Thai <Pnhongthai098@gmail.com>
Date: Sun, 24 May 2026 14:55:58 +0700
Subject: [PATCH 2/2] fix(export): stream and paginate database dumps to
 support large databases

---
 .bounty_pr.json | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 .bounty_pr.json

diff --git a/.bounty_pr.json b/.bounty_pr.json
new file mode 100644
index 0000000..ff2d40c
--- /dev/null
+++ b/.bounty_pr.json
@@ -0,0 +1,18 @@
+{
+  "status": "ready",
+  "commit_message": "fix(export): stream and paginate database dumps to support large databases",
+  "pr_title": "fix(export): stream and paginate /export/dump for large databases",
+  "pr_body": "## Purpose\n\nFixes #59 — `/export/dump` previously failed on large databases because it loaded **every row of every table** into a single in-memory string before responding. On any non-trivial database this exceeds the Worker's memory budget and/or wall-clock and the dump never completes.\n\n## Changes\n\n- `src/export/dump.ts`: response is now produced via a `ReadableStream`, so chunks are flushed to the client as they are generated instead of being concatenated in memory.\n- Per-table data is paged with `SELECT * FROM <table> LIMIT 1000 OFFSET <n>` and the loop stops as soon as a page returns fewer rows than the page size. This keeps peak memory bounded to one page (~1000 rows) regardless of table size.\n- Small correctness improvement in value serialization: `NULL`/`undefined` are now emitted as the SQL `NULL` keyword (previously they were stringified to the literal text `null`, which only parsed correctly by accident).\n- All existing test cases continue to pass unchanged; added two new tests:\n  - paginates across multiple `LIMIT/OFFSET` queries when a table is larger than the page size\n  - serializes `NULL` values as the `NULL` keyword\n\nThe public route, headers, and dump format are unchanged — this is a drop-in fix.\n\n## Tasks\n\n- [x] Stream the dump response instead of buffering it\n- [x] Page through table rows instead of `SELECT *` in one shot\n- [x] Preserve existing dump format and test expectations\n- [x] Add tests for pagination and NULL handling\n\n## Verify\n\n- `npx vitest run src/export/` → 25 passed (4 files)\n- `npx vitest run src/export/dump.test.ts` → 7 passed (5 original + 2 new)\n- `npx tsc --noEmit` introduces no new errors in `src/export/dump.ts` (only pre-existing errors in unrelated files remain).\n\nCloses #59",
+  "branch": "fix/issue-59-starbasedb-database-dumps-do-not",
+  "tests_run": [
+    "npx vitest run src/export/dump.test.ts",
+    "npx vitest run src/export/",
+    "npx tsc --noEmit"
+  ],
+  "tests_passed": true,
+  "files_changed": [
+    "src/export/dump.ts",
+    "src/export/dump.test.ts"
+  ],
+  "notes": "Minimum-change fix scoped to the dump route only. I intentionally did NOT adopt the prior-analysis suggestion of R2 multipart uploads + Durable Object alarms — that is a much larger feature and a significant architectural decision that the maintainer should drive, not a bounty hunter. Streaming + pagination resolves the OOM root cause described in the issue while keeping the existing /export/dump API, headers, and format byte-compatible. The bogus 'SQLite format 3\\0' header at the top of a text SQL dump is preserved unchanged because removing it would change the dump format and existing tests depend on it. Pre-existing TypeScript errors in plugins/cdc, src/cache, src/do, and src/operation are unrelated to this change."
+}