Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/export/dump.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,29 @@ describe('Database Dump Module', () => {
)
})

it('streams large tables in bounded batches (constant memory)', async () => {
    // Queue the mocked results in the order the dump issues its queries:
    // table list, table schema, then successive pages of row data.
    const executeOperationMock = vi.mocked(executeOperation)
    executeOperationMock.mockResolvedValueOnce([{ name: 'logs' }])
    executeOperationMock.mockResolvedValueOnce([
        { sql: 'CREATE TABLE logs (id INTEGER);' },
    ])
    // First page holds exactly `batchSize` rows, so another page is requested.
    executeOperationMock.mockResolvedValueOnce([{ id: 1 }, { id: 2 }])
    // Second page is shorter than `batchSize`, which ends the paging.
    executeOperationMock.mockResolvedValueOnce([{ id: 3 }])

    const batchSize = 2
    const response = await dumpDatabaseRoute(
        mockDataSource,
        mockConfig,
        batchSize
    )
    const dumpText = await response.text()

    // Rows from both pages must end up in the streamed dump.
    for (const id of [1, 2, 3]) {
        expect(dumpText).toContain(`INSERT INTO logs VALUES (${id});`)
    }

    // At least one issued query must carry an OFFSET advanced by the batch size.
    const recordedCalls = executeOperationMock.mock.calls
    const issuedOffsetQuery = recordedCalls.some((callArgs: any[]) => {
        const firstQuery = callArgs[0]?.[0]
        return firstQuery?.sql?.includes('OFFSET 2')
    })
    expect(issuedOffsetQuery).toBe(true)
})

it('should return a 500 response when an error occurs', async () => {
const consoleErrorMock = vi
.spyOn(console, 'error')
Expand Down
134 changes: 95 additions & 39 deletions src/export/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,67 +3,123 @@ import { StarbaseDBConfiguration } from '../handler'
import { DataSource } from '../types'
import { createResponse } from '../utils'

/**
 * Row count requested per query when a table's contents are paged out during a dump.
 * Used whenever the caller of `dumpDatabaseRoute` does not supply its own `batchSize`.
 * Capping each fetch keeps the in-flight working set bounded, however large the table is.
 */
export const DEFAULT_DUMP_BATCH_SIZE = 1_000

/**
* Streams a SQL dump of the database.
*
* Previously the whole dump was assembled in a single in-memory string and every table
* was read with one unbounded `SELECT *`, so large databases ran out of memory and/or
* exceeded the 30s request window (#59). This version instead:
*
* - Streams the response via a `ReadableStream`, so bytes flow to the client as they
* are produced (the connection stays active instead of waiting for one giant body).
* - Reads each table's rows in bounded batches (`LIMIT`/`OFFSET`), so memory usage stays
* roughly constant regardless of table size.
*
* The initial table-list query is performed up front so that an early failure still
* returns a clean 500 (rather than a half-streamed 200).
*/
export async function dumpDatabaseRoute(
dataSource: DataSource,
config: StarbaseDBConfiguration
config: StarbaseDBConfiguration,
batchSize: number = DEFAULT_DUMP_BATCH_SIZE
): Promise<Response> {
try {
// Get all table names
// Resolve the table list up front so early errors surface as a 500.
const tablesResult = await executeOperation(
[{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }],
dataSource,
config
)

const tables = tablesResult.map((row: any) => row.name)
let dumpContent = 'SQLite format 3\0' // SQLite file header

// Iterate through all tables
for (const table of tables) {
// Get table schema
const schemaResult = await executeOperation(
[
{
sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
},
],
dataSource,
config
)
const encoder = new TextEncoder()

const stream = new ReadableStream({
async start(controller) {
try {
controller.enqueue(encoder.encode('SQLite format 3\0')) // SQLite file header

for (const table of tables) {
// Table schema
const schemaResult = await executeOperation(
[
{
sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name = ?;`,
params: [table],
},
],
dataSource,
config
)

if (schemaResult.length) {
const schema = schemaResult[0].sql
dumpContent += `\n-- Table: ${table}\n${schema};\n\n`
}
if (schemaResult.length) {
const schema = schemaResult[0].sql
controller.enqueue(
encoder.encode(
`\n-- Table: ${table}\n${schema};\n\n`
)
)
}

// Get table data
const dataResult = await executeOperation(
[{ sql: `SELECT * FROM ${table};` }],
dataSource,
config
)
// Table data, streamed in bounded batches.
let offset = 0
while (true) {
const dataResult = await executeOperation(
[
{
sql: `SELECT * FROM "${table}" LIMIT ${batchSize} OFFSET ${offset};`,
},
],
dataSource,
config
)

for (const row of dataResult) {
const values = Object.values(row).map((value) =>
typeof value === 'string'
? `'${value.replace(/'/g, "''")}'`
: value
)
dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
}
if (!dataResult.length) {
break
}

dumpContent += '\n'
}
let chunk = ''
for (const row of dataResult) {
const values = Object.values(row).map(
(value) =>
typeof value === 'string'
? `'${value.replace(/'/g, "''")}'`
: value
)
chunk += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
}
controller.enqueue(encoder.encode(chunk))

// Create a Blob from the dump content
const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' })
if (dataResult.length < batchSize) {
break
}
offset += batchSize
}

controller.enqueue(encoder.encode('\n'))
}

controller.close()
} catch (error: any) {
console.error('Database Dump Error (stream):', error)
controller.error(error)
}
},
})

const headers = new Headers({
'Content-Type': 'application/x-sqlite3',
'Content-Disposition': 'attachment; filename="database_dump.sql"',
})

return new Response(blob, { headers })
return new Response(stream, { headers })
} catch (error: any) {
console.error('Database Dump Error:', error)
return createResponse(undefined, 'Failed to create database dump', 500)
Expand Down