Skip to content

Commit 7037441

Browse files
committed
perf(common,sdk,bigquery): optimize JSON parsing, fix race condition, add query time bounds
- Optimize partial JSON delta parsing from O(n²) to O(n) - Fix race condition in code-search Promise.allSettled logic - Add time bounds to BigQuery queries for partition pruning
1 parent 8456dfb commit 7037441

File tree

4 files changed

+72
-17
lines changed

4 files changed

+72
-17
lines changed

common/src/util/__tests__/partial-json-delta.test.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,38 @@ describe('parsePartialJsonObjectSingle', () => {
108108
})
109109
})
110110

111+
describe('comma search optimization', () => {
112+
it('should efficiently find last valid comma in deeply nested incomplete JSON', () => {
113+
// This tests the O(n) backward comma search optimization
114+
const input = '{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "incomplete":'
115+
const result = parsePartialJsonObjectSingle(input)
116+
expect(result).toEqual({
117+
lastParamComplete: true,
118+
params: { a: 1, b: 2, c: 3, d: 4, e: 5 },
119+
})
120+
})
121+
122+
it('should handle comma inside string value when searching backwards', () => {
123+
// Comma inside a string should not be treated as a separator
124+
const input = '{"message": "Hello, world", "incomplete":'
125+
const result = parsePartialJsonObjectSingle(input)
126+
expect(result).toEqual({
127+
lastParamComplete: true,
128+
params: { message: 'Hello, world' },
129+
})
130+
})
131+
132+
it('should find valid comma after skipping invalid parse attempts', () => {
133+
// Multiple commas, need to find the right one
134+
const input = '{"x": [1, 2, 3], "y": {"a": 1, "b": 2}, "z":'
135+
const result = parsePartialJsonObjectSingle(input)
136+
expect(result).toEqual({
137+
lastParamComplete: true,
138+
params: { x: [1, 2, 3], y: { a: 1, b: 2 } },
139+
})
140+
})
141+
})
142+
111143
describe('edge cases', () => {
112144
it('should return empty object for empty string', () => {
113145
const input = ''

common/src/util/partial-json-delta.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// TODO: optimize this to not be O(n^2)
21
export function parsePartialJsonObjectSingle(content: string): {
32
lastParamComplete: boolean
43
params: any
@@ -26,16 +25,14 @@ export function parsePartialJsonObjectSingle(content: string): {
2625
} catch {}
2726
}
2827

29-
let lastIndex = content.lastIndexOf(',')
30-
while (lastIndex > 0) {
28+
let commaPos = content.length
29+
while ((commaPos = content.lastIndexOf(',', commaPos - 1)) !== -1) {
3130
try {
3231
return {
3332
lastParamComplete: true,
34-
params: JSON.parse(content.slice(0, lastIndex) + '}'),
33+
params: JSON.parse(content.slice(0, commaPos) + '}'),
3534
}
3635
} catch {}
37-
38-
lastIndex = content.lastIndexOf(',', lastIndex - 1)
3936
}
4037

4138
return { lastParamComplete: true, params: {} }

packages/bigquery/src/client.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -246,27 +246,39 @@ export async function getTracesWithoutRelabels(
246246
userId: string | undefined = undefined,
247247
dataset: string = DATASET,
248248
) {
249-
// TODO: Optimize query, maybe only get traces in last 30 days etc
249+
const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000)
250+
.toISOString()
251+
.split('T')[0]
252+
250253
const query = `
251254
SELECT t.*
252255
FROM \`${dataset}.${TRACES_TABLE}\` t
253256
LEFT JOIN (
254257
SELECT r.agent_step_id, r.user_id, JSON_EXTRACT_SCALAR(r.payload, '$.user_input_id') as user_input_id
255258
FROM \`${dataset}.${RELABELS_TABLE}\` r
256-
WHERE r.model = '${model}'
257-
${userId ? `AND r.user_id = '${userId}'` : ''}
259+
WHERE r.model = @model
260+
${userId ? `AND r.user_id = @userId` : ''}
258261
) r
259262
ON t.agent_step_id = r.agent_step_id
260263
AND t.user_id = r.user_id
261264
AND JSON_EXTRACT_SCALAR(t.payload, '$.user_input_id') = r.user_input_id
262265
WHERE t.type = 'get-relevant-files'
266+
AND t.created_at >= @thirtyDaysAgo
263267
AND r.agent_step_id IS NULL
264-
${userId ? `AND t.user_id = '${userId}'` : ''}
268+
${userId ? `AND t.user_id = @userId` : ''}
265269
ORDER BY t.created_at DESC
266-
LIMIT ${limit}
270+
LIMIT @limit
267271
`
268272

269-
const [rows] = await getClient().query(query)
273+
const [rows] = await getClient().query({
274+
query,
275+
params: {
276+
model,
277+
thirtyDaysAgo,
278+
limit,
279+
...(userId ? { userId } : {}),
280+
},
281+
})
270282
// Parse the payload as JSON if it's a string
271283
return rows.map((row) => ({
272284
...row,

sdk/src/tools/code-search.ts

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,28 +104,42 @@ export function codeSearch({
104104
let estimatedOutputLen = 0
105105
let killedForLimit = false
106106

107+
// Guard to prevent double-settlement from concurrent timeout and process close events
108+
let killTimeoutId: ReturnType<typeof setTimeout> | null = null
109+
107110
const settle = (payload: any) => {
108111
if (isResolved) return
109112
isResolved = true
110113

111-
// Clean up listeners immediately
114+
// Clean up listeners immediately to prevent further events
112115
childProcess.stdout.removeAllListeners()
113116
childProcess.stderr.removeAllListeners()
114117
childProcess.removeAllListeners()
115118

119+
// Clear both the main timeout and the kill timeout to prevent late callbacks
116120
clearTimeout(timeoutId)
121+
if (killTimeoutId) {
122+
clearTimeout(killTimeoutId)
123+
killTimeoutId = null
124+
}
125+
117126
resolve([{ type: 'json', value: payload }])
118127
}
119128

120129
const hardKill = () => {
121130
try {
122131
childProcess.kill('SIGTERM')
123132
} catch {}
124-
setTimeout(() => {
133+
// Store timeout reference so it can be cleared if process closes normally
134+
killTimeoutId = setTimeout(() => {
125135
try {
126-
// SIGKILL doesn't exist on Windows, fall back to no-signal kill
127-
childProcess.kill('SIGKILL') || childProcess.kill()
128-
} catch {}
136+
childProcess.kill('SIGKILL')
137+
} catch {
138+
try {
139+
childProcess.kill()
140+
} catch {}
141+
}
142+
killTimeoutId = null
129143
}, 1000)
130144
}
131145

0 commit comments

Comments
 (0)