Skip to content

Commit cb2eb00

Browse files
committed
add bright data block and tools
1 parent 7bf3d73 commit cb2eb00

File tree

12 files changed

+859
-0
lines changed

12 files changed

+859
-0
lines changed
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
import { randomUUID } from 'crypto'
2+
import { createLogger } from '@sim/logger'
3+
import { NextResponse } from 'next/server'
4+
5+
// Logger for this route module, created with the 'BrightDataDatasetAPI' label.
const logger = createLogger('BrightDataDatasetAPI')
6+
7+
// Exported route-level setting. NOTE(review): presumably the Next.js route segment
// `maxDuration` (seconds) — the POST handler in this file polls with up to 600
// one-second waits, so confirm the two limits are meant to line up.
export const maxDuration = 600
8+
9+
/**
 * Triggers a Bright Data dataset collection and polls until its snapshot is ready.
 *
 * Expects a JSON body with string `datasetId` and `apiToken`; every other body
 * field is forwarded to Bright Data as the single trigger input record.
 *
 * Responses:
 * - 200 `{ data, snapshot_at? }` once the snapshot status is no longer
 *   running/building/starting.
 * - 400 when the body is not valid JSON or a required field is missing.
 * - The upstream status with `{ error }` when the trigger call fails or a poll
 *   returns 400.
 * - 500 when no snapshot ID is returned or an unexpected error is thrown.
 * - 504 when the snapshot is not ready within the polling budget.
 *
 * @param request - Incoming request whose JSON body carries the parameters above.
 * @returns The JSON response described above.
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)
  const startedAt = Date.now()

  try {
    let body: unknown
    try {
      body = await request.json()
    } catch {
      // A malformed body is a client error; do not let it surface as a 500.
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const fields: Record<string, unknown> = isDatasetObject(body) ? body : {}
    const datasetId = typeof fields.datasetId === 'string' ? fields.datasetId : undefined
    const apiToken = typeof fields.apiToken === 'string' ? fields.apiToken : undefined

    if (!datasetId || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    // Everything except the routing/auth fields is the dataset input record.
    const params: Record<string, unknown> = { ...fields }
    delete params.datasetId
    delete params.apiToken

    logger.info(`[${requestId}] Triggering dataset`, { datasetId })

    const triggerResponse = await fetch(
      `https://api.brightdata.com/datasets/v3/trigger?dataset_id=${encodeURIComponent(
        datasetId
      )}&include_errors=true`,
      {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${apiToken}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify([params]),
      }
    )

    const triggerPayload = parseDatasetPayload(await triggerResponse.text())

    if (!triggerResponse.ok) {
      const errorMessage = datasetErrorMessage(triggerPayload, triggerResponse.statusText)

      logger.error(`[${requestId}] Dataset trigger failed`, {
        datasetId,
        status: triggerResponse.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Dataset trigger failed' },
        { status: triggerResponse.status }
      )
    }

    const snapshotId = readDatasetField(triggerPayload, 'snapshot_id')

    if (!snapshotId) {
      logger.error(`[${requestId}] Dataset trigger missing snapshot ID`, { datasetId })
      return NextResponse.json({ error: 'No snapshot ID returned from request' }, { status: 500 })
    }

    logger.info(`[${requestId}] Dataset triggered`, { datasetId, snapshotId })

    const maxAttempts = 600
    const pollIntervalMs = 1000
    // The route exports maxDuration = 600. NOTE(review): assuming that is the
    // platform execution limit in seconds, stop polling about 30 s earlier so
    // the 504 below can still be sent; 600 one-second waits plus fetch latency
    // would otherwise always overrun it.
    const deadline = startedAt + 570000
    const pendingStatuses = ['running', 'building', 'starting']

    for (let attempt = 0; attempt < maxAttempts && Date.now() < deadline; attempt += 1) {
      const snapshotResponse = await fetch(
        // The ID comes from an upstream response, so encode it before using it as a path segment.
        `https://api.brightdata.com/datasets/v3/snapshot/${encodeURIComponent(
          snapshotId
        )}?format=json`,
        {
          method: 'GET',
          headers: {
            Authorization: `Bearer ${apiToken}`,
            'Content-Type': 'application/json',
          },
        }
      )

      const snapshotPayload = parseDatasetPayload(await snapshotResponse.text())

      if (!snapshotResponse.ok) {
        if (snapshotResponse.status === 400) {
          const errorMessage = datasetErrorMessage(snapshotPayload, snapshotResponse.statusText)

          logger.error(`[${requestId}] Dataset snapshot fetch failed`, {
            datasetId,
            snapshotId,
            status: snapshotResponse.status,
            error: errorMessage,
          })

          return NextResponse.json(
            { error: errorMessage || 'Dataset snapshot fetch failed' },
            { status: snapshotResponse.status }
          )
        }

        // Any other non-OK status is retried on the next poll.
        await pauseDatasetPoll(pollIntervalMs)
        continue
      }

      if (pendingStatuses.includes(readDatasetField(snapshotPayload, 'status'))) {
        await pauseDatasetPoll(pollIntervalMs)
        continue
      }

      const snapshotAt = readDatasetField(snapshotPayload, 'snapshot_at')

      logger.info(`[${requestId}] Dataset snapshot received`, { datasetId, snapshotId })

      return NextResponse.json({
        data: snapshotPayload,
        snapshot_at: snapshotAt || undefined,
      })
    }

    logger.error(`[${requestId}] Dataset snapshot timed out`, { datasetId, snapshotId })
    return NextResponse.json(
      { error: 'Timeout waiting for dataset snapshot' },
      { status: 504 }
    )
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Dataset fetch failed'
    logger.error(`[${requestId}] Dataset fetch failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}

/** Narrows a value to a non-null object (arrays included) so its keys can be probed. */
function isDatasetObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null
}

/** Parses an upstream body as JSON, falling back to the raw text when it is not JSON. */
function parseDatasetPayload(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    return text
  }
}

/** Reads `payload[key]` as a string; returns '' when the key is absent or nullish. */
function readDatasetField(payload: unknown, key: string): string {
  return isDatasetObject(payload) && key in payload ? String(payload[key] ?? '') : ''
}

/**
 * Extracts a readable message from an upstream error payload. Object-valued
 * errors are serialised as JSON rather than collapsing to "[object Object]".
 */
function datasetErrorMessage(payload: unknown, fallback: string): string {
  if (!isDatasetObject(payload) || !('error' in payload)) return fallback
  const detail = payload.error
  if (detail === null || detail === undefined) return fallback
  if (typeof detail === 'string') return detail
  return typeof detail === 'object' ? JSON.stringify(detail) : String(detail)
}

/** Resolves after `ms` milliseconds; used between snapshot polls. */
function pauseDatasetPoll(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms))
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { randomUUID } from 'crypto'
2+
import { createLogger } from '@sim/logger'
3+
import { NextResponse } from 'next/server'
4+
5+
// Logger for this route module, created with the 'BrightDataScrapeMarkdownAPI' label.
const logger = createLogger('BrightDataScrapeMarkdownAPI')
6+
7+
/**
 * Scrapes a URL through the Bright Data request API and returns it as markdown.
 *
 * Expects a JSON body with string `url` and `apiToken`, plus an optional string
 * `unlockerZone` (defaults to 'mcp_unlocker').
 *
 * Responses:
 * - 200 `{ markdown, url, title? }` on success. When the upstream body is a JSON
 *   object with a `markdown` key that value is used; otherwise the raw upstream
 *   text is returned verbatim.
 * - 400 when the body is not valid JSON or a required field is missing.
 * - The upstream status with `{ error }` when Bright Data answers non-OK.
 * - 500 when an unexpected error is thrown.
 *
 * @param request - Incoming request whose JSON body carries the parameters above.
 * @returns The JSON response described above.
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)

  try {
    let body: unknown
    try {
      body = await request.json()
    } catch {
      // A malformed body is a client error; do not let it surface as a 500.
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const fields: Record<string, unknown> = isScrapeObject(body) ? body : {}
    const url = typeof fields.url === 'string' ? fields.url : undefined
    const apiToken = typeof fields.apiToken === 'string' ? fields.apiToken : undefined
    const unlockerZone = typeof fields.unlockerZone === 'string' ? fields.unlockerZone : undefined

    if (!url || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Scraping URL as markdown`, { url })

    const response = await fetch('https://api.brightdata.com/request', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        zone: unlockerZone || 'mcp_unlocker',
        url,
        format: 'raw',
        data_format: 'markdown',
      }),
    })

    const responseText = await response.text()
    const payload = parseScrapePayload(responseText)

    if (!response.ok) {
      const errorMessage = scrapeErrorMessage(payload, response.statusText)

      logger.error(`[${requestId}] Scraping failed`, {
        url,
        status: response.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Scraping failed' },
        { status: response.status }
      )
    }

    // Only a JSON object carrying `markdown` is unwrapped. Anything else is
    // returned exactly as received, so markdown that happens to parse as JSON
    // (for example a quoted string) is not rewritten.
    const markdown =
      isScrapeObject(payload) && 'markdown' in payload
        ? String(payload.markdown ?? '')
        : responseText

    const title =
      isScrapeObject(payload) && 'title' in payload ? String(payload.title ?? '') : undefined

    logger.info(`[${requestId}] Scraping completed`, { url })

    return NextResponse.json({
      markdown,
      url,
      title: title || undefined,
    })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Scraping failed'
    logger.error(`[${requestId}] Scraping failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}

/** Narrows a value to a non-null object (arrays included) so its keys can be probed. */
function isScrapeObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null
}

/** Parses an upstream body as JSON, falling back to the raw text when it is not JSON. */
function parseScrapePayload(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    return text
  }
}

/**
 * Extracts a readable message from an upstream error payload. Object-valued
 * errors are serialised as JSON rather than collapsing to "[object Object]".
 */
function scrapeErrorMessage(payload: unknown, fallback: string): string {
  if (!isScrapeObject(payload) || !('error' in payload)) return fallback
  const detail = payload.error
  if (detail === null || detail === undefined) return fallback
  if (typeof detail === 'string') return detail
  return typeof detail === 'object' ? JSON.stringify(detail) : String(detail)
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import { randomUUID } from 'crypto'
2+
import { createLogger } from '@sim/logger'
3+
import { NextResponse } from 'next/server'
4+
5+
// Logger for this route module, created with the 'BrightDataSearchEngineAPI' label.
const logger = createLogger('BrightDataSearchEngineAPI')
6+
7+
/** One normalised organic search hit returned by this route. */
type BrightDataSearchResult = { title: string; url: string; snippet: string }

/**
 * Runs a Google search through the Bright Data request API and returns the
 * organic results in a normalised shape.
 *
 * Expects a JSON body with string `query` and `apiToken`, an optional string
 * `unlockerZone` (defaults to 'mcp_unlocker'), and an optional `maxResults`
 * given as a number or numeric string. `maxResults` is applied only when it is a
 * finite value of at least 1 (fractions are floored); any other value means
 * "no limit", so a negative number can no longer drop trailing results.
 *
 * Responses:
 * - 200 `{ results }`, each result being `{ title, url, snippet }`; entries
 *   without a string title and link are skipped.
 * - 400 when the body is not valid JSON or a required field is missing.
 * - The upstream status with `{ error }` when Bright Data answers non-OK.
 * - 500 when an unexpected error is thrown.
 *
 * @param request - Incoming request whose JSON body carries the parameters above.
 * @returns The JSON response described above.
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)

  try {
    let body: unknown
    try {
      body = await request.json()
    } catch {
      // A malformed body is a client error; do not let it surface as a 500.
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const fields: Record<string, unknown> = isSearchObject(body) ? body : {}
    const query = typeof fields.query === 'string' ? fields.query : undefined
    const apiToken = typeof fields.apiToken === 'string' ? fields.apiToken : undefined
    const unlockerZone = typeof fields.unlockerZone === 'string' ? fields.unlockerZone : undefined
    const maxResults = resolveSearchLimit(fields.maxResults)

    if (!query || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Searching`, { query, maxResults })

    const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&start=0&brd_json=1`

    const response = await fetch('https://api.brightdata.com/request', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        zone: unlockerZone || 'mcp_unlocker',
        url: searchUrl,
        format: 'raw',
        data_format: 'parsed_light',
      }),
    })

    const payload = parseSearchPayload(await response.text())

    if (!response.ok) {
      const errorMessage = searchErrorMessage(payload, response.statusText)

      logger.error(`[${requestId}] Search failed`, {
        query,
        status: response.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Search failed' },
        { status: response.status }
      )
    }

    const organic = isSearchObject(payload) ? payload.organic : undefined
    const normalizedResults: BrightDataSearchResult[] = Array.isArray(organic)
      ? organic.flatMap((entry: unknown) => {
          const hit = toSearchResult(entry)
          return hit ? [hit] : []
        })
      : []

    const results =
      maxResults === undefined ? normalizedResults : normalizedResults.slice(0, maxResults)

    logger.info(`[${requestId}] Search completed`, { resultCount: results.length })

    return NextResponse.json({
      results,
    })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Search failed'
    logger.error(`[${requestId}] Search failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}

/** Narrows a value to a non-null object (arrays included) so its keys can be probed. */
function isSearchObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null
}

/** Parses an upstream body as JSON, falling back to the raw text when it is not JSON. */
function parseSearchPayload(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    return text
  }
}

/**
 * Turns the caller-supplied `maxResults` into a usable slice length.
 * Returns `undefined` (no limit) unless the value is finite and at least 1.
 */
function resolveSearchLimit(raw: unknown): number | undefined {
  const value = typeof raw === 'number' ? raw : typeof raw === 'string' ? Number(raw) : Number.NaN
  if (!Number.isFinite(value) || value < 1) return undefined
  return Math.floor(value)
}

/** Maps one upstream organic entry to the public shape, or null when title/link are unusable. */
function toSearchResult(entry: unknown): BrightDataSearchResult | null {
  if (!isSearchObject(entry)) return null
  const title = typeof entry.title === 'string' ? entry.title : ''
  const url = typeof entry.link === 'string' ? entry.link : ''
  const snippet = typeof entry.description === 'string' ? entry.description : ''
  if (!title || !url) return null
  return { title, url, snippet }
}

/**
 * Extracts a readable message from an upstream error payload. Object-valued
 * errors are serialised as JSON rather than collapsing to "[object Object]".
 */
function searchErrorMessage(payload: unknown, fallback: string): string {
  if (!isSearchObject(payload) || !('error' in payload)) return fallback
  const detail = payload.error
  if (detail === null || detail === undefined) return fallback
  if (typeof detail === 'string') return detail
  return typeof detail === 'object' ? JSON.stringify(detail) : String(detail)
}

0 commit comments

Comments
 (0)