Skip to content

Commit fc21d5a

Browse files
authored
[mcp] Dedupe docs (facebook#32929)
Previously the resource would return a number of duplicates because the Algolia results would return multiple hashes (headings) for the same URL. This change dedupes the hits by URL pathname (dropping the hash) before fetching the pages. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/facebook/react/pull/32929). * facebook#32932 * facebook#32931 * facebook#32930 * __->__ facebook#32929 * facebook#32928
1 parent 35ab8ff commit fc21d5a

File tree

1 file changed

+21
-4
lines changed
  • compiler/packages/react-mcp-server/src

1 file changed

+21
-4
lines changed

compiler/packages/react-mcp-server/src/index.ts

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,30 @@ const server = new McpServer({
4040
version: '0.0.0',
4141
});
4242

43+
/**
 * Converts a heading into a lowercase, hyphen-separated slug.
 *
 * Words are separated on any run of whitespace and surrounding whitespace is
 * ignored, so text extracted from HTML (which may contain newlines, tabs, or
 * repeated spaces) does not yield empty slug segments such as `--` or a
 * leading/trailing `-`.
 *
 * @param heading - The heading text to slugify.
 * @returns The slug; an empty string when `heading` is empty or whitespace-only.
 */
function slugify(heading: string): string {
  return heading
    .trim()
    .split(/\s+/)
    .map(word => word.toLowerCase())
    .join('-');
}
49+
4350
// TODO: how to verify this works?
4451
server.resource(
4552
'docs',
4653
new ResourceTemplate('docs://{message}', {list: undefined}),
47-
async (uri, {message}) => {
54+
async (_uri, {message}) => {
4855
const hits = await queryAlgolia(message);
56+
const deduped = new Map();
57+
for (const hit of hits) {
58+
// drop hashes to dedupe properly
59+
const u = new URL(hit.url);
60+
if (deduped.has(u.pathname)) {
61+
continue;
62+
}
63+
deduped.set(u.pathname, hit);
64+
}
4965
const pages: Array<string | null> = await Promise.all(
50-
hits.map(hit => {
66+
Array.from(deduped.values()).map(hit => {
5167
return fetch(hit.url, {
5268
headers: {
5369
'User-Agent':
@@ -70,16 +86,17 @@ server.resource(
7086
.filter(html => html !== null)
7187
.map(html => {
7288
const $ = cheerio.load(html);
89+
const title = encodeURIComponent(slugify($('h1').text()));
7390
// react.dev should always have at least one <article> with the main content
7491
const article = $('article').html();
7592
if (article != null) {
7693
return {
77-
uri: uri.href,
94+
uri: `docs://${title}`,
7895
text: turndownService.turndown(article),
7996
};
8097
} else {
8198
return {
82-
uri: uri.href,
99+
uri: `docs://${title}`,
83100
// Fallback to converting the whole page to markdown
84101
text: turndownService.turndown($.html()),
85102
};

0 commit comments

Comments
 (0)