Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/vinext/src/entries/pages-server-entry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ export async function generateServerEntry(
headers: nextConfig?.headers ?? [],
expireTime: nextConfig?.expireTime,
cacheMaxMemorySize: nextConfig?.cacheMaxMemorySize,
htmlLimitedBots: nextConfig?.htmlLimitedBots,
i18n: nextConfig?.i18n ?? null,
// Mirrors Next.js `experimental.disableOptimizedLoading` — when false
// (the default), page scripts are emitted with `defer` in <head>. See
Expand Down Expand Up @@ -358,6 +359,7 @@ const _renderPage = __createPagesPageHandler({
assetPrefix: vinextConfig.assetPrefix,
trailingSlash: vinextConfig.trailingSlash,
expireTime: vinextConfig.expireTime,
htmlLimitedBots: vinextConfig.htmlLimitedBots,
clientTraceMetadata: vinextConfig.clientTraceMetadata,
disableOptimizedLoading: vinextConfig.disableOptimizedLoading,
},
Expand Down
1 change: 1 addition & 0 deletions packages/vinext/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3793,6 +3793,7 @@ export default function vinext(options: VinextOptions = {}): PluginOption[] {
(nextConfig?.rewrites.afterFiles.length ?? 0) > 0 ||
(nextConfig?.rewrites.fallback.length ?? 0) > 0,
nextConfig?.clientTraceMetadata,
nextConfig?.htmlLimitedBots,
);
flushStagedHeaders();
flushRequestHeaders();
Expand Down
7 changes: 6 additions & 1 deletion packages/vinext/src/server/dev-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ import {
} from "./pages-document-initial-props.js";
import { callDocumentGetInitialProps } from "./document-initial-head.js";
import { loadPagesGetInitialProps } from "./pages-get-initial-props.js";
import { isBotUserAgent } from "../utils/html-limited-bots.js";

/**
* Render a React element to a string using renderToReadableStream.
Expand Down Expand Up @@ -409,6 +410,7 @@ export function createSSRHandler(
* `next.config`. When undefined or empty, no meta tags are emitted.
*/
clientTraceMetadata?: readonly string[],
htmlLimitedBots?: string,
) {
const matcher = fileMatcher ?? createValidFileMatcher();

Expand Down Expand Up @@ -740,7 +742,10 @@ export function createSSRHandler(
// Render the loading shell for `fallback: true` when the path
// wasn't pre-rendered. Data requests still resolve real props so
// the client can swap in after the shell ships.
if (fallback === true && !isValidPath && !isDataReq) {
const userAgentHeader = req.headers["user-agent"];
const userAgent = Array.isArray(userAgentHeader) ? userAgentHeader[0] : userAgentHeader;
const isBotRequest = !!userAgent && isBotUserAgent(userAgent, htmlLimitedBots);
if (fallback === true && !isValidPath && !isDataReq && !isBotRequest) {
isFallbackRender = true;
if (typeof routerShim.setSSRContext === "function") {
routerShim.setSSRContext({
Expand Down
6 changes: 5 additions & 1 deletion packages/vinext/src/server/pages-page-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
import { buildNextDataJsonResponse } from "./pages-data-route.js";
import { NEXTJS_DEPLOYMENT_ID_HEADER } from "./headers.js";
import { isSerializableProps } from "./pages-serializable-props.js";
import { isBotUserAgent } from "../utils/html-limited-bots.js";

type PagesRedirectResult = {
destination: string;
Expand Down Expand Up @@ -183,6 +184,7 @@ export type ResolvePagesPageDataOptions = {
* Typically sourced from `process.env.__VINEXT_DEPLOYMENT_ID || process.env.NEXT_DEPLOYMENT_ID`.
*/
deploymentId?: string;
htmlLimitedBots?: string;
pageModule: PagesPageModule;
params: Record<string, unknown>;
query: Record<string, unknown>;
Expand Down Expand Up @@ -544,7 +546,9 @@ export async function resolvePagesPageData(
// Render the fallback shell for unlisted paths under `fallback: true`.
// Data requests resolve props normally so the client can fill in after
// the loading shell ships (`fallback: 'blocking'` keeps SSRing as before).
if (fallback === true && !isValidPath && !options.isDataReq) {
const isBotRequest =
!!options.userAgent && isBotUserAgent(options.userAgent, options.htmlLimitedBots);
if (fallback === true && !isValidPath && !options.isDataReq && !isBotRequest) {
isFallback = true;
}
}
Expand Down
2 changes: 2 additions & 0 deletions packages/vinext/src/server/pages-page-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ type VinextConfigSubset = {
assetPrefix: string;
trailingSlash: boolean;
expireTime?: number;
htmlLimitedBots?: string;
clientTraceMetadata?: readonly string[];
disableOptimizedLoading: boolean;
};
Expand Down Expand Up @@ -511,6 +512,7 @@ export function createPagesPageHandler(
applyRequestContexts: applySSRContext,
buildId,
deploymentId: process.env.__VINEXT_DEPLOYMENT_ID || process.env.NEXT_DEPLOYMENT_ID,
htmlLimitedBots: vinextConfig.htmlLimitedBots,
createGsspReqRes() {
return createPagesReqRes({ body: undefined, query, request, url: routeUrl });
},
Expand Down
29 changes: 29 additions & 0 deletions packages/vinext/src/utils/html-limited-bots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
// packages/next/src/shared/lib/router/utils/html-bots.ts
const HTML_LIMITED_BOT_UA_RE_STRING = String.raw`[\w-]+-Google|Google-[\w-]+|Chrome-Lighthouse|Slurp|DuckDuckBot|baiduspider|yandex|sogou|bitlybot|tumblr|vkShare|quora link preview|redditbot|ia_archiver|Bingbot|BingPreview|applebot|facebookexternalhit|facebookcatalog|Twitterbot|LinkedInBot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|Yeti|googleweblight`;

// Headless browser bot (executes JS). Mirrors Next.js
// `HEADLESS_BROWSER_BOT_UA_RE` in
// `.nextjs-ref/packages/next/src/shared/lib/router/utils/is-bot.ts`.
// Matches "Googlebot" but NOT "Mediapartners-Google" / "AdsBot-Google" /
// other Google crawlers, which are covered by the HTML-limited list.
//
// Pattern notes:
// - `(?!-)` is a negative lookahead: "Googlebot" immediately followed by a
//   hyphen (e.g. "Googlebot-Image") is not matched by this regex.
// - The `Googlebot$` alternative is already implied by `Googlebot(?!-)`,
//   because nothing (and therefore no hyphen) follows at the end of input.
//   NOTE(review): presumably kept verbatim for parity with upstream — confirm
//   before simplifying.
// - The `i` flag makes the match case-insensitive. There is no `g`/`y` flag,
//   so `.test()` keeps no `lastIndex` state between calls and the shared
//   instance is safe to reuse.
const HEADLESS_BROWSER_BOT_UA_RE = /Googlebot(?!-)|Googlebot$/i;

const htmlLimitedBotRegexCache = new Map<string, RegExp>();

export function getHtmlLimitedBotRegex(htmlLimitedBots: string | undefined): RegExp {
Expand All @@ -13,3 +20,25 @@ export function getHtmlLimitedBotRegex(htmlLimitedBots: string | undefined): Reg
htmlLimitedBotRegexCache.set(source, regex);
return regex;
}

/**
 * Decides whether a User-Agent string belongs to a known crawler/bot.
 *
 * Two patterns are consulted, in this order:
 * 1. Next.js's "headless browser bot" pattern (Googlebot proper).
 * 2. The "HTML-limited bot" list (Bingbot, DuckDuckBot,
 *    facebookexternalhit, …), only when the first pattern did not match.
 *
 * The Pages Router fallback path relies on this: a bot requesting an unlisted
 * `fallback: true` route should receive a synchronous render (real content)
 * rather than the loading shell, so the crawler indexes the actual page.
 * Mirrors Next.js's `isBot()` in
 * `.nextjs-ref/packages/next/src/shared/lib/router/utils/is-bot.ts` and the
 * bot-aware fallback flip in
 * `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
 *
 * @param userAgent - Raw `User-Agent` value. A falsy value (e.g. the empty
 *   string) is never treated as a bot.
 * @param htmlLimitedBots - Optional next.config override for the HTML-limited
 *   list. It is the same flag that drives `getHtmlLimitedBotRegex`, so a
 *   custom list applies to both streaming metadata gating and bot-aware
 *   fallback rendering.
 * @returns `true` when either pattern matches the User-Agent.
 */
export function isBotUserAgent(userAgent: string, htmlLimitedBots?: string): boolean {
  // Nothing to classify without a User-Agent.
  if (!userAgent) {
    return false;
  }

  // `||` short-circuits, so the HTML-limited regex is only looked up when the
  // headless-browser pattern did not already match.
  return (
    HEADLESS_BROWSER_BOT_UA_RE.test(userAgent) ||
    getHtmlLimitedBotRegex(htmlLimitedBots).test(userAgent)
  );
}
64 changes: 64 additions & 0 deletions tests/pages-page-data.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,70 @@ describe("pages page data", () => {
await expect(result.response.text()).resolves.toBe("{}");
});

// Refs #1543: when a crawler/bot User-Agent requests a path that is not listed
// under `fallback: true`, the loading shell must be skipped in favour of a
// synchronous render so the bot indexes real content. Mirrors Next.js's bot
// check in
// `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`.
it("does not set isFallback for bot User-Agent on unlisted fallback: true paths", async () => {
  const unlistedSlug = "unknown";
  // Flipped by getStaticProps to prove that real props were resolved.
  let staticPropsRan = false;

  const options = createOptions({
    pageModule: {
      // Only "hello-world" is pre-listed; everything else hits the fallback path.
      getStaticPaths: async () => ({
        fallback: true,
        paths: [{ params: { slug: "hello-world" } }],
      }),
      getStaticProps: async ({ params }) => {
        staticPropsRan = true;
        return { props: { slug: params?.slug ?? null } };
      },
    },
    params: { slug: unlistedSlug },
    query: { slug: unlistedSlug },
    route: { isDynamic: true },
    routeUrl: "/posts/unknown",
    userAgent: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
  });
  const result = await resolvePagesPageData(options);

  expect(result.kind).toBe("render");
  // Narrow the result union for the property checks below.
  if (result.kind !== "render") throw new Error("expected render result");
  expect(result.isFallback).toBe(false);
  expect(staticPropsRan).toBe(true);
  expect(result.pageProps).toMatchObject({ slug: "unknown" });
});

// Counterpart of the bot case: a regular browser User-Agent on an unlisted
// `fallback: true` path must still get the fallback shell, without
// getStaticProps being invoked.
it("sets isFallback for normal browser User-Agent on unlisted fallback: true paths", async () => {
  const unlistedSlug = "unknown";

  const options = createOptions({
    pageModule: {
      // Only "hello-world" is pre-listed; everything else hits the fallback path.
      getStaticPaths: async () => ({
        fallback: true,
        paths: [{ params: { slug: "hello-world" } }],
      }),
      // Fails the test loudly if props are resolved for a shell render.
      getStaticProps: async () => {
        throw new Error("getStaticProps should not run on a fallback shell render");
      },
    },
    params: { slug: unlistedSlug },
    query: { slug: unlistedSlug },
    route: { isDynamic: true },
    routeUrl: "/posts/unknown",
    userAgent:
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
  });
  const result = await resolvePagesPageData(options);

  expect(result.kind).toBe("render");
  // Narrow the result union for the property checks below.
  if (result.kind !== "render") throw new Error("expected render result");
  expect(result.isFallback).toBe(true);
  expect(result.pageProps).toEqual({});
});

it("short-circuits getServerSideProps responses after res.end()", async () => {
const responsePromise = Promise.resolve(
new Response('{"ok":true}', {
Expand Down
50 changes: 50 additions & 0 deletions tests/pages-router.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,56 @@ describe("Pages Router integration", () => {
expect(json.pageProps).toMatchObject({ pid: "unknown" });
});

// Refs #1543: requests from bots/crawlers must skip the `fallback: true`
// loading shell and get a synchronous render of the real page, so that
// crawlers index actual content rather than `Loading...`. Mirrors Next.js's
// bot check in
// `.nextjs-ref/packages/next/src/server/route-modules/pages/pages-handler.ts`
// and the Next.js e2e regression test
// `.nextjs-ref/test/e2e/prerender-crawler.test.ts`.
it("renders synchronously (not the fallback shell) for crawler UAs on unlisted fallback: true paths", async () => {
  // Captures the JSON payload assigned to __NEXT_DATA__ in the rendered HTML.
  const nextDataPattern = /__NEXT_DATA__\s*=\s*(\{.*?\})\s*[;<]/;
  const crawlerUserAgents = [
    "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
    "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)",
    "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
    "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
    "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)",
  ];

  for (const userAgent of crawlerUserAgents) {
    // Attached to every assertion so a failure names the offending UA.
    const label = `UA: ${userAgent}`;
    // A fresh random slug per UA, so each request targets its own unlisted path.
    const slug = `bot-slug-${Math.random().toString(36).slice(2)}`;

    const response = await fetch(`${baseUrl}/products/${slug}`, {
      headers: { "user-agent": userAgent },
    });
    expect(response.status, label).toBe(200);

    const markup = await response.text();
    // The bot must receive the fully rendered page, never the loading shell.
    expect(markup, label).not.toContain("Loading product...");
    expect(markup, label).toMatch(new RegExp(`Product ID:.*${slug}`));

    const nextDataMatch = markup.match(nextDataPattern);
    expect(nextDataMatch, label).toBeTruthy();
    const nextData = JSON.parse(nextDataMatch![1]);
    expect(nextData.isFallback, label).toBe(false);
    expect(nextData.props.pageProps).toMatchObject({ pid: slug });
  }
});

it("still ships the fallback shell for normal browser UAs on unlisted fallback: true paths", async () => {
  // Mirror image of the crawler test: the bot flip must not affect real
  // browsers, so a plain Chrome UA still gets the loading shell.
  const chromeUserAgent =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36";

  const response = await fetch(`${baseUrl}/products/non-bot-slug`, {
    headers: { "user-agent": chromeUserAgent },
  });
  expect(response.status).toBe(200);

  const markup = await response.text();
  expect(markup).toContain("Loading product...");

  // The embedded __NEXT_DATA__ payload must flag the response as a fallback.
  const nextDataMatch = markup.match(/__NEXT_DATA__\s*=\s*(\{.*?\})\s*[;<]/);
  expect(nextDataMatch).toBeTruthy();
  const nextData = JSON.parse(nextDataMatch![1]);
  expect(nextData.isFallback).toBe(true);
});

it("includes isFallback: false in __NEXT_DATA__", async () => {
const res = await fetch(`${baseUrl}/products/widget`);
const html = await res.text();
Expand Down
Loading