From 2ad9ee45ef2ab65375a158e6e13e669bcc1b7c3c Mon Sep 17 00:00:00 2001 From: John Clement <70238417+jclement136@users.noreply.github.com> Date: Mon, 26 Jan 2026 10:23:09 -0500 Subject: [PATCH 1/8] Readability experiment - "About custom agents" (#59300) Co-authored-by: Jenni C <97056108+dihydroJenoxide@users.noreply.github.com> --- .../coding-agent/about-custom-agents.md | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/content/copilot/concepts/agents/coding-agent/about-custom-agents.md b/content/copilot/concepts/agents/coding-agent/about-custom-agents.md index e0f0f3964ec6..e0837e0e73f2 100644 --- a/content/copilot/concepts/agents/coding-agent/about-custom-agents.md +++ b/content/copilot/concepts/agents/coding-agent/about-custom-agents.md @@ -1,7 +1,7 @@ --- title: About custom agents shortTitle: Custom agents -intro: '{% data variables.copilot.custom_agents_caps_short %} enhance {% data variables.copilot.copilot_coding_agent %} with specialized assistance tailored to your needs.' +intro: '{% data variables.copilot.custom_agents_caps_short %} enhance {% data variables.copilot.copilot_coding_agent %} with assistance tailored to your needs.' product: '{% data reusables.gated-features.copilot-coding-agent %}
Sign up for {% data variables.product.prodname_copilot_short %} {% octicon "link-external" height:16 %}' versions: feature: copilot @@ -11,24 +11,26 @@ topics: ## About {% data variables.copilot.custom_agents_short %} -{% data variables.copilot.custom_agents_caps_short %} are specialized versions of {% data variables.copilot.copilot_coding_agent %} that you can tailor to your unique workflows, coding conventions, and use cases. Instead of repeatedly providing the same instructions and context, {% data variables.copilot.custom_agents_short %} allow you to define specialized agents that act like tailored teammates—following standards, using the right tools, and implementing team-specific practices. +{% data variables.copilot.custom_agents_caps_short %} are specialized versions of {% data variables.copilot.copilot_coding_agent %} that you can tailor to your unique workflows, coding conventions, and use cases. They act like tailored teammates that follow your standards, use the right tools, and implement team-specific practices. You define these agents once instead of repeatedly providing the same instructions and context. -{% data variables.copilot.custom_agents_caps_short %} are defined using Markdown files, called {% data variables.copilot.agent_profiles %}, that specify prompts, tools, and MCP servers. This allows individuals and teams to encode their conventions, frameworks, and desired outcomes directly into {% data variables.product.prodname_copilot_short %}. The {% data variables.copilot.agent_profile %} serves as the artifact that defines the {% data variables.copilot.copilot_custom_agent_short %}'s behavior, and assigning the agent to a task or issue instantiates the {% data variables.copilot.copilot_custom_agent_short %}. +You define {% data variables.copilot.custom_agents_short %} using Markdown files called {% data variables.copilot.agent_profiles %}. These files specify prompts, tools, and MCP servers. 
This allows you to encode your conventions, frameworks, and desired outcomes directly into {% data variables.product.prodname_copilot_short %}. + +The {% data variables.copilot.agent_profile %} defines the {% data variables.copilot.copilot_custom_agent_short %}'s behavior. When you assign the agent to a task or issue, it instantiates the {% data variables.copilot.copilot_custom_agent_short %}. ## {% data variables.copilot.agent_profile_caps %} format {% data variables.copilot.agent_profiles_caps %} are Markdown files with YAML frontmatter. In their simplest form, they include: -* **Name**: A unique identifier for the {% data variables.copilot.copilot_custom_agent_short %} -* **Description**: Explains the agent's purpose and capabilities -* **Prompt**: Custom instructions that define the agent's behavior and expertise -* **Tools**: Specific tools the agent can access. This is optional, and the default is access to all available tools, including built-in tools and MCP server tools. +* **Name**: A unique identifier for the {% data variables.copilot.copilot_custom_agent_short %}. +* **Description**: Explains the agent's purpose and capabilities. +* **Prompt**: Custom instructions that define the agent's behavior and expertise. +* **Tools** (optional): Specific tools the agent can access. By default, agents can access all available tools, including built-in tools and MCP server tools. -Organization and enterprise-level {% data variables.copilot.agent_profiles %} can also include MCP server configurations within the {% data variables.copilot.agent_profile %}, using the `mcp-server` property. +Organization and enterprise-level {% data variables.copilot.agent_profiles %} can also include MCP server configurations using the `mcp-server` property. ### Example {% data variables.copilot.agent_profile %} -This is a basic {% data variables.copilot.agent_profile %} with name, description, and prompt configured. 
+This example is a basic {% data variables.copilot.agent_profile %} with name, description, and prompt configured. ```text --- @@ -49,18 +51,27 @@ Focus on the following instructions: ## Where you can configure {% data variables.copilot.custom_agents_short %} -You can define {% data variables.copilot.agent_profiles %} at the repository level (`.github/agents/CUSTOM-AGENT-NAME.md` in your repository) for project-specific agents, or at the organization or enterprise level (`/agents/CUSTOM-AGENT-NAME.md` in a `.github-private` repository) for broader availability. See [AUTOTITLE](/copilot/how-tos/administer-copilot/manage-for-organization/prepare-for-custom-agents) and [AUTOTITLE](/copilot/how-tos/administer-copilot/manage-for-enterprise/manage-agents/prepare-for-custom-agents). +You can define {% data variables.copilot.agent_profiles %} at different levels: + +* **Repository level**: Create `.github/agents/CUSTOM-AGENT-NAME.md` in your repository for project-specific agents. +* **Organization or enterprise level**: Create `/agents/CUSTOM-AGENT-NAME.md` in a `.github-private` repository for broader availability. + +For more information, see [AUTOTITLE](/copilot/how-tos/administer-copilot/manage-for-organization/prepare-for-custom-agents) and [AUTOTITLE](/copilot/how-tos/administer-copilot/manage-for-enterprise/manage-agents/prepare-for-custom-agents). ## Where you can use {% data variables.copilot.custom_agents_short %} {% data reusables.copilot.custom-agents-ide-preview %} -Once created, your {% data variables.copilot.custom_agents_short %} are available wherever you can use {% data variables.copilot.copilot_coding_agent %}, including {% data variables.product.prodname_dotcom_the_website %} (the agents tab and panel, issue assignment, pull requests), the {% data variables.copilot.copilot_cli %}, and in {% data variables.product.prodname_vscode %}, JetBrains IDEs, Eclipse, and Xcode. 
+Once you create {% data variables.copilot.custom_agents_short %}, you can use them wherever {% data variables.copilot.copilot_coding_agent %} is available: + +* {% data variables.product.prodname_dotcom_the_website %}: The agents tab and panel, issue assignment, and pull requests +* {% data variables.copilot.copilot_cli %} +* IDEs: {% data variables.product.prodname_vscode %}, JetBrains IDEs, Eclipse, and Xcode -{% data variables.copilot.agent_profiles_caps %} can be used directly in {% data variables.product.prodname_vscode %}, JetBrains IDEs, Eclipse, and Xcode, though some properties may function differently, or be ignored, between environments. +You can use {% data variables.copilot.agent_profiles %} directly in {% data variables.product.prodname_vscode %}, JetBrains IDEs, Eclipse, and Xcode. Some properties may function differently or be ignored between environments. -For more information on using {% data variables.copilot.custom_agents_short %} in {% data variables.product.prodname_vscode %} specifically, see [{% data variables.copilot.custom_agents_caps_short %} in {% data variables.product.prodname_vscode_shortname %}](https://code.visualstudio.com/docs/copilot/customization/custom-agents) in the {% data variables.product.prodname_vscode_shortname %} documentation. +For more information on using {% data variables.copilot.custom_agents_short %} in {% data variables.product.prodname_vscode %}, see [{% data variables.copilot.custom_agents_caps_short %} in {% data variables.product.prodname_vscode_shortname %}](https://code.visualstudio.com/docs/copilot/customization/custom-agents). ## Next steps -To start creating your own {% data variables.copilot.custom_agents_short %}, see [AUTOTITLE](/copilot/how-tos/use-copilot-agents/coding-agent/create-custom-agents). +To create your own {% data variables.copilot.custom_agents_short %}, see [AUTOTITLE](/copilot/how-tos/use-copilot-agents/coding-agent/create-custom-agents). 
From bdbc204a9a73ad5d943f75ca4990641025eb647e Mon Sep 17 00:00:00 2001 From: John Clement <70238417+jclement136@users.noreply.github.com> Date: Mon, 26 Jan 2026 10:23:12 -0500 Subject: [PATCH 2/8] Readability experiment "What is GitHub Copilot?" (#59219) Co-authored-by: Jenni C <97056108+dihydroJenoxide@users.noreply.github.com> --- .../get-started/what-is-github-copilot.md | 64 ++++++++++--------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/content/copilot/get-started/what-is-github-copilot.md b/content/copilot/get-started/what-is-github-copilot.md index a188cb7254ae..8707d849287e 100644 --- a/content/copilot/get-started/what-is-github-copilot.md +++ b/content/copilot/get-started/what-is-github-copilot.md @@ -1,6 +1,6 @@ --- title: What is GitHub Copilot? -intro: 'Learn what {% data variables.product.prodname_copilot %} is and what you can do with it.' +intro: 'Learn what {% data variables.product.prodname_copilot_short %} is and what you can do with it.' versions: feature: copilot topics: @@ -29,55 +29,59 @@ category: - Learn about Copilot --- -{% data variables.product.prodname_copilot %} is an AI coding assistant that helps you write code faster and with less effort, allowing you to focus more energy on problem solving and collaboration. +{% data variables.product.prodname_copilot %} is an AI coding assistant that helps you write code faster and with less effort. Then, you can focus more energy on problem solving and collaboration. -{% data variables.product.prodname_copilot %} has been proven to increase developer productivity and accelerate the pace of software development. See [Research: quantifying {% data variables.product.prodname_copilot %}’s impact on developer productivity and happiness](https://github.blog/2022-09-07-research-quantifying-github-copilots-impact-on-developer-productivity-and-happiness/) in the {% data variables.product.prodname_dotcom %} blog. 
+Research shows that {% data variables.product.prodname_copilot_short %} increases developer productivity and accelerates software development. See [Research: quantifying {% data variables.product.prodname_copilot %}’s impact on developer productivity and happiness](https://github.blog/2022-09-07-research-quantifying-github-copilots-impact-on-developer-productivity-and-happiness/) in the {% data variables.product.prodname_dotcom %} blog. -## {% data variables.product.prodname_copilot_short %} features +## Features -{% data variables.product.prodname_copilot %} includes a suite of features. You can use {% data variables.product.prodname_copilot_short %} to: +You can use {% data variables.product.prodname_copilot_short %} to: -* Get code suggestions as you type in your IDE -* Chat with {% data variables.product.prodname_copilot_short %} to ask for help with your code -* Ask {% data variables.product.prodname_copilot_short %} for help using the command line -* Organize and share task-specific context with {% data variables.copilot.copilot_spaces %} to get more relevant answers -* Generate a description of the changes in a pull request -* Work on code changes and create a pull request for you to review _({% data variables.copilot.copilot_pro_plus_short %}, {% data variables.copilot.copilot_business_short %}, and {% data variables.copilot.copilot_enterprise_short %} only)_ +* Get code suggestions as you type in your IDE. +* Chat with {% data variables.product.prodname_copilot_short %} to get help with your code. +* Ask for help using the command line. +* Organize and share context with {% data variables.copilot.copilot_spaces %} to get more relevant answers. +* Generate descriptions of changes in a pull request. +* Work on code changes and create a pull request for you to review. Available in {% data variables.copilot.copilot_pro_plus_short %}, {% data variables.copilot.copilot_business_short %}, and {% data variables.copilot.copilot_enterprise_short %} only. 
-{% data variables.product.prodname_copilot_short %} is available: +Use {% data variables.product.prodname_copilot_short %} in the following places: -* In your IDE -* In {% data variables.product.prodname_mobile %}, as a chat interface -* In {% data variables.product.prodname_windows_terminal %} Canary, through the Terminal Chat interface -* On the command line, through the {% data variables.product.prodname_cli %} -* On the {% data variables.product.github %} website +* Your IDE +* {% data variables.product.prodname_mobile %}, as a chat interface +* {% data variables.product.prodname_windows_terminal %} Canary, through the Terminal Chat interface +* The command line, through the {% data variables.product.prodname_cli %} +* The {% data variables.product.github %} website See [AUTOTITLE](/copilot/about-github-copilot/github-copilot-features). -## Getting access to {% data variables.product.prodname_copilot_short %} +## Get access -There are a few ways you can start using {% data variables.product.prodname_copilot_short %}, depending on your role and needs. +You can start using {% data variables.product.prodname_copilot_short %} in several ways, depending on your role and needs. -### For individuals +### Individuals -* **Try {% data variables.product.prodname_copilot_short %} for free**: Use {% data variables.copilot.copilot_free_short %} to explore core {% data variables.product.prodname_copilot_short %} features with no paid plan required. -* **Subscribe to a paid plan**: Upgrade to {% data variables.copilot.copilot_pro_short %} or {% data variables.copilot.copilot_pro_plus_short %} for full access to premium features and more generous usage limits. You can try {% data variables.copilot.copilot_pro_short %} for free with a one-time 30-day trial. -* **Eligible for free {% data variables.copilot.copilot_pro_short %} access?** Students, teachers, and open source maintainers may qualify for {% data variables.copilot.copilot_pro_short %} at no cost. 
See [AUTOTITLE](/copilot/managing-copilot/managing-copilot-as-an-individual-subscriber/getting-free-access-to-copilot-as-a-student-teacher-or-maintainer). -* **Organization members**: If your organization or enterprise has a {% data variables.product.prodname_copilot %} plan, you can request access to {% data variables.product.prodname_copilot_short %} by going to [https://github.com/settings/copilot](https://github.com/settings/copilot) and requesting access under "Get {% data variables.product.prodname_copilot_short %} from an organization." +* **Try {% data variables.product.prodname_copilot_short %} for free.** Use {% data variables.copilot.copilot_free_short %} to explore core features with no paid plan required. +* **Subscribe to a paid plan.** Upgrade to {% data variables.copilot.copilot_pro_short %} or {% data variables.copilot.copilot_pro_plus_short %} for full access to premium features and more generous usage limits. + * Try {% data variables.copilot.copilot_pro_short %} for free with a one-time 30-day trial. +* **Get free access if you're eligible.** Students, teachers, and open source maintainers may qualify for {% data variables.copilot.copilot_pro_short %} at no cost. See [AUTOTITLE](/copilot/managing-copilot/managing-copilot-as-an-individual-subscriber/getting-free-access-to-copilot-as-a-student-teacher-or-maintainer). +* **Request access from your organization.** If your organization or enterprise has a {% data variables.product.prodname_copilot %} plan, you can request access by going to [https://github.com/settings/copilot](https://github.com/settings/copilot) and requesting access under "Get {% data variables.product.prodname_copilot_short %} from an organization." See [AUTOTITLE](/copilot/managing-copilot/managing-copilot-as-an-individual-subscriber/getting-started-with-copilot-on-your-personal-account/getting-started-with-a-copilot-plan) for more information. 
-### For organizations and enterprises +### Organizations and enterprises -* **Organization owners**: Purchase {% data variables.copilot.copilot_business_short %} for your team. See [AUTOTITLE](/copilot/managing-copilot/managing-github-copilot-in-your-organization/subscribing-to-copilot-for-your-organization). If your organization is owned by an enterprise that has a {% data variables.product.prodname_copilot_short %} subscription, you can ask your enterprise owner to enable {% data variables.product.prodname_copilot_short %} for your organization by going to [https://github.com/settings/copilot](https://github.com/settings/copilot) and requesting access under "Get {% data variables.product.prodname_copilot_short %} from an organization." -* **Enterprise owners**: Purchase {% data variables.copilot.copilot_business_short %} or {% data variables.copilot.copilot_enterprise_short %} for your enterprise. See [AUTOTITLE](/copilot/managing-copilot/managing-copilot-for-your-enterprise/subscribing-to-copilot-for-your-enterprise). +**Organization owners** can purchase {% data variables.copilot.copilot_business_short %} for their team. See [AUTOTITLE](/copilot/managing-copilot/managing-github-copilot-in-your-organization/subscribing-to-copilot-for-your-organization). -If you **don't need other {% data variables.product.github %} features**, you can create an enterprise account specifically for managing {% data variables.copilot.copilot_business_short %} licenses. This gives you enterprise-grade authentication options without charges for {% data variables.product.prodname_enterprise %} licenses. See [AUTOTITLE](/copilot/concepts/about-enterprise-accounts-for-copilot-business). +If your organization is owned by an enterprise that has a {% data variables.product.prodname_copilot_short %} subscription, you can ask your enterprise owner to enable {% data variables.product.prodname_copilot_short %} for your organization. 
Go to [https://github.com/settings/copilot](https://github.com/settings/copilot) and request access under "Get {% data variables.product.prodname_copilot_short %} from an organization." + +**Enterprise owners** can purchase {% data variables.copilot.copilot_business_short %} or {% data variables.copilot.copilot_enterprise_short %} for their enterprise. See [AUTOTITLE](/copilot/managing-copilot/managing-copilot-for-your-enterprise/subscribing-to-copilot-for-your-enterprise). + +If you don't need other {% data variables.product.github %} features, you can create an enterprise account specifically for managing {% data variables.copilot.copilot_business_short %} licenses. This gives you enterprise-grade authentication without charges for {% data variables.product.prodname_enterprise %} licenses. See [AUTOTITLE](/copilot/concepts/about-enterprise-accounts-for-copilot-business). ## Next steps -* To learn more about the {% data variables.product.prodname_copilot_short %} features, see [AUTOTITLE](/copilot/about-github-copilot/github-copilot-features). -* To start using {% data variables.product.prodname_copilot_short %}, see [AUTOTITLE](/copilot/setting-up-github-copilot). +* Learn more about {% data variables.product.prodname_copilot_short %} features. See [AUTOTITLE](/copilot/about-github-copilot/github-copilot-features). +* Start using {% data variables.product.prodname_copilot_short %}. See [AUTOTITLE](/copilot/setting-up-github-copilot). 
## Further reading From cf3639fb2e5db6c98e96b4c18251976417f9eb8d Mon Sep 17 00:00:00 2001 From: Isaac Brown <101839405+isaacmbrown@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:32:38 +0000 Subject: [PATCH 3/8] [EDI] CodeQL query packs (#59269) Co-authored-by: Sophie <29382425+sophietheking@users.noreply.github.com> --- .../codeql/codeql-query-packs.md | 48 +++++++++++++++++++ .../concepts/code-scanning/codeql/index.md | 1 + .../customizing-analysis-with-codeql-packs.md | 31 +----------- 3 files changed, 50 insertions(+), 30 deletions(-) create mode 100644 content/code-security/concepts/code-scanning/codeql/codeql-query-packs.md diff --git a/content/code-security/concepts/code-scanning/codeql/codeql-query-packs.md b/content/code-security/concepts/code-scanning/codeql/codeql-query-packs.md new file mode 100644 index 000000000000..70f80ffb838e --- /dev/null +++ b/content/code-security/concepts/code-scanning/codeql/codeql-query-packs.md @@ -0,0 +1,48 @@ +--- +title: CodeQL query packs +intro: You can choose from different built-in {% data variables.product.prodname_codeql %} query suites to use in your {% data variables.product.prodname_codeql %} {% data variables.product.prodname_code_scanning %} setup. +product: '{% data reusables.gated-features.codeql %}' +versions: + fpt: '*' + ghes: '*' + ghec: '*' +topics: + - Code scanning + - CodeQL +contentType: concepts +--- + +{% data reusables.code-scanning.codeql-cli-version-ghes %} + +## About {% data variables.product.prodname_codeql %} packs + +{% data variables.product.prodname_codeql %} packs are used to create, share, depend on, and run {% data variables.product.prodname_codeql %} queries and libraries. {% data variables.product.prodname_codeql %} packs contain queries, library files, query suites, and metadata. You can customize your {% data variables.product.prodname_codeql %} analysis by downloading packs created by others and running them on your codebase. 
+ +The {% data variables.product.prodname_codeql_cli %} bundle includes queries that are maintained by {% data variables.product.company_short %} experts, security researchers, and community contributors. If you want to run queries developed by other organizations, {% data variables.product.prodname_codeql %} query packs provide an efficient and reliable way to download and run queries, while model packs ({% data variables.release-phases.public_preview %}) can be used to expand {% data variables.product.prodname_code_scanning %} analysis to recognize libraries and frameworks that are not supported by default. + +## Types of {% data variables.product.prodname_codeql %} packs + +There are three types of {% data variables.product.prodname_codeql %} packs: query packs, library packs, and model packs. + +* Query packs contain a set of pre-compiled queries that can be evaluated on a {% data variables.product.prodname_codeql %} database. Query packs are designed to be run. When a query pack is published, the bundle includes all the transitive dependencies and pre-compiled representations of each query, in addition to the query sources. This ensures consistent and efficient execution of the queries in the pack. + +* Library packs are designed to be used by query packs (or other library packs) and do not contain queries themselves. The libraries are not compiled separately. + +* Model packs can be used to expand {% data variables.product.prodname_code_scanning %} analysis to recognize libraries and frameworks that are not supported by default. Model packs are currently in {% data variables.release-phases.public_preview %} and subject to change. During the {% data variables.release-phases.public_preview %}, model packs are available for {% data variables.code-scanning.codeql_model_packs_support %} analysis. 
For more information about creating your own model packs, see [AUTOTITLE](/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack). + +## Where to find query packs + +The standard {% data variables.product.prodname_codeql %} packs for all supported languages are published in the [{% data variables.product.prodname_container_registry %}](https://github.com/orgs/codeql/packages). If you installed the {% data variables.product.prodname_codeql_cli %} in the standard way, using the {% data variables.product.prodname_codeql_cli %} bundle, the core query packs are already downloaded and available to you. They are: + + * `codeql/cpp-queries` + * `codeql/csharp-queries` + * `codeql/go-queries` + * `codeql/java-queries` + * `codeql/javascript-queries` + * `codeql/python-queries` + * `codeql/ruby-queries` + * `codeql/swift-queries` + +You can also use the {% data variables.product.prodname_codeql_cli %} to create your own {% data variables.product.prodname_codeql %} packs, add dependencies to packs, and install or update dependencies. + +You can publish {% data variables.product.prodname_codeql %} packs that you have created, using the {% data variables.product.prodname_codeql_cli %}. For more information on publishing and downloading {% data variables.product.prodname_codeql %} packs, see [AUTOTITLE](/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/publishing-and-using-codeql-packs). 
diff --git a/content/code-security/concepts/code-scanning/codeql/index.md b/content/code-security/concepts/code-scanning/codeql/index.md index 91accd1bae58..008a3575d495 100644 --- a/content/code-security/concepts/code-scanning/codeql/index.md +++ b/content/code-security/concepts/code-scanning/codeql/index.md @@ -16,4 +16,5 @@ children: - /about-codeql-for-vs-code - /about-codeql-workspaces - /query-reference-files + - /codeql-query-packs --- diff --git a/content/code-security/tutorials/customize-code-scanning/customizing-analysis-with-codeql-packs.md b/content/code-security/tutorials/customize-code-scanning/customizing-analysis-with-codeql-packs.md index 63c8bdb91b64..5e00cd146905 100644 --- a/content/code-security/tutorials/customize-code-scanning/customizing-analysis-with-codeql-packs.md +++ b/content/code-security/tutorials/customize-code-scanning/customizing-analysis-with-codeql-packs.md @@ -18,39 +18,10 @@ redirect_from: contentType: tutorials --- -## About {% data variables.product.prodname_codeql %} packs - -{% data reusables.code-scanning.codeql-cli-version-ghes %} - - {% data variables.product.prodname_codeql %} packs are used to create, share, depend on, and run {% data variables.product.prodname_codeql %} queries and libraries. {% data variables.product.prodname_codeql %} packs contain queries, library files, query suites, and metadata. You can customize your {% data variables.product.prodname_codeql %} analysis by downloading packs created by others and running them on your codebase. - -There are three types of {% data variables.product.prodname_codeql %} packs: query packs, library packs, and model packs. - -* Query packs contain a set of pre-compiled queries that can be evaluated on a {% data variables.product.prodname_codeql %} database. Query packs are designed to be run. When a query pack is published, the bundle includes all the transitive dependencies and pre-compiled representations of each query, in addition to the query sources. 
This ensures consistent and efficient execution of the queries in the pack. - -* Library packs are designed to be used by query packs (or other library packs) and do not contain queries themselves. The libraries are not compiled separately. - -* Model packs can be used to expand {% data variables.product.prodname_code_scanning %} analysis to recognize libraries and frameworks that are not supported by default. Model packs are currently in {% data variables.release-phases.public_preview %} and subject to change. During the {% data variables.release-phases.public_preview %}, model packs are available for {% data variables.code-scanning.codeql_model_packs_support %} analysis. For more information about creating your own model packs, see [AUTOTITLE](/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack). - -The standard {% data variables.product.prodname_codeql %} packs for all supported languages are published in the [{% data variables.product.prodname_container_registry %}](https://github.com/orgs/codeql/packages). If you installed the {% data variables.product.prodname_codeql_cli %} in the standard way, using the {% data variables.product.prodname_codeql_cli %} bundle, the core query packs are already downloaded and available to you. They are: - - * `codeql/cpp-queries` - * `codeql/csharp-queries` - * `codeql/go-queries` - * `codeql/java-queries` - * `codeql/javascript-queries` - * `codeql/python-queries` - * `codeql/ruby-queries` - * `codeql/swift-queries` - -You can also use the {% data variables.product.prodname_codeql_cli %} to create your own {% data variables.product.prodname_codeql %} packs, add dependencies to packs, and install or update dependencies. For more information, see [AUTOTITLE](/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-and-working-with-codeql-packs). 
- -You can publish {% data variables.product.prodname_codeql %} packs that you have created, using the {% data variables.product.prodname_codeql_cli %}. For more information on publishing and downloading {% data variables.product.prodname_codeql %} packs, see [AUTOTITLE](/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/publishing-and-using-codeql-packs). +You can customize your {% data variables.product.prodname_codeql %} analysis by downloading packs created by others and running them on your codebase. For more information, see [AUTOTITLE](/code-security/concepts/code-scanning/codeql/codeql-query-packs). ## Downloading and using {% data variables.product.prodname_codeql %} query packs -The {% data variables.product.prodname_codeql_cli %} bundle includes queries that are maintained by {% data variables.product.company_short %} experts, security researchers, and community contributors. If you want to run queries developed by other organizations, {% data variables.product.prodname_codeql %} query packs provide an efficient and reliable way to download and run queries, while model packs ({% data variables.release-phases.public_preview %}) can be used to expand {% data variables.product.prodname_code_scanning %} analysis to recognize libraries and frameworks that are not supported by default. For more information about query packs, see [AUTOTITLE](/code-security/code-scanning/introduction-to-code-scanning/about-code-scanning-with-codeql#about-codeql-queries). For information about writing your own model packs, see [AUTOTITLE](/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-model-pack). - Before you can use a {% data variables.product.prodname_codeql %} query pack to analyze a database, you must download any packages you require from the {% data variables.product.company_short %} {% data variables.product.prodname_container_registry %}. 
This can be done either by using the `--download` flag as part of the `codeql database analyze` command, or running `codeql pack download`. If a package is not publicly available, you will need to use a {% data variables.product.prodname_github_app %} or {% data variables.product.pat_generic %} to authenticate. For more information and an example, see [AUTOTITLE](/code-security/codeql-cli/getting-started-with-the-codeql-cli/uploading-codeql-analysis-results-to-github#uploading-results-to-github). | Option | Required | Usage | From 0793333ff03e5cfa0fda32b4977808150990fcf4 Mon Sep 17 00:00:00 2001 From: Isaac Brown <101839405+isaacmbrown@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:40:02 +0000 Subject: [PATCH 4/8] [EDI] Concepts article on Dependabot PRs (#59265) Co-authored-by: Sam Browning <106113886+sabrowning1@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../about-dependabot-pull-requests.md | 42 +++++++++++++++++++ .../about-dependabot-security-updates.md | 8 ---- .../about-dependabot-version-updates.md | 10 ----- .../concepts/supply-chain-security/index.md | 1 + .../customizing-dependabot-security-prs.md | 17 +++----- 5 files changed, 48 insertions(+), 30 deletions(-) create mode 100644 content/code-security/concepts/supply-chain-security/about-dependabot-pull-requests.md diff --git a/content/code-security/concepts/supply-chain-security/about-dependabot-pull-requests.md b/content/code-security/concepts/supply-chain-security/about-dependabot-pull-requests.md new file mode 100644 index 000000000000..1b539d1883cc --- /dev/null +++ b/content/code-security/concepts/supply-chain-security/about-dependabot-pull-requests.md @@ -0,0 +1,42 @@ +--- +title: About Dependabot pull requests +intro: 'Understand the frequency and customization options of pull requests for version and security updates.' 
+shortTitle: Dependabot pull requests +versions: + fpt: '*' + ghec: '*' + ghes: '*' +contentType: concepts +--- + +## Pull requests for security updates + +If you've enabled security updates, pull requests for security updates are triggered by a {% data variables.product.prodname_dependabot %} alert for a dependency on your default branch. {% data variables.product.prodname_dependabot %} automatically raises a pull request to update the vulnerable dependency. + +Each pull request contains everything you need to quickly and safely review and merge a proposed fix into your project. This includes information about the vulnerability like release notes, changelog entries, and commit details. Details of which vulnerability a pull request resolves are hidden from anyone who does not have access to {% data variables.product.prodname_dependabot_alerts %} for the repository. + +When you merge a pull request that contains a security update, the corresponding {% data variables.product.prodname_dependabot %} alert is marked as resolved for your repository. For more information about {% data variables.product.prodname_dependabot %} pull requests, see [AUTOTITLE](/code-security/dependabot/working-with-dependabot/managing-pull-requests-for-dependency-updates). + +{% data reusables.dependabot.automated-tests-note %} + +### Customizing pull requests for security updates + +You can customize how {% data variables.product.prodname_dependabot %} raises pull requests for security updates, so that they best fit your project's security priorities and processes. For example: +* **Optimize {% data variables.product.prodname_dependabot %} pull requests to prioritize meaningful updates** by grouping multiple updates into a single pull request. +* Apply custom labels to **integrate {% data variables.product.prodname_dependabot %}'s pull requests** into your existing workflows. + +Similar to version updates, customization options for security updates are defined in the `dependabot.yml` file. 
If you have already customized the `dependabot.yml` file for version updates, then many of the configuration options that you have defined may automatically apply to security updates, too. However, there are a couple of important points to note:
diff --git a/content/code-security/concepts/supply-chain-security/about-dependabot-security-updates.md b/content/code-security/concepts/supply-chain-security/about-dependabot-security-updates.md index 28c67b3d048c..9755c2a11e49 100644 --- a/content/code-security/concepts/supply-chain-security/about-dependabot-security-updates.md +++ b/content/code-security/concepts/supply-chain-security/about-dependabot-security-updates.md @@ -65,14 +65,6 @@ If you enable _{% data variables.product.prodname_dependabot_security_updates %} {% data reusables.dependabot.dependabot-actions-support %} -## About pull requests for security updates - -Each pull request contains everything you need to quickly and safely review and merge a proposed fix into your project. This includes information about the vulnerability like release notes, changelog entries, and commit details. Details of which vulnerability a pull request resolves are hidden from anyone who does not have access to {% data variables.product.prodname_dependabot_alerts %} for the repository. - -When you merge a pull request that contains a security update, the corresponding {% data variables.product.prodname_dependabot %} alert is marked as resolved for your repository. For more information about {% data variables.product.prodname_dependabot %} pull requests, see [AUTOTITLE](/code-security/dependabot/working-with-dependabot/managing-pull-requests-for-dependency-updates). - -{% data reusables.dependabot.automated-tests-note %} - ## About grouped security updates To further reduce the number of pull requests you may be seeing, you can enable grouped security updates to group sets of dependencies together (per package ecosystem). {% data variables.product.prodname_dependabot %} then raises a single pull request to update as many vulnerable dependencies as possible in the group to secure versions at the same time. 
diff --git a/content/code-security/concepts/supply-chain-security/about-dependabot-version-updates.md b/content/code-security/concepts/supply-chain-security/about-dependabot-version-updates.md index c0bbfd9a6e5a..6426aa641a70 100644 --- a/content/code-security/concepts/supply-chain-security/about-dependabot-version-updates.md +++ b/content/code-security/concepts/supply-chain-security/about-dependabot-version-updates.md @@ -62,16 +62,6 @@ For each action in the file, {% data variables.product.prodname_dependabot %} ch To enable this feature, see [AUTOTITLE](/code-security/how-tos/secure-your-supply-chain/secure-your-dependencies/keeping-your-actions-up-to-date-with-dependabot). -## Frequency of {% data variables.product.prodname_dependabot %} pull requests - -You specify how often to check each ecosystem for new versions in the configuration file: daily, weekly, or monthly. - -{% data reusables.dependabot.initial-updates %} For more information, see [AUTOTITLE](/code-security/dependabot/dependabot-version-updates/optimizing-pr-creation-version-updates). - -If you've enabled security updates, you'll sometimes see extra pull requests for security updates. These are triggered by a {% data variables.product.prodname_dependabot %} alert for a dependency on your default branch. {% data variables.product.prodname_dependabot %} automatically raises a pull request to update the vulnerable dependency. 
- -{% data reusables.dependabot.version-updates-skip-scheduled-runs %} - ## About automatic deactivation of {% data variables.product.prodname_dependabot_updates %} {% data reusables.dependabot.automatic-deactivation-link %} diff --git a/content/code-security/concepts/supply-chain-security/index.md b/content/code-security/concepts/supply-chain-security/index.md index 663847714aa2..3157f2f7802a 100644 --- a/content/code-security/concepts/supply-chain-security/index.md +++ b/content/code-security/concepts/supply-chain-security/index.md @@ -16,6 +16,7 @@ children: - about-dependabot-alerts - about-dependabot-security-updates - about-dependabot-version-updates + - about-dependabot-pull-requests - about-the-dependabot-yml-file - about-dependabot-auto-triage-rules - about-dependabot-on-github-actions-runners diff --git a/content/code-security/how-tos/secure-your-supply-chain/manage-your-dependency-security/customizing-dependabot-security-prs.md b/content/code-security/how-tos/secure-your-supply-chain/manage-your-dependency-security/customizing-dependabot-security-prs.md index 7a9fbbdab26c..7ae2313a7a37 100644 --- a/content/code-security/how-tos/secure-your-supply-chain/manage-your-dependency-security/customizing-dependabot-security-prs.md +++ b/content/code-security/how-tos/secure-your-supply-chain/manage-your-dependency-security/customizing-dependabot-security-prs.md @@ -19,20 +19,13 @@ redirect_from: contentType: how-tos --- -## About customizing pull requests for security updates - -You can customize how {% data variables.product.prodname_dependabot %} raises pull requests for security updates, so that they best fit your project's security priorities and processes. For example: -* **Optimize {% data variables.product.prodname_dependabot %} pull requests to prioritize meaningful updates** by grouping multiple updates into a single pull request. 
-* Applying custom labels to **integrate {% data variables.product.prodname_dependabot %}'s pull requests** into your existing workflows. - -Similar to version updates, customization options for security updates are defined in the `dependabot.yml` file. If you have already customized the `dependabot.yml` for version updates, then many of the configuration options that you have defined could automatically apply to security updates, too. However, there's a couple of important points to note: -* {% data variables.product.prodname_dependabot_security_updates %} are **always triggered by a security advisory**, rather than running according to the `schedule` you have set in the `dependabot.yml` for version updates. -* {% data variables.product.prodname_dependabot %} raises pull requests for security updates against the **default branch only**. If your configuration sets a value for `target-branch`, then the customization for that package ecosystem will only apply to version updates by default. +## Preparing to customize pull requests If you haven't yet configured a `dependabot.yml` file for your repository and you want to customize pull requests for security updates, you must first: -* Check in a `dependabot.yml` file into the `.github` directory of your repository. For more information, see [AUTOTITLE](/code-security/dependabot/dependabot-version-updates/configuring-dependabot-version-updates#enabling-dependabot-version-updates). -* Set all the required keys. For more information, see [Required keys](/code-security/dependabot/working-with-dependabot/dependabot-options-reference#required-keys). -* If you want the customization for a package ecosystem to **only apply to security updates** (and exclude version updates), set the `open-pull-requests-limit` key to `0`. + +1. Check in a `dependabot.yml` file into the `.github` directory of your repository. 
For more information, see [AUTOTITLE](/code-security/dependabot/dependabot-version-updates/configuring-dependabot-version-updates#enabling-dependabot-version-updates). +1. Set all the required keys. For more information, see [Required keys](/code-security/dependabot/working-with-dependabot/dependabot-options-reference#required-keys). +1. If you want the customization for a package ecosystem to **only apply to security updates** (and exclude version updates), set the `open-pull-requests-limit` key to `0`. You can then consider what your needs and priorities are for security updates, and apply a combination of the customization options outlined below. From 434787e1ef307ddb00dfb38763d8413e71a3f01e Mon Sep 17 00:00:00 2001 From: Allan Guigou <34221163+AllanGuigou@users.noreply.github.com> Date: Mon, 26 Jan 2026 11:31:15 -0500 Subject: [PATCH 5/8] Add case function documentation (#58868) Co-authored-by: mc <42146119+mchammer01@users.noreply.github.com> --- .../workflows-and-actions/expressions.md | 52 +++++++++++++------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/content/actions/reference/workflows-and-actions/expressions.md b/content/actions/reference/workflows-and-actions/expressions.md index 418da047a5fe..c7c1915b7d16 100644 --- a/content/actions/reference/workflows-and-actions/expressions.md +++ b/content/actions/reference/workflows-and-actions/expressions.md @@ -81,21 +81,6 @@ env: * {% data variables.product.prodname_dotcom %} ignores case when comparing strings. * Objects and arrays are only considered equal when they are the same instance. -{% data variables.product.prodname_dotcom %} provides a way to create conditional logic in expressions using binary logical operators (`&&` and `||`). This pattern can be used to achieve similar functionality to the ternary operator (`?:`) found in many programming languages, while actually using only binary operators. 
- -### Example - -{% raw %} - -```yaml -env: - MY_ENV_VAR: ${{ github.ref == 'refs/heads/main' && 'value_for_main_branch' || 'value_for_other_branches' }} -``` - -{% endraw %} - -In this example, we're using a combination of `&&` and `||` operators to set the value of the `MY_ENV_VAR` environment variable based on whether the {% data variables.product.prodname_dotcom %} reference is set to `refs/heads/main` or not. If it is, the variable is set to `value_for_main_branch`. Otherwise, it is set to `value_for_other_branches`. It is important to note that the first value after the `&&` must be truthy. Otherwise, the value after the `||` will always be returned. - ## Functions {% data variables.product.prodname_dotcom %} offers a set of built-in functions that you can use in expressions. Some functions cast values to a string to perform comparisons. {% data variables.product.prodname_dotcom %} casts data types to a string using these conversions: @@ -287,6 +272,43 @@ Creates a hash for all `.rb` files in the `lib` directory at root level, includi `hashFiles('/lib/**/*.rb', '!/lib/foo/*.rb')` +### case + +`case( pred1, val1, pred2, val2, ..., default )` + +Evaluates predicates in order and returns the value corresponding to the first predicate that evaluates to `true`. If no predicate matches, it returns the last argument as the default value. + +#### Example with a single predicate + +{% raw %} + +```yaml +env: + MY_ENV_VAR: ${{ case(github.ref == 'refs/heads/main', 'production', 'development') }} +``` + +{% endraw %} + +Sets `MY_ENV_VAR` to `production` when the ref is `refs/heads/main`, otherwise sets it to `development`. 
+ +#### Example with multiple predicates + +{% raw %} + +```yaml +env: + MY_ENV_VAR: ${{ case( + github.ref == 'refs/heads/main', 'production', + github.ref == 'refs/heads/staging', 'staging', + startsWith(github.ref, 'refs/heads/feature/'), 'development', + 'unknown' + ) }} +``` + +{% endraw %} + +Sets `MY_ENV_VAR` based on the branch: `production` for `main`, `staging` for `staging`, `development` for branches starting with `feature/`, or `unknown` for all other branches. + ## Status check functions You can use the following status check functions as expressions in `if` conditionals. A default status check of `success()` is applied unless you include one of these functions. For more information about `if` conditionals, see [AUTOTITLE](/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idif) and [AUTOTITLE](/actions/creating-actions/metadata-syntax-for-github-actions#runsstepsif). From 7abf80d21149b5337a768df5da7fc2d8980c49e9 Mon Sep 17 00:00:00 2001 From: Kevin Heis Date: Mon, 26 Jan 2026 08:50:54 -0800 Subject: [PATCH 6/8] Rebuild link checker system (#59317) --- .github/workflows/link-check-daily.yml | 106 -- .github/workflows/link-check-external.yml | 66 + .github/workflows/link-check-internal.yml | 159 ++ .github/workflows/link-check-on-pr.yml | 42 +- package.json | 5 +- src/links/lib/extract-links.ts | 339 ++++ src/links/lib/link-report.ts | 470 ++++++ src/links/scripts/check-links-external.ts | 372 +++++ src/links/scripts/check-links-internal.ts | 363 +++++ src/links/scripts/check-links-pr.ts | 331 ++++ .../rendered-content-link-checker-cli.ts | 149 -- .../scripts/rendered-content-link-checker.ts | 1358 ----------------- src/links/tests/extract-links.ts | 211 +++ src/links/tests/link-report.ts | 329 ++++ 14 files changed, 2667 insertions(+), 1633 deletions(-) delete mode 100644 .github/workflows/link-check-daily.yml create mode 100644 .github/workflows/link-check-external.yml create mode 100644 .github/workflows/link-check-internal.yml 
create mode 100644 src/links/lib/extract-links.ts create mode 100644 src/links/lib/link-report.ts create mode 100644 src/links/scripts/check-links-external.ts create mode 100644 src/links/scripts/check-links-internal.ts create mode 100644 src/links/scripts/check-links-pr.ts delete mode 100755 src/links/scripts/rendered-content-link-checker-cli.ts delete mode 100755 src/links/scripts/rendered-content-link-checker.ts create mode 100644 src/links/tests/extract-links.ts create mode 100644 src/links/tests/link-report.ts diff --git a/.github/workflows/link-check-daily.yml b/.github/workflows/link-check-daily.yml deleted file mode 100644 index b9469bd0b163..000000000000 --- a/.github/workflows/link-check-daily.yml +++ /dev/null @@ -1,106 +0,0 @@ -name: 'Link Checker: Daily' - -# **What it does**: This script once a day checks all English links and reports in issue if any are broken. -# **Why we have it**: We want to know if any links break internally or externally. -# **Who does it impact**: Docs content. 
- -on: - workflow_dispatch: - schedule: - - cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST - -permissions: - contents: read - issues: write - -jobs: - check_all_english_links: - name: Check all links - if: github.repository == 'github/docs-internal' - runs-on: ubuntu-latest - steps: - - name: Check that gh CLI is installed - run: gh --version - - - name: Check out repo's default branch - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - - - uses: ./.github/actions/node-npm-setup - - - name: Figure out which docs-early-access branch to checkout, if internal repo - if: ${{ github.repository == 'github/docs-internal' }} - id: check-early-access - env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_BASE }} - run: npm run what-docs-early-access-branch - - - name: Check out docs-early-access too, if internal repo - if: ${{ github.repository == 'github/docs-internal' }} - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - with: - repository: github/docs-early-access - token: ${{ secrets.DOCS_BOT_PAT_BASE }} - path: docs-early-access - ref: ${{ steps.check-early-access.outputs.branch }} - - - name: Merge docs-early-access repo's folders - if: ${{ github.repository == 'github/docs-internal' }} - run: src/early-access/scripts/merge-early-access.sh - - - name: Restore disk-cache file for external link checking - uses: actions/cache@v5 - with: - path: external-link-checker-db.json - key: external-link-checker-${{ hashFiles('src/links/scripts/rendered-content-link-checker.ts') }} - - - name: Insight into external link checker DB json file (before) - run: | - if [ -f external-link-checker-db.json ]; then - echo "external-link-checker-db.json exists" - echo -n "Number of URLs in cache: " - jq '.urls | keys_unsorted' external-link-checker-db.json | wc -l - else - echo "external-link-checker-db.json does not exist" - fi - - - name: Run link checker - env: - 
DISABLE_REWRITE_ASSET_URLS: true - LEVEL: 'critical' - # Set this to true in repo scope to enable debug logs - # ACTIONS_RUNNER_DEBUG = true - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_BASE }} - REPORT_AUTHOR: docs-bot - REPORT_LABEL: broken link report - REPORT_REPOSITORY: github/docs-content - CREATE_REPORT: true - CHECK_EXTERNAL_LINKS: true - PATIENT: true - # This means that we'll *re-check* external URLs once a week. - # But mind you that the number has a 10% chance of "jitter" - # to avoid a stampeding herd when they all expire some day. - EXTERNAL_LINK_CHECKER_MAX_AGE_DAYS: 7 - # If we're unable to connect or the server returns a 50x error, - # treat it as a warning and not as a broken link. - EXTERNAL_SERVER_ERRORS_AS_WARNINGS: true - FAIL_ON_FLAW: false - timeout-minutes: 120 - run: npm run rendered-content-link-checker - - - name: Insight into external link checker DB json file (after) - run: | - if [ -f external-link-checker-db.json ]; then - echo "external-link-checker-db.json exists" - echo -n "Number of URLs in cache: " - jq '.urls | keys_unsorted' external-link-checker-db.json | wc -l - else - echo "external-link-checker-db.json does not exist" - fi - - - uses: ./.github/actions/slack-alert - if: ${{ failure() && github.event_name != 'workflow_dispatch' }} - with: - slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }} - slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }} diff --git a/.github/workflows/link-check-external.yml b/.github/workflows/link-check-external.yml new file mode 100644 index 000000000000..af8a04adc6ad --- /dev/null +++ b/.github/workflows/link-check-external.yml @@ -0,0 +1,66 @@ +name: Check External Links + +# Runs weekly (Wednesday) at 16:20 UTC +# Validates external URLs in content files + +on: + schedule: + - cron: '20 16 * * 3' # Wednesday at 16:20 UTC + workflow_dispatch: + inputs: + max_urls: + description: 'Maximum 
number of URLs to check (leave blank for all)' + type: number + +permissions: + contents: read + issues: write + +jobs: + check-external-links: + if: github.repository == 'github/docs-internal' + runs-on: ubuntu-latest + timeout-minutes: 180 # 3 hours for external checks + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - uses: ./.github/actions/node-npm-setup + + - name: Install dependencies + run: npm ci + + - name: Check external links + env: + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + CACHE_MAX_AGE_DAYS: '7' + run: | + if [[ -n "${{ inputs.max_urls }}" ]]; then + npm run check-links-external -- --max ${{ inputs.max_urls }} + else + npm run check-links-external + fi + + - name: Upload report artifact + if: failure() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: external-link-report + path: artifacts/external-link-report.* + retention-days: 14 + + - name: Create issue if broken links found + if: failure() + uses: peter-evans/create-issue-from-file@fca9117c27cdc29c6c4db3b86c48e4115a786710 # v5 + with: + token: ${{ secrets.DOCS_BOT_PAT_WORKFLOW }} + repository: github/docs-content + title: '🌐 Broken External Links Report' + content-filepath: artifacts/external-link-report.md + labels: broken link report + + - uses: ./.github/actions/slack-alert + if: ${{ failure() && github.event_name != 'workflow_dispatch' }} + with: + slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }} + slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }} diff --git a/.github/workflows/link-check-internal.yml b/.github/workflows/link-check-internal.yml new file mode 100644 index 000000000000..a6071cd20b64 --- /dev/null +++ b/.github/workflows/link-check-internal.yml @@ -0,0 +1,159 @@ +name: Check Internal Links + +# Runs weekly (Tuesday) at 16:20 UTC +# On schedule: checks English free-pro-team and latest enterprise-server 
+# On workflow_dispatch: checks any version/language combo you specify
ELASTICSEARCH_URL: '' + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - uses: ./.github/actions/node-npm-setup + + - name: Install dependencies + run: npm ci + + # Clone translations if not English + - name: Clone translations + if: matrix.language != 'en' + uses: ./.github/actions/clone-translations + with: + token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }} + + - name: Check internal links + env: + VERSION: ${{ matrix.version }} + LANGUAGE: ${{ matrix.language }} + CHECK_ANCHORS: true + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: npm run check-links-internal + + - name: Upload report artifact + if: failure() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: link-report-${{ matrix.version }}-${{ matrix.language }} + path: artifacts/link-report-*.md + retention-days: 5 + + - uses: ./.github/actions/slack-alert + if: ${{ failure() && github.event_name != 'workflow_dispatch' }} + with: + slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }} + slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }} + + # Create combined report after all matrix jobs complete + create-report: + if: always() && github.repository == 'github/docs-internal' + needs: [setup-matrix, check-internal-links] + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Download all artifacts + uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1 + with: + path: reports + pattern: link-report-* + merge-multiple: true + + - name: Combine reports + id: combine + run: | + # Check if any reports exist + if ls reports/*.md 1> /dev/null 2>&1; then + echo "has_reports=true" >> $GITHUB_OUTPUT + + # Combine all markdown reports + echo "# Internal Links Report" > combined-report.md + echo "" >> combined-report.md + echo "Generated: 
$(date -u +'%Y-%m-%d %H:%M UTC')" >> combined-report.md + echo "[Action run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" >> combined-report.md + echo "" >> combined-report.md + + for report in reports/*.md; do + echo "---" >> combined-report.md + cat "$report" >> combined-report.md + echo "" >> combined-report.md + done + else + echo "has_reports=false" >> $GITHUB_OUTPUT + echo "No broken link reports generated - all links valid!" + fi + + - name: Create issue if broken links found + if: steps.combine.outputs.has_reports == 'true' + uses: peter-evans/create-issue-from-file@fca9117c27cdc29c6c4db3b86c48e4115a786710 # v5 + with: + token: ${{ secrets.DOCS_BOT_PAT_WORKFLOW }} + repository: github/docs-content + title: '🔗 Broken Internal Links Report' + content-filepath: combined-report.md + labels: broken link report + + - uses: ./.github/actions/slack-alert + if: ${{ failure() && github.event_name != 'workflow_dispatch' }} + with: + slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }} + slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }} diff --git a/.github/workflows/link-check-on-pr.yml b/.github/workflows/link-check-on-pr.yml index 484bc4c17b43..755fc0581e1c 100644 --- a/.github/workflows/link-check-on-pr.yml +++ b/.github/workflows/link-check-on-pr.yml @@ -1,7 +1,7 @@ name: 'Link Checker: On PR' -# **What it does**: Renders the content of every page and check all internal links on PR. -# **Why we have it**: To make sure all links connect correctly on changed files. +# **What it does**: Checks internal links in changed content files. +# **Why we have it**: To catch broken links before they're merged. # **Who does it impact**: Docs content. 
on: @@ -11,17 +11,17 @@ on: permissions: contents: read - # TODO: Uncomment if we uncomment below - # Needed for the 'trilom/file-changes-action' action - # pull-requests: read + pull-requests: write + issues: write -# This allows a subsequently queued workflow run to interrupt previous runs +# Cancel in-progress runs for the same PR concurrency: group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' cancel-in-progress: true jobs: check-links: + name: Check links runs-on: ubuntu-latest if: github.repository == 'github/docs-internal' || github.repository == 'github/docs' steps: @@ -35,19 +35,25 @@ jobs: with: token: ${{ secrets.DOCS_BOT_PAT_BASE }} - - name: Link check all pages (internal links only) + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@48d8f15b2aaa3d255ca5af3eba4870f807ce6b3c # v45 + with: + files: | + content/**/*.md + data/**/*.md + + - name: Check links in changed files + if: steps.changed-files.outputs.any_changed == 'true' env: - LEVEL: 'critical' + FILES_CHANGED: ${{ steps.changed-files.outputs.all_changed_files }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_BASE }} SHOULD_COMMENT: ${{ secrets.DOCS_BOT_PAT_BASE != '' }} - CHECK_EXTERNAL_LINKS: false - CREATE_REPORT: false - CHECK_ANCHORS: true - # Not strictly necessary bit it makes warmServer() a bit faster - # because it only bothers with English to begin with, which - # we're filtering on anyway once the list of all pages has - # been loaded. - ENABLED_LANGUAGES: en FAIL_ON_FLAW: true - run: npm run rendered-content-link-checker + ENABLED_LANGUAGES: en + run: npm run check-links-pr + + - name: No content changes + if: steps.changed-files.outputs.any_changed != 'true' + run: echo "No content files changed. Skipping link check." 
diff --git a/package.json b/package.json index ef3cd150ece9..aabadb835361 100644 --- a/package.json +++ b/package.json @@ -80,8 +80,9 @@ "repo-sync": "./src/workflows/local-repo-sync.sh", "reusables": "tsx src/content-render/scripts/reusables-cli.ts", "liquid-tags": "tsx src/content-render/scripts/liquid-tags.ts", - "rendered-content-link-checker": "tsx src/links/scripts/rendered-content-link-checker.ts", - "rendered-content-link-checker-cli": "tsx src/links/scripts/rendered-content-link-checker-cli.ts", + "check-links-pr": "tsx src/links/scripts/check-links-pr.ts", + "check-links-internal": "tsx src/links/scripts/check-links-internal.ts", + "check-links-external": "tsx src/links/scripts/check-links-external.ts", "rest-dev": "tsx src/rest/scripts/update-files.ts", "show-action-deps": "echo 'Action Dependencies:' && rg '^[\\s|-]*(uses:.*)$' .github -I -N --no-heading -r '$1$2' | sort | uniq | cut -c 7-", "start": "cross-env NODE_ENV=development ENABLED_LANGUAGES=en nodemon src/frame/server.ts", diff --git a/src/links/lib/extract-links.ts b/src/links/lib/extract-links.ts new file mode 100644 index 000000000000..86f70f5fb5d2 --- /dev/null +++ b/src/links/lib/extract-links.ts @@ -0,0 +1,339 @@ +/** + * Link extraction utilities for the link checker system. + * + * This module provides functions to extract internal and external links + * from Markdown content, with support for Liquid template rendering. 
+ */ + +import fs from 'fs' +import path from 'path' + +import { allVersions } from '@/versions/lib/all-versions' +import { getDataByLanguage } from '@/data-directory/lib/get-data' +import type { Context, Page } from '@/types' + +// Link patterns for Markdown +const INTERNAL_LINK_PATTERN = /\]\(\/[^)]+\)/g +const AUTOTITLE_LINK_PATTERN = /\[AUTOTITLE\]\(([^)]+)\)/g +const EXTERNAL_LINK_PATTERN = /\]\((https?:\/\/[^)]+)\)/g +const IMAGE_LINK_PATTERN = /!\[[^\]]*\]\(([^)]+)\)/g + +// Anchor link patterns (for same-page links) +const ANCHOR_LINK_PATTERN = /\]\(#[^)]+\)/g + +export interface ExtractedLink { + href: string + line: number + column: number + text?: string + isAutotitle?: boolean + isImage?: boolean + isAnchor?: boolean +} + +export interface LinkExtractionResult { + internalLinks: ExtractedLink[] + externalLinks: ExtractedLink[] + anchorLinks: ExtractedLink[] + imageLinks: ExtractedLink[] +} + +/** + * Get line and column number for a match in content + */ +function getLineAndColumn(content: string, matchIndex: number): { line: number; column: number } { + const lines = content.substring(0, matchIndex).split('\n') + const line = lines.length + const column = lines[lines.length - 1].length + 1 + return { line, column } +} + +/** + * Extract the link text before a Markdown link + * For `[link text](/path)`, matchIndex points to the `]` in `](/path)` + */ +function extractLinkText(content: string, matchIndex: number): string | undefined { + // matchIndex points to ](/...), so we need to find the opening [ + // Scan backwards to find the matching [ + let start = matchIndex - 1 + + // Simple scan back to find opening bracket + // (nested brackets in link text are rare and handled approximately) + while (start >= 0 && content[start] !== '[') { + start-- + } + + if (start >= 0 && content[start] === '[') { + // Extract text between [ and ] + // matchIndex points to the start of ](, so the ] is at matchIndex + const text = content.substring(start + 1, matchIndex) 
+ return text.length > 0 ? text : undefined + } + return undefined +} + +/** + * Extract all links from raw Markdown content (before Liquid rendering) + */ +export function extractLinksFromMarkdown(content: string): LinkExtractionResult { + const internalLinks: ExtractedLink[] = [] + const externalLinks: ExtractedLink[] = [] + const anchorLinks: ExtractedLink[] = [] + const imageLinks: ExtractedLink[] = [] + + // Extract AUTOTITLE links first (they're a special case of internal links) + let match + while ((match = AUTOTITLE_LINK_PATTERN.exec(content)) !== null) { + const { line, column } = getLineAndColumn(content, match.index) + const href = match[1].split('#')[0] // Remove anchor if present + if (href.startsWith('/')) { + internalLinks.push({ + href, + line, + column, + text: 'AUTOTITLE', + isAutotitle: true, + }) + } + } + + // Reset regex + AUTOTITLE_LINK_PATTERN.lastIndex = 0 + + // Extract regular internal links + while ((match = INTERNAL_LINK_PATTERN.exec(content)) !== null) { + // Skip if this is an AUTOTITLE link (already captured) + const fullMatch = match[0] + if (content.substring(match.index - 10, match.index).includes('AUTOTITLE')) { + continue + } + + const { line, column } = getLineAndColumn(content, match.index) + // Extract href from ](/path) format + const href = fullMatch.substring(2, fullMatch.length - 1).split('#')[0] + const text = extractLinkText(content, match.index) + + internalLinks.push({ + href, + line, + column, + text, + isAutotitle: false, + }) + } + + // Reset regex + INTERNAL_LINK_PATTERN.lastIndex = 0 + + // Extract external links + while ((match = EXTERNAL_LINK_PATTERN.exec(content)) !== null) { + const { line, column } = getLineAndColumn(content, match.index) + const href = match[1] + const text = extractLinkText(content, match.index) + + externalLinks.push({ + href, + line, + column, + text, + }) + } + + // Reset regex + EXTERNAL_LINK_PATTERN.lastIndex = 0 + + // Extract anchor links + while ((match = 
ANCHOR_LINK_PATTERN.exec(content)) !== null) { + const { line, column } = getLineAndColumn(content, match.index) + const href = match[0].substring(2, match[0].length - 1) + + anchorLinks.push({ + href, + line, + column, + isAnchor: true, + }) + } + + // Reset regex + ANCHOR_LINK_PATTERN.lastIndex = 0 + + // Extract image links + while ((match = IMAGE_LINK_PATTERN.exec(content)) !== null) { + const { line, column } = getLineAndColumn(content, match.index) + const href = match[1] + + // Only include internal images (starting with /) + if (href.startsWith('/')) { + imageLinks.push({ + href, + line, + column, + isImage: true, + }) + } + } + + // Reset regex + IMAGE_LINK_PATTERN.lastIndex = 0 + + return { + internalLinks, + externalLinks, + anchorLinks, + imageLinks, + } +} + +/** + * Create a minimal context for Liquid rendering + */ +export function createLiquidContext( + version: string = 'free-pro-team@latest', + language: string = 'en', +): Context { + const versionObj = allVersions[version] + if (!versionObj) { + throw new Error(`Unknown version: ${version}`) + } + + // Load data for the language + const siteData = getDataByLanguage('variables', language) + + return { + currentVersion: version, + currentLanguage: language, + currentVersionObj: versionObj, + // Feature flags and version checks + enterpriseServerVersions: Object.values(allVersions) + .filter((v) => v.plan === 'enterprise-server') + .map((v) => v.currentRelease), + // Site data for variable interpolation + site: siteData, + // Empty pages/redirects - not needed for link extraction + pages: {}, + redirects: {}, + } as Context +} + +/** + * Render Liquid templates in content and extract links + * + * This renders the Liquid tags (like {% ifversion %}) to get the actual + * content that would appear for a given version, then extracts links. 
+ */
+export async function extractLinksWithLiquid(
+  content: string,
+  context: Context,
+): Promise<LinkExtractionResult> {
+  try {
+    // Dynamic import to avoid circular dependency issues
+    const { renderLiquid } = await import('@/content-render/liquid/index')
+    // Render Liquid to expand conditionals
+    const rendered = await renderLiquid(content, context)
+    return extractLinksFromMarkdown(rendered)
+  } catch (error) {
+    // If Liquid rendering fails, fall back to raw extraction
+    // This can happen with malformed templates
+    console.warn('Liquid rendering failed, falling back to raw extraction:', error)
+    return extractLinksFromMarkdown(content)
+  }
+}
+
+/**
+ * Read a file and extract links
+ */
+export async function extractLinksFromFile(
+  filePath: string,
+  context?: Context,
+): Promise<LinkExtractionResult> {
+  const content = fs.readFileSync(filePath, 'utf-8')
+
+  if (context) {
+    return extractLinksWithLiquid(content, context)
+  }
+
+  return extractLinksFromMarkdown(content)
+}
+
+/**
+ * Get relative path from content root
+ */
+export function getRelativePath(filePath: string): string {
+  const contentRoot = path.resolve('content')
+  const dataRoot = path.resolve('data')
+
+  if (filePath.startsWith(contentRoot)) {
+    return path.relative(contentRoot, filePath)
+  }
+  if (filePath.startsWith(dataRoot)) {
+    return path.relative(dataRoot, filePath)
+  }
+
+  return filePath
+}
+
+/**
+ * Normalize a link path for comparison with pageMap
+ *
+ * - Removes trailing slashes
+ * - Removes anchor fragments
+ * - Ensures leading slash
+ */
+export function normalizeLinkPath(href: string): string {
+  // Remove anchor
+  let normalized = href.split('#')[0]
+
+  // Remove trailing slash
+  if (normalized.endsWith('/') && normalized.length > 1) {
+    normalized = normalized.slice(0, -1)
+  }
+
+  // Ensure leading slash
+  if (!normalized.startsWith('/')) {
+    normalized = `/${normalized}`
+  }
+
+  return normalized
+}
+
+/**
+ * Check if a path exists in the pageMap or redirects
+ */
+export function checkInternalLink(
+  href: string,
+  pageMap: Record<string, Page>,
+  redirects: Record<string, string>,
+): { exists: boolean; isRedirect: boolean; redirectTarget?: string } {
+  const normalized = normalizeLinkPath(href)
+
+  // Check if it's a direct page
+  if (pageMap[normalized]) {
+    return { exists: true, isRedirect: false }
+  }
+
+  // Check if it's a redirect
+  if (redirects[normalized]) {
+    return {
+      exists: true,
+      isRedirect: true,
+      redirectTarget: redirects[normalized],
+    }
+  }
+
+  // Check with /en prefix (FPT pages are stored with language prefix)
+  const withLang = `/en${normalized}`
+  if (pageMap[withLang]) {
+    return { exists: true, isRedirect: false }
+  }
+
+  if (redirects[withLang]) {
+    return {
+      exists: true,
+      isRedirect: true,
+      redirectTarget: redirects[withLang],
+    }
+  }
+
+  return { exists: false, isRedirect: false }
+}
diff --git a/src/links/lib/link-report.ts b/src/links/lib/link-report.ts
new file mode 100644
index 000000000000..79b98f4b3f2f
--- /dev/null
+++ b/src/links/lib/link-report.ts
@@ -0,0 +1,470 @@
+/**
+ * Link report generation utilities.
+ *
+ * Creates actionable, well-grouped reports for the content team.
+ * Reports are grouped by broken link target, showing all files affected.
+ */ + +// ============================================================================ +// Types +// ============================================================================ + +export interface BrokenLink { + href: string + file: string + lines: number[] + text?: string + isAutotitle?: boolean + isRedirect?: boolean + redirectTarget?: string + statusCode?: number + errorMessage?: string +} + +export interface GroupedBrokenLinks { + target: string + occurrences: BrokenLink[] + suggestion?: string + isWarning: boolean +} + +export interface LinkReport { + title: string + summary: string + groups: GroupedBrokenLinks[] + uniqueTargets: number + totalOccurrences: number + timestamp: string + actionUrl?: string +} + +// ============================================================================ +// Report Templates +// ============================================================================ + +const TEMPLATES = { + // Main report header + reportHeader: (title: string, summary: string, timestamp: string, actionUrl?: string) => + ` +# ${title} + +${summary} + +--- + +**Generated:** ${timestamp}${actionUrl ? `\n**Action Run:** [View Details](${actionUrl})` : ''} +`.trim(), + + // Table of contents for large reports + tableOfContents: (groups: GroupedBrokenLinks[]) => { + const items = groups.map((g) => { + const icon = g.isWarning ? '⚠️' : '❌' + const anchor = g.target.replace(/[^a-zA-Z0-9]/g, '-').toLowerCase() + return `- ${icon} [\`${g.target}\`](#${anchor}) (${g.occurrences.length})` + }) + return `## Quick Navigation\n\n${items.join('\n')}` + }, + + // Section header (Broken Links or Redirects) + sectionHeader: (isWarning: boolean) => + isWarning ? '## ⚠️ Redirects to Update' : '## ❌ Broken Links', + + // Individual group within a section + group: (group: GroupedBrokenLinks, isExternal = false) => { + const icon = group.isWarning ? '⚠️' : '❌' + const count = group.occurrences.length + const plural = count === 1 ? 
'' : 's' + const first = group.occurrences[0] + + const statusInfo = + isExternal && first?.statusCode + ? `**Status:** ${first.statusCode}\n${first.errorMessage ? `**Error:** ${first.errorMessage}\n` : ''}\n` + : '' + + const suggestion = group.suggestion ? `💡 ${group.suggestion}\n\n` : '' + + const tableRows = group.occurrences + .map((occ) => `| \`${occ.file}\` | ${occ.lines.join(', ')} |`) + .join('\n') + + return `### ${icon} \`${group.target}\` + +${statusInfo}${suggestion}**Found in ${count} file${plural}:** + +| File | Line(s) | +|------|---------| +${tableRows}` + }, + + // Empty report + noIssues: () => 'No issues found! 🎉', + + // PR comment + prComment: (errors: GroupedBrokenLinks[], warnings: GroupedBrokenLinks[], actionUrl?: string) => { + const errorSection = + errors.length > 0 + ? `### ❌ ${errors.length} Broken Link${errors.length === 1 ? '' : 's'} + +${errors + .map((group) => { + const shown = group.occurrences.slice(0, 3) + const remaining = group.occurrences.length - 3 + const occLines = shown + .map((occ) => ` - \`${occ.file}\` line ${occ.lines.join(', ')}`) + .join('\n') + const moreLine = remaining > 0 ? `\n - ... and ${remaining} more` : '' + return `- \`${group.target}\`\n${occLines}${moreLine}` + }) + .join('\n')} + +` + : '' + + const warningSection = + warnings.length > 0 + ? `### ⚠️ ${warnings.length} Redirect${warnings.length === 1 ? '' : 's'} to Update + +${warnings.map((group) => `- \`${group.target}\` → \`${group.occurrences[0]?.redirectTarget || '?'}\``).join('\n')} + +` + : '' + + const detailsLink = actionUrl ? 
`[View full details](${actionUrl})\n` : '' + + return `## 🔗 Link Check Results + +${errorSection}${warningSection}${detailsLink} +` + }, +} + +// ============================================================================ +// Grouping Functions +// ============================================================================ + +/** + * Group links by href and determine if they are warnings (redirects) + */ +function groupByTarget(links: BrokenLink[]): Map { + const groups = new Map() + + for (const link of links) { + const existing = groups.get(link.href) || [] + existing.push(link) + groups.set(link.href, existing) + } + + return groups +} + +/** + * Create a suggestion message for a redirect + */ +function createRedirectSuggestion( + target: string, + occurrences: BrokenLink[], + redirects?: Record, +): string | undefined { + if (redirects?.[target]) { + return `This path redirects to \`${redirects[target]}\`. Consider updating to the new path.` + } + if (occurrences[0]?.redirectTarget) { + return `This path redirects to \`${occurrences[0].redirectTarget}\`. Consider updating to the new path.` + } + return undefined +} + +/** + * Sort occurrences by file path for consistent output + */ +function sortOccurrencesByFile(occurrences: BrokenLink[]): BrokenLink[] { + return [...occurrences].sort((a, b) => a.file.localeCompare(b.file)) +} + +/** + * Group broken links by their target href + */ +export function groupBrokenLinks( + brokenLinks: BrokenLink[], + redirects?: Record, +): GroupedBrokenLinks[] { + const groupMap = groupByTarget(brokenLinks) + + const groups = Array.from(groupMap.entries()).map(([target, occurrences]) => { + const isWarning = occurrences.some((o) => o.isRedirect) + const suggestion = isWarning + ? 
createRedirectSuggestion(target, occurrences, redirects) + : undefined + + return { + target, + occurrences: sortOccurrencesByFile(occurrences), + suggestion, + isWarning, + } + }) + + // Sort: errors first, then alphabetically + return groups.sort((a, b) => { + if (a.isWarning !== b.isWarning) return a.isWarning ? 1 : -1 + return a.target.localeCompare(b.target) + }) +} + +/** + * Extract domain from URL, handling invalid URLs + */ +function extractDomain(href: string): string { + try { + return new URL(href).hostname + } catch { + return 'invalid-urls' + } +} + +/** + * Group external broken links by domain + */ +export function groupExternalLinksByDomain(brokenLinks: BrokenLink[]): GroupedBrokenLinks[] { + const groups = new Map() + + for (const link of brokenLinks) { + const domain = extractDomain(link.href) + const existing = groups.get(domain) || [] + existing.push(link) + groups.set(domain, existing) + } + + return Array.from(groups.entries()) + .map(([target, occurrences]) => ({ + target, + occurrences: sortOccurrencesByFile(occurrences), + isWarning: false, + })) + .sort((a, b) => b.occurrences.length - a.occurrences.length) +} + +// ============================================================================ +// Report Generation +// ============================================================================ + +/** + * Create summary text for a report + */ +function createSummary(errorCount: number, warningCount: number, totalOccurrences: number): string { + if (errorCount === 0 && warningCount === 0) { + return 'All links are valid! ✅' + } + + const parts: string[] = [] + if (errorCount > 0) { + parts.push(`**${errorCount}** broken link${errorCount === 1 ? '' : 's'}`) + } + if (warningCount > 0) { + parts.push(`**${warningCount}** redirect${warningCount === 1 ? '' : 's'} to update`) + } + + const plural = totalOccurrences === 1 ? 
'' : 's' + return `Found ${parts.join(' and ')} across ${totalOccurrences} occurrence${plural}.` +} + +/** + * Generate a report for internal links + */ +export function generateInternalLinkReport( + brokenLinks: BrokenLink[], + options: { + actionUrl?: string + version?: string + language?: string + redirects?: Record + } = {}, +): LinkReport { + const groups = groupBrokenLinks(brokenLinks, options.redirects) + const errors = groups.filter((g) => !g.isWarning) + const warnings = groups.filter((g) => g.isWarning) + + return { + title: `Internal Link Check: ${errors.length} broken, ${warnings.length} redirects`, + summary: createSummary(errors.length, warnings.length, brokenLinks.length), + groups, + uniqueTargets: groups.length, + totalOccurrences: brokenLinks.length, + timestamp: new Date().toISOString(), + actionUrl: options.actionUrl, + } +} + +/** + * Generate a report for external links + */ +export function generateExternalLinkReport( + brokenLinks: BrokenLink[], + options: { actionUrl?: string } = {}, +): LinkReport { + const groups = groupExternalLinksByDomain(brokenLinks) + const count = groups.length + const plural = count === 1 ? '' : 's' + + return { + title: `External Link Check: ${count} domain${plural} with issues`, + summary: + brokenLinks.length > 0 + ? `Found **${brokenLinks.length}** broken external link${brokenLinks.length === 1 ? '' : 's'} across **${count}** domain${plural}.` + : 'All external links are valid! 
✅', + groups, + uniqueTargets: count, + totalOccurrences: brokenLinks.length, + timestamp: new Date().toISOString(), + actionUrl: options.actionUrl, + } +} + +// ============================================================================ +// Markdown Rendering +// ============================================================================ + +/** + * Render groups as markdown sections + */ +function renderGroups(groups: GroupedBrokenLinks[], isExternal: boolean): string { + const errors = groups.filter((g) => !g.isWarning) + const warnings = groups.filter((g) => g.isWarning) + + const sections: string[] = [] + + if (errors.length > 0) { + sections.push(TEMPLATES.sectionHeader(false)) + sections.push('') + for (const group of errors) { + sections.push(TEMPLATES.group(group, isExternal)) + sections.push('') + } + } + + if (warnings.length > 0) { + sections.push(TEMPLATES.sectionHeader(true)) + sections.push('') + for (const group of warnings) { + sections.push(TEMPLATES.group(group, isExternal)) + sections.push('') + } + } + + return sections.join('\n') +} + +/** + * Convert a LinkReport to Markdown string + */ +export function reportToMarkdown(report: LinkReport, isExternal = false): string { + const parts: string[] = [] + + // Header + parts.push( + TEMPLATES.reportHeader(report.title, report.summary, report.timestamp, report.actionUrl), + ) + parts.push('') + + if (report.groups.length === 0) { + parts.push(TEMPLATES.noIssues()) + return parts.join('\n') + } + + // Table of contents for large reports + if (report.groups.length > 5) { + parts.push(TEMPLATES.tableOfContents(report.groups)) + parts.push('') + } + + // Groups + parts.push(renderGroups(report.groups, isExternal)) + + return parts.join('\n') +} + +/** + * Generate a compact PR comment for broken links + */ +export function generatePRComment( + brokenLinks: BrokenLink[], + options: { actionUrl?: string } = {}, +): string { + if (brokenLinks.length === 0) return '' + + const groups = 
groupBrokenLinks(brokenLinks) + const errors = groups.filter((g) => !g.isWarning) + const warnings = groups.filter((g) => g.isWarning) + + return TEMPLATES.prComment(errors, warnings, options.actionUrl) +} + +// ============================================================================ +// Demo / Sample Output +// ============================================================================ + +/** + * Generate sample reports for testing and documentation + */ +export function generateSampleReports(): { + internal: { report: LinkReport; markdown: string } + external: { report: LinkReport; markdown: string } + prComment: string +} { + const internalLinks: BrokenLink[] = [ + { href: '/old/broken/path', file: 'content/actions/index.md', lines: [42] }, + { href: '/old/broken/path', file: 'content/repos/setup.md', lines: [15, 23] }, + { + href: '/actions/reference/old-workflow', + file: 'content/actions/guide.md', + lines: [88], + isRedirect: true, + redirectTarget: '/actions/reference/workflow-syntax', + }, + ] + + const externalLinks: BrokenLink[] = [ + { + href: 'https://example.com/broken', + file: 'content/get-started/index.md', + lines: [10], + statusCode: 404, + errorMessage: 'Not Found', + }, + { + href: 'https://example.com/another', + file: 'content/repos/index.md', + lines: [55], + statusCode: 404, + }, + { + href: 'https://oldsite.org/page', + file: 'content/billing/index.md', + lines: [33], + statusCode: 503, + errorMessage: 'Service Unavailable', + }, + ] + + const internalReport = generateInternalLinkReport(internalLinks, { + actionUrl: 'https://github.com/github/docs-internal/actions/runs/12345', + }) + + const externalReport = generateExternalLinkReport(externalLinks, { + actionUrl: 'https://github.com/github/docs-internal/actions/runs/12345', + }) + + return { + internal: { + report: internalReport, + markdown: reportToMarkdown(internalReport, false), + }, + external: { + report: externalReport, + markdown: reportToMarkdown(externalReport, true), + }, 
+ prComment: generatePRComment(internalLinks, { + actionUrl: 'https://github.com/github/docs-internal/actions/runs/12345', + }), + } +} diff --git a/src/links/scripts/check-links-external.ts b/src/links/scripts/check-links-external.ts new file mode 100644 index 000000000000..1fc3eebb602e --- /dev/null +++ b/src/links/scripts/check-links-external.ts @@ -0,0 +1,372 @@ +/** + * External Link Checker + * + * Validates external URLs in content files. + * Designed to run weekly with aggressive caching. + * + * Usage: + * npm run check-links-external + * npm run check-links-external -- --max 100 + * + * Environment variables: + * GITHUB_TOKEN - For creating issue reports + * ACTION_RUN_URL - Link to the action run + * CREATE_REPORT - Whether to create an issue report (default: false) + * REPORT_REPOSITORY - Repository to create report issues in + * CACHE_MAX_AGE_DAYS - How long to cache URL check results (default: 7) + */ + +import { program } from 'commander' +import chalk from 'chalk' +import fs from 'fs' +import { glob } from 'glob' +import { JSONFilePreset } from 'lowdb/node' + +import { extractLinksFromMarkdown } from '@/links/lib/extract-links' +import { + type BrokenLink, + generateExternalLinkReport, + reportToMarkdown, +} from '@/links/lib/link-report' +import { uploadArtifact } from '@/links/scripts/upload-artifact' +import { createReportIssue, linkReports } from '@/workflows/issue-report' +import github from '@/workflows/github' +import excludedLinks from '@/links/lib/excluded-links' +import coreLib from '@actions/core' + +// Cache configuration +const CACHE_FILE = process.env.EXTERNAL_LINK_CACHE_FILE || 'external-link-cache.json' +const CACHE_MAX_AGE_DAYS = parseInt(process.env.CACHE_MAX_AGE_DAYS || '7', 10) +const CACHE_MAX_AGE_MS = CACHE_MAX_AGE_DAYS * 24 * 60 * 60 * 1000 + +// Request configuration +const REQUEST_TIMEOUT_MS = 30000 // 30 seconds +const REQUEST_DELAY_MS = 100 // 100ms between requests to avoid rate limiting + +// Create a set for fast 
lookups of excluded links +const excludedLinksSet = new Set(excludedLinks.map(({ is }) => is).filter(Boolean)) +const excludedLinksPrefixes = excludedLinks.map(({ startsWith }) => startsWith).filter(Boolean) + +function isExcludedLink(href: string): boolean { + if (excludedLinksSet.has(href)) return true + return excludedLinksPrefixes.some((prefix) => prefix && href.startsWith(prefix)) +} + +// Cache type +interface CacheEntry { + timestamp: number + ok: boolean + statusCode?: number + error?: string +} + +interface CacheData { + urls: Record +} + +/** + * Sleep for a given number of milliseconds + */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +/** + * Check a single external URL + * Uses HEAD first, falls back to GET if HEAD returns 4xx/5xx + */ +async function checkUrl( + url: string, + cache: CacheData, +): Promise<{ ok: boolean; statusCode?: number; error?: string; cached: boolean }> { + // Check cache first + const cached = cache.urls[url] + if (cached) { + const age = Date.now() - cached.timestamp + if (age < CACHE_MAX_AGE_MS) { + return { ok: cached.ok, statusCode: cached.statusCode, error: cached.error, cached: true } + } + } + + const headers = { 'User-Agent': 'GitHub-Docs-Link-Checker/1.0' } + + // Try HEAD first (faster, less data) + let response = await fetchWithTimeout(url, 'HEAD', headers) + + // Fall back to GET if HEAD fails (some servers don't support HEAD properly) + if (response && !response.ok && response.status >= 400) { + response = await fetchWithTimeout(url, 'GET', headers) + } + + if (!response) { + // Timeout or network error + return { ok: false, error: 'Request timed out or failed', cached: false } + } + + const result = { + ok: response.ok, + statusCode: response.status, + error: response.ok ? 
undefined : `HTTP ${response.status}`, + cached: false, + } + + // Update cache + cache.urls[url] = { + timestamp: Date.now(), + ok: result.ok, + statusCode: result.statusCode, + error: result.error, + } + + return result +} + +/** + * Fetch with timeout, returns null on error + */ +async function fetchWithTimeout( + url: string, + method: 'HEAD' | 'GET', + headers: Record, +): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS) + + try { + const response = await fetch(url, { + method, + signal: controller.signal, + headers, + redirect: 'follow', + }) + clearTimeout(timeout) + return response + } catch { + clearTimeout(timeout) + return null + } +} + +/** + * Extract all external links from content files + */ +async function extractAllExternalLinks(): Promise> { + const links = new Map() + + // Find all Markdown files + const files = await glob('content/**/*.md') + + for (const file of files) { + const content = fs.readFileSync(file, 'utf-8') + const result = extractLinksFromMarkdown(content) + + for (const link of result.externalLinks) { + // Only check HTTPS links + if (!link.href.startsWith('https://')) continue + if (isExcludedLink(link.href)) continue + + // Normalize URL (remove anchors for checking) + const url = link.href.split('#')[0] + + if (!links.has(url)) { + links.set(url, []) + } + links.get(url)!.push({ file, line: link.line }) + } + } + + return links +} + +/** + * Main entry point + */ +async function main() { + program + .name('check-links-external') + .description('External link checker with caching') + .option('--max ', 'Maximum number of URLs to check', parseInt) + .option('--verbose', 'Verbose output') + .option('--dry-run', "Extract links but don't check them") + .parse() + + const options = program.opts() + const startTime = Date.now() + + console.log(chalk.blue('🌐 External Link Checker')) + console.log('') + + // Load cache + const defaultData: CacheData = { urls: 
{} } + const db = await JSONFilePreset(CACHE_FILE, defaultData) + await db.read() + + // Report cache stats + const now = Date.now() + let freshCount = 0 + let staleCount = 0 + for (const entry of Object.values(db.data.urls)) { + if (now - entry.timestamp < CACHE_MAX_AGE_MS) { + freshCount++ + } else { + staleCount++ + } + } + console.log(`Cache: ${freshCount} fresh, ${staleCount} stale entries`) + console.log('') + + // Extract all external links + console.log('Extracting external links from content files...') + const allLinks = await extractAllExternalLinks() + console.log(`Found ${allLinks.size} unique external URLs`) + console.log('') + + if (options.dryRun) { + console.log('Dry run mode - not checking URLs') + // Show sample of URLs + const sample = Array.from(allLinks.keys()).slice(0, 20) + for (const url of sample) { + console.log(` ${url}`) + } + if (allLinks.size > 20) { + console.log(` ... and ${allLinks.size - 20} more`) + } + process.exit(0) + } + + // Check URLs + const brokenLinks: BrokenLink[] = [] + let checkedCount = 0 + let cachedCount = 0 + + const urls = Array.from(allLinks.keys()) + const maxUrls = options.max ? Math.min(options.max, urls.length) : urls.length + + console.log(`Checking ${maxUrls} URLs (may take a while)...`) + + for (let i = 0; i < maxUrls; i++) { + const url = urls[i] + const occurrences = allLinks.get(url)! 
+ + const result = await checkUrl(url, db.data) + checkedCount++ + + if (result.cached) { + cachedCount++ + } + + if (!result.ok) { + for (const occ of occurrences) { + brokenLinks.push({ + href: url, + file: occ.file, + lines: [occ.line], + statusCode: result.statusCode, + errorMessage: result.error, + }) + } + + if (options.verbose) { + console.log(` ❌ ${url} - ${result.error || `HTTP ${result.statusCode}`}`) + } + } else if (options.verbose && !result.cached) { + console.log(` ✅ ${url}`) + } + + // Progress update every 100 URLs + if (checkedCount % 100 === 0) { + console.log(` Checked ${checkedCount}/${maxUrls} URLs...`) + } + + // Small delay between non-cached requests to avoid rate limiting + if (!result.cached) { + await sleep(REQUEST_DELAY_MS) + } + } + + // Save cache + await db.write() + + // Report results + const duration = ((Date.now() - startTime) / 1000).toFixed(1) + console.log('') + console.log( + chalk.blue(`Checked ${checkedCount} URLs in ${duration}s (${cachedCount} from cache)`), + ) + + if (brokenLinks.length === 0) { + console.log(chalk.green('✅ All external links valid!')) + process.exit(0) + } + + // Generate report + const report = generateExternalLinkReport(brokenLinks, { + actionUrl: process.env.ACTION_RUN_URL, + }) + + console.log('') + console.log(chalk.red(`❌ ${report.uniqueTargets} domain(s) with broken links`)) + console.log(chalk.red(` ${report.totalOccurrences} total occurrence(s)`)) + + // Show summary by domain + console.log('') + console.log('Broken links by domain:') + for (const group of report.groups.slice(0, 10)) { + console.log(` ${group.target}: ${group.occurrences.length} occurrence(s)`) + } + if (report.groups.length > 10) { + console.log(` ... 
and ${report.groups.length - 10} more domains`) + } + + // Write artifact + const markdown = reportToMarkdown(report, true) + await uploadArtifact('external-link-report.md', markdown) + await uploadArtifact('external-link-report.json', JSON.stringify(report, null, 2)) + + // Create issue report if configured + const createReport = process.env.CREATE_REPORT === 'true' + const reportRepository = process.env.REPORT_REPOSITORY || 'github/docs-content' + + if (createReport && process.env.GITHUB_TOKEN) { + console.log('') + console.log('Creating issue report...') + + const octokit = github() + const reportLabel = process.env.REPORT_LABEL || 'broken link report' + const reportAuthor = process.env.REPORT_AUTHOR || 'docs-bot' + + const newReport = await createReportIssue({ + core: coreLib, + octokit, + reportTitle: report.title, + reportBody: markdown, + reportRepository, + reportLabel, + }) + + // Link to previous reports + await linkReports({ + core: coreLib, + octokit, + newReport, + reportRepository, + reportAuthor, + reportLabel, + }) + + console.log(`Created report issue: ${newReport.html_url}`) + } + + // Exit with error if broken links found + process.exit(1) +} + +// Run if invoked directly +;(async () => { + try { + await main() + } catch (err: unknown) { + console.error('Fatal error:', err) + process.exit(1) + } +})() diff --git a/src/links/scripts/check-links-internal.ts b/src/links/scripts/check-links-internal.ts new file mode 100644 index 000000000000..bd3c4cd7801f --- /dev/null +++ b/src/links/scripts/check-links-internal.ts @@ -0,0 +1,363 @@ +/** + * Internal Link Checker + * + * Comprehensive check of all internal links across all versions and languages. + * Designed to run as a scheduled workflow (twice weekly). 
+ * + * Usage: + * npm run check-links-internal + * npm run check-links-internal -- --version free-pro-team@latest --language en + * + * Environment variables: + * VERSION - Version to check (e.g., free-pro-team@latest) + * LANGUAGE - Language to check (e.g., en) + * GITHUB_TOKEN - For creating issue reports + * ACTION_RUN_URL - Link to the action run + * CREATE_REPORT - Whether to create an issue report (default: false) + * REPORT_REPOSITORY - Repository to create report issues in + * CHECK_ANCHORS - Whether to check anchor links (default: true) + */ + +import { program } from 'commander' +import chalk from 'chalk' +import cheerio from 'cheerio' + +import warmServer from '@/frame/lib/warm-server' +import { renderContent } from '@/content-render/index' +import { allVersions, allVersionKeys } from '@/versions/lib/all-versions' +import languages from '@/languages/lib/languages-server' +import { normalizeLinkPath, checkInternalLink } from '@/links/lib/extract-links' +import { + type BrokenLink, + generateInternalLinkReport, + reportToMarkdown, +} from '@/links/lib/link-report' +import { uploadArtifact } from '@/links/scripts/upload-artifact' +import { createReportIssue, linkReports } from '@/workflows/issue-report' +import github from '@/workflows/github' +import excludedLinks from '@/links/lib/excluded-links' +import type { Page, Permalink, Context } from '@/types' +import coreLib from '@actions/core' + +// Create a set for fast lookups of excluded links +const excludedLinksSet = new Set(excludedLinks.map(({ is }) => is).filter(Boolean)) +const excludedLinksPrefixes = excludedLinks.map(({ startsWith }) => startsWith).filter(Boolean) + +function isExcludedLink(href: string): boolean { + if (excludedLinksSet.has(href)) return true + return excludedLinksPrefixes.some((prefix) => prefix && href.startsWith(prefix)) +} + +interface CheckResult { + brokenLinks: BrokenLink[] + redirectLinks: BrokenLink[] + totalPagesChecked: number + totalLinksChecked: number +} + +/** + * 
Render a page and extract all internal links from the HTML + */ +async function getLinksFromRenderedPage( + page: Page, + permalink: Permalink, + context: Context, +): Promise<{ href: string; text: string }[]> { + const links: { href: string; text: string }[] = [] + + try { + // Render the page content + const html = await renderContent(page.markdown, context) + const $ = cheerio.load(html) + + // Extract all anchor links + $('a[href]').each((_, el) => { + const href = $(el).attr('href') + const text = $(el).text() + + if (href && href.startsWith('/')) { + links.push({ href, text }) + } + }) + } catch (error) { + console.warn(`Failed to render ${page.relativePath} (${permalink.href}):`, error) + } + + return links +} + +/** + * Check anchor links on a rendered page + */ +async function checkAnchorsOnPage( + page: Page, + permalink: Permalink, + context: Context, +): Promise { + const brokenAnchors: BrokenLink[] = [] + + try { + const html = await renderContent(page.markdown, context) + const $ = cheerio.load(html) + + // Find all anchor links (same-page links) + $('a[href^="#"]').each((_, el) => { + const href = $(el).attr('href') + if (!href || href === '#' || href === '#top') return + + // Check if the anchor target exists + const targetId = href.slice(1) + // Escape special CSS selector characters for jQuery/cheerio + const escapedId = targetId.replace(/([!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~])/g, '\\$1') + const targetExists = $(`#${escapedId}`).length > 0 || $(`[name="${targetId}"]`).length > 0 + + if (!targetExists) { + brokenAnchors.push({ + href, + file: page.relativePath, + lines: [0], // Line number not available from rendered HTML + text: $(el).text(), + isAutotitle: false, + }) + } + }) + } catch { + // Rendering errors are logged elsewhere + } + + return brokenAnchors +} + +/** + * Check all pages for a given version and language + */ +async function checkVersion( + version: string, + language: string, + pageList: Page[], + pageMap: Record, + redirects: 
Record, + options: { checkAnchors: boolean; verbose: boolean }, +): Promise { + const brokenLinks: BrokenLink[] = [] + const redirectLinks: BrokenLink[] = [] + let totalPagesChecked = 0 + let totalLinksChecked = 0 + + const versionObj = allVersions[version] + if (!versionObj) { + throw new Error(`Unknown version: ${version}`) + } + + // Filter pages for this version and language + const relevantPages = pageList.filter((page) => { + if (page.languageCode !== language) return false + if (!page.applicableVersions?.includes(version)) return false + return true + }) + + console.log(` Checking ${relevantPages.length} pages for ${version}/${language}`) + + for (const page of relevantPages) { + // Find the permalink for this version + const permalink = page.permalinks?.find((p) => p.pageVersion === version) + if (!permalink) continue + + totalPagesChecked++ + + // Create context for rendering + const context: Context = { + currentVersion: version, + currentLanguage: language, + currentVersionObj: versionObj, + page, + pages: pageMap, + redirects, + } as Context + + // Get links from rendered page + const links = await getLinksFromRenderedPage(page, permalink, context) + totalLinksChecked += links.length + + // Check each link + for (const link of links) { + if (isExcludedLink(link.href)) continue + + const normalized = normalizeLinkPath(link.href) + const result = checkInternalLink(normalized, pageMap, redirects) + + if (!result.exists) { + brokenLinks.push({ + href: link.href, + file: page.relativePath, + lines: [0], + text: link.text, + }) + } else if (result.isRedirect) { + redirectLinks.push({ + href: link.href, + file: page.relativePath, + lines: [0], + text: link.text, + isRedirect: true, + redirectTarget: result.redirectTarget, + }) + } + } + + // Check anchors if enabled + if (options.checkAnchors) { + const anchorFlaws = await checkAnchorsOnPage(page, permalink, context) + brokenLinks.push(...anchorFlaws) + } + + if (options.verbose && totalPagesChecked % 100 === 
0) { + console.log(` Checked ${totalPagesChecked} pages...`) + } + } + + return { brokenLinks, redirectLinks, totalPagesChecked, totalLinksChecked } +} + +/** + * Main entry point + */ +async function main() { + program + .name('check-links-internal') + .description('Comprehensive internal link checker') + .option('-v, --version ', 'Version to check (e.g., free-pro-team@latest)') + .option('-l, --language ', 'Language to check (e.g., en)') + .option('--check-anchors', 'Check anchor links within pages', true) + .option('--no-check-anchors', 'Skip anchor link checking') + .option('--verbose', 'Verbose output') + .parse() + + const options = program.opts() + const startTime = Date.now() + + console.log(chalk.blue('🔗 Internal Link Checker')) + console.log('') + + // Determine version and language to check + const version = options.version || process.env.VERSION + const language = options.language || process.env.LANGUAGE || 'en' + const checkAnchors = options.checkAnchors && process.env.CHECK_ANCHORS !== 'false' + + if (!version) { + console.error('Error: --version or VERSION env var required') + console.error('Available versions:', allVersionKeys.join(', ')) + process.exit(1) + } + + if (!allVersions[version]) { + console.error(`Error: Unknown version "${version}"`) + console.error('Available versions:', allVersionKeys.join(', ')) + process.exit(1) + } + + if (!languages[language]) { + console.error(`Error: Unknown language "${language}"`) + console.error('Available languages:', Object.keys(languages).join(', ')) + process.exit(1) + } + + console.log(`Version: ${version}`) + console.log(`Language: ${language}`) + console.log(`Check anchors: ${checkAnchors}`) + console.log('') + + // Load page data + console.log('Loading page data...') + const { pages: pageMap, redirects, pageList } = await warmServer([language]) + console.log(`Loaded ${pageList.length} pages, ${Object.keys(redirects).length} redirects`) + console.log('') + + // Run the check + const result = await 
checkVersion(version, language, pageList, pageMap, redirects, { + checkAnchors, + verbose: options.verbose, + }) + + // Report results + const duration = ((Date.now() - startTime) / 1000).toFixed(1) + console.log('') + console.log( + chalk.blue( + `Checked ${result.totalPagesChecked} pages, ${result.totalLinksChecked} links in ${duration}s`, + ), + ) + + const allBrokenLinks = [...result.brokenLinks, ...result.redirectLinks] + + if (allBrokenLinks.length === 0) { + console.log(chalk.green('✅ All internal links valid!')) + process.exit(0) + } + + // Generate report + const report = generateInternalLinkReport(allBrokenLinks, { + actionUrl: process.env.ACTION_RUN_URL, + version, + language, + redirects, + }) + + console.log('') + console.log(chalk.red(`❌ ${result.brokenLinks.length} broken link(s)`)) + console.log(chalk.yellow(`⚠️ ${result.redirectLinks.length} redirect(s) to update`)) + + // Write artifact + const markdown = reportToMarkdown(report) + await uploadArtifact(`link-report-${version}-${language}.md`, markdown) + await uploadArtifact(`link-report-${version}-${language}.json`, JSON.stringify(report, null, 2)) + + // Create issue report if configured + const createReport = process.env.CREATE_REPORT === 'true' + const reportRepository = process.env.REPORT_REPOSITORY || 'github/docs-content' + + if (createReport && process.env.GITHUB_TOKEN) { + console.log('') + console.log('Creating issue report...') + + const octokit = github() + const reportLabel = process.env.REPORT_LABEL || 'broken link report' + const reportAuthor = process.env.REPORT_AUTHOR || 'docs-bot' + + const newReport = await createReportIssue({ + core: coreLib, + octokit, + reportTitle: report.title, + reportBody: markdown, + reportRepository, + reportLabel, + }) + + // Link to previous reports + await linkReports({ + core: coreLib, + octokit, + newReport, + reportRepository, + reportAuthor, + reportLabel, + }) + + console.log(`Created report issue: ${newReport.html_url}`) + } + + // Exit with 
error if broken links found + if (result.brokenLinks.length > 0) { + process.exit(1) + } +} + +// Run if invoked directly +;(async () => { + try { + await main() + } catch (err: unknown) { + console.error('Fatal error:', err) + process.exit(1) + } +})() diff --git a/src/links/scripts/check-links-pr.ts b/src/links/scripts/check-links-pr.ts new file mode 100644 index 000000000000..f4b8e24264dc --- /dev/null +++ b/src/links/scripts/check-links-pr.ts @@ -0,0 +1,331 @@ +/** + * PR Link Checker + * + * Fast validation of internal links in changed files. + * Designed to run in <10 minutes on typical PRs. + * + * Usage: + * npm run check-links-pr + * npm run check-links-pr -- --files content/actions/index.md content/repos/index.md + * + * Environment variables: + * FILES_CHANGED - JSON array of changed files (from GitHub Actions) + * GITHUB_TOKEN - For posting PR comments + * ACTION_RUN_URL - Link to the action run + * SHOULD_COMMENT - Whether to post PR comments (default: false) + * FAIL_ON_FLAW - Exit with error code if broken links found (default: true) + */ + +import { program } from 'commander' +import chalk from 'chalk' + +import warmServer from '@/frame/lib/warm-server' +import { + extractLinksWithLiquid, + createLiquidContext, + checkInternalLink, + getRelativePath, +} from '@/links/lib/extract-links' +import { type BrokenLink, generatePRComment, groupBrokenLinks } from '@/links/lib/link-report' +import { uploadArtifact } from '@/links/scripts/upload-artifact' +import github from '@/workflows/github' +import { getActionContext } from '@/workflows/action-context' +import type { Page } from '@/types' +import fs from 'fs' +import path from 'path' + +interface CheckResult { + file: string + brokenLinks: BrokenLink[] + redirectLinks: BrokenLink[] + totalLinksChecked: number +} + +/** + * Check all internal links in a single file + */ +async function checkFile( + filePath: string, + pageMap: Record, + redirects: Record, + version: string = 'free-pro-team@latest', + 
language: string = 'en', +): Promise { + const brokenLinks: BrokenLink[] = [] + const redirectLinks: BrokenLink[] = [] + let totalLinksChecked = 0 + + // Read file content + let content: string + try { + content = fs.readFileSync(filePath, 'utf-8') + } catch { + console.warn(`Could not read file: ${filePath}`) + return { file: filePath, brokenLinks, redirectLinks, totalLinksChecked } + } + + // Create context for Liquid rendering + const context = createLiquidContext(version, language) + + // Extract links after Liquid rendering + const { internalLinks } = await extractLinksWithLiquid(content, context) + + // Check each internal link (exclude imageLinks - they're static assets, not docs pages) + totalLinksChecked = internalLinks.length + + for (const link of internalLinks) { + const result = checkInternalLink(link.href, pageMap, redirects) + + if (!result.exists) { + brokenLinks.push({ + href: link.href, + file: getRelativePath(filePath), + lines: [link.line], + text: link.text, + isAutotitle: link.isAutotitle, + }) + } else if (result.isRedirect) { + redirectLinks.push({ + href: link.href, + file: getRelativePath(filePath), + lines: [link.line], + text: link.text, + isAutotitle: link.isAutotitle, + isRedirect: true, + redirectTarget: result.redirectTarget, + }) + } + } + + return { file: filePath, brokenLinks, redirectLinks, totalLinksChecked } +} + +/** + * Get list of changed files from environment or CLI args + */ +function getChangedFiles(cliFiles?: string[]): string[] { + // CLI args take precedence + if (cliFiles && cliFiles.length > 0) { + return cliFiles + } + + // Check environment variable (from GitHub Actions) + const filesChanged = process.env.FILES_CHANGED + if (filesChanged) { + // Try parsing as JSON first + try { + const parsed = JSON.parse(filesChanged) + if (Array.isArray(parsed)) { + return parsed + } + } catch { + // Not JSON, treat as space-separated string (tj-actions/changed-files format) + return filesChanged.split(/\s+/).filter(Boolean) + 
} + } + + return [] +} + +/** + * Filter to only content/data files that might contain links + */ +function filterContentFiles(files: string[]): string[] { + return files.filter((file) => { + // Only check Markdown files in content/ or data/ + if (!file.endsWith('.md')) return false + if (file.startsWith('content/') || file.startsWith('data/')) return true + return false + }) +} + +/** + * Post a comment on the PR with broken link results + */ +async function commentOnPR(brokenLinks: BrokenLink[], actionUrl?: string) { + const token = process.env.GITHUB_TOKEN + if (!token) { + console.warn('No GITHUB_TOKEN, skipping PR comment') + return + } + + const actionContext = getActionContext() + const { owner, repo } = actionContext + const pullNumber = actionContext.pull_request?.number + + if (!owner || !repo || !pullNumber) { + console.warn('Not in PR context, skipping comment') + return + } + + const octokit = github() + const comment = generatePRComment(brokenLinks, { actionUrl }) + + if (!comment) { + console.log('No broken links to report') + return + } + + // Find existing comment + const { data: comments } = await octokit.rest.issues.listComments({ + owner, + repo, + issue_number: pullNumber, + }) + + const marker = '' + const existingComment = comments.find((c) => c.body?.includes(marker)) + + if (existingComment) { + await octokit.rest.issues.updateComment({ + owner, + repo, + comment_id: existingComment.id, + body: comment, + }) + console.log(`Updated PR comment: ${existingComment.id}`) + } else { + await octokit.rest.issues.createComment({ + owner, + repo, + issue_number: pullNumber, + body: comment, + }) + console.log(`Created PR comment on #${pullNumber}`) + } +} + +/** + * Main entry point + */ +async function main() { + program + .name('check-links-pr') + .description('Check internal links in changed files') + .option('-f, --files ', 'Files to check (overrides FILES_CHANGED env)') + .option('--all', 'Check all content files (for testing)') + 
.option('--verbose', 'Verbose output') + .parse() + + const options = program.opts() + const startTime = Date.now() + + console.log(chalk.blue('🔗 PR Link Checker')) + console.log('') + + // Get files to check + let files = getChangedFiles(options.files) + + if (options.all) { + // For testing: check all content files (limited) + const glob = await import('glob') + files = glob.sync('content/**/*.md').slice(0, 50) + console.log(`Checking ${files.length} files (--all mode, limited to 50)`) + } else if (files.length === 0) { + console.log('No files to check. Exiting.') + process.exit(0) + } + + // Filter to content files only + const contentFiles = filterContentFiles(files) + if (contentFiles.length === 0) { + console.log('No content files in changed files. Exiting.') + process.exit(0) + } + + console.log(`Checking ${contentFiles.length} file(s)...`) + + // Load page data + console.log('Loading page data...') + const { pages: pageMap, redirects } = await warmServer(['en']) + console.log( + `Loaded ${Object.keys(pageMap).length} pages, ${Object.keys(redirects).length} redirects`, + ) + + // Check each file + const allBrokenLinks: BrokenLink[] = [] + const allRedirectLinks: BrokenLink[] = [] + let totalLinksChecked = 0 + + for (const file of contentFiles) { + const filePath = path.resolve(file) + if (!fs.existsSync(filePath)) { + console.warn(`File not found: ${file}`) + continue + } + + if (options.verbose) { + console.log(` Checking: ${file}`) + } + + const result = await checkFile(filePath, pageMap, redirects) + allBrokenLinks.push(...result.brokenLinks) + allRedirectLinks.push(...result.redirectLinks) + totalLinksChecked += result.totalLinksChecked + } + + // Report results + const duration = ((Date.now() - startTime) / 1000).toFixed(1) + console.log('') + console.log(chalk.blue(`Checked ${totalLinksChecked} links in ${duration}s`)) + + if (allBrokenLinks.length === 0 && allRedirectLinks.length === 0) { + console.log(chalk.green('✅ All links valid!')) + 
process.exit(0) + } + + // Group and display results + if (allBrokenLinks.length > 0) { + console.log('') + console.log(chalk.red(`❌ ${allBrokenLinks.length} broken link(s):`)) + const grouped = groupBrokenLinks(allBrokenLinks) + for (const group of grouped) { + console.log(` ${chalk.yellow(group.target)}`) + for (const occ of group.occurrences) { + console.log(` - ${occ.file}:${occ.lines.join(',')}`) + } + } + } + + if (allRedirectLinks.length > 0) { + console.log('') + console.log(chalk.yellow(`⚠️ ${allRedirectLinks.length} redirect(s) to update:`)) + const grouped = groupBrokenLinks(allRedirectLinks) + for (const group of grouped) { + const target = group.occurrences[0]?.redirectTarget || '?' + console.log(` ${chalk.yellow(group.target)} → ${chalk.green(target)}`) + for (const occ of group.occurrences) { + console.log(` - ${occ.file}:${occ.lines.join(',')}`) + } + } + } + + // Write artifact for debugging + const allFlaws = [...allBrokenLinks, ...allRedirectLinks] + await uploadArtifact('broken-links.json', JSON.stringify(groupBrokenLinks(allFlaws), null, 2)) + + // Post PR comment if configured + const shouldComment = process.env.SHOULD_COMMENT === 'true' + if (shouldComment) { + const actionUrl = process.env.ACTION_RUN_URL + await commentOnPR(allFlaws, actionUrl) + } + + // Exit with error if broken links found + const failOnFlaw = process.env.FAIL_ON_FLAW !== 'false' + if (failOnFlaw && allBrokenLinks.length > 0) { + console.log('') + console.log(chalk.red('Failing due to broken links')) + process.exit(1) + } +} + +// Run if invoked directly +;(async () => { + try { + await main() + } catch (err: unknown) { + console.error('Fatal error:', err) + process.exit(1) + } +})() diff --git a/src/links/scripts/rendered-content-link-checker-cli.ts b/src/links/scripts/rendered-content-link-checker-cli.ts deleted file mode 100755 index fb3c6a9dafc3..000000000000 --- a/src/links/scripts/rendered-content-link-checker-cli.ts +++ /dev/null @@ -1,149 +0,0 @@ -// 
[start-readme] -// -// This script goes through all content and renders their HTML and from there -// can analyze for various flaws (e.g. broken links) -// -// [end-readme] - -import fs from 'fs' -import path from 'path' -import { program, Option, InvalidArgumentError } from 'commander' -import renderedContentLinkChecker from './rendered-content-link-checker' -import { getCoreInject, getUploadArtifactInject } from '@/links/scripts/action-injections' -import { allVersions } from '@/versions/lib/all-versions' -import github from '@/workflows/github' - -const STATIC_PREFIXES = { - assets: path.resolve('assets'), - public: path.resolve(path.join('src', 'graphql', 'data')), -} -// Sanity check that these are valid paths -for (const [key, value] of Object.entries(STATIC_PREFIXES)) { - if (!fs.existsSync(value)) { - throw new Error(`Can't find static prefix (${key}): ${value}`) - } -} - -program - .description('Analyze all checked content files, render them, and check for flaws.') - .addOption( - new Option( - '-L, --level ', - 'Level of broken link to be marked as a flaw (default: "warning")', - ).choices(['all', 'warning', 'critical']), - ) - .option('-f, --filter ', 'Search filter(s) on the paths') - .option( - '-V, --version ', - "Specific versions to only do (e.g. 'free-pro-team@latest')", - (version) => { - if (!(version in allVersions)) { - for (const [key, data] of Object.entries(allVersions)) { - if (version === data.miscVersionName) { - return key - } - } - throw new InvalidArgumentError( - `'${version}' is not a recognized version. (not one of ${Object.keys(allVersions)})`, - ) - } - return version - }, - ) - .option('-v, --verbose', 'Verbose outputs') - .option( - '--create-report', - 'Create a report issue in report-repository if there are flaws. (default: false)', - ) - .option( - '--report-repository ', - 'Repository to create issue in. 
(default: "github/docs-content")', - ) - .option( - '--link-reports', - 'If comments should be made on previous report and new report "linking" them. (default: false)', - ) - .option( - '--report-author ', - 'Previous author of report PR for linking. (default: "docs-bot")', - ) - .option( - '--report-label