From 8b2128c1d2dbf4b5e99a637487d86b3a47b00f7d Mon Sep 17 00:00:00 2001
From: yushijie_27
Date: Thu, 4 Jun 2026 15:00:23 +0800
Subject: [PATCH 1/2] feat: add crawler-based in-site link and broken image checking

---

Notes (review):
    Adds two workflows that crawl the site with linkinator, using
    --recurse and --check-images.
    * link-check-cron-crawler: runs on the '0 2 * * *' schedule and on
      manual dispatch, crawling https://doris.apache.org.
    * link-check-pr-crawler: on pull requests touching the listed paths,
      builds the site with yarn, serves ./build on port 3000 and crawls
      http://localhost:3000.
    The --skip pattern is a negative lookahead, so it matches every URL
    that does not start with the site's own origin; only in-site links
    and images are meant to be checked.
    NOTE(review): the PR job waits a fixed `sleep 5` for the background
    server rather than polling for readiness, and the npx-invoked tools
    are not version-pinned. Both are worth a follow-up.

 .github/workflows/link-check-cron-crawler.yml | 17 +++++++
 .github/workflows/link-check-pr-crawler.yml   | 44 +++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 .github/workflows/link-check-cron-crawler.yml
 create mode 100644 .github/workflows/link-check-pr-crawler.yml

diff --git a/.github/workflows/link-check-cron-crawler.yml b/.github/workflows/link-check-cron-crawler.yml
new file mode 100644
index 0000000000000..c9da9df8ddfde
--- /dev/null
+++ b/.github/workflows/link-check-cron-crawler.yml
@@ -0,0 +1,17 @@
+name: link-check-cron-crawler
+
+on:
+  schedule:
+    - cron: '0 2 * * *'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  cron-crawler:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Run Crawler Link Checker
+        run: |
+          npx linkinator https://doris.apache.org --recurse --check-images --skip "^(?!https?://(www\.)?doris\.apache\.org)"
diff --git a/.github/workflows/link-check-pr-crawler.yml b/.github/workflows/link-check-pr-crawler.yml
new file mode 100644
index 0000000000000..60b4c53ee9877
--- /dev/null
+++ b/.github/workflows/link-check-pr-crawler.yml
@@ -0,0 +1,44 @@
+name: link-check-pr-crawler
+
+on:
+  pull_request:
+    paths:
+      - 'docs/**'
+      - 'i18n/**'
+      - 'src/**'
+      - 'static/**'
+      - 'docusaurus.config.js'
+      - 'sidebars.ts'
+
+concurrency:
+  group: link-check-pr-crawler-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  pr-crawler:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Use Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: 'yarn'
+
+      - name: Install dependencies
+        run: yarn install --frozen-lockfile
+
+      - name: Build website
+        run: yarn build
+
+      - name: Serve build folder and scan for 404s/broken images
+        run: |
+          npx serve build -l 3000 &
+          # Wait for server to start
+          sleep 5
+          npx linkinator http://localhost:3000 --recurse --check-images --skip "^(?!http://localhost:3000)"

From dfd5831e2015f1d15fd34f80d9346ced339b6b22 Mon Sep 17 00:00:00 2001
From: yushijie_27
Date: Thu, 4 Jun 2026 17:38:28 +0800
Subject: [PATCH 2/2] feat: add crawler-based in-site link and image checker workflows

---

Notes (review):
    Follow-up to patch 1/2; no new workflow files are created here.
    * Passes -y to every npx invocation (serve and linkinator) in both
      workflows.
    * Extends the PR workflow's path filter with 'versioned_docs/**',
      'versioned_sidebars/**' and 'versions.json'.
    The fixed `sleep 5` before the crawl is unchanged by this patch.

 .github/workflows/link-check-cron-crawler.yml | 2 +-
 .github/workflows/link-check-pr-crawler.yml   | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/link-check-cron-crawler.yml b/.github/workflows/link-check-cron-crawler.yml
index c9da9df8ddfde..c3b5053b46333 100644
--- a/.github/workflows/link-check-cron-crawler.yml
+++ b/.github/workflows/link-check-cron-crawler.yml
@@ -14,4 +14,4 @@ jobs:
     steps:
       - name: Run Crawler Link Checker
         run: |
-          npx linkinator https://doris.apache.org --recurse --check-images --skip "^(?!https?://(www\.)?doris\.apache\.org)"
+          npx -y linkinator https://doris.apache.org --recurse --check-images --skip "^(?!https?://(www\.)?doris\.apache\.org)"
diff --git a/.github/workflows/link-check-pr-crawler.yml b/.github/workflows/link-check-pr-crawler.yml
index 60b4c53ee9877..3bbe63e4a62bf 100644
--- a/.github/workflows/link-check-pr-crawler.yml
+++ b/.github/workflows/link-check-pr-crawler.yml
@@ -4,11 +4,14 @@ on:
   pull_request:
     paths:
       - 'docs/**'
+      - 'versioned_docs/**'
       - 'i18n/**'
       - 'src/**'
       - 'static/**'
       - 'docusaurus.config.js'
       - 'sidebars.ts'
+      - 'versioned_sidebars/**'
+      - 'versions.json'
 
 concurrency:
   group: link-check-pr-crawler-${{ github.event.pull_request.number || github.ref }}
@@ -38,7 +41,7 @@ jobs:
 
       - name: Serve build folder and scan for 404s/broken images
         run: |
-          npx serve build -l 3000 &
+          npx -y serve build -l 3000 &
           # Wait for server to start
           sleep 5
-          npx linkinator http://localhost:3000 --recurse --check-images --skip "^(?!http://localhost:3000)"
+          npx -y linkinator http://localhost:3000 --recurse --check-images --skip "^(?!http://localhost:3000)"