From 50920c292681ff597202643ab742c76cb522fbcf Mon Sep 17 00:00:00 2001
From: shralex
Date: Sun, 21 Dec 2025 05:08:14 +0000
Subject: [PATCH] Integrate MaxText CI with Codecov

Integrates Codecov using a two-flag scheme (regular, scheduled) and
carryforward logic to accurately track coverage across tiered test
suites.

Adds .github/codecov.yml to enable carryforward for tests skipped in
PRs (scheduled_only). The config is placed in .github/ rather than
.github/workflows/: files under workflows/ are parsed by GitHub Actions
as workflow definitions, and Codecov only discovers its config in the
repository root, dev/ or .github/.

Updates test workflows to generate coverage reports via pytest-cov and
upload results with conditional flags. Sets Project coverage to track
the full scheduled baseline and Patch coverage to evaluate new code
against regular PR tests.

Also skips the flaky test_autoselected_attention checkpoint
compatibility test (b/470704234).
---
 .github/codecov.yml                           | 64 +++++++++++++++++++
 .../workflows/run_tests_against_package.yml   | 24 ++++++-
 .../checkpoint_compatibility_test.py          |  1 +
 3 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 .github/codecov.yml

diff --git a/.github/codecov.yml b/.github/codecov.yml
new file mode 100644
index 0000000000..aa951022d2
--- /dev/null
+++ b/.github/codecov.yml
@@ -0,0 +1,64 @@
+# Copyright 2023–2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MaxText Codecov Configuration
+#
+# We use a two-flag scheme ('regular' and 'scheduled') to handle our tiered test suite.
+# 'carryforward' is enabled because Pull Requests only run a subset of tests (excluding 'scheduled_only').
+# Without it, PRs would show a significant coverage drop as they would 'overwrite' the full-suite results.
+#
+# Scheme:
+# - 'regular': Updated by every PR/Schedule. Used to evaluate 'patch' (new code) coverage.
+# - 'scheduled': Updated ONLY by scheduled full runs. Used to anchor 'project' (total health) coverage.
+#   During PRs, the 'scheduled' flag is carried forward from the last full run on 'main' to keep the score stable.
+
+# Exclude non-source code, deprecated and experimental folders from coverage tracking
+ignore:
+  - "src/MaxText/assets"
+  - "src/MaxText/configs"
+  - "src/MaxText/examples"
+  - "src/MaxText/experimental"
+  - "src/MaxText/inference"
+  - "src/MaxText/inference_mlperf"
+  - "src/MaxText/scratch_code"
+  - "src/MaxText/test_assets"
+
+
+flags:
+  # Updated on every PR and during every scheduled run (contains a subset of tests).
+  regular:
+    carryforward: true
+  # Updated ONLY during scheduled runs (contains all tests).
+  scheduled:
+    carryforward: true
+
+coverage:
+  status:
+    # Project score remains stable at the 'Full Suite' level.
+    # It carries forward the last 'scheduled' results during PRs.
+    project:
+      default:
+        target: auto
+        threshold: 5%  # fail on 5+ percent degradation
+        flags:
+          - scheduled
+
+    # Patch score provides feedback on the code changed in a PR.
+    patch:
+      default:
+        target: auto
+        threshold: 5%  # fail on 5+ percent degradation
+        flags:
+          - regular
+
diff --git a/.github/workflows/run_tests_against_package.yml b/.github/workflows/run_tests_against_package.yml
index 2e7094c8a8..8cd1013fd1 100644
--- a/.github/workflows/run_tests_against_package.yml
+++ b/.github/workflows/run_tests_against_package.yml
@@ -88,6 +88,7 @@ jobs:
           uv pip install -r src/install_maxtext_extra_deps/extra_deps_from_github.txt
           python3 --version
           python3 -m pip freeze
+          uv pip install pytest-cov
       - name: Copy test assets files
         run : gcloud storage cp gs://maxtext-test-assets/* src/MaxText/test_assets
       - name: Run Tests
@@ -107,6 +108,25 @@ jobs:
           if [ "${{ inputs.device_type }}" != "cuda12" ]; then
             export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536'
           fi
+          if [ "${{ inputs.total_workers }}" -gt 1 ]; then
+            .venv/bin/python3 -m pip install --quiet pytest-split
+            SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}"
+          else
+            SPLIT_ARGS=""
+          fi
           # TODO: Fix the skipped tests and remove the deselect flags
-          [ "${{ inputs.total_workers }}" -gt 1 ] && .venv/bin/python3 -m pip install --quiet pytest-split && SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}" || SPLIT_ARGS=""
-          .venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" --durations=0 --deselect "tests/tokenizer_test.py::TokenizerTest::test_detokenize" $SPLIT_ARGS
+          .venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} \
+            -v \
+            -m "${FINAL_PYTEST_MARKER}" \
+            --durations=0 \
+            --deselect "tests/tokenizer_test.py::TokenizerTest::test_detokenize" \
+            --cov=src/MaxText \
+            --cov-report=xml \
+            $SPLIT_ARGS
+      - name: Upload results to Codecov
+        uses: codecov/codecov-action@v5
+        continue-on-error: true
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          # If scheduled, upload to BOTH flags. If PR, upload ONLY to regular.
+          flags: ${{ inputs.is_scheduled_run == 'true' && 'regular,scheduled' || 'regular' }}
diff --git a/tests/integration_tests/checkpoint_compatibility_test.py b/tests/integration_tests/checkpoint_compatibility_test.py
index 200d575c5f..a4f9d571ea 100644
--- a/tests/integration_tests/checkpoint_compatibility_test.py
+++ b/tests/integration_tests/checkpoint_compatibility_test.py
@@ -82,6 +82,7 @@ def run_checkpoint_compatibility(hardware, attention_type):
 
 @pytest.mark.integration_test
 @pytest.mark.tpu_only
+@pytest.mark.skip(reason="Flaky test b/470704234")
 def test_autoselected_attention():
   run_checkpoint_compatibility("tpu", "autoselected")
 