diff --git a/.github/workflows/build_and_test_maxtext.yml b/.github/workflows/build_and_test_maxtext.yml index 169e6cc334..0887c9210a 100644 --- a/.github/workflows/build_and_test_maxtext.yml +++ b/.github/workflows/build_and_test_maxtext.yml @@ -51,7 +51,7 @@ jobs: fail-fast: false # don't cancel all jobs on failure matrix: image_type: ["py312"] - worker_group: [1, 2, 3, 4] + worker_group: [1, 2] with: device_type: cpu device_name: X64 @@ -63,7 +63,7 @@ jobs: container_resource_option: "--privileged" is_scheduled_run: ${{ github.event_name == 'schedule' }} worker_group: ${{ matrix.worker_group }} - total_workers: 4 + total_workers: 2 maxtext_tpu_unit_tests: needs: build_and_upload_maxtext_package diff --git a/.github/workflows/run_tests_against_package.yml b/.github/workflows/run_tests_against_package.yml index 8cd1013fd1..8ac8da6a5a 100644 --- a/.github/workflows/run_tests_against_package.yml +++ b/.github/workflows/run_tests_against_package.yml @@ -71,6 +71,7 @@ jobs: TF_FORCE_GPU_ALLOW_GROWTH: ${{ inputs.tf_force_gpu_allow_growth }} TPU_SKIP_MDS_QUERY: ${{ inputs.device_type == 'cpu' && '1' || '' }} MAXTEXT_PACKAGE_EXTRA: ${{ inputs.device_type == 'cpu' && 'tpu' || inputs.device_type }} + ALLOW_MULTIPLE_LIBTPU_LOAD: ${{ inputs.device_type == 'cpu' && 'true' || '' }} # bypass /tmp/libtpu_lockfile check for cpu tests, which don't actually use accelerators (to allow concurrency) options: ${{ inputs.container_resource_option }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -109,8 +110,8 @@ jobs: export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536' fi if [ "${{ inputs.total_workers }}" -gt 1 ]; then - .venv/bin/python3 -m pip install --quiet pytest-split - SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}" + .venv/bin/python3 -m pip install --quiet pytest-split pytest-xdist + SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }} -n auto" else SPLIT_ARGS="" fi