Skip to content

Commit 3114d15

Browse files
committed
initial commit for seed-env integration
Refactor CI/CD workflows: streamline Docker image build process and remove obsolete requirements
1 parent 6131d16 commit 3114d15

File tree

11 files changed

+587
-77
lines changed

11 files changed

+587
-77
lines changed

.github/workflows/UploadDockerImages.yml

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,60 @@ name: Build Images
2020
on:
2121
schedule:
2222
# Run the job daily at 00:00 UTC (midnight)
23-
- cron: '0 0 * * *'
24-
23+
- cron: '0 0 * * *'
2524
workflow_dispatch:
25+
inputs:
26+
target_device:
27+
description: 'Specify target device (all or tpu)'
28+
required: true
29+
type: choice
30+
default: 'tpu'
31+
options:
32+
- all
33+
- tpu
34+
35+
permissions:
36+
contents: read
2637

2738
jobs:
28-
build-image:
29-
runs-on: ["self-hosted", "e2", "cpu"]
39+
setup:
40+
runs-on: ubuntu-latest
41+
outputs:
42+
maxdiffusion_sha: ${{ steps.vars.outputs.maxdiffusion_sha }}
43+
image_date: ${{ steps.vars.outputs.image_date }}
3044
steps:
31-
- uses: actions/checkout@v3
32-
- name: Cleanup old docker images
33-
run: docker system prune --all --force
34-
- name: build maxdiffusion jax ai image
35-
run: |
36-
bash .github/workflows/build_and_upload_images.sh CLOUD_IMAGE_NAME=maxdiffusion_jax_stable_stack MODE=jax_ai_image PROJECT=tpu-prod-env-multipod LOCAL_IMAGE_NAME=maxdiffusion_jax_stable_stack BASEIMAGE=us-docker.pkg.dev/cloud-tpu-images/jax-ai-image/tpu:latest
37-
- name: build maxdiffusion w/ nightly jax ai image
38-
run: |
39-
bash .github/workflows/build_and_upload_images.sh CLOUD_IMAGE_NAME=maxdiffusion_jax_stable_stack_nightly MODE=jax_ai_image PROJECT=tpu-prod-env-multipod LOCAL_IMAGE_NAME=maxdiffusion_jax_stable_stack BASEIMAGE=us-docker.pkg.dev/tpu-prod-env-multipod/jax-stable-stack/tpu/jax_nightly:latest
40-
- name: build maxdiffusion jax nightly image
41-
run: |
42-
bash .github/workflows/build_and_upload_images.sh CLOUD_IMAGE_NAME=maxdiffusion_jax_nightly MODE=nightly PROJECT=tpu-prod-env-multipod LOCAL_IMAGE_NAME=maxdiffusion_jax_nightly
45+
- name: Checkout MaxDiffusion
46+
uses: actions/checkout@v5
47+
48+
- name: Get metadata
49+
id: vars
50+
run: |
51+
# MaxDiffusion SHA
52+
echo "maxdiffusion_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
53+
54+
# Image date
55+
echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
56+
57+
build-image:
58+
name: ${{ matrix.image_name }}
59+
needs: setup
60+
strategy:
61+
fail-fast: false
62+
matrix:
63+
include:
64+
- device: tpu
65+
build_mode: stable
66+
image_name: maxdiffusion_jax_stable
67+
dockerfile: ./maxdiffusion_dependencies.Dockerfile
68+
- device: tpu
69+
build_mode: nightly
70+
image_name: maxdiffusion_jax_nightly
71+
dockerfile: ./maxdiffusion_dependencies.Dockerfile
72+
uses: ./.github/workflows/build_and_push_docker_image.yml
73+
with:
74+
image_name: ${{ matrix.image_name }}
75+
device: ${{ matrix.device }}
76+
build_mode: ${{ matrix.build_mode }}
77+
dockerfile: ${{ matrix.dockerfile }}
78+
maxdiffusion_sha: ${{ needs.setup.outputs.maxdiffusion_sha }}
79+
image_date: ${{ needs.setup.outputs.image_date }}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright 2025 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This reusable workflow builds and pushes a MaxDiffusion Docker image to GCR.
16+
17+
name: Build and Push MaxDiffusion Docker Images
18+
19+
on:
20+
workflow_call:
21+
inputs:
22+
image_name:
23+
required: true
24+
type: string
25+
device:
26+
required: true
27+
type: string
28+
build_mode:
29+
required: true
30+
type: string
31+
dockerfile:
32+
required: true
33+
type: string
34+
maxdiffusion_sha:
35+
required: true
36+
type: string
37+
image_date:
38+
required: true
39+
type: string
40+
base_image:
41+
required: false
42+
type: string
43+
default: ''
44+
45+
permissions:
46+
contents: read
47+
48+
jobs:
49+
build_and_push:
50+
runs-on: linux-x86-n2-16-buildkit
51+
container: google/cloud-sdk:524.0.0
52+
if: >
53+
github.event_name == 'schedule' ||
54+
github.event_name == 'pull_request' ||
55+
github.event_name == 'workflow_dispatch' && (
56+
github.event.inputs.target_device == 'all' ||
57+
github.event.inputs.target_device == 'tpu' ||
58+
github.event.inputs.target_device == 'gpu'
59+
)
60+
steps:
61+
- name: Check if build should run
62+
id: check
63+
shell: bash
64+
run: |
65+
if [[ "${{ github.event_name }}" == "workflow_dispatch" && "${{ github.event.inputs.target_device }}" != "all" && "${{ github.event.inputs.target_device }}" != "${{ inputs.device }}" ]]; then
66+
echo "should_run=false" >> $GITHUB_OUTPUT
67+
echo "Skipping ${{ inputs.image_name }} build for device: ${{ inputs.device }} in ${{ inputs.build_mode }} mode."
68+
else
69+
echo "should_run=true" >> $GITHUB_OUTPUT
70+
echo "Building ${{ inputs.image_name }} for device: ${{ inputs.device }} in ${{ inputs.build_mode }} mode."
71+
fi
72+
73+
- name: Checkout MaxDiffusion
74+
uses: actions/checkout@v5
75+
if: steps.check.outputs.should_run == 'true'
76+
with:
77+
# This ensures that every job checks out the exact same commit as the "setup" job.
78+
ref: ${{ inputs.maxdiffusion_sha }}
79+
80+
- name: Mark git repositories as safe
81+
run: git config --global --add safe.directory '*'
82+
if: steps.check.outputs.should_run == 'true'
83+
84+
- name: Configure Docker
85+
run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q
86+
if: steps.check.outputs.should_run == 'true'
87+
88+
- name: Set up Docker BuildX
89+
uses: docker/setup-buildx-action@v3.11.1
90+
if: steps.check.outputs.should_run == 'true'
91+
with:
92+
driver: remote
93+
endpoint: tcp://localhost:1234
94+
95+
- name: Build and push Docker image
96+
uses: docker/build-push-action@v6
97+
if: steps.check.outputs.should_run == 'true'
98+
with:
99+
push: true
100+
context: .
101+
file: ${{ inputs.dockerfile }}
102+
tags: gcr.io/tpu-prod-env-multipod/${{ inputs.image_name }}:latest
103+
cache-from: type=gha
104+
outputs: type=image,compression=zstd,force-compression=true
105+
build-args: |
106+
DEVICE=${{ inputs.device }}
107+
MODE=${{ inputs.build_mode }}
108+
JAX_VERSION=NONE
109+
${{ inputs.base_image != '' && format('BASEIMAGE={0}', inputs.base_image) || '' }}
110+
111+
- name: Add tags to Docker image
112+
if: steps.check.outputs.should_run == 'true'
113+
shell: bash
114+
run: |
115+
SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${{ inputs.image_name }}"
116+
117+
# Add date tag
118+
gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:${{ inputs.image_date }}" --quiet
119+
120+
# Convert date to YYYYMMDD format
121+
clean_date=$(echo "${{ inputs.image_date }}" | sed 's/[-:]//g' | cut -c1-8)
122+
123+
# Add a MaxDiffusion tag built from the short commit hash and the YYYYMMDD date
124+
maxdiffusion_hash=$(git rev-parse --short HEAD)
125+
gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:maxdiffusion_${maxdiffusion_hash}_${clean_date}" --quiet
126+
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
--extra-index-url https://download.pytorch.org/whl/cpu
2+
absl-py
3+
aqtp
4+
datasets
5+
einops
6+
flax
7+
ftfy
8+
google-cloud-storage
9+
grain
10+
hf_transfer
11+
huggingface_hub
12+
imageio-ffmpeg
13+
imageio
14+
jax
15+
jaxlib
16+
Jinja2
17+
opencv-python-headless
18+
optax
19+
orbax-checkpoint
20+
parameterized
21+
Pillow
22+
pyink
23+
pylint
24+
pytest
25+
ruff
26+
scikit-image
27+
sentencepiece
28+
tensorboard-plugin-profile
29+
tensorboard
30+
tensorboardx
31+
tensorflow-datasets
32+
tensorflow
33+
tokamax
34+
tokenizers
35+
transformers
36+
37+
# Pin torch and torchvision to specific CPU-only wheels to avoid
38+
# installing GPU versions from PyPI when running seed-env
39+
torch @ https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl
40+
torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl
41+
qwix @ https://github.com/google/qwix/archive/408a0f48f988b6c5b180e07f0cb1d05997bf0dcc.zip
42+

0 commit comments

Comments
 (0)