Skip to content

Commit fea74cc

Browse files
sjarmakclaude
andcommitted
perf: add pre-built base images for repo caching across 35 tasks
Create 7 CCB base images that pre-clone frequently-used repositories:
- ccb-repo-django-674eda1c (13 tasks) — Django @ 674eda1c
- ccb-repo-k8s-11602f08 (5 tasks) — Kubernetes @ v1.30.0
- ccb-repo-k8s-8c9c67c0 (5 tasks) — Kubernetes @ 8c9c67c0
- ccb-repo-flipt-3d5a345f (6 tasks) — Flipt @ 3d5a345f
- ccb-repo-flink-0cc95fcc (2 tasks) — Flink @ 0cc95fcc
- ccb-repo-kafka-0753c489 (2 tasks) — Kafka @ 0753c489
- ccb-repo-kafka-e678b4b (2 tasks) — Kafka @ e678b4b

Task Dockerfiles now reference `FROM ccb-repo-xxx` and include only task-specific setup (credential injection, doc.go surgery, etc.), skipping the expensive git clone + package install.

Infrastructure:
- base_images/build.sh: builds all 7 images (with --parallel flag)
- base_images/update_task_dockerfiles.py: updates matching tasks
- _common.sh: ensure_base_images() for pre-run caching

Estimated savings: ~45-60 min per full benchmark run (repo clone time for Django 15x, K8s 10x, Flipt 6x, Kafka 4x, Flink 2x eliminated).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ffc48a8 commit fea74cc

File tree

45 files changed

+514
-576
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+514
-576
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Base image with the Django repository checked out at a pinned commit
# under /workspace.
FROM python:3.11-slim

WORKDIR /workspace

# Install git and curl; the apt package lists are deleted in the same layer
# so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git curl \
 && rm -rf /var/lib/apt/lists/*

# Blob-less partial clone into the working directory, then check out the
# pinned commit and set a repo-local git identity.
RUN git clone --filter=blob:none --no-checkout https://github.com/django/django.git . \
 && git checkout 674eda1c03a3187905f48afee0f15226aa62fdf3 \
 && git config user.email "agent@example.com" \
 && git config user.name "Agent"

# Best-effort editable install: stderr is discarded and a failure does not
# fail the image build.
RUN pip install -e . 2>/dev/null || true
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Base image with the Apache Flink repository checked out at a pinned commit
# under /workspace.
FROM eclipse-temurin:17-jdk

WORKDIR /workspace

# Install git, curl and Python 3 with pip; the apt package lists are deleted
# in the same layer so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git curl python3 python3-pip \
 && rm -rf /var/lib/apt/lists/*

# Blob-less partial clone into the working directory, then check out the
# pinned commit and set a repo-local git identity.
RUN git clone --filter=blob:none --no-checkout https://github.com/apache/flink.git . \
 && git checkout 0cc95fcc145eddcfc87fc1b4ddf96ddd0f2ee15f \
 && git config user.email "agent@example.com" \
 && git config user.name "Agent"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Base image with the Flipt repository checked out at a pinned commit
# under /workspace.
FROM golang:1.23-bookworm

WORKDIR /workspace

# Install git and curl; the apt package lists are deleted in the same layer
# so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git curl \
 && rm -rf /var/lib/apt/lists/*

# Blob-less partial clone into the working directory, then check out the
# pinned commit and set a repo-local git identity.
RUN git clone --filter=blob:none --no-checkout https://github.com/flipt-io/flipt.git . \
 && git checkout 3d5a345f94c2adc8a0eaa102c189c08ad4c0f8e8 \
 && git config user.email "agent@example.com" \
 && git config user.name "Agent"

# Best-effort download of Go module dependencies: stderr is discarded and a
# failure does not fail the image build.
RUN go mod download 2>/dev/null || true
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Base image with the Kubernetes repository checked out at a pinned commit
# under /workspace.
FROM golang:1.23-bookworm

WORKDIR /workspace

# Install git, curl and Python 3 with pip; the apt package lists are deleted
# in the same layer so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git curl python3 python3-pip \
 && rm -rf /var/lib/apt/lists/*

# Blob-less partial clone into the working directory, then check out the
# pinned commit and set a repo-local git identity.
RUN git clone --filter=blob:none --no-checkout https://github.com/kubernetes/kubernetes.git . \
 && git checkout 11602f083ca275dcfd4341641ae7fe338b7f6f69 \
 && git config user.email "agent@example.com" \
 && git config user.name "Agent"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Base image with the Kubernetes repository checked out at a pinned commit
# under /workspace.
FROM golang:1.23-bookworm

WORKDIR /workspace

# Install git, curl and Python 3 with pip; the apt package lists are deleted
# in the same layer so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git curl python3 python3-pip \
 && rm -rf /var/lib/apt/lists/*

# Blob-less partial clone into the working directory, then check out the
# pinned commit and set a repo-local git identity.
RUN git clone --filter=blob:none --no-checkout https://github.com/kubernetes/kubernetes.git . \
 && git checkout 8c9c67c000104450cfc5a5f48053a9a84b73cf93 \
 && git config user.email "agent@example.com" \
 && git config user.name "Agent"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Base image with the Apache Kafka repository checked out at a pinned commit
# under /workspace.
FROM eclipse-temurin:17-jdk

WORKDIR /workspace

# Install git and curl; the apt package lists are deleted in the same layer
# so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git curl \
 && rm -rf /var/lib/apt/lists/*

# Blob-less partial clone into the working directory, then check out the
# pinned commit and set a repo-local git identity.
RUN git clone --filter=blob:none --no-checkout https://github.com/apache/kafka.git . \
 && git checkout 0753c489afad403fb6e78fda4c4a380e46f500c0 \
 && git config user.email "agent@example.com" \
 && git config user.name "Agent"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Base image with the Apache Kafka repository checked out at a pinned commit
# under /workspace.
FROM eclipse-temurin:21-jdk

WORKDIR /workspace

# Install git, curl, Python 3 and ripgrep; the apt package lists are deleted
# in the same layer so they do not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends git curl python3 ripgrep \
 && rm -rf /var/lib/apt/lists/*

# Blob-less partial clone into the working directory, then check out the
# pinned commit and set a repo-local git identity.
# NOTE(review): this is an abbreviated commit hash; the other base images pin
# a full 40-character SHA. Consider doing the same here so the reference can
# never become ambiguous.
RUN git clone --filter=blob:none --no-checkout https://github.com/apache/kafka.git . \
 && git checkout e678b4b \
 && git config user.email "agent@example.com" \
 && git config user.name "Agent"

base_images/build.sh

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/bin/bash
# Build all CCB base images for Docker layer caching.
# Run this ONCE before a benchmark batch. Subsequent task builds
# will reuse these cached layers instead of re-cloning repos.
#
# Usage: ./base_images/build.sh [--parallel]
#   --parallel   build the images concurrently instead of one at a time
#   -h, --help   print the usage line and exit

set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Enable BuildKit
export DOCKER_BUILDKIT=1

# "true" or "false"; consumed as a command (`if $PARALLEL`) later in the script.
PARALLEL=false

#######################################
# Parse command-line arguments.
# Globals:   PARALLEL (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on stdout for -h/--help; a warning on stderr for
#            every argument that is not recognised
# Returns:   0; exits 0 after printing usage for -h/--help
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --parallel)
        PARALLEL=true
        ;;
      -h|--help)
        echo "Usage: ./base_images/build.sh [--parallel]"
        exit 0
        ;;
      *)
        # Unknown arguments used to be ignored silently, so a typo such as
        # "--paralel" quietly produced a sequential build. Keep going (to
        # stay compatible with existing callers) but make it visible.
        echo "WARNING: ignoring unknown argument: $arg" >&2
        ;;
    esac
  done
}

parse_args "$@"
18+
19+
#######################################
# Build one base image unless a recent one already exists locally.
# Globals:   SCRIPT_DIR (read) - used as the docker build context
# Arguments: $1 - path to the Dockerfile
#            $2 - image tag to build
# Outputs:   SKIP/BUILD/DONE progress lines and the last 5 lines of the
#            build log on stdout; on failure, the last 20 log lines and a
#            FAIL line on stderr
# Returns:   0 if the image was skipped or built, otherwise the exit status
#            of `docker build`
#######################################
build_image() {
  local dockerfile="$1"
  local tag="$2"
  local max_age_seconds=604800  # 7 days

  # Skip if image already exists and is recent (< 7 days old).
  # `docker image inspect` only looks at images, so a container that happens
  # to share the tag's name cannot cause a false skip.
  local image_created
  image_created=$(docker image inspect --format='{{.Created}}' "$tag" 2>/dev/null || echo "")
  if [ -n "$image_created" ]; then
    local created_epoch age_seconds
    # If the timestamp cannot be parsed (e.g. `date` without -d support) the
    # epoch falls back to 0, which makes the image look old and forces a
    # rebuild.
    created_epoch=$(date -d "$image_created" +%s 2>/dev/null || echo 0)
    age_seconds=$(( $(date +%s) - created_epoch ))
    if [ "$age_seconds" -lt "$max_age_seconds" ]; then
      echo "SKIP $tag (exists, ${age_seconds}s old)"
      return 0
    fi
  fi

  echo "BUILD $tag ..."
  local start_time=$SECONDS

  # Capture the log and the exit status separately so a failed build is
  # detected whether or not the caller enabled `set -e` / `pipefail`.
  local build_log
  local build_status=0
  build_log=$(docker build -f "$dockerfile" -t "$tag" "$SCRIPT_DIR" 2>&1) || build_status=$?

  local elapsed=$(( SECONDS - start_time ))
  if [ "$build_status" -ne 0 ]; then
    if [ -n "$build_log" ]; then
      printf '%s\n' "$build_log" | tail -20 >&2
    fi
    echo "FAIL $tag (exit ${build_status}, ${elapsed}s)" >&2
    return "$build_status"
  fi

  if [ -n "$build_log" ]; then
    printf '%s\n' "$build_log" | tail -5
  fi
  echo "DONE $tag (${elapsed}s)"
}
41+
42+
echo "=== Building CCB base images ==="
echo ""

# Each entry is "<Dockerfile name relative to SCRIPT_DIR> <image tag>".
IMAGES=(
  "Dockerfile.django-674eda1c ccb-repo-django-674eda1c"
  "Dockerfile.flipt-3d5a345f ccb-repo-flipt-3d5a345f"
  "Dockerfile.k8s-11602f08 ccb-repo-k8s-11602f08"
  "Dockerfile.k8s-8c9c67c0 ccb-repo-k8s-8c9c67c0"
  "Dockerfile.kafka-0753c489 ccb-repo-kafka-0753c489"
  "Dockerfile.kafka-e678b4b ccb-repo-kafka-e678b4b"
  "Dockerfile.flink-0cc95fcc ccb-repo-flink-0cc95fcc"
)

# Upper bound on simultaneous builds in --parallel mode.
MAX_JOBS=4

TOTAL_START=$SECONDS
FAILED=0

if $PARALLEL; then
  echo "Building ${#IMAGES[@]} images in parallel (max ${MAX_JOBS} concurrent)..."
  build_pids=()
  build_tags=()
  for entry in "${IMAGES[@]}"; do
    read -r dockerfile tag <<< "$entry"
    # Throttle: hold off launching until fewer than MAX_JOBS builds are
    # running. Polling is used instead of `wait -n` so that every job's exit
    # status is still available to the per-PID `wait` below.
    while [ "$(jobs -rp | wc -l)" -ge "$MAX_JOBS" ]; do
      sleep 1
    done
    (build_image "$SCRIPT_DIR/$dockerfile" "$tag") &
    build_pids+=("$!")
    build_tags+=("$tag")
  done

  # A bare `wait` always returns 0, which would hide failed builds; wait on
  # each PID individually and count the ones that exited non-zero.
  for idx in "${!build_pids[@]}"; do
    if ! wait "${build_pids[$idx]}"; then
      echo "FAILED ${build_tags[$idx]}" >&2
      FAILED=$(( FAILED + 1 ))
    fi
  done
else
  echo "Building ${#IMAGES[@]} images sequentially..."
  for entry in "${IMAGES[@]}"; do
    read -r dockerfile tag <<< "$entry"
    # Under `set -e` a failing build aborts the script here (fail fast).
    build_image "$SCRIPT_DIR/$dockerfile" "$tag"
  done
fi

TOTAL_ELAPSED=$(( SECONDS - TOTAL_START ))
echo ""
if [ "$FAILED" -gt 0 ]; then
  echo "=== ${FAILED} of ${#IMAGES[@]} base image builds FAILED (${TOTAL_ELAPSED}s) ===" >&2
else
  echo "=== All base images built in ${TOTAL_ELAPSED}s ==="
fi
echo ""
echo "Base images available:"
# grep exits 1 when nothing matches; that is not an error for this listing.
docker images --format " {{.Repository}}:{{.Tag}} {{.Size}}" | grep ccb-repo || true

# Propagate parallel build failures to the caller.
[ "$FAILED" -eq 0 ] || exit 1

0 commit comments

Comments
 (0)