Skip to content

Commit 80b0d68

Browse files
sjarmak and Claude committed
perf: pre-build Docker images and schedule heavy tasks first
Two optimizations to reduce full benchmark run time by ~2-4 hours: 1. Docker image pre-building: New scripts/prebuild_images.sh builds all task images in parallel (8 concurrent) before the run starts, so Harbor's docker compose build hits layer cache and completes instantly. Integrated into sdlc_suite_2config.sh (opt-out via --no-prebuild). SG_only temp dirs changed from random mktemp to deterministic /tmp/sgonly_<task_id> to enable pre-buildable image names. 2. Priority scheduling: Tasks now sorted by expected duration descending (build_timeout_sec + time_limit_sec) so heavy tasks (K8s, Terraform, Quantlib) start in the first parallel wave instead of blocking the tail end. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent fea74cc commit 80b0d68

File tree

3 files changed

+254
-1
lines changed

3 files changed

+254
-1
lines changed

configs/_common.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,19 @@ ensure_base_images() {
6262
fi
6363
}
6464

65+
# Pre-build all Docker images for a suite to warm the layer cache.
# Call before run_paired_configs so Harbor's docker compose build is instant.
# Args:    $1 = suite name (e.g., ccb_build), or empty for all suites
# Returns: 0 in all cases — pre-building is best-effort. If the helper
#          script is missing/non-executable we now warn on stderr instead
#          of silently doing nothing, so a broken checkout is visible.
prebuild_images() {
  local suite="${1:-}"
  local repo_root
  # Resolve the repo root relative to this sourced file, not $PWD.
  repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
  local script="${repo_root}/scripts/prebuild_images.sh"
  if [ -x "$script" ]; then
    # ${suite:+"$suite"} passes the suite only when non-empty (quoted).
    bash "$script" ${suite:+"$suite"}
  else
    printf 'WARN: prebuild script missing or not executable: %s — skipping image pre-build\n' "$script" >&2
  fi
  return 0
}
77+
6578
# ============================================
6679
# FAIL-FAST MODE
6780
# ============================================

configs/sdlc_suite_2config.sh

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,41 @@ fi
5858

5959
mapfile -t ALL_TASK_IDS < <(find "${BENCHMARK_DIR}/${SUITE}" -mindepth 1 -maxdepth 1 -type d -printf '%f\n' | sort)
6060

61+
# Re-sort tasks by expected duration descending (heaviest first).
# This ensures long-running tasks start immediately in the first parallel wave,
# overlapping with many lighter tasks instead of blocking the tail end.
# Weight = build_timeout_sec + max(timeout_sec, time_limit_sec), with
# defaults of 900s (build) and 1200s (agent) when task.toml omits them.
mapfile -t ALL_TASK_IDS < <(python3 - "${BENCHMARK_DIR}/${SUITE}" "${ALL_TASK_IDS[@]}" <<'SORT_EOF'
import sys, os, re

suite_dir = sys.argv[1]
task_ids = sys.argv[2:]

def task_weight(task_id):
    """Best-effort expected duration (seconds) parsed from task.toml."""
    toml_path = os.path.join(suite_dir, task_id, "task.toml")
    try:
        with open(toml_path) as f:
            text = f.read()
    except OSError:
        return 0
    build = 900   # default build timeout
    agent = 1200  # default agent time limit
    m = re.search(r'\bbuild_timeout_sec\s*=\s*(\d+)', text)
    if m:
        build = int(m.group(1))
    # \b is required: without it, 'timeout_sec' matches the suffix of
    # 'build_timeout_sec' (whichever appears first in the file), silently
    # clobbering the agent limit with the build limit. '_' is a word
    # character, so \b correctly rejects the embedded occurrence.
    m = re.search(r'\btimeout_sec\s*=\s*(\d+)', text)
    if m:
        agent = int(m.group(1))
    m = re.search(r'\btime_limit_sec\s*=\s*(\d+)', text)
    if m:
        agent = max(agent, int(m.group(1)))
    return build + agent

# sorted() is stable: equal-weight tasks keep their alphabetical input order.
for tid in sorted(task_ids, key=task_weight, reverse=True):
    print(tid)
SORT_EOF
)
93+
6194
# Parse arguments
95+
SKIP_PREBUILD=false
6296
while [[ $# -gt 0 ]]; do
6397
case $1 in
6498
--baseline-only)
@@ -85,6 +119,10 @@ while [[ $# -gt 0 ]]; do
85119
TASK_FILTERS+=("$2")
86120
shift 2
87121
;;
122+
--no-prebuild)
123+
SKIP_PREBUILD=true
124+
shift
125+
;;
88126
*)
89127
echo "Unknown option: $1"
90128
exit 1
@@ -185,7 +223,9 @@ _sdlc_run_single() {
185223
return 1
186224
fi
187225

188-
temp_task_dir=$(mktemp -d "/tmp/sdlc_${SUITE_STEM}_${task_id}_XXXXXX")
226+
temp_task_dir="/tmp/sgonly_${task_id}"
227+
rm -rf "$temp_task_dir"
228+
mkdir -p "$temp_task_dir"
189229
cp -a "${task_path}/." "${temp_task_dir}/"
190230
cp "${temp_task_dir}/environment/Dockerfile.sg_only" "${temp_task_dir}/environment/Dockerfile"
191231
run_task_path="$temp_task_dir"
@@ -232,6 +272,13 @@ run_task_batch() {
232272
log_section "Completed ${SUITE_STEM} - Mode: $mode"
233273
}
234274

275+
# Pre-build all Docker images to warm the cache before agent runs.
276+
# This moves Docker build time out of the critical path (API session slots).
277+
if [ "$SKIP_PREBUILD" = false ]; then
278+
log_section "Pre-building Docker images for ${SUITE}"
279+
prebuild_images "$SUITE"
280+
fi
281+
235282
if [ "$RUN_BASELINE" = true ] && [ "$RUN_FULL" = true ]; then
236283
run_paired_configs TASK_IDS _sdlc_run_single "$JOBS_BASE"
237284

scripts/prebuild_images.sh

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
#!/bin/bash
# Pre-build all Docker images for SDLC benchmark suites.
# Run this before a benchmark batch to warm the Docker layer cache.
# Harbor's docker compose build then exits near-instantly (cache hit).
#
# Usage:
#   ./scripts/prebuild_images.sh [SUITE...] [OPTIONS]
#   ./scripts/prebuild_images.sh ccb_build ccb_fix
#   ./scripts/prebuild_images.sh                 # all 8 suites
#   PREBUILD_JOBS=12 ./scripts/prebuild_images.sh
#
# Options:
#   --baseline-only   Skip SG_only images
#   --sgonly-only     Skip baseline images
#   --dry-run         Print what would be built without building
#   --force           Rebuild even if image exists
#   --jobs N          Max concurrent builds (default: 8 or $PREBUILD_JOBS)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$SCRIPT_DIR/.."
BENCHMARK_DIR="$REPO_ROOT/benchmarks"

# BuildKit: parallel stages and better layer caching.
export DOCKER_BUILDKIT=1

PREBUILD_JOBS="${PREBUILD_JOBS:-8}"
DRY_RUN=false
FORCE=false
SKIP_BASELINE=false
SKIP_SGONLY=false
SUITES=()

while [[ $# -gt 0 ]]; do
  case $1 in
    --baseline-only) SKIP_SGONLY=true; shift ;;
    --sgonly-only)   SKIP_BASELINE=true; shift ;;
    --dry-run)       DRY_RUN=true; shift ;;
    --force)         FORCE=true; shift ;;
    --jobs)
      # Validate explicitly: without this, a missing value trips set -u
      # with a cryptic "unbound variable", and garbage breaks the -ge
      # comparison in the throttle loop much later.
      if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ || "$2" -eq 0 ]]; then
        echo "ERROR: --jobs requires a positive integer argument" >&2
        exit 1
      fi
      PREBUILD_JOBS="$2"; shift 2 ;;
    ccb_*) SUITES+=("$1"); shift ;;
    *) echo "Unknown option: $1"; exit 1 ;;
  esac
done

# No suites named on the command line: build everything.
if [ ${#SUITES[@]} -eq 0 ]; then
  SUITES=(ccb_build ccb_debug ccb_design ccb_document ccb_fix ccb_secure ccb_test ccb_understand)
fi
47+
48+
# ============================================
# Step 1: Ensure base images are built
# ============================================
# Skipped on --dry-run, and when the builder script is absent or not
# executable (e.g. a partial checkout).
BASE_BUILD="$REPO_ROOT/base_images/build.sh"
if [[ -x "$BASE_BUILD" && "$DRY_RUN" == false ]]; then
  echo "=== Ensuring base images ==="
  bash "$BASE_BUILD" --parallel
  echo ""
fi
57+
58+
# ============================================
# Step 2: Collect build jobs
# ============================================
# Each job: "image_tag|context_dir|mode"
# mode is "" for baseline images and "sgonly" for SG_only images (the
# original comment said "cleanup_dir", but the third field is actually the
# mode string that build_one dispatches on; build_one derives the
# /tmp/sgonly_<task_id> temp context itself at build time).
declare -a JOBS=()

for suite in "${SUITES[@]}"; do
  suite_dir="${BENCHMARK_DIR}/${suite}"
  if [ ! -d "$suite_dir" ]; then
    # Diagnostics on stderr so stdout stays a clean build report.
    echo "WARN: Suite directory not found: $suite_dir" >&2
    continue
  fi

  for task_dir in "$suite_dir"/*/; do
    # Unmatched glob leaves the literal pattern; skip it.
    [ -d "$task_dir" ] || continue
    task_id=$(basename "$task_dir")
    env_dir="${task_dir}environment"   # task_dir ends with '/' from the glob

    # Baseline image
    if [ "$SKIP_BASELINE" = false ] && [ -f "${env_dir}/Dockerfile" ]; then
      JOBS+=("hb__${task_id}|${env_dir}|")
    fi

    # SG_only image (temp build context created later by build_one)
    if [ "$SKIP_SGONLY" = false ] && [ -f "${env_dir}/Dockerfile.sg_only" ]; then
      JOBS+=("hb__sgonly_${task_id}|${env_dir}|sgonly")
    fi
  done
done
91+
92+
# Announce the planned workload before any Docker work starts.
printf '=== Pre-building Docker images ===\n'
printf 'Suites: %s\n' "${SUITES[*]}"
printf 'Jobs: %s images (parallel: %s)\n' "${#JOBS[@]}" "$PREBUILD_JOBS"
printf '\n'

# Nothing matched (e.g. every suite dir missing): succeed without spinning
# up the build machinery.
if [ "${#JOBS[@]}" -eq 0 ]; then
  echo "No images to build."
  exit 0
fi
101+
102+
# ============================================
103+
# Step 3: Build function
104+
# ============================================
105+
OK_COUNT=0
106+
ERR_COUNT=0
107+
SKIP_COUNT=0
108+
LOG_DIR="/tmp/prebuild_logs"
109+
mkdir -p "$LOG_DIR"
110+
111+
# Build a single image. Runs in a background subshell (Step 4), so it
# communicates only via stdout status lines (OK/ERR/SKIP) and exit status.
# Globals:   FORCE (read), LOG_DIR (read)
# Arguments: $1 = image tag
#            $2 = environment dir (the docker build context for baseline)
#            $3 = mode: "" for baseline, "sgonly" for Dockerfile.sg_only
# Returns:   0 built OK, 1 build failed, 2 skipped (image already present)
build_one() {
  local image_tag=$1
  local env_dir=$2
  local mode=$3   # empty=baseline, "sgonly"=sg_only

  # Skip if image already exists (unless --force)
  if [ "$FORCE" = false ] && docker image inspect "$image_tag" >/dev/null 2>&1; then
    echo "SKIP $image_tag (exists)"
    return 2 # special exit code for "skipped"
  fi

  local context_dir="$env_dir"
  local cleanup_dir=""

  # For sg_only: copy the task into a deterministic temp dir and swap in the
  # SG-only Dockerfile, mirroring what the suite runner does at run time so
  # the resulting image layers hit the same cache.
  if [ "$mode" = "sgonly" ]; then
    local task_dir task_id
    task_dir=$(dirname "$env_dir")
    task_id=$(basename "$task_dir")
    cleanup_dir="/tmp/sgonly_${task_id}"
    rm -rf -- "$cleanup_dir"
    cp -a "$task_dir" "$cleanup_dir"
    cp "${cleanup_dir}/environment/Dockerfile.sg_only" "${cleanup_dir}/environment/Dockerfile"
    context_dir="${cleanup_dir}/environment"
  fi

  local log_file="${LOG_DIR}/${image_tag}.log"
  local start=$SECONDS
  # Declare rc once up front; the original declared it inside each branch,
  # which works in bash (function scope) but is fragile to refactoring.
  local rc=0

  if docker build --quiet -t "$image_tag" "$context_dir" > "$log_file" 2>&1; then
    echo "OK $image_tag ($(( SECONDS - start ))s)"
  else
    echo "ERR $image_tag ($(( SECONDS - start ))s) — see $log_file"
    rc=1
  fi

  # Cleanup sg_only temp dir (even on failure, so reruns start clean).
  if [ -n "$cleanup_dir" ] && [ -d "$cleanup_dir" ]; then
    rm -rf -- "$cleanup_dir"
  fi

  return "$rc"
}
156+
157+
# ============================================
# Step 4: Dry run or parallel execution
# ============================================
# --dry-run: list every planned build and stop before touching Docker.
if [[ "$DRY_RUN" == true ]]; then
  echo "DRY RUN — would build:"
  for entry in "${JOBS[@]}"; do
    IFS='|' read -r tag ctx mode <<< "$entry"
    printf " %-50s %s\n" "$tag" "($mode${mode:+: }${ctx})"
  done
  echo ""
  echo "Total: ${#JOBS[@]} images"
  exit 0
fi
170+
171+
TOTAL_START=$SECONDS
172+
173+
for job in "${JOBS[@]}"; do
174+
IFS='|' read -r tag ctx mode <<< "$job"
175+
176+
(
177+
build_one "$tag" "$ctx" "$mode"
178+
) &
179+
180+
# Limit concurrent builds
181+
while [ "$(jobs -rp | wc -l)" -ge "$PREBUILD_JOBS" ]; do
182+
wait -n 2>/dev/null || true
183+
done
184+
done
185+
186+
# Wait for all remaining builds
187+
wait 2>/dev/null || true
188+
189+
TOTAL_ELAPSED=$(( SECONDS - TOTAL_START ))
190+
191+
echo ""
192+
echo "=== Pre-build complete in ${TOTAL_ELAPSED}s ==="
193+
echo "Logs: $LOG_DIR/"

0 commit comments

Comments
 (0)