Skip to content

Commit ed628e2

Browse files
ryan-williams and claude committed
Support local scripts for private repo, add SSH helper
- SCP all scripts to instance instead of fetching from GitHub
- runner-setup.sh now checks SCRIPTS_DIR for local scripts first
- Add scripts/update-lmbda-ssh helper for debugging SSH
- Reduce boot status log spam (log every 30s with elapsed time)
- Add tmp/ to .gitignore

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 9a69e7e commit ed628e2

5 files changed

Lines changed: 129 additions & 38 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -173,3 +173,4 @@ pyrightconfig.json
173173

174174
# Temporarily ignore uv.lock
175175
uv.lock
176+
tmp/

scripts/update-lmbda-ssh

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env bash
2+
# Update the lmbda SSH host IP from the current running Lambda instance
3+
# Usage: ./scripts/update-lmbda-ssh [instance_id]
4+
5+
set -e
6+
7+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
8+
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
9+
SSH_CONF="$PROJECT_DIR/tmp/lmbda-ssh.conf"
10+
11+
# Get instance IP (either by ID or first active instance)
12+
if [ -n "$1" ]; then
13+
IP=$(python -m lambda_gha.cli get "$1" 2>/dev/null | jq -r '.data.ip // empty')
14+
else
15+
IP=$(python -m lambda_gha.cli ls 2>/dev/null | jq -r '.data[0].ip // empty')
16+
fi
17+
18+
if [ -z "$IP" ]; then
19+
echo "No active Lambda instance found" >&2
20+
exit 1
21+
fi
22+
23+
# Create tmp dir if needed
24+
mkdir -p "$PROJECT_DIR/tmp"
25+
26+
# Write SSH config fragment
27+
cat > "$SSH_CONF" <<EOF
28+
# Auto-generated by scripts/update-lmbda-ssh
29+
# Include this in ~/.ssh/config with: Include $SSH_CONF
30+
Host lmbda
31+
HostName $IP
32+
User ubuntu
33+
IdentitiesOnly yes
34+
IdentityFile ~/.ssh/lambda_ecdsa
35+
StrictHostKeyChecking no
36+
UserKnownHostsFile /dev/null
37+
EOF
38+
39+
echo "Updated $SSH_CONF with IP: $IP"
40+
echo "Run: ssh lmbda"

src/lambda_gha/scripts/runner-setup.sh

Lines changed: 31 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -28,16 +28,22 @@ BIN_DIR=/usr/local/bin
2828
RUNNER_STATE_DIR=/var/run/github-runner
2929
mkdir -p $RUNNER_STATE_DIR
3030

31-
# Fetch shared functions from GitHub
32-
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fetching shared functions from GitHub (SHA: ${action_sha})" | tee -a /var/log/runner-setup.log
33-
FUNCTIONS_URL="https://raw.githubusercontent.com/Open-Athena/lambda-gha/${action_sha}/src/lambda_gha/templates/shared-functions.sh"
34-
if ! curl -sSL "$FUNCTIONS_URL" -o /tmp/shared-functions.sh && ! wget -q "$FUNCTIONS_URL" -O /tmp/shared-functions.sh; then
35-
echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: Failed to download shared functions" | tee -a /var/log/runner-setup.log
36-
# Terminate via Lambda API
37-
curl -s -X POST -H "Authorization: Bearer $LAMBDA_API_KEY" -H "Content-Type: application/json" \
38-
-d "{\"instance_ids\": [\"$LAMBDA_INSTANCE_ID\"]}" \
39-
"$LAMBDA_API_BASE/instance-operations/terminate" || true
40-
exit 1
31+
# Get shared functions - from local dir if available (private repo), else fetch from GitHub
32+
SCRIPTS_DIR="${SCRIPTS_DIR:-}"
33+
if [ -n "$SCRIPTS_DIR" ] && [ -f "$SCRIPTS_DIR/shared-functions.sh" ]; then
34+
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using local shared functions from $SCRIPTS_DIR" | tee -a /var/log/runner-setup.log
35+
cp "$SCRIPTS_DIR/shared-functions.sh" /tmp/shared-functions.sh
36+
else
37+
echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fetching shared functions from GitHub (SHA: ${action_sha})" | tee -a /var/log/runner-setup.log
38+
FUNCTIONS_URL="https://raw.githubusercontent.com/Open-Athena/lambda-gha/${action_sha}/src/lambda_gha/templates/shared-functions.sh"
39+
if ! curl -sSL "$FUNCTIONS_URL" -o /tmp/shared-functions.sh && ! wget -q "$FUNCTIONS_URL" -O /tmp/shared-functions.sh; then
40+
echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: Failed to download shared functions" | tee -a /var/log/runner-setup.log
41+
# Terminate via Lambda API
42+
curl -s -X POST -H "Authorization: Bearer $LAMBDA_API_KEY" -H "Content-Type: application/json" \
43+
-d "{\"instance_ids\": [\"$LAMBDA_INSTANCE_ID\"]}" \
44+
"$LAMBDA_API_BASE/instance-operations/terminate" || true
45+
exit 1
46+
fi
4147
fi
4248

4349
# Write shared functions that will be used by multiple scripts
@@ -134,12 +140,19 @@ else
134140
fi
135141
log "Downloaded runner binary"
136142

137-
# Helper function to fetch scripts
143+
# Helper function to fetch scripts - uses local copy if available, else downloads
138144
fetch_script() {
139145
local script_name="$1"
140-
local url="${BASE_URL}/${script_name}"
141146
local dest="${BIN_DIR}/${script_name}"
142147

148+
# Check for local copy first (private repo support)
149+
if [ -n "$SCRIPTS_DIR" ] && [ -f "$SCRIPTS_DIR/$script_name" ]; then
150+
cp "$SCRIPTS_DIR/$script_name" "$dest"
151+
return 0
152+
fi
153+
154+
# Fall back to downloading from GitHub
155+
local url="${BASE_URL}/${script_name}"
143156
if command -v curl >/dev/null 2>&1; then
144157
curl -fsSL "$url" -o "$dest" || {
145158
log_error "Failed to fetch $script_name"
@@ -156,9 +169,12 @@ fetch_script() {
156169
fi
157170
}
158171

159-
# Fetch job tracking scripts from GitHub
160-
# These scripts are called by GitHub runner hooks
161-
log "Fetching runner hook scripts"
172+
# Fetch job tracking scripts - from local if available (private repo), else GitHub
173+
if [ -n "$SCRIPTS_DIR" ]; then
174+
log "Copying runner hook scripts from local $SCRIPTS_DIR"
175+
else
176+
log "Fetching runner hook scripts from GitHub"
177+
fi
162178
BASE_URL="https://raw.githubusercontent.com/Open-Athena/lambda-gha/${action_sha}/src/lambda_gha/scripts"
163179

164180
fetch_script "job-started-hook.sh"

src/lambda_gha/start.py

Lines changed: 56 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -268,8 +268,10 @@ def wait_until_ready(self, ids: list[str], timeout: int = INSTANCE_POLL_TIMEOUT)
268268
start_time = time.time()
269269
pending = set(ids)
270270
details = {}
271+
last_log_time = {} # Track last log time per instance to reduce spam
271272

272273
while pending and (time.time() - start_time) < timeout:
274+
elapsed = int(time.time() - start_time)
273275
for instance_id in list(pending):
274276
try:
275277
result = self._api_request("GET", f"/instances/{instance_id}")
@@ -283,22 +285,30 @@ def wait_until_ready(self, ids: list[str], timeout: int = INSTANCE_POLL_TIMEOUT)
283285
"status": status,
284286
}
285287
pending.remove(instance_id)
286-
print(f"Instance {instance_id} is ready: {instance.get('ip')}")
288+
print(f"[{elapsed}s] Instance {instance_id[:12]}... is ready: {instance.get('ip')}")
287289
elif status in ("terminated", "terminating"):
288290
raise RuntimeError(f"Instance {instance_id} terminated unexpectedly")
289291
else:
290-
print(f"Instance {instance_id} status: {status}")
292+
# Log every 30s to reduce spam, but always log first status
293+
last_log = last_log_time.get(instance_id, 0)
294+
if elapsed - last_log >= 30 or last_log == 0:
295+
print(f"[{elapsed}s] Instance {instance_id[:12]}... status: {status}")
296+
last_log_time[instance_id] = elapsed
291297
except requests.HTTPError as e:
292298
if e.response.status_code == 404:
293-
print(f"Instance {instance_id} not found yet, retrying...")
299+
last_log = last_log_time.get(instance_id, 0)
300+
if elapsed - last_log >= 30 or last_log == 0:
301+
print(f"[{elapsed}s] Instance {instance_id[:12]}... not found yet, retrying...")
302+
last_log_time[instance_id] = elapsed
294303
else:
295304
raise
296305

297306
if pending:
298307
time.sleep(INSTANCE_POLL_INTERVAL)
299308

300309
if pending:
301-
raise TimeoutError(f"Instances did not become ready within {timeout}s: {pending}")
310+
elapsed = int(time.time() - start_time)
311+
raise TimeoutError(f"[{elapsed}s] Instances did not become ready within {timeout}s: {pending}")
302312

303313
return details
304314

@@ -399,39 +409,62 @@ def execute_setup_via_ssh(
399409
else:
400410
raise RuntimeError(f"Failed to connect to {ip} via SSH after {max_retries} attempts")
401411

402-
# Read setup script from package (can't curl from private repo)
412+
# Read all required scripts from package (can't curl from private repo)
403413
from importlib.resources import files
404414
scripts_dir = files("lambda_gha.scripts")
405-
setup_script = (scripts_dir / "runner-setup.sh").read_text()
406-
407-
# Write script to temp file for SCP
408-
script_file = tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False)
409-
script_file.write(setup_script)
410-
script_file.close()
411-
os.chmod(script_file.name, stat.S_IRUSR | stat.S_IXUSR) # 0500
415+
templates_dir = files("lambda_gha.templates")
416+
417+
# Scripts to copy: (source, dest_name)
418+
scripts_to_copy = [
419+
(scripts_dir / "runner-setup.sh", "runner-setup.sh"),
420+
(scripts_dir / "check-runner-termination.sh", "check-runner-termination.sh"),
421+
(scripts_dir / "job-started-hook.sh", "job-started-hook.sh"),
422+
(scripts_dir / "job-completed-hook.sh", "job-completed-hook.sh"),
423+
(templates_dir / "shared-functions.sh", "shared-functions.sh"),
424+
]
412425

413-
# SCP the script to the instance
426+
# SCP options
414427
scp_opts = ["-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null"]
415428
if key_file:
416429
scp_opts.extend(["-i", key_file.name])
417430

418-
print(f"Copying setup script to instance...")
419-
scp_result = subprocess.run(
420-
["scp"] + scp_opts + [script_file.name, f"{ssh_user}@{ip}:/tmp/runner-setup.sh"],
431+
# Create scripts directory on instance
432+
print(f"Creating scripts directory on instance...")
433+
mkdir_result = subprocess.run(
434+
["ssh"] + ssh_opts + [f"{ssh_user}@{ip}", "mkdir -p /tmp/lambda-gha-scripts"],
421435
capture_output=True,
422436
text=True,
423437
)
424-
if scp_result.returncode != 0:
425-
raise RuntimeError(f"Failed to SCP script: {scp_result.stderr}")
426-
427-
# Build env export commands
438+
if mkdir_result.returncode != 0:
439+
raise RuntimeError(f"Failed to create scripts dir: {mkdir_result.stderr}")
440+
441+
# Copy all scripts
442+
print(f"Copying {len(scripts_to_copy)} scripts to instance...")
443+
for src_file, dest_name in scripts_to_copy:
444+
content = src_file.read_text()
445+
local_file = tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False)
446+
local_file.write(content)
447+
local_file.close()
448+
os.chmod(local_file.name, stat.S_IRUSR | stat.S_IXUSR)
449+
450+
scp_result = subprocess.run(
451+
["scp"] + scp_opts + [local_file.name, f"{ssh_user}@{ip}:/tmp/lambda-gha-scripts/{dest_name}"],
452+
capture_output=True,
453+
text=True,
454+
)
455+
os.unlink(local_file.name)
456+
if scp_result.returncode != 0:
457+
raise RuntimeError(f"Failed to SCP {dest_name}: {scp_result.stderr}")
458+
459+
# Build env export commands (add SCRIPTS_DIR for local script access)
460+
env_vars["SCRIPTS_DIR"] = "/tmp/lambda-gha-scripts"
428461
env_exports = "\n".join(f'export {k}="{v}"' for k, v in env_vars.items())
429462

430-
# Build the setup command: export vars, run script
463+
# Build the setup command: export vars, run script from scripts dir
431464
setup_cmd = f'''
432465
{env_exports}
433-
chmod +x /tmp/runner-setup.sh
434-
sudo -E nohup /tmp/runner-setup.sh > /var/log/runner-setup.log 2>&1 &
466+
chmod +x /tmp/lambda-gha-scripts/*.sh
467+
sudo -E nohup /tmp/lambda-gha-scripts/runner-setup.sh > /var/log/runner-setup.log 2>&1 &
435468
'''
436469

437470
print(f"Executing setup script...")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Templates package - contains shell script templates

0 commit comments

Comments
 (0)