@@ -268,8 +268,10 @@ def wait_until_ready(self, ids: list[str], timeout: int = INSTANCE_POLL_TIMEOUT)
268268 start_time = time .time ()
269269 pending = set (ids )
270270 details = {}
271+ last_log_time = {} # Track last log time per instance to reduce spam
271272
272273 while pending and (time .time () - start_time ) < timeout :
274+ elapsed = int (time .time () - start_time )
273275 for instance_id in list (pending ):
274276 try :
275277 result = self ._api_request ("GET" , f"/instances/{ instance_id } " )
@@ -283,22 +285,30 @@ def wait_until_ready(self, ids: list[str], timeout: int = INSTANCE_POLL_TIMEOUT)
283285 "status" : status ,
284286 }
285287 pending .remove (instance_id )
286- print (f"Instance { instance_id } is ready: { instance .get ('ip' )} " )
288+ print (f"[ { elapsed } s] Instance { instance_id [: 12 ] } ... is ready: { instance .get ('ip' )} " )
287289 elif status in ("terminated" , "terminating" ):
288290 raise RuntimeError (f"Instance { instance_id } terminated unexpectedly" )
289291 else :
290- print (f"Instance { instance_id } status: { status } " )
292+ # Log every 30s to reduce spam, but always log first status
293+ last_log = last_log_time .get (instance_id , 0 )
294+ if elapsed - last_log >= 30 or last_log == 0 :
295+ print (f"[{ elapsed } s] Instance { instance_id [:12 ]} ... status: { status } " )
296+ last_log_time [instance_id ] = elapsed
291297 except requests .HTTPError as e :
292298 if e .response .status_code == 404 :
293- print (f"Instance { instance_id } not found yet, retrying..." )
299+ last_log = last_log_time .get (instance_id , 0 )
300+ if elapsed - last_log >= 30 or last_log == 0 :
301+ print (f"[{ elapsed } s] Instance { instance_id [:12 ]} ... not found yet, retrying..." )
302+ last_log_time [instance_id ] = elapsed
294303 else :
295304 raise
296305
297306 if pending :
298307 time .sleep (INSTANCE_POLL_INTERVAL )
299308
300309 if pending :
301- raise TimeoutError (f"Instances did not become ready within { timeout } s: { pending } " )
310+ elapsed = int (time .time () - start_time )
311+ raise TimeoutError (f"[{ elapsed } s] Instances did not become ready within { timeout } s: { pending } " )
302312
303313 return details
304314
@@ -399,39 +409,62 @@ def execute_setup_via_ssh(
399409 else :
400410 raise RuntimeError (f"Failed to connect to { ip } via SSH after { max_retries } attempts" )
401411
402- # Read setup script from package (can't curl from private repo)
412+ # Read all required scripts from package (can't curl from private repo)
403413 from importlib .resources import files
404414 scripts_dir = files ("lambda_gha.scripts" )
405- setup_script = (scripts_dir / "runner-setup.sh" ).read_text ()
406-
407- # Write script to temp file for SCP
408- script_file = tempfile .NamedTemporaryFile (mode = 'w' , suffix = '.sh' , delete = False )
409- script_file .write (setup_script )
410- script_file .close ()
411- os .chmod (script_file .name , stat .S_IRUSR | stat .S_IXUSR ) # 0500
415+ templates_dir = files ("lambda_gha.templates" )
416+
417+ # Scripts to copy: (source, dest_name)
418+ scripts_to_copy = [
419+ (scripts_dir / "runner-setup.sh" , "runner-setup.sh" ),
420+ (scripts_dir / "check-runner-termination.sh" , "check-runner-termination.sh" ),
421+ (scripts_dir / "job-started-hook.sh" , "job-started-hook.sh" ),
422+ (scripts_dir / "job-completed-hook.sh" , "job-completed-hook.sh" ),
423+ (templates_dir / "shared-functions.sh" , "shared-functions.sh" ),
424+ ]
412425
413- # SCP the script to the instance
426+ # SCP options
414427 scp_opts = ["-o" , "StrictHostKeyChecking=no" , "-o" , "UserKnownHostsFile=/dev/null" ]
415428 if key_file :
416429 scp_opts .extend (["-i" , key_file .name ])
417430
418- print (f"Copying setup script to instance..." )
419- scp_result = subprocess .run (
420- ["scp" ] + scp_opts + [script_file .name , f"{ ssh_user } @{ ip } :/tmp/runner-setup.sh" ],
431+ # Create scripts directory on instance
432+ print (f"Creating scripts directory on instance..." )
433+ mkdir_result = subprocess .run (
434+ ["ssh" ] + ssh_opts + [f"{ ssh_user } @{ ip } " , "mkdir -p /tmp/lambda-gha-scripts" ],
421435 capture_output = True ,
422436 text = True ,
423437 )
424- if scp_result .returncode != 0 :
425- raise RuntimeError (f"Failed to SCP script: { scp_result .stderr } " )
426-
427- # Build env export commands
438+ if mkdir_result .returncode != 0 :
439+ raise RuntimeError (f"Failed to create scripts dir: { mkdir_result .stderr } " )
440+
441+ # Copy all scripts
442+ print (f"Copying { len (scripts_to_copy )} scripts to instance..." )
443+ for src_file , dest_name in scripts_to_copy :
444+ content = src_file .read_text ()
445+ local_file = tempfile .NamedTemporaryFile (mode = 'w' , suffix = '.sh' , delete = False )
446+ local_file .write (content )
447+ local_file .close ()
448+ os .chmod (local_file .name , stat .S_IRUSR | stat .S_IXUSR )
449+
450+ scp_result = subprocess .run (
451+ ["scp" ] + scp_opts + [local_file .name , f"{ ssh_user } @{ ip } :/tmp/lambda-gha-scripts/{ dest_name } " ],
452+ capture_output = True ,
453+ text = True ,
454+ )
455+ os .unlink (local_file .name )
456+ if scp_result .returncode != 0 :
457+ raise RuntimeError (f"Failed to SCP { dest_name } : { scp_result .stderr } " )
458+
459+ # Build env export commands (add SCRIPTS_DIR for local script access)
460+ env_vars ["SCRIPTS_DIR" ] = "/tmp/lambda-gha-scripts"
428461 env_exports = "\n " .join (f'export { k } ="{ v } "' for k , v in env_vars .items ())
429462
430- # Build the setup command: export vars, run script
463+ # Build the setup command: export vars, run script from scripts dir
431464 setup_cmd = f'''
432465{ env_exports }
433- chmod +x /tmp/runner-setup .sh
434- sudo -E nohup /tmp/runner-setup.sh > /var/log/runner-setup.log 2>&1 &
466+ chmod +x /tmp/lambda-gha-scripts/* .sh
467+ sudo -E nohup /tmp/lambda-gha-scripts/ runner-setup.sh > /var/log/runner-setup.log 2>&1 &
435468'''
436469
437470 print (f"Executing setup script..." )
0 commit comments