1515# specific language governing permissions and limitations
1616# under the License.
1717#
18- from datetime import time
18+ import time
1919import requests as req
2020import json
2121import os
@@ -452,9 +452,10 @@ def delete_secret(self, secret_name: str, namespace="nuvolaris"):
452452 logging .error (f"delete_secret { ex } " )
453453 return False
454454
455- def get_jobs (self , name_filter : str = None , namespace = "nuvolaris" ):
455+ def get_jobs (self , name_filter : str | None = None , namespace = "nuvolaris" ):
456456 """
457457 Get all Kubernetes jobs in a specific namespace.
458+ :param name_filter: Optional filter to match job names.
458459 :param namespace: Namespace to list jobs from.
459460 :return: List of jobs or None if failed.
460461 """
@@ -512,7 +513,7 @@ def delete_job(self, job_name: str, namespace="nuvolaris"):
512513 logging .error (f"delete_job { ex } " )
513514 return False
514515
515- def post_job (self , job_manifest : json , namespace = "nuvolaris" ):
516+ def post_job (self , job_manifest : dict , namespace = "nuvolaris" ):
516517 """
517518 Create a Kubernetes job.
518519 :param job_manifest: Dictionary containing the job manifest.
@@ -601,4 +602,104 @@ def check_job_status(self, job_name: str, namespace="nuvolaris"):
601602 return False
602603 except Exception as ex :
603604 logging .error (f"check_job_status { ex } " )
604- return False
605+ return False
606+
607+ def get_pod (self , pod_name : str , namespace = "nuvolaris" ):
608+ """
609+ Get pod details by name.
610+ :param pod_name: Name of the pod.
611+ :param namespace: Namespace where the pod is located.
612+ :return: Pod object or None if not found.
613+ """
614+ url = f"{ self .host } /api/v1/namespaces/{ namespace } /pods/{ pod_name } "
615+ headers = {"Authorization" : self .token }
616+
617+ try :
618+ logging .info (f"GET request to { url } " )
619+ response = req .get (url , headers = headers , verify = self .ssl_ca_cert )
620+
621+ if response .status_code == 200 :
622+ logging .debug (
623+ f"GET to { url } succeeded with { response .status_code } . Body { response .text } "
624+ )
625+ return json .loads (response .text )
626+
627+ logging .error (
628+ f"GET to { url } failed with { response .status_code } . Body { response .text } "
629+ )
630+ return None
631+ except Exception as ex :
632+ logging .error (f"get_pod { ex } " )
633+ return None
634+
635+ def wait_for_init_container_completion (self , job_name : str , init_container_name : str , namespace = "nuvolaris" , timeout_seconds = 300 ):
636+ """
637+ Wait for a specific init container in a job's pod to complete (successfully or with error).
638+ :param job_name: Name of the job.
639+ :param init_container_name: Name of the init container to wait for.
640+ :param namespace: Namespace where the job is located.
641+ :param timeout_seconds: Maximum time to wait in seconds (default: 300 = 5 minutes).
642+ :return: True if init container completed (success or error), False if timeout or other failure.
643+ """
644+ import time
645+
646+ start_time = time .time ()
647+ pod_name = None
648+
649+ logging .info (f"Waiting for init container '{ init_container_name } ' in job '{ job_name } ' to complete" )
650+
651+ # First, wait for the pod to be created
652+ while time .time () - start_time < timeout_seconds :
653+ pod_name = self .get_pod_by_job_name (job_name , namespace )
654+ if pod_name :
655+ logging .info (f"Found pod '{ pod_name } ' for job '{ job_name } '" )
656+ break
657+ logging .debug (f"Pod for job '{ job_name } ' not yet created, waiting..." )
658+ time .sleep (2 )
659+
660+ if not pod_name :
661+ logging .error (f"Timeout waiting for pod to be created for job '{ job_name } '" )
662+ return False
663+
664+ # Now wait for the init container to complete
665+ while time .time () - start_time < timeout_seconds :
666+ pod = self .get_pod (pod_name , namespace )
667+
668+ if not pod :
669+ logging .error (f"Failed to get pod '{ pod_name } '" )
670+ return False
671+
672+ # Check init container status
673+ init_container_statuses = pod .get ("status" , {}).get ("initContainerStatuses" , [])
674+
675+ for status in init_container_statuses :
676+ if status .get ("name" ) == init_container_name :
677+ state = status .get ("state" , {})
678+
679+ # Check if terminated (completed or failed)
680+ if "terminated" in state :
681+ terminated = state ["terminated" ]
682+ exit_code = terminated .get ("exitCode" , - 1 )
683+ reason = terminated .get ("reason" , "Unknown" )
684+
685+ if exit_code == 0 :
686+ logging .info (f"Init container '{ init_container_name } ' completed successfully" )
687+ else :
688+ logging .warning (f"Init container '{ init_container_name } ' terminated with exit code { exit_code } , reason: { reason } " )
689+
690+ return True
691+
692+ # Check if still running
693+ if "running" in state :
694+ logging .debug (f"Init container '{ init_container_name } ' is still running" )
695+
696+ # Check if waiting
697+ if "waiting" in state :
698+ waiting = state ["waiting" ]
699+ reason = waiting .get ("reason" , "Unknown" )
700+ logging .debug (f"Init container '{ init_container_name } ' is waiting, reason: { reason } " )
701+
702+ time .sleep (2 )
703+
704+ logging .error (f"Timeout waiting for init container '{ init_container_name } ' to complete" )
705+ return False
0 commit comments