From 7ddd66d30dfff03d82028e5cf1cac6850260f549 Mon Sep 17 00:00:00 2001 From: Summer Date: Tue, 24 Jun 2025 17:25:49 +0000 Subject: [PATCH 01/34] [06/24] Add gke_code_executor.py --- src/google/adk/code_executors/__init__.py | 11 + .../adk/code_executors/gke_code_executor.py | 223 ++++++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 src/google/adk/code_executors/gke_code_executor.py diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index c0f1046f72..3797ea03be 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -49,3 +49,14 @@ ' Executor with agents, please install it. If not, you can ignore this' ' warning.' ) + +try: + from .gke_code_executor import GkeCodeExecutor + + __all__.append('GkeCodeExecutor') +except ImportError: + logger.debug( + 'The kubernetes sdk is not installed. If you want to use the GKE Code' + ' Executor with agents, please install it. If not, you can ignore this' + ' warning.' + ) \ No newline at end of file diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py new file mode 100644 index 0000000000..dba2dcb4df --- /dev/null +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -0,0 +1,223 @@ +import logging +import uuid +from typing import Any + +from google.adk.agents.invocation_context import InvocationContext +from google.adk.code_executors.base_code_executor import BaseCodeExecutor +from google.adk.code_executors.code_execution_utils import CodeExecutionInput, CodeExecutionResult + +from kubernetes import client, config +from kubernetes.client.rest import ApiException +from kubernetes.watch import Watch + +logger = logging.getLogger(__name__) + +class GkeCodeExecutor(BaseCodeExecutor): + """ + A secure, robust, and efficient code executor that runs Python code in a + sandboxed gVisor Pod on GKE. 
+ + Features includes: + - Secure code execution via ConfigMaps and a strict security context. + - Kubernetes-native job and pod garbage collection via TTL. + - Efficient, event-driven waiting using the Kubernetes watch API. + - Explicit resource limits to prevent abuse. + """ + namespace: str = "default" + image: str = "python:3.11-slim" + timeout_seconds: int = 3000 + cpu_limit: str = "500m" + mem_limit: str = "512Mi" + use_gvisor_sandbox: bool = True + + _batch_v1: Any = None + _core_v1: Any = None + + def __init__(self, **data): + """ + Initializes the Pydantic model and the Kubernetes clients. + """ + super().__init__(**data) + + try: + config.load_incluster_config() + logger.info("Using in-cluster Kubernetes configuration.") + except config.ConfigException: + logger.info("In-cluster config not found. Falling back to local kubeconfig.") + config.load_kube_config() + + self._batch_v1 = client.BatchV1Api() + self._core_v1 = client.CoreV1Api() + + def execute_code( + self, + invocation_context: InvocationContext, + code_execution_input: CodeExecutionInput, + ) -> CodeExecutionResult: + """ + Orchestrates the secure execution of a code snippet on GKE. + """ + job_name = f"adk-exec-{uuid.uuid4().hex[:10]}" + configmap_name = f"code-src-{job_name}" + + try: + # 1. Create a ConfigMap to hold the code securely. + self._create_code_configmap(configmap_name, code_execution_input.code) + # 2. Create the Job manifest with all security features. + job_manifest = self._create_job_manifest(job_name, configmap_name) + # 3. Create and run the Job on the cluster. + self._batch_v1.create_namespaced_job( + body=job_manifest, namespace=self.namespace + ) + logger.info(f"Submitted Job '{job_name}' to namespace '{self.namespace}'.") + # 4. Efficiently watch for the Job's completion. 
+ return self._watch_job_completion(job_name) + + except Exception as e: + logger.error( + f"An unexpected error occurred during execution of job '{job_name}': {e}", + exc_info=True, + ) + return CodeExecutionResult(stderr=f"Executor failed: {e}") + finally: + # 5. Always clean up the ConfigMap. The Job is cleaned up by Kubernetes. + self._cleanup_configmap(configmap_name) + + def _create_job_manifest(self, job_name: str, configmap_name: str) -> client.V1Job: + """Creates the complete V1Job object with security best practices.""" + # Define the container that will run the code. + container = client.V1Container( + name="code-runner", + image=self.image, + command=["python3", "/app/code.py"], + volume_mounts=[ + client.V1VolumeMount(name="code-volume", mount_path="/app") + ], + # BEST PRACTICE: Enforce a strict security context. + security_context=client.V1SecurityContext( + run_as_non_root=True, + run_as_user=1001, + allow_privilege_escalation=False, + read_only_root_filesystem=True, + capabilities=client.V1Capabilities(drop=["ALL"]), + ), + # BEST PRACTICE: Set resource limits to prevent abuse. 
+ resources=client.V1ResourceRequirements( + requests={"cpu": "100m", "memory": "128Mi"}, + limits={"cpu": self.cpu_limit, "memory": self.mem_limit}, + ), + ) + + # Pod Spec Customization for A/B Testing + pod_spec_args = { + "restart_policy": "Never", + "containers": [container], + "volumes": [ + client.V1Volume( + name="code-volume", + config_map=client.V1ConfigMapVolumeSource(name=configmap_name), + ) + ], + } + + if self.use_gvisor_sandbox: + pod_spec_args["runtime_class_name"] = "gvisor" + pod_spec_args["node_selector"] = { + "cloud.google.com/gke-nodepool": "gvisor-nodepool" + } + pod_spec_args["tolerations"] = [ + client.V1Toleration( + key="sandbox.gke.io/runtime", + operator="Equal", + value="gvisor", + effect="NoSchedule", + ) + ] + else: + pod_spec_args["node_selector"] = { + "cloud.google.com/gke-nodepool": "standard-nodepool" + } + + # Define the pod spec, mounting the code and targeting gVisor. + pod_spec = client.V1PodSpec(**pod_spec_args) + + # Define the Job specification. + job_spec = client.V1JobSpec( + template=client.V1PodTemplateSpec(spec=pod_spec), + backoff_limit=0, # Do not retry the Job on failure. + # BEST PRACTICE: Let the Kubernetes TTL controller handle cleanup. + # This is more robust than client-side cleanup. + ttl_seconds_after_finished=600, # Garbage collect after 10 minutes. + ) + + # Assemble and return the final Job object. 
+ return client.V1Job( + api_version="batch/v1", + kind="Job", + metadata=client.V1ObjectMeta(name=job_name), + spec=job_spec, + ) + + def _watch_job_completion(self, job_name: str) -> CodeExecutionResult: + """Uses the watch API to efficiently wait for job completion.""" + watch = Watch() + try: + for event in watch.stream( + self._batch_v1.list_namespaced_job, + namespace=self.namespace, + field_selector=f"metadata.name={job_name}", + timeout_seconds=self.timeout_seconds, + ): + job = event["object"] + if job.status.succeeded: + watch.stop() + logger.info(f"Job '{job_name}' succeeded.") + logs = self._get_pod_logs(job_name) + return CodeExecutionResult(stdout=logs) + if job.status.failed: + watch.stop() + logger.error(f"Job '{job_name}' failed.") + logs = self._get_pod_logs(job_name) + return CodeExecutionResult(stderr=f"Job failed. Logs:\n{logs}") + + # If the loop finishes without returning, the watch timed out. + raise TimeoutError( + f"Job '{job_name}' did not complete within {self.timeout_seconds}s." + ) + finally: + watch.stop() + + def _get_pod_logs(self, job_name: str) -> str: + """Retrieves logs from the pod created by the specified job.""" + try: + pods = self._core_v1.list_namespaced_pod( + namespace=self.namespace, label_selector=f"job-name={job_name}", limit=1 + ) + if not pods.items: + return "Error: Could not find pod for job." 
+ pod_name = pods.items[0].metadata.name + + return self._core_v1.read_namespaced_pod_log( + name=pod_name, namespace=self.namespace + ) + except ApiException as e: + logger.error(f"Could not retrieve logs for job '{job_name}': {e}") + return f"Error retrieving logs: {e.reason}" + + def _create_code_configmap(self, name: str, code: str) -> None: + """Creates a ConfigMap to hold the Python code.""" + body = client.V1ConfigMap( + metadata=client.V1ObjectMeta(name=name), data={"code.py": code} + ) + self._core_v1.create_namespaced_config_map( + namespace=self.namespace, body=body + ) + + def _cleanup_configmap(self, name: str) -> None: + """Deletes a ConfigMap.""" + try: + self._core_v1.delete_namespaced_config_map(name=name, namespace=self.namespace) + logger.info(f"Cleaned up ConfigMap '{name}'.") + except ApiException as e: + if e.status != 404: + logger.warning(f"Could not delete ConfigMap '{name}': {e.reason}") From e8635b98bd709fe2955a425646648c9b4fad4d8a Mon Sep 17 00:00:00 2001 From: Summer Date: Tue, 24 Jun 2025 19:20:42 +0000 Subject: [PATCH 02/34] [06/24] Add gke_code_executor.py --- .../adk/code_executors/gke_code_executor.py | 119 +++++++++--------- 1 file changed, 58 insertions(+), 61 deletions(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index dba2dcb4df..f0abeb982d 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -1,6 +1,6 @@ import logging import uuid -from typing import Any +from typing import Optional from google.adk.agents.invocation_context import InvocationContext from google.adk.code_executors.base_code_executor import BaseCodeExecutor @@ -13,37 +13,42 @@ logger = logging.getLogger(__name__) class GkeCodeExecutor(BaseCodeExecutor): - """ - A secure, robust, and efficient code executor that runs Python code in a - sandboxed gVisor Pod on GKE. 
+ """Executes Python code in a secure gVisor-sandboxed Pod on GKE. + + This executor securely runs code by dynamically creating a Kubernetes Job for + each execution request. The user's code is mounted via a ConfigMap, and the + Pod is hardened with a strict security context and resource limits. - Features includes: - - Secure code execution via ConfigMaps and a strict security context. - - Kubernetes-native job and pod garbage collection via TTL. + Key Features: + - Sandboxed execution using the gVisor runtime. + - Ephemeral, per-execution environments using Kubernetes Jobs. + - Secure-by-default Pod configuration (non-root, no privileges). + - Automatic garbage collection of completed Jobs and Pods via TTL. - Efficient, event-driven waiting using the Kubernetes watch API. - - Explicit resource limits to prevent abuse. """ namespace: str = "default" image: str = "python:3.11-slim" - timeout_seconds: int = 3000 + timeout_seconds: int = 300 cpu_limit: str = "500m" mem_limit: str = "512Mi" - use_gvisor_sandbox: bool = True - _batch_v1: Any = None - _core_v1: Any = None + _batch_v1: client.BatchV1Api + _core_v1: client.CoreV1Api def __init__(self, **data): - """ - Initializes the Pydantic model and the Kubernetes clients. + """Initializes the executor and the Kubernetes API clients. + + This constructor supports overriding default class attributes (like + 'namespace', 'image', etc.) by passing them as keyword arguments. It + automatically configures the Kubernetes client to work either within a + cluster (in-cluster config) or locally using a kubeconfig file. """ super().__init__(**data) - try: config.load_incluster_config() logger.info("Using in-cluster Kubernetes configuration.") except config.ConfigException: - logger.info("In-cluster config not found. Falling back to local kubeconfig.") + logger.info("In-cluster config not found. 
Falling back to kubeconfig.") config.load_kube_config() self._batch_v1 = client.BatchV1Api() @@ -54,33 +59,40 @@ def execute_code( invocation_context: InvocationContext, code_execution_input: CodeExecutionInput, ) -> CodeExecutionResult: - """ - Orchestrates the secure execution of a code snippet on GKE. - """ + """Orchestrates the secure execution of a code snippet on GKE.""" job_name = f"adk-exec-{uuid.uuid4().hex[:10]}" configmap_name = f"code-src-{job_name}" try: - # 1. Create a ConfigMap to hold the code securely. self._create_code_configmap(configmap_name, code_execution_input.code) - # 2. Create the Job manifest with all security features. job_manifest = self._create_job_manifest(job_name, configmap_name) - # 3. Create and run the Job on the cluster. + self._batch_v1.create_namespaced_job( body=job_manifest, namespace=self.namespace ) logger.info(f"Submitted Job '{job_name}' to namespace '{self.namespace}'.") - # 4. Efficiently watch for the Job's completion. - return self._watch_job_completion(job_name) + return self._watch_for_job_completion(job_name) + except ApiException as e: + logger.error( + "A Kubernetes API error occurred during job" + f" '{job_name}': {e.reason}", + exc_info=True, + ) + return CodeExecutionResult(stderr=f"Kubernetes API error: {e.reason}") + except TimeoutError as e: + logger.error(e, exc_info=True) + logs = self._get_pod_logs(job_name) + stderr = f"Executor timed out: {e}\n\nPod Logs:\n{logs}" + return CodeExecutionResult(stderr=stderr) except Exception as e: logger.error( - f"An unexpected error occurred during execution of job '{job_name}': {e}", + f"An unexpected error occurred during job '{job_name}': {e}", exc_info=True, ) - return CodeExecutionResult(stderr=f"Executor failed: {e}") + return CodeExecutionResult(stderr=f"An unexpected executor error occurred: {e}") finally: - # 5. Always clean up the ConfigMap. The Job is cleaned up by Kubernetes. 
+ # The Job is cleaned up by the TTL controller, and we ensure the ConfigMap is always deleted. self._cleanup_configmap(configmap_name) def _create_job_manifest(self, job_name: str, configmap_name: str) -> client.V1Job: @@ -93,7 +105,7 @@ def _create_job_manifest(self, job_name: str, configmap_name: str) -> client.V1J volume_mounts=[ client.V1VolumeMount(name="code-volume", mount_path="/app") ], - # BEST PRACTICE: Enforce a strict security context. + # Enforce a strict security context. security_context=client.V1SecurityContext( run_as_non_root=True, run_as_user=1001, @@ -101,52 +113,38 @@ def _create_job_manifest(self, job_name: str, configmap_name: str) -> client.V1J read_only_root_filesystem=True, capabilities=client.V1Capabilities(drop=["ALL"]), ), - # BEST PRACTICE: Set resource limits to prevent abuse. + # Set resource limits to prevent abuse. resources=client.V1ResourceRequirements( requests={"cpu": "100m", "memory": "128Mi"}, limits={"cpu": self.cpu_limit, "memory": self.mem_limit}, ), ) - # Pod Spec Customization for A/B Testing - pod_spec_args = { - "restart_policy": "Never", - "containers": [container], - "volumes": [ + # Use tolerations to request a gVisor node. + pod_spec = client.V1PodSpec( + restart_policy="Never", + containers=[container], + volumes=[ client.V1Volume( name="code-volume", config_map=client.V1ConfigMapVolumeSource(name=configmap_name), ) ], - } - - if self.use_gvisor_sandbox: - pod_spec_args["runtime_class_name"] = "gvisor" - pod_spec_args["node_selector"] = { - "cloud.google.com/gke-nodepool": "gvisor-nodepool" - } - pod_spec_args["tolerations"] = [ + runtime_class_name="gvisor", # Request the gVisor runtime. + tolerations=[ client.V1Toleration( key="sandbox.gke.io/runtime", operator="Equal", value="gvisor", effect="NoSchedule", ) - ] - else: - pod_spec_args["node_selector"] = { - "cloud.google.com/gke-nodepool": "standard-nodepool" - } - - # Define the pod spec, mounting the code and targeting gVisor. 
- pod_spec = client.V1PodSpec(**pod_spec_args) + ], + ) - # Define the Job specification. job_spec = client.V1JobSpec( template=client.V1PodTemplateSpec(spec=pod_spec), backoff_limit=0, # Do not retry the Job on failure. - # BEST PRACTICE: Let the Kubernetes TTL controller handle cleanup. - # This is more robust than client-side cleanup. + # Kubernetes TTL controller will handle Job/Pod cleanup. ttl_seconds_after_finished=600, # Garbage collect after 10 minutes. ) @@ -162,10 +160,11 @@ def _watch_job_completion(self, job_name: str) -> CodeExecutionResult: """Uses the watch API to efficiently wait for job completion.""" watch = Watch() try: + field_selector = f"metadata.name={job_name}" for event in watch.stream( self._batch_v1.list_namespaced_job, namespace=self.namespace, - field_selector=f"metadata.name={job_name}", + field_selector=field_selector, timeout_seconds=self.timeout_seconds, ): job = event["object"] @@ -179,13 +178,13 @@ def _watch_job_completion(self, job_name: str) -> CodeExecutionResult: logger.error(f"Job '{job_name}' failed.") logs = self._get_pod_logs(job_name) return CodeExecutionResult(stderr=f"Job failed. Logs:\n{logs}") + finally: + watch.stop() # If the loop finishes without returning, the watch timed out. raise TimeoutError( f"Job '{job_name}' did not complete within {self.timeout_seconds}s." ) - finally: - watch.stop() def _get_pod_logs(self, job_name: str) -> str: """Retrieves logs from the pod created by the specified job.""" @@ -194,14 +193,14 @@ def _get_pod_logs(self, job_name: str) -> str: namespace=self.namespace, label_selector=f"job-name={job_name}", limit=1 ) if not pods.items: + logger.warning(f"Could not find Pod for Job '{job_name}' to retrieve logs.") return "Error: Could not find pod for job." 
pod_name = pods.items[0].metadata.name - return self._core_v1.read_namespaced_pod_log( name=pod_name, namespace=self.namespace ) except ApiException as e: - logger.error(f"Could not retrieve logs for job '{job_name}': {e}") + logger.error(f"API error retrieving logs for job '{job_name}': {e.reason}") return f"Error retrieving logs: {e.reason}" def _create_code_configmap(self, name: str, code: str) -> None: @@ -209,9 +208,7 @@ def _create_code_configmap(self, name: str, code: str) -> None: body = client.V1ConfigMap( metadata=client.V1ObjectMeta(name=name), data={"code.py": code} ) - self._core_v1.create_namespaced_config_map( - namespace=self.namespace, body=body - ) + self._core_v1.create_namespaced_config_map(namespace=self.namespace, body=body) def _cleanup_configmap(self, name: str) -> None: """Deletes a ConfigMap.""" From 3dd917ba6fce0b27720a4e9d1e28b20571538f09 Mon Sep 17 00:00:00 2001 From: Summer Date: Tue, 24 Jun 2025 19:22:04 +0000 Subject: [PATCH 03/34] [06/24] Add gke_code_executor.py --- src/google/adk/code_executors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index 3797ea03be..aff6477be9 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -59,4 +59,4 @@ 'The kubernetes sdk is not installed. If you want to use the GKE Code' ' Executor with agents, please install it. If not, you can ignore this' ' warning.' 
- ) \ No newline at end of file + ) From bdbda057f625870ae83fab338e33303fd26712b5 Mon Sep 17 00:00:00 2001 From: Summer Date: Tue, 24 Jun 2025 19:47:34 +0000 Subject: [PATCH 04/34] [06/24] Add gke_code_executor.py --- src/google/adk/code_executors/gke_code_executor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index f0abeb982d..ecd847aa77 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -1,6 +1,5 @@ import logging import uuid -from typing import Optional from google.adk.agents.invocation_context import InvocationContext from google.adk.code_executors.base_code_executor import BaseCodeExecutor From c011798c18a7c3ab9d54db7ee477ff2ad79ac2d6 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 10 Jul 2025 20:30:34 +0000 Subject: [PATCH 05/34] [07/10] Add deployemnt_rbac.yaml into samples folder --- .../gke_agent_sandbox/deployment_rbac.yaml | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 contributing/samples/gke_agent_sandbox/deployment_rbac.yaml diff --git a/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml b/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml new file mode 100644 index 0000000000..db54c08bec --- /dev/null +++ b/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml @@ -0,0 +1,50 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: agent-sandbox +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: adk-agent-sa + namespace: agent-sandbox +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: adk-agent-role + namespace: agent-sandbox +rules: +- apiGroups: ["batch"] + resources: ["jobs"] + # create: Needed for _batch_v1.create_namespaced_job(). + # watch: Needed for watch.stream(self._batch_v1.list_namespaced_job, ...) 
to efficiently wait for completion + # list/get: Required for the watch to initialize and to get job details. + verbs: ["create", "get", "watch", "list", "delete"] +- apiGroups: [""] + resources: ["configmaps"] + # create: Needed mount the agent's code into the Job's Pod. + # delete: Needed for cleanup in the finally block + verbs: ["create", "get", "list", "delete"] +- apiGroups: [""] + resources: ["pods"] + # list: Needed to find the correct Pod _core_v1.list_namespaced_pod(label_selector=...) + verbs: ["get", "list", "delete"] +- apiGroups: [""] + # get: Needed for _core_v1.read_namespaced_pod_log() to get the code execution results and logs. + resources: ["pods/log"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: adk-agent-binding + namespace: agent-sandbox +subjects: +- kind: ServiceAccount + name: adk-agent-sa + namespace: agent-sandbox +roleRef: + kind: Role + name: adk-agent-role + apiGroup: rbac.authorization.k8s.io From e54728e5e72eb5488be7c5a0394054d104bc2ec4 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 10 Jul 2025 20:33:52 +0000 Subject: [PATCH 06/34] [07/10] Add deployemnt_rbac.yaml into samples folder --- src/google/adk/code_executors/gke_code_executor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index ecd847aa77..a9e74201bf 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -24,6 +24,13 @@ class GkeCodeExecutor(BaseCodeExecutor): - Secure-by-default Pod configuration (non-root, no privileges). - Automatic garbage collection of completed Jobs and Pods via TTL. - Efficient, event-driven waiting using the Kubernetes watch API. + + RBAC Permissions: + This executor interacts with the Kubernetes API and requires a ServiceAccount + with specific RBAC permissions to function. 
The agent's pod needs permissions + to create/watch Jobs, create/delete ConfigMaps, and list Pods to read logs. + For a complete, working example of the required Role and RoleBinding, see the + file at: contributing/samples/gke_agent_sandbox/deployment_rbac.yaml """ namespace: str = "default" image: str = "python:3.11-slim" From c6a7374b1b27bc36da45409ff3d439818cd802ed Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 10 Jul 2025 20:34:23 +0000 Subject: [PATCH 07/34] [07/10] Add deployemnt_rbac.yaml into samples folder --- contributing/samples/gke_agent_sandbox/deployment_rbac.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml b/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml index db54c08bec..16572276d1 100644 --- a/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml +++ b/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml @@ -18,7 +18,7 @@ rules: - apiGroups: ["batch"] resources: ["jobs"] # create: Needed for _batch_v1.create_namespaced_job(). - # watch: Needed for watch.stream(self._batch_v1.list_namespaced_job, ...) to efficiently wait for completion + # watch: Needed for watch.stream(self._batch_v1.list_namespaced_job, ...) to wait for completion # list/get: Required for the watch to initialize and to get job details. 
verbs: ["create", "get", "watch", "list", "delete"] - apiGroups: [""] From b5d6ce4f20dc696bd3cd617c602dc7569456d2bb Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 10 Jul 2025 20:42:50 +0000 Subject: [PATCH 08/34] [07/10] Make cpu/memory requests configurable --- src/google/adk/code_executors/gke_code_executor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index a9e74201bf..546bb80239 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -35,6 +35,8 @@ class GkeCodeExecutor(BaseCodeExecutor): namespace: str = "default" image: str = "python:3.11-slim" timeout_seconds: int = 300 + cpu_request: str = "200m" + mem_request: str = "256Mi" cpu_limit: str = "500m" mem_limit: str = "512Mi" @@ -121,7 +123,7 @@ def _create_job_manifest(self, job_name: str, configmap_name: str) -> client.V1J ), # Set resource limits to prevent abuse. 
resources=client.V1ResourceRequirements( - requests={"cpu": "100m", "memory": "128Mi"}, + requests={"cpu": self.cpu_request, "memory": self.mem_request}, limits={"cpu": self.cpu_limit, "memory": self.mem_limit}, ), ) From 3d839f9f4ee892e16ed355d55d0a8fd9a02f2517 Mon Sep 17 00:00:00 2001 From: Summer Date: Fri, 11 Jul 2025 03:48:17 +0000 Subject: [PATCH 09/34] [07/10] Add annotation --- src/google/adk/code_executors/gke_code_executor.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index 546bb80239..1e14e3bffc 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -73,7 +73,7 @@ def execute_code( try: self._create_code_configmap(configmap_name, code_execution_input.code) - job_manifest = self._create_job_manifest(job_name, configmap_name) + job_manifest = self._create_job_manifest(job_name, configmap_name, invocation_context) self._batch_v1.create_namespaced_job( body=job_manifest, namespace=self.namespace @@ -103,7 +103,7 @@ def execute_code( # The Job is cleaned up by the TTL controller, and we ensure the ConfigMap is always deleted. self._cleanup_configmap(configmap_name) - def _create_job_manifest(self, job_name: str, configmap_name: str) -> client.V1Job: + def _create_job_manifest(self, job_name: str, configmap_name: str, invocation_context: InvocationContext) -> client.V1Job: """Creates the complete V1Job object with security best practices.""" # Define the container that will run the code. container = client.V1Container( @@ -157,10 +157,13 @@ def _create_job_manifest(self, job_name: str, configmap_name: str) -> client.V1J ) # Assemble and return the final Job object. 
+ annotations = { + "adk.agent.google.com/invocation-id": invocation_context.invocation_id + } return client.V1Job( api_version="batch/v1", kind="Job", - metadata=client.V1ObjectMeta(name=job_name), + metadata=client.V1ObjectMeta(name=job_name, annotations=annotations), spec=job_spec, ) From ae43fad29b954dbc5891f5db02cef99aeac41b23 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 11 Jul 2025 04:40:19 +0000 Subject: [PATCH 10/34] [07/10] Raise error in _get_pod_logs() --- .../adk/code_executors/gke_code_executor.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index 1e14e3bffc..7674e5c753 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -56,7 +56,7 @@ def __init__(self, **data): config.load_incluster_config() logger.info("Using in-cluster Kubernetes configuration.") except config.ConfigException: - logger.info("In-cluster config not found. Falling back to kubeconfig.") + logger.info("In-cluster config not found. 
Falling back to local kubeconfig.") config.load_kube_config() self._batch_v1 = client.BatchV1Api() @@ -79,7 +79,7 @@ def execute_code( body=job_manifest, namespace=self.namespace ) logger.info(f"Submitted Job '{job_name}' to namespace '{self.namespace}'.") - return self._watch_for_job_completion(job_name) + return self._watch_job_completion(job_name) except ApiException as e: logger.error( @@ -171,11 +171,10 @@ def _watch_job_completion(self, job_name: str) -> CodeExecutionResult: """Uses the watch API to efficiently wait for job completion.""" watch = Watch() try: - field_selector = f"metadata.name={job_name}" for event in watch.stream( self._batch_v1.list_namespaced_job, namespace=self.namespace, - field_selector=field_selector, + field_selector=f"metadata.name={job_name}", timeout_seconds=self.timeout_seconds, ): job = event["object"] @@ -189,30 +188,33 @@ def _watch_job_completion(self, job_name: str) -> CodeExecutionResult: logger.error(f"Job '{job_name}' failed.") logs = self._get_pod_logs(job_name) return CodeExecutionResult(stderr=f"Job failed. Logs:\n{logs}") - finally: - watch.stop() # If the loop finishes without returning, the watch timed out. raise TimeoutError( f"Job '{job_name}' did not complete within {self.timeout_seconds}s." ) + finally: + watch.stop() def _get_pod_logs(self, job_name: str) -> str: - """Retrieves logs from the pod created by the specified job.""" + """Retrieves logs from the pod created by the specified job. + + Raises: + RuntimeError: If the pod cannot be found or logs cannot be fetched. + """ try: pods = self._core_v1.list_namespaced_pod( namespace=self.namespace, label_selector=f"job-name={job_name}", limit=1 ) if not pods.items: - logger.warning(f"Could not find Pod for Job '{job_name}' to retrieve logs.") - return "Error: Could not find pod for job." 
+ raise RuntimeError(f"Could not find Pod for Job '{job_name}' to retrieve logs.") + pod_name = pods.items[0].metadata.name return self._core_v1.read_namespaced_pod_log( name=pod_name, namespace=self.namespace ) except ApiException as e: - logger.error(f"API error retrieving logs for job '{job_name}': {e.reason}") - return f"Error retrieving logs: {e.reason}" + raise RuntimeError(f"API error retrieving logs for job '{job_name}': {e.reason}") from e def _create_code_configmap(self, name: str, code: str) -> None: """Creates a ConfigMap to hold the Python code.""" From 764c3b04c27b55429e5545cd73fd8417711ed8d4 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 11 Jul 2025 05:31:55 +0000 Subject: [PATCH 11/34] [07/10] Add owner_reference --- .../adk/code_executors/gke_code_executor.py | 38 +++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index 7674e5c753..0fe110e03f 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -74,10 +74,11 @@ def execute_code( try: self._create_code_configmap(configmap_name, code_execution_input.code) job_manifest = self._create_job_manifest(job_name, configmap_name, invocation_context) - - self._batch_v1.create_namespaced_job( + created_job = self._batch_v1.create_namespaced_job( body=job_manifest, namespace=self.namespace ) + self._add_owner_reference(created_job, configmap_name) + logger.info(f"Submitted Job '{job_name}' to namespace '{self.namespace}'.") return self._watch_job_completion(job_name) @@ -99,9 +100,6 @@ def execute_code( exc_info=True, ) return CodeExecutionResult(stderr=f"An unexpected executor error occurred: {e}") - finally: - # The Job is cleaned up by the TTL controller, and we ensure the ConfigMap is always deleted. 
- self._cleanup_configmap(configmap_name) def _create_job_manifest(self, job_name: str, configmap_name: str, invocation_context: InvocationContext) -> client.V1Job: """Creates the complete V1Job object with security best practices.""" @@ -223,11 +221,29 @@ def _create_code_configmap(self, name: str, code: str) -> None: ) self._core_v1.create_namespaced_config_map(namespace=self.namespace, body=body) - def _cleanup_configmap(self, name: str) -> None: - """Deletes a ConfigMap.""" + def _add_owner_reference(self, owner_job: client.V1Job, configmap_name: str) -> None: + """Patches the ConfigMap to be owned by the Job for auto-cleanup.""" + owner_reference = client.V1OwnerReference( + api_version=owner_job.api_version, + kind=owner_job.kind, + name=owner_job.metadata.name, + uid=owner_job.metadata.uid, + controller=True, + ) + patch_body = { + "metadata": {"ownerReferences": [owner_reference.to_dict()]} + } + try: - self._core_v1.delete_namespaced_config_map(name=name, namespace=self.namespace) - logger.info(f"Cleaned up ConfigMap '{name}'.") + self._core_v1.patch_namespaced_config_map( + name=configmap_name, + namespace=self.namespace, + body=patch_body, + ) + logger.info(f"Set Job '{owner_job.metadata.name}' as owner of ConfigMap '{configmap_name}'.") except ApiException as e: - if e.status != 404: - logger.warning(f"Could not delete ConfigMap '{name}': {e.reason}") + logger.warning( + f"Failed to set ownerReference on ConfigMap '{configmap_name}'. " + f"Manual cleanup is required. 
Reason: {e.reason}" + ) + From cb14ea8da9b9473c3b9d50ca8b4b7759df5e1880 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 11 Jul 2025 05:55:24 +0000 Subject: [PATCH 12/34] [07/10] Add owner_reference --- src/google/adk/code_executors/gke_code_executor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index 0fe110e03f..769eb5fa48 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -246,4 +246,3 @@ def _add_owner_reference(self, owner_job: client.V1Job, configmap_name: str) -> f"Failed to set ownerReference on ConfigMap '{configmap_name}'. " f"Manual cleanup is required. Reason: {e.reason}" ) - From bb6c86545e4f1fe04829676a1cc5a47d0b85cab6 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 25 Jul 2025 08:21:04 +0000 Subject: [PATCH 13/34] [07/25] add the sample --- .../code_execution/gke_sandbox.agent.py | 49 ++++ .../adk/code_executors/gke_code_executor.py | 5 + .../code_executors/test_gke_code_executor.py | 209 ++++++++++++++++++ 3 files changed, 263 insertions(+) create mode 100644 contributing/samples/code_execution/gke_sandbox.agent.py create mode 100644 tests/unittests/code_executors/test_gke_code_executor.py diff --git a/contributing/samples/code_execution/gke_sandbox.agent.py b/contributing/samples/code_execution/gke_sandbox.agent.py new file mode 100644 index 0000000000..5ee64513bf --- /dev/null +++ b/contributing/samples/code_execution/gke_sandbox.agent.py @@ -0,0 +1,49 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A Python coding agent using the GkeCodeExecutor for secure execution.""" + +from google.adk.agents import LlmAgent +from google.adk.code_executors import GkeCodeExecutor + + +def gke_agent_system_instruction(): + """Returns: The system instruction for the GKE-based coding agent.""" + return """You are a helpful and capable AI agent that can write and execute Python code to answer questions and perform tasks. + +When a user asks a question, follow these steps: +1. Analyze the request. +2. Write a complete, self-contained Python script to accomplish the task. +3. Your code will be executed in a secure, sandboxed environment. +4. Return the full and complete output from the code execution, including any text, results, or error messages.""" + + +gke_executor = GkeCodeExecutor( + # This must match the namespace in your deployment_rbac.yaml where the + # agent's ServiceAccount and Role have permissions. + namespace="agent-sandbox", + # Setting an explicit timeout prevents a stuck job from running forever. + timeout_seconds=600, +) + +root_agent = LlmAgent( + name="gke_coding_agent", + model="gemini-2.0-flash", + description=( + "A general-purpose agent that executes Python code in a secure GKE" + " Sandbox." 
+ ), + instruction=gke_agent_system_instruction(), + code_executor=gke_executor, +) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index 769eb5fa48..4901d2140b 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -37,6 +37,7 @@ class GkeCodeExecutor(BaseCodeExecutor): timeout_seconds: int = 300 cpu_request: str = "200m" mem_request: str = "256Mi" + # The maximum CPU the container can use, in "millicores". 1000m is 1 full CPU core. cpu_limit: str = "500m" mem_limit: str = "512Mi" @@ -72,6 +73,10 @@ def execute_code( configmap_name = f"code-src-{job_name}" try: + # The execution process: + # 1. Create a ConfigMap to mount LLM-generated code into the Pod. + # 2. Create a Job that runs the code from the ConfigMap. + # 3. Set the Job as the ConfigMap's owner for automatic cleanup. self._create_code_configmap(configmap_name, code_execution_input.code) job_manifest = self._create_job_manifest(job_name, configmap_name, invocation_context) created_job = self._batch_v1.create_namespaced_job( diff --git a/tests/unittests/code_executors/test_gke_code_executor.py b/tests/unittests/code_executors/test_gke_code_executor.py new file mode 100644 index 0000000000..549ec1398d --- /dev/null +++ b/tests/unittests/code_executors/test_gke_code_executor.py @@ -0,0 +1,209 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest.mock import MagicMock, patch + +from google.adk.agents.invocation_context import InvocationContext +from google.adk.code_executors.code_execution_utils import CodeExecutionInput +from google.adk.code_executors.gke_code_executor import GkeCodeExecutor +from kubernetes.client.rest import ApiException +from kubernetes import client, config +import pytest + + +@pytest.fixture +def mock_invocation_context() -> InvocationContext: + """Fixture for a mock InvocationContext.""" + mock = MagicMock(spec=InvocationContext) + mock.invocation_id = "test-invocation-123" + return mock + + +@pytest.fixture(autouse=True) +def mock_k8s_config(): + """Fixture for auto-mocking Kubernetes config loading.""" + with patch("google.adk.code_executors.gke_code_executor.config") as mock_config: + # Simulate fallback from in-cluster to kubeconfig + mock_config.ConfigException = config.ConfigException + mock_config.load_incluster_config.side_effect = config.ConfigException + yield mock_config + + +@pytest.fixture +def mock_k8s_clients(): + """Fixture for mock Kubernetes API clients.""" + with patch( + "google.adk.code_executors.gke_code_executor.client" + ) as mock_client_class: + mock_batch_v1 = MagicMock(spec=client.BatchV1Api) + mock_core_v1 = MagicMock(spec=client.CoreV1Api) + mock_client_class.BatchV1Api.return_value = mock_batch_v1 + mock_client_class.CoreV1Api.return_value = mock_core_v1 + yield { + "batch_v1": mock_batch_v1, + "core_v1": mock_core_v1, + } + + +class TestGkeCodeExecutor: + """Unit tests for the GkeCodeExecutor.""" + + def test_init_defaults(self): + """Tests that the executor initializes with correct default values.""" + executor = GkeCodeExecutor() + assert executor.namespace == "default" + assert executor.image == "python:3.11-slim" + assert executor.timeout_seconds == 300 + assert executor.cpu_request == "200m" + assert executor.mem_limit == "512Mi" + + def test_init_with_overrides(self): + """Tests that class attributes can be overridden at 
instantiation.""" + executor = GkeCodeExecutor( + namespace="test-ns", + image="custom-python:latest", + timeout_seconds=60, + cpu_limit="1000m", + ) + assert executor.namespace == "test-ns" + assert executor.image == "custom-python:latest" + assert executor.timeout_seconds == 60 + assert executor.cpu_limit == "1000m" + + @patch("google.adk.code_executors.gke_code_executor.Watch") + def test_execute_code_success( + self, + mock_watch, + mock_k8s_clients, + mock_invocation_context, + ): + """Tests the happy path for successful code execution.""" + # Setup Mocks + mock_job = MagicMock() + mock_job.status.succeeded = True + mock_job.status.failed = None + mock_watch.return_value.stream.return_value = [{"object": mock_job}] + + mock_pod_list = MagicMock() + mock_pod_list.items = [MagicMock()] + mock_pod_list.items[0].metadata.name = "test-pod-name" + mock_k8s_clients["core_v1"].list_namespaced_pod.return_value = mock_pod_list + mock_k8s_clients["core_v1"].read_namespaced_pod_log.return_value = "hello world" + + # Execute + executor = GkeCodeExecutor() + code_input = CodeExecutionInput(code='print("hello world")') + result = executor.execute_code(mock_invocation_context, code_input) + + # Assert + assert result.stdout == "hello world" + assert result.stderr == "" + mock_k8s_clients["core_v1"].create_namespaced_config_map.assert_called_once() + mock_k8s_clients["batch_v1"].create_namespaced_job.assert_called_once() + mock_k8s_clients["core_v1"].patch_namespaced_config_map.assert_called_once() + mock_k8s_clients["core_v1"].read_namespaced_pod_log.assert_called_once() + + @patch("google.adk.code_executors.gke_code_executor.Watch") + def test_execute_code_job_failed( + self, + mock_watch, + mock_k8s_clients, + mock_invocation_context, + ): + """Tests the path where the Kubernetes Job fails.""" + mock_job = MagicMock() + mock_job.status.succeeded = None + mock_job.status.failed = True + mock_watch.return_value.stream.return_value = [{"object": mock_job}] + 
mock_k8s_clients["core_v1"].read_namespaced_pod_log.return_value = ( + "Traceback...\nValueError: failure" + ) + + executor = GkeCodeExecutor() + result = executor.execute_code(mock_invocation_context, CodeExecutionInput(code="fail")) + + assert result.stdout == "" + assert "Job failed. Logs:" in result.stderr + assert "ValueError: failure" in result.stderr + + def test_execute_code_api_exception( + self, mock_k8s_clients, mock_invocation_context + ): + """Tests handling of an ApiException from the K8s client.""" + mock_k8s_clients["core_v1"].create_namespaced_config_map.side_effect = ( + ApiException(reason="Test API Error") + ) + executor = GkeCodeExecutor() + result = executor.execute_code(mock_invocation_context, CodeExecutionInput(code="...")) + + assert result.stdout == "" + assert "Kubernetes API error: Test API Error" in result.stderr + + @patch("google.adk.code_executors.gke_code_executor.Watch") + def test_execute_code_timeout( + self, + mock_watch, + mock_k8s_clients, + mock_invocation_context, + ): + """Tests the case where the job watch times out.""" + mock_watch.return_value.stream.return_value = [] # Empty stream simulates timeout + mock_k8s_clients["core_v1"].read_namespaced_pod_log.return_value = ( + "Still running..." + ) + + executor = GkeCodeExecutor(timeout_seconds=1) + result = executor.execute_code(mock_invocation_context, CodeExecutionInput(code="...")) + + assert result.stdout == "" + assert "Executor timed out" in result.stderr + assert "did not complete within 1s" in result.stderr + assert "Pod Logs:\nStill running..." 
in result.stderr + + def test_create_job_manifest_structure(self, mock_invocation_context): + """Tests the correctness of the generated Job manifest.""" + executor = GkeCodeExecutor(namespace="test-ns", image="test-img:v1") + job = executor._create_job_manifest("test-job", "test-cm", mock_invocation_context) + + # Check top-level properties + assert isinstance(job, client.V1Job) + assert job.api_version == "batch/v1" + assert job.kind == "Job" + assert job.metadata.name == "test-job" + assert job.spec.backoff_limit == 0 + assert job.spec.ttl_seconds_after_finished == 600 + + # Check pod template properties + pod_spec = job.spec.template.spec + assert pod_spec.restart_policy == "Never" + assert pod_spec.runtime_class_name == "gvisor" + assert len(pod_spec.tolerations) == 1 + assert pod_spec.tolerations[0].value == "gvisor" + assert len(pod_spec.volumes) == 1 + assert pod_spec.volumes[0].name == "code-volume" + assert pod_spec.volumes[0].config_map.name == "test-cm" + + # Check container properties + container = pod_spec.containers[0] + assert container.name == "code-runner" + assert container.image == "test-img:v1" + assert container.command == ["python3", "/app/code.py"] + + # Check security context + sec_context = container.security_context + assert sec_context.run_as_non_root is True + assert sec_context.run_as_user == 1001 + assert sec_context.allow_privilege_escalation is False + assert sec_context.read_only_root_filesystem is True + assert sec_context.capabilities.drop == ["ALL"] \ No newline at end of file From 2242a3691a4658c85e4caf8ea87f950e2f539ee6 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 25 Jul 2025 08:50:21 +0000 Subject: [PATCH 14/34] [07/25] modify pyproject.toml to add gke-specific dependency --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e85bdaff5e..4cf61bb161 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,6 +132,10 @@ extensions = [ "toolbox-core>=0.1.0", # For 
tools.toolbox_toolset.ToolboxToolset ] +# For GKE-specific features, primarily the GkeCodeExecutor. +gke = [ + "kubernetes>=29.0.0", +] [tool.pyink] # Format py files following Google style-guide From 45bdd7c5fdbd617b8998ab49328260cdc943e3e4 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 15 Aug 2025 20:35:01 +0000 Subject: [PATCH 15/34] [08/15] rename cpu_request --- src/google/adk/code_executors/gke_code_executor.py | 6 +++--- tests/unittests/code_executors/test_gke_code_executor.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index 4901d2140b..2c94f3a3f7 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -35,8 +35,8 @@ class GkeCodeExecutor(BaseCodeExecutor): namespace: str = "default" image: str = "python:3.11-slim" timeout_seconds: int = 300 - cpu_request: str = "200m" - mem_request: str = "256Mi" + cpu_requested: str = "200m" + mem_requested: str = "256Mi" # The maximum CPU the container can use, in "millicores". 1000m is 1 full CPU core. cpu_limit: str = "500m" mem_limit: str = "512Mi" @@ -126,7 +126,7 @@ def _create_job_manifest(self, job_name: str, configmap_name: str, invocation_co ), # Set resource limits to prevent abuse. 
resources=client.V1ResourceRequirements( - requests={"cpu": self.cpu_request, "memory": self.mem_request}, + requests={"cpu": self.cpu_requested, "memory": self.mem_requested}, limits={"cpu": self.cpu_limit, "memory": self.mem_limit}, ), ) diff --git a/tests/unittests/code_executors/test_gke_code_executor.py b/tests/unittests/code_executors/test_gke_code_executor.py index 549ec1398d..6be0ea4f19 100644 --- a/tests/unittests/code_executors/test_gke_code_executor.py +++ b/tests/unittests/code_executors/test_gke_code_executor.py @@ -65,7 +65,7 @@ def test_init_defaults(self): assert executor.namespace == "default" assert executor.image == "python:3.11-slim" assert executor.timeout_seconds == 300 - assert executor.cpu_request == "200m" + assert executor.cpu_requested == "200m" assert executor.mem_limit == "512Mi" def test_init_with_overrides(self): From d0b9397d391638b3b5d5d7c3c412e0324c7f117e Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 15 Aug 2025 21:40:50 +0000 Subject: [PATCH 16/34] [08/15] rename cpu_request --- .../adk/code_executors/gke_code_executor.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py index 2c94f3a3f7..8deffdfb16 100644 --- a/src/google/adk/code_executors/gke_code_executor.py +++ b/src/google/adk/code_executors/gke_code_executor.py @@ -26,11 +26,23 @@ class GkeCodeExecutor(BaseCodeExecutor): - Efficient, event-driven waiting using the Kubernetes watch API. RBAC Permissions: - This executor interacts with the Kubernetes API and requires a ServiceAccount - with specific RBAC permissions to function. The agent's pod needs permissions - to create/watch Jobs, create/delete ConfigMaps, and list Pods to read logs. 
- For a complete, working example of the required Role and RoleBinding, see the - file at: contributing/samples/gke_agent_sandbox/deployment_rbac.yaml + This executor requires a ServiceAccount with specific RBAC permissions. The + Role granted to the ServiceAccount must include rules to manage Jobs, + ConfigMaps, and Pod logs. Below is a minimal set of required permissions: + + rules: + # For creating/deleting code ConfigMaps and patching ownerReferences + - apiGroups: [""] # Core API Group + resources: ["configmaps"] + verbs: ["create", "delete", "get", "patch"] + # For watching Job completion status + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "delete"] + # For retrieving logs from the completed Job's Pod + - apiGroups: [""] # Core API Group + resources: ["pods", "pods/log"] + verbs: ["get", "list"] """ namespace: str = "default" image: str = "python:3.11-slim" From 8748117b44eb3cf5d956c8dc048d00950c4b8496 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Fri, 15 Aug 2025 21:53:04 +0000 Subject: [PATCH 17/34] [08/15] rename cpu_request --- tests/unittests/code_executors/test_gke_code_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/code_executors/test_gke_code_executor.py b/tests/unittests/code_executors/test_gke_code_executor.py index 6be0ea4f19..9738a1b950 100644 --- a/tests/unittests/code_executors/test_gke_code_executor.py +++ b/tests/unittests/code_executors/test_gke_code_executor.py @@ -206,4 +206,4 @@ def test_create_job_manifest_structure(self, mock_invocation_context): assert sec_context.run_as_user == 1001 assert sec_context.allow_privilege_escalation is False assert sec_context.read_only_root_filesystem is True - assert sec_context.capabilities.drop == ["ALL"] \ No newline at end of file + assert sec_context.capabilities.drop == ["ALL"] From 5d851e065c053e7b32bf4c0077b477565b57485c Mon Sep 17 00:00:00 2001 From: syangx39 Date: Mon, 25 Aug 2025 18:56:25 +0000 Subject: 
[PATCH 18/34] [08/25] Modify ADK --- src/google/adk/code_executors/__init__.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index a467a7dfcf..1c792a6ebd 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -30,6 +30,7 @@ 'ContainerCodeExecutor', ] + def __getattr__(name: str): if name == 'VertexAiCodeExecutor': try: @@ -51,17 +52,4 @@ def __getattr__(name: str): 'ContainerCodeExecutor requires additional dependencies. ' 'Please install with: pip install "google-adk[extensions]"' ) from e - - raise AttributeError(f"module '{__name__}' has no attribute '{name}'") - - -try: - from .gke_code_executor import GkeCodeExecutor - - __all__.append('GkeCodeExecutor') -except ImportError: - logger.debug( - 'The kubernetes sdk is not installed. If you want to use the GKE Code' - ' Executor with agents, please install it. If not, you can ignore this' - ' warning.' - ) \ No newline at end of file + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file From 51b94a182337126ae0bf2df336aa97b7652b7d00 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Mon, 25 Aug 2025 18:57:51 +0000 Subject: [PATCH 19/34] [08/25] Modify ADK --- src/google/adk/code_executors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index 1c792a6ebd..12b6d870ad 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -52,4 +52,4 @@ def __getattr__(name: str): 'ContainerCodeExecutor requires additional dependencies. 
' 'Please install with: pip install "google-adk[extensions]"' ) from e - raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") From b429edb2cdf20b2b2143bd8428fd6068b6b6b8e2 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Mon, 25 Aug 2025 19:03:15 +0000 Subject: [PATCH 20/34] [08/25] Modify ADK --- .../code_execution/{gke_sandbox.agent.py => gke_sandbox_agent.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename contributing/samples/code_execution/{gke_sandbox.agent.py => gke_sandbox_agent.py} (100%) diff --git a/contributing/samples/code_execution/gke_sandbox.agent.py b/contributing/samples/code_execution/gke_sandbox_agent.py similarity index 100% rename from contributing/samples/code_execution/gke_sandbox.agent.py rename to contributing/samples/code_execution/gke_sandbox_agent.py From 80c17ed4e170db2e99c6403178e429f1f1d40294 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Mon, 25 Aug 2025 19:35:46 +0000 Subject: [PATCH 21/34] [08/25] Modify ADK --- pyproject.toml | 6 +----- src/google/adk/code_executors/__init__.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dd7f714dce..f5b5fcc454 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -133,16 +133,12 @@ extensions = [ "docker>=7.0.0", # For ContainerCodeExecutor "langgraph>=0.2.60", # For LangGraphAgent "litellm>=1.75.5", # For LiteLlm class. Currently has OpenAI limitations. TODO: once LiteLlm fix it + "kubernetes>=29.0.0", # For GkeCodeExecutor "llama-index-readers-file>=0.4.0", # For retrieval using LlamaIndex. "lxml>=5.3.0", # For load_web_page tool. "toolbox-core>=0.1.0", # For tools.toolbox_toolset.ToolboxToolset ] -# For GKE-specific features, primarily the GkeCodeExecutor. 
-gke = [ - "kubernetes>=29.0.0", -] - [tool.pyink] # Format py files following Google style-guide line-length = 80 diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index 12b6d870ad..41d406f63d 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -28,6 +28,7 @@ 'UnsafeLocalCodeExecutor', 'VertexAiCodeExecutor', 'ContainerCodeExecutor', + 'GkeCodeExecutor', ] @@ -52,4 +53,14 @@ def __getattr__(name: str): 'ContainerCodeExecutor requires additional dependencies. ' 'Please install with: pip install "google-adk[extensions]"' ) from e + elif name == 'GkeCodeExecutor': + try: + from .gke_code_executor import GkeCodeExecutor + + return GkeCodeExecutor + except ImportError as e: + raise ImportError( + 'GkeCodeExecutor requires additional dependencies. ' + 'Please install with: pip install "google-adk[extensions]"' + ) from e raise AttributeError(f"module '{__name__}' has no attribute '{name}'") From 6c1dba9ac4302220f3c7e9b1db2c95e8cc3cfabf Mon Sep 17 00:00:00 2001 From: syangx39 Date: Mon, 25 Aug 2025 19:36:38 +0000 Subject: [PATCH 22/34] [08/25] Modify ADK --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index f5b5fcc454..a871ada582 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,6 +139,7 @@ extensions = [ "toolbox-core>=0.1.0", # For tools.toolbox_toolset.ToolboxToolset ] + [tool.pyink] # Format py files following Google style-guide line-length = 80 From ef8aef936ca11afa6d8d1031a4fa7da0d339c8a9 Mon Sep 17 00:00:00 2001 From: syangx39 Date: Mon, 25 Aug 2025 20:03:02 +0000 Subject: [PATCH 23/34] [08/25] Modify ADK --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index a871ada582..277dd810f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,6 +104,7 @@ test = [ "langchain-community>=0.3.17", "langgraph>=0.2.60, <= 0.4.10", # For LangGraphAgent 
"litellm>=1.75.5, <2.0.0", # For LiteLLM tests + "kubernetes>=29.0.0", # For GkeCodeExecutor "llama-index-readers-file>=0.4.0", # For retrieval tests "openai>=1.100.2", # For LiteLLM "pytest-asyncio>=0.25.0", From d1a8d112d7ddbe1586ecdfdcbefa7798f921a035 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 15:06:55 -0700 Subject: [PATCH 24/34] [08/28] GKE cli fix --- .../gke_agent_sandbox/deployment_rbac.yaml | 50 --- src/google/adk/cli/cli_deploy.py | 230 ++++--------- src/google/adk/cli/cli_tools_click.py | 68 +--- src/google/adk/code_executors/__init__.py | 11 - .../adk/code_executors/gke_code_executor.py | 265 --------------- tests/unittests/cli/test_fast_api.py | 14 +- tests/unittests/cli/utils/test_cli_deploy.py | 321 +++++++++++++++++- .../code_executors/test_gke_code_executor.py | 209 ------------ 8 files changed, 397 insertions(+), 771 deletions(-) delete mode 100644 contributing/samples/gke_agent_sandbox/deployment_rbac.yaml delete mode 100644 src/google/adk/code_executors/gke_code_executor.py delete mode 100644 tests/unittests/code_executors/test_gke_code_executor.py diff --git a/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml b/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml deleted file mode 100644 index 16572276d1..0000000000 --- a/contributing/samples/gke_agent_sandbox/deployment_rbac.yaml +++ /dev/null @@ -1,50 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: agent-sandbox ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: adk-agent-sa - namespace: agent-sandbox ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: adk-agent-role - namespace: agent-sandbox -rules: -- apiGroups: ["batch"] - resources: ["jobs"] - # create: Needed for _batch_v1.create_namespaced_job(). - # watch: Needed for watch.stream(self._batch_v1.list_namespaced_job, ...) to wait for completion - # list/get: Required for the watch to initialize and to get job details. 
- verbs: ["create", "get", "watch", "list", "delete"] -- apiGroups: [""] - resources: ["configmaps"] - # create: Needed mount the agent's code into the Job's Pod. - # delete: Needed for cleanup in the finally block - verbs: ["create", "get", "list", "delete"] -- apiGroups: [""] - resources: ["pods"] - # list: Needed to find the correct Pod _core_v1.list_namespaced_pod(label_selector=...) - verbs: ["get", "list", "delete"] -- apiGroups: [""] - # get: Needed for _core_v1.read_namespaced_pod_log() to get the code execution results and logs. - resources: ["pods/log"] - verbs: ["get", "list"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: adk-agent-binding - namespace: agent-sandbox -subjects: -- kind: ServiceAccount - name: adk-agent-sa - namespace: agent-sandbox -roleRef: - kind: Role - name: adk-agent-role - apiGroup: rbac.authorization.k8s.io diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index d26b3c9660..7ddb4f6c9b 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -13,23 +13,25 @@ # limitations under the License. 
from __future__ import annotations -import json import os +from pathlib import Path import shutil import subprocess -from typing import Final from typing import Optional import click from packaging.version import parse -_DOCKERFILE_TEMPLATE: Final[str] = """ +_DOCKERFILE_TEMPLATE = """ FROM python:3.11-slim WORKDIR /app # Create a non-root user RUN adduser --disabled-password --gecos "" myuser +# Change ownership of /app to myuser +RUN chown -R myuser:myuser /app + # Switch to the non-root user USER myuser @@ -43,7 +45,7 @@ # Set up environment variables - End # Install ADK - Start -RUN pip install google-adk=={adk_version} +{adk_install_instructions} # Install ADK - End # Copy agent - Start @@ -51,18 +53,16 @@ # Set permission COPY --chown=myuser:myuser "agents/{app_name}/" "/app/agents/{app_name}/" -# Copy agent - End - -# Install Agent Deps - Start {install_agent_deps} -# Install Agent Deps - End + +# Copy agent - End EXPOSE {port} CMD adk {command} --port={port} {host_option} {service_option} {trace_to_cloud_option} {allow_origins_option} {a2a_option} "/app/agents" """ -_AGENT_ENGINE_APP_TEMPLATE: Final[str] = """ +_AGENT_ENGINE_APP_TEMPLATE = """ from vertexai.preview.reasoning_engines import AdkApp if {is_config_agent}: @@ -98,52 +98,6 @@ def _resolve_project(project_in_option: Optional[str]) -> str: return project -def _validate_gcloud_extra_args( - extra_gcloud_args: Optional[tuple[str, ...]], adk_managed_args: set[str] -) -> None: - """Validates that extra gcloud args don't conflict with ADK-managed args. - - This function dynamically checks for conflicts based on the actual args - that ADK will set, rather than using a hardcoded list. - - Args: - extra_gcloud_args: User-provided extra arguments for gcloud. - adk_managed_args: Set of argument names that ADK will set automatically. - Should include '--' prefix (e.g., '--project'). - - Raises: - click.ClickException: If any conflicts are found. 
- """ - if not extra_gcloud_args: - return - - # Parse user arguments into a set of argument names for faster lookup - user_arg_names = set() - for arg in extra_gcloud_args: - if arg.startswith('--'): - # Handle both '--arg=value' and '--arg value' formats - arg_name = arg.split('=')[0] - user_arg_names.add(arg_name) - - # Check for conflicts with ADK-managed args - conflicts = user_arg_names.intersection(adk_managed_args) - - if conflicts: - conflict_list = ', '.join(f"'{arg}'" for arg in sorted(conflicts)) - if len(conflicts) == 1: - raise click.ClickException( - f"The argument {conflict_list} conflicts with ADK's automatic" - ' configuration. ADK will set this argument automatically, so please' - ' remove it from your command.' - ) - else: - raise click.ClickException( - f"The arguments {conflict_list} conflict with ADK's automatic" - ' configuration. ADK will set these arguments automatically, so' - ' please remove them from your command.' - ) - - def _get_service_option_by_adk_version( adk_version: str, session_uri: Optional[str], @@ -190,7 +144,6 @@ def to_cloud_run( artifact_service_uri: Optional[str] = None, memory_service_uri: Optional[str] = None, a2a: bool = False, - extra_gcloud_args: Optional[tuple[str, ...]] = None, ): """Deploys an agent to Google Cloud Run. @@ -242,10 +195,15 @@ def to_cloud_run( install_agent_deps = ( f'RUN pip install -r "/app/agents/{app_name}/requirements.txt"' if os.path.exists(requirements_txt_path) - else '# No requirements.txt found.' 
+ else '' ) click.echo('Copying agent source code completed.') + adk_install_instructions = ( + '# Install ADK from PyPI\n' + f'RUN pip install "google-adk[extensions]=={adk_version}"' + ) + # create Dockerfile click.echo('Creating Dockerfile...') host_option = '--host=0.0.0.0' if adk_version > '0.5.0' else '' @@ -268,7 +226,7 @@ def to_cloud_run( ), trace_to_cloud_option='--trace_to_cloud' if trace_to_cloud else '', allow_origins_option=allow_origins_option, - adk_version=adk_version, + adk_install_instructions=adk_install_instructions, host_option=host_option, a2a_option=a2a_option, ) @@ -284,56 +242,26 @@ def to_cloud_run( click.echo('Deploying to Cloud Run...') region_options = ['--region', region] if region else [] project = _resolve_project(project) - - # Build the set of args that ADK will manage - adk_managed_args = {'--source', '--project', '--port', '--verbosity'} - if region: - adk_managed_args.add('--region') - - # Validate that extra gcloud args don't conflict with ADK-managed args - _validate_gcloud_extra_args(extra_gcloud_args, adk_managed_args) - - # Build the command with extra gcloud args - gcloud_cmd = [ - 'gcloud', - 'run', - 'deploy', - service_name, - '--source', - temp_folder, - '--project', - project, - *region_options, - '--port', - str(port), - '--verbosity', - log_level.lower() if log_level else verbosity, - ] - - # Handle labels specially - merge user labels with ADK label - user_labels = [] - extra_args_without_labels = [] - - if extra_gcloud_args: - for arg in extra_gcloud_args: - if arg.startswith('--labels='): - # Extract user-provided labels - user_labels_value = arg[9:] # Remove '--labels=' prefix - user_labels.append(user_labels_value) - else: - extra_args_without_labels.append(arg) - - # Combine ADK label with user labels - all_labels = ['created-by=adk'] - all_labels.extend(user_labels) - labels_arg = ','.join(all_labels) - - gcloud_cmd.extend(['--labels', labels_arg]) - - # Add any remaining extra passthrough args - 
gcloud_cmd.extend(extra_args_without_labels) - - subprocess.run(gcloud_cmd, check=True) + subprocess.run( + [ + 'gcloud', + 'run', + 'deploy', + service_name, + '--source', + temp_folder, + '--project', + project, + *region_options, + '--port', + str(port), + '--verbosity', + log_level.lower() if log_level else verbosity, + '--labels', + 'created-by=adk', + ], + check=True, + ) finally: click.echo(f'Cleaning up the temp folder: {temp_folder}') shutil.rmtree(temp_folder) @@ -354,7 +282,6 @@ def to_agent_engine( description: Optional[str] = None, requirements_file: Optional[str] = None, env_file: Optional[str] = None, - agent_engine_config_file: Optional[str] = None, ): """Deploys an agent to Vertex AI Agent Engine. @@ -404,9 +331,6 @@ def to_agent_engine( variables. If not specified, the `.env` file in the `agent_folder` will be used. The values of `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` will be overridden by `project` and `region` if they are specified. - agent_engine_config_file (str): The filepath to the agent engine config file - to use. If not specified, the `.agent_engine_config.json` file in the - `agent_folder` will be used. """ app_name = os.path.basename(agent_folder) agent_src_path = os.path.join(temp_folder, app_name) @@ -437,34 +361,6 @@ def to_agent_engine( project = _resolve_project(project) click.echo('Resolving files and dependencies...') - agent_config = {} - if not agent_engine_config_file: - # Attempt to read the agent engine config from .agent_engine_config.json in the dir (if any). 
- agent_engine_config_file = os.path.join( - agent_folder, '.agent_engine_config.json' - ) - if os.path.exists(agent_engine_config_file): - click.echo(f'Reading agent engine config from {agent_engine_config_file}') - with open(agent_engine_config_file, 'r') as f: - agent_config = json.load(f) - if display_name: - if 'display_name' in agent_config: - click.echo( - 'Overriding display_name in agent engine config with' - f' {display_name}' - ) - agent_config['display_name'] = display_name - if description: - if 'description' in agent_config: - click.echo( - f'Overriding description in agent engine config with {description}' - ) - agent_config['description'] = description - if agent_config.get('extra_packages'): - agent_config['extra_packages'].append(temp_folder) - else: - agent_config['extra_packages'] = [temp_folder] - if not requirements_file: # Attempt to read requirements from requirements.txt in the dir (if any). requirements_txt_path = os.path.join(agent_src_path, 'requirements.txt') @@ -473,18 +369,7 @@ def to_agent_engine( with open(requirements_txt_path, 'w', encoding='utf-8') as f: f.write('google-cloud-aiplatform[adk,agent_engines]') click.echo(f'Created {requirements_txt_path}') - agent_config['requirements'] = agent_config.get( - 'requirements', - requirements_txt_path, - ) - else: - if 'requirements' in agent_config: - click.echo( - 'Overriding requirements in agent engine config with ' - f'{requirements_file}' - ) - agent_config['requirements'] = requirements_file - + requirements_file = requirements_txt_path env_vars = None if not env_file: # Attempt to read the env variables from .env in the dir (if any). 
@@ -518,14 +403,6 @@ def to_agent_engine( else: region = env_region click.echo(f'{region=} set by GOOGLE_CLOUD_LOCATION in {env_file}') - if env_vars: - if 'env_vars' in agent_config: - click.echo( - f'Overriding env_vars in agent engine config with {env_vars}' - ) - agent_config['env_vars'] = env_vars - # Set env_vars in agent_config to None if it is not set. - agent_config['env_vars'] = agent_config.get('env_vars', env_vars) vertexai.init( project=project, @@ -537,7 +414,7 @@ def to_agent_engine( is_config_agent = False config_root_agent_file = os.path.join(agent_src_path, 'root_agent.yaml') if os.path.exists(config_root_agent_file): - click.echo(f'Config agent detected: {config_root_agent_file}') + click.echo('Config agent detected.') is_config_agent = True adk_app_file = os.path.join(temp_folder, f'{adk_app}.py') @@ -570,7 +447,7 @@ def to_agent_engine( click.echo(f'The following exception was raised: {e}') click.echo('Deploying to agent engine...') - agent_config['agent_engine'] = agent_engines.ModuleAgent( + agent_engine = agent_engines.ModuleAgent( module_name=adk_app, agent_name='adk_app', register_operations={ @@ -592,6 +469,14 @@ def to_agent_engine( sys_paths=[temp_folder[1:]], agent_framework='google-adk', ) + agent_config = dict( + agent_engine=agent_engine, + requirements=requirements_file, + display_name=display_name, + description=description, + env_vars=env_vars, + extra_packages=[temp_folder], + ) if not agent_engine_id: agent_engines.create(**agent_config) @@ -622,6 +507,7 @@ def to_gke( artifact_service_uri: Optional[str] = None, memory_service_uri: Optional[str] = None, a2a: bool = False, + editable: bool = False, ): """Deploys an agent to Google Kubernetes Engine(GKE). 
@@ -680,6 +566,23 @@ def to_gke( ) click.secho('āœ… Environment prepared.', fg='green') + adk_install_instructions = ( + '# Install ADK from PyPI\n' + f'RUN pip install "google-adk[extensions]=={adk_version}"' + ) + if editable: + click.echo(' - Preparing local ADK source for editable install...') + # Find the project root to include pyproject.toml + adk_source_path = Path(__file__).resolve().parents[4] + temp_adk_source_dest = os.path.join(temp_folder, 'adk_local_src') + shutil.copytree(adk_source_path, temp_adk_source_dest) + adk_install_instructions = ( + '# Install ADK from local source with extensions\n' + 'COPY --chown=myuser:myuser adk_local_src/ /app/adk_local_src/\n' + 'RUN pip install --editable "/app/adk_local_src/[extensions]"' + ) + click.secho('āœ… Local ADK source prepared.', fg='green') + allow_origins_option = ( f'--allow_origins={",".join(allow_origins)}' if allow_origins else '' ) @@ -703,7 +606,7 @@ def to_gke( ), trace_to_cloud_option='--trace_to_cloud' if trace_to_cloud else '', allow_origins_option=allow_origins_option, - adk_version=adk_version, + adk_install_instructions=adk_install_instructions, host_option=host_option, a2a_option='--a2a' if a2a else '', ) @@ -766,6 +669,7 @@ def to_gke( app.kubernetes.io/instance: {service_name} app.kubernetes.io/managed-by: adk-cli spec: + serviceAccountName: adk-code-executor-sa containers: - name: {service_name} image: {image_name} diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py index c45fdd37ea..47d755baa8 100644 --- a/src/google/adk/cli/cli_tools_click.py +++ b/src/google/adk/cli/cli_tools_click.py @@ -858,13 +858,7 @@ def cli_api_server( server.run() -@deploy.command( - "cloud_run", - context_settings={ - "allow_extra_args": True, - "allow_interspersed_args": False, - }, -) +@deploy.command("cloud_run") @click.option( "--project", type=str, @@ -977,9 +971,7 @@ def cli_api_server( # TODO: Add eval_storage_uri option back when evals are supported in Cloud 
Run. @adk_services_options() @deprecated_adk_services_options() -@click.pass_context def cli_deploy_cloud_run( - ctx, agent: str, project: Optional[str], region: Optional[str], @@ -1004,13 +996,9 @@ def cli_deploy_cloud_run( AGENT: The path to the agent source code folder. - Use '--' to separate gcloud arguments from adk arguments. - - Examples: + Example: adk deploy cloud_run --project=[project] --region=[region] path/to/my_agent - - adk deploy cloud_run --project=[project] --region=[region] path/to/my_agent -- --no-allow-unauthenticated --min-instances=2 """ if verbosity: click.secho( @@ -1022,36 +1010,6 @@ def cli_deploy_cloud_run( session_service_uri = session_service_uri or session_db_url artifact_service_uri = artifact_service_uri or artifact_storage_uri - - # Parse arguments to separate gcloud args (after --) from regular args - gcloud_args = [] - if "--" in ctx.args: - separator_index = ctx.args.index("--") - gcloud_args = ctx.args[separator_index + 1 :] - regular_args = ctx.args[:separator_index] - - # If there are regular args before --, that's an error - if regular_args: - click.secho( - "Error: Unexpected arguments after agent path and before '--':" - f" {' '.join(regular_args)}. \nOnly arguments after '--' are passed" - " to gcloud.", - fg="red", - err=True, - ) - ctx.exit(2) - else: - # No -- separator, treat all args as an error to enforce the new behavior - if ctx.args: - click.secho( - f"Error: Unexpected arguments: {' '.join(ctx.args)}. 
\nUse '--' to" - " separate gcloud arguments, e.g.: adk deploy cloud_run [options]" - " agent_path -- --min-instances=2", - fg="red", - err=True, - ) - ctx.exit(2) - try: cli_deploy.to_cloud_run( agent_folder=agent, @@ -1071,7 +1029,6 @@ def cli_deploy_cloud_run( artifact_service_uri=artifact_service_uri, memory_service_uri=memory_service_uri, a2a=a2a, - extra_gcloud_args=tuple(gcloud_args), ) except Exception as e: click.secho(f"Deploy failed: {e}", fg="red", err=True) @@ -1184,17 +1141,6 @@ def cli_deploy_cloud_run( " NOTE: This flag is temporary and will be removed in the future." ), ) -@click.option( - "--agent_engine_config_file", - type=str, - default="", - help=( - "Optional. The filepath to the `.agent_engine_config.json` file to use." - " The values in this file will be overriden by the values set by other" - " flags. (default: the `.agent_engine_config.json` file in the `agent`" - " directory, if any.)" - ), -) @click.argument( "agent", type=click.Path( @@ -1215,7 +1161,6 @@ def cli_deploy_agent_engine( env_file: str, requirements_file: str, absolutize_imports: bool, - agent_engine_config_file: str, ): """Deploys an agent to Agent Engine. @@ -1239,7 +1184,6 @@ def cli_deploy_agent_engine( env_file=env_file, requirements_file=requirements_file, absolutize_imports=absolutize_imports, - agent_engine_config_file=agent_engine_config_file, ) except Exception as e: click.secho(f"Deploy failed: {e}", fg="red", err=True) @@ -1276,6 +1220,12 @@ def cli_deploy_agent_engine( " 'adk-default-service-name')." ), ) +@click.option( + '--editable', + is_flag=True, + default=False, + help='Build the container using the local ADK source code.' 
+) @click.option( "--app_name", type=str, @@ -1357,6 +1307,7 @@ def cli_deploy_gke( with_ui: bool, adk_version: str, log_level: Optional[str] = None, + editable: bool = False, session_service_uri: Optional[str] = None, artifact_service_uri: Optional[str] = None, memory_service_uri: Optional[str] = None, @@ -1383,6 +1334,7 @@ def cli_deploy_gke( with_ui=with_ui, log_level=log_level, adk_version=adk_version, + editable=editable, session_service_uri=session_service_uri, artifact_service_uri=artifact_service_uri, memory_service_uri=memory_service_uri, diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index 41d406f63d..e6f5ce6387 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -28,7 +28,6 @@ 'UnsafeLocalCodeExecutor', 'VertexAiCodeExecutor', 'ContainerCodeExecutor', - 'GkeCodeExecutor', ] @@ -43,16 +42,6 @@ def __getattr__(name: str): 'VertexAiCodeExecutor requires additional dependencies. ' 'Please install with: pip install "google-adk[extensions]"' ) from e - elif name == 'ContainerCodeExecutor': - try: - from .container_code_executor import ContainerCodeExecutor - - return ContainerCodeExecutor - except ImportError as e: - raise ImportError( - 'ContainerCodeExecutor requires additional dependencies. 
' - 'Please install with: pip install "google-adk[extensions]"' - ) from e elif name == 'GkeCodeExecutor': try: from .gke_code_executor import GkeCodeExecutor diff --git a/src/google/adk/code_executors/gke_code_executor.py b/src/google/adk/code_executors/gke_code_executor.py deleted file mode 100644 index 8deffdfb16..0000000000 --- a/src/google/adk/code_executors/gke_code_executor.py +++ /dev/null @@ -1,265 +0,0 @@ -import logging -import uuid - -from google.adk.agents.invocation_context import InvocationContext -from google.adk.code_executors.base_code_executor import BaseCodeExecutor -from google.adk.code_executors.code_execution_utils import CodeExecutionInput, CodeExecutionResult - -from kubernetes import client, config -from kubernetes.client.rest import ApiException -from kubernetes.watch import Watch - -logger = logging.getLogger(__name__) - -class GkeCodeExecutor(BaseCodeExecutor): - """Executes Python code in a secure gVisor-sandboxed Pod on GKE. - - This executor securely runs code by dynamically creating a Kubernetes Job for - each execution request. The user's code is mounted via a ConfigMap, and the - Pod is hardened with a strict security context and resource limits. - - Key Features: - - Sandboxed execution using the gVisor runtime. - - Ephemeral, per-execution environments using Kubernetes Jobs. - - Secure-by-default Pod configuration (non-root, no privileges). - - Automatic garbage collection of completed Jobs and Pods via TTL. - - Efficient, event-driven waiting using the Kubernetes watch API. - - RBAC Permissions: - This executor requires a ServiceAccount with specific RBAC permissions. The - Role granted to the ServiceAccount must include rules to manage Jobs, - ConfigMaps, and Pod logs. 
Below is a minimal set of required permissions: - - rules: - # For creating/deleting code ConfigMaps and patching ownerReferences - - apiGroups: [""] # Core API Group - resources: ["configmaps"] - verbs: ["create", "delete", "get", "patch"] - # For watching Job completion status - - apiGroups: ["batch"] - resources: ["jobs"] - verbs: ["get", "list", "watch", "create", "delete"] - # For retrieving logs from the completed Job's Pod - - apiGroups: [""] # Core API Group - resources: ["pods", "pods/log"] - verbs: ["get", "list"] - """ - namespace: str = "default" - image: str = "python:3.11-slim" - timeout_seconds: int = 300 - cpu_requested: str = "200m" - mem_requested: str = "256Mi" - # The maximum CPU the container can use, in "millicores". 1000m is 1 full CPU core. - cpu_limit: str = "500m" - mem_limit: str = "512Mi" - - _batch_v1: client.BatchV1Api - _core_v1: client.CoreV1Api - - def __init__(self, **data): - """Initializes the executor and the Kubernetes API clients. - - This constructor supports overriding default class attributes (like - 'namespace', 'image', etc.) by passing them as keyword arguments. It - automatically configures the Kubernetes client to work either within a - cluster (in-cluster config) or locally using a kubeconfig file. - """ - super().__init__(**data) - try: - config.load_incluster_config() - logger.info("Using in-cluster Kubernetes configuration.") - except config.ConfigException: - logger.info("In-cluster config not found. Falling back to local kubeconfig.") - config.load_kube_config() - - self._batch_v1 = client.BatchV1Api() - self._core_v1 = client.CoreV1Api() - - def execute_code( - self, - invocation_context: InvocationContext, - code_execution_input: CodeExecutionInput, - ) -> CodeExecutionResult: - """Orchestrates the secure execution of a code snippet on GKE.""" - job_name = f"adk-exec-{uuid.uuid4().hex[:10]}" - configmap_name = f"code-src-{job_name}" - - try: - # The execution process: - # 1. 
Create a ConfigMap to mount LLM-generated code into the Pod. - # 2. Create a Job that runs the code from the ConfigMap. - # 3. Set the Job as the ConfigMap's owner for automatic cleanup. - self._create_code_configmap(configmap_name, code_execution_input.code) - job_manifest = self._create_job_manifest(job_name, configmap_name, invocation_context) - created_job = self._batch_v1.create_namespaced_job( - body=job_manifest, namespace=self.namespace - ) - self._add_owner_reference(created_job, configmap_name) - - logger.info(f"Submitted Job '{job_name}' to namespace '{self.namespace}'.") - return self._watch_job_completion(job_name) - - except ApiException as e: - logger.error( - "A Kubernetes API error occurred during job" - f" '{job_name}': {e.reason}", - exc_info=True, - ) - return CodeExecutionResult(stderr=f"Kubernetes API error: {e.reason}") - except TimeoutError as e: - logger.error(e, exc_info=True) - logs = self._get_pod_logs(job_name) - stderr = f"Executor timed out: {e}\n\nPod Logs:\n{logs}" - return CodeExecutionResult(stderr=stderr) - except Exception as e: - logger.error( - f"An unexpected error occurred during job '{job_name}': {e}", - exc_info=True, - ) - return CodeExecutionResult(stderr=f"An unexpected executor error occurred: {e}") - - def _create_job_manifest(self, job_name: str, configmap_name: str, invocation_context: InvocationContext) -> client.V1Job: - """Creates the complete V1Job object with security best practices.""" - # Define the container that will run the code. - container = client.V1Container( - name="code-runner", - image=self.image, - command=["python3", "/app/code.py"], - volume_mounts=[ - client.V1VolumeMount(name="code-volume", mount_path="/app") - ], - # Enforce a strict security context. 
- security_context=client.V1SecurityContext( - run_as_non_root=True, - run_as_user=1001, - allow_privilege_escalation=False, - read_only_root_filesystem=True, - capabilities=client.V1Capabilities(drop=["ALL"]), - ), - # Set resource limits to prevent abuse. - resources=client.V1ResourceRequirements( - requests={"cpu": self.cpu_requested, "memory": self.mem_requested}, - limits={"cpu": self.cpu_limit, "memory": self.mem_limit}, - ), - ) - - # Use tolerations to request a gVisor node. - pod_spec = client.V1PodSpec( - restart_policy="Never", - containers=[container], - volumes=[ - client.V1Volume( - name="code-volume", - config_map=client.V1ConfigMapVolumeSource(name=configmap_name), - ) - ], - runtime_class_name="gvisor", # Request the gVisor runtime. - tolerations=[ - client.V1Toleration( - key="sandbox.gke.io/runtime", - operator="Equal", - value="gvisor", - effect="NoSchedule", - ) - ], - ) - - job_spec = client.V1JobSpec( - template=client.V1PodTemplateSpec(spec=pod_spec), - backoff_limit=0, # Do not retry the Job on failure. - # Kubernetes TTL controller will handle Job/Pod cleanup. - ttl_seconds_after_finished=600, # Garbage collect after 10 minutes. - ) - - # Assemble and return the final Job object. 
- annotations = { - "adk.agent.google.com/invocation-id": invocation_context.invocation_id - } - return client.V1Job( - api_version="batch/v1", - kind="Job", - metadata=client.V1ObjectMeta(name=job_name, annotations=annotations), - spec=job_spec, - ) - - def _watch_job_completion(self, job_name: str) -> CodeExecutionResult: - """Uses the watch API to efficiently wait for job completion.""" - watch = Watch() - try: - for event in watch.stream( - self._batch_v1.list_namespaced_job, - namespace=self.namespace, - field_selector=f"metadata.name={job_name}", - timeout_seconds=self.timeout_seconds, - ): - job = event["object"] - if job.status.succeeded: - watch.stop() - logger.info(f"Job '{job_name}' succeeded.") - logs = self._get_pod_logs(job_name) - return CodeExecutionResult(stdout=logs) - if job.status.failed: - watch.stop() - logger.error(f"Job '{job_name}' failed.") - logs = self._get_pod_logs(job_name) - return CodeExecutionResult(stderr=f"Job failed. Logs:\n{logs}") - - # If the loop finishes without returning, the watch timed out. - raise TimeoutError( - f"Job '{job_name}' did not complete within {self.timeout_seconds}s." - ) - finally: - watch.stop() - - def _get_pod_logs(self, job_name: str) -> str: - """Retrieves logs from the pod created by the specified job. - - Raises: - RuntimeError: If the pod cannot be found or logs cannot be fetched. 
- """ - try: - pods = self._core_v1.list_namespaced_pod( - namespace=self.namespace, label_selector=f"job-name={job_name}", limit=1 - ) - if not pods.items: - raise RuntimeError(f"Could not find Pod for Job '{job_name}' to retrieve logs.") - - pod_name = pods.items[0].metadata.name - return self._core_v1.read_namespaced_pod_log( - name=pod_name, namespace=self.namespace - ) - except ApiException as e: - raise RuntimeError(f"API error retrieving logs for job '{job_name}': {e.reason}") from e - - def _create_code_configmap(self, name: str, code: str) -> None: - """Creates a ConfigMap to hold the Python code.""" - body = client.V1ConfigMap( - metadata=client.V1ObjectMeta(name=name), data={"code.py": code} - ) - self._core_v1.create_namespaced_config_map(namespace=self.namespace, body=body) - - def _add_owner_reference(self, owner_job: client.V1Job, configmap_name: str) -> None: - """Patches the ConfigMap to be owned by the Job for auto-cleanup.""" - owner_reference = client.V1OwnerReference( - api_version=owner_job.api_version, - kind=owner_job.kind, - name=owner_job.metadata.name, - uid=owner_job.metadata.uid, - controller=True, - ) - patch_body = { - "metadata": {"ownerReferences": [owner_reference.to_dict()]} - } - - try: - self._core_v1.patch_namespaced_config_map( - name=configmap_name, - namespace=self.namespace, - body=patch_body, - ) - logger.info(f"Set Job '{owner_job.metadata.name}' as owner of ConfigMap '{configmap_name}'.") - except ApiException as e: - logger.warning( - f"Failed to set ownerReference on ConfigMap '{configmap_name}'. " - f"Manual cleanup is required. 
Reason: {e.reason}" - ) diff --git a/tests/unittests/cli/test_fast_api.py b/tests/unittests/cli/test_fast_api.py index 423581dfd9..f1c9e9d6ef 100755 --- a/tests/unittests/cli/test_fast_api.py +++ b/tests/unittests/cli/test_fast_api.py @@ -845,20 +845,18 @@ def verify_eval_case_result(actual_eval_case_result): assert data == [f"{info['app_name']}_test_eval_set_id_eval_result"] -def test_list_metrics_info(test_app): - """Test listing metrics info.""" - url = "/apps/test_app/metrics-info" +def test_list_eval_metrics(test_app): + """Test listing eval metrics.""" + url = "/apps/test_app/eval_metrics" response = test_app.get(url) # Verify the response assert response.status_code == 200 data = response.json() - metrics_info_key = "metricsInfo" - assert metrics_info_key in data - assert isinstance(data[metrics_info_key], list) + assert isinstance(data, list) # Add more assertions based on the expected metrics - assert len(data[metrics_info_key]) > 0 - for metric in data[metrics_info_key]: + assert len(data) > 0 + for metric in data: assert "metricName" in metric assert "description" in metric assert "metricValueInfo" in metric diff --git a/tests/unittests/cli/utils/test_cli_deploy.py b/tests/unittests/cli/utils/test_cli_deploy.py index b2a31f70f3..3ffe702038 100644 --- a/tests/unittests/cli/utils/test_cli_deploy.py +++ b/tests/unittests/cli/utils/test_cli_deploy.py @@ -22,6 +22,7 @@ import shutil import subprocess import sys +import tempfile import types from typing import Any from typing import Callable @@ -79,6 +80,14 @@ def reload_cli_deploy(): def agent_dir(tmp_path: Path) -> Callable[[bool, bool], Path]: """ Return a factory that creates a dummy agent directory tree. + + Args: + tmp_path: The temporary path fixture provided by pytest. + + Returns: + A factory function that takes two booleans: + - include_requirements: Whether to include a `requirements.txt` file. + - include_env: Whether to include a `.env` file. 
""" def _factory(include_requirements: bool, include_env: bool) -> Path: @@ -112,12 +121,14 @@ def mock_vertex_ai( sys.modules["vertexai"] = mock_vertexai sys.modules["vertexai.agent_engines"] = mock_agent_engines + # Also mock dotenv mock_dotenv = mock.MagicMock() mock_dotenv.dotenv_values = mock.MagicMock(return_value={"FILE_VAR": "value"}) sys.modules["dotenv"] = mock_dotenv yield mock_vertexai + # Cleanup: remove mocks from sys.modules del sys.modules["vertexai"] del sys.modules["vertexai.agent_engines"] del sys.modules["dotenv"] @@ -199,6 +210,8 @@ def test_resolve_project_from_gcloud_fails( ("1.2.0", None, "gs://a", None, " --artifact_storage_uri=gs://a"), ], ) + +# _get_service_option_by_adk_version def test_get_service_option_by_adk_version( adk_version: str, session_uri: str | None, @@ -207,13 +220,204 @@ def test_get_service_option_by_adk_version( expected: str, ) -> None: """It should return the correct service URI flags for a given ADK version.""" - actual = cli_deploy._get_service_option_by_adk_version( - adk_version=adk_version, - session_uri=session_uri, - artifact_uri=artifact_uri, - memory_uri=memory_uri, + assert ( + cli_deploy._get_service_option_by_adk_version( + adk_version=adk_version, + session_uri=session_uri, + artifact_uri=artifact_uri, + memory_uri=memory_uri, + ) + == expected + ) + + +@pytest.mark.parametrize("include_requirements", [True, False]) +@pytest.mark.parametrize("with_ui", [True, False]) +def test_to_cloud_run_happy_path( + monkeypatch: pytest.MonkeyPatch, + agent_dir: Callable[[bool, bool], Path], + tmp_path: Path, + include_requirements: bool, + with_ui: bool, +) -> None: + """ + End-to-end execution test for `to_cloud_run`. + + This test verifies that for a given configuration: + 1. The agent source files are correctly copied to a temporary build context. + 2. A valid Dockerfile is generated with the correct parameters. + 3. The `gcloud run deploy` command is constructed with the correct arguments. 
+ """ + src_dir = agent_dir(include_requirements, False) + run_recorder = _Recorder() + + monkeypatch.setattr(subprocess, "run", run_recorder) + # Mock rmtree to prevent actual deletion during test run but record calls + rmtree_recorder = _Recorder() + monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) + + # Execute the function under test + cli_deploy.to_cloud_run( + agent_folder=str(src_dir), + project="proj", + region="asia-northeast1", + service_name="svc", + app_name="agent", + temp_folder=str(tmp_path), + port=8080, + trace_to_cloud=True, + with_ui=with_ui, + log_level="info", + verbosity="info", + allow_origins=["http://localhost:3000", "https://my-app.com"], + session_service_uri="sqlite://", + artifact_service_uri="gs://bucket", + memory_service_uri="rag://", + adk_version="1.3.0", + ) + + # 1. Assert that source files were copied correctly + agent_dest_path = tmp_path / "agents" / "agent" + assert (agent_dest_path / "agent.py").is_file() + assert (agent_dest_path / "__init__.py").is_file() + assert ( + agent_dest_path / "requirements.txt" + ).is_file() == include_requirements + + # 2. 
Assert that the Dockerfile was generated correctly + dockerfile_path = tmp_path / "Dockerfile" + assert dockerfile_path.is_file() + dockerfile_content = dockerfile_path.read_text() + + expected_command = "web" if with_ui else "api_server" + assert f"CMD adk {expected_command} --port=8080" in dockerfile_content + assert "FROM python:3.11-slim" in dockerfile_content + assert ( + 'RUN adduser --disabled-password --gecos "" myuser' in dockerfile_content + ) + assert "USER myuser" in dockerfile_content + assert "ENV GOOGLE_CLOUD_PROJECT=proj" in dockerfile_content + assert "ENV GOOGLE_CLOUD_LOCATION=asia-northeast1" in dockerfile_content + assert 'RUN pip install "google-adk[extensions]==1.3.0"' in dockerfile_content + assert "--trace_to_cloud" in dockerfile_content + + if include_requirements: + assert ( + 'RUN pip install -r "/app/agents/agent/requirements.txt"' + in dockerfile_content + ) + else: + assert "RUN pip install -r" not in dockerfile_content + + assert ( + "--allow_origins=http://localhost:3000,https://my-app.com" + in dockerfile_content ) - assert actual.rstrip() == expected.rstrip() + + # 3. Assert that the gcloud command was constructed correctly + assert len(run_recorder.calls) == 1 + gcloud_args = run_recorder.get_last_call_args()[0] + + expected_gcloud_command = [ + "gcloud", + "run", + "deploy", + "svc", + "--source", + str(tmp_path), + "--project", + "proj", + "--region", + "asia-northeast1", + "--port", + "8080", + "--verbosity", + "info", + "--labels", + "created-by=adk", + ] + assert gcloud_args == expected_gcloud_command + + # 4. 
Assert cleanup was performed + assert str(rmtree_recorder.get_last_call_args()[0]) == str(tmp_path) + + +def test_to_cloud_run_cleans_temp_dir( + monkeypatch: pytest.MonkeyPatch, + agent_dir: Callable[[bool], Path], +) -> None: + """`to_cloud_run` should always delete the temporary folder on exit.""" + tmp_dir = Path(tempfile.mkdtemp()) + src_dir = agent_dir(False, False) + + deleted: Dict[str, Path] = {} + + def _fake_rmtree(path: str | Path, *a: Any, **k: Any) -> None: + deleted["path"] = Path(path) + + monkeypatch.setattr(cli_deploy.shutil, "rmtree", _fake_rmtree) + monkeypatch.setattr(subprocess, "run", _Recorder()) + + cli_deploy.to_cloud_run( + agent_folder=str(src_dir), + project="proj", + region=None, + service_name="svc", + app_name="app", + temp_folder=str(tmp_dir), + port=8080, + trace_to_cloud=False, + with_ui=False, + log_level="info", + verbosity="info", + adk_version="1.0.0", + session_service_uri=None, + artifact_service_uri=None, + memory_service_uri=None, + ) + + assert deleted["path"] == tmp_dir + + +def test_to_cloud_run_cleans_temp_dir_on_failure( + monkeypatch: pytest.MonkeyPatch, + agent_dir: Callable[[bool, bool], Path], +) -> None: + """`to_cloud_run` should always delete the temporary folder on exit, even if gcloud fails.""" + tmp_dir = Path(tempfile.mkdtemp()) + src_dir = agent_dir(False, False) + + rmtree_recorder = _Recorder() + monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) + # Make the gcloud command fail + monkeypatch.setattr( + subprocess, + "run", + mock.Mock(side_effect=subprocess.CalledProcessError(1, "gcloud")), + ) + + with pytest.raises(subprocess.CalledProcessError): + cli_deploy.to_cloud_run( + agent_folder=str(src_dir), + project="proj", + region="us-central1", + service_name="svc", + app_name="app", + temp_folder=str(tmp_dir), + port=8080, + trace_to_cloud=False, + with_ui=False, + log_level="info", + verbosity="info", + adk_version="1.0.0", + session_service_uri=None, + artifact_service_uri=None, + 
memory_service_uri=None, + ) + + # Check that rmtree was called on the temp folder in the finally block + assert rmtree_recorder.calls, "shutil.rmtree should have been called" + assert str(rmtree_recorder.get_last_call_args()[0]) == str(tmp_dir) @pytest.mark.usefixtures("mock_vertex_ai") @@ -228,6 +432,14 @@ def test_to_agent_engine_happy_path( ) -> None: """ Tests the happy path for the `to_agent_engine` function. + + Verifies: + 1. Source files are copied. + 2. `adk_app.py` is created correctly. + 3. `requirements.txt` is handled (created if not present). + 4. `.env` file is read if present. + 5. `vertexai.init` and `agent_engines.create` are called with the correct args. + 6. Cleanup is performed. """ src_dir = agent_dir(has_reqs, has_env) temp_folder = tmp_path / "build" @@ -236,6 +448,7 @@ def test_to_agent_engine_happy_path( monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) + # Execute cli_deploy.to_agent_engine( agent_folder=str(src_dir), temp_folder=str(temp_folder), @@ -248,9 +461,11 @@ def test_to_agent_engine_happy_path( description="A test agent.", ) + # 1. Verify file operations assert (temp_folder / app_name / "agent.py").is_file() assert (temp_folder / app_name / "__init__.py").is_file() + # 2. Verify adk_app.py creation adk_app_path = temp_folder / "my_adk_app.py" assert adk_app_path.is_file() content = adk_app_path.read_text() @@ -258,11 +473,14 @@ def test_to_agent_engine_happy_path( assert "adk_app = AdkApp(" in content assert "enable_tracing=True" in content + # 3. Verify requirements handling reqs_path = temp_folder / app_name / "requirements.txt" assert reqs_path.is_file() if not has_reqs: + # It should have been created with the default content assert "google-cloud-aiplatform[adk,agent_engines]" in reqs_path.read_text() + # 4. 
Verify Vertex AI SDK calls vertexai = sys.modules["vertexai"] vertexai.init.assert_called_once_with( project="my-gcp-project", @@ -270,6 +488,7 @@ def test_to_agent_engine_happy_path( staging_bucket="gs://my-staging-bucket", ) + # 5. Verify env var handling dotenv = sys.modules["dotenv"] if has_env: dotenv.dotenv_values.assert_called_once() @@ -278,6 +497,7 @@ def test_to_agent_engine_happy_path( dotenv.dotenv_values.assert_not_called() expected_env_vars = None + # 6. Verify agent_engines.create call vertexai.agent_engines.create.assert_called_once() create_kwargs = vertexai.agent_engines.create.call_args.kwargs assert create_kwargs["agent_engine"] == "mock-agent-engine-object" @@ -287,6 +507,7 @@ def test_to_agent_engine_happy_path( assert create_kwargs["extra_packages"] == [str(temp_folder)] assert create_kwargs["env_vars"] == expected_env_vars + # 7. Verify cleanup assert str(rmtree_recorder.get_last_call_args()[0]) == str(temp_folder) @@ -299,22 +520,40 @@ def test_to_gke_happy_path( ) -> None: """ Tests the happy path for the `to_gke` function. + + Verifies: + 1. Source files are copied and Dockerfile is created. + 2. `gcloud builds submit` is called to build the image. + 3. `deployment.yaml` is created with the correct content. + 4. `gcloud container get-credentials` and `kubectl apply` are called. + 5. Cleanup is performed. """ src_dir = agent_dir(include_requirements, False) run_recorder = _Recorder() rmtree_recorder = _Recorder() def mock_subprocess_run(*args, **kwargs): + # We still use the recorder to check which commands were called run_recorder(*args, **kwargs) + + # The command is the first positional argument, e.g., ['kubectl', 'apply', ...] command_list = args[0] + + # Check if this is the 'kubectl apply' call if command_list and command_list[0:2] == ["kubectl", "apply"]: + # If it is, return a fake process object with a .stdout attribute + # This mimics the real output from kubectl. 
fake_stdout = "deployment.apps/gke-svc created\nservice/gke-svc created" return types.SimpleNamespace(stdout=fake_stdout) + + # For all other subprocess.run calls (like 'gcloud builds submit'), + # we don't need a return value, so the default None is fine. return None monkeypatch.setattr(subprocess, "run", mock_subprocess_run) monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) + # Execute cli_deploy.to_gke( agent_folder=str(src_dir), project="gke-proj", @@ -331,16 +570,20 @@ def mock_subprocess_run(*args, **kwargs): allow_origins=["http://localhost:3000", "https://my-app.com"], session_service_uri="sqlite:///", artifact_service_uri="gs://gke-bucket", + editable=False ) + # 1. Verify Dockerfile (basic check) dockerfile_path = tmp_path / "Dockerfile" assert dockerfile_path.is_file() dockerfile_content = dockerfile_path.read_text() assert "CMD adk web --port=9090" in dockerfile_content - assert "RUN pip install google-adk==1.2.0" in dockerfile_content + assert 'RUN pip install "google-adk[extensions]==1.2.0"' in dockerfile_content + # 2. Verify command executions by checking each recorded call assert len(run_recorder.calls) == 3, "Expected 3 subprocess calls" + # Call 1: gcloud builds submit build_args = run_recorder.calls[0][0][0] expected_build_args = [ "gcloud", @@ -354,6 +597,7 @@ def mock_subprocess_run(*args, **kwargs): ] assert build_args == expected_build_args + # Call 2: gcloud container clusters get-credentials creds_args = run_recorder.calls[1][0][0] expected_creds_args = [ "gcloud", @@ -373,10 +617,12 @@ def mock_subprocess_run(*args, **kwargs): in dockerfile_content ) + # Call 3: kubectl apply apply_args = run_recorder.calls[2][0][0] expected_apply_args = ["kubectl", "apply", "-f", str(tmp_path)] assert apply_args == expected_apply_args + # 3. 
Verify deployment.yaml content deployment_yaml_path = tmp_path / "deployment.yaml" assert deployment_yaml_path.is_file() yaml_content = deployment_yaml_path.read_text() @@ -391,3 +637,64 @@ def mock_subprocess_run(*args, **kwargs): # 4. Verify cleanup assert str(rmtree_recorder.get_last_call_args()[0]) == str(tmp_path) + +def test_to_gke_editable_mode( + monkeypatch: pytest.MonkeyPatch, + agent_dir: Callable[[bool, bool], Path], + tmp_path: Path, +) -> None: + """ + Tests that `to_gke` with `editable=True` generates the correct Dockerfile. + + Verifies: + 1. The local ADK source is copied (mocked). + 2. The Dockerfile contains `COPY` and `pip install --editable` commands. + 3. The Dockerfile does NOT contain the standard `pip install from pypi`. + """ + src_dir = agent_dir(False, False) + # Mock subprocess and cleanup functions + monkeypatch.setattr(subprocess, "run", lambda *a, **k: types.SimpleNamespace(stdout="")) + monkeypatch.setattr(shutil, "rmtree", lambda *a, **k: None) + + # Mock the shutil.copytree to avoid actual file operations for the ADK source + copytree_recorder = _Recorder() + # The first call will be for the agent, the second for the ADK source + original_copytree = shutil.copytree + def mock_copytree(src, dst, **kwargs): + copytree_recorder(src, dst, **kwargs) + # We still need to copy the agent for the test to proceed + if "agent" in str(src): + original_copytree(src, dst, **kwargs) + + monkeypatch.setattr(shutil, "copytree", mock_copytree) + + # Execute + cli_deploy.to_gke( + agent_folder=str(src_dir), + project="gke-proj", + region="us-east1", + cluster_name="my-gke-cluster", + service_name="gke-svc", + app_name="agent", + temp_folder=str(tmp_path), + port=9090, + trace_to_cloud=False, + with_ui=False, + log_level="debug", + adk_version="1.2.0", + editable=True, # Test the new editable path + ) + + # 1. 
Verify that copytree was called for the ADK source + assert len(copytree_recorder.calls) == 2 + adk_source_copy_call = copytree_recorder.calls[1][0] + assert str(adk_source_copy_call[1]) == str(tmp_path / "adk_local_src") + + # 2. Verify Dockerfile content for editable mode + dockerfile_path = tmp_path / "Dockerfile" + assert dockerfile_path.is_file() + dockerfile_content = dockerfile_path.read_text() + + assert "COPY --chown=myuser:myuser adk_local_src/ /app/adk_local_src/" in dockerfile_content + assert 'RUN pip install --editable "/app/adk_local_src/[extensions]"' in dockerfile_content + assert "RUN pip install google-adk==" not in dockerfile_content \ No newline at end of file diff --git a/tests/unittests/code_executors/test_gke_code_executor.py b/tests/unittests/code_executors/test_gke_code_executor.py deleted file mode 100644 index 9738a1b950..0000000000 --- a/tests/unittests/code_executors/test_gke_code_executor.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from unittest.mock import MagicMock, patch - -from google.adk.agents.invocation_context import InvocationContext -from google.adk.code_executors.code_execution_utils import CodeExecutionInput -from google.adk.code_executors.gke_code_executor import GkeCodeExecutor -from kubernetes.client.rest import ApiException -from kubernetes import client, config -import pytest - - -@pytest.fixture -def mock_invocation_context() -> InvocationContext: - """Fixture for a mock InvocationContext.""" - mock = MagicMock(spec=InvocationContext) - mock.invocation_id = "test-invocation-123" - return mock - - -@pytest.fixture(autouse=True) -def mock_k8s_config(): - """Fixture for auto-mocking Kubernetes config loading.""" - with patch("google.adk.code_executors.gke_code_executor.config") as mock_config: - # Simulate fallback from in-cluster to kubeconfig - mock_config.ConfigException = config.ConfigException - mock_config.load_incluster_config.side_effect = config.ConfigException - yield mock_config - - -@pytest.fixture -def mock_k8s_clients(): - """Fixture for mock Kubernetes API clients.""" - with patch( - "google.adk.code_executors.gke_code_executor.client" - ) as mock_client_class: - mock_batch_v1 = MagicMock(spec=client.BatchV1Api) - mock_core_v1 = MagicMock(spec=client.CoreV1Api) - mock_client_class.BatchV1Api.return_value = mock_batch_v1 - mock_client_class.CoreV1Api.return_value = mock_core_v1 - yield { - "batch_v1": mock_batch_v1, - "core_v1": mock_core_v1, - } - - -class TestGkeCodeExecutor: - """Unit tests for the GkeCodeExecutor.""" - - def test_init_defaults(self): - """Tests that the executor initializes with correct default values.""" - executor = GkeCodeExecutor() - assert executor.namespace == "default" - assert executor.image == "python:3.11-slim" - assert executor.timeout_seconds == 300 - assert executor.cpu_requested == "200m" - assert executor.mem_limit == "512Mi" - - def test_init_with_overrides(self): - """Tests that class attributes can be overridden at 
instantiation.""" - executor = GkeCodeExecutor( - namespace="test-ns", - image="custom-python:latest", - timeout_seconds=60, - cpu_limit="1000m", - ) - assert executor.namespace == "test-ns" - assert executor.image == "custom-python:latest" - assert executor.timeout_seconds == 60 - assert executor.cpu_limit == "1000m" - - @patch("google.adk.code_executors.gke_code_executor.Watch") - def test_execute_code_success( - self, - mock_watch, - mock_k8s_clients, - mock_invocation_context, - ): - """Tests the happy path for successful code execution.""" - # Setup Mocks - mock_job = MagicMock() - mock_job.status.succeeded = True - mock_job.status.failed = None - mock_watch.return_value.stream.return_value = [{"object": mock_job}] - - mock_pod_list = MagicMock() - mock_pod_list.items = [MagicMock()] - mock_pod_list.items[0].metadata.name = "test-pod-name" - mock_k8s_clients["core_v1"].list_namespaced_pod.return_value = mock_pod_list - mock_k8s_clients["core_v1"].read_namespaced_pod_log.return_value = "hello world" - - # Execute - executor = GkeCodeExecutor() - code_input = CodeExecutionInput(code='print("hello world")') - result = executor.execute_code(mock_invocation_context, code_input) - - # Assert - assert result.stdout == "hello world" - assert result.stderr == "" - mock_k8s_clients["core_v1"].create_namespaced_config_map.assert_called_once() - mock_k8s_clients["batch_v1"].create_namespaced_job.assert_called_once() - mock_k8s_clients["core_v1"].patch_namespaced_config_map.assert_called_once() - mock_k8s_clients["core_v1"].read_namespaced_pod_log.assert_called_once() - - @patch("google.adk.code_executors.gke_code_executor.Watch") - def test_execute_code_job_failed( - self, - mock_watch, - mock_k8s_clients, - mock_invocation_context, - ): - """Tests the path where the Kubernetes Job fails.""" - mock_job = MagicMock() - mock_job.status.succeeded = None - mock_job.status.failed = True - mock_watch.return_value.stream.return_value = [{"object": mock_job}] - 
mock_k8s_clients["core_v1"].read_namespaced_pod_log.return_value = ( - "Traceback...\nValueError: failure" - ) - - executor = GkeCodeExecutor() - result = executor.execute_code(mock_invocation_context, CodeExecutionInput(code="fail")) - - assert result.stdout == "" - assert "Job failed. Logs:" in result.stderr - assert "ValueError: failure" in result.stderr - - def test_execute_code_api_exception( - self, mock_k8s_clients, mock_invocation_context - ): - """Tests handling of an ApiException from the K8s client.""" - mock_k8s_clients["core_v1"].create_namespaced_config_map.side_effect = ( - ApiException(reason="Test API Error") - ) - executor = GkeCodeExecutor() - result = executor.execute_code(mock_invocation_context, CodeExecutionInput(code="...")) - - assert result.stdout == "" - assert "Kubernetes API error: Test API Error" in result.stderr - - @patch("google.adk.code_executors.gke_code_executor.Watch") - def test_execute_code_timeout( - self, - mock_watch, - mock_k8s_clients, - mock_invocation_context, - ): - """Tests the case where the job watch times out.""" - mock_watch.return_value.stream.return_value = [] # Empty stream simulates timeout - mock_k8s_clients["core_v1"].read_namespaced_pod_log.return_value = ( - "Still running..." - ) - - executor = GkeCodeExecutor(timeout_seconds=1) - result = executor.execute_code(mock_invocation_context, CodeExecutionInput(code="...")) - - assert result.stdout == "" - assert "Executor timed out" in result.stderr - assert "did not complete within 1s" in result.stderr - assert "Pod Logs:\nStill running..." 
in result.stderr - - def test_create_job_manifest_structure(self, mock_invocation_context): - """Tests the correctness of the generated Job manifest.""" - executor = GkeCodeExecutor(namespace="test-ns", image="test-img:v1") - job = executor._create_job_manifest("test-job", "test-cm", mock_invocation_context) - - # Check top-level properties - assert isinstance(job, client.V1Job) - assert job.api_version == "batch/v1" - assert job.kind == "Job" - assert job.metadata.name == "test-job" - assert job.spec.backoff_limit == 0 - assert job.spec.ttl_seconds_after_finished == 600 - - # Check pod template properties - pod_spec = job.spec.template.spec - assert pod_spec.restart_policy == "Never" - assert pod_spec.runtime_class_name == "gvisor" - assert len(pod_spec.tolerations) == 1 - assert pod_spec.tolerations[0].value == "gvisor" - assert len(pod_spec.volumes) == 1 - assert pod_spec.volumes[0].name == "code-volume" - assert pod_spec.volumes[0].config_map.name == "test-cm" - - # Check container properties - container = pod_spec.containers[0] - assert container.name == "code-runner" - assert container.image == "test-img:v1" - assert container.command == ["python3", "/app/code.py"] - - # Check security context - sec_context = container.security_context - assert sec_context.run_as_non_root is True - assert sec_context.run_as_user == 1001 - assert sec_context.allow_privilege_escalation is False - assert sec_context.read_only_root_filesystem is True - assert sec_context.capabilities.drop == ["ALL"] From d500ff306310ef91907ea632db6f22d22c4f112f Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 15:09:34 -0700 Subject: [PATCH 25/34] [08/28] GKE cli fix --- .../code_execution/gke_sandbox_agent.py | 49 ------------------- pyproject.toml | 2 - src/google/adk/cli/cli_deploy.py | 43 ++++++++++++++++ 3 files changed, 43 insertions(+), 51 deletions(-) delete mode 100644 contributing/samples/code_execution/gke_sandbox_agent.py diff --git 
a/contributing/samples/code_execution/gke_sandbox_agent.py b/contributing/samples/code_execution/gke_sandbox_agent.py deleted file mode 100644 index 5ee64513bf..0000000000 --- a/contributing/samples/code_execution/gke_sandbox_agent.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""A Python coding agent using the GkeCodeExecutor for secure execution.""" - -from google.adk.agents import LlmAgent -from google.adk.code_executors import GkeCodeExecutor - - -def gke_agent_system_instruction(): - """Returns: The system instruction for the GKE-based coding agent.""" - return """You are a helpful and capable AI agent that can write and execute Python code to answer questions and perform tasks. - -When a user asks a question, follow these steps: -1. Analyze the request. -2. Write a complete, self-contained Python script to accomplish the task. -3. Your code will be executed in a secure, sandboxed environment. -4. Return the full and complete output from the code execution, including any text, results, or error messages.""" - - -gke_executor = GkeCodeExecutor( - # This must match the namespace in your deployment_rbac.yaml where the - # agent's ServiceAccount and Role have permissions. - namespace="agent-sandbox", - # Setting an explicit timeout prevents a stuck job from running forever. 
- timeout_seconds=600, -) - -root_agent = LlmAgent( - name="gke_coding_agent", - model="gemini-2.0-flash", - description=( - "A general-purpose agent that executes Python code in a secure GKE" - " Sandbox." - ), - instruction=gke_agent_system_instruction(), - code_executor=gke_executor, -) diff --git a/pyproject.toml b/pyproject.toml index 277dd810f7..e89c9656b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,6 @@ test = [ "langchain-community>=0.3.17", "langgraph>=0.2.60, <= 0.4.10", # For LangGraphAgent "litellm>=1.75.5, <2.0.0", # For LiteLLM tests - "kubernetes>=29.0.0", # For GkeCodeExecutor "llama-index-readers-file>=0.4.0", # For retrieval tests "openai>=1.100.2", # For LiteLLM "pytest-asyncio>=0.25.0", @@ -134,7 +133,6 @@ extensions = [ "docker>=7.0.0", # For ContainerCodeExecutor "langgraph>=0.2.60", # For LangGraphAgent "litellm>=1.75.5", # For LiteLlm class. Currently has OpenAI limitations. TODO: once LiteLlm fix it - "kubernetes>=29.0.0", # For GkeCodeExecutor "llama-index-readers-file>=0.4.0", # For retrieval using LlamaIndex. "lxml>=5.3.0", # For load_web_page tool. "toolbox-core>=0.1.0", # For tools.toolbox_toolset.ToolboxToolset diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index 7ddb4f6c9b..1378645585 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -98,6 +98,49 @@ def _resolve_project(project_in_option: Optional[str]) -> str: return project +def _validate_gcloud_extra_args( + extra_gcloud_args: Optional[tuple[str, ...]], adk_managed_args: set[str] +) -> None: + """Validates that extra gcloud args don't conflict with ADK-managed args. + This function dynamically checks for conflicts based on the actual args + that ADK will set, rather than using a hardcoded list. + Args: + extra_gcloud_args: User-provided extra arguments for gcloud. + adk_managed_args: Set of argument names that ADK will set automatically. + Should include '--' prefix (e.g., '--project'). 
+ Raises: + click.ClickException: If any conflicts are found. + """ + if not extra_gcloud_args: + return + + # Parse user arguments into a set of argument names for faster lookup + user_arg_names = set() + for arg in extra_gcloud_args: + if arg.startswith('--'): + # Handle both '--arg=value' and '--arg value' formats + arg_name = arg.split('=')[0] + user_arg_names.add(arg_name) + + # Check for conflicts with ADK-managed args + conflicts = user_arg_names.intersection(adk_managed_args) + + if conflicts: + conflict_list = ', '.join(f"'{arg}'" for arg in sorted(conflicts)) + if len(conflicts) == 1: + raise click.ClickException( + f"The argument {conflict_list} conflicts with ADK's automatic" + ' configuration. ADK will set this argument automatically, so please' + ' remove it from your command.' + ) + else: + raise click.ClickException( + f"The arguments {conflict_list} conflict with ADK's automatic" + ' configuration. ADK will set these arguments automatically, so' + ' please remove them from your command.' + ) + + def _get_service_option_by_adk_version( adk_version: str, session_uri: Optional[str], From b434fc9d1d55841e754fc190a35ef4d1f93c6ac9 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 15:21:31 -0700 Subject: [PATCH 26/34] [08/28] GKE cli fix --- src/google/adk/cli/cli_deploy.py | 187 ++++++++++++++-------- src/google/adk/code_executors/__init__.py | 8 +- tests/unittests/cli/test_fast_api.py | 14 +- 3 files changed, 132 insertions(+), 77 deletions(-) diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index 1378645585..d26b3c9660 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -13,25 +13,23 @@ # limitations under the License. 
from __future__ import annotations +import json import os -from pathlib import Path import shutil import subprocess +from typing import Final from typing import Optional import click from packaging.version import parse -_DOCKERFILE_TEMPLATE = """ +_DOCKERFILE_TEMPLATE: Final[str] = """ FROM python:3.11-slim WORKDIR /app # Create a non-root user RUN adduser --disabled-password --gecos "" myuser -# Change ownership of /app to myuser -RUN chown -R myuser:myuser /app - # Switch to the non-root user USER myuser @@ -45,7 +43,7 @@ # Set up environment variables - End # Install ADK - Start -{adk_install_instructions} +RUN pip install google-adk=={adk_version} # Install ADK - End # Copy agent - Start @@ -53,16 +51,18 @@ # Set permission COPY --chown=myuser:myuser "agents/{app_name}/" "/app/agents/{app_name}/" -{install_agent_deps} - # Copy agent - End +# Install Agent Deps - Start +{install_agent_deps} +# Install Agent Deps - End + EXPOSE {port} CMD adk {command} --port={port} {host_option} {service_option} {trace_to_cloud_option} {allow_origins_option} {a2a_option} "/app/agents" """ -_AGENT_ENGINE_APP_TEMPLATE = """ +_AGENT_ENGINE_APP_TEMPLATE: Final[str] = """ from vertexai.preview.reasoning_engines import AdkApp if {is_config_agent}: @@ -102,12 +102,15 @@ def _validate_gcloud_extra_args( extra_gcloud_args: Optional[tuple[str, ...]], adk_managed_args: set[str] ) -> None: """Validates that extra gcloud args don't conflict with ADK-managed args. + This function dynamically checks for conflicts based on the actual args that ADK will set, rather than using a hardcoded list. + Args: extra_gcloud_args: User-provided extra arguments for gcloud. adk_managed_args: Set of argument names that ADK will set automatically. Should include '--' prefix (e.g., '--project'). + Raises: click.ClickException: If any conflicts are found. 
""" @@ -187,6 +190,7 @@ def to_cloud_run( artifact_service_uri: Optional[str] = None, memory_service_uri: Optional[str] = None, a2a: bool = False, + extra_gcloud_args: Optional[tuple[str, ...]] = None, ): """Deploys an agent to Google Cloud Run. @@ -238,15 +242,10 @@ def to_cloud_run( install_agent_deps = ( f'RUN pip install -r "/app/agents/{app_name}/requirements.txt"' if os.path.exists(requirements_txt_path) - else '' + else '# No requirements.txt found.' ) click.echo('Copying agent source code completed.') - adk_install_instructions = ( - '# Install ADK from PyPI\n' - f'RUN pip install "google-adk[extensions]=={adk_version}"' - ) - # create Dockerfile click.echo('Creating Dockerfile...') host_option = '--host=0.0.0.0' if adk_version > '0.5.0' else '' @@ -269,7 +268,7 @@ def to_cloud_run( ), trace_to_cloud_option='--trace_to_cloud' if trace_to_cloud else '', allow_origins_option=allow_origins_option, - adk_install_instructions=adk_install_instructions, + adk_version=adk_version, host_option=host_option, a2a_option=a2a_option, ) @@ -285,26 +284,56 @@ def to_cloud_run( click.echo('Deploying to Cloud Run...') region_options = ['--region', region] if region else [] project = _resolve_project(project) - subprocess.run( - [ - 'gcloud', - 'run', - 'deploy', - service_name, - '--source', - temp_folder, - '--project', - project, - *region_options, - '--port', - str(port), - '--verbosity', - log_level.lower() if log_level else verbosity, - '--labels', - 'created-by=adk', - ], - check=True, - ) + + # Build the set of args that ADK will manage + adk_managed_args = {'--source', '--project', '--port', '--verbosity'} + if region: + adk_managed_args.add('--region') + + # Validate that extra gcloud args don't conflict with ADK-managed args + _validate_gcloud_extra_args(extra_gcloud_args, adk_managed_args) + + # Build the command with extra gcloud args + gcloud_cmd = [ + 'gcloud', + 'run', + 'deploy', + service_name, + '--source', + temp_folder, + '--project', + project, + 
*region_options, + '--port', + str(port), + '--verbosity', + log_level.lower() if log_level else verbosity, + ] + + # Handle labels specially - merge user labels with ADK label + user_labels = [] + extra_args_without_labels = [] + + if extra_gcloud_args: + for arg in extra_gcloud_args: + if arg.startswith('--labels='): + # Extract user-provided labels + user_labels_value = arg[9:] # Remove '--labels=' prefix + user_labels.append(user_labels_value) + else: + extra_args_without_labels.append(arg) + + # Combine ADK label with user labels + all_labels = ['created-by=adk'] + all_labels.extend(user_labels) + labels_arg = ','.join(all_labels) + + gcloud_cmd.extend(['--labels', labels_arg]) + + # Add any remaining extra passthrough args + gcloud_cmd.extend(extra_args_without_labels) + + subprocess.run(gcloud_cmd, check=True) finally: click.echo(f'Cleaning up the temp folder: {temp_folder}') shutil.rmtree(temp_folder) @@ -325,6 +354,7 @@ def to_agent_engine( description: Optional[str] = None, requirements_file: Optional[str] = None, env_file: Optional[str] = None, + agent_engine_config_file: Optional[str] = None, ): """Deploys an agent to Vertex AI Agent Engine. @@ -374,6 +404,9 @@ def to_agent_engine( variables. If not specified, the `.env` file in the `agent_folder` will be used. The values of `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` will be overridden by `project` and `region` if they are specified. + agent_engine_config_file (str): The filepath to the agent engine config file + to use. If not specified, the `.agent_engine_config.json` file in the + `agent_folder` will be used. """ app_name = os.path.basename(agent_folder) agent_src_path = os.path.join(temp_folder, app_name) @@ -404,6 +437,34 @@ def to_agent_engine( project = _resolve_project(project) click.echo('Resolving files and dependencies...') + agent_config = {} + if not agent_engine_config_file: + # Attempt to read the agent engine config from .agent_engine_config.json in the dir (if any). 
+ agent_engine_config_file = os.path.join( + agent_folder, '.agent_engine_config.json' + ) + if os.path.exists(agent_engine_config_file): + click.echo(f'Reading agent engine config from {agent_engine_config_file}') + with open(agent_engine_config_file, 'r') as f: + agent_config = json.load(f) + if display_name: + if 'display_name' in agent_config: + click.echo( + 'Overriding display_name in agent engine config with' + f' {display_name}' + ) + agent_config['display_name'] = display_name + if description: + if 'description' in agent_config: + click.echo( + f'Overriding description in agent engine config with {description}' + ) + agent_config['description'] = description + if agent_config.get('extra_packages'): + agent_config['extra_packages'].append(temp_folder) + else: + agent_config['extra_packages'] = [temp_folder] + if not requirements_file: # Attempt to read requirements from requirements.txt in the dir (if any). requirements_txt_path = os.path.join(agent_src_path, 'requirements.txt') @@ -412,7 +473,18 @@ def to_agent_engine( with open(requirements_txt_path, 'w', encoding='utf-8') as f: f.write('google-cloud-aiplatform[adk,agent_engines]') click.echo(f'Created {requirements_txt_path}') - requirements_file = requirements_txt_path + agent_config['requirements'] = agent_config.get( + 'requirements', + requirements_txt_path, + ) + else: + if 'requirements' in agent_config: + click.echo( + 'Overriding requirements in agent engine config with ' + f'{requirements_file}' + ) + agent_config['requirements'] = requirements_file + env_vars = None if not env_file: # Attempt to read the env variables from .env in the dir (if any). 
@@ -446,6 +518,14 @@ def to_agent_engine( else: region = env_region click.echo(f'{region=} set by GOOGLE_CLOUD_LOCATION in {env_file}') + if env_vars: + if 'env_vars' in agent_config: + click.echo( + f'Overriding env_vars in agent engine config with {env_vars}' + ) + agent_config['env_vars'] = env_vars + # Set env_vars in agent_config to None if it is not set. + agent_config['env_vars'] = agent_config.get('env_vars', env_vars) vertexai.init( project=project, @@ -457,7 +537,7 @@ def to_agent_engine( is_config_agent = False config_root_agent_file = os.path.join(agent_src_path, 'root_agent.yaml') if os.path.exists(config_root_agent_file): - click.echo('Config agent detected.') + click.echo(f'Config agent detected: {config_root_agent_file}') is_config_agent = True adk_app_file = os.path.join(temp_folder, f'{adk_app}.py') @@ -490,7 +570,7 @@ def to_agent_engine( click.echo(f'The following exception was raised: {e}') click.echo('Deploying to agent engine...') - agent_engine = agent_engines.ModuleAgent( + agent_config['agent_engine'] = agent_engines.ModuleAgent( module_name=adk_app, agent_name='adk_app', register_operations={ @@ -512,14 +592,6 @@ def to_agent_engine( sys_paths=[temp_folder[1:]], agent_framework='google-adk', ) - agent_config = dict( - agent_engine=agent_engine, - requirements=requirements_file, - display_name=display_name, - description=description, - env_vars=env_vars, - extra_packages=[temp_folder], - ) if not agent_engine_id: agent_engines.create(**agent_config) @@ -550,7 +622,6 @@ def to_gke( artifact_service_uri: Optional[str] = None, memory_service_uri: Optional[str] = None, a2a: bool = False, - editable: bool = False, ): """Deploys an agent to Google Kubernetes Engine(GKE). 
@@ -609,23 +680,6 @@ def to_gke( ) click.secho('āœ… Environment prepared.', fg='green') - adk_install_instructions = ( - '# Install ADK from PyPI\n' - f'RUN pip install "google-adk[extensions]=={adk_version}"' - ) - if editable: - click.echo(' - Preparing local ADK source for editable install...') - # Find the project root to include pyproject.toml - adk_source_path = Path(__file__).resolve().parents[4] - temp_adk_source_dest = os.path.join(temp_folder, 'adk_local_src') - shutil.copytree(adk_source_path, temp_adk_source_dest) - adk_install_instructions = ( - '# Install ADK from local source with extensions\n' - 'COPY --chown=myuser:myuser adk_local_src/ /app/adk_local_src/\n' - 'RUN pip install --editable "/app/adk_local_src/[extensions]"' - ) - click.secho('āœ… Local ADK source prepared.', fg='green') - allow_origins_option = ( f'--allow_origins={",".join(allow_origins)}' if allow_origins else '' ) @@ -649,7 +703,7 @@ def to_gke( ), trace_to_cloud_option='--trace_to_cloud' if trace_to_cloud else '', allow_origins_option=allow_origins_option, - adk_install_instructions=adk_install_instructions, + adk_version=adk_version, host_option=host_option, a2a_option='--a2a' if a2a else '', ) @@ -712,7 +766,6 @@ def to_gke( app.kubernetes.io/instance: {service_name} app.kubernetes.io/managed-by: adk-cli spec: - serviceAccountName: adk-code-executor-sa containers: - name: {service_name} image: {image_name} diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index e6f5ce6387..6b139ed469 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -42,14 +42,14 @@ def __getattr__(name: str): 'VertexAiCodeExecutor requires additional dependencies. 
' 'Please install with: pip install "google-adk[extensions]"' ) from e - elif name == 'GkeCodeExecutor': + elif name == 'ContainerCodeExecutor': try: - from .gke_code_executor import GkeCodeExecutor + from .gke_code_executor import ContainerCodeExecutor - return GkeCodeExecutor + return ContainerCodeExecutor except ImportError as e: raise ImportError( - 'GkeCodeExecutor requires additional dependencies. ' + 'ContainerCodeExecutor requires additional dependencies. ' 'Please install with: pip install "google-adk[extensions]"' ) from e raise AttributeError(f"module '{__name__}' has no attribute '{name}'") diff --git a/tests/unittests/cli/test_fast_api.py b/tests/unittests/cli/test_fast_api.py index f1c9e9d6ef..423581dfd9 100755 --- a/tests/unittests/cli/test_fast_api.py +++ b/tests/unittests/cli/test_fast_api.py @@ -845,18 +845,20 @@ def verify_eval_case_result(actual_eval_case_result): assert data == [f"{info['app_name']}_test_eval_set_id_eval_result"] -def test_list_eval_metrics(test_app): - """Test listing eval metrics.""" - url = "/apps/test_app/eval_metrics" +def test_list_metrics_info(test_app): + """Test listing metrics info.""" + url = "/apps/test_app/metrics-info" response = test_app.get(url) # Verify the response assert response.status_code == 200 data = response.json() - assert isinstance(data, list) + metrics_info_key = "metricsInfo" + assert metrics_info_key in data + assert isinstance(data[metrics_info_key], list) # Add more assertions based on the expected metrics - assert len(data) > 0 - for metric in data: + assert len(data[metrics_info_key]) > 0 + for metric in data[metrics_info_key]: assert "metricName" in metric assert "description" in metric assert "metricValueInfo" in metric From 09fa7ceeb3ef09132cd6486ba756c8f3037e9310 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 15:24:14 -0700 Subject: [PATCH 27/34] [08/28] GKE cli fix --- src/google/adk/cli/cli_tools_click.py | 68 +++++++++++++++++++---- 
src/google/adk/code_executors/__init__.py | 2 +- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py index 47d755baa8..c45fdd37ea 100644 --- a/src/google/adk/cli/cli_tools_click.py +++ b/src/google/adk/cli/cli_tools_click.py @@ -858,7 +858,13 @@ def cli_api_server( server.run() -@deploy.command("cloud_run") +@deploy.command( + "cloud_run", + context_settings={ + "allow_extra_args": True, + "allow_interspersed_args": False, + }, +) @click.option( "--project", type=str, @@ -971,7 +977,9 @@ def cli_api_server( # TODO: Add eval_storage_uri option back when evals are supported in Cloud Run. @adk_services_options() @deprecated_adk_services_options() +@click.pass_context def cli_deploy_cloud_run( + ctx, agent: str, project: Optional[str], region: Optional[str], @@ -996,9 +1004,13 @@ def cli_deploy_cloud_run( AGENT: The path to the agent source code folder. - Example: + Use '--' to separate gcloud arguments from adk arguments. + + Examples: adk deploy cloud_run --project=[project] --region=[region] path/to/my_agent + + adk deploy cloud_run --project=[project] --region=[region] path/to/my_agent -- --no-allow-unauthenticated --min-instances=2 """ if verbosity: click.secho( @@ -1010,6 +1022,36 @@ def cli_deploy_cloud_run( session_service_uri = session_service_uri or session_db_url artifact_service_uri = artifact_service_uri or artifact_storage_uri + + # Parse arguments to separate gcloud args (after --) from regular args + gcloud_args = [] + if "--" in ctx.args: + separator_index = ctx.args.index("--") + gcloud_args = ctx.args[separator_index + 1 :] + regular_args = ctx.args[:separator_index] + + # If there are regular args before --, that's an error + if regular_args: + click.secho( + "Error: Unexpected arguments after agent path and before '--':" + f" {' '.join(regular_args)}. 
\nOnly arguments after '--' are passed" + " to gcloud.", + fg="red", + err=True, + ) + ctx.exit(2) + else: + # No -- separator, treat all args as an error to enforce the new behavior + if ctx.args: + click.secho( + f"Error: Unexpected arguments: {' '.join(ctx.args)}. \nUse '--' to" + " separate gcloud arguments, e.g.: adk deploy cloud_run [options]" + " agent_path -- --min-instances=2", + fg="red", + err=True, + ) + ctx.exit(2) + try: cli_deploy.to_cloud_run( agent_folder=agent, @@ -1029,6 +1071,7 @@ def cli_deploy_cloud_run( artifact_service_uri=artifact_service_uri, memory_service_uri=memory_service_uri, a2a=a2a, + extra_gcloud_args=tuple(gcloud_args), ) except Exception as e: click.secho(f"Deploy failed: {e}", fg="red", err=True) @@ -1141,6 +1184,17 @@ def cli_deploy_cloud_run( " NOTE: This flag is temporary and will be removed in the future." ), ) +@click.option( + "--agent_engine_config_file", + type=str, + default="", + help=( + "Optional. The filepath to the `.agent_engine_config.json` file to use." + " The values in this file will be overriden by the values set by other" + " flags. (default: the `.agent_engine_config.json` file in the `agent`" + " directory, if any.)" + ), +) @click.argument( "agent", type=click.Path( @@ -1161,6 +1215,7 @@ def cli_deploy_agent_engine( env_file: str, requirements_file: str, absolutize_imports: bool, + agent_engine_config_file: str, ): """Deploys an agent to Agent Engine. @@ -1184,6 +1239,7 @@ def cli_deploy_agent_engine( env_file=env_file, requirements_file=requirements_file, absolutize_imports=absolutize_imports, + agent_engine_config_file=agent_engine_config_file, ) except Exception as e: click.secho(f"Deploy failed: {e}", fg="red", err=True) @@ -1220,12 +1276,6 @@ def cli_deploy_agent_engine( " 'adk-default-service-name')." ), ) -@click.option( - '--editable', - is_flag=True, - default=False, - help='Build the container using the local ADK source code.' 
-) @click.option( "--app_name", type=str, @@ -1307,7 +1357,6 @@ def cli_deploy_gke( with_ui: bool, adk_version: str, log_level: Optional[str] = None, - editable: bool = False, session_service_uri: Optional[str] = None, artifact_service_uri: Optional[str] = None, memory_service_uri: Optional[str] = None, @@ -1334,7 +1383,6 @@ def cli_deploy_gke( with_ui=with_ui, log_level=log_level, adk_version=adk_version, - editable=editable, session_service_uri=session_service_uri, artifact_service_uri=artifact_service_uri, memory_service_uri=memory_service_uri, diff --git a/src/google/adk/code_executors/__init__.py b/src/google/adk/code_executors/__init__.py index 6b139ed469..12b6d870ad 100644 --- a/src/google/adk/code_executors/__init__.py +++ b/src/google/adk/code_executors/__init__.py @@ -44,7 +44,7 @@ def __getattr__(name: str): ) from e elif name == 'ContainerCodeExecutor': try: - from .gke_code_executor import ContainerCodeExecutor + from .container_code_executor import ContainerCodeExecutor return ContainerCodeExecutor except ImportError as e: From 61eec69f21230b502118a8908df6aecfc9a24bb0 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 15:48:24 -0700 Subject: [PATCH 28/34] [08/28] GKE cli fix --- src/google/adk/cli/cli_deploy.py | 33 ++- src/google/adk/cli/cli_tools_click.py | 7 + tests/unittests/cli/utils/test_cli_deploy.py | 258 +------------------ 3 files changed, 45 insertions(+), 253 deletions(-) diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index d26b3c9660..5c59964334 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -15,6 +15,7 @@ import json import os +from pathlib import Path import shutil import subprocess from typing import Final @@ -43,7 +44,7 @@ # Set up environment variables - End # Install ADK - Start -RUN pip install google-adk=={adk_version} +{adk_install_instructions} # Install ADK - End # Copy agent - Start @@ -246,6 +247,10 @@ def to_cloud_run( ) click.echo('Copying 
agent source code completed.') + adk_install_instructions = ( + f'RUN pip install google-adk=={adk_version}' + ) + # create Dockerfile click.echo('Creating Dockerfile...') host_option = '--host=0.0.0.0' if adk_version > '0.5.0' else '' @@ -268,6 +273,7 @@ def to_cloud_run( ), trace_to_cloud_option='--trace_to_cloud' if trace_to_cloud else '', allow_origins_option=allow_origins_option, + adk_install_instructions=adk_install_instructions, adk_version=adk_version, host_option=host_option, a2a_option=a2a_option, @@ -622,6 +628,8 @@ def to_gke( artifact_service_uri: Optional[str] = None, memory_service_uri: Optional[str] = None, a2a: bool = False, + editable: bool = False, + service_account_name: Optional[str] = None, ): """Deploys an agent to Google Kubernetes Engine(GKE). @@ -645,6 +653,7 @@ def to_gke( session_service_uri: The URI of the session service. artifact_service_uri: The URI of the artifact service. memory_service_uri: The URI of the memory service. + service_account_name: The name of the Kubernetes Service Account to use for the deployed agent pod. 
""" click.secho( '\n🚀 Starting ADK Agent Deployment to GKE...', fg='cyan', bold=True @@ -680,6 +689,22 @@ def to_gke( ) click.secho('✅ Environment prepared.', fg='green') + adk_install_instructions = ( + f'RUN pip install "google-adk[extensions]=={adk_version}"' + ) + if editable: + click.echo(' - Preparing local ADK source for editable install...') + # Find the project root to include pyproject.toml + adk_source_path = Path(__file__).resolve().parents[4] + temp_adk_source_dest = os.path.join(temp_folder, 'adk_local_src') + shutil.copytree(adk_source_path, temp_adk_source_dest) + adk_install_instructions = ( + '# Install ADK from local source \n' + 'COPY --chown=myuser:myuser adk_local_src/ /app/adk_local_src/\n' + 'RUN pip install --editable "/app/adk_local_src/[extensions]"' + ) + click.secho('✅ Local ADK source prepared.', fg='green') + allow_origins_option = ( f'--allow_origins={",".join(allow_origins)}' if allow_origins else '' ) @@ -703,6 +728,7 @@ def to_gke( ), trace_to_cloud_option='--trace_to_cloud' if trace_to_cloud else '', allow_origins_option=allow_origins_option, + adk_install_instructions=adk_install_instructions, adk_version=adk_version, host_option=host_option, a2a_option='--a2a' if a2a else '', @@ -742,6 +768,10 @@ def to_gke( # Create a Kubernetes deployment click.echo(' - Creating Kubernetes deployment.yaml...') + sa_yaml_block = '' + if service_account_name: + # The newline at the end is important for correct YAML formatting. 
+ sa_yaml_block = f' serviceAccountName: {service_account_name}\n' deployment_yaml = f""" apiVersion: apps/v1 kind: Deployment @@ -766,6 +796,7 @@ def to_gke( app.kubernetes.io/instance: {service_name} app.kubernetes.io/managed-by: adk-cli spec: + {sa_yaml_block} containers: - name: {service_name} image: {image_name} diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py index c45fdd37ea..a99f24add5 100644 --- a/src/google/adk/cli/cli_tools_click.py +++ b/src/google/adk/cli/cli_tools_click.py @@ -1285,6 +1285,11 @@ def cli_deploy_agent_engine( " of the AGENT source code)." ), ) +@click.option( + "--service-account-name", + default=None, + help="Optional. Name of the K8s Service Account for the pod.", +) @click.option( "--port", type=int, @@ -1351,6 +1356,7 @@ def cli_deploy_gke( cluster_name: str, service_name: str, app_name: str, + service_account_name: Optional[str], temp_folder: str, port: int, trace_to_cloud: bool, @@ -1377,6 +1383,7 @@ def cli_deploy_gke( cluster_name=cluster_name, service_name=service_name, app_name=app_name, + service_account_name=service_account_name, temp_folder=temp_folder, port=port, trace_to_cloud=trace_to_cloud, diff --git a/tests/unittests/cli/utils/test_cli_deploy.py b/tests/unittests/cli/utils/test_cli_deploy.py index 3ffe702038..13a6cb85bc 100644 --- a/tests/unittests/cli/utils/test_cli_deploy.py +++ b/tests/unittests/cli/utils/test_cli_deploy.py @@ -22,7 +22,6 @@ import shutil import subprocess import sys -import tempfile import types from typing import Any from typing import Callable @@ -80,14 +79,6 @@ def reload_cli_deploy(): def agent_dir(tmp_path: Path) -> Callable[[bool, bool], Path]: """ Return a factory that creates a dummy agent directory tree. - - Args: - tmp_path: The temporary path fixture provided by pytest. - - Returns: - A factory function that takes two booleans: - - include_requirements: Whether to include a `requirements.txt` file. 
- - include_env: Whether to include a `.env` file. """ def _factory(include_requirements: bool, include_env: bool) -> Path: @@ -121,14 +112,12 @@ def mock_vertex_ai( sys.modules["vertexai"] = mock_vertexai sys.modules["vertexai.agent_engines"] = mock_agent_engines - # Also mock dotenv mock_dotenv = mock.MagicMock() mock_dotenv.dotenv_values = mock.MagicMock(return_value={"FILE_VAR": "value"}) sys.modules["dotenv"] = mock_dotenv yield mock_vertexai - # Cleanup: remove mocks from sys.modules del sys.modules["vertexai"] del sys.modules["vertexai.agent_engines"] del sys.modules["dotenv"] @@ -210,8 +199,6 @@ def test_resolve_project_from_gcloud_fails( ("1.2.0", None, "gs://a", None, " --artifact_storage_uri=gs://a"), ], ) - -# _get_service_option_by_adk_version def test_get_service_option_by_adk_version( adk_version: str, session_uri: str | None, @@ -220,204 +207,13 @@ def test_get_service_option_by_adk_version( expected: str, ) -> None: """It should return the correct service URI flags for a given ADK version.""" - assert ( - cli_deploy._get_service_option_by_adk_version( - adk_version=adk_version, - session_uri=session_uri, - artifact_uri=artifact_uri, - memory_uri=memory_uri, - ) - == expected - ) - - -@pytest.mark.parametrize("include_requirements", [True, False]) -@pytest.mark.parametrize("with_ui", [True, False]) -def test_to_cloud_run_happy_path( - monkeypatch: pytest.MonkeyPatch, - agent_dir: Callable[[bool, bool], Path], - tmp_path: Path, - include_requirements: bool, - with_ui: bool, -) -> None: - """ - End-to-end execution test for `to_cloud_run`. - - This test verifies that for a given configuration: - 1. The agent source files are correctly copied to a temporary build context. - 2. A valid Dockerfile is generated with the correct parameters. - 3. The `gcloud run deploy` command is constructed with the correct arguments. 
- """ - src_dir = agent_dir(include_requirements, False) - run_recorder = _Recorder() - - monkeypatch.setattr(subprocess, "run", run_recorder) - # Mock rmtree to prevent actual deletion during test run but record calls - rmtree_recorder = _Recorder() - monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) - - # Execute the function under test - cli_deploy.to_cloud_run( - agent_folder=str(src_dir), - project="proj", - region="asia-northeast1", - service_name="svc", - app_name="agent", - temp_folder=str(tmp_path), - port=8080, - trace_to_cloud=True, - with_ui=with_ui, - log_level="info", - verbosity="info", - allow_origins=["http://localhost:3000", "https://my-app.com"], - session_service_uri="sqlite://", - artifact_service_uri="gs://bucket", - memory_service_uri="rag://", - adk_version="1.3.0", + actual = cli_deploy._get_service_option_by_adk_version( + adk_version=adk_version, + session_uri=session_uri, + artifact_uri=artifact_uri, + memory_uri=memory_uri, ) - - # 1. Assert that source files were copied correctly - agent_dest_path = tmp_path / "agents" / "agent" - assert (agent_dest_path / "agent.py").is_file() - assert (agent_dest_path / "__init__.py").is_file() - assert ( - agent_dest_path / "requirements.txt" - ).is_file() == include_requirements - - # 2. 
Assert that the Dockerfile was generated correctly - dockerfile_path = tmp_path / "Dockerfile" - assert dockerfile_path.is_file() - dockerfile_content = dockerfile_path.read_text() - - expected_command = "web" if with_ui else "api_server" - assert f"CMD adk {expected_command} --port=8080" in dockerfile_content - assert "FROM python:3.11-slim" in dockerfile_content - assert ( - 'RUN adduser --disabled-password --gecos "" myuser' in dockerfile_content - ) - assert "USER myuser" in dockerfile_content - assert "ENV GOOGLE_CLOUD_PROJECT=proj" in dockerfile_content - assert "ENV GOOGLE_CLOUD_LOCATION=asia-northeast1" in dockerfile_content - assert 'RUN pip install "google-adk[extensions]==1.3.0"' in dockerfile_content - assert "--trace_to_cloud" in dockerfile_content - - if include_requirements: - assert ( - 'RUN pip install -r "/app/agents/agent/requirements.txt"' - in dockerfile_content - ) - else: - assert "RUN pip install -r" not in dockerfile_content - - assert ( - "--allow_origins=http://localhost:3000,https://my-app.com" - in dockerfile_content - ) - - # 3. Assert that the gcloud command was constructed correctly - assert len(run_recorder.calls) == 1 - gcloud_args = run_recorder.get_last_call_args()[0] - - expected_gcloud_command = [ - "gcloud", - "run", - "deploy", - "svc", - "--source", - str(tmp_path), - "--project", - "proj", - "--region", - "asia-northeast1", - "--port", - "8080", - "--verbosity", - "info", - "--labels", - "created-by=adk", - ] - assert gcloud_args == expected_gcloud_command - - # 4. 
Assert cleanup was performed - assert str(rmtree_recorder.get_last_call_args()[0]) == str(tmp_path) - - -def test_to_cloud_run_cleans_temp_dir( - monkeypatch: pytest.MonkeyPatch, - agent_dir: Callable[[bool], Path], -) -> None: - """`to_cloud_run` should always delete the temporary folder on exit.""" - tmp_dir = Path(tempfile.mkdtemp()) - src_dir = agent_dir(False, False) - - deleted: Dict[str, Path] = {} - - def _fake_rmtree(path: str | Path, *a: Any, **k: Any) -> None: - deleted["path"] = Path(path) - - monkeypatch.setattr(cli_deploy.shutil, "rmtree", _fake_rmtree) - monkeypatch.setattr(subprocess, "run", _Recorder()) - - cli_deploy.to_cloud_run( - agent_folder=str(src_dir), - project="proj", - region=None, - service_name="svc", - app_name="app", - temp_folder=str(tmp_dir), - port=8080, - trace_to_cloud=False, - with_ui=False, - log_level="info", - verbosity="info", - adk_version="1.0.0", - session_service_uri=None, - artifact_service_uri=None, - memory_service_uri=None, - ) - - assert deleted["path"] == tmp_dir - - -def test_to_cloud_run_cleans_temp_dir_on_failure( - monkeypatch: pytest.MonkeyPatch, - agent_dir: Callable[[bool, bool], Path], -) -> None: - """`to_cloud_run` should always delete the temporary folder on exit, even if gcloud fails.""" - tmp_dir = Path(tempfile.mkdtemp()) - src_dir = agent_dir(False, False) - - rmtree_recorder = _Recorder() - monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) - # Make the gcloud command fail - monkeypatch.setattr( - subprocess, - "run", - mock.Mock(side_effect=subprocess.CalledProcessError(1, "gcloud")), - ) - - with pytest.raises(subprocess.CalledProcessError): - cli_deploy.to_cloud_run( - agent_folder=str(src_dir), - project="proj", - region="us-central1", - service_name="svc", - app_name="app", - temp_folder=str(tmp_dir), - port=8080, - trace_to_cloud=False, - with_ui=False, - log_level="info", - verbosity="info", - adk_version="1.0.0", - session_service_uri=None, - artifact_service_uri=None, - 
memory_service_uri=None, - ) - - # Check that rmtree was called on the temp folder in the finally block - assert rmtree_recorder.calls, "shutil.rmtree should have been called" - assert str(rmtree_recorder.get_last_call_args()[0]) == str(tmp_dir) + assert actual.rstrip() == expected.rstrip() @pytest.mark.usefixtures("mock_vertex_ai") @@ -432,14 +228,6 @@ def test_to_agent_engine_happy_path( ) -> None: """ Tests the happy path for the `to_agent_engine` function. - - Verifies: - 1. Source files are copied. - 2. `adk_app.py` is created correctly. - 3. `requirements.txt` is handled (created if not present). - 4. `.env` file is read if present. - 5. `vertexai.init` and `agent_engines.create` are called with the correct args. - 6. Cleanup is performed. """ src_dir = agent_dir(has_reqs, has_env) temp_folder = tmp_path / "build" @@ -448,7 +236,6 @@ def test_to_agent_engine_happy_path( monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) - # Execute cli_deploy.to_agent_engine( agent_folder=str(src_dir), temp_folder=str(temp_folder), @@ -461,11 +248,9 @@ def test_to_agent_engine_happy_path( description="A test agent.", ) - # 1. Verify file operations assert (temp_folder / app_name / "agent.py").is_file() assert (temp_folder / app_name / "__init__.py").is_file() - # 2. Verify adk_app.py creation adk_app_path = temp_folder / "my_adk_app.py" assert adk_app_path.is_file() content = adk_app_path.read_text() @@ -473,14 +258,11 @@ def test_to_agent_engine_happy_path( assert "adk_app = AdkApp(" in content assert "enable_tracing=True" in content - # 3. Verify requirements handling reqs_path = temp_folder / app_name / "requirements.txt" assert reqs_path.is_file() if not has_reqs: - # It should have been created with the default content assert "google-cloud-aiplatform[adk,agent_engines]" in reqs_path.read_text() - # 4. 
Verify Vertex AI SDK calls vertexai = sys.modules["vertexai"] vertexai.init.assert_called_once_with( project="my-gcp-project", @@ -488,7 +270,6 @@ def test_to_agent_engine_happy_path( staging_bucket="gs://my-staging-bucket", ) - # 5. Verify env var handling dotenv = sys.modules["dotenv"] if has_env: dotenv.dotenv_values.assert_called_once() @@ -497,7 +278,6 @@ def test_to_agent_engine_happy_path( dotenv.dotenv_values.assert_not_called() expected_env_vars = None - # 6. Verify agent_engines.create call vertexai.agent_engines.create.assert_called_once() create_kwargs = vertexai.agent_engines.create.call_args.kwargs assert create_kwargs["agent_engine"] == "mock-agent-engine-object" @@ -507,7 +287,6 @@ def test_to_agent_engine_happy_path( assert create_kwargs["extra_packages"] == [str(temp_folder)] assert create_kwargs["env_vars"] == expected_env_vars - # 7. Verify cleanup assert str(rmtree_recorder.get_last_call_args()[0]) == str(temp_folder) @@ -520,40 +299,22 @@ def test_to_gke_happy_path( ) -> None: """ Tests the happy path for the `to_gke` function. - - Verifies: - 1. Source files are copied and Dockerfile is created. - 2. `gcloud builds submit` is called to build the image. - 3. `deployment.yaml` is created with the correct content. - 4. `gcloud container get-credentials` and `kubectl apply` are called. - 5. Cleanup is performed. """ src_dir = agent_dir(include_requirements, False) run_recorder = _Recorder() rmtree_recorder = _Recorder() def mock_subprocess_run(*args, **kwargs): - # We still use the recorder to check which commands were called run_recorder(*args, **kwargs) - - # The command is the first positional argument, e.g., ['kubectl', 'apply', ...] command_list = args[0] - - # Check if this is the 'kubectl apply' call if command_list and command_list[0:2] == ["kubectl", "apply"]: - # If it is, return a fake process object with a .stdout attribute - # This mimics the real output from kubectl. 
fake_stdout = "deployment.apps/gke-svc created\nservice/gke-svc created" return types.SimpleNamespace(stdout=fake_stdout) - - # For all other subprocess.run calls (like 'gcloud builds submit'), - # we don't need a return value, so the default None is fine. return None monkeypatch.setattr(subprocess, "run", mock_subprocess_run) monkeypatch.setattr(shutil, "rmtree", rmtree_recorder) - # Execute cli_deploy.to_gke( agent_folder=str(src_dir), project="gke-proj", @@ -570,20 +331,16 @@ def mock_subprocess_run(*args, **kwargs): allow_origins=["http://localhost:3000", "https://my-app.com"], session_service_uri="sqlite:///", artifact_service_uri="gs://gke-bucket", - editable=False ) - # 1. Verify Dockerfile (basic check) dockerfile_path = tmp_path / "Dockerfile" assert dockerfile_path.is_file() dockerfile_content = dockerfile_path.read_text() assert "CMD adk web --port=9090" in dockerfile_content assert 'RUN pip install "google-adk[extensions]==1.2.0"' in dockerfile_content - # 2. Verify command executions by checking each recorded call assert len(run_recorder.calls) == 3, "Expected 3 subprocess calls" - # Call 1: gcloud builds submit build_args = run_recorder.calls[0][0][0] expected_build_args = [ "gcloud", @@ -597,7 +354,6 @@ def mock_subprocess_run(*args, **kwargs): ] assert build_args == expected_build_args - # Call 2: gcloud container clusters get-credentials creds_args = run_recorder.calls[1][0][0] expected_creds_args = [ "gcloud", @@ -617,12 +373,10 @@ def mock_subprocess_run(*args, **kwargs): in dockerfile_content ) - # Call 3: kubectl apply apply_args = run_recorder.calls[2][0][0] expected_apply_args = ["kubectl", "apply", "-f", str(tmp_path)] assert apply_args == expected_apply_args - # 3. 
Verify deployment.yaml content deployment_yaml_path = tmp_path / "deployment.yaml" assert deployment_yaml_path.is_file() yaml_content = deployment_yaml_path.read_text() From 9598469443bda5de60c0f05a6ab21e4e6ee21245 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 15:49:28 -0700 Subject: [PATCH 29/34] [08/28] GKE cli fix --- tests/unittests/cli/utils/test_cli_deploy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/cli/utils/test_cli_deploy.py b/tests/unittests/cli/utils/test_cli_deploy.py index 13a6cb85bc..f3154042f4 100644 --- a/tests/unittests/cli/utils/test_cli_deploy.py +++ b/tests/unittests/cli/utils/test_cli_deploy.py @@ -451,4 +451,4 @@ def mock_copytree(src, dst, **kwargs): assert "COPY --chown=myuser:myuser adk_local_src/ /app/adk_local_src/" in dockerfile_content assert 'RUN pip install --editable "/app/adk_local_src/[extensions]"' in dockerfile_content - assert "RUN pip install google-adk==" not in dockerfile_content \ No newline at end of file + assert "RUN pip install google-adk==" not in dockerfile_content From 01c5261fabe319709caafef8a3992b49322adff5 Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 16:11:51 -0700 Subject: [PATCH 30/34] [08/28] GKE cli fix --- src/google/adk/cli/cli_deploy.py | 2 +- tests/unittests/cli/utils/test_cli_deploy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index 5c59964334..e7d2a508ef 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -690,7 +690,7 @@ def to_gke( click.secho('✅ Environment prepared.', fg='green') adk_install_instructions = ( - f'RUN pip install "google-adk[extensions]=={adk_version}"' + f'RUN pip install "google-adk=={adk_version}"' ) if editable: click.echo(' - Preparing local ADK source for editable install...') diff --git a/tests/unittests/cli/utils/test_cli_deploy.py b/tests/unittests/cli/utils/test_cli_deploy.py 
index f3154042f4..a699654224 100644 --- a/tests/unittests/cli/utils/test_cli_deploy.py +++ b/tests/unittests/cli/utils/test_cli_deploy.py @@ -337,7 +337,7 @@ def mock_subprocess_run(*args, **kwargs): assert dockerfile_path.is_file() dockerfile_content = dockerfile_path.read_text() assert "CMD adk web --port=9090" in dockerfile_content - assert 'RUN pip install "google-adk[extensions]==1.2.0"' in dockerfile_content + assert 'RUN pip install "google-adk==1.2.0"' in dockerfile_content assert len(run_recorder.calls) == 3, "Expected 3 subprocess calls" From 37983f96f99986771da20a690ae62d573c3aae7a Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 16:14:45 -0700 Subject: [PATCH 31/34] [08/28] GKE cli fix --- src/google/adk/cli/cli_deploy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index e7d2a508ef..84954f7e11 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -771,7 +771,7 @@ def to_gke( sa_yaml_block = '' if service_account_name: # The newline at the end is important for correct YAML formatting. 
- sa_yaml_block = f' serviceAccountName: {service_account_name}\n' + sa_yaml_block = f'serviceAccountName: {service_account_name}\n' deployment_yaml = f""" apiVersion: apps/v1 kind: Deployment From eb3008bf794309175e52dfa4a8cdc7acc17b279e Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 16:16:58 -0700 Subject: [PATCH 32/34] [08/28] GKE cli fix --- src/google/adk/cli/cli_deploy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index 84954f7e11..fb3303f24f 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -695,8 +695,8 @@ def to_gke( if editable: click.echo(' - Preparing local ADK source for editable install...') # Find the project root to include pyproject.toml - adk_source_path = Path(__file__).resolve().parents[4] - temp_adk_source_dest = os.path.join(temp_folder, 'adk_local_src') + adk_source_path = next(p for p in Path(__file__).resolve().parents if (p / 'pyproject.toml').is_file()) + temp_adk_source_dest = Path(temp_folder) / 'adk_local_src' shutil.copytree(adk_source_path, temp_adk_source_dest) adk_install_instructions = ( '# Install ADK from local source \n' From 49b7c223dab57bde958dc8acd9e1af70cc954cbc Mon Sep 17 00:00:00 2001 From: Summer Date: Thu, 28 Aug 2025 16:25:55 -0700 Subject: [PATCH 33/34] [08/28] GKE cli fix --- src/google/adk/cli/cli_deploy.py | 4 +-- tests/unittests/cli/utils/test_cli_deploy.py | 37 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/google/adk/cli/cli_deploy.py b/src/google/adk/cli/cli_deploy.py index fb3303f24f..2835706e51 100644 --- a/src/google/adk/cli/cli_deploy.py +++ b/src/google/adk/cli/cli_deploy.py @@ -844,8 +844,8 @@ def to_gke( result = subprocess.run( ['kubectl', 'apply', '-f', temp_folder], check=True, - capture_output=True, # <-- Add this - text=True, # <-- Add this + capture_output=True, + text=True, ) # 2. 
Print the captured output line by line diff --git a/tests/unittests/cli/utils/test_cli_deploy.py b/tests/unittests/cli/utils/test_cli_deploy.py index a699654224..c85a3967ce 100644 --- a/tests/unittests/cli/utils/test_cli_deploy.py +++ b/tests/unittests/cli/utils/test_cli_deploy.py @@ -388,10 +388,47 @@ def mock_subprocess_run(*args, **kwargs): assert f"containerPort: 9090" in yaml_content assert f"targetPort: 9090" in yaml_content assert "type: LoadBalancer" in yaml_content + assert "serviceAccountName:" not in yaml_content # 4. Verify cleanup assert str(rmtree_recorder.get_last_call_args()[0]) == str(tmp_path) +def test_to_gke_with_service_account( + monkeypatch: pytest.MonkeyPatch, + agent_dir: Callable[[bool, bool], Path], + tmp_path: Path, +) -> None: + """ + Tests that `to_gke` correctly adds the serviceAccountName to the + deployment manifest when the parameter is provided. + """ + src_dir = agent_dir(False, False) + monkeypatch.setattr(subprocess, "run", lambda *a, **k: types.SimpleNamespace(stdout="")) + monkeypatch.setattr(shutil, "rmtree", lambda *a, **k: None) + + # Execute with the new service_account_name parameter + cli_deploy.to_gke( + agent_folder=str(src_dir), + project="gke-proj", + region="us-east1", + cluster_name="my-gke-cluster", + service_name="gke-svc", + app_name="agent", + temp_folder=str(tmp_path), + port=9090, + trace_to_cloud=False, + with_ui=False, + log_level="debug", + adk_version="1.2.0", + service_account_name="my-test-sa", + ) + + deployment_yaml_path = tmp_path / "deployment.yaml" + assert deployment_yaml_path.is_file() + yaml_content = deployment_yaml_path.read_text() + + assert "serviceAccountName: my-test-sa" in yaml_content + def test_to_gke_editable_mode( monkeypatch: pytest.MonkeyPatch, agent_dir: Callable[[bool, bool], Path], From 4e88f42fad0f06df4b0171794f3255f0382accc9 Mon Sep 17 00:00:00 2001 From: Summer Date: Fri, 29 Aug 2025 14:05:26 -0700 Subject: [PATCH 34/34] [08/28] GKE cli fix --- 
tests/unittests/cli/utils/test_cli_deploy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/cli/utils/test_cli_deploy.py b/tests/unittests/cli/utils/test_cli_deploy.py index c85a3967ce..2b1ec5b3c5 100644 --- a/tests/unittests/cli/utils/test_cli_deploy.py +++ b/tests/unittests/cli/utils/test_cli_deploy.py @@ -337,7 +337,7 @@ def mock_subprocess_run(*args, **kwargs): assert dockerfile_path.is_file() dockerfile_content = dockerfile_path.read_text() assert "CMD adk web --port=9090" in dockerfile_content - assert 'RUN pip install "google-adk==1.2.0"' in dockerfile_content + assert "RUN pip install google-adk==1.2.0" in dockerfile_content assert len(run_recorder.calls) == 3, "Expected 3 subprocess calls"