From d5e71ff860e926cc1877a5f1bba1a00617a23e7a Mon Sep 17 00:00:00 2001
From: Yvonne Yu
Date: Tue, 5 May 2026 13:38:52 -0700
Subject: [PATCH] feat: migrate model garden to agentplatform

PiperOrigin-RevId: 910885525
---
 .kokoro/docker/docs/Dockerfile                |    5 +-
 agentplatform/__init__.py                     |   15 +
 agentplatform/model_garden/README.md          |  214 ++
 agentplatform/model_garden/__init__.py        |   27 +
 agentplatform/model_garden/_model_garden.py   | 1537 +++++++++++++
 agentplatform/preview/__init__.py             |   15 +
 agentplatform/preview/model_garden.py         |   31 +
 .../model_garden/test_model_garden.py         |    8 +-
 .../test_vertexai_model_garden.py             | 1999 +++++++++++++++++
 tests/unit/vertexai/test_rubric_based_eval.py |    3 +-
 10 files changed, 3848 insertions(+), 6 deletions(-)
 create mode 100644 agentplatform/model_garden/README.md
 create mode 100644 agentplatform/model_garden/__init__.py
 create mode 100644 agentplatform/model_garden/_model_garden.py
 create mode 100644 agentplatform/preview/__init__.py
 create mode 100644 agentplatform/preview/model_garden.py
 rename tests/unit/{vertexai => agentplatform}/model_garden/test_model_garden.py (99%)
 create mode 100644 tests/unit/vertexai/model_garden/test_vertexai_model_garden.py

diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile
index 93abeb68b7..a3668cbb93 100644
--- a/.kokoro/docker/docs/Dockerfile
+++ b/.kokoro/docker/docs/Dockerfile
@@ -21,7 +21,10 @@ ENV PATH /usr/local/bin:$PATH
 
 # Install dependencies.
 RUN apt-get update \
-  && apt-get install -y --no-install-recommends \
+  && apt-get install -y ca-certificates --fix-missing \
+  && update-ca-certificates
+
+RUN apt-get install -y --no-install-recommends \
   apt-transport-https \
   build-essential \
   ca-certificates \
diff --git a/agentplatform/__init__.py b/agentplatform/__init__.py
index 913bdcffb7..9756768002 100644
--- a/agentplatform/__init__.py
+++ b/agentplatform/__init__.py
@@ -14,11 +14,26 @@
 #
 """The agentplatform module."""
 
+import importlib
 from google.cloud.aiplatform import init
 from google.cloud.aiplatform import version as aiplatform_version
 
 __version__ = aiplatform_version.__version__
 
+
+def __getattr__(name):  # type: ignore[no-untyped-def]
+    if name == "preview":
+        # We need to import carefully to avoid `RecursionError`.
+        # This won't work since it causes `RecursionError`:
+        #   `from agentplatform import preview`
+        # This won't work due to Copybara lacking a transform:
+        #   `import google.cloud.aiplatform.agentplatform.preview as`
+        #   `agentplatform_preview`
+        return importlib.import_module(".preview", __name__)
+    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
+
+
 __all__ = [
     "init",
+    "preview",
 ]
diff --git a/agentplatform/model_garden/README.md b/agentplatform/model_garden/README.md
new file mode 100644
index 0000000000..5fc942a5d1
--- /dev/null
+++ b/agentplatform/model_garden/README.md
@@ -0,0 +1,214 @@
+# Gemini Enterprise Agent Platform Model Garden SDK for Python
+
+The Gemini Enterprise Agent Platform Model Garden SDK helps developers use [Model Garden](https://cloud.google.com/model-garden) open models to build AI-powered features and applications.
+The SDK supports use cases like the following:
+
+- Deploy an open model
+- Export open model weights
+
+## Installation
+
+To install the
+[google-cloud-aiplatform](https://pypi.org/project/google-cloud-aiplatform/)
+Python package, run the following command:
+
+```shell
+pip3 install --upgrade --user "google-cloud-aiplatform>=1.84"
+```
+
+## Usage
+
+For detailed instructions, see [deploy an open model](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/use-models#deploy_an_open_model) and the [deployment notebook tutorial](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_deployment_tutorial.ipynb).
+
+## Quick Start: Default Deployment
+
+This is the simplest way to deploy a model. If you provide just a model name, the SDK uses the default deployment configuration.
+
+```python
+from agentplatform import model_garden
+
+model = model_garden.OpenModel("google/paligemma@paligemma-224-float32")
+endpoint = model.deploy()
+```
+
+**Use case:** Fast prototyping, first-time users evaluating model outputs.
+
+## List Deployable Models
+
+You can list all models that are currently deployable via Model Garden:
+
+```python
+from agentplatform import model_garden
+
+models = model_garden.list_deployable_models()
+```
+
+To list only Hugging Face models, or to filter by keyword:
+
+```python
+models = model_garden.list_deployable_models(list_hf_models=True, model_filter="stable-diffusion")
+```
+
+**Use case:** Discover available models before deciding which one to deploy.
+
+## Hugging Face Model Deployment
+
+Deploy a model directly from Hugging Face using the model ID.
+
+```python
+model = model_garden.OpenModel("Qwen/Qwen2-1.5B-Instruct")
+endpoint = model.deploy()
+```
+
+**Use case:** Leverage community or third-party models without custom container setup. If the model is gated, you may need to provide a Hugging Face access token:
+
+```python
+endpoint = model.deploy(hugging_face_access_token="your_hf_token")
+```
+
+**Use case:** Deploy gated Hugging Face models requiring authentication.
+
+## List Deployment Configurations
+
+You can inspect the available deployment configurations for a model:
+
+```python
+model = model_garden.OpenModel("google/paligemma@paligemma-224-float32")
+deploy_options = model.list_deploy_options()
+```
+
+**Use case:** Evaluate compatible machine specs and containers before deployment.
+
+## Select a Verified Deployment: By Container Image
+
+Specify a container image from the list of verified deployment configurations.
+
+```python
+endpoint = model.deploy(
+    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20250430_0916_RC00_maas",
+)
+```
+
+## Select a Verified Deployment: By Hardware
+
+Specify a hardware configuration from the list of verified deployment configurations.
+
+```python
+endpoint = model.deploy(
+    machine_type="a3-highgpu-1g",
+    accelerator_type="NVIDIA_H100_80GB",
+    accelerator_count=1,
+)
+```
+
+## Select a Verified Deployment: By Container and Hardware
+
+Specify both a container image and a hardware configuration from the list of verified deployment configurations.
+
+```python
+endpoint = model.deploy(
+    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20250430_0916_RC00_maas",
+    machine_type="a3-highgpu-1g",
+    accelerator_type="NVIDIA_H100_80GB",
+    accelerator_count=1,
+)
+```
+
+**Use case:** Production configuration, performance tuning, scaling.
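+
+### Putting It Together
+
+A typical flow (illustrative; the exact option values vary by model) is to print the verified configurations with `list_deploy_options(concise=True)` and then pass one of them to `deploy()`:
+
+```python
+model = model_garden.OpenModel("google/gemma2@gemma-2-27b-it")
+print(model.list_deploy_options(concise=True))
+
+# Pick one of the printed options; the values below are placeholders.
+endpoint = model.deploy(
+    machine_type="g2-standard-12",
+    accelerator_type="NVIDIA_L4",
+    accelerator_count=1,
+)
+```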
+
+## EULA Acceptance
+
+Some models require acceptance of a license agreement. Pass `accept_eula=True` if prompted.
+
+```python
+model = model_garden.OpenModel("google/gemma2@gemma-2-27b-it")
+endpoint = model.deploy(accept_eula=True)
+```
+
+**Use case:** First-time deployment of EULA-protected models.
+
+## Spot VM Deployment
+
+Schedule workloads on Spot VMs for lower cost.
+
+```python
+endpoint = model.deploy(spot=True)
+```
+
+**Use case:** Cost-sensitive development and batch workloads.
+
+## Fast Tryout Deployment
+
+Enable the experimental fast-deploy path for popular models.
+
+```python
+endpoint = model.deploy(fast_tryout_enabled=True)
+```
+
+**Use case:** Interactive experimentation without full production setup.
+
+## Dedicated Endpoints
+
+Create a dedicated DNS-isolated endpoint.
+
+```python
+endpoint = model.deploy(use_dedicated_endpoint=True)
+```
+
+**Use case:** Traffic isolation for enterprise or regulated workloads.
+
+## Reservation Affinity
+
+Use shared or specific Compute Engine reservations.
+
+```python
+endpoint = model.deploy(
+    reservation_affinity_type="SPECIFIC_RESERVATION",
+    reservation_affinity_key="compute.googleapis.com/reservation-name",
+    reservation_affinity_values=["projects/YOUR_PROJECT/zones/YOUR_ZONE/reservations/YOUR_RESERVATION"],
+)
+```
+
+**Use case:** Optimized resource usage with pre-reserved capacity.
+
+## Custom Container Image
+
+Override the default container with a custom image.
+
+```python
+endpoint = model.deploy(
+    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/custom-container:latest"
+)
+```
+
+**Use case:** Use of custom inference servers or fine-tuned environments.
+
+## Advanced Full Container Configuration
+
+Further customize startup probes, health checks, shared memory, and gRPC ports.
+
+```python
+endpoint = model.deploy(
+    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/custom-container:latest",
+    serving_container_command=["python3"],
+    serving_container_args=["serve.py"],
+    serving_container_ports=[8888],
+    serving_container_environment_variables={"ENV": "prod"},
+    serving_container_predict_route="/predict",
+    serving_container_health_route="/health",
+    serving_container_shared_memory_size_mb=512,
+    serving_container_grpc_ports=[9000],
+    serving_container_startup_probe_exec=["/bin/check-start.sh"],
+    serving_container_health_probe_exec=["/bin/health-check.sh"],
+)
+```
+
+**Use case:** Production-grade deployments requiring deep customization of runtime behavior and monitoring.
+
+## Contributing
+
+See [Contributing](https://github.com/googleapis/python-aiplatform/blob/main/CONTRIBUTING.rst) for more information on contributing to the Gemini Enterprise Agent Platform Python SDK.
+
+## License
+
+The contents of this repository are licensed under the [Apache License, version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
\ No newline at end of file
diff --git a/agentplatform/model_garden/__init__.py b/agentplatform/model_garden/__init__.py
new file mode 100644
index 0000000000..589f3f77fa
--- /dev/null
+++ b/agentplatform/model_garden/__init__.py
@@ -0,0 +1,27 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Classes and functions for working with Model Garden."""
+
+# We just want to re-export certain classes
+# pylint: disable=g-multiple-import,g-importing-member
+from agentplatform.model_garden import _model_garden
+
+OpenModel = _model_garden.OpenModel
+PartnerModel = _model_garden.PartnerModel
+list_deployable_models = _model_garden.list_deployable_models
+list_models = _model_garden.list_models
+
+__all__ = ("OpenModel", "PartnerModel", "list_deployable_models", "list_models")
diff --git a/agentplatform/model_garden/_model_garden.py b/agentplatform/model_garden/_model_garden.py
new file mode 100644
index 0000000000..5b80f7e71d
--- /dev/null
+++ b/agentplatform/model_garden/_model_garden.py
@@ -0,0 +1,1537 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# pylint: disable=bad-continuation, line-too-long, protected-access
+"""Class for interacting with Model Garden OSS models."""
+
+import datetime
+import functools
+import re
+from typing import Dict, List, Optional, Sequence, Union
+
+from google.cloud import aiplatform
+from google.cloud.aiplatform import base
+from google.cloud.aiplatform import compat
+from google.cloud.aiplatform import initializer
+from google.cloud.aiplatform import models as aiplatform_models
+from google.cloud.aiplatform import utils
+from google.cloud.aiplatform_v1beta1 import types
+from google.cloud.aiplatform_v1beta1.services import model_garden_service
+from google.cloud.aiplatform_v1beta1.services import model_service
+from agentplatform import batch_prediction
+
+
+from google.protobuf import duration_pb2
+
+
+_LOGGER = base.Logger(__name__)
+_DEFAULT_VERSION = compat.V1BETA1
+_DEFAULT_TIMEOUT = 2 * 60 * 60  # 2 hours, same as UI one-click deployment.
+_DEFAULT_RECOMMEND_SPEC_TIMEOUT = 1 * 60  # 1 minute.
+_DEFAULT_EXPORT_TIMEOUT = 1 * 60 * 60  # 1 hour.
+_HF_WILDCARD_FILTER = "is_hf_wildcard(true)"
+_NATIVE_MODEL_FILTER = "is_hf_wildcard(false)"
+_VERIFIED_DEPLOYMENT_FILTER = (
+    "labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED"
+)
+
+
+def list_deployable_models(
+    *, list_hf_models: bool = False, model_filter: Optional[str] = None
+) -> List[str]:
+    """Lists the deployable models in Model Garden.
+
+    Args:
+        list_hf_models: Whether to list the Hugging Face models.
+        model_filter: Optional. A string to filter the models by.
+
+    Returns:
+        The names of the deployable models in Model Garden in the format of
+        `{publisher}/{model}@{version}` or Hugging Face model ID in the format
+        of `{organization}/{model}`.
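+
+    Example (illustrative; the returned names depend on the current Model
+    Garden catalog):
+
+    ```
+    from agentplatform import model_garden
+
+    names = model_garden.list_deployable_models(model_filter="gemma")
+    ```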
+    """
+
+    filter_str = _NATIVE_MODEL_FILTER
+    if list_hf_models:
+        filter_str = " AND ".join([_HF_WILDCARD_FILTER, _VERIFIED_DEPLOYMENT_FILTER])
+    if model_filter:
+        filter_str = (
+            f'{filter_str} AND (model_user_id=~"(?i).*{model_filter}.*" OR'
+            f' display_name=~"(?i).*{model_filter}.*")'
+        )
+
+    request = types.ListPublisherModelsRequest(
+        parent="publishers/*",
+        list_all_versions=True,
+        filter=filter_str,
+    )
+    client = initializer.global_config.create_client(
+        client_class=_ModelGardenClientWithOverride,
+        credentials=initializer.global_config.credentials,
+        location_override="us-central1",
+    )
+    response = client.list_publisher_models(request)
+    output = []
+    for page in response.pages:
+        for model in page.publisher_models:
+            if model.supported_actions.multi_deploy_vertex.multi_deploy_vertex:
+                output.append(
+                    re.sub(r"publishers/(hf-|)|models/", "", model.name)
+                    + ("" if list_hf_models else ("@" + model.version_id))
+                )
+    return output
+
+
+def list_models(
+    *, list_hf_models: bool = False, model_filter: Optional[str] = None
+) -> List[str]:
+    """Lists the models in Model Garden.
+
+    Args:
+        list_hf_models: Whether to list the Hugging Face models.
+        model_filter: Optional. A string to filter the models by.
+
+    Returns:
+        The names of the models in Model Garden in the format of
+        `{publisher}/{model}@{version}` or Hugging Face model ID in the format
+        of `{organization}/{model}`.
+    """
+    filter_str = _NATIVE_MODEL_FILTER
+    if list_hf_models:
+        filter_str = _HF_WILDCARD_FILTER
+    if model_filter:
+        filter_str = (
+            f'{filter_str} AND (model_user_id=~"(?i).*{model_filter}.*" OR'
+            f' display_name=~"(?i).*{model_filter}.*")'
+        )
+
+    request = types.ListPublisherModelsRequest(
+        parent="publishers/*",
+        list_all_versions=True,
+        filter=filter_str,
+    )
+    client = initializer.global_config.create_client(
+        client_class=_ModelGardenClientWithOverride,
+        credentials=initializer.global_config.credentials,
+        location_override="us-central1",
+    )
+    response = client.list_publisher_models(request)
+    output = []
+    for page in response.pages:
+        for model in page.publisher_models:
+            output.append(
+                re.sub(r"publishers/(hf-|)|models/", "", model.name)
+                + ("" if list_hf_models else ("@" + model.version_id))
+            )
+    return output
+
+
+def _is_hugging_face_model(model_name: str) -> bool:
+    """Returns whether the model is a Hugging Face model."""
+    return (
+        re.match(r"^(?P<organization>[^/]+)/(?P<model>[^/@]+)$", model_name)
+        is not None
+    )
+
+
+def _get_publisher_model_resource_name(publisher: str, model: str) -> str:
+    """Returns the resource name.
+
+    Args:
+        publisher: Publisher of the model.
+        model: Model name, may or may not include version.
+
+    Returns:
+        The resource name in the format of
+        `publishers/{publisher}/models/{model_user_id}@{version_id}`.
+    """
+    return f"publishers/{publisher}/models/{model}"
+
+
+def _reconcile_model_name(model_name: str) -> str:
+    """Returns the resource name from the model name.
+
+    Args:
+        model_name: Model Garden model resource name in the format of
+          `publishers/{publisher}/models/{model}@{version}`, or a simplified
+          resource name in the format of `{publisher}/{model}@{version}`, or a
+          Hugging Face model ID in the format of `{organization}/{model}`.
+
+    Returns:
+        The resource name in the format of
+        `publishers/{publisher}/models/{model}@{version}`.
+    """
+    model_name = model_name.lower()  # Use lower case for Hugging Face.
+    full_resource_name_match = re.match(
+        r"^publishers/(?P<publisher>[^/]+)/models/(?P<model>[^@]+)@(?P<version>[^@]+)$",
+        model_name,
+    )
+    if full_resource_name_match:
+        return _get_publisher_model_resource_name(
+            full_resource_name_match.group("publisher"),
+            full_resource_name_match.group("model")
+            + "@"
+            + full_resource_name_match.group("version"),
+        )
+    else:
+        simplified_name_match = re.match(
+            r"^(?P<publisher>[^/]+)/(?P<model>[^@]+)(?:@(?P<version>.+))?$",
+            model_name,
+        )
+        if simplified_name_match:
+            if simplified_name_match.group("version"):
+                return _get_publisher_model_resource_name(
+                    publisher=simplified_name_match.group("publisher"),
+                    model=simplified_name_match.group("model")
+                    + "@"
+                    + simplified_name_match.group("version"),
+                )
+            else:
+                return _get_publisher_model_resource_name(
+                    publisher=simplified_name_match.group("publisher"),
+                    model=simplified_name_match.group("model"),
+                )
+        else:
+            raise ValueError(f"`{model_name}` is not a valid Open Model name")
+
+
+def _construct_serving_container_spec(
+    serving_container_image_uri: Optional[str] = None,
+    serving_container_predict_route: Optional[str] = None,
+    serving_container_health_route: Optional[str] = None,
+    serving_container_command: Optional[Sequence[str]] = None,
+    serving_container_args: Optional[Sequence[str]] = None,
+    serving_container_environment_variables: Optional[Dict[str, str]] = None,
+    serving_container_ports: Optional[Sequence[int]] = None,
+    serving_container_grpc_ports: Optional[Sequence[int]] = None,
+    serving_container_deployment_timeout: Optional[int] = None,
+    serving_container_shared_memory_size_mb: Optional[int] = None,
+    serving_container_startup_probe_exec: Optional[Sequence[str]] = None,
+    serving_container_startup_probe_period_seconds: Optional[int] = None,
+    serving_container_startup_probe_timeout_seconds: Optional[int] = None,
+    serving_container_health_probe_exec: Optional[Sequence[str]] = None,
+    serving_container_health_probe_period_seconds: Optional[int] = None,
+    serving_container_health_probe_timeout_seconds: Optional[int] = None,
+) -> types.ModelContainerSpec:
+    """Constructs a ModelContainerSpec from the serving container parameters."""
+    env = None
+    ports = None
+    grpc_ports = None
+    deployment_timeout = (
+        duration_pb2.Duration(seconds=serving_container_deployment_timeout)
+        if serving_container_deployment_timeout
+        else None
+    )
+    startup_probe = None
+    health_probe = None
+
+    if serving_container_environment_variables:
+        env = [
+            types.EnvVar(name=str(key), value=str(value))
+            for key, value in serving_container_environment_variables.items()
+        ]
+    if serving_container_ports:
+        ports = [types.Port(container_port=port) for port in serving_container_ports]
+    if serving_container_grpc_ports:
+        grpc_ports = [
+            types.Port(container_port=port) for port in serving_container_grpc_ports
+        ]
+    if (
+        serving_container_startup_probe_exec
+        or serving_container_startup_probe_period_seconds
+        or serving_container_startup_probe_timeout_seconds
+    ):
+        startup_probe_exec = None
+        if serving_container_startup_probe_exec:
+            startup_probe_exec = types.Probe.ExecAction(
+                command=serving_container_startup_probe_exec
+            )
+        startup_probe = types.Probe(
+            exec=startup_probe_exec,
+            period_seconds=serving_container_startup_probe_period_seconds,
+            timeout_seconds=serving_container_startup_probe_timeout_seconds,
+        )
+    if (
+        serving_container_health_probe_exec
+        or serving_container_health_probe_period_seconds
+        or serving_container_health_probe_timeout_seconds
+    ):
+        health_probe_exec = None
+        if serving_container_health_probe_exec:
+            health_probe_exec = types.Probe.ExecAction(
+                command=serving_container_health_probe_exec
+            )
+        health_probe = types.Probe(
+            exec=health_probe_exec,
+            period_seconds=serving_container_health_probe_period_seconds,
+            timeout_seconds=serving_container_health_probe_timeout_seconds,
+        )
+
+    return types.ModelContainerSpec(
+        image_uri=serving_container_image_uri,
+        command=serving_container_command,
+        args=serving_container_args,
+        env=env,
+        ports=ports,
+        grpc_ports=grpc_ports,
+        predict_route=serving_container_predict_route,
+        health_route=serving_container_health_route,
+        deployment_timeout=deployment_timeout,
+        shared_memory_size_mb=serving_container_shared_memory_size_mb,
+        startup_probe=startup_probe,
+        health_probe=health_probe,
+    )
+
+
+class _ModelGardenClientWithOverride(utils.ClientWithOverride):
+    _is_temporary = True
+    _default_version = _DEFAULT_VERSION
+    _version_map = (
+        (
+            _DEFAULT_VERSION,
+            model_garden_service.ModelGardenServiceClient,
+        ),
+    )
+
+
+class _ModelServiceClientWithOverride(utils.ClientWithOverride):
+    _is_temporary = True
+    _default_version = _DEFAULT_VERSION
+    _version_map = (
+        (
+            _DEFAULT_VERSION,
+            model_service.ModelServiceClient,
+        ),
+    )
+
+
+class OpenModel:
+    """Represents a Model Garden Open model.
+
+    Attributes:
+        model_name: Model Garden model resource name in the format of
+          `publishers/{publisher}/models/{model}@{version}`, or a simplified
+          resource name in the format of `{publisher}/{model}@{version}`, or a
+          Hugging Face model ID in the format of `{organization}/{model}`.
+    """
+
+    __module__ = "agentplatform.model_garden"
+
+    def __init__(
+        self,
+        model_name: str,
+    ):
+        r"""Initializes a Model Garden model.
+
+        Usage:
+
+        ```
+        model = OpenModel("publishers/google/models/gemma2@gemma-2-2b-it")
+        ```
+
+        Args:
+            model_name: Model Garden model resource name in the format of
+              `publishers/{publisher}/models/{model}@{version}`, or a simplified
+              resource name in the format of `{publisher}/{model}@{version}`, or
+              a Hugging Face model ID in the format of `{organization}/{model}`.
+        """
+        project = initializer.global_config.project
+        location = initializer.global_config.location
+        credentials = initializer.global_config.credentials
+
+        self._model_name = model_name
+        self._is_hugging_face_model = _is_hugging_face_model(model_name)
+        self._publisher_model_name = _reconcile_model_name(model_name)
+        self._project = project
+        self._location = location
+        self._credentials = credentials
+
+    @functools.cached_property
+    def _model_garden_client(
+        self,
+    ) -> model_garden_service.ModelGardenServiceClient:
+        """Returns the Model Garden client."""
+        return initializer.global_config.create_client(
+            client_class=_ModelGardenClientWithOverride,
+            credentials=self._credentials,
+            location_override=self._location,
+        )
+
+    @functools.cached_property
+    def _us_central1_model_garden_client(
+        self,
+    ) -> model_garden_service.ModelGardenServiceClient:
+        """Returns the Model Garden client in us-central1."""
+        return initializer.global_config.create_client(
+            client_class=_ModelGardenClientWithOverride,
+            credentials=self._credentials,
+            location_override="us-central1",
+        )
+
+    def export(
+        self,
+        target_gcs_path: str = "",
+        export_request_timeout: Optional[float] = None,
+    ) -> str:
+        """Exports an Open Model to a Google Cloud Storage bucket.
+
+        Args:
+            target_gcs_path: The target Cloud Storage path to export the model
+              weights to.
+            export_request_timeout: The timeout for the export request. Default
+              is 1 hour.
+ + Returns: + str: the target gcs bucket where the model weights are downloaded to + + + Raises: + ValueError: If ``target_gcs_path`` is not specified + """ + if not target_gcs_path: + raise ValueError("target_gcs_path is required.") + + request = types.ExportPublisherModelRequest( + parent=f"projects/{self._project}/locations/{self._location}", + name=self._publisher_model_name, + destination=types.GcsDestination(output_uri_prefix=target_gcs_path), + ) + request_headers = [ + ("x-goog-user-project", "{}".format(initializer.global_config.project)), + ] + + _LOGGER.info(f"Exporting model weights: {self._model_name}") + + operation_future = self._model_garden_client.export_publisher_model( + request, metadata=request_headers + ) + _LOGGER.info(f"LRO: {operation_future.operation.name}") + + _LOGGER.info(f"Start time: {datetime.datetime.now()}") + export_publisher_model_response = operation_future.result( + timeout=export_request_timeout or _DEFAULT_EXPORT_TIMEOUT + ) + _LOGGER.info(f"End time: {datetime.datetime.now()}") + _LOGGER.info(f"Response: {export_publisher_model_response}") + + return export_publisher_model_response.destination_uri + + def deploy( + self, + accept_eula: bool = False, + hugging_face_access_token: Optional[str] = None, + machine_type: Optional[str] = None, + min_replica_count: int = 1, + max_replica_count: int = 1, + accelerator_type: Optional[str] = None, + accelerator_count: Optional[int] = None, + spot: bool = False, + reservation_affinity_type: Optional[str] = None, + reservation_affinity_key: Optional[str] = None, + reservation_affinity_values: Optional[List[str]] = None, + use_dedicated_endpoint: Optional[bool] = False, + dedicated_endpoint_disabled: Optional[bool] = False, + fast_tryout_enabled: Optional[bool] = False, + system_labels: Optional[Dict[str, str]] = None, + endpoint_display_name: Optional[str] = None, + model_display_name: Optional[str] = None, + deploy_request_timeout: Optional[float] = None, + serving_container_spec: Optional[types.ModelContainerSpec] = None, + serving_container_image_uri: Optional[str] = None, + serving_container_predict_route: Optional[str] = None, + serving_container_health_route: Optional[str] = None, + serving_container_command: Optional[Sequence[str]] = None, + serving_container_args: Optional[Sequence[str]] = None, + serving_container_environment_variables: Optional[Dict[str, str]] = None, + serving_container_ports: Optional[Sequence[int]] = None, + serving_container_grpc_ports: Optional[Sequence[int]] = None, + serving_container_deployment_timeout: Optional[int] = None, + serving_container_shared_memory_size_mb: Optional[int] = None, + serving_container_startup_probe_exec: Optional[Sequence[str]] = None, + serving_container_startup_probe_period_seconds: Optional[int] = None, + serving_container_startup_probe_timeout_seconds: Optional[int] = None, + serving_container_health_probe_exec: Optional[Sequence[str]] = None, + serving_container_health_probe_period_seconds: Optional[int] = None, + serving_container_health_probe_timeout_seconds: Optional[int] = None, + enable_private_service_connect: bool = False, + psc_project_allow_list: Optional[Sequence[str]] = None, + ) -> aiplatform.Endpoint: + """Deploys an Open Model to an endpoint. + + Args: + accept_eula (bool): Whether to accept the End User License Agreement. + hugging_face_access_token (str): The access token to access Hugging Face + models. Reference: https://huggingface.co/docs/hub/en/security-tokens + machine_type (str): Optional. The type of machine. 
Not specifying + machine type will result in model to be deployed with automatic + resources. + min_replica_count (int): Optional. The minimum number of machine + replicas this deployed model will be always deployed on. If traffic + against it increases, it may dynamically be deployed onto more + replicas, and as traffic decreases, some of these extra replicas may + be freed. + max_replica_count (int): Optional. The maximum number of replicas this + deployed model may be deployed on when the traffic against it + increases. If requested value is too large, the deployment will error, + but if deployment succeeds then the ability to scale the model to that + many replicas is guaranteed (barring service outages). If traffic + against the deployed model increases beyond what its replicas at + maximum may handle, a portion of the traffic will be dropped. If this + value is not provided, the larger value of min_replica_count or 1 will + be used. If value provided is smaller than min_replica_count, it will + automatically be increased to be min_replica_count. + accelerator_type (str): Optional. Hardware accelerator type. Must also + set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, + NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, + NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 + accelerator_count (int): Optional. The number of accelerators to attach + to a worker replica. + spot (bool): Optional. Whether to schedule the deployment workload on + spot VMs. + reservation_affinity_type (str): Optional. The type of reservation + affinity. One of NO_RESERVATION, ANY_RESERVATION, + SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, + SPECIFIC_THEN_NO_RESERVATION + reservation_affinity_key (str): Optional. Corresponds to the label key + of a reservation resource. To target a SPECIFIC_RESERVATION by name, + use `compute.googleapis.com/reservation-name` as the key and specify + the name of your reservation as its value. + reservation_affinity_values (List[str]): Optional. Corresponds to the + label values of a reservation resource. This must be the full resource + name of the reservation. + Format: + 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' + use_dedicated_endpoint (bool): Optional. Default value is False. If set + to True, the underlying prediction call will be made using the + dedicated endpoint dns. + dedicated_endpoint_disabled (bool): Optional. Default value is False. If set + to False, the underlying prediction call will be made using the + dedicated endpoint dns. Otherwise, the prediction call will be made + using the shared endpoint dns. + fast_tryout_enabled (bool): Optional. Defaults to False. If True, model + will be deployed using faster deployment path. Useful for quick + experiments. Not for production workloads. Only available for most + popular models with certain machine types. + system_labels (Dict[str, str]): Optional. System labels for Model Garden + deployments. These labels are managed by Google and for tracking + purposes only. + endpoint_display_name: The display name of the created endpoint. + model_display_name: The display name of the uploaded model. + deploy_request_timeout: The timeout for the deploy request. Default is 2 + hours. + serving_container_spec (types.ModelContainerSpec): Optional. The + container specification for the model instance. This specification + overrides the default container specification and other serving + container parameters. + serving_container_image_uri (str): Optional. 
The URI of the Model + serving container. This parameter is required if the parameter + `local_model` is not specified. + serving_container_predict_route (str): Optional. An HTTP path to send + prediction requests to the container, and which must be supported by + it. If not specified a default HTTP path will be used by Gemini Enterprise Agent Platform. + serving_container_health_route (str): Optional. An HTTP path to send + health check requests to the container, and which must be supported by + it. If not specified a standard HTTP path will be used by Gemini Enterprise Agent Platform. + serving_container_command: Optional[Sequence[str]]=None, The command + with which the container is run. Not executed within a shell. The + Docker image's ENTRYPOINT is used if this is not provided. Variable + references $(VAR_NAME) are expanded using the container's environment. + If a variable cannot be resolved, the reference in the input string + will be unchanged. The $(VAR_NAME) syntax can be escaped with a double + $$, ie: $$(VAR_NAME). Escaped references will never be expanded, + regardless of whether the variable exists or not. + serving_container_args: Optional[Sequence[str]]=None, The arguments to + the command. The Docker image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's + environment. If a variable cannot be resolved, the reference in the + input string will be unchanged. The $(VAR_NAME) syntax can be escaped + with a double $$, ie: $$(VAR_NAME). Escaped references will never be + expanded, regardless of whether the variable exists or not. + serving_container_environment_variables: Optional[Dict[str, str]]=None, + The environment variables that are to be present in the container. + Should be a dictionary where keys are environment variable names and + values are environment variable values for those names. + serving_container_ports: Optional[Sequence[int]]=None, Declaration of + ports that are exposed by the container. This field is primarily + informational, it gives Gemini Enterprise Agent Platform information about the network + connections the container uses. Listing or not a port here has no + impact on whether the port is actually exposed, any port listening on + the default "0.0.0.0" address inside a container will be accessible + from the network. + serving_container_grpc_ports: Optional[Sequence[int]]=None, Declaration + of ports that are exposed by the container. Gemini Enterprise Agent + Platform sends gRPC + prediction requests that it receives to the first port on this list. + Gemini Enterprise Agent Platform also sends liveness and health checks to this port. If you + do not specify this field, gRPC requests to the container will be + disabled. Gemini Enterprise Agent Platform does not use ports other than the first one + listed. This field corresponds to the `ports` field of the Kubernetes + Containers v1 core API. + serving_container_deployment_timeout (int): Optional. Deployment timeout + in seconds. + serving_container_shared_memory_size_mb (int): Optional. The amount of + the VM memory to reserve as the shared memory for the model in + megabytes. + serving_container_startup_probe_exec (Sequence[str]): Optional. Exec + specifies the action to take. Used by startup probe. An example of + this argument would be ["cat", "/tmp/healthy"] + serving_container_startup_probe_period_seconds (int): Optional. How + often (in seconds) to perform the startup probe. Default to 10 + seconds. Minimum value is 1. 
+ serving_container_startup_probe_timeout_seconds (int): Optional. Number + of seconds after which the startup probe times out. Defaults to 1 + second. Minimum value is 1. + serving_container_health_probe_exec (Sequence[str]): Optional. Exec + specifies the action to take. Used by health probe. An example of this + argument would be ["cat", "/tmp/healthy"] + serving_container_health_probe_period_seconds (int): Optional. How often + (in seconds) to perform the health probe. Default to 10 seconds. + Minimum value is 1. + serving_container_health_probe_timeout_seconds (int): Optional. Number + of seconds after which the health probe times out. Defaults to 1 + second. Minimum value is 1. + enable_private_service_connect (bool): Whether to enable private service + connect. + psc_project_allow_list (Sequence[str]): The list of projects that are + allowed to access the endpoint over private service connect. + + Returns: + endpoint (aiplatform.Endpoint): + Created endpoint. + + Raises: + ValueError: If ``serving_container_spec`` is specified but + ``serving_container_spec.image_uri`` + is ``None``, or if ``serving_container_spec`` is specified but other + serving container parameters are specified. + """ + request = types.DeployRequest( + destination=f"projects/{self._project}/locations/{self._location}", + ) + if self._is_hugging_face_model: + request.hugging_face_model_id = self._model_name.lower() + else: + request.publisher_model_name = self._publisher_model_name + + if endpoint_display_name: + request.endpoint_config.endpoint_display_name = endpoint_display_name + if model_display_name: + request.model_config.model_display_name = model_display_name + + if accept_eula: + request.model_config.accept_eula = accept_eula + + if hugging_face_access_token: + request.model_config.hugging_face_access_token = hugging_face_access_token + + provided_custom_machine_spec = ( + machine_type or accelerator_type or accelerator_count + ) + if provided_custom_machine_spec: + dedicated_resources = types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type=machine_type, + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + ), + min_replica_count=min_replica_count, + max_replica_count=max_replica_count, + ) + request.deploy_config.dedicated_resources = dedicated_resources + if spot: + request.deploy_config.dedicated_resources.spot = True + + if reservation_affinity_type: + request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.reservation_affinity_type = ( + reservation_affinity_type + ) + if reservation_affinity_key and reservation_affinity_values: + request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.key = ( + reservation_affinity_key + ) + request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.values = ( + reservation_affinity_values + ) + + # TODO(b/417560875): Remove this once notebooks are migrated to use dedicated_endpoint_disabled. 
+ if use_dedicated_endpoint: + request.endpoint_config.dedicated_endpoint_enabled = use_dedicated_endpoint + + if dedicated_endpoint_disabled: + request.endpoint_config.dedicated_endpoint_disabled = ( + dedicated_endpoint_disabled + ) + + if enable_private_service_connect and psc_project_allow_list: + request.endpoint_config.private_service_connect_config = ( + types.PrivateServiceConnectConfig( + enable_private_service_connect=enable_private_service_connect, + project_allowlist=psc_project_allow_list, + ) + ) + + if fast_tryout_enabled: + request.deploy_config.fast_tryout_enabled = fast_tryout_enabled + + if system_labels: + request.deploy_config.system_labels = system_labels + + if serving_container_spec: + if not serving_container_spec.image_uri: + raise ValueError( + "Serving container image uri is required for the serving container" + " spec." + ) + if serving_container_image_uri: + raise ValueError( + "Serving container image uri is already set in the serving" + " container spec." + ) + request.model_config.container_spec = serving_container_spec + + if serving_container_image_uri: + request.model_config.container_spec = _construct_serving_container_spec( + serving_container_image_uri, + serving_container_predict_route, + serving_container_health_route, + serving_container_command, + serving_container_args, + serving_container_environment_variables, + serving_container_ports, + serving_container_grpc_ports, + serving_container_deployment_timeout, + serving_container_shared_memory_size_mb, + serving_container_startup_probe_exec, + serving_container_startup_probe_period_seconds, + serving_container_startup_probe_timeout_seconds, + serving_container_health_probe_exec, + serving_container_health_probe_period_seconds, + serving_container_health_probe_timeout_seconds, + ) + + _LOGGER.info(f"Deploying model: {self._model_name}") + + operation_future = self._model_garden_client.deploy(request) + _LOGGER.info(f"LRO: {operation_future.operation.name}") + + _LOGGER.info(f"Start time: {datetime.datetime.now()}") + deploy_response = operation_future.result( + timeout=deploy_request_timeout or _DEFAULT_TIMEOUT + ) + _LOGGER.info(f"End time: {datetime.datetime.now()}") + + self._endpoint_name = deploy_response.endpoint + _LOGGER.info(f"Endpoint: {self._endpoint_name}") + endpoint = aiplatform.Endpoint._construct_sdk_resource_from_gapic( + aiplatform_models.gca_endpoint_compat.Endpoint(name=self._endpoint_name), + ) + return endpoint + + def list_deploy_options( + self, + concise: bool = False, + serving_container_image_uri_filter: Optional[Union[str, List[str]]] = None, + machine_type_filter: Optional[str] = None, + accelerator_type_filter: Optional[str] = None, + ) -> Union[str, Sequence[types.PublisherModel.CallToAction.Deploy]]: + """Lists the verified deploy options for the model. + + Args: + concise: If true, returns a human-readable string with container and + machine specs. + serving_container_image_uri_filter: If specified, only return the + deploy options where the serving container image URI contains one of + the specified keyword(s) (e.g., "vllm" or ["vllm", "tgi"]). The + filter is case-insensitive. + machine_type_filter: If specified, only return the deploy options + where the machine type contains one of the specified keyword(s) + (e.g., "n1" or ["n1", "g2"]). The filter is case-insensitive. + accelerator_type_filter: If specified, only return the deploy options + where the accelerator type contains one of the specified keyword(s) + (e.g., "T4" or ["T4", "L4"]). 
The filter is case-insensitive. + + Returns: + A list of deploy options or a concise formatted string. + """ + request = types.GetPublisherModelRequest( + name=self._publisher_model_name, + is_hugging_face_model=bool(self._is_hugging_face_model), + include_equivalent_model_garden_model_deployment_configs=True, + ) + response = self._us_central1_model_garden_client.get_publisher_model(request) + deploy_options = ( + response.supported_actions.multi_deploy_vertex.multi_deploy_vertex + ) + + if not deploy_options: + raise ValueError( + "Model does not support deployment. " + "Use `list_deployable_models()` to find supported models." + ) + + if serving_container_image_uri_filter: + if isinstance(serving_container_image_uri_filter, str): + serving_container_image_uri_filter = [ + serving_container_image_uri_filter + ] + serving_container_image_uri_filter = [ + f.lower() for f in serving_container_image_uri_filter + ] + deploy_options = [ + option + for option in deploy_options + if option.container_spec + and any( + f in option.container_spec.image_uri.lower() + for f in serving_container_image_uri_filter + ) + ] + + if machine_type_filter: + filters = ( + [machine_type_filter] + if isinstance(machine_type_filter, str) + else machine_type_filter + ) + deploy_options = [ + option + for option in deploy_options + if option.dedicated_resources + and option.dedicated_resources.machine_spec + and any( + f.lower() + in option.dedicated_resources.machine_spec.machine_type.lower() + for f in filters + ) + ] + + if accelerator_type_filter: + filters = ( + [accelerator_type_filter] + if isinstance(accelerator_type_filter, str) + else accelerator_type_filter + ) + deploy_options = [ + option + for option in deploy_options + if option.dedicated_resources + and option.dedicated_resources.machine_spec + and option.dedicated_resources.machine_spec.accelerator_type + and any( + f.lower() + in option.dedicated_resources.machine_spec.accelerator_type.name.lower() + for f in filters + ) + ] + + if not deploy_options: + raise ValueError("No deploy options found.") + + if not concise: + return deploy_options + + def _extract_config(option): + container = ( + option.container_spec.image_uri if option.container_spec else None + ) + machine = ( + option.dedicated_resources.machine_spec + if option.dedicated_resources + else None + ) + option_name = getattr(option, "deploy_task_name", None) + + return { + "option_name": option_name, + "serving_container_image_uri": container, + "machine_type": getattr(machine, "machine_type", None), + "accelerator_type": getattr( + getattr(machine, "accelerator_type", None), "name", None + ), + "accelerator_count": getattr(machine, "accelerator_count", None), + } + + concise_deploy_options = [_extract_config(opt) for opt in deploy_options] + return "\n\n".join( + ( + f"[Option {i + 1}: {config['option_name']}]\n" + if config.get("option_name") + else f"[Option {i + 1}]\n" + ) + + "\n".join( + f' {k}="{v}",' if k != "accelerator_count" else f" {k}={v}," + for k, v in config.items() + if v is not None and k != "option_name" + ) + for i, config in enumerate(concise_deploy_options) + ) + + def batch_predict( + self, + input_dataset: Union[str, List[str]], + *, + output_uri_prefix: Optional[str] = None, + job_display_name: Optional[str] = None, + machine_type: Optional[str] = None, + accelerator_type: Optional[str] = None, + accelerator_count: Optional[int] = None, + starting_replica_count: Optional[int] = None, + max_replica_count: Optional[int] = None, + ) -> 
batch_prediction.BatchPredictionJob: + """Perform batch prediction on the model. + + Args: + input_dataset (Union[str, List[str]]): GCS URI(-s) or BigQuery URI to + your input data to run batch prediction on. Example: + "gs://path/to/input/data.jsonl" or + "bq://projectId.bqDatasetId.bqTableId" + output_uri_prefix (Optional[str]): GCS or BigQuery URI prefix for the + output predictions. Example: "gs://path/to/output/data" or + "bq://projectId.bqDatasetId" If not specified, + f"{STAGING_BUCKET}/gen-ai-batch-prediction" will be used for GCS + source and + f"bq://projectId.gen_ai_batch_prediction.predictions_{TIMESTAMP}" will + be used for BigQuery source. + job_display_name (Optional[str]): The user-defined name of the + BatchPredictionJob. The name can be up to 128 characters long and can + be consist of any UTF-8 characters. + machine_type (Optional[str]): The machine type for the batch prediction + job. + accelerator_type (Optional[str]): The accelerator type for the batch + prediction job. + accelerator_count (Optional[int]): The accelerator count for the batch + prediction job. + starting_replica_count (Optional[int]): The starting replica count for + the batch prediction job. + max_replica_count (Optional[int]): The maximum replica count for the + batch prediction job. + + Returns: + batch_prediction.BatchPredictionJob: + The batch prediction job. + """ + return batch_prediction.BatchPredictionJob.submit( + source_model=self._publisher_model_name, + input_dataset=input_dataset, + output_uri_prefix=output_uri_prefix, + job_display_name=job_display_name, + machine_type=machine_type, + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + starting_replica_count=starting_replica_count, + max_replica_count=max_replica_count, + ) + + def check_license_agreement_status(self) -> bool: + """Check whether the project has accepted the license agreement of the model. + + EULA (End User License Agreement) is a legal document that the user must + accept before using the model. For Models having license restrictions, + the user must accept the EULA before using the model. You can check the + details of the License in Model Garden. + + Returns: + bool : True if the project has accepted the End User License + Agreement, False otherwise. + """ + request = types.CheckPublisherModelEulaAcceptanceRequest( + parent=f"projects/{self._project}", + publisher_model=self._publisher_model_name, + ) + response = self._model_garden_client.check_publisher_model_eula_acceptance( + request + ) + return response.publisher_model_eula_acked + + def accept_model_license_agreement( + self, + ) -> types.model_garden_service.PublisherModelEulaAcceptance: + """Accepts the EULA(End User License Agreement) of the model for the project. + + For Models having license restrictions, the user must accept the EULA + before using the model. Calling this method will mark the EULA as accepted + for the project. + + Returns: + types.model_garden_service.PublisherModelEulaAcceptance: + The response of the accept_eula call, containing project number, + model name and acceptance status. + """ + request = types.AcceptPublisherModelEulaRequest( + parent=f"projects/{self._project}", + publisher_model=self._publisher_model_name, + ) + return self._model_garden_client.accept_publisher_model_eula(request) + + +class CustomModel: + """Represents a Model Garden Custom model.""" + + def __init__( + self, + gcs_uri: Optional[str] = None, + ): + r"""Initializes a Model Garden Custom model. 
+ + Usage: + + ``` + model = agentplatform.CustomModel( + gcs_uri = 'gs://tuning-job-output/node-0/checkpoints/final') + ``` + + Args: + gcs_uri: The GCS URI of the custom model, storing weights and config + files + """ + if not gcs_uri: + raise ValueError("gcs_uri must be specified.") + + project = initializer.global_config.project + location = initializer.global_config.location + credentials = initializer.global_config.credentials + + self._gcs_uri = gcs_uri + self._project = project + self._location = location + self._credentials = credentials + + @functools.cached_property + def _model_garden_client( + self, + ) -> model_garden_service.ModelGardenServiceClient: + """Returns the Model Garden client.""" + return initializer.global_config.create_client( + client_class=_ModelGardenClientWithOverride, + credentials=self._credentials, + location_override=self._location, + ) + + @functools.cached_property + def _model_service_client( + self, + ) -> model_service.ModelServiceClient: + """Returns the Model Service client.""" + return initializer.global_config.create_client( + client_class=_ModelServiceClientWithOverride, + credentials=self._credentials, + location_override=self._location, + ) + + def list_deploy_options( + self, + available_machines: bool = True, + filter_by_user_quota: bool = True, + request_timeout: Optional[float] = None, + ) -> str: + """Lists the deploy options for the model. + + Args: + available_machines: If true, only return the deploy options for + available machines. + filter_by_user_quota: If true, only return the deploy options for + machines that the user has quota for. + request_timeout: The timeout for the recommend spec request. + Default is 60 seconds. + + Returns: + str: A string of the deploy options represented by + machine spec and container spec. 
+ + """ + + def _extract_spec(spec): + machine_spec = spec.machine_spec + return { + "machine_type": getattr(machine_spec, "machine_type", None), + "accelerator_type": getattr( + getattr(machine_spec, "accelerator_type", None), "name", None + ), + "accelerator_count": getattr(machine_spec, "accelerator_count", None), + } + + def _extract_recommendation(recommendation): + extracted_spec = _extract_spec(recommendation.spec) + extracted_spec["region"] = getattr(recommendation, "region", None) + if ( + recommendation.user_quota_state + and recommendation.user_quota_state + != types.RecommendSpecResponse.Recommendation.QuotaState.QUOTA_STATE_UNSPECIFIED + ): + extracted_spec["user_quota_state"] = getattr( + getattr(recommendation, "user_quota_state", None), "name", None + ) + return extracted_spec + + request = types.RecommendSpecRequest( + gcs_uri=self._gcs_uri, + parent=f"projects/{self._project}/locations/{self._location}", + check_machine_availability=available_machines, + check_user_quota=filter_by_user_quota, + ) + try: + response = self._model_service_client.recommend_spec( + request, timeout=request_timeout or _DEFAULT_RECOMMEND_SPEC_TIMEOUT + ) + options = [] + if response.recommendations: + options = [ + _extract_recommendation(recommendation) + for recommendation in response.recommendations + if recommendation.spec + ] + if filter_by_user_quota: + options = [ + option + for option in options + if option.get("user_quota_state") + == "QUOTA_STATE_USER_HAS_QUOTA" + ] + elif response.specs: + options = [_extract_spec(spec) for spec in response.specs if spec] + return "\n\n".join( + f"[Option {i + 1}]\n" + + ",\n".join( + f' {k}="{v}"' if k != "accelerator_count" else f" {k}={v}" + for k, v in config.items() + if v is not None + ) + for i, config in enumerate(options) + ) + + except Exception as e: + _LOGGER.error(f"Failed to list deploy options: {e}") + raise e + + def deploy( + self, + machine_type: Optional[str] = None, + min_replica_count: int = 1, + max_replica_count: int = 1, + accelerator_type: Optional[str] = None, + accelerator_count: Optional[int] = None, + reservation_affinity_type: Optional[str] = None, + reservation_affinity_key: Optional[str] = None, + reservation_affinity_values: Optional[List[str]] = None, + system_labels: Optional[Dict[str, str]] = None, + endpoint_display_name: Optional[str] = None, + model_display_name: Optional[str] = None, + enable_private_service_connect: bool = False, + psc_project_allow_list: Optional[List[str]] = None, + deploy_request_timeout: Optional[float] = None, + ) -> aiplatform.Endpoint: + """Deploys a Custom Model to an endpoint. + + Args: + machine_type (str): Optional. The type of machine. Not specifying + machine type will result in model to be deployed with automatic + resources. + min_replica_count (int): Optional. The minimum number of machine + replicas this deployed model will be always deployed on. If traffic + against it increases, it may dynamically be deployed onto more + replicas, and as traffic decreases, some of these extra replicas may + be freed. + max_replica_count (int): Optional. The maximum number of replicas this + deployed model may be deployed on when the traffic against it + increases. If requested value is too large, the deployment will error, + but if deployment succeeds then the ability to scale the model to that + many replicas is guaranteed (barring service outages). If traffic + against the deployed model increases beyond what its replicas at + maximum may handle, a portion of the traffic will be dropped. 
If this + value is not provided, the larger value of min_replica_count or 1 will + be used. If value provided is smaller than min_replica_count, it will + automatically be increased to be min_replica_count. + accelerator_type (str): Optional. Hardware accelerator type. Must also + set accelerator_count if used. + accelerator_count (int): Optional. The number of accelerators to attach + to a worker replica. + reservation_affinity_type (str): Optional. The type of reservation + affinity. One of NO_RESERVATION, ANY_RESERVATION, + SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, + SPECIFIC_THEN_NO_RESERVATION + reservation_affinity_key (str): Optional. Corresponds to the label key + of a reservation resource. To target a SPECIFIC_RESERVATION by name, + use `compute.googleapis.com/reservation-name` as the key and specify + the name of your reservation as its value. + reservation_affinity_values (List[str]): Optional. Corresponds to the + label values of a reservation resource. This must be the full resource + name of the reservation. + Format: + 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' + system_labels (Dict[str, str]): Optional. System labels for Model Garden deployments. + endpoint_display_name: The display name of the created endpoint. + model_display_name: The display name of the custom model. + enable_private_service_connect (bool): Whether to enable private service + connect. + psc_project_allow_list (List[str]): The list of projects that are allowed to + access the endpoint over private service connect. + deploy_request_timeout: The timeout for the deploy request. Default is 2 + hours. + + Returns: + endpoint (aiplatform.Endpoint): + Created endpoint. + """ + return self._deploy_gcs_uri( + machine_type=machine_type, + min_replica_count=min_replica_count, + max_replica_count=max_replica_count, + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + reservation_affinity_type=reservation_affinity_type, + reservation_affinity_key=reservation_affinity_key, + reservation_affinity_values=reservation_affinity_values, + system_labels=system_labels, + endpoint_display_name=endpoint_display_name, + model_display_name=model_display_name, + enable_private_service_connect=enable_private_service_connect, + psc_project_allow_list=psc_project_allow_list, + deploy_request_timeout=deploy_request_timeout, + ) + + def _deploy_model_registry_model(self) -> aiplatform.Endpoint: + """Deploys a Model Registry model to an endpoint.""" + raise NotImplementedError( + "Not implemented yet. Please provide gcs_uri in CustomModel constructor." + ) + + def _deploy_gcs_uri( + self, + machine_type: Optional[str] = None, + min_replica_count: int = 1, + max_replica_count: int = 1, + accelerator_type: Optional[str] = None, + accelerator_count: Optional[int] = None, + enable_private_service_connect: bool = False, + psc_project_allow_list: Optional[List[str]] = None, + reservation_affinity_type: Optional[str] = None, + reservation_affinity_key: Optional[str] = None, + reservation_affinity_values: Optional[List[str]] = None, + system_labels: Optional[Dict[str, str]] = None, + endpoint_display_name: Optional[str] = None, + model_display_name: Optional[str] = None, + deploy_request_timeout: Optional[float] = None, + ) -> aiplatform.Endpoint: + """Deploys a Custom Model to an endpoint. + + Args: + machine_type (str): Optional. The type of machine. Not specifying + machine type will result in model to be deployed with automatic + resources. 
+ min_replica_count (int): Optional. The minimum number of machine + replicas this deployed model will be always deployed on. If traffic + against it increases, it may dynamically be deployed onto more + replicas, and as traffic decreases, some of these extra replicas may + be freed. + max_replica_count (int): Optional. The maximum number of replicas this + deployed model may be deployed on when the traffic against it + increases. If requested value is too large, the deployment will error, + but if deployment succeeds then the ability to scale the model to that + many replicas is guaranteed (barring service outages). If traffic + against the deployed model increases beyond what its replicas at + maximum may handle, a portion of the traffic will be dropped. If this + value is not provided, the larger value of min_replica_count or 1 will + be used. If value provided is smaller than min_replica_count, it will + automatically be increased to be min_replica_count. + accelerator_type (str): Optional. Hardware accelerator type. Must also + set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, + NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, + NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 + accelerator_count (int): Optional. The number of accelerators to attach + to a worker replica. + reservation_affinity_type (str): Optional. The type of reservation + affinity. One of NO_RESERVATION, ANY_RESERVATION, + SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, + SPECIFIC_THEN_NO_RESERVATION + reservation_affinity_key (str): Optional. Corresponds to the label key + of a reservation resource. To target a SPECIFIC_RESERVATION by name, + use `compute.googleapis.com/reservation-name` as the key and specify + the name of your reservation as its value. + reservation_affinity_values (List[str]): Optional. Corresponds to the + label values of a reservation resource. This must be the full resource + name of the reservation. + Format: + 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' + system_labels (Dict[str, str]): Optional. System labels for Model Garden deployments. + endpoint_display_name: The display name of the created endpoint. + model_display_name: The display name of the custom model. + enable_private_service_connect (bool): Whether to enable private service + connect. + psc_project_allow_list (List[str]): The list of projects that are allowed to + access the endpoint over private service connect. + deploy_request_timeout: The timeout for the deploy request. Default is 2 + hours. + + Returns: + endpoint (aiplatform.Endpoint): + Created endpoint. + """ + + # Validation on machine type, accelerator type and count. + # Return true if all three of them have value or are None. + def has_all_or_none_values(var1, var2, var3) -> bool: + return (var1 and var2 and var3) or (not var1 and not var2 and not var3) + + if not has_all_or_none_values( + machine_type, accelerator_type, accelerator_count + ): + raise ValueError( + "machine_type, accelerator_type and accelerator_count must all be provided or not provided." 
+ ) + + request = types.DeployRequest( + destination=f"projects/{self._project}/locations/{self._location}", + ) + request.custom_model = types.DeployRequest.CustomModel(gcs_uri=self._gcs_uri) + if endpoint_display_name: + request.endpoint_config.endpoint_display_name = endpoint_display_name + if model_display_name: + request.model_config.model_display_name = model_display_name + if system_labels: + request.deploy_config.system_labels = system_labels + + if enable_private_service_connect and psc_project_allow_list: + request.endpoint_config.private_service_connect_config = ( + types.PrivateServiceConnectConfig( + enable_private_service_connect=enable_private_service_connect, + project_allowlist=psc_project_allow_list, + ) + ) + + if machine_type and accelerator_type and accelerator_count: + request.deploy_config.dedicated_resources = types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type=machine_type, + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + ) + ) + if min_replica_count: + request.deploy_config.dedicated_resources.min_replica_count = ( + min_replica_count + ) + if max_replica_count: + request.deploy_config.dedicated_resources.max_replica_count = ( + max_replica_count + ) + + if reservation_affinity_type: + request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.reservation_affinity_type = ( + reservation_affinity_type + ) + if reservation_affinity_key and reservation_affinity_values: + request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.key = ( + reservation_affinity_key + ) + request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.values = ( + reservation_affinity_values + ) + + _LOGGER.info(f"Deploying custom model: {self._gcs_uri}") + + try: + operation_future = self._model_garden_client.deploy(request) + _LOGGER.info(f"LRO: {operation_future.operation.name}") + deploy_response = operation_future.result( + timeout=deploy_request_timeout or _DEFAULT_TIMEOUT + ) + _LOGGER.info(f"End time: {datetime.datetime.now()}") + self._endpoint_name = deploy_response.endpoint + _LOGGER.info(f"Endpoint: {self._endpoint_name}") + endpoint = aiplatform.Endpoint._construct_sdk_resource_from_gapic( + aiplatform_models.gca_endpoint_compat.Endpoint( + name=self._endpoint_name + ), + ) + return endpoint + except ValueError as e: + _LOGGER.error(f"Failed to deploy custom model: {e}") + raise e + + +class PartnerModel: + """Represents a Model Garden Partner model.""" + + def __init__( + self, + model_name: str, + ): + r"""Initializes a Model Garden partner model. + + Usage: + + ``` + model = PartnerModel("publishers/ai21/models/jamba-large-1.6@001") + ``` + + Args: + model_name: Model Garden model resource name in the format of + `publishers/{publisher}/models/{model}@{version}`, or a simplified + resource name in the format of `{publisher}/{model}@{version}`. 
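+
+            For example, `ai21/jamba-large-1.6@001` is the simplified form of
+            `publishers/ai21/models/jamba-large-1.6@001`; both refer to the same
+            partner model.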
+        """
+        project = initializer.global_config.project
+        location = initializer.global_config.location
+        credentials = initializer.global_config.credentials
+
+        self._model_name = model_name
+        self._publisher_model_name = _reconcile_model_name(model_name)
+        self._project = project
+        self._location = location
+        self._credentials = credentials
+
+    @functools.cached_property
+    def _model_garden_client(
+        self,
+    ) -> model_garden_service.ModelGardenServiceClient:
+        """Returns the Model Garden client."""
+        return initializer.global_config.create_client(
+            client_class=_ModelGardenClientWithOverride,
+            credentials=self._credentials,
+            location_override=self._location,
+        )
+
+    def deploy(
+        self,
+        machine_type: Optional[str] = None,
+        min_replica_count: int = 1,
+        max_replica_count: int = 1,
+        accelerator_type: Optional[str] = None,
+        accelerator_count: Optional[int] = None,
+        endpoint_display_name: Optional[str] = None,
+        model_display_name: Optional[str] = None,
+        deploy_request_timeout: Optional[float] = None,
+    ) -> aiplatform.Endpoint:
+        """Deploys a partner model to an endpoint.
+
+        Args:
+          machine_type (str): Optional. The type of machine. If no machine type is
+            specified, the model is deployed with automatic resources.
+          min_replica_count (int): Optional. The minimum number of machine
+            replicas that this deployed model will always be deployed on. If
+            traffic against it increases, it may dynamically be deployed onto more
+            replicas, and as traffic decreases, some of these extra replicas may
+            be freed.
+          max_replica_count (int): Optional. The maximum number of replicas that
+            this deployed model may be deployed on when the traffic against it
+            increases. If the requested value is too large, the deployment will
+            error, but if the deployment succeeds then the ability to scale the
+            model to that many replicas is guaranteed (barring service outages).
+            If traffic against the deployed model increases beyond what its
+            replicas at maximum may handle, a portion of the traffic will be
+            dropped. If this value is not provided, the larger value of
+            min_replica_count or 1 will be used. If the provided value is smaller
+            than min_replica_count, it will automatically be increased to
+            min_replica_count.
+          accelerator_type (str): Optional. Hardware accelerator type. Must also
+            set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED,
+            NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100,
+            NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
+          accelerator_count (int): Optional. The number of accelerators to attach
+            to a worker replica.
+          endpoint_display_name: The display name of the created endpoint.
+          model_display_name: The display name of the uploaded model.
+          deploy_request_timeout: The timeout for the deploy request, in seconds.
+            Default is 2 hours.
+
+        Returns:
+            endpoint (aiplatform.Endpoint):
+                Created endpoint.
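+
+        Example (the machine spec below is illustrative, not a verified
+        configuration for any particular partner model):
+
+        ```
+        model = PartnerModel("ai21/jamba-large-1.6@001")
+        endpoint = model.deploy(
+            machine_type="n1-standard-4",
+            accelerator_type="NVIDIA_TESLA_T4",
+            accelerator_count=1,
+        )
+        ```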
+        """
+        request = types.DeployRequest(
+            destination=f"projects/{self._project}/locations/{self._location}",
+        )
+        request.publisher_model_name = self._publisher_model_name
+
+        if endpoint_display_name:
+            request.endpoint_config.endpoint_display_name = endpoint_display_name
+        if model_display_name:
+            request.model_config.model_display_name = model_display_name
+
+        provided_custom_machine_spec = (
+            machine_type or accelerator_type or accelerator_count
+        )
+        if provided_custom_machine_spec:
+            dedicated_resources = types.DedicatedResources(
+                machine_spec=types.MachineSpec(
+                    machine_type=machine_type,
+                    accelerator_type=accelerator_type,
+                    accelerator_count=accelerator_count,
+                ),
+                min_replica_count=min_replica_count,
+                max_replica_count=max_replica_count,
+            )
+            request.deploy_config.dedicated_resources = dedicated_resources
+
+        _LOGGER.info(f"Deploying model: {self._model_name}")
+
+        operation_future = self._model_garden_client.deploy(request)
+        _LOGGER.info(f"LRO: {operation_future.operation.name}")
+
+        _LOGGER.info(f"Start time: {datetime.datetime.now()}")
+        deploy_response = operation_future.result(
+            timeout=deploy_request_timeout or _DEFAULT_TIMEOUT
+        )
+        _LOGGER.info(f"End time: {datetime.datetime.now()}")
+
+        self._endpoint_name = deploy_response.endpoint
+        _LOGGER.info(f"Endpoint: {self._endpoint_name}")
+        endpoint = aiplatform.Endpoint._construct_sdk_resource_from_gapic(
+            aiplatform_models.gca_endpoint_compat.Endpoint(name=self._endpoint_name),
+        )
+        return endpoint
+
+
+class Model:
+    """Represents a Model Garden model."""
+
+    def __init__(
+        self,
+        model_name: Optional[str] = None,
+    ):
+        r"""Initializes a Model Garden model.
+
+        Usage:
+
+        ```
+        model = Model("publishers/google/models/gemma3@gemma-3-27b-it")
+        model = Model("google/gemma3@gemma-3-27b-it")
+        model = Model("deepseek-ai/DeepSeek-V3-0324")
+        model = Model("gs://fine-tuning-output/node-0/checkpoints/final")
+        model = Model("projects/123/locations/us-central1/models/456")
+        ```
+
+        Args:
+          model_name: Name of the model artifact.
+
+            It can be:
+            1. A pretrained model:
+              1.1 a Model Garden model resource name in the format of
+              `publishers/{publisher}/models/{model}@{version}`, or
+              1.2 a simplified resource name in the format of
+              `{publisher}/{model}@{version}`, or
+              1.3 a Hugging Face model ID in the format of
+              `{organization}/{model}`.
+            2. Custom model weights, like
+              gs://fine-tuning-output/node-0/checkpoints/final.
+            3. A Model Registry model, like
+              projects/123/locations/us-central1/models/456 (not supported yet).
+        """
+        if not model_name:
+            raise ValueError("model_name must be specified.")
+
+        if re.match(r"^gs://", model_name):
+            self._model = CustomModel(gcs_uri=model_name)
+        elif re.match(r"^projects/.*/locations/.*/models/.*", model_name):
+            raise NotImplementedError("Model Registry models are not supported yet.")
+        else:
+            self._model = OpenModel(model_name)
+
+    def deploy(
+        self,
+        **kwargs,
+    ) -> aiplatform.Endpoint:
+        """Deploys the model to an endpoint."""
+        return self._model.deploy(**kwargs)
diff --git a/agentplatform/preview/__init__.py b/agentplatform/preview/__init__.py
new file mode 100644
index 0000000000..bb17a41dfd
--- /dev/null
+++ b/agentplatform/preview/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The agentplatform.preview module.""" diff --git a/agentplatform/preview/model_garden.py b/agentplatform/preview/model_garden.py new file mode 100644 index 0000000000..870104ec73 --- /dev/null +++ b/agentplatform/preview/model_garden.py @@ -0,0 +1,31 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes and functions for working with Model Garden.""" + +# pylint: disable=g-multiple-import,g-importing-member +from agentplatform.model_garden._model_garden import ( + Model, + CustomModel, + OpenModel, + list_deployable_models, +) + + +__all__ = ( + "Model", + "CustomModel", + "OpenModel", + "list_deployable_models", +) diff --git a/tests/unit/vertexai/model_garden/test_model_garden.py b/tests/unit/agentplatform/model_garden/test_model_garden.py similarity index 99% rename from tests/unit/vertexai/model_garden/test_model_garden.py rename to tests/unit/agentplatform/model_garden/test_model_garden.py index 0fa7cb565e..23c18d095f 100644 --- a/tests/unit/vertexai/model_garden/test_model_garden.py +++ b/tests/unit/agentplatform/model_garden/test_model_garden.py @@ -1,4 +1,4 @@ -# Copyright 2025 Google LLC +# Copyright 2026 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,9 +33,9 @@ from google.cloud.aiplatform_v1.types import manual_batch_tuning_parameters from google.cloud.aiplatform_v1beta1 import types from google.cloud.aiplatform_v1beta1.services import model_garden_service -from vertexai import batch_prediction -from vertexai import model_garden -from vertexai.preview import ( +from agentplatform import batch_prediction +from agentplatform import model_garden +from agentplatform.preview import ( model_garden as model_garden_preview, ) import pytest diff --git a/tests/unit/vertexai/model_garden/test_vertexai_model_garden.py b/tests/unit/vertexai/model_garden/test_vertexai_model_garden.py new file mode 100644 index 0000000000..7150043f08 --- /dev/null +++ b/tests/unit/vertexai/model_garden/test_vertexai_model_garden.py @@ -0,0 +1,1999 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for ModelGarden class.""" + +import importlib +import textwrap +from unittest import mock + +from google import auth +from google.api_core import operation as ga_operation +from google.auth import credentials as auth_credentials +from google.cloud import aiplatform +from google.cloud.aiplatform.compat.services import job_service_client +from google.cloud.aiplatform.compat.types import ( + batch_prediction_job as gca_batch_prediction_job_compat, +) +from google.cloud.aiplatform.compat.types import io as gca_io_compat +from google.cloud.aiplatform.compat.types import ( + job_state as gca_job_state_compat, +) +from google.cloud.aiplatform_v1.types import machine_resources +from google.cloud.aiplatform_v1.types import manual_batch_tuning_parameters +from google.cloud.aiplatform_v1beta1 import types +from google.cloud.aiplatform_v1beta1.services import model_garden_service +from vertexai import batch_prediction +from vertexai import model_garden +from vertexai.preview import ( + model_garden as model_garden_preview, +) +import pytest + +from google.protobuf import duration_pb2 + + +_TEST_PROJECT = "test-project" +_TEST_LOCATION = "us-central1" +_TEST_PROJECT_NUMBER = "1234567890" + +_TEST_MODEL_FULL_RESOURCE_NAME = ( + "publishers/google/models/paligemma@paligemma-224-float32" +) +_TEST_HUGGING_FACE_MODEL_FULL_RESOURCE_NAME = ( + "publishers/meta-llama/models/llama-3.3-70b-instruct" +) +_TEST_PUBLISHER_MODEL_NAME = "publishers/google/models/paligemma" +_TEST_HUGGING_FACE_PUBLISHER_MODEL_NAME = "publishers/hf-google/models/gemma-2-2b" +_TEST_MODEL_SIMPLIFIED_RESOURCE_NAME = "google/paligemma@paligemma-224-float32" +_TEST_MODEL_HUGGING_FACE_ID = "meta-llama/Llama-3.3-70B-Instruct" +_TEST_MODEL_HUGGING_FACE_RESOURCE_NAME = ( + "publishers/hf-meta-llama/models/llama-3.3-70b-instruct" +) +# Note: The full resource name is in lower case. 
+_TEST_MODEL_HUGGING_FACE_FULL_RESOURCE_NAME = ( + "publishers/hf-meta-llama/models/llama-3.3-70b-instruct@001" +) +_TEST_HUGGING_FACE_ACCESS_TOKEN = "test-access-token" + +_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME = "publishers/ai21/models/jamba-large-1.6@001" +_TEST_PARTNER_MODEL_SIMPLIFIED_RESOURCE_NAME = "ai21/jamba-large-1.6@001" + +_TEST_GCS_URI = "gs://some-bucket/some-model" +_TEST_ENDPOINT_NAME = "projects/test-project/locations/us-central1/endpoints/1234567890" +_TEST_MODEL_NAME = "projects/test-project/locations/us-central1/models/9876543210" +_TEST_IMAGE_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00" +_TEST_MODEL_CONTAINER_SPEC = types.ModelContainerSpec( + image_uri=_TEST_IMAGE_URI, + command=["python", "main.py"], + args=["--model-id=gemma-2b"], + env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")], + ports=[types.Port(container_port=7080)], + grpc_ports=[types.Port(container_port=7081)], + predict_route="/predictions/v1/predict", + health_route="/ping", + deployment_timeout=duration_pb2.Duration(seconds=1800), + shared_memory_size_mb=256, + startup_probe=types.Probe( + exec_=types.Probe.ExecAction(command=["python", "main.py"]), + period_seconds=10, + timeout_seconds=10, + ), + health_probe=types.Probe( + exec_=types.Probe.ExecAction(command=["python", "health_check.py"]), + period_seconds=10, + timeout_seconds=10, + ), +) +_TEST_BATCH_PREDICTION_JOB_ID = "123456789" +_TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" +_TEST_BATCH_PREDICTION_JOB_NAME = ( + f"{_TEST_PARENT}/batchPredictionJobs/{_TEST_BATCH_PREDICTION_JOB_ID}" +) +_TEST_BATCH_PREDICTION_MODEL_FULL_RESOURCE_NAME = ( + "publishers/google/models/gemma@gemma-2b-it" +) +_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME = "test-batch-prediction-job" +_TEST_JOB_STATE_RUNNING = gca_job_state_compat.JobState(3) +_TEST_GAPIC_BATCH_PREDICTION_JOB = gca_batch_prediction_job_compat.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME, + model=_TEST_BATCH_PREDICTION_MODEL_FULL_RESOURCE_NAME, + state=_TEST_JOB_STATE_RUNNING, +) +_TEST_BQ_INPUT_URI = "bq://test-project.test-dataset.test-input" +_TEST_BQ_OUTPUT_PREFIX = "bq://test-project.test-dataset.test-output" + + +@pytest.fixture(scope="module") +def google_auth_mock(): + with mock.patch.object(auth, "default") as google_auth_mock: + google_auth_mock.return_value = ( + auth_credentials.AnonymousCredentials(), + _TEST_PROJECT, + ) + yield google_auth_mock + + +@pytest.fixture +def export_publisher_model_mock(): + """Mocks the export_publisher_model method.""" + with mock.patch.object( + model_garden_service.ModelGardenServiceClient, + "export_publisher_model", + ) as export_publisher_model: + mock_export_lro = mock.Mock(ga_operation.Operation) + mock_export_lro.result.return_value = types.ExportPublisherModelResponse( + publisher_model=_TEST_MODEL_FULL_RESOURCE_NAME, + destination_uri=_TEST_GCS_URI, + ) + export_publisher_model.return_value = mock_export_lro + yield export_publisher_model + + +@pytest.fixture +def deploy_mock(): + """Mocks the deploy method.""" + with mock.patch.object( + model_garden_service.ModelGardenServiceClient, + "deploy", + ) as deploy: + mock_lro = mock.Mock(ga_operation.Operation) + mock_lro.result.return_value = types.DeployResponse( + endpoint=_TEST_ENDPOINT_NAME, + model=_TEST_MODEL_FULL_RESOURCE_NAME, + ) + deploy.return_value = mock_lro + yield deploy + + +@pytest.fixture +def batch_prediction_mock(): + """Mocks 
the create_batch_prediction_job method.""" + with mock.patch.object( + job_service_client.JobServiceClient, "create_batch_prediction_job" + ) as create_batch_prediction_job_mock: + create_batch_prediction_job_mock.return_value = _TEST_GAPIC_BATCH_PREDICTION_JOB + yield create_batch_prediction_job_mock + + +@pytest.fixture +def complete_bq_uri_mock(): + with mock.patch.object( + batch_prediction.BatchPredictionJob, "_complete_bq_uri" + ) as complete_bq_uri_mock: + complete_bq_uri_mock.return_value = _TEST_BQ_OUTPUT_PREFIX + yield complete_bq_uri_mock + + +@pytest.fixture +def get_publisher_model_mock(): + with mock.patch.object( + model_garden_service.ModelGardenServiceClient, "get_publisher_model" + ) as get_publisher_model_mock: + error_response = types.PublisherModel(name=_TEST_PUBLISHER_MODEL_NAME) + success_response = types.PublisherModel( + name=_TEST_PUBLISHER_MODEL_NAME, + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + deploy_task_name="vLLM 32K context", + container_spec=types.ModelContainerSpec( + image_uri=_TEST_IMAGE_URI, + command=["python", "main.py"], + args=["--model-id=gemma-2b"], + env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")], + ), + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ), + ), + types.PublisherModel.CallToAction.Deploy( + deploy_task_name="vLLM 128K context", + container_spec=types.ModelContainerSpec( + image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest", + command=["python", "main.py"], + args=["--model-id=gemma-2b"], + env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")], + ), + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-32", + accelerator_type="NVIDIA_L4", + accelerator_count=4, + ) + ), + ), + ] + ) + ), + ) + hf_success_response = types.PublisherModel( + name=_TEST_MODEL_HUGGING_FACE_RESOURCE_NAME, + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + container_spec=types.ModelContainerSpec( + image_uri=_TEST_IMAGE_URI, + command=["python", "main.py"], + args=["--model-id=gemma-2b"], + env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")], + ), + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ), + ), + types.PublisherModel.CallToAction.Deploy( + container_spec=types.ModelContainerSpec( + image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest", + command=["python", "main.py"], + args=["--model-id=gemma-2b"], + env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")], + ), + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-32", + accelerator_type="NVIDIA_L4", + accelerator_count=4, + ) + ), + ), + ] + ) + ), + ) + + call_counts = {} + + def side_effect_func(request, *args, **kwargs): + model_name = request.name + if model_name not in call_counts: + call_counts[model_name] = 0 + + call_counts[model_name] += 1 + + if model_name == 
_TEST_HUGGING_FACE_MODEL_FULL_RESOURCE_NAME: + return hf_success_response + + if call_counts[model_name] == 1: + return error_response + else: + return success_response + + get_publisher_model_mock.side_effect = side_effect_func + yield get_publisher_model_mock + + +@pytest.fixture +def list_publisher_models_mock(): + """Mocks the list_publisher_models method.""" + with mock.patch.object( + model_garden_service.ModelGardenServiceClient, + "list_publisher_models", + ) as list_publisher_models: + pager_mg = mock.Mock() + pager_mg.pages = [ + types.ListPublisherModelsResponse( + publisher_models=[ + types.PublisherModel( + name=_TEST_PUBLISHER_MODEL_NAME, + version_id="001", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + types.PublisherModel( + name=_TEST_PUBLISHER_MODEL_NAME, + version_id="002", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + ], + ), + types.ListPublisherModelsResponse( + publisher_models=[ + types.PublisherModel( + name=_TEST_PUBLISHER_MODEL_NAME, + version_id="003", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + types.PublisherModel( + name=_TEST_PUBLISHER_MODEL_NAME, + version_id="004", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + ], + ), + ] + pager_hf = mock.Mock() + pager_hf.pages = [ + types.ListPublisherModelsResponse( + publisher_models=[ + types.PublisherModel( + name=_TEST_HUGGING_FACE_PUBLISHER_MODEL_NAME, + version_id="001", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + types.PublisherModel( + name=_TEST_HUGGING_FACE_PUBLISHER_MODEL_NAME, + version_id="002", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + 
machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + ], + ), + types.ListPublisherModelsResponse( + publisher_models=[ + types.PublisherModel( + name=_TEST_HUGGING_FACE_PUBLISHER_MODEL_NAME, + version_id="003", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + types.PublisherModel( + name=_TEST_HUGGING_FACE_PUBLISHER_MODEL_NAME, + version_id="004", + supported_actions=types.PublisherModel.CallToAction( + multi_deploy_vertex=types.PublisherModel.CallToAction.DeployVertex( + multi_deploy_vertex=[ + types.PublisherModel.CallToAction.Deploy( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + ) + ) + ) + ] + ) + ), + ), + ], + ), + ] + list_publisher_models.side_effect = [pager_mg, pager_hf] + yield list_publisher_models + + +@pytest.fixture +def check_license_agreement_status_mock(): + """Mocks the check_license_agreement_status method.""" + with mock.patch.object( + model_garden_service.ModelGardenServiceClient, + "check_publisher_model_eula_acceptance", + ) as check_license_agreement_status: + check_license_agreement_status.return_value = ( + types.PublisherModelEulaAcceptance( + project_number=_TEST_PROJECT_NUMBER, + publisher_model=_TEST_MODEL_FULL_RESOURCE_NAME, + publisher_model_eula_acked=True, + ) + ) + yield check_license_agreement_status + + +@pytest.fixture +def accept_model_license_agreement_mock(): + """Mocks the accept_model_license_agreement method.""" + with mock.patch.object( + model_garden_service.ModelGardenServiceClient, + "accept_publisher_model_eula", + ) as accept_model_license_agreement: + accept_model_license_agreement.return_value = ( + types.PublisherModelEulaAcceptance( + project_number=_TEST_PROJECT_NUMBER, + publisher_model=_TEST_MODEL_FULL_RESOURCE_NAME, + publisher_model_eula_acked=True, + ) + ) + yield accept_model_license_agreement + + +@pytest.mark.usefixtures( + "google_auth_mock", + "deploy_mock", +) +class TestVertexAIModelGardenPartnerModel: + """Test cases for Model Garden PartnerModel class.""" + + def setup_method(self): + importlib.reload(aiplatform.initializer) + importlib.reload(aiplatform) + aiplatform.init(project=_TEST_PROJECT) + + def teardown_method(self): + aiplatform.initializer.global_pool.shutdown(wait=True) + + def test_deploy_full_resource_name_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.PartnerModel( + model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME + ) + model.deploy() + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + ) + ) + + def test_deploy_simplified_resource_name_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.PartnerModel( + model_name=_TEST_PARTNER_MODEL_SIMPLIFIED_RESOURCE_NAME + ) + model.deploy() + deploy_mock.assert_called_once_with( + types.DeployRequest( + 
publisher_model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + ) + ) + + def test_deploy_specify_machine_spec_success(self, deploy_mock): + """Tests deploying a model with specified machine spec.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.PartnerModel( + model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME + ) + model.deploy( + machine_type="n1-standard-4", + accelerator_type="NVIDIA_TESLA_T4", + accelerator_count=1, + min_replica_count=1, + max_replica_count=1, + ) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + deploy_config=types.DeployRequest.DeployConfig( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="n1-standard-4", + accelerator_type="NVIDIA_TESLA_T4", + accelerator_count=1, + ), + min_replica_count=1, + max_replica_count=1, + ) + ), + ) + ) + + def test_deploy_specify_partial_machine_spec_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.PartnerModel( + model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME + ) + model.deploy( + accelerator_type="NVIDIA_TESLA_T4", + ) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + deploy_config=types.DeployRequest.DeployConfig( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + accelerator_type="NVIDIA_TESLA_T4", + ), + min_replica_count=1, + max_replica_count=1, + ) + ), + ) + ) + + def test_deploy_with_timeout_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.PartnerModel( + model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME + ) + model.deploy(deploy_request_timeout=10) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + ), + ) + + def test_deploy_with_display_names_success(self, deploy_mock): + """Tests deploying a model with display names.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.PartnerModel( + model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME + ) + model.deploy( + endpoint_display_name="test-endpoint", + model_display_name="test-model", + ) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_PARTNER_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + model_config=types.DeployRequest.ModelConfig( + model_display_name="test-model", + ), + endpoint_config=types.DeployRequest.EndpointConfig( + endpoint_display_name="test-endpoint", + ), + ) + ) + + +@pytest.mark.usefixtures( + "google_auth_mock", + "deploy_mock", + "get_publisher_model_mock", + "list_publisher_models_mock", + "export_publisher_model_mock", + "batch_prediction_mock", + "complete_bq_uri_mock", + "check_license_agreement_status_mock", + "accept_model_license_agreement_mock", +) +class TestVertexAIModelGardenOpenModel: + """Test cases for Model Garden OpenModel class.""" + + def setup_method(self): + importlib.reload(aiplatform.initializer) + 
importlib.reload(aiplatform) + aiplatform.init(project=_TEST_PROJECT) + + def teardown_method(self): + aiplatform.initializer.global_pool.shutdown(wait=True) + + def test_export_full_resource_name_success(self, export_publisher_model_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.export(_TEST_GCS_URI) + export_publisher_model_mock.assert_called_once_with( + types.ExportPublisherModelRequest( + parent=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=types.GcsDestination(output_uri_prefix=_TEST_GCS_URI), + ), + metadata=[("x-goog-user-project", "test-project")], + ) + + def test_export_simplified_resource_name_success(self, export_publisher_model_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_SIMPLIFIED_RESOURCE_NAME) + model.export(_TEST_GCS_URI) + export_publisher_model_mock.assert_called_once_with( + types.ExportPublisherModelRequest( + parent=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=types.GcsDestination(output_uri_prefix=_TEST_GCS_URI), + ), + metadata=[("x-goog-user-project", "test-project")], + ) + + def test_export_hugging_face_id_success(self, export_publisher_model_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_HUGGING_FACE_ID) + model.export(_TEST_GCS_URI) + export_publisher_model_mock.assert_called_once_with( + types.ExportPublisherModelRequest( + parent=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + name=_TEST_HUGGING_FACE_MODEL_FULL_RESOURCE_NAME, + destination=types.GcsDestination(output_uri_prefix=_TEST_GCS_URI), + ), + metadata=[("x-goog-user-project", "test-project")], + ) + + def test_deploy_full_resource_name_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy() + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + ) + ) + + def test_deploy_simplified_resource_name_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_SIMPLIFIED_RESOURCE_NAME) + model.deploy() + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + ) + ) + + def test_deploy_hugging_face_id_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_HUGGING_FACE_ID) + model.deploy() + deploy_mock.assert_called_once_with( + types.DeployRequest( + hugging_face_model_id=_TEST_MODEL_HUGGING_FACE_ID.lower(), + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + ) + ) + + def test_deploy_specify_machine_spec_success(self, deploy_mock): + """Tests deploying a model with specified machine spec.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy( + 
machine_type="n1-standard-4", + accelerator_type="NVIDIA_TESLA_T4", + accelerator_count=1, + min_replica_count=1, + max_replica_count=1, + ) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + deploy_config=types.DeployRequest.DeployConfig( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + machine_type="n1-standard-4", + accelerator_type="NVIDIA_TESLA_T4", + accelerator_count=1, + ), + min_replica_count=1, + max_replica_count=1, + ) + ), + ) + ) + + def test_deploy_specify_partial_machine_spec_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy( + accelerator_type="NVIDIA_TESLA_T4", + ) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + deploy_config=types.DeployRequest.DeployConfig( + dedicated_resources=types.DedicatedResources( + machine_spec=types.MachineSpec( + accelerator_type="NVIDIA_TESLA_T4", + ), + min_replica_count=1, + max_replica_count=1, + ) + ), + ) + ) + + def test_deploy_with_timeout_success(self, deploy_mock): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy(deploy_request_timeout=10) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + ), + ) + + def test_deploy_with_display_names_success(self, deploy_mock): + """Tests deploying a model with display names.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy( + endpoint_display_name="test-endpoint", + model_display_name="test-model", + ) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + model_config=types.DeployRequest.ModelConfig( + model_display_name="test-model", + ), + endpoint_config=types.DeployRequest.EndpointConfig( + endpoint_display_name="test-endpoint", + ), + ) + ) + + def test_deploy_with_eula_success(self, deploy_mock): + """Tests deploying a model with EULA.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy(accept_eula=True) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + model_config=types.DeployRequest.ModelConfig( + accept_eula=True, + ), + ) + ) + + def test_deploy_with_hugging_face_access_token_success(self, deploy_mock): + """Tests deploying a model with Hugging Face access token.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_HUGGING_FACE_ID) + model.deploy(hugging_face_access_token=_TEST_HUGGING_FACE_ACCESS_TOKEN) + deploy_mock.assert_called_once_with( + types.DeployRequest( + 
hugging_face_model_id=_TEST_MODEL_HUGGING_FACE_ID.lower(),
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                model_config=types.DeployRequest.ModelConfig(
+                    hugging_face_access_token=_TEST_HUGGING_FACE_ACCESS_TOKEN,
+                ),
+            )
+        )
+
+    def test_deploy_with_spot_vm_success(self, deploy_mock):
+        """Tests deploying a model with spot VM."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(spot=True)
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(spot=True),
+                ),
+            )
+        )
+
+    def test_deploy_with_reservation_success(self, deploy_mock):
+        """Tests deploying a model with a specific reservation."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(
+            reservation_affinity_type="SPECIFIC_RESERVATION",
+            reservation_affinity_key="compute.googleapis.com/reservation-name",
+            reservation_affinity_values=[
+                "projects/test-project/zones/us-central1-a/reservations/test-reservation"
+            ],
+        )
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(
+                        machine_spec=types.MachineSpec(
+                            reservation_affinity=types.ReservationAffinity(
+                                reservation_affinity_type="SPECIFIC_RESERVATION",
+                                key="compute.googleapis.com/reservation-name",
+                                values=[
+                                    "projects/test-project/zones/us-central1-a/reservations/test-reservation"
+                                ],
+                            )
+                        )
+                    )
+                ),
+            )
+        )
+
+    def test_deploy_with_dedicated_endpoint_success(self, deploy_mock):
+        """Tests deploying a model with the dedicated endpoint disabled."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(dedicated_endpoint_disabled=True)
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                endpoint_config=types.DeployRequest.EndpointConfig(
+                    dedicated_endpoint_disabled=True
+                ),
+            )
+        )
+
+    def test_deploy_with_system_labels_success(self, deploy_mock):
+        """Tests deploying a model with system labels."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(system_labels={"test-key": "test-value"})
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                deploy_config=types.DeployRequest.DeployConfig(
+                    system_labels={"test-key": "test-value"}
+                ),
+            )
+        )
+
+    def test_deploy_with_fast_tryout_enabled_success(self, deploy_mock):
+        """Tests deploying a model with fast tryout enabled."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(fast_tryout_enabled=True)
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                deploy_config=types.DeployRequest.DeployConfig(
+                    fast_tryout_enabled=True
+                ),
+            )
+        )
+
+    def test_deploy_with_serving_container_image_success(self, deploy_mock):
+        """Tests deploying a model with a serving container image URI."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(
+            serving_container_image_uri=_TEST_IMAGE_URI,
+        )
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                model_config=types.DeployRequest.ModelConfig(
+                    container_spec=types.ModelContainerSpec(
+                        image_uri=_TEST_IMAGE_URI,
+                    )
+                ),
+            )
+        )
+
+    def test_deploy_with_serving_container_spec_success(self, deploy_mock):
+        """Tests deploying a model with a full serving container spec."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(serving_container_spec=_TEST_MODEL_CONTAINER_SPEC)
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                model_config=types.DeployRequest.ModelConfig(
+                    container_spec=_TEST_MODEL_CONTAINER_SPEC
+                ),
+            )
+        )
+
+    def test_deploy_with_serving_container_spec_no_image_uri_raises_error(self):
+        """Tests that a serving container spec without an image URI raises an error."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+
+        expected_message = (
+            "Serving container image uri is required for the serving container" " spec."
+        )
+        with pytest.raises(ValueError) as exception:
+            model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+            model.deploy(
+                serving_container_spec=types.ModelContainerSpec(
+                    predict_route="/predictions/v1/predict",
+                    health_route="/ping",
+                )
+            )
+        assert str(exception.value) == expected_message
+
+    def test_deploy_with_serving_container_spec_with_both_image_uri_raises_error(
+        self,
+    ):
+        """Tests that setting the container image URI both ways raises an error."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+
+        expected_message = (
+            "Serving container image uri is already set in the serving container"
+            " spec."
+ ) + with pytest.raises(ValueError) as exception: + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy( + serving_container_spec=types.ModelContainerSpec( + image_uri=_TEST_IMAGE_URI, + predict_route="/predictions/v1/predict", + health_route="/ping", + ), + serving_container_image_uri=_TEST_IMAGE_URI, + ) + assert str(exception.value) == expected_message + + def test_deploy_with_serving_container_spec_individual_fields_success( + self, deploy_mock + ): + """Tests deploying a model with serving container spec.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + model.deploy( + serving_container_image_uri=_TEST_IMAGE_URI, + serving_container_predict_route="/predictions/v1/predict", + serving_container_health_route="/ping", + serving_container_command=["python", "main.py"], + serving_container_args=["--model-id=gemma-2b"], + serving_container_environment_variables={"MODEL_ID": "gemma-2b"}, + serving_container_ports=[7080], + serving_container_grpc_ports=[7081], + serving_container_deployment_timeout=1800, + serving_container_shared_memory_size_mb=256, + serving_container_startup_probe_exec=["python", "main.py"], + serving_container_startup_probe_period_seconds=10, + serving_container_startup_probe_timeout_seconds=10, + serving_container_health_probe_exec=["python", "health_check.py"], + serving_container_health_probe_period_seconds=10, + serving_container_health_probe_timeout_seconds=10, + ) + deploy_mock.assert_called_once_with( + types.DeployRequest( + publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME, + destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}", + model_config=types.DeployRequest.ModelConfig( + container_spec=types.ModelContainerSpec( + image_uri=_TEST_IMAGE_URI, + command=["python", "main.py"], + args=["--model-id=gemma-2b"], + env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")], + ports=[types.Port(container_port=7080)], + grpc_ports=[types.Port(container_port=7081)], + predict_route="/predictions/v1/predict", + health_route="/ping", + deployment_timeout=duration_pb2.Duration(seconds=1800), + shared_memory_size_mb=256, + startup_probe=types.Probe( + exec_=types.Probe.ExecAction(command=["python", "main.py"]), + period_seconds=10, + timeout_seconds=10, + ), + health_probe=types.Probe( + exec_=types.Probe.ExecAction( + command=["python", "health_check.py"] + ), + period_seconds=10, + timeout_seconds=10, + ), + ) + ), + ) + ) + + def test_list_deploy_options(self, get_publisher_model_mock): + """Tests getting the supported deploy options for a model.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + + expected_message = ( + "Model does not support deployment. " + "Use `list_deployable_models()` to find supported models." 
+ ) + with pytest.raises(ValueError) as exception: + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + _ = model.list_deploy_options() + assert str(exception.value) == expected_message + + model.list_deploy_options() + get_publisher_model_mock.assert_called_with( + types.GetPublisherModelRequest( + name=_TEST_MODEL_FULL_RESOURCE_NAME, + is_hugging_face_model=False, + include_equivalent_model_garden_model_deployment_configs=True, + ) + ) + + hf_model = model_garden.OpenModel(_TEST_MODEL_HUGGING_FACE_ID) + hf_model.list_deploy_options() + get_publisher_model_mock.assert_called_with( + types.GetPublisherModelRequest( + name=_TEST_HUGGING_FACE_MODEL_FULL_RESOURCE_NAME, + is_hugging_face_model=True, + include_equivalent_model_garden_model_deployment_configs=True, + ) + ) + + def test_list_deploy_options_concise(self, get_publisher_model_mock): + """Tests getting the supported deploy options for a model.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + + expected_message = ( + "Model does not support deployment. " + "Use `list_deployable_models()` to find supported models." + ) + with pytest.raises(ValueError) as exception: + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + _ = model.list_deploy_options(concise=True) + assert str(exception.value) == expected_message + + result = model.list_deploy_options(concise=True) + expected_result = textwrap.dedent( + """\ + [Option 1: vLLM 32K context] + serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00", + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + + [Option 2: vLLM 128K context] + serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest", + machine_type="g2-standard-32", + accelerator_type="NVIDIA_L4", + accelerator_count=4,""" + ) + assert result == expected_result + get_publisher_model_mock.assert_called_with( + types.GetPublisherModelRequest( + name=_TEST_MODEL_FULL_RESOURCE_NAME, + is_hugging_face_model=False, + include_equivalent_model_garden_model_deployment_configs=True, + ) + ) + + hf_model = model_garden.OpenModel(_TEST_MODEL_HUGGING_FACE_ID) + hf_result = hf_model.list_deploy_options(concise=True) + expected_hf_result = textwrap.dedent( + """\ + [Option 1] + serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00", + machine_type="g2-standard-16", + accelerator_type="NVIDIA_L4", + accelerator_count=1, + + [Option 2] + serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest", + machine_type="g2-standard-32", + accelerator_type="NVIDIA_L4", + accelerator_count=4,""" + ) + assert hf_result == expected_hf_result + get_publisher_model_mock.assert_called_with( + types.GetPublisherModelRequest( + name=_TEST_HUGGING_FACE_MODEL_FULL_RESOURCE_NAME, + is_hugging_face_model=True, + include_equivalent_model_garden_model_deployment_configs=True, + ) + ) + + def test_list_deploy_options_with_filters(self, get_publisher_model_mock): + """Tests getting the supported deploy options for a model with filters.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME) + + expected_message = ( + "Model does not support 
deployment. " + "Use `list_deployable_models()` to find supported models." + ) + with pytest.raises(ValueError) as exception: + _ = model.list_deploy_options() + assert str(exception.value) == expected_message + + # Test serving_container_image_uri_filter + result = model.list_deploy_options(serving_container_image_uri_filter="vllm") + assert len(result) == 1 + assert "vllm" in result[0].container_spec.image_uri + + # Test case-insensitivity for serving_container_image_uri_filter + result = model.list_deploy_options(serving_container_image_uri_filter="VLLM") + assert len(result) == 1 + assert "vllm" in result[0].container_spec.image_uri + + # Test list of strings for serving_container_image_uri_filter + result = model.list_deploy_options( + serving_container_image_uri_filter=["vllm", "text-generation-inference"] + ) + assert len(result) == 2 + + # Test machine_type_filter + result = model.list_deploy_options(machine_type_filter="g2-standard-16") + assert len(result) == 1 + assert ( + "g2-standard-16" == result[0].dedicated_resources.machine_spec.machine_type + ) + + # Test case-insensitivity for machine_type_filter + result = model.list_deploy_options(machine_type_filter="G2-STANDARD-16") + assert len(result) == 1 + assert ( + "g2-standard-16" == result[0].dedicated_resources.machine_spec.machine_type + ) + + # Test accelerator_type_filter + result = model.list_deploy_options(accelerator_type_filter="L4") + assert len(result) == 2 + + # Test case-insensitivity for accelerator_type_filter + result = model.list_deploy_options(accelerator_type_filter="l4") + assert len(result) == 2 + + # Test combination of filters + result = model.list_deploy_options( + serving_container_image_uri_filter="vllm", + machine_type_filter="g2-standard-16", + accelerator_type_filter="L4", + ) + assert len(result) == 1 + + # Test with no match + with pytest.raises(ValueError): + model.list_deploy_options(machine_type_filter="non-existent") + + def test_list_deployable_models(self, list_publisher_models_mock): + """Tests getting the supported deploy options for a model.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + + mg_models = model_garden.list_deployable_models() + list_publisher_models_mock.assert_called_with( + types.ListPublisherModelsRequest( + parent="publishers/*", + list_all_versions=True, + filter="is_hf_wildcard(false)", + ) + ) + + assert mg_models == [ + "google/paligemma@001", + "google/paligemma@002", + "google/paligemma@003", + "google/paligemma@004", + ] + + hf_models = model_garden.list_deployable_models(list_hf_models=True) + list_publisher_models_mock.assert_called_with( + types.ListPublisherModelsRequest( + parent="publishers/*", + list_all_versions=True, + filter=( + "is_hf_wildcard(true) AND " + "labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED" + ), + ) + ) + assert hf_models == [ + "google/gemma-2-2b", + "google/gemma-2-2b", + "google/gemma-2-2b", + "google/gemma-2-2b", + ] + + def test_list_models(self, list_publisher_models_mock): + """Tests listing models.""" + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + + mg_models = model_garden.list_models() + list_publisher_models_mock.assert_called_with( + types.ListPublisherModelsRequest( + parent="publishers/*", + list_all_versions=True, + filter="is_hf_wildcard(false)", + ) + ) + + assert mg_models == [ + "google/paligemma@001", + "google/paligemma@002", + "google/paligemma@003", + "google/paligemma@004", + ] + + hf_models = model_garden.list_models(list_hf_models=True) 
+        list_publisher_models_mock.assert_called_with(
+            types.ListPublisherModelsRequest(
+                parent="publishers/*",
+                list_all_versions=True,
+                filter="is_hf_wildcard(true)",
+            )
+        )
+        assert hf_models == [
+            "google/gemma-2-2b",
+            "google/gemma-2-2b",
+            "google/gemma-2-2b",
+            "google/gemma-2-2b",
+        ]
+
+    def test_batch_prediction_success(self, batch_prediction_mock):
+        """Tests submitting a batch prediction job for an open model."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(
+            model_name=_TEST_BATCH_PREDICTION_MODEL_FULL_RESOURCE_NAME
+        )
+        job = model.batch_predict(
+            input_dataset=_TEST_BQ_INPUT_URI,
+            job_display_name=_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME,
+            machine_type="g2-standard-12",
+            accelerator_type="NVIDIA_L4",
+            accelerator_count=1,
+            starting_replica_count=1,
+        )
+
+        assert job.gca_resource == _TEST_GAPIC_BATCH_PREDICTION_JOB
+
+        expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob(
+            display_name=_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME,
+            model=_TEST_BATCH_PREDICTION_MODEL_FULL_RESOURCE_NAME,
+            input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig(
+                instances_format="bigquery",
+                bigquery_source=gca_io_compat.BigQuerySource(
+                    input_uri=_TEST_BQ_INPUT_URI
+                ),
+            ),
+            output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig(
+                bigquery_destination=gca_io_compat.BigQueryDestination(
+                    output_uri=_TEST_BQ_OUTPUT_PREFIX
+                ),
+                predictions_format="bigquery",
+            ),
+            dedicated_resources=machine_resources.BatchDedicatedResources(
+                machine_spec=machine_resources.MachineSpec(
+                    machine_type="g2-standard-12",
+                    accelerator_type="NVIDIA_L4",
+                    accelerator_count=1,
+                ),
+                starting_replica_count=1,
+            ),
+            manual_batch_tuning_parameters=manual_batch_tuning_parameters.ManualBatchTuningParameters(),
+        )
+
+        batch_prediction_mock.assert_called_once_with(
+            parent=_TEST_PARENT,
+            batch_prediction_job=expected_gapic_batch_prediction_job,
+            timeout=None,
+        )
+
+    def test_deploy_with_psc_success(self, deploy_mock):
+        """Tests deploying a model with Private Service Connect."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(
+            enable_private_service_connect=True,
+            psc_project_allow_list=["project-1", "project-2"],
+        )
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                endpoint_config=types.DeployRequest.EndpointConfig(
+                    private_service_connect_config=types.PrivateServiceConnectConfig(
+                        enable_private_service_connect=True,
+                        project_allowlist=["project-1", "project-2"],
+                    )
+                ),
+            )
+        )
+
+    def test_check_license_agreement_status_success(
+        self, check_license_agreement_status_mock
+    ):
+        """Tests checking EULA acceptance for a model."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        eula_acceptance = model.check_license_agreement_status()
+        check_license_agreement_status_mock.assert_called_once_with(
+            types.CheckPublisherModelEulaAcceptanceRequest(
+                parent=f"projects/{_TEST_PROJECT}",
+                publisher_model=_TEST_MODEL_FULL_RESOURCE_NAME,
+            )
+        )
+        assert eula_acceptance
+
+    def test_accept_model_license_agreement_success(
+        self, accept_model_license_agreement_mock
+    ):
+        """Tests accepting EULA for a model."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        eula_acceptance = model.accept_model_license_agreement()
+        accept_model_license_agreement_mock.assert_called_once_with(
+            types.AcceptPublisherModelEulaRequest(
+                parent=f"projects/{_TEST_PROJECT}",
+                publisher_model=_TEST_MODEL_FULL_RESOURCE_NAME,
+            )
+        )
+        assert eula_acceptance == types.PublisherModelEulaAcceptance(
+            project_number=_TEST_PROJECT_NUMBER,
+            publisher_model=_TEST_MODEL_FULL_RESOURCE_NAME,
+            publisher_model_eula_acked=True,
+        )
+
+
+@pytest.mark.usefixtures(
+    "google_auth_mock",
+    "deploy_mock",
+)
+class TestVertexAIModelGardenCustomModel:
+    """Test cases for the Model Garden CustomModel class."""
+
+    def setup_method(self):
+        importlib.reload(aiplatform.initializer)
+        importlib.reload(aiplatform)
+        aiplatform.init(project=_TEST_PROJECT)
+
+    def teardown_method(self):
+        aiplatform.initializer.global_pool.shutdown(wait=True)
+
+    def test_deploy_custom_model_gcs_uri_only_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+        model.deploy()
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                custom_model=types.DeployRequest.CustomModel(
+                    gcs_uri=_TEST_GCS_URI,
+                ),
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(
+                        min_replica_count=1,
+                        max_replica_count=1,
+                    )
+                ),
+            )
+        )
+
+    def test_deploy_custom_model_no_gcs_uri_raise_error(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        with pytest.raises(ValueError) as exception:
+            model = model_garden_preview.CustomModel()
+            model.deploy()
+        assert str(exception.value) == "gcs_uri must be specified."
+
+    def test_deploy_custom_model_machine_type_only_raise_error(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        with pytest.raises(ValueError) as exception:
+            model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+            model.deploy(machine_type="n1-standard-4")
+        assert (
+            str(exception.value)
+            == "machine_type, accelerator_type and accelerator_count must all"
+            " be provided or not provided."
+        )
+
+    def test_deploy_custom_model_with_all_config_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+        model.deploy(
+            machine_type="n1-standard-4",
+            accelerator_type="NVIDIA_TESLA_T4",
+            accelerator_count=1,
+            min_replica_count=2,
+            max_replica_count=3,
+            endpoint_display_name="custom-model-endpoint",
+            model_display_name="custom-model-id",
+        )
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                custom_model=types.DeployRequest.CustomModel(
+                    gcs_uri=_TEST_GCS_URI,
+                ),
+                model_config=types.DeployRequest.ModelConfig(
+                    model_display_name="custom-model-id",
+                ),
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(
+                        min_replica_count=2,
+                        max_replica_count=3,
+                        machine_spec=types.MachineSpec(
+                            machine_type="n1-standard-4",
+                            accelerator_type="NVIDIA_TESLA_T4",
+                            accelerator_count=1,
+                        ),
+                    ),
+                ),
+                endpoint_config=types.DeployRequest.EndpointConfig(
+                    endpoint_display_name="custom-model-endpoint",
+                ),
+            )
+        )
+
+    def test_deploy_custom_model_with_psc_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+        model.deploy(
+            machine_type="n1-standard-4",
+            accelerator_type="NVIDIA_TESLA_T4",
+            accelerator_count=1,
+            enable_private_service_connect=True,
+            psc_project_allow_list=["test-project"],
+        )
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                custom_model=types.DeployRequest.CustomModel(
+                    gcs_uri=_TEST_GCS_URI,
+                ),
+                endpoint_config=types.DeployRequest.EndpointConfig(
+                    private_service_connect_config=types.PrivateServiceConnectConfig(
+                        enable_private_service_connect=True,
+                        project_allowlist=["test-project"],
+                    ),
+                ),
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(
+                        min_replica_count=1,
+                        max_replica_count=1,
+                        machine_spec=types.MachineSpec(
+                            machine_type="n1-standard-4",
+                            accelerator_type="NVIDIA_TESLA_T4",
+                            accelerator_count=1,
+                        ),
+                    ),
+                ),
+            )
+        )
+
+    def test_deploy_custom_model_with_reservation_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+        model.deploy(
+            machine_type="n1-standard-4",
+            accelerator_type="NVIDIA_TESLA_T4",
+            accelerator_count=1,
+            reservation_affinity_type="SPECIFIC_RESERVATION",
+            reservation_affinity_key="compute.googleapis.com/reservation-name",
+            reservation_affinity_values=[
+                "projects/test-project/zones/us-central1-a/reservations/test-reservation"
+            ],
+        )
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                custom_model=types.DeployRequest.CustomModel(
+                    gcs_uri=_TEST_GCS_URI,
+                ),
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(
+                        min_replica_count=1,
+                        max_replica_count=1,
+                        machine_spec=types.MachineSpec(
+                            machine_type="n1-standard-4",
+                            accelerator_type="NVIDIA_TESLA_T4",
+                            accelerator_count=1,
+                            reservation_affinity=types.ReservationAffinity(
+                                reservation_affinity_type="SPECIFIC_RESERVATION",
+                                key="compute.googleapis.com/reservation-name",
+                                values=[
+                                    "projects/test-project/zones/us-central1-a/reservations/test-reservation"
+                                ],
+                            ),
+                        ),
+                    ),
+                ),
+            )
+        )
+
+    def test_deploy_custom_model_with_system_labels_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+        model.deploy(system_labels={"test-key": "test-value"})
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                custom_model=types.DeployRequest.CustomModel(
+                    gcs_uri=_TEST_GCS_URI,
+                ),
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(
+                        min_replica_count=1,
+                        max_replica_count=1,
+                    ),
+                    system_labels={"test-key": "test-value"},
+                ),
+            )
+        )
+
+    @pytest.mark.parametrize("filter_by_user_quota", [True, False])
+    def test_list_deploy_options_with_recommendations(self, filter_by_user_quota):
+        """Tests list_deploy_options when recommend_spec returns recommendations."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        mock_model_service_client = mock.Mock()
+        with mock.patch.object(
+            aiplatform.initializer.global_config,
+            "create_client",
+            return_value=mock_model_service_client,
+        ):
+            quota_state = types.RecommendSpecResponse.Recommendation.QuotaState
+            mock_response = types.RecommendSpecResponse(
+                recommendations=[
+                    types.RecommendSpecResponse.Recommendation(
+                        spec=types.RecommendSpecResponse.MachineAndModelContainerSpec(
+                            machine_spec=types.MachineSpec(
+                                machine_type="n1-standard-4",
+                                accelerator_type=types.AcceleratorType.NVIDIA_TESLA_T4,
+                                accelerator_count=1,
+                            )
+                        ),
+                        region="us-central1",
+                        user_quota_state=quota_state.QUOTA_STATE_USER_HAS_QUOTA,
+                    ),
+                    types.RecommendSpecResponse.Recommendation(
+                        spec=types.RecommendSpecResponse.MachineAndModelContainerSpec(
+                            machine_spec=types.MachineSpec(
+                                machine_type="n1-standard-8",
+                                accelerator_type=types.AcceleratorType.NVIDIA_TESLA_V100,
+                                accelerator_count=2,
+                            )
+                        ),
+                        region="us-east1",
+                        user_quota_state=quota_state.QUOTA_STATE_NO_USER_QUOTA,
+                    ),
+                    types.RecommendSpecResponse.Recommendation(
+                        spec=types.RecommendSpecResponse.MachineAndModelContainerSpec(
+                            machine_spec=types.MachineSpec(
+                                machine_type="g2-standard-24",
+                                accelerator_type=types.AcceleratorType.NVIDIA_L4,
+                                accelerator_count=2,
+                            )
+                        ),
+                        region="us-central1",
+                        user_quota_state=quota_state.QUOTA_STATE_UNSPECIFIED,
+                    ),
+                ]
+            )
+            mock_model_service_client.recommend_spec.return_value = mock_response
+
+            custom_model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+            result = custom_model.list_deploy_options(
+                filter_by_user_quota=filter_by_user_quota
+            )
+
+            if filter_by_user_quota:
+                expected_output = textwrap.dedent(
+                    """\
+                    [Option 1]
+                    machine_type="n1-standard-4",
+                    accelerator_type="NVIDIA_TESLA_T4",
+                    accelerator_count=1,
+                    region="us-central1",
+                    user_quota_state="QUOTA_STATE_USER_HAS_QUOTA\""""
+                )
+            else:
+                expected_output = textwrap.dedent(
+                    """\
+                    [Option 1]
+                    machine_type="n1-standard-4",
+                    accelerator_type="NVIDIA_TESLA_T4",
+                    accelerator_count=1,
+                    region="us-central1",
+                    user_quota_state="QUOTA_STATE_USER_HAS_QUOTA"
+
+                    [Option 2]
+                    machine_type="n1-standard-8",
+                    accelerator_type="NVIDIA_TESLA_V100",
+                    accelerator_count=2,
+                    region="us-east1",
+                    user_quota_state="QUOTA_STATE_NO_USER_QUOTA"
+
+                    [Option 3]
+                    machine_type="g2-standard-24",
+                    accelerator_type="NVIDIA_L4",
+                    accelerator_count=2,
+                    region="us-central1\""""
+                )
+            assert result == expected_output
+            mock_model_service_client.recommend_spec.assert_called_once_with(
+                types.RecommendSpecRequest(
+                    gcs_uri=_TEST_GCS_URI,
+                    parent=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                    check_machine_availability=True,
+                    check_user_quota=filter_by_user_quota,
+                ),
+                timeout=60,
+            )
+
+    def test_list_deploy_options_with_specs(self):
+        """Tests list_deploy_options returning all compatible specs when available_machines is False."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        mock_model_service_client = mock.Mock()
+        with mock.patch.object(
+            aiplatform.initializer.global_config,
+            "create_client",
+            return_value=mock_model_service_client,
+        ):
+            mock_response = types.RecommendSpecResponse(
+                specs=[
+                    types.RecommendSpecResponse.MachineAndModelContainerSpec(
+                        machine_spec=types.MachineSpec(
+                            machine_type="n1-standard-4",
+                            accelerator_type=types.AcceleratorType.NVIDIA_TESLA_T4,
+                            accelerator_count=1,
+                        )
+                    ),
+                    types.RecommendSpecResponse.MachineAndModelContainerSpec(
+                        machine_spec=types.MachineSpec(
+                            machine_type="n1-standard-8",
+                            accelerator_type=types.AcceleratorType.NVIDIA_TESLA_V100,
+                            accelerator_count=2,
+                        )
+                    ),
+                ]
+            )
+            mock_model_service_client.recommend_spec.return_value = mock_response
+
+            custom_model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+            result = custom_model.list_deploy_options(
+                available_machines=False, filter_by_user_quota=False
+            )
+
+            expected_output = textwrap.dedent(
+                """\
+                [Option 1]
+                machine_type="n1-standard-4",
+                accelerator_type="NVIDIA_TESLA_T4",
+                accelerator_count=1
+
+                [Option 2]
+                machine_type="n1-standard-8",
+                accelerator_type="NVIDIA_TESLA_V100",
+                accelerator_count=2"""
+            )
+            assert result == expected_output
+            mock_model_service_client.recommend_spec.assert_called_once_with(
+                types.RecommendSpecRequest(
+                    gcs_uri=_TEST_GCS_URI,
+                    parent=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                    check_machine_availability=False,
+                    check_user_quota=False,
+                ),
+                timeout=60,
+            )
+
+    def test_list_deploy_options_exception(self):
+        """Tests list_deploy_options when recommend_spec raises an exception."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        mock_model_service_client = mock.Mock()
+        with mock.patch.object(
+            aiplatform.initializer.global_config,
+            "create_client",
+            return_value=mock_model_service_client,
+        ):
+            mock_model_service_client.recommend_spec.side_effect = ValueError(
+                "Test Error"
+            )
+            custom_model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
+            with pytest.raises(ValueError) as exception:
+                custom_model.list_deploy_options()
+            assert str(exception.value) == "Test Error"
+            mock_model_service_client.recommend_spec.assert_called_once()
+
+
+class TestVertexAIModelGardenModel:
+    """Test cases for the Model Garden Model class."""
+
+    def setup_method(self):
+        importlib.reload(aiplatform.initializer)
+        importlib.reload(aiplatform)
+        aiplatform.init(project=_TEST_PROJECT)
+
+    def teardown_method(self):
+        aiplatform.initializer.global_pool.shutdown(wait=True)
+
+    def test_no_model_name_raises_error(self):
+        """Tests that constructing a Model without a model name raises an error."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        with pytest.raises(ValueError) as exception:
+            model_garden_preview.Model()
+        assert str(exception.value) == "model_name must be specified."
+
+    def test_deploy_full_resource_name_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.Model(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy()
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+            )
+        )
+
+    def test_deploy_simplified_resource_name_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.Model(
+            model_name=_TEST_MODEL_SIMPLIFIED_RESOURCE_NAME
+        )
+        model.deploy()
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+            )
+        )
+
+    def test_deploy_hugging_face_id_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.Model(model_name=_TEST_MODEL_HUGGING_FACE_ID)
+        model.deploy()
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                hugging_face_model_id=_TEST_MODEL_HUGGING_FACE_ID.lower(),
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+            )
+        )
+
+    def test_deploy_gcs_uri_success(self, deploy_mock):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden_preview.Model(model_name=_TEST_GCS_URI)
+        model.deploy()
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                custom_model=types.DeployRequest.CustomModel(
+                    gcs_uri=_TEST_GCS_URI,
+                ),
+                deploy_config=types.DeployRequest.DeployConfig(
+                    dedicated_resources=types.DedicatedResources(
+                        min_replica_count=1,
+                        max_replica_count=1,
+                    )
+                ),
+            )
+        )
+
+    def test_deploy_model_registry_model_raises_error(self):
+        """Tests that Model Registry models raise NotImplementedError."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        with pytest.raises(NotImplementedError) as exception:
+            model_garden_preview.Model(model_name=_TEST_MODEL_NAME)
+        assert str(exception.value) == "Model Registry models are not supported yet."
diff --git a/tests/unit/vertexai/test_rubric_based_eval.py b/tests/unit/vertexai/test_rubric_based_eval.py
index 557873f198..1fdc0e02cd 100644
--- a/tests/unit/vertexai/test_rubric_based_eval.py
+++ b/tests/unit/vertexai/test_rubric_based_eval.py
@@ -30,6 +30,7 @@ from vertexai.preview.evaluation.metrics import (
     predefined_rubric_metrics,
 )
+import copy
 import pandas as pd
 import pytest
@@ -281,7 +282,7 @@ def test_pointwise_text_quality_metric(self):
         with mock.patch.object(
             target=gapic_evaluation_services.EvaluationServiceClient,
             attribute="evaluate_instances",
-            side_effect=_MOCK_POINTWISE_RESPONSE,
+            side_effect=[copy.deepcopy(x) for x in _MOCK_POINTWISE_RESPONSE],
         ):
             eval_result = EvalTask(
                 dataset=_TEST_EVAL_DATASET, metrics=[metric]