Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ def mock_generate_multimodal_dataset_display_name():
yield mock_generate


@pytest.fixture
def mock_get_batch_job_unique_name():
    """Patches get_batch_job_unique_name to a fixed, predictable suffix."""
    patcher = mock.patch.object(_datasets_utils, "get_batch_job_unique_name")
    mocked = patcher.start()
    mocked.return_value = "12345678901234_abcde"
    try:
        yield mocked
    finally:
        patcher.stop()


def test_create_dataset(client):
create_dataset_operation = client.datasets._create_multimodal_dataset(
name="projects/vertex-sdk-dev/locations/us-central1",
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/vertexai/genai/test_multimodal_datasets_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ def mock_import_bigframes():
yield mock_import_bigframes


@pytest.fixture
def mock_get_batch_job_unique_name():
    """Yields a mock that pins the batch-job unique suffix to a known value."""
    with mock.patch.object(
        _datasets_utils, "get_batch_job_unique_name"
    ) as patched:
        patched.return_value = "12345678901234_abcde"
        yield patched


class TestMultimodalDataset:

def test_read_config(self):
Expand Down Expand Up @@ -157,6 +166,28 @@ def test_to_bigframes(self, mock_import_bigframes):
"project.dataset.table"
)

def test_get_batch_job_destination(self, mock_get_batch_job_unique_name):
dataset = types.MultimodalDataset(
name="projects/vertex-sdk-dev/locations/us-central1/datasets/12345",
display_name="test_multimodal_dataset",
metadata={
"inputConfig": {
"bigquerySource": {
"uri": "bq://target_project.target_dataset.target_table"
},
},
},
)
destination = dataset.get_batch_job_destination()
assert (
destination.vertex_dataset.display_name
== "test_multimodal_dataset_batch_output_12345678901234_abcde"
)
assert (
destination.vertex_dataset.bigquery_destination
== "bq://target_project.target_dataset.target_table_batch_output_12345678901234_abcde"
)


class TestGeminiRequestReadConfig:
def test_single_turn_template(self):
Expand Down
7 changes: 7 additions & 0 deletions vertexai/_genai/_datasets_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,13 @@ def generate_multimodal_dataset_display_name() -> str:
return f"MultimodalDataset {datetime.datetime.now().isoformat(sep=' ')}"


def get_batch_job_unique_name() -> str:
    """Generates a unique name suffix for a batch job destination.

    Returns:
        A string of the form ``<YYYYmmddHHMMSS>_<5-hex-chars>`` combining the
        current local time with a random fragment of a UUID4.
    """
    now = datetime.datetime.now()
    random_fragment = uuid.uuid4().hex[:5]
    return f"{now:%Y%m%d%H%M%S}_{random_fragment}"


def save_dataframe_to_bigquery(
dataframe: "bigframes.pandas.DataFrame", # type: ignore # noqa: F821
target_table_id: str,
Expand Down
22 changes: 22 additions & 0 deletions vertexai/_genai/types/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15334,6 +15334,28 @@ def to_bigframes(
raise ValueError("Multimodal dataset bigquery source uri is not set.")
return bigframes.pandas.read_gbq_table(self.bigquery_uri.removeprefix("bq://"))

def to_batch_job_source(self) -> "genai_types.BatchJobSource":
"""Converts the dataset to a BatchJobSource."""
return genai_types.BatchJobSource(
vertex_dataset_name=self.name,
)

def get_batch_job_destination(self) -> "genai_types.BatchJobDestination":
"""Converts the dataset to a BatchJobDestination."""
from .. import _datasets_utils

unique_name = _datasets_utils.get_batch_job_unique_name()
bigquery_uri = self.bigquery_uri
if bigquery_uri is None:
raise ValueError("Multimodal dataset bigquery source uri is not set.")
curr_display_name = self.display_name or "genai_batch_job"
return genai_types.BatchJobDestination(
vertex_dataset=genai_types.VertexMultimodalDatasetDestination(
display_name=f"{curr_display_name}_batch_output_{unique_name}",
bigquery_destination=f"{bigquery_uri}_batch_output_{unique_name}",
)
)


class MultimodalDatasetDict(TypedDict, total=False):
"""Represents a multimodal dataset."""
Expand Down
Loading