Skip to content

Commit 9d32dd5

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add ComputationBasedMetricSpec to support metrics like EXACT_MATCH, BLEU, and ROUGE in EvaluationRun configurations.
PiperOrigin-RevId: 858830963
1 parent 351b194 commit 9d32dd5

4 files changed

Lines changed: 96 additions & 1 deletion

File tree

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,23 @@
4646
)
4747
),
4848
)
49+
EXACT_MATCH_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric(
50+
metric="exact_match",
51+
metric_config=types.UnifiedMetric(
52+
computation_based_metric_spec=types.ComputationBasedMetricSpec(
53+
type=types.ComputationBasedMetricType.EXACT_MATCH,
54+
)
55+
),
56+
)
57+
BLEU_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric(
58+
metric="exact_match_2",
59+
metric_config=types.UnifiedMetric(
60+
computation_based_metric_spec=types.ComputationBasedMetricSpec(
61+
type=types.ComputationBasedMetricType.BLEU,
62+
parameters={"use_effective_order": True},
63+
)
64+
),
65+
)
4966

5067

5168
def test_create_eval_run_data_source_evaluation_set(client):
@@ -74,6 +91,8 @@ def test_create_eval_run_data_source_evaluation_set(client):
7491
GENERAL_QUALITY_METRIC,
7592
types.RubricMetric.FINAL_RESPONSE_QUALITY,
7693
LLM_METRIC,
94+
EXACT_MATCH_COMPUTATION_BASED_METRIC,
95+
BLEU_COMPUTATION_BASED_METRIC,
7796
],
7897
agent_info=types.evals.AgentInfo(
7998
agent_resource_name="project/123/locations/us-central1/reasoningEngines/456",
@@ -94,7 +113,13 @@ def test_create_eval_run_data_source_evaluation_set(client):
94113
output_config=genai_types.OutputConfig(
95114
gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
96115
),
97-
metrics=[GENERAL_QUALITY_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC],
116+
metrics=[
117+
GENERAL_QUALITY_METRIC,
118+
FINAL_RESPONSE_QUALITY_METRIC,
119+
LLM_METRIC,
120+
EXACT_MATCH_COMPUTATION_BASED_METRIC,
121+
BLEU_COMPUTATION_BASED_METRIC,
122+
],
98123
)
99124
assert evaluation_run.inference_configs[
100125
"agent-1"

vertexai/_genai/evals.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,13 @@ def _UnifiedMetric_from_vertex(
577577
getv(from_object, ["predefinedMetricSpec"]),
578578
)
579579

580+
if getv(from_object, ["computationBasedMetricSpec"]) is not None:
581+
setv(
582+
to_object,
583+
["computation_based_metric_spec"],
584+
getv(from_object, ["computationBasedMetricSpec"]),
585+
)
586+
580587
return to_object
581588

582589

@@ -621,6 +628,13 @@ def _UnifiedMetric_to_vertex(
621628
getv(from_object, ["predefined_metric_spec"]),
622629
)
623630

631+
if getv(from_object, ["computation_based_metric_spec"]) is not None:
632+
setv(
633+
to_object,
634+
["computationBasedMetricSpec"],
635+
getv(from_object, ["computation_based_metric_spec"]),
636+
)
637+
624638
return to_object
625639

626640

vertexai/_genai/types/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,10 @@
174174
from .common import CometResult
175175
from .common import CometResultDict
176176
from .common import CometResultOrDict
177+
from .common import ComputationBasedMetricSpec
178+
from .common import ComputationBasedMetricSpecDict
179+
from .common import ComputationBasedMetricSpecOrDict
180+
from .common import ComputationBasedMetricType
177181
from .common import ContainerSpec
178182
from .common import ContainerSpecDict
179183
from .common import ContainerSpecOrDict
@@ -1078,6 +1082,9 @@
10781082
"CustomCodeExecutionSpec",
10791083
"CustomCodeExecutionSpecDict",
10801084
"CustomCodeExecutionSpecOrDict",
1085+
"ComputationBasedMetricSpec",
1086+
"ComputationBasedMetricSpecDict",
1087+
"ComputationBasedMetricSpecOrDict",
10811088
"UnifiedMetric",
10821089
"UnifiedMetricDict",
10831090
"UnifiedMetricOrDict",
@@ -1936,6 +1943,7 @@
19361943
"EvaluationItemType",
19371944
"SamplingMethod",
19381945
"RubricContentType",
1946+
"ComputationBasedMetricType",
19391947
"EvaluationRunState",
19401948
"OptimizeTarget",
19411949
"MemoryMetadataMergeStrategy",

vertexai/_genai/types/common.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,21 @@ class RubricContentType(_common.CaseInSensitiveEnum):
324324
"""Generate rubrics in a unit test format."""
325325

326326

327+
class ComputationBasedMetricType(_common.CaseInSensitiveEnum):
328+
"""Represents the type of the computation based metric."""
329+
330+
COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = (
331+
"COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED"
332+
)
333+
"""Computation based metric type is unspecified."""
334+
EXACT_MATCH = "EXACT_MATCH"
335+
"""Exact match metric."""
336+
BLEU = "BLEU"
337+
"""BLEU metric."""
338+
ROUGE = "ROUGE"
339+
"""ROUGE metric."""
340+
341+
327342
class EvaluationRunState(_common.CaseInSensitiveEnum):
328343
"""Represents the state of an evaluation run."""
329344

@@ -969,6 +984,33 @@ def evaluate(instance: dict[str, Any]) -> float:
969984
]
970985

971986

987+
class ComputationBasedMetricSpec(_common.BaseModel):
988+
"""Specification for a computation based metric."""
989+
990+
type: Optional[ComputationBasedMetricType] = Field(
991+
default=None, description="""The type of the computation based metric."""
992+
)
993+
parameters: Optional[dict[str, Any]] = Field(
994+
default=None,
995+
description="""A map of parameters for the metric. ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}.""",
996+
)
997+
998+
999+
class ComputationBasedMetricSpecDict(TypedDict, total=False):
1000+
"""Specification for a computation based metric."""
1001+
1002+
type: Optional[ComputationBasedMetricType]
1003+
"""The type of the computation based metric."""
1004+
1005+
parameters: Optional[dict[str, Any]]
1006+
"""A map of parameters for the metric. ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}."""
1007+
1008+
1009+
ComputationBasedMetricSpecOrDict = Union[
1010+
ComputationBasedMetricSpec, ComputationBasedMetricSpecDict
1011+
]
1012+
1013+
9721014
class UnifiedMetric(_common.BaseModel):
9731015
"""The unified metric used for evaluation."""
9741016

@@ -990,6 +1032,9 @@ class UnifiedMetric(_common.BaseModel):
9901032
predefined_metric_spec: Optional[PredefinedMetricSpec] = Field(
9911033
default=None, description="""The spec for a pre-defined metric."""
9921034
)
1035+
computation_based_metric_spec: Optional[ComputationBasedMetricSpec] = Field(
1036+
default=None, description="""The spec for a computation based metric."""
1037+
)
9931038

9941039

9951040
class UnifiedMetricDict(TypedDict, total=False):
@@ -1013,6 +1058,9 @@ class UnifiedMetricDict(TypedDict, total=False):
10131058
predefined_metric_spec: Optional[PredefinedMetricSpecDict]
10141059
"""The spec for a pre-defined metric."""
10151060

1061+
computation_based_metric_spec: Optional[ComputationBasedMetricSpecDict]
1062+
"""The spec for a computation based metric."""
1063+
10161064

10171065
UnifiedMetricOrDict = Union[UnifiedMetric, UnifiedMetricDict]
10181066

0 commit comments

Comments (0)