1515from __future__ import annotations
1616
1717import abc
18+ import logging
1819import math
1920import os
2021from typing import Optional
22+ from typing import Union
2123
2224from google .genai import types as genai_types
2325import pandas as pd
2426from typing_extensions import override
2527
2628from ..dependencies .vertexai import vertexai
29+ from .app_details import AgentDetails
2730from .eval_case import ConversationScenario
2831from .eval_case import Invocation
32+ from .eval_case import InvocationEvent
2933from .evaluator import EvalStatus
3034from .evaluator import EvaluationResult
3135from .evaluator import Evaluator
3236from .evaluator import PerInvocationResult
3337
38+ logger = logging .getLogger ("google_adk." + __name__ )
39+
3440_ERROR_MESSAGE_SUFFIX = """
3541You should specify both project id and location. This metric uses Vertex Gen AI
3642Eval SDK, and it requires google cloud credentials.
@@ -56,7 +62,9 @@ class _VertexAiEvalFacade(Evaluator):
5662 def __init__ (
5763 self ,
5864 threshold : float ,
59- metric_name : vertexai .types .PrebuiltMetric ,
65+ metric_name : Union [
66+ vertexai .types .PrebuiltMetric , vertexai .types .RubricMetric
67+ ],
6068 expected_invocations_required = False ,
6169 ):
6270 self ._threshold = threshold
@@ -119,7 +127,7 @@ def _get_score(self, eval_result) -> Optional[float]:
119127 return None
120128
121129 def _get_eval_status (self , score : Optional [float ]):
122- if score :
130+ if score is not None :
123131 return (
124132 EvalStatus .PASSED if score >= self ._threshold else EvalStatus .FAILED
125133 )
@@ -188,7 +196,7 @@ def evaluate_invocations(
188196 )
189197 )
190198
191- if score :
199+ if score is not None :
192200 total_score += score
193201 num_invocations += 1
194202
@@ -203,3 +211,158 @@ def evaluate_invocations(
203211 )
204212
205213 return EvaluationResult ()
214+
215+
class _MultiTurnVertexiAiEvalFacade(_VertexAiEvalFacade):
  """A facade for multi turn metrics exposed in Vertex Gen AI Eval SDK.

  The whole conversation is packaged as a single Vertex eval case. Only the
  last turn receives a score; every earlier turn is reported with a score of
  None.
  """

  # NOTE(review): the class name ("Vertexi") and
  # `_map_inovcation_event_to_agent_event` ("inovcation") are misspelled. They
  # are kept as-is here because other modules may reference them.

  @override
  def evaluate_invocations(
      self,
      actual_invocations: list[Invocation],
      expected_invocations: Optional[list[Invocation]] = None,
      conversation_scenario: Optional[ConversationScenario] = None,
  ) -> EvaluationResult:
    """Evaluates a multi-turn conversation, scoring only its last turn.

    Args:
      actual_invocations: The turns of the conversation, in order.
      expected_invocations: Optional reference turns. They are only attached to
        the per-invocation results; they are not sent to the metric.
      conversation_scenario: Unused.

    Returns:
      An EvaluationResult whose overall score is the score of the last turn,
      or an empty EvaluationResult when there is nothing to evaluate or the
      metric produced no score.
    """
    del conversation_scenario  # Unused.

    # Nothing to evaluate; also avoids indexing [-1] on an empty list below.
    if not actual_invocations:
      return EvaluationResult()

    # If expected invocations are not supplied (None or empty), pair every
    # actual invocation with None instead.
    if not expected_invocations:
      expected_invocations = [None] * len(actual_invocations)

    # All turns but the last one carry no score of their own for these
    # metrics; their status is whatever `_get_eval_status(None)` reports.
    per_invocation_results = [
        PerInvocationResult(
            actual_invocation=actual,
            expected_invocation=expected,
            score=None,
            eval_status=self._get_eval_status(None),
        )
        for actual, expected in zip(
            actual_invocations[:-1], expected_invocations[:-1]
        )
    ]

    # Only evaluate the last turn and take into account all the previous turns.
    eval_case = vertexai.types.EvalCase(
        agent_data=_MultiTurnVertexiAiEvalFacade._get_agent_data(
            actual_invocations
        )
    )
    dataset = vertexai.types.EvaluationDataset(eval_cases=[eval_case])

    eval_case_result = self._perform_eval(
        dataset=dataset, metrics=[self._metric_name]
    )

    score = self._get_score(eval_case_result)
    per_invocation_results.append(
        PerInvocationResult(
            actual_invocation=actual_invocations[-1],
            expected_invocation=expected_invocations[-1],
            score=score,
            eval_status=self._get_eval_status(score),
        )
    )

    # `is not None` so that a legitimate score of 0 is still reported.
    if score is not None:
      return EvaluationResult(
          overall_score=score,
          overall_eval_status=self._get_eval_status(score),
          per_invocation_results=per_invocation_results,
      )

    return EvaluationResult()

  @staticmethod
  def _get_agent_data(
      actual_invocations: list[Invocation],
  ) -> vertexai.types.evals.AgentData:
    """Builds the AgentData (agent configs plus turns) for the conversation."""
    return vertexai.types.evals.AgentData(
        agents=_MultiTurnVertexiAiEvalFacade._get_agent_details(
            actual_invocations
        ),
        turns=_MultiTurnVertexiAiEvalFacade._get_turns(actual_invocations),
    )

  @staticmethod
  def _get_turns(
      actual_invocations: list[Invocation],
  ) -> list[vertexai.types.evals.ConversationTurn]:
    """Maps every invocation to a ConversationTurn indexed by its position."""
    return [
        _MultiTurnVertexiAiEvalFacade._map_invocation_turn(index, invocation)
        for index, invocation in enumerate(actual_invocations)
    ]

  @staticmethod
  def _map_invocation_turn(
      turn_index: int,
      invocation: Invocation,
  ) -> vertexai.types.evals.ConversationTurn:
    """Maps one invocation to a ConversationTurn.

    The events of the turn are, in order: the user content, any intermediate
    invocation events, and the final response authored as "agent".

    Args:
      turn_index: Zero-based position of the invocation in the conversation.
      invocation: The invocation to convert.

    Returns:
      The ConversationTurn, identified by the invocation id.
    """
    # Guard against `intermediate_data` being None or not exposing
    # `invocation_events`; in both cases the turn has no intermediate events.
    # NOTE(review): if intermediate data can come in another shape (e.g. tool
    # uses / intermediate responses), it is not forwarded here - confirm
    # against the Invocation definition in eval_case.
    invocation_events = (
        getattr(invocation.intermediate_data, "invocation_events", None) or []
    )

    agent_events = [
        vertexai.types.evals.AgentEvent(
            author="user", content=invocation.user_content
        )
    ]
    agent_events.extend(
        _MultiTurnVertexiAiEvalFacade._map_inovcation_event_to_agent_event(
            invocation_event
        )
        for invocation_event in invocation_events
    )
    agent_events.append(
        vertexai.types.evals.AgentEvent(
            author="agent", content=invocation.final_response
        )
    )

    return vertexai.types.evals.ConversationTurn(
        turn_index=turn_index,
        events=agent_events,
        turn_id=invocation.invocation_id,
    )

  @staticmethod
  def _map_inovcation_event_to_agent_event(
      invocation_event: InvocationEvent,
  ) -> vertexai.types.evals.AgentEvent:
    """Copies the author and content of an invocation event to an AgentEvent."""
    return vertexai.types.evals.AgentEvent(
        author=invocation_event.author, content=invocation_event.content
    )

  @staticmethod
  def _get_agent_details(
      actual_invocations: list[Invocation],
  ) -> dict[str, vertexai.types.evals.AgentConfig]:
    """Collects one AgentConfig per agent name across all invocations.

    When the same agent name shows up in several invocations, the details from
    the earliest invocation win.
    """
    agent_configs = {}
    for invocation in actual_invocations:
      app_details = invocation.app_details
      if not (app_details and app_details.agent_details):
        continue

      for agent_name, agent_details in app_details.agent_details.items():
        # Keep the first config seen for a given agent name.
        if agent_name not in agent_configs:
          agent_configs[agent_name] = (
              _MultiTurnVertexiAiEvalFacade._map_agent_details_to_agent_config(
                  agent_details
              )
          )

    return agent_configs

  @staticmethod
  def _map_agent_details_to_agent_config(
      agent_details: AgentDetails,
  ) -> vertexai.types.evals.AgentConfig:
    """Maps name, instructions and tool declarations to an AgentConfig."""
    return vertexai.types.evals.AgentConfig(
        agent_id=agent_details.name,
        instruction=agent_details.instructions,
        tools=agent_details.tool_declarations,
    )
0 commit comments