@@ -1885,9 +1885,133 @@ def test_run_agent_internal_error_response(self, mock_run_agent):
18851885
18861886 assert "response" in result_df .columns
18871887 response_content = result_df ["response" ][0 ]
1888- assert "Unexpected response type from agent run" in response_content
1888+ assert "agent run failed " in response_content
18891889 assert not result_df ["intermediate_events" ][0 ]
18901890
1891+ @mock .patch .object (_evals_common , "_run_agent" )
1892+ def test_run_agent_internal_multi_turn_success (self , mock_run_agent ):
1893+ mock_run_agent .return_value = [
1894+ [
1895+ {"turn_index" : 0 , "turn_id" : "t1" , "events" : []},
1896+ {"turn_index" : 1 , "turn_id" : "t2" , "events" : []},
1897+ ]
1898+ ]
1899+ prompt_dataset = pd .DataFrame ({"prompt" : ["p1" ], "conversation_plan" : ["plan" ]})
1900+ mock_agent_engine = mock .Mock ()
1901+ mock_api_client = mock .Mock ()
1902+ result_df = _evals_common ._run_agent_internal (
1903+ api_client = mock_api_client ,
1904+ agent_engine = mock_agent_engine ,
1905+ agent = None ,
1906+ prompt_dataset = prompt_dataset ,
1907+ )
1908+
1909+ assert "agent_data" in result_df .columns
1910+ agent_data = result_df ["agent_data" ][0 ]
1911+ assert agent_data ["turns" ] == [
1912+ {"turn_index" : 0 , "turn_id" : "t1" , "events" : []},
1913+ {"turn_index" : 1 , "turn_id" : "t2" , "events" : []},
1914+ ]
1915+
1916+ @mock .patch (
1917+ "vertexai._genai._evals_common.ADK_SessionInput"
1918+ )
1919+ @mock .patch (
1920+ "vertexai._genai._evals_common.EvaluationGenerator"
1921+ )
1922+ @mock .patch (
1923+ "vertexai._genai._evals_common.LlmBackedUserSimulator"
1924+ )
1925+ @mock .patch (
1926+ "vertexai._genai._evals_common.ConversationScenario"
1927+ )
1928+ @mock .patch (
1929+ "vertexai._genai._evals_common.LlmBackedUserSimulatorConfig"
1930+ )
1931+ @pytest .mark .asyncio
1932+ async def test_run_adk_user_simulation_with_intermediate_events (
1933+ self ,
1934+ mock_config ,
1935+ mock_scenario ,
1936+ mock_simulator ,
1937+ mock_generator ,
1938+ mock_session_input ,
1939+ ):
1940+ """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully."""
1941+ row = pd .Series (
1942+ {
1943+ "starting_prompt" : "I want a laptop." ,
1944+ "conversation_plan" : "Ask for a laptop" ,
1945+ "session_inputs" : json .dumps ({"user_id" : "u1" }),
1946+ }
1947+ )
1948+ mock_agent = mock .Mock ()
1949+
1950+ mock_invocation = mock .Mock ()
1951+ mock_invocation .invocation_id = "turn_123"
1952+ mock_invocation .creation_timestamp = 1771811084.88
1953+ mock_invocation .user_content .model_dump .return_value = {
1954+ "parts" : [{"text" : "I want a laptop." }],
1955+ "role" : "user" ,
1956+ }
1957+ mock_event_1 = mock .Mock ()
1958+ mock_event_1 .author = "ecommerce_agent"
1959+ mock_event_1 .content .model_dump .return_value = {
1960+ "parts" : [
1961+ {
1962+ "function_call" : {
1963+ "name" : "search_products" ,
1964+ "args" : {"query" : "laptop" },
1965+ }
1966+ }
1967+ ]
1968+ }
1969+ mock_event_2 = mock .Mock ()
1970+ mock_event_2 .author = "ecommerce_agent"
1971+ mock_event_2 .content .model_dump .return_value = {
1972+ "parts" : [
1973+ {
1974+ "function_response" : {
1975+ "name" : "search_products" ,
1976+ "response" : {"products" : []},
1977+ }
1978+ }
1979+ ]
1980+ }
1981+
1982+ mock_invocation .intermediate_data .invocation_events = [
1983+ mock_event_1 ,
1984+ mock_event_2 ,
1985+ ]
1986+ mock_invocation .final_response .model_dump .return_value = {
1987+ "parts" : [{"text" : "There are no laptops matching your search." }],
1988+ "role" : "model" ,
1989+ }
1990+ mock_generator ._generate_inferences_from_root_agent = mock .AsyncMock (
1991+ return_value = [mock_invocation ]
1992+ )
1993+ turns = await _evals_common ._run_adk_user_simulation (row , mock_agent )
1994+
1995+ assert len (turns ) == 1
1996+ turn = turns [0 ]
1997+ assert turn ["turn_index" ] == 0
1998+ assert turn ["turn_id" ] == "turn_123"
1999+ assert len (turn ["events" ]) == 4
2000+ assert turn ["events" ][0 ]["author" ] == "user"
2001+ assert turn ["events" ][0 ]["content" ]["parts" ][0 ]["text" ] == "I want a laptop."
2002+ assert turn ["events" ][1 ]["author" ] == "ecommerce_agent"
2003+ assert "function_call" in turn ["events" ][1 ]["content" ]["parts" ][0 ]
2004+ assert turn ["events" ][2 ]["author" ] == "ecommerce_agent"
2005+ assert "function_response" in turn ["events" ][2 ]["content" ]["parts" ][0 ]
2006+ assert turn ["events" ][3 ]["author" ] == "agent"
2007+ assert (
2008+ turn ["events" ][3 ]["content" ]["parts" ][0 ]["text" ]
2009+ == "There are no laptops matching your search."
2010+ )
2011+ mock_invocation .user_content .model_dump .assert_called_with (mode = "json" )
2012+ mock_event_1 .content .model_dump .assert_called_with (mode = "json" )
2013+ mock_invocation .final_response .model_dump .assert_called_with (mode = "json" )
2014+
18912015 @mock .patch .object (_evals_common , "_run_agent" )
18922016 def test_run_agent_internal_malformed_event (self , mock_run_agent ):
18932017 mock_run_agent .return_value = [
@@ -1915,6 +2039,28 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
19152039 assert not result_df ["intermediate_events" ][0 ]
19162040
19172041
class TestIsMultiTurnAgentRun:
    """Unit tests for the _is_multi_turn_agent_run function."""

    def test_is_multi_turn_agent_run_with_config(self):
        """An explicit user-simulator config forces a multi-turn run."""
        simulator_config = vertexai_genai_types.UserSimulatorConfig(
            model_name="gemini-pro"
        )
        assert _evals_common._is_multi_turn_agent_run(
            user_simulator_config=simulator_config,
            prompt_dataset=pd.DataFrame(),
        )

    def test_is_multi_turn_agent_run_with_conversation_plan(self):
        """A `conversation_plan` column marks the dataset as multi-turn."""
        dataset = pd.DataFrame({"conversation_plan": ["plan"]})
        assert _evals_common._is_multi_turn_agent_run(
            user_simulator_config=None, prompt_dataset=dataset
        )

    def test_is_multi_turn_agent_run_false(self):
        """A plain prompt dataset without a config or plan is single-turn."""
        dataset = pd.DataFrame({"prompt": ["prompt"]})
        assert not _evals_common._is_multi_turn_agent_run(
            user_simulator_config=None, prompt_dataset=dataset
        )
19182064class TestMetricPromptBuilder :
19192065 """Unit tests for the MetricPromptBuilder class."""
19202066
@@ -4228,6 +4374,101 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
42284374 )
42294375
42304376
@pytest.mark.usefixtures("google_auth_mock")
class TestRunAdkUserSimulation:
    """Unit tests for the _run_adk_user_simulation function."""

    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")
    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")
    @mock.patch("vertexai._genai._evals_common.ConversationScenario")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")
    @pytest.mark.asyncio
    async def test_run_adk_user_simulation_success(
        self,
        mock_config_cls,
        mock_scenario_cls,
        mock_simulator_cls,
        mock_generator_cls,
        mock_session_input_cls,
    ):
        """A single invocation yields one turn with a user and an agent event."""
        row = pd.Series(
            {
                "starting_prompt": "start",
                "conversation_plan": "plan",
                "session_inputs": json.dumps({"user_id": "u1"}),
            }
        )
        fake_agent = mock.Mock()

        invocation = mock.Mock()
        invocation.user_content.model_dump.return_value = {"text": "user msg"}
        invocation.final_response.model_dump.return_value = {"text": "agent msg"}
        # No intermediate tool events for this invocation.
        invocation.intermediate_data = None
        invocation.creation_timestamp = 12345
        invocation.invocation_id = "turn1"
        mock_generator_cls._generate_inferences_from_root_agent = mock.AsyncMock(
            return_value=[invocation]
        )

        turns = await _evals_common._run_adk_user_simulation(row, fake_agent)

        assert len(turns) == 1
        (turn,) = turns
        assert turn["turn_index"] == 0
        assert turn["turn_id"] == "turn1"
        assert len(turn["events"]) == 2
        user_event, agent_event = turn["events"]
        assert user_event["author"] == "user"
        assert user_event["content"] == {"text": "user msg"}
        assert agent_event["author"] == "agent"
        assert agent_event["content"] == {"text": "agent msg"}

        # The scenario is built from the row's prompt/plan columns.
        mock_scenario_cls.assert_called_once_with(
            starting_prompt="start", conversation_plan="plan"
        )
        mock_session_input_cls.assert_called_once()

    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")
    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")
    @mock.patch("vertexai._genai._evals_common.ConversationScenario")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")
    @pytest.mark.asyncio
    async def test_run_adk_user_simulation_missing_columns(
        self,
        mock_config_cls,
        mock_scenario_cls,
        mock_simulator_cls,
        mock_generator_cls,
        mock_session_input_cls,
    ):
        """A row missing the starting prompt is rejected with a ValueError."""
        row = pd.Series({"conversation_plan": "plan"})

        with pytest.raises(ValueError, match="User simulation requires"):
            await _evals_common._run_adk_user_simulation(row, mock.Mock())
42314472@pytest .mark .usefixtures ("google_auth_mock" )
42324473class TestLLMMetricHandlerPayload :
42334474 def setup_method (self ):
0 commit comments