diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 3357a984d6..df0608e614 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -252,8 +252,23 @@ def __init__( description: A description of the function. approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). - max_invocations: The maximum number of times this function can be invoked. - If None, there is no limit. Should be at least 1. + max_invocations: The maximum number of times this function can be invoked + across the **lifetime of this tool instance**. If None (default), + there is no limit. Should be at least 1. If the tool is called multiple + times in one iteration, those will execute, after that it will stop working. For example, + if max_invocations is 3 and the tool is called 5 times in a single iteration, + these will complete, but any subsequent calls to the tool (in the same or future iterations) + will raise a ToolException. + + .. note:: + This counter lives on the tool instance and is never automatically + reset. For module-level or singleton tools in long-running + applications, the counter accumulates across all requests. Use + :attr:`invocation_count` to inspect or reset the counter manually, + or consider using + ``FunctionInvocationConfiguration["max_function_calls"]`` + for per-request limits instead. + max_invocation_exceptions: The maximum number of exceptions allowed during invocations. If None, there is no limit. Should be at least 1. additional_properties: Additional properties to set on the function. @@ -1130,8 +1145,10 @@ def tool( function's signature. Defaults to ``None`` (infer from signature). approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). 
- max_invocations: The maximum number of times this function can be invoked. - If None, there is no limit, should be at least 1. + max_invocations: The maximum number of times this function can be invoked + across the **lifetime of this tool instance**. If None (default), there is + no limit. Should be at least 1. For per-request limits, use + ``FunctionInvocationConfiguration["max_function_calls"]`` instead. max_invocation_exceptions: The maximum number of exceptions allowed during invocations. If None, there is no limit, should be at least 1. additional_properties: Additional properties to set on the function. @@ -1247,43 +1264,54 @@ def wrapper(f: Callable[..., Any]) -> FunctionTool: class FunctionInvocationConfiguration(TypedDict, total=False): """Configuration for function invocation in chat clients. + The configuration controls the tool execution loop that runs when the model + requests function calls. Key settings: + + - ``enabled``: Master switch for the function invocation loop. + - ``max_iterations``: Limits the number of **LLM roundtrips** (iterations). + Each iteration may execute one or more function calls in parallel, so + this does *not* directly limit the total number of function executions. + - ``max_function_calls``: Limits the **total number of individual function + invocations** across all iterations within a single request. This is the + primary knob for controlling cost and preventing runaway tool usage. When + the limit is reached, the loop stops invoking tools and forces the model + to produce a text response. Default is ``None`` (unlimited). + + This is a **best-effort** limit: it is checked *after* each batch of + parallel tool calls completes, not before. If the model requests 20 + parallel calls in a single iteration and the limit is 10, all 20 will + execute before the loop stops. + - ``max_consecutive_errors_per_request``: How many consecutive errors + before abandoning the tool loop for this request. 
+ - ``terminate_on_unknown_calls``: Whether to raise an error when the model + requests a function that is not in the tool map. + - ``additional_tools``: Extra tools available during execution but not + advertised to the model in the tool list. + - ``include_detailed_errors``: Whether to include exception details in the + function result returned to the model. + + Note: + ``max_iterations`` and ``max_function_calls`` serve complementary purposes. + ``max_iterations`` caps the number of model round-trips regardless of how + many tools are called per trip. ``max_function_calls`` caps the cumulative + number of individual tool executions regardless of how they are distributed + across iterations. + Example: .. code-block:: python + from agent_framework.openai import OpenAIChatClient - # Create an OpenAI chat client client = OpenAIChatClient(api_key="your_api_key") - # Disable function invocation - client.function_invocation_configuration["enabled"] = False - - # Set maximum iterations to 10 - client.function_invocation_configuration["max_iterations"] = 10 - - # Enable termination on unknown function calls - client.function_invocation_configuration["terminate_on_unknown_calls"] = True - - # Add additional tools for function execution - client.function_invocation_configuration["additional_tools"] = [my_custom_tool] - - # Enable detailed error information in function results - client.function_invocation_configuration["include_detailed_errors"] = True - - # You can also create a new configuration dict if needed - new_config: FunctionInvocationConfiguration = { - "enabled": True, - "max_iterations": 20, - "terminate_on_unknown_calls": False, - "additional_tools": [another_tool], - "include_detailed_errors": False, - } - - # and then assign it to the client - client.function_invocation_configuration = new_config + # Limit to 5 LLM roundtrips and 20 total function executions + client.function_invocation_configuration["max_iterations"] = 5 + 
client.function_invocation_configuration["max_function_calls"] = 20 """ enabled: bool max_iterations: int + max_function_calls: int | None max_consecutive_errors_per_request: int terminate_on_unknown_calls: bool additional_tools: Sequence[FunctionTool] @@ -1296,6 +1324,7 @@ def normalize_function_invocation_configuration( normalized: FunctionInvocationConfiguration = { "enabled": True, "max_iterations": DEFAULT_MAX_ITERATIONS, + "max_function_calls": None, "max_consecutive_errors_per_request": DEFAULT_MAX_CONSECUTIVE_ERRORS_PER_REQUEST, "terminate_on_unknown_calls": False, "additional_tools": [], @@ -1305,6 +1334,8 @@ def normalize_function_invocation_configuration( normalized.update(config) if normalized["max_iterations"] < 1: raise ValueError("max_iterations must be at least 1.") + if normalized["max_function_calls"] is not None and normalized["max_function_calls"] < 1: + raise ValueError("max_function_calls must be at least 1 or None.") if normalized["max_consecutive_errors_per_request"] < 0: raise ValueError("max_consecutive_errors_per_request must be 0 or more.") if normalized["additional_tools"] is None: @@ -1816,6 +1847,7 @@ class FunctionRequestResult(TypedDict, total=False): result_message: The message containing function call results, if any. update_role: The role to update for the next message, if any. function_call_results: The list of function call results, if any. + function_call_count: The number of function calls executed in this processing step. 
""" action: Literal["return", "continue", "stop"] @@ -1823,6 +1855,7 @@ class FunctionRequestResult(TypedDict, total=False): result_message: Message | None update_role: Literal["assistant", "tool"] | None function_call_results: list[Content] | None + function_call_count: int def _handle_function_call_results( @@ -1913,6 +1946,7 @@ async def _process_function_requests( max_errors, ) _replace_approval_contents_with_results(prepped_messages, fcc_todo, approved_function_results) + executed_count = sum(1 for r in approved_function_results if r.type == "function_result") # Continue to call chat client with updated messages (containing function results) # so it can generate the final response return { @@ -1921,6 +1955,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": executed_count, } if response is None or fcc_messages is None: @@ -1930,6 +1965,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": 0, } tools = _extract_tools(tool_options) @@ -1942,6 +1978,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": 0, } function_call_results, should_terminate, had_errors = await execute_function_calls( @@ -1958,6 +1995,7 @@ async def _process_function_requests( max_errors=max_errors, ) result["function_call_results"] = list(function_call_results) + result["function_call_count"] = sum(1 for r in function_call_results if r.type == "function_result") # If middleware requested termination, change action to return if should_terminate: result["action"] = "return" @@ -2071,6 +2109,8 @@ async def _get_response() -> ChatResponse: nonlocal mutable_options nonlocal filtered_kwargs errors_in_a_row: int = 0 + total_function_calls: int = 0 + max_function_calls: int | None = 
self.function_invocation_configuration.get("max_function_calls") prepped_messages = list(messages) fcc_messages: list[Message] = [] response: ChatResponse | None = None @@ -2094,6 +2134,7 @@ async def _get_response() -> ChatResponse: response = ChatResponse(messages=prepped_messages) break errors_in_a_row = approval_result["errors_in_a_row"] + total_function_calls += approval_result.get("function_call_count", 0) response = await super_get_response( messages=prepped_messages, @@ -2118,10 +2159,24 @@ async def _get_response() -> ChatResponse: ) if result["action"] == "return": return response + total_function_calls += result.get("function_call_count", 0) if result["action"] == "stop": # Error threshold reached: force a final non-tool turn so # function_call_output items are submitted before exit. mutable_options["tool_choice"] = "none" + elif ( + max_function_calls is not None + and total_function_calls >= max_function_calls + ): + # Best-effort limit: checked after each batch of parallel calls completes, + # so the current batch always runs to completion even if it overshoots. + logger.info( + "Maximum function calls reached (%d/%d). 
" + "Stopping further function calls for this request.", + total_function_calls, + max_function_calls, + ) + mutable_options["tool_choice"] = "none" errors_in_a_row = result["errors_in_a_row"] # When tool_choice is 'required', reset tool_choice after one iteration to avoid infinite loops @@ -2167,6 +2222,8 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: nonlocal mutable_options nonlocal stream_result_hooks errors_in_a_row: int = 0 + total_function_calls: int = 0 + max_function_calls: int | None = self.function_invocation_configuration.get("max_function_calls") prepped_messages = list(messages) fcc_messages: list[Message] = [] response: ChatResponse | None = None @@ -2187,6 +2244,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: execute_function_calls=execute_function_calls, ) errors_in_a_row = approval_result["errors_in_a_row"] + total_function_calls += approval_result.get("function_call_count", 0) if approval_result["action"] == "stop": mutable_options["tool_choice"] = "none" return @@ -2232,6 +2290,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: execute_function_calls=execute_function_calls, ) errors_in_a_row = result["errors_in_a_row"] + total_function_calls += result.get("function_call_count", 0) if role := result["update_role"]: yield ChatResponseUpdate( contents=result["function_call_results"] or [], @@ -2243,6 +2302,19 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: mutable_options["tool_choice"] = "none" elif result["action"] != "continue": return + elif ( + max_function_calls is not None + and total_function_calls >= max_function_calls + ): + # Best-effort limit: checked after each batch of parallel calls completes, + # so the current batch always runs to completion even if it overshoots. + logger.info( + "Maximum function calls reached (%d/%d). 
" + "Stopping further function calls for this request.", + total_function_calls, + max_function_calls, + ) + mutable_options["tool_choice"] = "none" # When tool_choice is 'required', reset the tool_choice after one iteration to avoid infinite loops if mutable_options.get("tool_choice") == "required" or ( diff --git a/python/packages/core/tests/core/test_function_invocation_logic.py b/python/packages/core/tests/core/test_function_invocation_logic.py index 1dfd257942..b4213d6029 100644 --- a/python/packages/core/tests/core/test_function_invocation_logic.py +++ b/python/packages/core/tests/core/test_function_invocation_logic.py @@ -880,6 +880,143 @@ def ai_func(arg1: str) -> str: assert response.messages[-1].text == "I broke out of the function invocation loop..." # Failsafe response +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_limits_parallel_invocations(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls caps total function invocations across iterations with parallel calls.""" + exec_counter = 0 + + @tool(name="search", approval_mode="never_require") + def search_func(query: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Result for {query}" + + # Each iteration returns 3 parallel tool calls + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="1a", name="search", arguments='{"query": "q1"}'), + Content.from_function_call(call_id="1b", name="search", arguments='{"query": "q2"}'), + Content.from_function_call(call_id="1c", name="search", arguments='{"query": "q3"}'), + ], + ) + ), + # Second iteration: 3 more parallel calls (total would be 6, exceeding limit of 5) + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="2a", name="search", arguments='{"query": "q4"}'), + Content.from_function_call(call_id="2b", name="search", 
arguments='{"query": "q5"}'), + Content.from_function_call(call_id="2c", name="search", arguments='{"query": "q6"}'), + ], + ) + ), + # Final response after tool_choice="none" is forced + ChatResponse(messages=Message(role="assistant", text="done")), + ] + + # Allow many iterations but cap total function calls at 5 + chat_client_base.function_invocation_configuration["max_function_calls"] = 5 + + response = await chat_client_base.get_response( + [Message(role="user", text="search")], options={"tool_choice": "auto", "tools": [search_func]} + ) + + # First iteration executes 3 calls (total=3, under limit). + # Second iteration executes 3 more (total=6, reaches limit) then forces tool_choice="none". + # The loop completes the current batch before stopping. + assert exec_counter == 6 + assert "broke out" in response.messages[-1].text + + +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_single_calls_per_iteration(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls works with single tool calls per iteration.""" + exec_counter = 0 + + @tool(name="lookup", approval_mode="never_require") + def lookup_func(key: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Value for {key}" + + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="1", name="lookup", arguments='{"key": "a"}'), + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="2", name="lookup", arguments='{"key": "b"}'), + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="3", name="lookup", arguments='{"key": "c"}'), + ], + ) + ), + # After limit is reached + ChatResponse(messages=Message(role="assistant", text="all done")), + ] + + chat_client_base.function_invocation_configuration["max_function_calls"] = 2 + + response 
= await chat_client_base.get_response( + [Message(role="user", text="look up keys")], options={"tool_choice": "auto", "tools": [lookup_func]} + ) + + # 2 single calls executed, then limit reached, tool_choice="none" forced + assert exec_counter == 2 + assert "broke out" in response.messages[-1].text + + +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_none_means_unlimited(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls=None (default) allows unlimited function calls.""" + exec_counter = 0 + + @tool(name="do_thing", approval_mode="never_require") + def do_thing_func(arg: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Done {arg}" + + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id=str(i), name="do_thing", arguments=f'{{"arg": "v{i}"}}'), + ], + ) + ) + for i in range(5) + ] + [ChatResponse(messages=Message(role="assistant", text="finished"))] + + # Explicitly set to None (default) — should not limit + chat_client_base.function_invocation_configuration["max_function_calls"] = None + + response = await chat_client_base.get_response( + [Message(role="user", text="do things")], options={"tool_choice": "auto", "tools": [do_thing_func]} + ) + + assert exec_counter == 5 + assert response.messages[-1].text == "finished" + + async def test_function_invocation_config_enabled_false(chat_client_base: SupportsChatGetResponse): """Test that setting enabled=False disables function invocation.""" exec_counter = 0 @@ -1236,6 +1373,33 @@ async def test_function_invocation_config_validation_max_consecutive_errors(): normalize_function_invocation_configuration({"max_consecutive_errors_per_request": -1}) +async def test_function_invocation_config_validation_max_function_calls(): + """Test that max_function_calls validation works correctly.""" + from agent_framework import 
normalize_function_invocation_configuration + + # Default is None (unlimited) + config = normalize_function_invocation_configuration(None) + assert config["max_function_calls"] is None + + # Valid values + config = normalize_function_invocation_configuration({"max_function_calls": 1}) + assert config["max_function_calls"] == 1 + + config = normalize_function_invocation_configuration({"max_function_calls": 100}) + assert config["max_function_calls"] == 100 + + # None is valid (unlimited) + config = normalize_function_invocation_configuration({"max_function_calls": None}) + assert config["max_function_calls"] is None + + # Invalid value (less than 1) + with pytest.raises(ValueError, match="max_function_calls must be at least 1 or None"): + normalize_function_invocation_configuration({"max_function_calls": 0}) + + with pytest.raises(ValueError, match="max_function_calls must be at least 1 or None"): + normalize_function_invocation_configuration({"max_function_calls": -1}) + + async def test_argument_validation_error_with_detailed_errors(chat_client_base: SupportsChatGetResponse): """Test that argument validation errors include details when include_detailed_errors=True.""" diff --git a/python/samples/01-get-started/06_host_your_agent.py b/python/samples/01-get-started/06_host_your_agent.py index d858d61773..6bc87b48b4 100644 --- a/python/samples/01-get-started/06_host_your_agent.py +++ b/python/samples/01-get-started/06_host_your_agent.py @@ -1,5 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. +# ruff: noqa: E305 +# fmt: off from typing import Any from agent_framework.azure import AgentFunctionApp, AzureOpenAIChatClient diff --git a/python/samples/02-agents/tools/control_total_tool_executions.py b/python/samples/02-agents/tools/control_total_tool_executions.py new file mode 100644 index 0000000000..eaad6e225b --- /dev/null +++ b/python/samples/02-agents/tools/control_total_tool_executions.py @@ -0,0 +1,345 @@ +# Copyright (c) Microsoft. All rights reserved. 
+
+import asyncio
+from typing import Annotated
+
+from agent_framework import tool
+from agent_framework.openai import OpenAIResponsesClient
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+This sample demonstrates all the ways to control how many times tools are
+executed during an agent run. There are three complementary mechanisms:
+
+1. ``max_iterations`` (on the chat client) — caps the number of **LLM
+   roundtrips**. Each roundtrip may invoke one or more tools in parallel.
+
+2. ``max_function_calls`` (on the chat client) — caps the **total number of
+   individual function invocations** across all iterations within a single
+   request. This is the primary knob for cost control. The limit is
+   best-effort: it is checked after each batch of parallel tool calls
+   completes. For example, if max_function_calls is 3 and the model requests
+   5 parallel calls in one iteration, all 5 will execute; the loop then stops
+   invoking tools and forces the model to produce a final text response.
+
+3. ``max_invocations`` (on a tool) — caps the **lifetime invocation count**
+   of a specific tool instance. The counter is never automatically reset,
+   so it accumulates across requests when tools are singletons.
+
+   Because ``max_invocations`` is tracked on the ``FunctionTool`` *instance*,
+   wrapping the same callable with ``@tool`` multiple times creates independent
+   counters. This lets you give different agents different invocation budgets
+   for the same underlying function.
+ +Choose the right mechanism for your scenario: +• Prevent runaway LLM loops → ``max_iterations`` +• Best-effort cap on tool execution cost per request → ``max_function_calls`` + (checked between iterations; a single batch of parallel calls may overshoot) +• Best-effort limit a specific expensive tool globally → ``max_invocations`` +• Per-agent limits on shared tools → wrap the callable separately per agent +""" + + +# --- Tool definitions --- + + +# NOTE: approval_mode="never_require" is for sample brevity. +# Use "always_require" in production; see function_tool_with_approval.py. +@tool(approval_mode="never_require") +def search_web(query: Annotated[str, "The search query to look up."]) -> str: + """Search the web for information.""" + return f"Results for '{query}': [page1, page2, page3]" + + +@tool(approval_mode="never_require") +def get_weather(city: Annotated[str, "The city to get the weather for."]) -> str: + """Get the current weather for a city.""" + return f"Weather in {city}: Sunny, 22°C" + + +@tool(approval_mode="never_require", max_invocations=2) +def call_expensive_api( + prompt: Annotated[str, "The prompt to send to the expensive API."], +) -> str: + """Call a very expensive external API. Limited to 2 calls ever.""" + return f"Expensive result for '{prompt}'" + + +# --- Scenario 1: max_iterations (limit LLM roundtrips) --- + + +async def scenario_max_iterations(): + """Demonstrate max_iterations: limits how many times we loop back to the LLM. + + Each iteration may invoke one or more tools in parallel, so this does NOT + directly limit the total number of function executions. + """ + print("=" * 60) + print("Scenario 1: max_iterations — limit LLM roundtrips") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Set max_iterations to 3 — the tool loop will run at most 3 roundtrips + # to the model before forcing a text response. 
+ client.function_invocation_configuration["max_iterations"] = 3 + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + + agent = client.as_agent( + name="ResearchAgent", + instructions=( + "You are a research assistant. Use the search_web tool to answer " + "the user's question. Search for multiple aspects of the topic." + ), + tools=[search_web, get_weather], + ) + + response = await agent.run("Tell me about the weather in Paris, London, and Tokyo.") + print(f" Response: {response.text[:200]}...") + print() + + +# --- Scenario 2: max_function_calls (limit total tool executions per request) --- + + +async def scenario_max_function_calls(): + """Demonstrate max_function_calls: caps total individual tool invocations. + + Unlike max_iterations, this counts every individual function execution — + even when several tools run in parallel within a single iteration. + """ + print("=" * 60) + print("Scenario 2: max_function_calls — limit total tool executions") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Allow many iterations but cap total function calls to 4. + # If the model requests 3 parallel searches per iteration, after 2 + # iterations (6 calls) the limit is hit and the loop stops. + client.function_invocation_configuration["max_iterations"] = 20 + client.function_invocation_configuration["max_function_calls"] = 4 + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + print(f" max_function_calls = {client.function_invocation_configuration['max_function_calls']}") + + agent = client.as_agent( + name="ResearchAgent", + instructions=( + "You are a research assistant. Use the search_web and get_weather " + "tools to answer the user's question comprehensively." + ), + tools=[search_web, get_weather], + ) + + response = await agent.run( + "Search for the weather in Paris, London, Tokyo, " + "New York, and Sydney, and also search for best travel tips." 
+ ) + print(f" Response: {response.text[:200]}...") + print() + + +# --- Scenario 3: max_invocations (lifetime limit on a specific tool) --- + + +async def scenario_max_invocations(): + """Demonstrate max_invocations: caps how many times a specific tool instance + can be called across ALL requests. + + Note: this counter lives on the tool instance, so for module-level tools + it accumulates globally. Use tool.invocation_count to inspect or reset. + """ + print("=" * 60) + print("Scenario 3: max_invocations — lifetime cap on a tool") + print("=" * 60) + + agent = OpenAIResponsesClient().as_agent( + name="APIAgent", + instructions="Use call_expensive_api when asked to analyze something.", + tools=[call_expensive_api], + ) + session = agent.create_session() + + # 1. First call — succeeds (invocation_count: 0 → 1) + print(f" Before call 1: invocation_count = {call_expensive_api.invocation_count}") + response = await agent.run("Analyze the market trends for AI.", session=session) + print(f" After call 1: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 2. Second call — succeeds (invocation_count: 1 → 2) + response = await agent.run("Analyze the market trends for cloud computing.", session=session) + print(f" After call 2: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 3. Third call — tool refuses (max_invocations=2 reached) + response = await agent.run("Analyze the market trends for quantum computing.", session=session) + print(f" After call 3: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 4. 
Reset the counter to allow more calls + print() + print(" Resetting invocation_count to 0...") + call_expensive_api.invocation_count = 0 + print(f" invocation_count = {call_expensive_api.invocation_count}") + print() + + +# --- Scenario 4: Per-agent limits via separate tool wrappers --- + + +async def scenario_per_agent_tool_limits(): + """Demonstrate per-agent max_invocations using separate tool wrappers. + + Because max_invocations is tracked on the FunctionTool *instance*, you can + wrap the same callable with ``@tool`` multiple times to get independent + counters for different agents. This is useful when two agents share the + same underlying function but should have different invocation budgets. + """ + print("=" * 60) + print("Scenario 4: Per-agent limits via separate tool wrappers") + print("=" * 60) + + # The underlying callable — a plain function, no decorator. + def _do_lookup(query: Annotated[str, "Search query."]) -> str: + """Look up information.""" + return f"Lookup result for '{query}'" + + # Wrap it twice with different limits. Each wrapper is a separate + # FunctionTool instance with its own invocation_count. 
+ agent_a_lookup = tool(name="lookup", approval_mode="never_require", max_invocations=2)(_do_lookup) + agent_b_lookup = tool(name="lookup", approval_mode="never_require", max_invocations=5)(_do_lookup) + + client = OpenAIResponsesClient() + agent_a = client.as_agent( + name="AgentA", + instructions="Use the lookup tool to answer questions.", + tools=[agent_a_lookup], + ) + agent_b = client.as_agent( + name="AgentB", + instructions="Use the lookup tool to answer questions.", + tools=[agent_b_lookup], + ) + + print(f" agent_a_lookup.max_invocations = {agent_a_lookup.max_invocations}") + print(f" agent_b_lookup.max_invocations = {agent_b_lookup.max_invocations}") + + # Agent A uses its budget + session_a = agent_a.create_session() + await agent_a.run("Look up AI trends", session=session_a) + await agent_a.run("Look up cloud trends", session=session_a) + + # Agent B's counter is independent — still at 0 + session_b = agent_b.create_session() + await agent_b.run("Look up quantum computing", session=session_b) + + print(f" agent_a_lookup.invocation_count = {agent_a_lookup.invocation_count} (limit {agent_a_lookup.max_invocations})") + print(f" agent_b_lookup.invocation_count = {agent_b_lookup.invocation_count} (limit {agent_b_lookup.max_invocations})") + print(" → Agent A hit its limit; Agent B used 1 of 5.") + print() + + +# --- Scenario 5: Combining all three mechanisms --- + + +async def scenario_combined(): + """Demonstrate using all three mechanisms together for defense in depth.""" + print("=" * 60) + print("Scenario 5: Combined — all mechanisms together") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Configure the client with both iteration and function call limits. 
+ client.function_invocation_configuration["max_iterations"] = 5 # max 5 LLM roundtrips + client.function_invocation_configuration["max_function_calls"] = 8 # max 8 total tool calls + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + print(f" max_function_calls = {client.function_invocation_configuration['max_function_calls']}") + + # 2. Use a tool with a lifetime invocation limit. + @tool(approval_mode="never_require", max_invocations=3) + def premium_lookup(topic: Annotated[str, "Topic to look up."]) -> str: + """Look up premium data (max 3 calls ever).""" + return f"Premium data for '{topic}'" + + print(f" premium_lookup.max_invocations = {premium_lookup.max_invocations}") + + agent = client.as_agent( + name="MultiToolAgent", + instructions="Use all available tools to answer comprehensively.", + tools=[search_web, get_weather, premium_lookup], + ) + + # 3. Run a query that could trigger many tool calls. + response = await agent.run( + "Research the weather and tourism info for Paris, London, Tokyo, " + "New York, and Sydney. Use premium_lookup for the top 3 cities." + ) + print(f" Response: {response.text[:200]}...") + print(f" premium_lookup.invocation_count = {premium_lookup.invocation_count}") + print() + + +# --- Entry point --- + + +async def main(): + await scenario_max_iterations() + await scenario_max_function_calls() + await scenario_max_invocations() + await scenario_per_agent_tool_limits() + await scenario_combined() + + +""" +Sample output: + +============================================================ +Scenario 1: max_iterations — limit LLM roundtrips +============================================================ + max_iterations = 3 + Response: The weather in Paris is sunny at 22°C, London is sunny at 22°C, and Tokyo is sunny at 22°C... 
+============================================================ +Scenario 2: max_function_calls — limit total tool executions +============================================================ + max_iterations = 20 + max_function_calls = 4 + Response: Based on my research, Paris is sunny at 22°C, London is sunny at 22°C... +============================================================ +Scenario 3: max_invocations — lifetime cap on a tool +============================================================ + Before call 1: invocation_count = 0 + After call 1: invocation_count = 1 + Response: Based on the analysis, the AI market is showing strong growth trends... + After call 2: invocation_count = 2 + Response: The cloud computing market continues to expand with key trends in... + After call 3: invocation_count = 2 + Response: I'm unable to use the analysis tool right now as it has reached its limit... + + Resetting invocation_count to 0... + invocation_count = 0 + +============================================================ +Scenario 4: Per-agent limits via separate tool wrappers +============================================================ + agent_a_lookup.max_invocations = 2 + agent_b_lookup.max_invocations = 5 + agent_a_lookup.invocation_count = 2 (limit 2) + agent_b_lookup.invocation_count = 1 (limit 5) + → Agent A hit its limit; Agent B used 1 of 5. + +============================================================ +Scenario 5: Combined — all mechanisms together +============================================================ + max_iterations = 5 + max_function_calls = 8 + premium_lookup.max_invocations = 3 + Response: Here's a comprehensive overview of the weather and tourism for the cities... + premium_lookup.invocation_count = 3 +""" + +if __name__ == "__main__": + asyncio.run(main())