From 40e8004d3231ad4fb8545e9732b5c21dcc580d0a Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 12:04:58 +0100 Subject: [PATCH 1/7] Add max_function_calls to FunctionInvocationConfiguration (#2329) Add a new per-request max_function_calls setting to FunctionInvocationConfiguration that limits the total number of individual function invocations across all iterations within a single get_response call. This complements max_iterations (which limits LLM roundtrips) by providing a hard cap on actual tool executions regardless of parallelism. - Add max_function_calls field to FunctionInvocationConfiguration (default: None/unlimited) - Track cumulative function call count in both streaming and non-streaming tool loops - Force tool_choice='none' when the limit is reached - Add validation in normalize_function_invocation_configuration - Improve docstrings for FunctionInvocationConfiguration, FunctionTool, and @tool to clarify semantics of max_iterations vs max_function_calls vs max_invocations - Add tests for parallel calls, single calls, unlimited mode, and config validation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/core/agent_framework/_tools.py | 121 +++++++++---- .../core/test_function_invocation_logic.py | 164 ++++++++++++++++++ 2 files changed, 254 insertions(+), 31 deletions(-) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 3357a984d6..5b1cb275bf 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -252,8 +252,19 @@ def __init__( description: A description of the function. approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). - max_invocations: The maximum number of times this function can be invoked. - If None, there is no limit. Should be at least 1. 
+ max_invocations: The maximum number of times this function can be invoked + across the **lifetime of this tool instance**. If None (default), + there is no limit. Should be at least 1. + + .. note:: + This counter lives on the tool instance and is never automatically + reset. For module-level or singleton tools in long-running + applications, the counter accumulates across all requests. Use + :attr:`invocation_count` to inspect or reset the counter manually, + or consider using + :attr:`FunctionInvocationConfiguration.max_function_calls + ` for per-request limits instead. + max_invocation_exceptions: The maximum number of exceptions allowed during invocations. If None, there is no limit. Should be at least 1. additional_properties: Additional properties to set on the function. @@ -1130,8 +1141,10 @@ def tool( function's signature. Defaults to ``None`` (infer from signature). approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). - max_invocations: The maximum number of times this function can be invoked. - If None, there is no limit, should be at least 1. + max_invocations: The maximum number of times this function can be invoked + across the **lifetime of this tool instance**. If None (default), there is + no limit. Should be at least 1. For per-request limits, use + ``FunctionInvocationConfiguration["max_function_calls"]`` instead. max_invocation_exceptions: The maximum number of exceptions allowed during invocations. If None, there is no limit, should be at least 1. additional_properties: Additional properties to set on the function. @@ -1247,43 +1260,49 @@ def wrapper(f: Callable[..., Any]) -> FunctionTool: class FunctionInvocationConfiguration(TypedDict, total=False): """Configuration for function invocation in chat clients. + The configuration controls the tool execution loop that runs when the model + requests function calls. 
Key settings: + + - ``enabled``: Master switch for the function invocation loop. + - ``max_iterations``: Limits the number of **LLM roundtrips** (iterations). + Each iteration may execute one or more function calls in parallel, so + this does *not* directly limit the total number of function executions. + - ``max_function_calls``: Limits the **total number of individual function + invocations** across all iterations within a single request. This is the + primary knob for controlling cost and preventing runaway tool usage. When + the limit is reached, the loop stops requesting tools and forces a text + response (calls already in the current batch still run). Default is ``None`` (unlimited). + - ``max_consecutive_errors_per_request``: How many consecutive errors + before abandoning the tool loop for this request. + - ``terminate_on_unknown_calls``: Whether to raise an error when the model + requests a function that is not in the tool map. + - ``additional_tools``: Extra tools available during execution but not + advertised to the model in the tool list. + - ``include_detailed_errors``: Whether to include exception details in the + function result returned to the model. + + Note: + ``max_iterations`` and ``max_function_calls`` serve complementary purposes. + ``max_iterations`` caps the number of model round-trips regardless of how + many tools are called per trip. ``max_function_calls`` caps the cumulative + number of individual tool executions regardless of how they are distributed + across iterations. + Example: .. 
code-block:: python + from agent_framework.openai import OpenAIChatClient - # Create an OpenAI chat client client = OpenAIChatClient(api_key="your_api_key") - # Disable function invocation - client.function_invocation_configuration["enabled"] = False - - # Set maximum iterations to 10 - client.function_invocation_configuration["max_iterations"] = 10 - - # Enable termination on unknown function calls - client.function_invocation_configuration["terminate_on_unknown_calls"] = True - - # Add additional tools for function execution - client.function_invocation_configuration["additional_tools"] = [my_custom_tool] - - # Enable detailed error information in function results - client.function_invocation_configuration["include_detailed_errors"] = True - - # You can also create a new configuration dict if needed - new_config: FunctionInvocationConfiguration = { - "enabled": True, - "max_iterations": 20, - "terminate_on_unknown_calls": False, - "additional_tools": [another_tool], - "include_detailed_errors": False, - } - - # and then assign it to the client - client.function_invocation_configuration = new_config + # Limit to 5 LLM roundtrips and 20 total function executions + client.function_invocation_configuration["max_iterations"] = 5 + client.function_invocation_configuration["max_function_calls"] = 20 """ enabled: bool max_iterations: int + max_function_calls: int | None max_consecutive_errors_per_request: int terminate_on_unknown_calls: bool additional_tools: Sequence[FunctionTool] @@ -1296,6 +1315,7 @@ def normalize_function_invocation_configuration( normalized: FunctionInvocationConfiguration = { "enabled": True, "max_iterations": DEFAULT_MAX_ITERATIONS, + "max_function_calls": None, "max_consecutive_errors_per_request": DEFAULT_MAX_CONSECUTIVE_ERRORS_PER_REQUEST, "terminate_on_unknown_calls": False, "additional_tools": [], @@ -1305,6 +1325,8 @@ def normalize_function_invocation_configuration( normalized.update(config) if normalized["max_iterations"] < 1: raise 
ValueError("max_iterations must be at least 1.") + if normalized["max_function_calls"] is not None and normalized["max_function_calls"] < 1: + raise ValueError("max_function_calls must be at least 1 or None.") if normalized["max_consecutive_errors_per_request"] < 0: raise ValueError("max_consecutive_errors_per_request must be 0 or more.") if normalized["additional_tools"] is None: @@ -1816,6 +1838,7 @@ class FunctionRequestResult(TypedDict, total=False): result_message: The message containing function call results, if any. update_role: The role to update for the next message, if any. function_call_results: The list of function call results, if any. + function_call_count: The number of function calls executed in this processing step. """ action: Literal["return", "continue", "stop"] @@ -1823,6 +1846,7 @@ class FunctionRequestResult(TypedDict, total=False): result_message: Message | None update_role: Literal["assistant", "tool"] | None function_call_results: list[Content] | None + function_call_count: int def _handle_function_call_results( @@ -1913,6 +1937,7 @@ async def _process_function_requests( max_errors, ) _replace_approval_contents_with_results(prepped_messages, fcc_todo, approved_function_results) + executed_count = sum(1 for r in approved_function_results if r.type == "function_result") # Continue to call chat client with updated messages (containing function results) # so it can generate the final response return { @@ -1921,6 +1946,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": executed_count, } if response is None or fcc_messages is None: @@ -1930,6 +1956,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": 0, } tools = _extract_tools(tool_options) @@ -1942,6 +1969,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, 
"function_call_results": None, + "function_call_count": 0, } function_call_results, should_terminate, had_errors = await execute_function_calls( @@ -1958,6 +1986,7 @@ async def _process_function_requests( max_errors=max_errors, ) result["function_call_results"] = list(function_call_results) + result["function_call_count"] = sum(1 for r in function_call_results if r.type == "function_result") # If middleware requested termination, change action to return if should_terminate: result["action"] = "return" @@ -2071,6 +2100,8 @@ async def _get_response() -> ChatResponse: nonlocal mutable_options nonlocal filtered_kwargs errors_in_a_row: int = 0 + total_function_calls: int = 0 + max_function_calls: int | None = self.function_invocation_configuration.get("max_function_calls") prepped_messages = list(messages) fcc_messages: list[Message] = [] response: ChatResponse | None = None @@ -2094,6 +2125,7 @@ async def _get_response() -> ChatResponse: response = ChatResponse(messages=prepped_messages) break errors_in_a_row = approval_result["errors_in_a_row"] + total_function_calls += approval_result.get("function_call_count", 0) response = await super_get_response( messages=prepped_messages, @@ -2118,10 +2150,22 @@ async def _get_response() -> ChatResponse: ) if result["action"] == "return": return response + total_function_calls += result.get("function_call_count", 0) if result["action"] == "stop": # Error threshold reached: force a final non-tool turn so # function_call_output items are submitted before exit. mutable_options["tool_choice"] = "none" + elif ( + max_function_calls is not None + and total_function_calls >= max_function_calls + ): + logger.info( + "Maximum function calls reached (%d/%d). 
" + "Stopping further function calls for this request.", + total_function_calls, + max_function_calls, + ) + mutable_options["tool_choice"] = "none" errors_in_a_row = result["errors_in_a_row"] # When tool_choice is 'required', reset tool_choice after one iteration to avoid infinite loops @@ -2167,6 +2211,8 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: nonlocal mutable_options nonlocal stream_result_hooks errors_in_a_row: int = 0 + total_function_calls: int = 0 + max_function_calls: int | None = self.function_invocation_configuration.get("max_function_calls") prepped_messages = list(messages) fcc_messages: list[Message] = [] response: ChatResponse | None = None @@ -2187,6 +2233,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: execute_function_calls=execute_function_calls, ) errors_in_a_row = approval_result["errors_in_a_row"] + total_function_calls += approval_result.get("function_call_count", 0) if approval_result["action"] == "stop": mutable_options["tool_choice"] = "none" return @@ -2232,6 +2279,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: execute_function_calls=execute_function_calls, ) errors_in_a_row = result["errors_in_a_row"] + total_function_calls += result.get("function_call_count", 0) if role := result["update_role"]: yield ChatResponseUpdate( contents=result["function_call_results"] or [], @@ -2243,6 +2291,17 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: mutable_options["tool_choice"] = "none" elif result["action"] != "continue": return + elif ( + max_function_calls is not None + and total_function_calls >= max_function_calls + ): + logger.info( + "Maximum function calls reached (%d/%d). 
" + "Stopping further function calls for this request.", + total_function_calls, + max_function_calls, + ) + mutable_options["tool_choice"] = "none" # When tool_choice is 'required', reset the tool_choice after one iteration to avoid infinite loops if mutable_options.get("tool_choice") == "required" or ( diff --git a/python/packages/core/tests/core/test_function_invocation_logic.py b/python/packages/core/tests/core/test_function_invocation_logic.py index 1dfd257942..b4213d6029 100644 --- a/python/packages/core/tests/core/test_function_invocation_logic.py +++ b/python/packages/core/tests/core/test_function_invocation_logic.py @@ -880,6 +880,143 @@ def ai_func(arg1: str) -> str: assert response.messages[-1].text == "I broke out of the function invocation loop..." # Failsafe response +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_limits_parallel_invocations(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls caps total function invocations across iterations with parallel calls.""" + exec_counter = 0 + + @tool(name="search", approval_mode="never_require") + def search_func(query: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Result for {query}" + + # Each iteration returns 3 parallel tool calls + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="1a", name="search", arguments='{"query": "q1"}'), + Content.from_function_call(call_id="1b", name="search", arguments='{"query": "q2"}'), + Content.from_function_call(call_id="1c", name="search", arguments='{"query": "q3"}'), + ], + ) + ), + # Second iteration: 3 more parallel calls (total would be 6, exceeding limit of 5) + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="2a", name="search", arguments='{"query": "q4"}'), + Content.from_function_call(call_id="2b", name="search", 
arguments='{"query": "q5"}'), + Content.from_function_call(call_id="2c", name="search", arguments='{"query": "q6"}'), + ], + ) + ), + # Final response after tool_choice="none" is forced + ChatResponse(messages=Message(role="assistant", text="done")), + ] + + # Allow many iterations but cap total function calls at 5 + chat_client_base.function_invocation_configuration["max_function_calls"] = 5 + + response = await chat_client_base.get_response( + [Message(role="user", text="search")], options={"tool_choice": "auto", "tools": [search_func]} + ) + + # First iteration executes 3 calls (total=3, under limit). + # Second iteration executes 3 more (total=6, reaches limit) then forces tool_choice="none". + # The loop completes the current batch before stopping. + assert exec_counter == 6 + assert "broke out" in response.messages[-1].text + + +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_single_calls_per_iteration(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls works with single tool calls per iteration.""" + exec_counter = 0 + + @tool(name="lookup", approval_mode="never_require") + def lookup_func(key: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Value for {key}" + + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="1", name="lookup", arguments='{"key": "a"}'), + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="2", name="lookup", arguments='{"key": "b"}'), + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="3", name="lookup", arguments='{"key": "c"}'), + ], + ) + ), + # After limit is reached + ChatResponse(messages=Message(role="assistant", text="all done")), + ] + + chat_client_base.function_invocation_configuration["max_function_calls"] = 2 + + response 
= await chat_client_base.get_response( + [Message(role="user", text="look up keys")], options={"tool_choice": "auto", "tools": [lookup_func]} + ) + + # 2 single calls executed, then limit reached, tool_choice="none" forced + assert exec_counter == 2 + assert "broke out" in response.messages[-1].text + + +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_none_means_unlimited(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls=None (default) allows unlimited function calls.""" + exec_counter = 0 + + @tool(name="do_thing", approval_mode="never_require") + def do_thing_func(arg: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Done {arg}" + + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id=str(i), name="do_thing", arguments=f'{{"arg": "v{i}"}}'), + ], + ) + ) + for i in range(5) + ] + [ChatResponse(messages=Message(role="assistant", text="finished"))] + + # Explicitly set to None (default) — should not limit + chat_client_base.function_invocation_configuration["max_function_calls"] = None + + response = await chat_client_base.get_response( + [Message(role="user", text="do things")], options={"tool_choice": "auto", "tools": [do_thing_func]} + ) + + assert exec_counter == 5 + assert response.messages[-1].text == "finished" + + async def test_function_invocation_config_enabled_false(chat_client_base: SupportsChatGetResponse): """Test that setting enabled=False disables function invocation.""" exec_counter = 0 @@ -1236,6 +1373,33 @@ async def test_function_invocation_config_validation_max_consecutive_errors(): normalize_function_invocation_configuration({"max_consecutive_errors_per_request": -1}) +async def test_function_invocation_config_validation_max_function_calls(): + """Test that max_function_calls validation works correctly.""" + from agent_framework import 
normalize_function_invocation_configuration + + # Default is None (unlimited) + config = normalize_function_invocation_configuration(None) + assert config["max_function_calls"] is None + + # Valid values + config = normalize_function_invocation_configuration({"max_function_calls": 1}) + assert config["max_function_calls"] == 1 + + config = normalize_function_invocation_configuration({"max_function_calls": 100}) + assert config["max_function_calls"] == 100 + + # None is valid (unlimited) + config = normalize_function_invocation_configuration({"max_function_calls": None}) + assert config["max_function_calls"] is None + + # Invalid value (less than 1) + with pytest.raises(ValueError, match="max_function_calls must be at least 1 or None"): + normalize_function_invocation_configuration({"max_function_calls": 0}) + + with pytest.raises(ValueError, match="max_function_calls must be at least 1 or None"): + normalize_function_invocation_configuration({"max_function_calls": -1}) + + async def test_argument_validation_error_with_detailed_errors(chat_client_base: SupportsChatGetResponse): """Test that argument validation errors include details when include_detailed_errors=True.""" From b66fea87173ef33dd2de23837267282e914a0090 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 12:06:40 +0100 Subject: [PATCH 2/7] Add sample for controlling total tool executions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Showcases all three mechanisms for limiting tool executions: 1. max_iterations — caps LLM roundtrips 2. max_function_calls — caps total individual function invocations per request 3. max_invocations — lifetime cap on a specific tool instance Plus a combined scenario demonstrating defense in depth. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../tools/control_total_tool_executions.py | 269 ++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 python/samples/02-agents/tools/control_total_tool_executions.py diff --git a/python/samples/02-agents/tools/control_total_tool_executions.py b/python/samples/02-agents/tools/control_total_tool_executions.py new file mode 100644 index 0000000000..7440e6d93a --- /dev/null +++ b/python/samples/02-agents/tools/control_total_tool_executions.py @@ -0,0 +1,269 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from typing import Annotated + +from agent_framework import tool +from agent_framework.openai import OpenAIResponsesClient +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +""" +This sample demonstrates all the ways to control how many times tools are +executed during an agent run. There are three complementary mechanisms: + +1. ``max_iterations`` (on the chat client) — caps the number of **LLM + roundtrips**. Each roundtrip may invoke one or more tools in parallel. + +2. ``max_function_calls`` (on the chat client) — caps the **total number of + individual function invocations** across all iterations within a single + request. This is the primary knob for cost control. + +3. ``max_invocations`` (on a tool) — caps the **lifetime invocation count** + of a specific tool instance. The counter is never automatically reset, + so it accumulates across requests when tools are singletons. + +Choose the right mechanism for your scenario: +• Prevent runaway LLM loops → ``max_iterations`` +• Hard cap on tool execution cost per request → ``max_function_calls`` +• Limit a specific expensive tool globally → ``max_invocations`` +""" + + +# --- Tool definitions --- + + +# NOTE: approval_mode="never_require" is for sample brevity. +# Use "always_require" in production; see function_tool_with_approval.py. 
+@tool(approval_mode="never_require") +def search_web(query: Annotated[str, "The search query to look up."]) -> str: + """Search the web for information.""" + return f"Results for '{query}': [page1, page2, page3]" + + +@tool(approval_mode="never_require") +def get_weather(city: Annotated[str, "The city to get the weather for."]) -> str: + """Get the current weather for a city.""" + return f"Weather in {city}: Sunny, 22°C" + + +@tool(approval_mode="never_require", max_invocations=2) +def call_expensive_api( + prompt: Annotated[str, "The prompt to send to the expensive API."], +) -> str: + """Call a very expensive external API. Limited to 2 calls ever.""" + return f"Expensive result for '{prompt}'" + + +# --- Scenario 1: max_iterations (limit LLM roundtrips) --- + + +async def scenario_max_iterations(): + """Demonstrate max_iterations: limits how many times we loop back to the LLM. + + Each iteration may invoke one or more tools in parallel, so this does NOT + directly limit the total number of function executions. + """ + print("=" * 60) + print("Scenario 1: max_iterations — limit LLM roundtrips") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Set max_iterations to 3 — the tool loop will run at most 3 roundtrips + # to the model before forcing a text response. + client.function_invocation_configuration["max_iterations"] = 3 + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + + agent = client.as_agent( + name="ResearchAgent", + instructions=( + "You are a research assistant. Use the search_web tool to answer " + "the user's question. Search for multiple aspects of the topic." 
+ ), + tools=[search_web, get_weather], + ) + + response = await agent.run("Tell me about the weather in Paris, London, and Tokyo.") + print(f" Response: {response.text[:200]}...") + print() + + +# --- Scenario 2: max_function_calls (limit total tool executions per request) --- + + +async def scenario_max_function_calls(): + """Demonstrate max_function_calls: caps total individual tool invocations. + + Unlike max_iterations, this counts every individual function execution — + even when several tools run in parallel within a single iteration. + """ + print("=" * 60) + print("Scenario 2: max_function_calls — limit total tool executions") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Allow many iterations but cap total function calls to 4. + # If the model requests 3 parallel searches per iteration, after 2 + # iterations (6 calls) the limit is hit and the loop stops. + client.function_invocation_configuration["max_iterations"] = 20 + client.function_invocation_configuration["max_function_calls"] = 4 + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + print(f" max_function_calls = {client.function_invocation_configuration['max_function_calls']}") + + agent = client.as_agent( + name="ResearchAgent", + instructions=( + "You are a research assistant. Use the search_web and get_weather " + "tools to answer the user's question comprehensively." + ), + tools=[search_web, get_weather], + ) + + response = await agent.run( + "Search for the weather in Paris, London, Tokyo, " + "New York, and Sydney, and also search for best travel tips." + ) + print(f" Response: {response.text[:200]}...") + print() + + +# --- Scenario 3: max_invocations (lifetime limit on a specific tool) --- + + +async def scenario_max_invocations(): + """Demonstrate max_invocations: caps how many times a specific tool instance + can be called across ALL requests. 
+ + Note: this counter lives on the tool instance, so for module-level tools + it accumulates globally. Use tool.invocation_count to inspect or reset. + """ + print("=" * 60) + print("Scenario 3: max_invocations — lifetime cap on a tool") + print("=" * 60) + + agent = OpenAIResponsesClient().as_agent( + name="APIAgent", + instructions="Use call_expensive_api when asked to analyze something.", + tools=[call_expensive_api], + ) + session = agent.create_session() + + # 1. First call — succeeds (invocation_count: 0 → 1) + print(f" Before call 1: invocation_count = {call_expensive_api.invocation_count}") + response = await agent.run("Analyze the market trends for AI.", session=session) + print(f" After call 1: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 2. Second call — succeeds (invocation_count: 1 → 2) + response = await agent.run("Analyze the market trends for cloud computing.", session=session) + print(f" After call 2: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 3. Third call — tool refuses (max_invocations=2 reached) + response = await agent.run("Analyze the market trends for quantum computing.", session=session) + print(f" After call 3: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 4. Reset the counter to allow more calls + print() + print(" Resetting invocation_count to 0...") + call_expensive_api.invocation_count = 0 + print(f" invocation_count = {call_expensive_api.invocation_count}") + print() + + +# --- Scenario 4: Combining all three mechanisms --- + + +async def scenario_combined(): + """Demonstrate using all three mechanisms together for defense in depth.""" + print("=" * 60) + print("Scenario 4: Combined — all mechanisms together") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. 
Configure the client with both iteration and function call limits. + client.function_invocation_configuration["max_iterations"] = 5 # max 5 LLM roundtrips + client.function_invocation_configuration["max_function_calls"] = 8 # max 8 total tool calls + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + print(f" max_function_calls = {client.function_invocation_configuration['max_function_calls']}") + + # 2. Use a tool with a lifetime invocation limit. + @tool(approval_mode="never_require", max_invocations=3) + def premium_lookup(topic: Annotated[str, "Topic to look up."]) -> str: + """Look up premium data (max 3 calls ever).""" + return f"Premium data for '{topic}'" + + print(f" premium_lookup.max_invocations = {premium_lookup.max_invocations}") + + agent = client.as_agent( + name="MultiToolAgent", + instructions="Use all available tools to answer comprehensively.", + tools=[search_web, get_weather, premium_lookup], + ) + + # 3. Run a query that could trigger many tool calls. + response = await agent.run( + "Research the weather and tourism info for Paris, London, Tokyo, " + "New York, and Sydney. Use premium_lookup for the top 3 cities." + ) + print(f" Response: {response.text[:200]}...") + print(f" premium_lookup.invocation_count = {premium_lookup.invocation_count}") + print() + + +# --- Entry point --- + + +async def main(): + await scenario_max_iterations() + await scenario_max_function_calls() + await scenario_max_invocations() + await scenario_combined() + + +""" +Sample output: + +============================================================ +Scenario 1: max_iterations — limit LLM roundtrips +============================================================ + max_iterations = 3 + Response: The weather in Paris is sunny at 22°C, London is sunny at 22°C, and Tokyo is sunny at 22°C... 
+============================================================ +Scenario 2: max_function_calls — limit total tool executions +============================================================ + max_iterations = 20 + max_function_calls = 4 + Response: Based on my research, Paris is sunny at 22°C, London is sunny at 22°C... +============================================================ +Scenario 3: max_invocations — lifetime cap on a tool +============================================================ + Before call 1: invocation_count = 0 + After call 1: invocation_count = 1 + Response: Based on the analysis, the AI market is showing strong growth trends... + After call 2: invocation_count = 2 + Response: The cloud computing market continues to expand with key trends in... + After call 3: invocation_count = 2 + Response: I'm unable to use the analysis tool right now as it has reached its limit... + + Resetting invocation_count to 0... + invocation_count = 0 + +============================================================ +Scenario 4: Combined — all mechanisms together +============================================================ + max_iterations = 5 + max_function_calls = 8 + premium_lookup.max_invocations = 3 + Response: Here's a comprehensive overview of the weather and tourism for the cities... + premium_lookup.invocation_count = 3 +""" + +if __name__ == "__main__": + asyncio.run(main()) From a7e7d3608d172c85486d9839c8a4ffcf48fab47f Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 12:12:39 +0100 Subject: [PATCH 3/7] Suppress ruff E305/fmt in hosting sample to preserve XML doc tags The XML snippet tags (# / # ) are used for docs extraction and must stay adjacent to the code they wrap. Both ruff check (E305) and ruff format add blank lines after the function definition, pushing the closing tag away. Suppress with ruff: noqa: E305 and fmt: off. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/samples/01-get-started/06_host_your_agent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/samples/01-get-started/06_host_your_agent.py b/python/samples/01-get-started/06_host_your_agent.py index d858d61773..6bc87b48b4 100644 --- a/python/samples/01-get-started/06_host_your_agent.py +++ b/python/samples/01-get-started/06_host_your_agent.py @@ -1,5 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. +# ruff: noqa: E305 +# fmt: off from typing import Any from agent_framework.azure import AgentFunctionApp, AzureOpenAIChatClient From 75eb425af19927cfd13236697da93501cf0aa0b6 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 12:51:13 +0100 Subject: [PATCH 4/7] Add per-agent tool wrapping scenario to control_total_tool_executions sample Show that wrapping the same callable with @tool multiple times creates independent FunctionTool instances with separate invocation counters, enabling per-agent max_invocations budgets for shared functions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../tools/control_total_tool_executions.py | 77 ++++++++++++++++++- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/python/samples/02-agents/tools/control_total_tool_executions.py b/python/samples/02-agents/tools/control_total_tool_executions.py index 7440e6d93a..3420c8842c 100644 --- a/python/samples/02-agents/tools/control_total_tool_executions.py +++ b/python/samples/02-agents/tools/control_total_tool_executions.py @@ -25,10 +25,16 @@ of a specific tool instance. The counter is never automatically reset, so it accumulates across requests when tools are singletons. + Because ``max_invocations`` is tracked on the ``FunctionTool`` *instance*, + wrapping the same callable with ``@tool`` multiple times creates independent + counters. This lets you give different agents different invocation budgets + for the same underlying function. 
+ Choose the right mechanism for your scenario: • Prevent runaway LLM loops → ``max_iterations`` • Hard cap on tool execution cost per request → ``max_function_calls`` • Limit a specific expensive tool globally → ``max_invocations`` +• Per-agent limits on shared tools → wrap the callable separately per agent """ @@ -176,13 +182,68 @@ async def scenario_max_invocations(): print() -# --- Scenario 4: Combining all three mechanisms --- +# --- Scenario 4: Per-agent limits via separate tool wrappers --- + + +async def scenario_per_agent_tool_limits(): + """Demonstrate per-agent max_invocations using separate tool wrappers. + + Because max_invocations is tracked on the FunctionTool *instance*, you can + wrap the same callable with ``@tool`` multiple times to get independent + counters for different agents. This is useful when two agents share the + same underlying function but should have different invocation budgets. + """ + print("=" * 60) + print("Scenario 4: Per-agent limits via separate tool wrappers") + print("=" * 60) + + # The underlying callable — a plain function, no decorator. + def _do_lookup(query: Annotated[str, "Search query."]) -> str: + """Look up information.""" + return f"Lookup result for '{query}'" + + # Wrap it twice with different limits. Each wrapper is a separate + # FunctionTool instance with its own invocation_count. 
+ agent_a_lookup = tool(name="lookup", approval_mode="never_require", max_invocations=2)(_do_lookup) + agent_b_lookup = tool(name="lookup", approval_mode="never_require", max_invocations=5)(_do_lookup) + + client = OpenAIResponsesClient() + agent_a = client.as_agent( + name="AgentA", + instructions="Use the lookup tool to answer questions.", + tools=[agent_a_lookup], + ) + agent_b = client.as_agent( + name="AgentB", + instructions="Use the lookup tool to answer questions.", + tools=[agent_b_lookup], + ) + + print(f" agent_a_lookup.max_invocations = {agent_a_lookup.max_invocations}") + print(f" agent_b_lookup.max_invocations = {agent_b_lookup.max_invocations}") + + # Agent A uses its budget + session_a = agent_a.create_session() + await agent_a.run("Look up AI trends", session=session_a) + await agent_a.run("Look up cloud trends", session=session_a) + + # Agent B's counter is independent — still at 0 + session_b = agent_b.create_session() + await agent_b.run("Look up quantum computing", session=session_b) + + print(f" agent_a_lookup.invocation_count = {agent_a_lookup.invocation_count} (limit {agent_a_lookup.max_invocations})") + print(f" agent_b_lookup.invocation_count = {agent_b_lookup.invocation_count} (limit {agent_b_lookup.max_invocations})") + print(" → Agent A hit its limit; Agent B used 1 of 5.") + print() + + +# --- Scenario 5: Combining all three mechanisms --- async def scenario_combined(): """Demonstrate using all three mechanisms together for defense in depth.""" print("=" * 60) - print("Scenario 4: Combined — all mechanisms together") + print("Scenario 5: Combined — all mechanisms together") print("=" * 60) client = OpenAIResponsesClient() @@ -224,6 +285,7 @@ async def main(): await scenario_max_iterations() await scenario_max_function_calls() await scenario_max_invocations() + await scenario_per_agent_tool_limits() await scenario_combined() @@ -256,7 +318,16 @@ async def main(): invocation_count = 0 
============================================================ -Scenario 4: Combined — all mechanisms together +Scenario 4: Per-agent limits via separate tool wrappers +============================================================ + agent_a_lookup.max_invocations = 2 + agent_b_lookup.max_invocations = 5 + agent_a_lookup.invocation_count = 2 (limit 2) + agent_b_lookup.invocation_count = 1 (limit 5) + → Agent A hit its limit; Agent B used 1 of 5. + +============================================================ +Scenario 5: Combined — all mechanisms together ============================================================ max_iterations = 5 max_function_calls = 8 From 438e2f54ea1ef6e0dbd6b03520b9cbbb05cddbc4 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 12:54:37 +0100 Subject: [PATCH 5/7] Clarify max_function_calls is a best-effort limit The limit is checked after each batch of parallel calls completes, so the current batch always runs to completion even if it overshoots the limit. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/packages/core/agent_framework/_tools.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 5b1cb275bf..2f09fd80ef 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -1272,6 +1272,11 @@ class FunctionInvocationConfiguration(TypedDict, total=False): primary knob for controlling cost and preventing runaway tool usage. When the limit is reached, the loop stops invoking tools and forces the model to produce a text response. Default is ``None`` (unlimited). + + This is a **best-effort** limit: it is checked *after* each batch of + parallel tool calls completes, not before. If the model requests 20 + parallel calls in a single iteration and the limit is 10, all 20 will + execute before the loop stops. 
- ``max_consecutive_errors_per_request``: How many consecutive errors before abandoning the tool loop for this request. - ``terminate_on_unknown_calls``: Whether to raise an error when the model @@ -2159,6 +2164,8 @@ async def _get_response() -> ChatResponse: max_function_calls is not None and total_function_calls >= max_function_calls ): + # Best-effort limit: checked after each batch of parallel calls completes, + # so the current batch always runs to completion even if it overshoots. logger.info( "Maximum function calls reached (%d/%d). " "Stopping further function calls for this request.", @@ -2295,6 +2302,8 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: max_function_calls is not None and total_function_calls >= max_function_calls ): + # Best-effort limit: checked after each batch of parallel calls completes, + # so the current batch always runs to completion even if it overshoots. logger.info( "Maximum function calls reached (%d/%d). " "Stopping further function calls for this request.", From 452d1d4da873254424d2f9ca34b03a416f921490 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 12:56:46 +0100 Subject: [PATCH 6/7] Address PR review: fix docstring reference, clarify best-effort in sample MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix malformed Sphinx :attr: role in FunctionTool docstring — use plain backtick reference instead - Update sample to say 'best-effort cap' instead of 'hard cap' for max_function_calls, noting it's checked between iterations - Parametrize pattern is correct (fixture override, matching existing tests) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/packages/core/agent_framework/_tools.py | 4 ++-- .../samples/02-agents/tools/control_total_tool_executions.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py 
index 2f09fd80ef..dd82a73852 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -262,8 +262,8 @@ def __init__( applications, the counter accumulates across all requests. Use :attr:`invocation_count` to inspect or reset the counter manually, or consider using - :attr:`FunctionInvocationConfiguration.max_function_calls - ` for per-request limits instead. + ``FunctionInvocationConfiguration["max_function_calls"]`` + for per-request limits instead. max_invocation_exceptions: The maximum number of exceptions allowed during invocations. If None, there is no limit. Should be at least 1. diff --git a/python/samples/02-agents/tools/control_total_tool_executions.py b/python/samples/02-agents/tools/control_total_tool_executions.py index 3420c8842c..63432367a2 100644 --- a/python/samples/02-agents/tools/control_total_tool_executions.py +++ b/python/samples/02-agents/tools/control_total_tool_executions.py @@ -32,7 +32,8 @@ Choose the right mechanism for your scenario: • Prevent runaway LLM loops → ``max_iterations`` -• Hard cap on tool execution cost per request → ``max_function_calls`` +• Best-effort cap on tool execution cost per request → ``max_function_calls`` + (checked between iterations; a single batch of parallel calls may overshoot) • Limit a specific expensive tool globally → ``max_invocations`` • Per-agent limits on shared tools → wrap the callable separately per agent """ From 4d003b382c5239ce5d04bbd7d9cc8f123da9c774 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Mon, 23 Feb 2026 21:02:36 +0100 Subject: [PATCH 7/7] clarify max_invocations limits --- python/packages/core/agent_framework/_tools.py | 6 +++++- .../02-agents/tools/control_total_tool_executions.py | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index dd82a73852..df0608e614 100644 --- 
a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -254,7 +254,11 @@ def __init__( Default is that approval is NOT required (``"never_require"``). max_invocations: The maximum number of times this function can be invoked across the **lifetime of this tool instance**. If None (default), - there is no limit. Should be at least 1. + there is no limit. Should be at least 1. If the tool is called multiple + times in one iteration, those calls will all execute; after that, the tool stops working. For example, + if max_invocations is 3 and the tool is called 5 times in a single iteration, + those 5 calls will complete, but any subsequent calls to the tool (in the same or future iterations) + will raise a ToolException. .. note:: This counter lives on the tool instance and is never automatically diff --git a/python/samples/02-agents/tools/control_total_tool_executions.py b/python/samples/02-agents/tools/control_total_tool_executions.py index 63432367a2..eaad6e225b 100644 --- a/python/samples/02-agents/tools/control_total_tool_executions.py +++ b/python/samples/02-agents/tools/control_total_tool_executions.py @@ -19,7 +19,11 @@ 2. ``max_function_calls`` (on the chat client) — caps the **total number of individual function invocations** across all iterations within a single - request. This is the primary knob for cost control. + request. This is the primary knob for cost control. This limit is best-effort: + it is checked between iterations, after each batch of parallel calls completes. For example, + if max_function_calls is 10 and the model requests 20 parallel calls in one iteration, + all 20 will execute, but the loop then stops invoking tools and forces + the model to produce a text response. 3. ``max_invocations`` (on a tool) — caps the **lifetime invocation count** of a specific tool instance.
The counter is never automatically reset, @@ -34,7 +38,7 @@ • Prevent runaway LLM loops → ``max_iterations`` • Best-effort cap on tool execution cost per request → ``max_function_calls`` (checked between iterations; a single batch of parallel calls may overshoot) -• Limit a specific expensive tool globally → ``max_invocations`` +• Best-effort limit on a specific expensive tool globally → ``max_invocations`` • Per-agent limits on shared tools → wrap the callable separately per agent """