diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 3357a984d6..df0608e614 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -252,8 +252,23 @@ def __init__( description: A description of the function. approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). - max_invocations: The maximum number of times this function can be invoked. - If None, there is no limit. Should be at least 1. + max_invocations: The maximum number of times this function can be invoked + across the **lifetime of this tool instance**. If None (default), + there is no limit. Should be at least 1. If the tool is called multiple + times in one iteration, those will execute, after that it will stop working. For example, + if max_invocations is 3 and the tool is called 5 times in a single iteration, + these will complete, but any subsequent calls to the tool (in the same or future iterations) + will raise a ToolException. + + .. note:: + This counter lives on the tool instance and is never automatically + reset. For module-level or singleton tools in long-running + applications, the counter accumulates across all requests. Use + :attr:`invocation_count` to inspect or reset the counter manually, + or consider using + ``FunctionInvocationConfiguration["max_function_calls"]`` + for per-request limits instead. + max_invocation_exceptions: The maximum number of exceptions allowed during invocations. If None, there is no limit. Should be at least 1. additional_properties: Additional properties to set on the function. @@ -1130,8 +1145,10 @@ def tool( function's signature. Defaults to ``None`` (infer from signature). approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). 
- max_invocations: The maximum number of times this function can be invoked. - If None, there is no limit, should be at least 1. + max_invocations: The maximum number of times this function can be invoked + across the **lifetime of this tool instance**. If None (default), there is + no limit. Should be at least 1. For per-request limits, use + ``FunctionInvocationConfiguration["max_function_calls"]`` instead. max_invocation_exceptions: The maximum number of exceptions allowed during invocations. If None, there is no limit, should be at least 1. additional_properties: Additional properties to set on the function. @@ -1247,43 +1264,54 @@ def wrapper(f: Callable[..., Any]) -> FunctionTool: class FunctionInvocationConfiguration(TypedDict, total=False): """Configuration for function invocation in chat clients. + The configuration controls the tool execution loop that runs when the model + requests function calls. Key settings: + + - ``enabled``: Master switch for the function invocation loop. + - ``max_iterations``: Limits the number of **LLM roundtrips** (iterations). + Each iteration may execute one or more function calls in parallel, so + this does *not* directly limit the total number of function executions. + - ``max_function_calls``: Limits the **total number of individual function + invocations** across all iterations within a single request. This is the + primary knob for controlling cost and preventing runaway tool usage. When + the limit is reached, the loop stops invoking tools and forces the model + to produce a text response. Default is ``None`` (unlimited). + + This is a **best-effort** limit: it is checked *after* each batch of + parallel tool calls completes, not before. If the model requests 20 + parallel calls in a single iteration and the limit is 10, all 20 will + execute before the loop stops. + - ``max_consecutive_errors_per_request``: How many consecutive errors + before abandoning the tool loop for this request. 
+ - ``terminate_on_unknown_calls``: Whether to raise an error when the model + requests a function that is not in the tool map. + - ``additional_tools``: Extra tools available during execution but not + advertised to the model in the tool list. + - ``include_detailed_errors``: Whether to include exception details in the + function result returned to the model. + + Note: + ``max_iterations`` and ``max_function_calls`` serve complementary purposes. + ``max_iterations`` caps the number of model round-trips regardless of how + many tools are called per trip. ``max_function_calls`` caps the cumulative + number of individual tool executions regardless of how they are distributed + across iterations. + Example: .. code-block:: python + from agent_framework.openai import OpenAIChatClient - # Create an OpenAI chat client client = OpenAIChatClient(api_key="your_api_key") - # Disable function invocation - client.function_invocation_configuration["enabled"] = False - - # Set maximum iterations to 10 - client.function_invocation_configuration["max_iterations"] = 10 - - # Enable termination on unknown function calls - client.function_invocation_configuration["terminate_on_unknown_calls"] = True - - # Add additional tools for function execution - client.function_invocation_configuration["additional_tools"] = [my_custom_tool] - - # Enable detailed error information in function results - client.function_invocation_configuration["include_detailed_errors"] = True - - # You can also create a new configuration dict if needed - new_config: FunctionInvocationConfiguration = { - "enabled": True, - "max_iterations": 20, - "terminate_on_unknown_calls": False, - "additional_tools": [another_tool], - "include_detailed_errors": False, - } - - # and then assign it to the client - client.function_invocation_configuration = new_config + # Limit to 5 LLM roundtrips and 20 total function executions + client.function_invocation_configuration["max_iterations"] = 5 + 
client.function_invocation_configuration["max_function_calls"] = 20 """ enabled: bool max_iterations: int + max_function_calls: int | None max_consecutive_errors_per_request: int terminate_on_unknown_calls: bool additional_tools: Sequence[FunctionTool] @@ -1296,6 +1324,7 @@ def normalize_function_invocation_configuration( normalized: FunctionInvocationConfiguration = { "enabled": True, "max_iterations": DEFAULT_MAX_ITERATIONS, + "max_function_calls": None, "max_consecutive_errors_per_request": DEFAULT_MAX_CONSECUTIVE_ERRORS_PER_REQUEST, "terminate_on_unknown_calls": False, "additional_tools": [], @@ -1305,6 +1334,8 @@ def normalize_function_invocation_configuration( normalized.update(config) if normalized["max_iterations"] < 1: raise ValueError("max_iterations must be at least 1.") + if normalized["max_function_calls"] is not None and normalized["max_function_calls"] < 1: + raise ValueError("max_function_calls must be at least 1 or None.") if normalized["max_consecutive_errors_per_request"] < 0: raise ValueError("max_consecutive_errors_per_request must be 0 or more.") if normalized["additional_tools"] is None: @@ -1816,6 +1847,7 @@ class FunctionRequestResult(TypedDict, total=False): result_message: The message containing function call results, if any. update_role: The role to update for the next message, if any. function_call_results: The list of function call results, if any. + function_call_count: The number of function calls executed in this processing step. 
""" action: Literal["return", "continue", "stop"] @@ -1823,6 +1855,7 @@ class FunctionRequestResult(TypedDict, total=False): result_message: Message | None update_role: Literal["assistant", "tool"] | None function_call_results: list[Content] | None + function_call_count: int def _handle_function_call_results( @@ -1913,6 +1946,7 @@ async def _process_function_requests( max_errors, ) _replace_approval_contents_with_results(prepped_messages, fcc_todo, approved_function_results) + executed_count = sum(1 for r in approved_function_results if r.type == "function_result") # Continue to call chat client with updated messages (containing function results) # so it can generate the final response return { @@ -1921,6 +1955,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": executed_count, } if response is None or fcc_messages is None: @@ -1930,6 +1965,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": 0, } tools = _extract_tools(tool_options) @@ -1942,6 +1978,7 @@ async def _process_function_requests( "result_message": None, "update_role": None, "function_call_results": None, + "function_call_count": 0, } function_call_results, should_terminate, had_errors = await execute_function_calls( @@ -1958,6 +1995,7 @@ async def _process_function_requests( max_errors=max_errors, ) result["function_call_results"] = list(function_call_results) + result["function_call_count"] = sum(1 for r in function_call_results if r.type == "function_result") # If middleware requested termination, change action to return if should_terminate: result["action"] = "return" @@ -2071,6 +2109,8 @@ async def _get_response() -> ChatResponse: nonlocal mutable_options nonlocal filtered_kwargs errors_in_a_row: int = 0 + total_function_calls: int = 0 + max_function_calls: int | None = 
self.function_invocation_configuration.get("max_function_calls") prepped_messages = list(messages) fcc_messages: list[Message] = [] response: ChatResponse | None = None @@ -2094,6 +2134,7 @@ async def _get_response() -> ChatResponse: response = ChatResponse(messages=prepped_messages) break errors_in_a_row = approval_result["errors_in_a_row"] + total_function_calls += approval_result.get("function_call_count", 0) response = await super_get_response( messages=prepped_messages, @@ -2118,10 +2159,24 @@ async def _get_response() -> ChatResponse: ) if result["action"] == "return": return response + total_function_calls += result.get("function_call_count", 0) if result["action"] == "stop": # Error threshold reached: force a final non-tool turn so # function_call_output items are submitted before exit. mutable_options["tool_choice"] = "none" + elif ( + max_function_calls is not None + and total_function_calls >= max_function_calls + ): + # Best-effort limit: checked after each batch of parallel calls completes, + # so the current batch always runs to completion even if it overshoots. + logger.info( + "Maximum function calls reached (%d/%d). 
" + "Stopping further function calls for this request.", + total_function_calls, + max_function_calls, + ) + mutable_options["tool_choice"] = "none" errors_in_a_row = result["errors_in_a_row"] # When tool_choice is 'required', reset tool_choice after one iteration to avoid infinite loops @@ -2167,6 +2222,8 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: nonlocal mutable_options nonlocal stream_result_hooks errors_in_a_row: int = 0 + total_function_calls: int = 0 + max_function_calls: int | None = self.function_invocation_configuration.get("max_function_calls") prepped_messages = list(messages) fcc_messages: list[Message] = [] response: ChatResponse | None = None @@ -2187,6 +2244,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: execute_function_calls=execute_function_calls, ) errors_in_a_row = approval_result["errors_in_a_row"] + total_function_calls += approval_result.get("function_call_count", 0) if approval_result["action"] == "stop": mutable_options["tool_choice"] = "none" return @@ -2232,6 +2290,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: execute_function_calls=execute_function_calls, ) errors_in_a_row = result["errors_in_a_row"] + total_function_calls += result.get("function_call_count", 0) if role := result["update_role"]: yield ChatResponseUpdate( contents=result["function_call_results"] or [], @@ -2243,6 +2302,19 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: mutable_options["tool_choice"] = "none" elif result["action"] != "continue": return + elif ( + max_function_calls is not None + and total_function_calls >= max_function_calls + ): + # Best-effort limit: checked after each batch of parallel calls completes, + # so the current batch always runs to completion even if it overshoots. + logger.info( + "Maximum function calls reached (%d/%d). 
" + "Stopping further function calls for this request.", + total_function_calls, + max_function_calls, + ) + mutable_options["tool_choice"] = "none" # When tool_choice is 'required', reset the tool_choice after one iteration to avoid infinite loops if mutable_options.get("tool_choice") == "required" or ( diff --git a/python/packages/core/tests/core/test_function_invocation_logic.py b/python/packages/core/tests/core/test_function_invocation_logic.py index 1dfd257942..b4213d6029 100644 --- a/python/packages/core/tests/core/test_function_invocation_logic.py +++ b/python/packages/core/tests/core/test_function_invocation_logic.py @@ -880,6 +880,143 @@ def ai_func(arg1: str) -> str: assert response.messages[-1].text == "I broke out of the function invocation loop..." # Failsafe response +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_limits_parallel_invocations(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls caps total function invocations across iterations with parallel calls.""" + exec_counter = 0 + + @tool(name="search", approval_mode="never_require") + def search_func(query: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Result for {query}" + + # Each iteration returns 3 parallel tool calls + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="1a", name="search", arguments='{"query": "q1"}'), + Content.from_function_call(call_id="1b", name="search", arguments='{"query": "q2"}'), + Content.from_function_call(call_id="1c", name="search", arguments='{"query": "q3"}'), + ], + ) + ), + # Second iteration: 3 more parallel calls (total would be 6, exceeding limit of 5) + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="2a", name="search", arguments='{"query": "q4"}'), + Content.from_function_call(call_id="2b", name="search", 
arguments='{"query": "q5"}'), + Content.from_function_call(call_id="2c", name="search", arguments='{"query": "q6"}'), + ], + ) + ), + # Final response after tool_choice="none" is forced + ChatResponse(messages=Message(role="assistant", text="done")), + ] + + # Allow many iterations but cap total function calls at 5 + chat_client_base.function_invocation_configuration["max_function_calls"] = 5 + + response = await chat_client_base.get_response( + [Message(role="user", text="search")], options={"tool_choice": "auto", "tools": [search_func]} + ) + + # First iteration executes 3 calls (total=3, under limit). + # Second iteration executes 3 more (total=6, reaches limit) then forces tool_choice="none". + # The loop completes the current batch before stopping. + assert exec_counter == 6 + assert "broke out" in response.messages[-1].text + + +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_single_calls_per_iteration(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls works with single tool calls per iteration.""" + exec_counter = 0 + + @tool(name="lookup", approval_mode="never_require") + def lookup_func(key: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Value for {key}" + + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="1", name="lookup", arguments='{"key": "a"}'), + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="2", name="lookup", arguments='{"key": "b"}'), + ], + ) + ), + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id="3", name="lookup", arguments='{"key": "c"}'), + ], + ) + ), + # After limit is reached + ChatResponse(messages=Message(role="assistant", text="all done")), + ] + + chat_client_base.function_invocation_configuration["max_function_calls"] = 2 + + response 
= await chat_client_base.get_response( + [Message(role="user", text="look up keys")], options={"tool_choice": "auto", "tools": [lookup_func]} + ) + + # 2 single calls executed, then limit reached, tool_choice="none" forced + assert exec_counter == 2 + assert "broke out" in response.messages[-1].text + + +@pytest.mark.parametrize("max_iterations", [10]) +async def test_max_function_calls_none_means_unlimited(chat_client_base: SupportsChatGetResponse): + """Test that max_function_calls=None (default) allows unlimited function calls.""" + exec_counter = 0 + + @tool(name="do_thing", approval_mode="never_require") + def do_thing_func(arg: str) -> str: + nonlocal exec_counter + exec_counter += 1 + return f"Done {arg}" + + chat_client_base.run_responses = [ + ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call(call_id=str(i), name="do_thing", arguments=f'{{"arg": "v{i}"}}'), + ], + ) + ) + for i in range(5) + ] + [ChatResponse(messages=Message(role="assistant", text="finished"))] + + # Explicitly set to None (default) — should not limit + chat_client_base.function_invocation_configuration["max_function_calls"] = None + + response = await chat_client_base.get_response( + [Message(role="user", text="do things")], options={"tool_choice": "auto", "tools": [do_thing_func]} + ) + + assert exec_counter == 5 + assert response.messages[-1].text == "finished" + + async def test_function_invocation_config_enabled_false(chat_client_base: SupportsChatGetResponse): """Test that setting enabled=False disables function invocation.""" exec_counter = 0 @@ -1236,6 +1373,33 @@ async def test_function_invocation_config_validation_max_consecutive_errors(): normalize_function_invocation_configuration({"max_consecutive_errors_per_request": -1}) +async def test_function_invocation_config_validation_max_function_calls(): + """Test that max_function_calls validation works correctly.""" + from agent_framework import 
normalize_function_invocation_configuration + + # Default is None (unlimited) + config = normalize_function_invocation_configuration(None) + assert config["max_function_calls"] is None + + # Valid values + config = normalize_function_invocation_configuration({"max_function_calls": 1}) + assert config["max_function_calls"] == 1 + + config = normalize_function_invocation_configuration({"max_function_calls": 100}) + assert config["max_function_calls"] == 100 + + # None is valid (unlimited) + config = normalize_function_invocation_configuration({"max_function_calls": None}) + assert config["max_function_calls"] is None + + # Invalid value (less than 1) + with pytest.raises(ValueError, match="max_function_calls must be at least 1 or None"): + normalize_function_invocation_configuration({"max_function_calls": 0}) + + with pytest.raises(ValueError, match="max_function_calls must be at least 1 or None"): + normalize_function_invocation_configuration({"max_function_calls": -1}) + + async def test_argument_validation_error_with_detailed_errors(chat_client_base: SupportsChatGetResponse): """Test that argument validation errors include details when include_detailed_errors=True.""" diff --git a/python/samples/01-get-started/06_host_your_agent.py b/python/samples/01-get-started/06_host_your_agent.py index d858d61773..6bc87b48b4 100644 --- a/python/samples/01-get-started/06_host_your_agent.py +++ b/python/samples/01-get-started/06_host_your_agent.py @@ -1,5 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. +# ruff: noqa: E305 +# fmt: off from typing import Any from agent_framework.azure import AgentFunctionApp, AzureOpenAIChatClient diff --git a/python/samples/02-agents/tools/control_total_tool_executions.py b/python/samples/02-agents/tools/control_total_tool_executions.py new file mode 100644 index 0000000000..eaad6e225b --- /dev/null +++ b/python/samples/02-agents/tools/control_total_tool_executions.py @@ -0,0 +1,345 @@ +# Copyright (c) Microsoft. All rights reserved. 
+
+import asyncio
+from typing import Annotated
+
+from agent_framework import tool
+from agent_framework.openai import OpenAIResponsesClient
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+This sample demonstrates all the ways to control how many times tools are
+executed during an agent run. There are three complementary mechanisms:
+
+1. ``max_iterations`` (on the chat client) — caps the number of **LLM
+   roundtrips**. Each roundtrip may invoke one or more tools in parallel.
+
+2. ``max_function_calls`` (on the chat client) — caps the **total number of
+   individual function invocations** across all iterations within a single
+   request. This is the primary knob for cost control. The limit is
+   best-effort: it is checked after each batch of parallel tool calls
+   completes. For example, if max_function_calls is 3 and the model requests
+   5 parallel calls in one iteration, all 5 will execute; the loop then stops
+   invoking tools and forces the model to produce a final text response.
+
+3. ``max_invocations`` (on a tool) — caps the **lifetime invocation count**
+   of a specific tool instance. The counter is never automatically reset,
+   so it accumulates across requests when tools are singletons.
+
+   Because ``max_invocations`` is tracked on the ``FunctionTool`` *instance*,
+   wrapping the same callable with ``@tool`` multiple times creates independent
+   counters. This lets you give different agents different invocation budgets
+   for the same underlying function.
+ +Choose the right mechanism for your scenario: +• Prevent runaway LLM loops → ``max_iterations`` +• Best-effort cap on tool execution cost per request → ``max_function_calls`` + (checked between iterations; a single batch of parallel calls may overshoot) +• Best-effort limit a specific expensive tool globally → ``max_invocations`` +• Per-agent limits on shared tools → wrap the callable separately per agent +""" + + +# --- Tool definitions --- + + +# NOTE: approval_mode="never_require" is for sample brevity. +# Use "always_require" in production; see function_tool_with_approval.py. +@tool(approval_mode="never_require") +def search_web(query: Annotated[str, "The search query to look up."]) -> str: + """Search the web for information.""" + return f"Results for '{query}': [page1, page2, page3]" + + +@tool(approval_mode="never_require") +def get_weather(city: Annotated[str, "The city to get the weather for."]) -> str: + """Get the current weather for a city.""" + return f"Weather in {city}: Sunny, 22°C" + + +@tool(approval_mode="never_require", max_invocations=2) +def call_expensive_api( + prompt: Annotated[str, "The prompt to send to the expensive API."], +) -> str: + """Call a very expensive external API. Limited to 2 calls ever.""" + return f"Expensive result for '{prompt}'" + + +# --- Scenario 1: max_iterations (limit LLM roundtrips) --- + + +async def scenario_max_iterations(): + """Demonstrate max_iterations: limits how many times we loop back to the LLM. + + Each iteration may invoke one or more tools in parallel, so this does NOT + directly limit the total number of function executions. + """ + print("=" * 60) + print("Scenario 1: max_iterations — limit LLM roundtrips") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Set max_iterations to 3 — the tool loop will run at most 3 roundtrips + # to the model before forcing a text response. 
+ client.function_invocation_configuration["max_iterations"] = 3 + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + + agent = client.as_agent( + name="ResearchAgent", + instructions=( + "You are a research assistant. Use the search_web tool to answer " + "the user's question. Search for multiple aspects of the topic." + ), + tools=[search_web, get_weather], + ) + + response = await agent.run("Tell me about the weather in Paris, London, and Tokyo.") + print(f" Response: {response.text[:200]}...") + print() + + +# --- Scenario 2: max_function_calls (limit total tool executions per request) --- + + +async def scenario_max_function_calls(): + """Demonstrate max_function_calls: caps total individual tool invocations. + + Unlike max_iterations, this counts every individual function execution — + even when several tools run in parallel within a single iteration. + """ + print("=" * 60) + print("Scenario 2: max_function_calls — limit total tool executions") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Allow many iterations but cap total function calls to 4. + # If the model requests 3 parallel searches per iteration, after 2 + # iterations (6 calls) the limit is hit and the loop stops. + client.function_invocation_configuration["max_iterations"] = 20 + client.function_invocation_configuration["max_function_calls"] = 4 + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + print(f" max_function_calls = {client.function_invocation_configuration['max_function_calls']}") + + agent = client.as_agent( + name="ResearchAgent", + instructions=( + "You are a research assistant. Use the search_web and get_weather " + "tools to answer the user's question comprehensively." + ), + tools=[search_web, get_weather], + ) + + response = await agent.run( + "Search for the weather in Paris, London, Tokyo, " + "New York, and Sydney, and also search for best travel tips." 
+ ) + print(f" Response: {response.text[:200]}...") + print() + + +# --- Scenario 3: max_invocations (lifetime limit on a specific tool) --- + + +async def scenario_max_invocations(): + """Demonstrate max_invocations: caps how many times a specific tool instance + can be called across ALL requests. + + Note: this counter lives on the tool instance, so for module-level tools + it accumulates globally. Use tool.invocation_count to inspect or reset. + """ + print("=" * 60) + print("Scenario 3: max_invocations — lifetime cap on a tool") + print("=" * 60) + + agent = OpenAIResponsesClient().as_agent( + name="APIAgent", + instructions="Use call_expensive_api when asked to analyze something.", + tools=[call_expensive_api], + ) + session = agent.create_session() + + # 1. First call — succeeds (invocation_count: 0 → 1) + print(f" Before call 1: invocation_count = {call_expensive_api.invocation_count}") + response = await agent.run("Analyze the market trends for AI.", session=session) + print(f" After call 1: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 2. Second call — succeeds (invocation_count: 1 → 2) + response = await agent.run("Analyze the market trends for cloud computing.", session=session) + print(f" After call 2: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 3. Third call — tool refuses (max_invocations=2 reached) + response = await agent.run("Analyze the market trends for quantum computing.", session=session) + print(f" After call 3: invocation_count = {call_expensive_api.invocation_count}") + print(f" Response: {response.text[:150]}...") + + # 4. 
Reset the counter to allow more calls + print() + print(" Resetting invocation_count to 0...") + call_expensive_api.invocation_count = 0 + print(f" invocation_count = {call_expensive_api.invocation_count}") + print() + + +# --- Scenario 4: Per-agent limits via separate tool wrappers --- + + +async def scenario_per_agent_tool_limits(): + """Demonstrate per-agent max_invocations using separate tool wrappers. + + Because max_invocations is tracked on the FunctionTool *instance*, you can + wrap the same callable with ``@tool`` multiple times to get independent + counters for different agents. This is useful when two agents share the + same underlying function but should have different invocation budgets. + """ + print("=" * 60) + print("Scenario 4: Per-agent limits via separate tool wrappers") + print("=" * 60) + + # The underlying callable — a plain function, no decorator. + def _do_lookup(query: Annotated[str, "Search query."]) -> str: + """Look up information.""" + return f"Lookup result for '{query}'" + + # Wrap it twice with different limits. Each wrapper is a separate + # FunctionTool instance with its own invocation_count. 
+ agent_a_lookup = tool(name="lookup", approval_mode="never_require", max_invocations=2)(_do_lookup) + agent_b_lookup = tool(name="lookup", approval_mode="never_require", max_invocations=5)(_do_lookup) + + client = OpenAIResponsesClient() + agent_a = client.as_agent( + name="AgentA", + instructions="Use the lookup tool to answer questions.", + tools=[agent_a_lookup], + ) + agent_b = client.as_agent( + name="AgentB", + instructions="Use the lookup tool to answer questions.", + tools=[agent_b_lookup], + ) + + print(f" agent_a_lookup.max_invocations = {agent_a_lookup.max_invocations}") + print(f" agent_b_lookup.max_invocations = {agent_b_lookup.max_invocations}") + + # Agent A uses its budget + session_a = agent_a.create_session() + await agent_a.run("Look up AI trends", session=session_a) + await agent_a.run("Look up cloud trends", session=session_a) + + # Agent B's counter is independent — still at 0 + session_b = agent_b.create_session() + await agent_b.run("Look up quantum computing", session=session_b) + + print(f" agent_a_lookup.invocation_count = {agent_a_lookup.invocation_count} (limit {agent_a_lookup.max_invocations})") + print(f" agent_b_lookup.invocation_count = {agent_b_lookup.invocation_count} (limit {agent_b_lookup.max_invocations})") + print(" → Agent A hit its limit; Agent B used 1 of 5.") + print() + + +# --- Scenario 5: Combining all three mechanisms --- + + +async def scenario_combined(): + """Demonstrate using all three mechanisms together for defense in depth.""" + print("=" * 60) + print("Scenario 5: Combined — all mechanisms together") + print("=" * 60) + + client = OpenAIResponsesClient() + + # 1. Configure the client with both iteration and function call limits. 
+ client.function_invocation_configuration["max_iterations"] = 5 # max 5 LLM roundtrips + client.function_invocation_configuration["max_function_calls"] = 8 # max 8 total tool calls + print(f" max_iterations = {client.function_invocation_configuration['max_iterations']}") + print(f" max_function_calls = {client.function_invocation_configuration['max_function_calls']}") + + # 2. Use a tool with a lifetime invocation limit. + @tool(approval_mode="never_require", max_invocations=3) + def premium_lookup(topic: Annotated[str, "Topic to look up."]) -> str: + """Look up premium data (max 3 calls ever).""" + return f"Premium data for '{topic}'" + + print(f" premium_lookup.max_invocations = {premium_lookup.max_invocations}") + + agent = client.as_agent( + name="MultiToolAgent", + instructions="Use all available tools to answer comprehensively.", + tools=[search_web, get_weather, premium_lookup], + ) + + # 3. Run a query that could trigger many tool calls. + response = await agent.run( + "Research the weather and tourism info for Paris, London, Tokyo, " + "New York, and Sydney. Use premium_lookup for the top 3 cities." + ) + print(f" Response: {response.text[:200]}...") + print(f" premium_lookup.invocation_count = {premium_lookup.invocation_count}") + print() + + +# --- Entry point --- + + +async def main(): + await scenario_max_iterations() + await scenario_max_function_calls() + await scenario_max_invocations() + await scenario_per_agent_tool_limits() + await scenario_combined() + + +""" +Sample output: + +============================================================ +Scenario 1: max_iterations — limit LLM roundtrips +============================================================ + max_iterations = 3 + Response: The weather in Paris is sunny at 22°C, London is sunny at 22°C, and Tokyo is sunny at 22°C... 
+============================================================ +Scenario 2: max_function_calls — limit total tool executions +============================================================ + max_iterations = 20 + max_function_calls = 4 + Response: Based on my research, Paris is sunny at 22°C, London is sunny at 22°C... +============================================================ +Scenario 3: max_invocations — lifetime cap on a tool +============================================================ + Before call 1: invocation_count = 0 + After call 1: invocation_count = 1 + Response: Based on the analysis, the AI market is showing strong growth trends... + After call 2: invocation_count = 2 + Response: The cloud computing market continues to expand with key trends in... + After call 3: invocation_count = 2 + Response: I'm unable to use the analysis tool right now as it has reached its limit... + + Resetting invocation_count to 0... + invocation_count = 0 + +============================================================ +Scenario 4: Per-agent limits via separate tool wrappers +============================================================ + agent_a_lookup.max_invocations = 2 + agent_b_lookup.max_invocations = 5 + agent_a_lookup.invocation_count = 2 (limit 2) + agent_b_lookup.invocation_count = 1 (limit 5) + → Agent A hit its limit; Agent B used 1 of 5. + +============================================================ +Scenario 5: Combined — all mechanisms together +============================================================ + max_iterations = 5 + max_function_calls = 8 + premium_lookup.max_invocations = 3 + Response: Here's a comprehensive overview of the weather and tourism for the cities... + premium_lookup.invocation_count = 3 +""" + +if __name__ == "__main__": + asyncio.run(main())