google
diff --git a/src/google/adk/agents/run_config.py b/src/google/adk/agents/run_config.py
Lines changed: 22 additions & 0 deletions
diff --git a/src/google/adk/models/gemini_llm_connection.py b/src/google/adk/models/gemini_llm_connection.py
Lines changed: 41 additions & 5 deletions
diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
Lines changed: 53 additions & 13 deletions
diff --git a/src/google/adk/runners.py b/src/google/adk/runners.py
Lines changed: 28 additions & 6 deletions
diff --git a/src/google/adk/tools/bigquery/search_tool.py b/src/google/adk/tools/bigquery/search_tool.py
Lines changed: 4 additions & 1 deletion
@@ -28,6 +28,8 @@
 from pydantic import field_validator
 from pydantic import model_validator
 
+from ..sessions.base_session_service import GetSessionConfig
+
 logger = logging.getLogger('google_adk.' + __name__)
 
 
@@ -319,6 +321,26 @@ class RunConfig(BaseModel):
   custom_metadata: Optional[dict[str, Any]] = None
   """Custom metadata for the current invocation."""
 
+  get_session_config: Optional[GetSessionConfig] = None
+  """Configuration for controlling which events are fetched when loading
+  a session.
+
+  When set, the Runner will pass this configuration to the session service's
+  ``get_session`` method, allowing the caller to limit the events returned
+  (e.g. via ``num_recent_events`` or ``after_timestamp``).  This is especially
+  useful in combination with ``EventsCompactionConfig`` to avoid loading the
+  full event history on every invocation.
+
+  Example::
+
+      from google.adk.agents.run_config import RunConfig
+      from google.adk.sessions.base_session_service import GetSessionConfig
+
+      run_config = RunConfig(
+          get_session_config=GetSessionConfig(num_recent_events=50),
+      )
+  """
+
   @model_validator(mode='before')
   @classmethod
   def check_for_deprecated_save_live_audio(cls, data: Any) -> Any:
 
@@ -179,10 +179,31 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
           )
         if message.server_content:
           content = message.server_content.model_turn
+
+          # Standalone grounding_metadata event (when content is empty)
+          if (
+              not (content and content.parts)
+              and message.server_content.grounding_metadata
+              and not message.server_content.turn_complete
+          ):
+            yield LlmResponse(
+                grounding_metadata=message.server_content.grounding_metadata,
+                interrupted=message.server_content.interrupted,
+                model_version=self._model_version,
+            )
+
           if content and content.parts:
             llm_response = LlmResponse(
-                content=content, interrupted=message.server_content.interrupted
+                content=content,
+                interrupted=message.server_content.interrupted,
+                model_version=self._model_version,
             )
+            # grounding_metadata is yielded again at turn_complete,
+            # so avoid duplicating it here if turn_complete is true.
+            if not message.server_content.turn_complete:
+              llm_response.grounding_metadata = (
+                  message.server_content.grounding_metadata
+              )
             if content.parts[0].text:
               text += content.parts[0].text
               llm_response.partial = True
@@ -205,6 +226,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                       finished=False,
                   ),
                   partial=True,
+                  model_version=self._model_version,
               )
             # finished=True and partial transcription may happen in the same
             # message.
@@ -215,6 +237,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                       finished=True,
                   ),
                   partial=False,
+                  model_version=self._model_version,
               )
               self._input_transcription_text = ''
           if message.server_content.output_transcription:
@@ -228,6 +251,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                       finished=False,
                   ),
                   partial=True,
+                  model_version=self._model_version,
               )
             if message.server_content.output_transcription.finished:
               yield LlmResponse(
@@ -236,6 +260,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                       finished=True,
                   ),
                   partial=False,
+                  model_version=self._model_version,
               )
               self._output_transcription_text = ''
           # The Gemini API might not send a transcription finished signal.
@@ -253,6 +278,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                       finished=True,
                   ),
                   partial=False,
+                  model_version=self._model_version,
               )
               self._input_transcription_text = ''
             if self._output_transcription_text:
@@ -262,6 +288,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                       finished=True,
                   ),
                   partial=False,
+                  model_version=self._model_version,
               )
               self._output_transcription_text = ''
           if message.server_content.turn_complete:
@@ -271,9 +298,11 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
             yield LlmResponse(
                 turn_complete=True,
                 interrupted=message.server_content.interrupted,
+                grounding_metadata=message.server_content.grounding_metadata,
+                model_version=self._model_version,
             )
             break
-          # in case of empty content or parts, we sill surface it
+          # in case of empty content or parts, we still surface it
           # in case it's an interrupted message, we merge the previous partial
           # text. Otherwise we don't merge, because content can be None when
           # the model safety threshold is triggered
@@ -282,7 +311,10 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
               yield self.__build_full_text_response(text)
               text = ''
             else:
-              yield LlmResponse(interrupted=message.server_content.interrupted)
+              yield LlmResponse(
+                  interrupted=message.server_content.interrupted,
+                  model_version=self._model_version,
+              )
         if message.tool_call:
           if text:
             yield self.__build_full_text_response(text)
@@ -291,12 +323,16 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
               types.Part(function_call=function_call)
               for function_call in message.tool_call.function_calls
           ]
-          yield LlmResponse(content=types.Content(role='model', parts=parts))
+          yield LlmResponse(
+              content=types.Content(role='model', parts=parts),
+              model_version=self._model_version,
+          )
         if message.session_resumption_update:
           logger.debug('Received session resumption message: %s', message)
           yield (
               LlmResponse(
-                  live_session_resumption_update=message.session_resumption_update
+                  live_session_resumption_update=message.session_resumption_update,
+                  model_version=self._model_version,
               )
           )
 
 
@@ -1500,6 +1500,15 @@ def _message_to_generate_content_response(
   )
 
 
+def _finish_reason_to_error_message(
+    finish_reason: types.FinishReason,
+) -> str:
+  """Returns an error message for non-stop finish reasons."""
+  if finish_reason == types.FinishReason.MAX_TOKENS:
+    return "Maximum tokens reached"
+  return f"Finished with {finish_reason}"
+
+
 def _enforce_strict_openai_schema(schema: dict[str, Any]) -> None:
   """Recursively transforms a JSON schema for OpenAI strict structured outputs.
 
@@ -2000,8 +2009,15 @@ def _finalize_tool_call_response(
           *, model_version: str, finish_reason: str
       ) -> LlmResponse:
         tool_calls = []
+        has_incomplete_tool_call_args = False
         for index, func_data in function_calls.items():
           if func_data["id"]:
+            if finish_reason == "length":
+              try:
+                json.loads(func_data["args"] or "{}")
+              except json.JSONDecodeError:
+                has_incomplete_tool_call_args = True
+                continue
             tool_calls.append(
                 ChatCompletionMessageToolCall(
                     type="function",
@@ -2013,6 +2029,19 @@ def _finalize_tool_call_response(
                     ),
                 )
             )
+
+        if has_incomplete_tool_call_args:
+          return LlmResponse(
+              error_code=types.FinishReason.MAX_TOKENS,
+              error_message=(
+                  "Tool call arguments were truncated while streaming and"
+                  " could not be parsed as valid JSON. Increase"
+                  " `max_output_tokens` and retry."
+              ),
+              finish_reason=types.FinishReason.MAX_TOKENS,
+              model_version=model_version,
+          )
+
         llm_response = _message_to_generate_content_response(
             ChatCompletionAssistantMessage(
                 role="assistant",
@@ -2022,7 +2051,13 @@ def _finalize_tool_call_response(
             model_version=model_version,
             thought_parts=list(reasoning_parts) if reasoning_parts else None,
         )
-        llm_response.finish_reason = _map_finish_reason(finish_reason)
+        mapped_finish_reason = _map_finish_reason(finish_reason)
+        llm_response.finish_reason = mapped_finish_reason
+        if mapped_finish_reason != types.FinishReason.STOP:
+          llm_response.error_code = mapped_finish_reason
+          llm_response.error_message = _finish_reason_to_error_message(
+              mapped_finish_reason
+          )
         return llm_response
 
       def _finalize_text_response(
@@ -2037,7 +2072,13 @@ def _finalize_text_response(
             model_version=model_version,
             thought_parts=list(reasoning_parts) if reasoning_parts else None,
         )
-        llm_response.finish_reason = _map_finish_reason(finish_reason)
+        mapped_finish_reason = _map_finish_reason(finish_reason)
+        llm_response.finish_reason = mapped_finish_reason
+        if mapped_finish_reason != types.FinishReason.STOP:
+          llm_response.error_code = mapped_finish_reason
+          llm_response.error_message = _finish_reason_to_error_message(
+              mapped_finish_reason
+          )
         return llm_response
 
       def _reset_stream_buffers() -> None:
@@ -2096,10 +2137,11 @@ def _reset_stream_buffers() -> None:
             )
 
           # LiteLLM 1.81+ can set finish_reason="stop" on partial chunks. Only
-          # finalize tool calls on an explicit tool_calls finish_reason, or on a
-          # stop-only chunk (no content/tool deltas).
+          # finalize tool calls on an explicit tool_calls/length finish_reason,
+          # or on a stop-only chunk (no content/tool deltas).
           if function_calls and (
               finish_reason == "tool_calls"
+              or finish_reason == "length"
               or (finish_reason == "stop" and chunk is None)
           ):
             aggregated_llm_response_with_tool_call = (
@@ -2109,16 +2151,14 @@ def _reset_stream_buffers() -> None:
                 )
             )
             _reset_stream_buffers()
-          elif (
-              finish_reason == "stop"
-              and (text or reasoning_parts)
-              and chunk is None
-              and not function_calls
+          elif (text or reasoning_parts) and (
+              finish_reason == "length"
+              or (
+                  finish_reason == "stop"
+                  and chunk is None
+                  and not function_calls
+              )
           ):
-            # Only aggregate text response when we have a true stop signal
-            # chunk is None means no content in this chunk, just finish signal.
-            # LiteLLM 1.81+ sets finish_reason="stop" on partial chunks with
-            # content.
             aggregated_llm_response = _finalize_text_response(
                 model_version=part.model,
                 finish_reason=finish_reason,
 
@@ -57,6 +57,7 @@
 from .plugins.base_plugin import BasePlugin
 from .plugins.plugin_manager import PluginManager
 from .sessions.base_session_service import BaseSessionService
+from .sessions.base_session_service import GetSessionConfig
 from .sessions.in_memory_session_service import InMemorySessionService
 from .sessions.session import Session
 from .telemetry.tracing import tracer
@@ -393,7 +394,11 @@ def _format_session_not_found_message(self, session_id: str) -> str:
     )
 
   async def _get_or_create_session(
-      self, *, user_id: str, session_id: str
+      self,
+      *,
+      user_id: str,
+      session_id: str,
+      get_session_config: Optional[GetSessionConfig] = None,
   ) -> Session:
     """Gets the session or creates it if auto-creation is enabled.
 
@@ -404,6 +409,8 @@ async def _get_or_create_session(
     Args:
       user_id: The user ID of the session.
       session_id: The session ID of the session.
+      get_session_config: Optional configuration for controlling which events
+        are fetched from session storage.
 
     Returns:
       The existing or newly created `Session`.
@@ -413,7 +420,10 @@ async def _get_or_create_session(
         auto_create_session is False.
     """
     session = await self.session_service.get_session(
-        app_name=self.app_name, user_id=user_id, session_id=session_id
+        app_name=self.app_name,
+        user_id=user_id,
+        session_id=session_id,
+        config=get_session_config,
     )
     if not session:
       if self.auto_create_session:
@@ -535,7 +545,9 @@ async def _run_with_trace(
     ) -> AsyncGenerator[Event, None]:
       with tracer.start_as_current_span('invocation'):
         session = await self._get_or_create_session(
-            user_id=user_id, session_id=session_id
+            user_id=user_id,
+            session_id=session_id,
+            get_session_config=run_config.get_session_config,
         )
 
         if not invocation_id and not new_message:
@@ -626,10 +638,14 @@ async def rewind_async(
       user_id: str,
       session_id: str,
       rewind_before_invocation_id: str,
+      run_config: Optional[RunConfig] = None,
   ) -> None:
     """Rewinds the session to before the specified invocation."""
+    run_config = run_config or RunConfig()
     session = await self._get_or_create_session(
-        user_id=user_id, session_id=session_id
+        user_id=user_id,
+        session_id=session_id,
+        get_session_config=run_config.get_session_config,
     )
     rewind_event_index = -1
     for i, event in enumerate(session.events):
@@ -1060,7 +1076,9 @@ async def run_live(
       )
     if not session:
       session = await self._get_or_create_session(
-          user_id=user_id, session_id=session_id
+          user_id=user_id,
+          session_id=session_id,
+          get_session_config=run_config.get_session_config,
       )
     invocation_context = self._new_invocation_context_for_live(
         session,
@@ -1231,8 +1249,12 @@ async def run_debug(
         - Performance optimization
         Please use run_async() with proper configuration.
     """
+    run_config = run_config or RunConfig()
     session = await self.session_service.get_session(
-        app_name=self.app_name, user_id=user_id, session_id=session_id
+        app_name=self.app_name,
+        user_id=user_id,
+        session_id=session_id,
+        config=run_config.get_session_config,
     )
     if not session:
       session = await self.session_service.create_session(
 
@@ -48,7 +48,10 @@ def search_catalog(
     dataset_ids_filter: list[str] | None = None,
     types_filter: list[str] | None = None,
 ) -> dict[str, Any]:
-  """Searches for BigQuery assets within Dataplex.
+  """Finds BigQuery datasets and tables using natural language semantic search via Dataplex.
+
+  Use this tool to discover BigQuery assets when you don't know the exact names.
+  It's ideal for searching based on topics, descriptions, or questions about the data.
 
   Args:
       prompt: The base search query (natural language or keywords).