@@ -179,10 +179,31 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
179179 )
180180 if message .server_content :
181181 content = message .server_content .model_turn
182+
183+ # Standalone grounding_metadata event (when content is empty)
184+ if (
185+ not (content and content .parts )
186+ and message .server_content .grounding_metadata
187+ and not message .server_content .turn_complete
188+ ):
189+ yield LlmResponse (
190+ grounding_metadata = message .server_content .grounding_metadata ,
191+ interrupted = message .server_content .interrupted ,
192+ model_version = self ._model_version ,
193+ )
194+
182195 if content and content .parts :
183196 llm_response = LlmResponse (
184- content = content , interrupted = message .server_content .interrupted
197+ content = content ,
198+ interrupted = message .server_content .interrupted ,
199+ model_version = self ._model_version ,
185200 )
201+ # grounding_metadata is yielded again at turn_complete,
202+ # so avoid duplicating it here if turn_complete is true.
203+ if not message .server_content .turn_complete :
204+ llm_response .grounding_metadata = (
205+ message .server_content .grounding_metadata
206+ )
186207 if content .parts [0 ].text :
187208 text += content .parts [0 ].text
188209 llm_response .partial = True
@@ -205,6 +226,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
205226 finished = False ,
206227 ),
207228 partial = True ,
229+ model_version = self ._model_version ,
208230 )
209231 # finished=True and partial transcription may happen in the same
210232 # message.
@@ -215,6 +237,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
215237 finished = True ,
216238 ),
217239 partial = False ,
240+ model_version = self ._model_version ,
218241 )
219242 self ._input_transcription_text = ''
220243 if message .server_content .output_transcription :
@@ -228,6 +251,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
228251 finished = False ,
229252 ),
230253 partial = True ,
254+ model_version = self ._model_version ,
231255 )
232256 if message .server_content .output_transcription .finished :
233257 yield LlmResponse (
@@ -236,6 +260,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
236260 finished = True ,
237261 ),
238262 partial = False ,
263+ model_version = self ._model_version ,
239264 )
240265 self ._output_transcription_text = ''
241266 # The Gemini API might not send a transcription finished signal.
@@ -253,6 +278,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
253278 finished = True ,
254279 ),
255280 partial = False ,
281+ model_version = self ._model_version ,
256282 )
257283 self ._input_transcription_text = ''
258284 if self ._output_transcription_text :
@@ -262,6 +288,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
262288 finished = True ,
263289 ),
264290 partial = False ,
291+ model_version = self ._model_version ,
265292 )
266293 self ._output_transcription_text = ''
267294 if message .server_content .turn_complete :
@@ -271,9 +298,11 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
271298 yield LlmResponse (
272299 turn_complete = True ,
273300 interrupted = message .server_content .interrupted ,
301+ grounding_metadata = message .server_content .grounding_metadata ,
302+ model_version = self ._model_version ,
274303 )
275304 break
276- # in case of empty content or parts, we sill surface it
305+ # in case of empty content or parts, we still surface it
277306 # in case it's an interrupted message, we merge the previous partial
278307 # text. Otherwise we don't merge, because content can be None when the
279308 # model safety threshold is triggered
@@ -282,7 +311,10 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
282311 yield self .__build_full_text_response (text )
283312 text = ''
284313 else :
285- yield LlmResponse (interrupted = message .server_content .interrupted )
314+ yield LlmResponse (
315+ interrupted = message .server_content .interrupted ,
316+ model_version = self ._model_version ,
317+ )
286318 if message .tool_call :
287319 if text :
288320 yield self .__build_full_text_response (text )
@@ -291,12 +323,16 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
291323 types .Part (function_call = function_call )
292324 for function_call in message .tool_call .function_calls
293325 ]
294- yield LlmResponse (content = types .Content (role = 'model' , parts = parts ))
326+ yield LlmResponse (
327+ content = types .Content (role = 'model' , parts = parts ),
328+ model_version = self ._model_version ,
329+ )
295330 if message .session_resumption_update :
296331 logger .debug ('Received session resumption message: %s' , message )
297332 yield (
298333 LlmResponse (
299- live_session_resumption_update = message .session_resumption_update
334+ live_session_resumption_update = message .session_resumption_update ,
335+ model_version = self ._model_version ,
300336 )
301337 )
302338
0 commit comments