@@ -746,14 +746,103 @@ def _set_streaming_completions_api_output_data(
746746 if messages is not None and isinstance (messages , str ):
747747 messages = [messages ]
748748
749- _common_set_output_data (
750- span ,
751- response ,
752- messages ,
753- integration ,
754- start_time ,
755- finish_span ,
756- )
749+ ttft : "Optional[float]" = None
750+ data_buf : "list[list[str]]" = [] # one for each choice
751+
752+ old_iterator = response ._iterator
753+
def new_iterator() -> "Iterator[ChatCompletionChunk]":
    """Wrap the original sync chat-completions stream iterator.

    Yields every chunk to the caller unchanged while buffering the
    streamed message text (one buffer per choice), capturing the
    time-to-first-token, and — once the stream is exhausted — writing
    the collected data onto ``span`` and closing it if ``finish_span``
    is set.
    """
    nonlocal ttft
    # Chat-completions chunks are not inspected for usage info here, so
    # token usage is always computed manually from the buffered text.
    count_tokens_manually = True
    for x in old_iterator:
        # Instrumentation must never break the caller's stream, so all
        # bookkeeping is guarded by capture_internal_exceptions().
        with capture_internal_exceptions():
            if hasattr(x, "choices"):
                choice_index = 0
                for choice in x.choices:
                    if hasattr(choice, "delta") and hasattr(
                        choice.delta, "content"
                    ):
                        # The first content delta marks time-to-first-token.
                        if start_time is not None and ttft is None:
                            ttft = time.perf_counter() - start_time
                        content = choice.delta.content
                        # Grow the buffer lazily: one list per choice.
                        if len(data_buf) <= choice_index:
                            data_buf.append([])
                        data_buf[choice_index].append(content or "")
                    # NOTE(review): reconstructed as a per-choice increment
                    # (matching the "one for each choice" buffer layout);
                    # indentation is lost in the diff — confirm upstream.
                    choice_index += 1

        yield x

    # Stream exhausted: flush the collected data onto the span.
    with capture_internal_exceptions():
        if ttft is not None:
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
            )
        if len(data_buf) > 0:
            all_responses = ["".join(chunk) for chunk in data_buf]
            # Response text is PII: record it only when the user opted in.
            if should_send_default_pii() and integration.include_prompts:
                set_data_normalized(
                    span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                )
            if count_tokens_manually:
                _calculate_token_usage(
                    messages,
                    response,
                    span,
                    all_responses,
                    integration.count_tokens,
                )

    # NOTE(review): this only runs if the consumer fully drains the
    # stream; an abandoned generator leaves the span open.
    if finish_span:
        span.__exit__(None, None, None)
797+
async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
    """Async twin of ``new_iterator``.

    Same pass-through instrumentation for an async chat-completions
    stream: buffers per-choice text, records time-to-first-token, and
    finalizes the span after the stream is exhausted.
    """
    nonlocal ttft
    # Usage info is not read from the chunks; tokens are counted
    # manually from the buffered text at the end.
    count_tokens_manually = True
    async for x in old_iterator:
        # Guarded so instrumentation failures never break the stream.
        with capture_internal_exceptions():
            # OpenAI chat completion API
            if hasattr(x, "choices"):
                choice_index = 0
                for choice in x.choices:
                    if hasattr(choice, "delta") and hasattr(
                        choice.delta, "content"
                    ):
                        # The first content delta marks time-to-first-token.
                        if start_time is not None and ttft is None:
                            ttft = time.perf_counter() - start_time
                        content = choice.delta.content
                        # One lazily-created buffer per choice.
                        if len(data_buf) <= choice_index:
                            data_buf.append([])
                        data_buf[choice_index].append(content or "")
                    # NOTE(review): reconstructed as a per-choice increment;
                    # indentation is lost in the diff — confirm upstream.
                    choice_index += 1

        yield x

    # Stream exhausted: flush the collected data onto the span.
    with capture_internal_exceptions():
        if ttft is not None:
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
            )
        if len(data_buf) > 0:
            all_responses = ["".join(chunk) for chunk in data_buf]
            # Response text is PII: record it only when the user opted in.
            if should_send_default_pii() and integration.include_prompts:
                set_data_normalized(
                    span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                )
            if count_tokens_manually:
                _calculate_token_usage(
                    messages,
                    response,
                    span,
                    all_responses,
                    integration.count_tokens,
                )
    # NOTE(review): only runs when the consumer drains the stream.
    if finish_span:
        span.__exit__(None, None, None)
841+
842+ if str (type (response ._iterator )) == "<class 'async_generator'>" :
843+ response ._iterator = new_iterator_async ()
844+ else :
845+ response ._iterator = new_iterator ()
757846
758847
759848def _set_responses_api_output_data (
@@ -792,14 +881,108 @@ def _set_streaming_responses_api_output_data(
792881 if input is not None and isinstance (input , str ):
793882 input = [input ]
794883
795- _common_set_output_data (
796- span ,
797- response ,
798- input ,
799- integration ,
800- start_time ,
801- finish_span ,
802- )
884+ ttft : "Optional[float]" = None
885+ data_buf : "list[list[str]]" = [] # one for each choice
886+
887+ old_iterator = response ._iterator
888+
def new_iterator() -> "Iterator[ChatCompletionChunk]":
    """Wrap the original sync Responses-API event stream iterator.

    Yields every event unchanged while buffering the streamed text
    deltas, capturing time-to-first-token, taking exact token usage
    from the terminal ``ResponseCompletedEvent`` when one arrives, and
    finalizing the span once the stream is exhausted.
    """
    # NOTE(review): the return annotation says ChatCompletionChunk, but
    # this wraps Responses-API events (cf. ResponseCompletedEvent below)
    # — looks copy-pasted from the completions variant; confirm upstream.
    nonlocal ttft
    # Prefer exact usage from ResponseCompletedEvent; fall back to
    # manual counting only if the stream never delivered one.
    count_tokens_manually = True
    for x in old_iterator:
        # Guarded so instrumentation failures never break the stream.
        with capture_internal_exceptions():
            if hasattr(x, "delta"):
                # The first delta event marks time-to-first-token.
                if start_time is not None and ttft is None:
                    ttft = time.perf_counter() - start_time
                # Responses API streams a single output text: buffer 0.
                if len(data_buf) == 0:
                    data_buf.append([])
                data_buf[0].append(x.delta or "")

            if isinstance(x, ResponseCompletedEvent):
                # Terminal event carries the authoritative usage data.
                _calculate_token_usage(
                    input,
                    x.response,
                    span,
                    None,
                    integration.count_tokens,
                )
                count_tokens_manually = False

        yield x

    # Stream exhausted: flush the collected data onto the span.
    with capture_internal_exceptions():
        if ttft is not None:
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
            )
        if len(data_buf) > 0:
            all_responses = ["".join(chunk) for chunk in data_buf]
            # Response text is PII: record it only when the user opted in.
            if should_send_default_pii() and integration.include_prompts:
                set_data_normalized(
                    span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                )
            if count_tokens_manually:
                _calculate_token_usage(
                    input,
                    response,
                    span,
                    all_responses,
                    integration.count_tokens,
                )

    # NOTE(review): only runs when the consumer drains the stream;
    # an abandoned generator leaves the span open.
    if finish_span:
        span.__exit__(None, None, None)
935+
async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
    """Async twin of ``new_iterator`` for the Responses API stream.

    Same pass-through instrumentation: buffers text deltas, records
    time-to-first-token, prefers usage from ResponseCompletedEvent,
    and finalizes the span after the stream is exhausted.
    """
    # NOTE(review): annotation says ChatCompletionChunk but this wraps
    # Responses-API events — confirm upstream.
    nonlocal ttft
    # Fall back to manual token counting only if no
    # ResponseCompletedEvent was seen.
    count_tokens_manually = True
    async for x in old_iterator:
        # Guarded so instrumentation failures never break the stream.
        with capture_internal_exceptions():
            if hasattr(x, "delta"):
                # The first delta event marks time-to-first-token.
                if start_time is not None and ttft is None:
                    ttft = time.perf_counter() - start_time
                # Single output text: everything goes into buffer 0.
                if len(data_buf) == 0:
                    data_buf.append([])
                data_buf[0].append(x.delta or "")

            if isinstance(x, ResponseCompletedEvent):
                # Terminal event carries the authoritative usage data.
                _calculate_token_usage(
                    input,
                    x.response,
                    span,
                    None,
                    integration.count_tokens,
                )
                count_tokens_manually = False

        yield x

    # Stream exhausted: flush the collected data onto the span.
    with capture_internal_exceptions():
        if ttft is not None:
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
            )
        if len(data_buf) > 0:
            all_responses = ["".join(chunk) for chunk in data_buf]
            # Response text is PII: record it only when the user opted in.
            if should_send_default_pii() and integration.include_prompts:
                set_data_normalized(
                    span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                )
            if count_tokens_manually:
                _calculate_token_usage(
                    input,
                    response,
                    span,
                    all_responses,
                    integration.count_tokens,
                )
    # NOTE(review): only runs when the consumer drains the stream.
    if finish_span:
        span.__exit__(None, None, None)
981+
982+ if str (type (response ._iterator )) == "<class 'async_generator'>" :
983+ response ._iterator = new_iterator_async ()
984+ else :
985+ response ._iterator = new_iterator ()
803986
804987
805988def _set_embeddings_output_data (
0 commit comments