From b2d13ec841c3d023a9e55313f30bbb7cbb62754f Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Fri, 8 May 2026 10:23:51 -0700 Subject: [PATCH] feat(litellm): trace text completion calls Add patchers and tracing for litellm.text_completion and litellm.atext_completion, preserving prompt inputs and text completion choice output. Cover both sync and async paths with VCR-backed tests for supported LiteLLM matrix versions. --- ...test_litellm_atext_completion_metrics.yaml | 115 +++++++++ .../test_litellm_text_completion_metrics.yaml | 115 +++++++++ ...test_litellm_atext_completion_metrics.yaml | 228 ++++++++++++++++++ .../test_litellm_text_completion_metrics.yaml | 228 ++++++++++++++++++ .../integrations/litellm/patchers.py | 23 +- .../integrations/litellm/test_litellm.py | 50 ++++ .../integrations/litellm/tracing.py | 76 ++++-- 7 files changed, 808 insertions(+), 27 deletions(-) create mode 100644 py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_atext_completion_metrics.yaml create mode 100644 py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_text_completion_metrics.yaml create mode 100644 py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_atext_completion_metrics.yaml create mode 100644 py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_text_completion_metrics.yaml diff --git a/py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_atext_completion_metrics.yaml b/py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_atext_completion_metrics.yaml new file mode 100644 index 00000000..77b55f93 --- /dev/null +++ b/py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_atext_completion_metrics.yaml @@ -0,0 +1,115 @@ +interactions: +- request: + body: '{"model":"gpt-3.5-turbo-instruct","prompt":"What''s 12 + 12?"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '61' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 1.99.9 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 1.99.9 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.12 + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/completions + response: + body: + string: "{\n \"id\": \"cmpl-DdJ9HtCsgRTbqBUitaYeCb6F03nAP\",\n \"object\": + \"text_completion\",\n \"created\": 1778260771,\n \"model\": \"gpt-3.5-turbo-instruct:20230824-v2\",\n + \ \"choices\": [\n {\n \"text\": \"\\n12 + 12 = 24\",\n \"index\": + 0,\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n + \ ],\n \"usage\": {\n \"prompt_tokens\": 8,\n \"completion_tokens\": + 8,\n \"total_tokens\": 16\n }\n}\n" + headers: + Access-Control-Allow-Origin: + - '*' + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9f8a213fcfa034bd-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 08 May 2026 17:19:32 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Via: + - envoy-router-6bb47b9fb6-dlvwm + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: 
+ - '383' + openai-model: + - gpt-3.5-turbo-instruct:20230824-v2 + openai-organization: + - braintrust-data + openai-processing-ms: + - '183' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=J7BwJkU4m3tydxu1TkSveYALVcfIWtQLIywG_Z3ocQk-1778260771.8019595-1.0.1.1-58SjNkFWP1Gv_WcjPlzrr_En2YMwTJG.KPMIrIwd3zcJe9Di0qBgjsHtErhXq_sKTthex4_ynaF99wvwmCV3gaDS.LnE5NkagenL9n2DEQW3BCP13HK5FbW4LoU1u1sF; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 08 May 2026 + 17:49:32 GMT + x-engine-geography: + - US + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3498' + x-ratelimit-remaining-tokens: + - '89995' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_9df32c1fe39a4e36b70eb5a753424340 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_text_completion_metrics.yaml b/py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_text_completion_metrics.yaml new file mode 100644 index 00000000..16d2c69e --- /dev/null +++ b/py/src/braintrust/integrations/litellm/cassettes/1.74.0/test_litellm_text_completion_metrics.yaml @@ -0,0 +1,115 @@ +interactions: +- request: + body: '{"model":"gpt-3.5-turbo-instruct","prompt":"What''s 12 + 12?"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '61' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 1.99.9 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 1.99.9 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.12 + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/completions + response: + body: + string: "{\n \"id\": \"cmpl-DdJ9HMEtJxIjFHx4qKe8JuNurI74b\",\n \"object\": + \"text_completion\",\n \"created\": 1778260771,\n \"model\": \"gpt-3.5-turbo-instruct:20230824-v2\",\n + \ \"choices\": [\n {\n \"text\": \"\\n\\n24\",\n \"index\": 0,\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 8,\n \"completion_tokens\": 2,\n + \ \"total_tokens\": 10\n }\n}\n" + headers: + Access-Control-Allow-Origin: + - '*' + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9f8a213c1ccdad84-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 08 May 2026 17:19:31 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Via: + - envoy-router-854cb8774b-62tzj + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '375' + openai-model: + - gpt-3.5-turbo-instruct:20230824-v2 + openai-organization: + - braintrust-data + openai-processing-ms: + - '159' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - 
__cf_bm=hwp6vll96JcVvTXRJKXVk62ZHOsK9JSVnfvv3Gj4Y74-1778260771.2184882-1.0.1.1-ERioTFSLV4jh0BseuCItsa0NWz_5hfwCiXAXQWmT7lSeqQnX6SpX0FzP0_eZAmE6kaot4SfUFoYAz3k1xMSgI.GXg0XSPNo.tevB4IaKhF56bzPVcfdlUJI6GR0Gmxj5; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 08 May 2026 + 17:49:31 GMT + x-engine-geography: + - US + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3498' + x-ratelimit-remaining-tokens: + - '89995' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_41d504ad51ab45fa9ddfd578c6fc2ad1 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_atext_completion_metrics.yaml b/py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_atext_completion_metrics.yaml new file mode 100644 index 00000000..251efeb0 --- /dev/null +++ b/py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_atext_completion_metrics.yaml @@ -0,0 +1,228 @@ +interactions: +- request: + body: '{"model":"gpt-3.5-turbo-instruct","prompt":"What''s 12 + 12?"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '61' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 2.24.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 2.24.0 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.12 + method: POST + uri: https://api.openai.com/v1/completions + response: + body: + string: "{\n \"id\": \"cmpl-DdJ8GWvmvqQGFwyrxZlDUyLK86QX5\",\n \"object\": + \"text_completion\",\n \"created\": 1778260708,\n \"model\": \"gpt-3.5-turbo-instruct:20230824-v2\",\n + \ \"choices\": [\n {\n \"text\": \"\\n\\n24\",\n \"index\": 0,\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 8,\n \"completion_tokens\": 2,\n + \ \"total_tokens\": 10\n }\n}\n" + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - X-Request-ID + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9f8a1fb0bd4b2994-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 08 May 2026 17:18:28 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=ONjFtP4Ka.k_ARiqh0VM1EWIfKODgIjmDhooueIbeLk-1778260707.9594808-1.0.1.1-Q.aTU7pXH7YCKrKqfcux42Hvr7oTwg3POTF3i2Y_kL6G2ofh0gkKdpi34RHRGGZHpN.A.ZrrTdlKXWdL7HrVrcc4a_JT6TbDLOwAl9CdzsptEOhl_grv_dYbEFzyyw12; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 08 May 2026 + 17:48:28 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Via: + - envoy-router-canary-7479f7498-qlb6m + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '375' + openai-model: + - gpt-3.5-turbo-instruct:20230824-v2 + openai-organization: + - braintrust-data + openai-processing-ms: + - '169' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-engine-geography: + - US + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - 
'3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3498' + x-ratelimit-remaining-tokens: + - '89995' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_42eb0d800ecd47a6adc78c2512dc04dd + status: + code: 200 + message: OK +- request: + body: '{"model":"gpt-3.5-turbo-instruct","prompt":"What''s 12 + 12?"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '61' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 2.24.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 2.24.0 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.12 + method: POST + uri: https://api.openai.com/v1/completions + response: + body: + string: "{\n \"id\": \"cmpl-DdJ925pmilnyrkg1XY2AXIKSQSMG2\",\n \"object\": + \"text_completion\",\n \"created\": 1778260756,\n \"model\": \"gpt-3.5-turbo-instruct:20230824-v2\",\n + \ \"choices\": [\n {\n \"text\": \"\\n\\n24\",\n \"index\": 0,\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 8,\n \"completion_tokens\": 2,\n + \ \"total_tokens\": 10\n }\n}\n" + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - X-Request-ID + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9f8a20a05b038fc5-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 08 May 2026 17:19:17 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=scbyNmLM_8hO2D.d9NwPLpUn3So8jKjNkSKiTh0dxlw-1778260746.2933288-1.0.1.1-hMFylmP09VRyrH55miiVflI4kyfrbwtRbIzJqH.V2LvXJL1h8LpPFHnpVZ6j.GquSXpTfkijwKmUuGnIPaOU01c2XVBSPWfXDw28kKXIo0hPTz7zQm0MkLcw5ms_Y8.F; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 08 May 2026 + 17:49:17 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Via: + - envoy-router-6bb47b9fb6-m66sb + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '375' + openai-model: + - gpt-3.5-turbo-instruct:20230824-v2 + openai-organization: + - braintrust-data + openai-processing-ms: + - '616' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-engine-geography: + - US + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3498' + x-ratelimit-remaining-tokens: + - '89995' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_81a429cb5f404cf385bdcc94eabacfe6 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_text_completion_metrics.yaml b/py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_text_completion_metrics.yaml new file mode 100644 index 00000000..d80122fa --- /dev/null +++ b/py/src/braintrust/integrations/litellm/cassettes/latest/test_litellm_text_completion_metrics.yaml @@ -0,0 +1,228 @@ +interactions: +- request: + body: '{"model":"gpt-3.5-turbo-instruct","prompt":"What''s 12 + 12?"}' + headers: + Accept: 
+ - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '61' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.24.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.24.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.12 + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/completions + response: + body: + string: "{\n \"id\": \"cmpl-DdJ8FMG77CV0zsdeZQdzPVnOL2S37\",\n \"object\": + \"text_completion\",\n \"created\": 1778260707,\n \"model\": \"gpt-3.5-turbo-instruct:20230824-v2\",\n + \ \"choices\": [\n {\n \"text\": \"\\n\\n24\",\n \"index\": 0,\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 8,\n \"completion_tokens\": 2,\n + \ \"total_tokens\": 10\n }\n}\n" + headers: + Access-Control-Allow-Origin: + - '*' + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9f8a1fa2594a2402-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 08 May 2026 17:18:27 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Via: + - envoy-router-canary-6f5b65f598-sg7q7 + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '375' + openai-model: + - gpt-3.5-turbo-instruct:20230824-v2 + openai-organization: + - braintrust-data + openai-processing-ms: + - '671' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=YPkCwY43TQIKUWRNNBGuSgrjAQj57epUQsodUYE.o3A-1778260705.6578486-1.0.1.1-HdRi0kkb75Q99vuV79mw3IPYGijptP8ZAVQuHE2ogUV50q_qGi0eGzfseZL.F0nPHI397kJfuEp.zKXow7t8USz85UhzFINEbHIZEOEkjKy_2vhez6qp3fhvX_e5mfLR; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 08 May 2026 + 17:48:27 GMT + x-engine-geography: + - GB + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3498' + x-ratelimit-remaining-tokens: + - '89995' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_9f72848e6afc4965a7c2bddf99d7a7d7 + status: + code: 200 + message: OK +- request: + body: '{"model":"gpt-3.5-turbo-instruct","prompt":"What''s 12 + 12?"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '61' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.24.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.24.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.12 + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/completions + response: + body: + string: "{\n \"id\": \"cmpl-DdJ8satJZFSrUuTr6QVrN7r0VPiNs\",\n \"object\": + \"text_completion\",\n \"created\": 1778260746,\n \"model\": 
\"gpt-3.5-turbo-instruct:20230824-v2\",\n + \ \"choices\": [\n {\n \"text\": \"\\n\\n24\",\n \"index\": 0,\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 8,\n \"completion_tokens\": 2,\n + \ \"total_tokens\": 10\n }\n}\n" + headers: + Access-Control-Allow-Origin: + - '*' + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9f8a209ccbb01354-SJC + Cache-Control: + - no-cache, must-revalidate + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 08 May 2026 17:19:06 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Via: + - envoy-router-dcd7ff779-x44f8 + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '375' + openai-model: + - gpt-3.5-turbo-instruct:20230824-v2 + openai-organization: + - braintrust-data + openai-processing-ms: + - '352' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=atstc1aLH6y5Dkd_kHGl7FTZX9kUMEh5x44OJMh2kyo-1778260745.7228363-1.0.1.1-coB_MauZmnNQj8rDQ8GmeO2UbbgZ49MUq43X9VGu73sKmVf3EcXEs05Gv_Tcxm9kMtinwx_DppLHJfXSCSSZ6o6P9znZDKyG1RCV7cGnvTwAhGWrP6bTOu6sZwP8YyOg; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 08 May 2026 + 17:49:06 GMT + x-engine-geography: + - US + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '3500' + x-ratelimit-limit-tokens: + - '90000' + x-ratelimit-remaining-requests: + - '3499' + x-ratelimit-remaining-tokens: + - '89996' + x-ratelimit-reset-requests: + - 17ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_83f0c90d53f04e42b3748e0d9329d371 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/litellm/patchers.py b/py/src/braintrust/integrations/litellm/patchers.py index c4459712..470ff361 100644 --- a/py/src/braintrust/integrations/litellm/patchers.py +++ b/py/src/braintrust/integrations/litellm/patchers.py @@ -11,6 +11,7 @@ _arerank_wrapper_async, _aresponses_wrapper_async, _aspeech_wrapper_async, + _atext_completion_wrapper_async, _atranscription_wrapper_async, _completion_wrapper, _embedding_wrapper, @@ -19,6 +20,7 @@ _rerank_wrapper, _responses_wrapper, _speech_wrapper, + _text_completion_wrapper, _transcription_wrapper, ) @@ -40,6 +42,18 @@ class LiteLLMAcompletionPatcher(FunctionWrapperPatcher): wrapper = _acompletion_wrapper_async +class LiteLLMTextCompletionPatcher(FunctionWrapperPatcher): + name = "litellm.text_completion" + target_path = "text_completion" + wrapper = _text_completion_wrapper + + +class LiteLLMATextCompletionPatcher(FunctionWrapperPatcher): + name = "litellm.atext_completion" + target_path = "atext_completion" + wrapper = _atext_completion_wrapper_async + + class LiteLLMResponsesPatcher(FunctionWrapperPatcher): name = "litellm.responses" target_path = "responses" @@ -125,6 +139,8 @@ class LiteLLMArerankPatcher(FunctionWrapperPatcher): _ALL_LITELLM_PATCHERS = ( LiteLLMCompletionPatcher, LiteLLMAcompletionPatcher, + LiteLLMTextCompletionPatcher, + LiteLLMATextCompletionPatcher, LiteLLMResponsesPatcher, LiteLLMAresponsesPatcher, LiteLLMImageGenerationPatcher, @@ -153,9 +169,10 @@ def wrap_litellm(litellm: Any) -> Any: module, this function instruments a specific module object (or any object that exposes the same top-level callables such as ``completion``, ``acompletion``, ``responses``, ``aresponses``, 
``image_generation``, - ``aimage_generation``, ``embedding``, ``aembedding``, ``moderation``, - ``speech``, ``aspeech``, ``transcription``, ``atranscription``, - ``rerank``, and ``arerank``). Each patcher is applied idempotently — calling + ``text_completion``, ``atext_completion``, ``aimage_generation``, + ``embedding``, ``aembedding``, ``moderation``, ``speech``, ``aspeech``, + ``transcription``, ``atranscription``, ``rerank``, and ``arerank``). Each + patcher is applied idempotently — calling ``wrap_litellm`` twice on the same object is safe. Args: diff --git a/py/src/braintrust/integrations/litellm/test_litellm.py b/py/src/braintrust/integrations/litellm/test_litellm.py index a64685af..a19b6dab 100644 --- a/py/src/braintrust/integrations/litellm/test_litellm.py +++ b/py/src/braintrust/integrations/litellm/test_litellm.py @@ -17,6 +17,7 @@ TEST_ORG_ID = "test-org-litellm-py-tracing" PROJECT_NAME = "test-project-litellm-py-tracing" TEST_MODEL = "gpt-4o-mini" # cheapest model for tests +TEST_TEXT_MODEL = "gpt-3.5-turbo-instruct" TEST_PROMPT = "What's 12 + 12?" TEST_SYSTEM_PROMPT = "You are a helpful assistant that only responds with numbers." TEST_AUDIO_FILE = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "test_audio.wav") @@ -98,6 +99,55 @@ async def test_litellm_acompletion_metrics(memory_logger): assert TEST_PROMPT in str(span["input"]) +@pytest.mark.vcr +def test_litellm_text_completion_metrics(memory_logger) -> None: + assert not memory_logger.pop() + + start = time.time() + response = litellm.text_completion(model=TEST_TEXT_MODEL, prompt=TEST_PROMPT) + end = time.time() + + assert response + assert response.choices[0].text + assert "24" in response.choices[0].text or "twenty-four" in response.choices[0].text.lower() + + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span + metrics = span["metrics"] + assert_metrics_are_valid(metrics, start, end) + assert span["metadata"]["model"] == TEST_TEXT_MODEL + assert span["metadata"]["provider"] == "litellm" + assert TEST_PROMPT in str(span["input"]) + assert "text" in span["output"][0] + + +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_litellm_atext_completion_metrics(memory_logger): + assert not memory_logger.pop() + + start = time.time() + response = await litellm.atext_completion(model=TEST_TEXT_MODEL, prompt=TEST_PROMPT) + end = time.time() + + assert response + assert response.choices[0].text + assert "24" in response.choices[0].text or "twenty-four" in response.choices[0].text.lower() + + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span + metrics = span["metrics"] + assert_metrics_are_valid(metrics, start, end) + assert span["metadata"]["model"] == TEST_TEXT_MODEL + assert span["metadata"]["provider"] == "litellm" + assert TEST_PROMPT in str(span["input"]) + assert "text" in span["output"][0] + + @pytest.mark.vcr def test_litellm_completion_streaming_sync(memory_logger): assert not memory_logger.pop() diff --git a/py/src/braintrust/integrations/litellm/tracing.py b/py/src/braintrust/integrations/litellm/tracing.py index 0f8b727d..96bde9aa 100644 --- a/py/src/braintrust/integrations/litellm/tracing.py +++ b/py/src/braintrust/integrations/litellm/tracing.py @@ -168,9 +168,8 @@ def sync_gen() -> Generator[Any, None, None]: # --------------------------------------------------------------------------- -def _completion_wrapper(wrapped, instance, args, kwargs): - """wrapt wrapper for litellm.completion.""" - updated_span_payload = 
_update_span_payload_from_params(kwargs, input_key="messages") +def _completion_wrapper_impl(wrapped, args, kwargs, *, input_key: str): + updated_span_payload = _update_span_payload_from_params(kwargs, input_key=input_key) is_streaming = kwargs.get("stream", False) span = start_span( @@ -185,20 +184,19 @@ def _completion_wrapper(wrapped, instance, args, kwargs): if is_streaming: should_end = False return _handle_completion_streaming(completion_response, span, start, is_async=False) - else: - log_response = _try_to_dict(completion_response) - metrics = _parse_metrics_from_usage(log_response.get("usage", {})) - metrics["time_to_first_token"] = time.time() - start - span.log(metrics=metrics, output=log_response["choices"]) - return completion_response + + log_response = _try_to_dict(completion_response) + metrics = _parse_metrics_from_usage(log_response.get("usage", {})) + metrics["time_to_first_token"] = time.time() - start + span.log(metrics=metrics, output=log_response["choices"]) + return completion_response finally: if should_end: span.end() -async def _acompletion_wrapper_async(wrapped, instance, args, kwargs): - """wrapt wrapper for litellm.acompletion.""" - updated_span_payload = _update_span_payload_from_params(kwargs, input_key="messages") +async def _acompletion_wrapper_impl(wrapped, args, kwargs, *, input_key: str): + updated_span_payload = _update_span_payload_from_params(kwargs, input_key=input_key) is_streaming = kwargs.get("stream", False) span = start_span( @@ -213,17 +211,37 @@ async def _acompletion_wrapper_async(wrapped, instance, args, kwargs): if is_streaming: should_end = False return _handle_completion_streaming(completion_response, span, start, is_async=True) - else: - log_response = _try_to_dict(completion_response) - metrics = _parse_metrics_from_usage(log_response.get("usage", {})) - metrics["time_to_first_token"] = time.time() - start - span.log(metrics=metrics, output=log_response["choices"]) - return completion_response + + log_response = _try_to_dict(completion_response) + metrics = _parse_metrics_from_usage(log_response.get("usage", {})) + metrics["time_to_first_token"] = time.time() - start + span.log(metrics=metrics, output=log_response["choices"]) + return completion_response finally: if should_end: span.end() +def _completion_wrapper(wrapped, instance, args, kwargs): + """wrapt wrapper for litellm.completion.""" + return _completion_wrapper_impl(wrapped, args, kwargs, input_key="messages") + + +def _text_completion_wrapper(wrapped, instance, args, kwargs): + """wrapt wrapper for litellm.text_completion.""" + return _completion_wrapper_impl(wrapped, args, kwargs, input_key="prompt") + + +async def _acompletion_wrapper_async(wrapped, instance, args, kwargs): + """wrapt wrapper for litellm.acompletion.""" + return await _acompletion_wrapper_impl(wrapped, args, kwargs, input_key="messages") + + +async def _atext_completion_wrapper_async(wrapped, instance, args, kwargs): + """wrapt wrapper for litellm.atext_completion.""" + return await _acompletion_wrapper_impl(wrapped, args, kwargs, input_key="prompt") + + def _responses_wrapper(wrapped, instance, args, kwargs): """wrapt wrapper for litellm.responses.""" updated_span_payload = _update_span_payload_from_params(kwargs, input_key="input") @@ -563,6 +581,7 @@ def _postprocess_completion_streaming_results(all_results: list[dict[str, Any]]) """Process streaming results to extract final response.""" role = None content = None + text = None tool_calls: list[Any] | None = None finish_reason = None metrics: dict[str, 
float] = {} @@ -575,7 +594,14 @@ def _postprocess_completion_streaming_results(all_results: list[dict[str, Any]]) choices = result["choices"] if not choices: continue - delta = choices[0]["delta"] + choice = choices[0] + if choice.get("finish_reason") is not None: + finish_reason = choice.get("finish_reason") + if choice.get("text") is not None: + text = (text or "") + choice.get("text") + continue + + delta = choice.get("delta") if not delta: continue @@ -607,17 +633,19 @@ def _postprocess_completion_streaming_results(all_results: list[dict[str, Any]]) # pylint: disable=unsubscriptable-object tool_calls[-1]["function"]["arguments"] += delta["tool_calls"][0]["function"]["arguments"] - return { - "metrics": metrics, - "output": [ + if text is not None: + output = [{"index": 0, "text": text, "logprobs": None, "finish_reason": finish_reason}] + else: + output = [ { "index": 0, "message": {"role": role, "content": content, "tool_calls": tool_calls}, "logprobs": None, "finish_reason": finish_reason, } - ], - } + ] + + return {"metrics": metrics, "output": output} def _postprocess_responses_streaming_results(all_results: list[Any]) -> dict[str, Any]:
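
Usage sketch (not part of the diff above): a minimal example of how the newly traced text-completion path might be exercised once litellm is wrapped. The import path for wrap_litellm is inferred from the patchers.py file touched in this patch, and braintrust.init_logger plus the project name are assumptions for illustration; the model and prompt mirror the VCR cassettes.

    import litellm

    import braintrust
    # Import path assumed from py/src/braintrust/integrations/litellm/patchers.py in this patch.
    from braintrust.integrations.litellm.patchers import wrap_litellm

    # Hypothetical project name; init_logger sets up the span destination.
    braintrust.init_logger(project="litellm-text-completion-demo")

    # Idempotent: patches text_completion/atext_completion alongside the other callables.
    wrap_litellm(litellm)

    # Sync path: recorded as a single span with the prompt as input and the
    # text-completion choices as output, per _text_completion_wrapper above.
    resp = litellm.text_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="What's 12 + 12?",
    )
    print(resp.choices[0].text)

Both sync and async wrappers route through the shared _completion_wrapper_impl / _acompletion_wrapper_impl helpers with input_key="prompt", so the logged payload shape matches the existing chat-completion path.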