From d77c8168269249137d87b3652e713fd02e05df8e Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 15:09:44 -0500 Subject: [PATCH 01/27] Added: minitest-stub-const gem for testing --- Gemfile | 1 + Gemfile.lock | 65 ++++++++++-------------- gemfiles/anthropic.gemfile | 1 + gemfiles/anthropic_1.11.gemfile | 14 ----- gemfiles/anthropic_1.12.gemfile | 14 ----- gemfiles/anthropic_1_11.gemfile | 1 + gemfiles/anthropic_1_12.gemfile | 1 + gemfiles/anthropic_uninstalled.gemfile | 1 + gemfiles/openai.gemfile | 1 + gemfiles/openai_0.33.gemfile | 14 ----- gemfiles/openai_0.34.gemfile | 14 ----- gemfiles/openai_0_33.gemfile | 1 + gemfiles/openai_0_34.gemfile | 1 + gemfiles/openai_uninstalled.gemfile | 1 + gemfiles/opentelemetry_latest.gemfile | 1 + gemfiles/opentelemetry_min.gemfile | 1 + gemfiles/ruby_llm.gemfile | 1 + gemfiles/ruby_llm_1_8.gemfile | 1 + gemfiles/ruby_llm_1_9.gemfile | 1 + gemfiles/ruby_llm_uninstalled.gemfile | 1 + gemfiles/ruby_openai.gemfile | 1 + gemfiles/ruby_openai_7_0.gemfile | 1 + gemfiles/ruby_openai_8_0.gemfile | 1 + gemfiles/ruby_openai_uninstalled.gemfile | 1 + test/test_helper.rb | 1 + 25 files changed, 46 insertions(+), 95 deletions(-) delete mode 100644 gemfiles/anthropic_1.11.gemfile delete mode 100644 gemfiles/anthropic_1.12.gemfile delete mode 100644 gemfiles/openai_0.33.gemfile delete mode 100644 gemfiles/openai_0.34.gemfile diff --git a/Gemfile b/Gemfile index 3edd697..eeccddf 100644 --- a/Gemfile +++ b/Gemfile @@ -7,3 +7,4 @@ gemspec # Additional development tools not in gemspec (optional/convenience) gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" diff --git a/Gemfile.lock b/Gemfile.lock index cff826e..169bf43 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -9,47 +9,39 @@ PATH GEM remote: https://rubygems.org/ specs: - addressable (2.8.7) - public_suffix (>= 2.0.2, < 7.0) + addressable (2.8.8) + public_suffix (>= 2.0.2, < 8.0) ansi (1.5.0) appraisal (2.5.0) bundler rake thor (>= 0.14.0) ast (2.4.3) - 
bigdecimal (3.3.1) + bigdecimal (4.0.1) builder (3.3.0) crack (1.0.1) bigdecimal rexml docile (1.4.1) - google-protobuf (4.33.0-aarch64-linux-gnu) - bigdecimal - rake (>= 13) - google-protobuf (4.33.0-arm64-darwin) - bigdecimal - rake (>= 13) - google-protobuf (4.33.0-x64-mingw-ucrt) - bigdecimal - rake (>= 13) - google-protobuf (4.33.0-x86_64-linux-gnu) + google-protobuf (4.33.2-x86_64-linux-gnu) bigdecimal rake (>= 13) googleapis-common-protos-types (1.22.0) google-protobuf (~> 4.26) hashdiff (1.2.1) - json (2.15.1) + json (2.18.0) kramdown (2.5.1) rexml (>= 3.3.9) language_server-protocol (3.17.0.5) lint_roller (1.1.0) - minitest (5.26.0) + minitest (5.27.0) minitest-reporters (1.7.1) ansi builder minitest (>= 5.0) ruby-progressbar - openssl (3.3.1) + minitest-stub-const (0.6) + openssl (3.3.2) opentelemetry-api (1.7.0) opentelemetry-common (0.23.0) opentelemetry-api (~> 1.0) @@ -70,17 +62,17 @@ GEM opentelemetry-semantic_conventions (1.36.0) opentelemetry-api (~> 1.0) parallel (1.27.0) - parser (3.3.9.0) + parser (3.3.10.0) ast (~> 2.4.1) racc - prism (1.6.0) - public_suffix (6.0.2) + prism (1.7.0) + public_suffix (7.0.2) racc (1.8.1) rainbow (3.1.1) - rake (13.3.0) + rake (13.3.1) regexp_parser (2.11.3) rexml (3.4.4) - rubocop (1.80.2) + rubocop (1.81.7) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -88,16 +80,16 @@ GEM parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 2.9.3, < 3.0) - rubocop-ast (>= 1.46.0, < 2.0) + rubocop-ast (>= 1.47.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.47.1) + rubocop-ast (1.49.0) parser (>= 3.3.7.2) - prism (~> 1.4) - rubocop-performance (1.25.0) + prism (~> 1.7) + rubocop-performance (1.26.1) lint_roller (~> 1.1) rubocop (>= 1.75.0, < 2.0) - rubocop-ast (>= 1.38.0, < 2.0) + rubocop-ast (>= 1.47.1, < 2.0) ruby-progressbar (1.13.0) simplecov (0.22.0) docile (~> 1.1) @@ -105,36 +97,30 @@ GEM simplecov_json_formatter (~> 0.1) 
simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) - standard (1.51.1) + standard (1.52.0) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.0) - rubocop (~> 1.80.2) + rubocop (~> 1.81.7) standard-custom (~> 1.0.0) standard-performance (~> 1.8) standard-custom (1.0.2) lint_roller (~> 1.0) rubocop (~> 1.50) - standard-performance (1.8.0) + standard-performance (1.9.0) lint_roller (~> 1.1) - rubocop-performance (~> 1.25.0) + rubocop-performance (~> 1.26.0) thor (1.4.0) unicode-display_width (3.2.0) unicode-emoji (~> 4.1) - unicode-emoji (4.1.0) + unicode-emoji (4.2.0) vcr (6.4.0) - webmock (3.25.1) + webmock (3.26.1) addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) - yard (0.9.37) + yard (0.9.38) PLATFORMS - aarch64-linux - arm64-darwin-23 - arm64-darwin-24 - arm64-darwin-25 - x64-mingw - x64-mingw-ucrt x86_64-linux DEPENDENCIES @@ -143,6 +129,7 @@ DEPENDENCIES kramdown (~> 2.0) minitest (~> 5.0) minitest-reporters (~> 1.6) + minitest-stub-const (~> 0.6) rake (~> 13.0) simplecov (~> 0.22) standard (~> 1.0) diff --git a/gemfiles/anthropic.gemfile b/gemfiles/anthropic.gemfile index ca1632c..56e98f0 100644 --- a/gemfiles/anthropic.gemfile +++ b/gemfiles/anthropic.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "anthropic", ">= 1.11" gemspec path: "../" diff --git a/gemfiles/anthropic_1.11.gemfile b/gemfiles/anthropic_1.11.gemfile deleted file mode 100644 index dc96e4f..0000000 --- a/gemfiles/anthropic_1.11.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "anthropic", "~> 1.11.0" - -gemspec path: "../" diff --git a/gemfiles/anthropic_1.12.gemfile b/gemfiles/anthropic_1.12.gemfile deleted file mode 100644 
index b662fe7..0000000 --- a/gemfiles/anthropic_1.12.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "anthropic", "~> 1.12.0" - -gemspec path: "../" diff --git a/gemfiles/anthropic_1_11.gemfile b/gemfiles/anthropic_1_11.gemfile index 6975b97..ed708b3 100644 --- a/gemfiles/anthropic_1_11.gemfile +++ b/gemfiles/anthropic_1_11.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "anthropic", "~> 1.11.0" gemspec path: "../" diff --git a/gemfiles/anthropic_1_12.gemfile b/gemfiles/anthropic_1_12.gemfile index 67e0b47..29b1b93 100644 --- a/gemfiles/anthropic_1_12.gemfile +++ b/gemfiles/anthropic_1_12.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "anthropic", "~> 1.12.0" gemspec path: "../" diff --git a/gemfiles/anthropic_uninstalled.gemfile b/gemfiles/anthropic_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/anthropic_uninstalled.gemfile +++ b/gemfiles/anthropic_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/gemfiles/openai.gemfile b/gemfiles/openai.gemfile index edfe9d3..dcc2e81 100644 --- a/gemfiles/openai.gemfile +++ b/gemfiles/openai.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "openai", ">= 0.34" gem "base64" diff --git a/gemfiles/openai_0.33.gemfile b/gemfiles/openai_0.33.gemfile deleted file mode 100644 index 1b8d2d2..0000000 --- a/gemfiles/openai_0.33.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - 
-source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "openai", "~> 0.33.0" - -gemspec path: "../" diff --git a/gemfiles/openai_0.34.gemfile b/gemfiles/openai_0.34.gemfile deleted file mode 100644 index 24b39dc..0000000 --- a/gemfiles/openai_0.34.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "openai", "~> 0.34.0" - -gemspec path: "../" diff --git a/gemfiles/openai_0_33.gemfile b/gemfiles/openai_0_33.gemfile index a9eff3e..6104332 100644 --- a/gemfiles/openai_0_33.gemfile +++ b/gemfiles/openai_0_33.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "openai", "~> 0.33.0" gem "base64" diff --git a/gemfiles/openai_0_34.gemfile b/gemfiles/openai_0_34.gemfile index 2e2d6ab..d8969d3 100644 --- a/gemfiles/openai_0_34.gemfile +++ b/gemfiles/openai_0_34.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "openai", "~> 0.34.0" gem "base64" diff --git a/gemfiles/openai_uninstalled.gemfile b/gemfiles/openai_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/openai_uninstalled.gemfile +++ b/gemfiles/openai_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/gemfiles/opentelemetry_latest.gemfile b/gemfiles/opentelemetry_latest.gemfile index 9ac76ed..d309a6b 100644 --- a/gemfiles/opentelemetry_latest.gemfile +++ b/gemfiles/opentelemetry_latest.gemfile @@ -3,6 +3,7 @@ source 
"https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "opentelemetry-sdk", ">= 1.10" gem "opentelemetry-exporter-otlp", ">= 0.31" diff --git a/gemfiles/opentelemetry_min.gemfile b/gemfiles/opentelemetry_min.gemfile index 13a4b77..8edfd5d 100644 --- a/gemfiles/opentelemetry_min.gemfile +++ b/gemfiles/opentelemetry_min.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "opentelemetry-sdk", "~> 1.3.0" gem "opentelemetry-exporter-otlp", "~> 0.28.0" diff --git a/gemfiles/ruby_llm.gemfile b/gemfiles/ruby_llm.gemfile index 0199dc1..6f2c658 100644 --- a/gemfiles/ruby_llm.gemfile +++ b/gemfiles/ruby_llm.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby_llm", ">= 1.9" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_8.gemfile b/gemfiles/ruby_llm_1_8.gemfile index 1c1e3eb..5a5a016 100644 --- a/gemfiles/ruby_llm_1_8.gemfile +++ b/gemfiles/ruby_llm_1_8.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby_llm", "~> 1.8.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_9.gemfile b/gemfiles/ruby_llm_1_9.gemfile index f3bd0c2..deee1d0 100644 --- a/gemfiles/ruby_llm_1_9.gemfile +++ b/gemfiles/ruby_llm_1_9.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby_llm", "~> 1.9.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_uninstalled.gemfile b/gemfiles/ruby_llm_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/ruby_llm_uninstalled.gemfile +++ b/gemfiles/ruby_llm_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/gemfiles/ruby_openai.gemfile 
b/gemfiles/ruby_openai.gemfile index d5aabc3..29caf96 100644 --- a/gemfiles/ruby_openai.gemfile +++ b/gemfiles/ruby_openai.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby-openai", ">= 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_7_0.gemfile b/gemfiles/ruby_openai_7_0.gemfile index 6a30459..4c9e319 100644 --- a/gemfiles/ruby_openai_7_0.gemfile +++ b/gemfiles/ruby_openai_7_0.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby-openai", "~> 7.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_8_0.gemfile b/gemfiles/ruby_openai_8_0.gemfile index e9c161e..3044fea 100644 --- a/gemfiles/ruby_openai_8_0.gemfile +++ b/gemfiles/ruby_openai_8_0.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby-openai", "~> 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_uninstalled.gemfile b/gemfiles/ruby_openai_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/ruby_openai_uninstalled.gemfile +++ b/gemfiles/ruby_openai_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/test/test_helper.rb b/test/test_helper.rb index ee2c7f2..45d085b 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -17,6 +17,7 @@ require "braintrust" require "minitest/autorun" +require "minitest/stub_const" # Show test timings when MT_VERBOSE is set if ENV["MT_VERBOSE"] From 28b1209580a5753a56e2c635fdf8e8d2cce9ecf4 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 14:24:06 -0500 Subject: [PATCH 02/27] Added: mock_chain to test_helper --- test/support/assert_helper.rb | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/support/assert_helper.rb 
b/test/support/assert_helper.rb index 8d1d725..7b943cb 100644 --- a/test/support/assert_helper.rb +++ b/test/support/assert_helper.rb @@ -1,6 +1,47 @@ module Test module Support module AssertHelper + # Build a chain of mocks for nested method calls. + # Useful for testing code that traverses object hierarchies like `obj.foo.bar.baz`. + # + # Creates intermediate mocks that return each other in sequence, with the final + # mock returning the specified terminal value. All intermediate mocks are verified + # after the block. The caller is responsible for verifying the terminal value if needed. + # + # @param methods [Array] chain of method names to mock + # @param returns [Object] the value returned by the final method (can be a Class, Mock, or any object) + # @yield [root] the root mock (entry point to the chain) + # + # @example Testing a method chain with a Class as terminal + # fake_singleton = Class.new { include SomeModule } + # mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + # assert SomePatcher.patched?(target: client) + # end + # + # @example Testing a method chain with a Mock as terminal + # terminal = Minitest::Mock.new + # terminal.expect(:include, true, [SomeModule]) + # mock_chain(:foo, :bar, returns: terminal) do |root| + # SomeCode.do_something(root) + # end + # terminal.verify + # + def mock_chain(*methods, returns:) + current = returns + mocks = [] + + methods.reverse_each do |method| + mock = Minitest::Mock.new + mock.expect(method, current) + mocks.unshift(mock) + current = mock + end + + yield(mocks.first) + + mocks.each(&:verify) + end + # Runs the test inside a fork, to isolate its side-effects from the main process. 
# Similar in purpose to https://docs.ruby-lang.org/en/master/Ruby/Box.html#class-Ruby::Box # From cebd618c92fbddd457015407c9e0120bd89844af Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 15 Dec 2025 11:41:19 -0500 Subject: [PATCH 03/27] Added: Integration framework API --- CONTRIBUTING.md | 82 ++- lib/braintrust.rb | 1 + lib/braintrust/contrib.rb | 102 ++++ lib/braintrust/contrib/context.rb | 56 ++ lib/braintrust/contrib/integration.rb | 143 +++++ lib/braintrust/contrib/patcher.rb | 76 +++ lib/braintrust/contrib/registry.rb | 92 ++++ lib/braintrust/state.rb | 5 + test/braintrust/contrib/context_test.rb | 93 ++++ test/braintrust/contrib/integration_test.rb | 568 ++++++++++++++++++++ test/braintrust/contrib/patcher_test.rb | 281 ++++++++++ test/braintrust/contrib/registry_test.rb | 241 +++++++++ test/braintrust/contrib_test.rb | 200 +++++++ test/support/assert_helper.rb | 41 -- test/support/log_helper.rb | 22 + test/support/mock_helper.rb | 46 ++ test/test_helper.rb | 4 + 17 files changed, 2009 insertions(+), 44 deletions(-) create mode 100644 lib/braintrust/contrib.rb create mode 100644 lib/braintrust/contrib/context.rb create mode 100644 lib/braintrust/contrib/integration.rb create mode 100644 lib/braintrust/contrib/patcher.rb create mode 100644 lib/braintrust/contrib/registry.rb create mode 100644 test/braintrust/contrib/context_test.rb create mode 100644 test/braintrust/contrib/integration_test.rb create mode 100644 test/braintrust/contrib/patcher_test.rb create mode 100644 test/braintrust/contrib/registry_test.rb create mode 100644 test/braintrust/contrib_test.rb create mode 100644 test/support/log_helper.rb create mode 100644 test/support/mock_helper.rb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1301c3..f4bea9a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,9 +2,18 @@ Thanks for contributing! Follow TDD practices: write tests first, implement minimal code, refactor. 
+- [Quick Setup](#quick-setup) + - [Local development (on host)](#local-development-on-host) + - [Local development (in Docker)](#local-development-in-docker) +- [Development](#development) + - [Running tasks](#running-tasks) + - [Adding new integrations for AI Libraries](#adding-new-integrations-for-ai-libraries) +- [Testing](#testing) + - [With different Ruby versions](#with-different-ruby-versions) + ## Quick Setup -### Option 1: Local Development +### Local development (on host) ```bash # Clone repo @@ -31,7 +40,7 @@ cp .env.example .env rake ``` -### Option 2: Docker Container +### Local development (in Docker) Use the dev container if you prefer not to install Ruby tooling on your host machine. Your IDE edits files on the host; all tools run in the container. @@ -62,12 +71,78 @@ rake ## Development +### Running tasks + All of our common dev tasks are in rake. ```bash rake -T ``` +### Adding new integrations for AI Libraries + +To add instrumentation support for a new AI library, follow these steps: + +#### 1. Define the Integration + +Create a new file in `lib/braintrust/contrib/`: + +```ruby +# lib/braintrust/contrib/trustybrain_llm.rb +module Braintrust::Contrib + class TrustybrainLLM + include Integration + + def self.integration_name + :trustybrain_llm + end + + def self.gem_names + ["trustybrain_llm"] + end + + def self.patcher + TrustybrainLLMPatcher + end + end + + class TrustybrainLLMPatcher < Patcher + def self.perform_patch(context) + # Add your instrumentation here + # context.tracer_provider gives you access to the tracer + end + end +end +``` + +#### 2. Register It + +Add to `lib/braintrust/contrib.rb`: + +```ruby +require_relative "contrib/trustybrain_llm" + +# At the bottom: +Contrib::TrustybrainLLM.register! +``` + +#### 3. 
Write Tests + +Create `test/braintrust/contrib/trustybrain_llm_test.rb`: + +```ruby +require "test_helper" + +class Braintrust::Contrib::TrustybrainLLMTest < Minitest::Test + def test_integration_basics + assert_equal :trustybrain_llm, TrustybrainLLM.integration_name + assert_equal ["trustybrain_llm"], TrustybrainLLM.gem_names + end +end +``` + +See existing tests in `test/braintrust/contrib/` for complete examples of testing integrations, patchers, and the registry. + ## Testing We use VCR for making http tests fast. You can run tests with these enabled, @@ -78,7 +153,7 @@ for more details. rake -T test:vcr ``` -### Testing with Different Ruby Versions +### With different Ruby versions CI tests against Ruby 3.2, 3.3, and 3.4. To test locally with a different Ruby version: @@ -102,3 +177,4 @@ mise use ruby@3.4 ruby --version # => 3.4.x bundle exec rake test ``` + diff --git a/lib/braintrust.rb b/lib/braintrust.rb index 469cde1..979651e 100644 --- a/lib/braintrust.rb +++ b/lib/braintrust.rb @@ -7,6 +7,7 @@ require_relative "braintrust/api" require_relative "braintrust/internal/experiments" require_relative "braintrust/eval" +require_relative "braintrust/contrib" # Braintrust Ruby SDK # diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb new file mode 100644 index 0000000..f19c4fb --- /dev/null +++ b/lib/braintrust/contrib.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require_relative "contrib/registry" +require_relative "contrib/integration" +require_relative "contrib/patcher" +require_relative "contrib/context" + +module Braintrust + # Instrument a registered integration by name. + # This is the main entry point for activating integrations. 
+ # + # @param name [Symbol] The integration name (e.g., :openai, :anthropic) + # @param options [Hash] Optional configuration + # @option options [Object] :target Optional target instance to instrument specifically + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + # + # @example Instrument all OpenAI clients + # Braintrust.instrument!(:openai) + # + # @example Instrument specific OpenAI client instance + # client = OpenAI::Client.new + # Braintrust.instrument!(:openai, target: client, tracer_provider: my_provider) + def self.instrument!(name, **options) + Braintrust::Contrib.instrument!(name, **options) + end + + # Contrib framework for auto-instrumentation integrations. + # Provides a consistent interface for all integrations and enables + # reliable auto-instrumentation in later milestones. + module Contrib + class << self + # Get the global registry instance. + # @return [Registry] + def registry + Registry.instance + end + + # Initialize the contrib framework with optional configuration. + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider, nil] Optional tracer provider + # @return [void] + def init(tracer_provider: nil) + @default_tracer_provider = tracer_provider + end + + # Instrument a registered integration by name. + # This is the main entry point for activating integrations. 
+ # + # @param name [Symbol] The integration name (e.g., :openai, :anthropic) + # @param options [Hash] Optional configuration + # @option options [Object] :target Optional target instance to instrument specifically + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + # + # @example Instrument all OpenAI clients + # Braintrust::Contrib.instrument!(:openai) + # + # @example Instrument specific OpenAI client instance + # client = OpenAI::Client.new + # Braintrust::Contrib.instrument!(:openai, target: client, tracer_provider: my_provider) + def instrument!(name, **options) + if (integration = registry[name]) + integration.instrument!(**options) + else + Braintrust::Log.error("No integration for '#{name}' is defined!") + end + end + + # Get the default tracer provider, falling back to OpenTelemetry global. + # @return [OpenTelemetry::Trace::TracerProvider] + def default_tracer_provider + @default_tracer_provider || ::OpenTelemetry.tracer_provider + end + + # Get the context for a target object. + # @param target [Object] The object to retrieve context from + # @return [Context, nil] The context if found, nil otherwise + def context_for(target) + Context.from(target) + end + + # Get the tracer provider for a target. + # Checks target's context first, then falls back to contrib default. + # @param target [Object] The object to look up tracer provider for + # @return [OpenTelemetry::Trace::TracerProvider] + def tracer_provider_for(target) + context_for(target)&.[](:tracer_provider) || default_tracer_provider + end + + # Get a tracer for a target, using its context's tracer_provider if available. 
+ # @param target [Object] The object to look up context from + # @param name [String] Tracer name + # @return [OpenTelemetry::Trace::Tracer] + def tracer_for(target, name: "braintrust") + tracer_provider_for(target).tracer(name) + end + end + end +end + +# Load integration stubs (eager load minimal metadata). +# These will be added in subsequent milestones. diff --git a/lib/braintrust/contrib/context.rb b/lib/braintrust/contrib/context.rb new file mode 100644 index 0000000..229af0c --- /dev/null +++ b/lib/braintrust/contrib/context.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Per-instance or per-class configuration context. + # Allows attaching generic configuration to specific objects or classes. + class Context + # Set or update context on a target object. + # Creates a new context if one doesn't exist, or updates existing context. + # @param target [Object] The object to attach context to + # @param options [Hash] Configuration options to store + # @return [Context, nil] The existing context if updated, nil if created new or options empty + def self.set!(target, **options) + return nil if options.empty? + + if (ctx = from(target)) + # Update existing context + options.each { |k, v| ctx[k] = v } + else + # Create and attach new context + target.instance_variable_set(:@braintrust_context, new(**options)) + end + + ctx + end + + # Retrieve context from a target. + # @param target [Object] The object to retrieve context from + # @return [Context, nil] The context if found, nil otherwise + def self.from(target) + target&.instance_variable_get(:@braintrust_context) + end + + # @param options [Hash] Configuration options + def initialize(**options) + @options = options + end + + def [](key) + @options[key] + end + + def []=(key, value) + @options[key] = value + end + + # Get an option value with a default fallback. 
+ # @param key [Symbol, String] The option key + # @param default [Object] The default value if key not found + # @return [Object] The option value, or default if not found + def fetch(key, default) + @options.fetch(key, default) + end + end + end +end diff --git a/lib/braintrust/contrib/integration.rb b/lib/braintrust/contrib/integration.rb new file mode 100644 index 0000000..1791f6f --- /dev/null +++ b/lib/braintrust/contrib/integration.rb @@ -0,0 +1,143 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Base module defining the integration contract. + # Include this module in integration classes to define the schema. + # Delegates actual patching to a Patcher subclass. + module Integration + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + # Unique symbol name for this integration (e.g., :openai, :anthropic). + # @return [Symbol] + def integration_name + raise NotImplementedError, "#{self} must implement integration_name" + end + + # Array of gem names this integration supports. + # @return [Array] + def gem_names + raise NotImplementedError, "#{self} must implement gem_names" + end + + # Require paths for auto-instrument detection. + # Default implementation returns gem_names. + # @return [Array] + def require_paths + gem_names + end + + # Is the target library available for loading? + # @return [Boolean] + def available? + gem_names.any? { |name| Gem.loaded_specs.key?(name) } + end + + # Is the target library loaded? + # @return [Boolean] + def loaded? + raise NotImplementedError, "#{self} must implement loaded?" + end + + # Minimum compatible version (optional, inclusive). + # @return [String, nil] + def minimum_version + nil + end + + # Maximum compatible version (optional, inclusive). + # @return [String, nil] + def maximum_version + nil + end + + # Is the library version compatible? + # @return [Boolean] + def compatible? + return false unless available? 
+ + gem_names.each do |name| + spec = Gem.loaded_specs[name] + next unless spec + + version = spec.version + return false if minimum_version && version < Gem::Version.new(minimum_version) + return false if maximum_version && version > Gem::Version.new(maximum_version) + return true + end + false + end + + # Array of patcher classes for this integration. + # Override to return multiple patchers for version-specific logic. + # @return [Array] Array of patcher classes + def patchers + [patcher] # Default: single patcher + end + + # Convenience method for single patcher (existing pattern). + # Override this OR patchers (not both). + # @return [Class] The patcher class + def patcher + raise NotImplementedError, "#{self} must implement patcher or patchers" + end + + # Instrument this integration with optional configuration. + # If a target is provided, configures the target instance specifically. + # Otherwise, applies class-level instrumentation to all instances. + # + # @param options [Hash] Configuration options + # @option options [Object] :target Optional target instance to instrument + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if patching succeeded or was already done + # + # @example Class-level instrumentation (all clients) + # integration.instrument!(tracer_provider: my_provider) + # + # @example Instance-level instrumentation (specific client) + # integration.instrument!(target: client, tracer_provider: my_provider) + def instrument!(**options) + if options[:target] + # Configure the target with provided options (exclude :target from context) + context_options = options.except(:target) + Contrib::Context.set!(options[:target], **context_options) unless context_options.empty? + end + + patch!(**options) + end + + # Apply instrumentation (idempotent). Tries all applicable patchers. + # This method is typically called by instrument! after configuration. 
+ # + # @param options [Hash] Configuration options + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if any patching succeeded or was already done + def patch!(**options) + return false unless available? && loaded? && compatible? + + # Try all applicable patchers + success = false + patchers.each do |patch| + # Check if this patcher is applicable + next unless patch.applicable? + + # Attempt to patch (patcher checks applicable? again under lock) + success = true if patch.patch!(**options) + end + + Braintrust::Log.debug("No applicable patcher found for #{integration_name}") unless success + success + end + + # Register this integration with the global registry. + def register! + Registry.instance.register(self) + end + end + end + end +end diff --git a/lib/braintrust/contrib/patcher.rb b/lib/braintrust/contrib/patcher.rb new file mode 100644 index 0000000..6de99ad --- /dev/null +++ b/lib/braintrust/contrib/patcher.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Base class for all patchers. + # Provides thread-safe, idempotent patching with error handling. + class Patcher + # For thread-safety, a mutex is used to wrap patching. + # Each instance of Patcher should have its own copy of the mutex., + # allowing each Patcher to work in parallel while protecting the + # critical patch section. + @patch_mutex = Mutex.new + + def self.inherited(subclass) + subclass.instance_variable_set(:@patch_mutex, Mutex.new) + end + + class << self + # Has this patcher already been applied? + # @return [Boolean] + def patched?(**options) + @patched == true + end + + # Override in subclasses to check if patcher should apply. + # Called after patcher loads but before perform_patch. + # @return [Boolean] true if this patcher should be applied + def applicable? 
+ true # Default: always applicable + end + + # Apply the patch (thread-safe and idempotent). + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if patching succeeded or was already done + def patch!(**options) + return false unless applicable? + return true if patched?(**options) # Fast path + + @patch_mutex.synchronize do + unless applicable? + Braintrust::Log.debug("Skipping #{name} - not applicable") + return false + end + return true if patched?(**options) # Double-check under lock + + perform_patch(**options) + @patched = true + end + Braintrust::Log.debug("Patched #{name}") + true + rescue => e + Braintrust::Log.error("Failed to patch #{name}: #{e.message}") + false + end + + # Subclasses implement this to perform the actual patching. + # This method is called under lock after applicable? returns true. + # + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + raise NotImplementedError, "#{self} must implement perform_patch" + end + + # Reset patched state (primarily for testing). + def reset! + @patched = false + end + end + end + end +end diff --git a/lib/braintrust/contrib/registry.rb b/lib/braintrust/contrib/registry.rb new file mode 100644 index 0000000..77a3d67 --- /dev/null +++ b/lib/braintrust/contrib/registry.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require "singleton" + +module Braintrust + module Contrib + # Thread-safe singleton registry for integrations. + # Provides registration, lookup, and require-path mapping for auto-instrumentation. 
+ class Registry + include Singleton + + def initialize + @integrations = {} + @require_path_map = nil # Lazy cache + @mutex = Mutex.new + end + + # Register an integration class with the registry. + # @param integration_class [Class] The integration class to register + def register(integration_class) + @mutex.synchronize do + @integrations[integration_class.integration_name] = integration_class + @require_path_map = nil # Invalidate cache + end + end + + # Look up an integration by name. + # @param name [Symbol, String] The integration name + # @return [Class, nil] The integration class, or nil if not found + def [](name) + @integrations[name.to_sym] + end + + # Get all registered integrations. + # @return [Array] All registered integration classes + def all + @integrations.values + end + + # Get all available integrations (target library is loaded). + # @return [Array] Available integration classes + def available + @integrations.values.select(&:available?) + end + + # Iterate over all registered integrations. + # @yield [Class] Each registered integration class + def each(&block) + @integrations.values.each(&block) + end + + # Returns integrations associated with a require path. + # Thread-safe with double-checked locking for performance. + # @param path [String] The require path (e.g., "openai", "anthropic") + # @return [Array] Integrations matching the require path + def integrations_for_require_path(path) + map = @require_path_map + if map.nil? + map = @mutex.synchronize do + @require_path_map ||= build_require_path_map + end + end + basename = File.basename(path.to_s, ".rb") + map.fetch(basename, EMPTY_ARRAY) + end + + # Clear all registrations (primarily for testing). + def clear! 
+ @mutex.synchronize do + @integrations.clear + @require_path_map = nil + end + end + + private + + EMPTY_ARRAY = [].freeze + + def build_require_path_map + map = {} + @integrations.each_value do |integration| + integration.require_paths.each do |req| + map[req] ||= [] + map[req] << integration + end + end + map.each_value(&:freeze) + map.freeze + end + end + end +end diff --git a/lib/braintrust/state.rb b/lib/braintrust/state.rb index 3d6d154..6f20d69 100644 --- a/lib/braintrust/state.rb +++ b/lib/braintrust/state.rb @@ -93,6 +93,11 @@ def initialize(api_key: nil, org_name: nil, org_id: nil, default_project: nil, a if enable_tracing require_relative "trace" Trace.setup(self, tracer_provider, exporter: exporter) + + # Propagate tracer_provider to Contrib if loaded (soft dependency check) + if defined?(Braintrust::Contrib) + Braintrust::Contrib.init(tracer_provider: tracer_provider) + end end end diff --git a/test/braintrust/contrib/context_test.rb b/test/braintrust/contrib/context_test.rb new file mode 100644 index 0000000..c4d147d --- /dev/null +++ b/test/braintrust/contrib/context_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::ContextTest < Minitest::Test + def setup + @target = Object.new + end + + def test_from_returns_nil_for_nil_target + assert_nil Braintrust::Contrib::Context.from(nil) + end + + def test_from_returns_nil_when_no_context_set + assert_nil Braintrust::Contrib::Context.from(@target) + end + + def test_from_returns_context_when_set + Braintrust::Contrib::Context.set!(@target, foo: "bar") + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_instance_of Braintrust::Contrib::Context, ctx + end + + def test_set_creates_context_with_options + Braintrust::Contrib::Context.set!(@target, foo: "bar", baz: 123) + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_equal "bar", ctx[:foo] + assert_equal 123, ctx[:baz] + end + + def 
test_set_returns_nil_when_creating_new_context + result = Braintrust::Contrib::Context.set!(@target, foo: "bar") + + assert_nil result + end + + def test_set_returns_nil_with_empty_options + result = Braintrust::Contrib::Context.set!(@target) + + assert_nil result + assert_nil Braintrust::Contrib::Context.from(@target) + end + + def test_set_updates_existing_context + Braintrust::Contrib::Context.set!(@target, foo: "bar") + Braintrust::Contrib::Context.set!(@target, baz: 123) + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_equal "bar", ctx[:foo] + assert_equal 123, ctx[:baz] + end + + def test_set_overwrites_existing_keys + Braintrust::Contrib::Context.set!(@target, foo: "bar") + Braintrust::Contrib::Context.set!(@target, foo: "updated") + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_equal "updated", ctx[:foo] + end + + def test_context_bracket_access + ctx = Braintrust::Contrib::Context.new(foo: "bar") + + assert_equal "bar", ctx[:foo] + assert_nil ctx[:missing] + end + + def test_context_bracket_assignment + ctx = Braintrust::Contrib::Context.new + + ctx[:foo] = "bar" + + assert_equal "bar", ctx[:foo] + end + + def test_context_fetch_returns_value + ctx = Braintrust::Contrib::Context.new(foo: "bar") + + assert_equal "bar", ctx.fetch(:foo, "default") + end + + def test_context_fetch_returns_default_when_missing + ctx = Braintrust::Contrib::Context.new + + assert_equal "default", ctx.fetch(:missing, "default") + end +end diff --git a/test/braintrust/contrib/integration_test.rb b/test/braintrust/contrib/integration_test.rb new file mode 100644 index 0000000..40524d6 --- /dev/null +++ b/test/braintrust/contrib/integration_test.rb @@ -0,0 +1,568 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::IntegrationTest < Minitest::Test + def setup + # Use anonymous subclass to isolate test state + registry_class = Class.new(Braintrust::Contrib::Registry) + @registry = registry_class.instance + end + + # 
Create a mock patcher class for testing + def create_mock_patcher(should_fail: false, applicable: true) + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :patch_called, :should_fail, :is_applicable + end + + def self.applicable? + @is_applicable + end + + def self.perform_patch(**options) + @patch_called = true + raise "Patch failed" if @should_fail + end + end + + patcher.patch_called = false + patcher.should_fail = should_fail + patcher.is_applicable = applicable + patcher + end + + # Create a full integration class for testing + def create_test_integration( + name:, + gem_names:, + require_paths: nil, + patcher: nil, + min_version: nil, + max_version: nil, + loaded: true + ) + patcher_class = patcher || create_mock_patcher + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_integration_name, :_gem_names, :_require_paths, + :_patcher, :_min_version, :_max_version, :_loaded + end + + def self.integration_name + _integration_name + end + + def self.gem_names + _gem_names + end + + def self.require_paths + _require_paths || _gem_names + end + + def self.minimum_version + _min_version + end + + def self.maximum_version + _max_version + end + + def self.loaded? 
+ _loaded + end + + def self.patcher + _patcher + end + end + + integration._integration_name = name + integration._gem_names = gem_names + integration._require_paths = require_paths + integration._patcher = patcher_class + integration._min_version = min_version + integration._max_version = max_version + integration._loaded = loaded + integration + end + + def test_integration_name_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.integration_name + end + end + + def test_gem_names_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.gem_names + end + end + + def test_patcher_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.patcher + end + end + + def test_loaded_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.loaded? 
+ end + end + + def test_patchers_defaults_to_wrapping_patcher + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + assert_equal [patcher], integration.patchers + end + + def test_require_paths_defaults_to_gem_names + integration = create_test_integration( + name: :test, + gem_names: ["test-gem", "other-gem"] + ) + + assert_equal ["test-gem", "other-gem"], integration.require_paths + end + + def test_require_paths_can_be_overridden + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"], + require_paths: ["custom_path", "another_path"] + ) + + assert_equal ["custom_path", "another_path"], integration.require_paths + end + + def test_minimum_version_defaults_to_nil + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"] + ) + + assert_nil integration.minimum_version + end + + def test_maximum_version_defaults_to_nil + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"] + ) + + assert_nil integration.maximum_version + end + + def test_available_checks_gem_loaded_specs + # Use a gem that is actually loaded (minitest) + integration = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"] + ) + + assert integration.available? + end + + def test_available_returns_false_for_unloaded_gem + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"] + ) + + refute integration.available? + end + + def test_available_with_multiple_gems_any_loaded + # minitest is loaded, fake-gem is not + integration = create_test_integration( + name: :test, + gem_names: ["fake-gem-xyz", "minitest"] + ) + + assert integration.available? + end + + def test_compatible_returns_false_when_not_available + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"] + ) + + refute integration.compatible? 
+ end + + def test_compatible_returns_true_when_no_version_constraints + integration = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"] + ) + + assert integration.compatible? + end + + def test_compatible_checks_minimum_version + # Get the current minitest version + Gem.loaded_specs["minitest"].version + + # Test with a minimum version below current + integration_ok = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + min_version: "1.0.0" + ) + assert integration_ok.compatible? + + # Test with a minimum version above current + integration_too_new = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + min_version: "999.0.0" + ) + refute integration_too_new.compatible? + end + + def test_compatible_checks_maximum_version + # Test with a maximum version above current + integration_ok = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + max_version: "999.0.0" + ) + assert integration_ok.compatible? + + # Test with a maximum version below current + integration_too_old = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + max_version: "0.0.1" + ) + refute integration_too_old.compatible? + end + + def test_patch_delegates_to_patcher + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + result = integration.patch! + + assert result + assert patcher.patch_called + end + + def test_patch_returns_false_when_not_available + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"], + patcher: patcher + ) + + result = integration.patch! 
+ + refute result + refute patcher.patch_called + end + + def test_patch_returns_false_when_not_compatible + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + min_version: "999.0.0", # Too high + patcher: patcher + ) + + result = integration.patch! + + refute result + refute patcher.patch_called + end + + def test_patch_returns_false_when_not_loaded + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + loaded: false, + patcher: patcher + ) + + result = integration.patch! + + refute result + refute patcher.patch_called + end + + def test_patch_passes_tracer_provider + received_options = nil + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :is_applicable + end + + def self.applicable? + @is_applicable + end + + define_singleton_method(:perform_patch) do |**options| + received_options = options + end + end + patcher.is_applicable = true + + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + tracer_provider = Object.new + integration.patch!(tracer_provider: tracer_provider) + + assert_equal tracer_provider, received_options[:tracer_provider] + end + + def test_instrument_passes_target_to_patcher + received_options = nil + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :is_applicable + end + + def self.applicable? 
+ @is_applicable + end + + define_singleton_method(:perform_patch) do |**options| + received_options = options + end + end + patcher.is_applicable = true + + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + target = Object.new + tracer_provider = Object.new + integration.instrument!(target: target, tracer_provider: tracer_provider) + + # CRITICAL: target must be passed through to patcher for instance-level instrumentation + assert_equal target, received_options[:target], "target should be passed to patcher" + assert_equal tracer_provider, received_options[:tracer_provider] + end + + def test_instrument_sets_context_on_target + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + target = Object.new + tracer_provider = Object.new + integration.instrument!(target: target, tracer_provider: tracer_provider) + + # Context should be set on the target + ctx = Braintrust::Contrib::Context.from(target) + assert_equal tracer_provider, ctx[:tracer_provider] + end + + def test_register_adds_to_registry + integration = create_test_integration( + name: :test_integration, + gem_names: ["test-gem"] + ) + + # Mock Registry.instance to verify register! calls it + mock_registry = Minitest::Mock.new + mock_registry.expect(:register, nil, [integration]) + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + integration.register! 
+ end + + mock_registry.verify + end + + def test_patchers_with_multiple_patchers + patcher1 = create_mock_patcher + patcher2 = create_mock_patcher + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patchers + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.patchers + _patchers + end + end + + integration._patchers = [patcher1, patcher2] + + assert_equal [patcher1, patcher2], integration.patchers + end + + def test_patch_tries_all_applicable_patchers + # First patcher is not applicable + patcher1 = create_mock_patcher(applicable: false) + + # Second and third patchers are applicable - both should be tried + patcher2 = create_mock_patcher(applicable: true) + patcher3 = create_mock_patcher(applicable: true) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patchers + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.loaded? + true + end + + def self.patchers + _patchers + end + end + + integration._patchers = [patcher1, patcher2, patcher3] + + result = integration.patch! + + assert result + refute patcher1.patch_called # Not applicable + assert patcher2.patch_called # Applied + assert patcher3.patch_called # Also applied (doesn't stop after patcher2) + end + + def test_patch_skips_non_applicable_patchers + # Create patcher that is not applicable + non_applicable_patcher = create_mock_patcher(applicable: false) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patcher + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.loaded? + true + end + + def self.patchers + [_patcher] + end + end + + integration._patcher = non_applicable_patcher + + result = integration.patch! 
+ + refute result + refute non_applicable_patcher.patch_called + end + + def test_patch_logs_when_no_applicable_patcher + non_applicable_patcher = create_mock_patcher(applicable: false) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patcher + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.loaded? + true + end + + def self.patchers + [_patcher] + end + end + + integration._patcher = non_applicable_patcher + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.level = Logger::DEBUG + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + integration.patch! + # Check that the "no applicable patcher" message was logged + assert captured_logs.any? { |msg| msg.include?("No applicable patcher found") } + ensure + Braintrust::Log.logger = original_logger + end + end +end diff --git a/test/braintrust/contrib/patcher_test.rb b/test/braintrust/contrib/patcher_test.rb new file mode 100644 index 0000000..17ba7ca --- /dev/null +++ b/test/braintrust/contrib/patcher_test.rb @@ -0,0 +1,281 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::PatcherTest < Minitest::Test + def setup + # Create a fresh patcher class for each test to avoid state leakage + @patcher = create_test_patcher + end + + def create_test_patcher(should_fail: false) + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :patch_count, :last_options, :should_fail + end + + def self.perform_patch(**options) + @patch_count ||= 0 + @patch_count += 1 + @last_options = options + raise "Intentional patch failure" if @should_fail + end + end + + patcher.reset! 
+ patcher.patch_count = 0 + patcher.last_options = {} + patcher.should_fail = should_fail + patcher + end + + def test_patch_passes_options + options = {tracer_provider: "test-provider", target: "test-target"} + @patcher.patch!(**options) + + assert_equal "test-provider", @patcher.last_options[:tracer_provider] + assert_equal "test-target", @patcher.last_options[:target] + end + + def test_patched_returns_false_initially + refute @patcher.patched? + end + + def test_patch_sets_patched_to_true + @patcher.patch! + + assert @patcher.patched? + end + + def test_patch_returns_true_on_success + result = @patcher.patch! + + assert result + end + + def test_patch_calls_perform_patch_once + @patcher.patch! + + assert_equal 1, @patcher.patch_count + end + + def test_patch_is_idempotent + @patcher.patch! + @patcher.patch! + @patcher.patch! + + assert_equal 1, @patcher.patch_count + assert @patcher.patched? + end + + def test_patch_returns_true_on_subsequent_calls + first_result = @patcher.patch! + second_result = @patcher.patch! + + assert first_result + assert second_result + end + + def test_patch_passes_options_to_perform_patch + tracer_provider = Object.new + + @patcher.patch!(tracer_provider: tracer_provider) + + assert_instance_of Hash, @patcher.last_options + assert_equal tracer_provider, @patcher.last_options[:tracer_provider] + end + + def test_patch_returns_false_on_error + failing_patcher = create_test_patcher(should_fail: true) + + result = suppress_logs { failing_patcher.patch! } + + refute result + end + + def test_patch_does_not_set_patched_on_error + failing_patcher = create_test_patcher(should_fail: true) + + suppress_logs { failing_patcher.patch! } + + refute failing_patcher.patched? 
+ end + + def test_patch_logs_error_on_failure + failing_patcher = create_test_patcher(should_fail: true) + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + failing_patcher.patch! + # Check that error was logged (can't easily verify content without more setup) + # The main thing is that it doesn't raise + ensure + Braintrust::Log.logger = original_logger + end + end + + def test_reset_allows_repatching + @patcher.patch! + assert @patcher.patched? + + @patcher.reset! + refute @patcher.patched? + + @patcher.patch! + assert @patcher.patched? + assert_equal 2, @patcher.patch_count + end + + def test_perform_patch_raises_not_implemented_in_base_class + assert_raises(NotImplementedError) do + Braintrust::Contrib::Patcher.perform_patch + end + end + + def test_thread_safety_only_patches_once + patcher = create_test_patcher + + threads = 100.times.map do + Thread.new { patcher.patch! } + end + + threads.each(&:join) + + assert_equal 1, patcher.patch_count + assert patcher.patched? + end + + def test_thread_safety_concurrent_patch_calls + patcher = create_test_patcher + + errors = [] + results = [] + mutex = Mutex.new + + threads = 100.times.map do + Thread.new do + result = patcher.patch! + mutex.synchronize { results << result } + rescue => e + mutex.synchronize { errors << e.message } + end + end + + threads.each(&:join) + + assert_equal [], errors + assert results.all? { |r| r == true } + assert_equal 1, patcher.patch_count + end + + def test_applicable_returns_true_by_default + assert @patcher.applicable? + end + + def test_applicable_can_be_overridden + patcher = Class.new(Braintrust::Contrib::Patcher) do + def self.applicable? + false + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! 
+ + refute patcher.applicable? + end + + def test_patch_checks_applicable_under_lock + applicable_calls = [] + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :applicable_calls + end + + def self.applicable? + @applicable_calls ||= [] + @applicable_calls << Thread.current.object_id + true + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! + patcher.applicable_calls = applicable_calls + + patcher.patch! + + # Should be called twice: once before lock (fast path), once under lock (double-check) + assert_equal 2, applicable_calls.length + end + + def test_patch_returns_false_when_not_applicable + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :perform_patch_called + end + + def self.applicable? + false + end + + def self.perform_patch(**options) + @perform_patch_called = true + end + end + patcher.reset! + patcher.perform_patch_called = false + + result = patcher.patch! + + refute result + refute patcher.perform_patch_called + refute patcher.patched? + end + + def test_patch_returns_false_and_does_not_log_when_not_applicable + patcher = Class.new(Braintrust::Contrib::Patcher) do + def self.applicable? + false + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.level = Logger::DEBUG + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + result = patcher.patch! 
+ # Fast path returns false immediately without logging + refute result + assert_empty captured_logs, "Fast path should not log when not applicable" + ensure + Braintrust::Log.logger = original_logger + end + end +end diff --git a/test/braintrust/contrib/registry_test.rb b/test/braintrust/contrib/registry_test.rb new file mode 100644 index 0000000..610815e --- /dev/null +++ b/test/braintrust/contrib/registry_test.rb @@ -0,0 +1,241 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::RegistryTest < Minitest::Test + def setup + # Use anonymous subclass to isolate test state + registry_class = Class.new(Braintrust::Contrib::Registry) + @registry = registry_class.instance + end + + # Mock integration class for testing + def create_mock_integration(name:, gem_names:, require_paths: nil, available: false) + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_integration_name, :_gem_names, :_require_paths, :_available + end + + def self.integration_name + _integration_name + end + + def self.gem_names + _gem_names + end + + def self.require_paths + _require_paths || _gem_names + end + + def self.available? 
+ _available + end + end + + integration._integration_name = name + integration._gem_names = gem_names + integration._require_paths = require_paths + integration._available = available + integration + end + + def test_register_and_lookup + integration = create_mock_integration(name: :openai, gem_names: ["openai"]) + + @registry.register(integration) + + assert_equal integration, @registry[:openai] + assert_equal integration, @registry["openai"] + end + + def test_lookup_returns_nil_for_unregistered + assert_nil @registry[:unknown] + end + + def test_all_returns_all_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + anthropic = create_mock_integration(name: :anthropic, gem_names: ["anthropic"]) + + @registry.register(openai) + @registry.register(anthropic) + + all = @registry.all + assert_equal 2, all.length + assert_includes all, openai + assert_includes all, anthropic + end + + def test_available_filters_by_availability + available_integration = create_mock_integration( + name: :available, + gem_names: ["available-gem"], + available: true + ) + unavailable_integration = create_mock_integration( + name: :unavailable, + gem_names: ["unavailable-gem"], + available: false + ) + + @registry.register(available_integration) + @registry.register(unavailable_integration) + + available = @registry.available + assert_equal 1, available.length + assert_includes available, available_integration + refute_includes available, unavailable_integration + end + + def test_each_iterates_over_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + anthropic = create_mock_integration(name: :anthropic, gem_names: ["anthropic"]) + + @registry.register(openai) + @registry.register(anthropic) + + collected = [] + @registry.each { |i| collected << i } + + assert_equal 2, collected.length + assert_includes collected, openai + assert_includes collected, anthropic + end + + def test_integrations_for_require_path + openai = 
create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + ruby_openai = create_mock_integration( + name: :ruby_openai, + gem_names: ["ruby-openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + @registry.register(ruby_openai) + + integrations = @registry.integrations_for_require_path("openai") + assert_equal 2, integrations.length + assert_includes integrations, openai + assert_includes integrations, ruby_openai + end + + def test_integrations_for_require_path_strips_rb_extension + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + integrations = @registry.integrations_for_require_path("openai.rb") + assert_equal 1, integrations.length + assert_includes integrations, openai + end + + def test_integrations_for_require_path_returns_empty_for_unknown + integrations = @registry.integrations_for_require_path("unknown") + assert_equal [], integrations + assert integrations.frozen? 
+ end + + def test_integrations_for_require_path_caching + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + # First call builds the cache + result1 = @registry.integrations_for_require_path("openai") + + # Second call should return the same frozen array (cached) + result2 = @registry.integrations_for_require_path("openai") + + assert_same result1, result2 + end + + def test_register_invalidates_cache + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + # Build the cache + result1 = @registry.integrations_for_require_path("openai") + assert_equal 1, result1.length + + # Register another integration + another = create_mock_integration( + name: :another, + gem_names: ["another"], + require_paths: ["openai"] + ) + @registry.register(another) + + # Cache should be invalidated + result2 = @registry.integrations_for_require_path("openai") + assert_equal 2, result2.length + end + + def test_thread_safety_for_registration + integrations = 100.times.map do |i| + create_mock_integration(name: :"integration_#{i}", gem_names: ["gem_#{i}"]) + end + + threads = integrations.map do |integration| + Thread.new { @registry.register(integration) } + end + + threads.each(&:join) + + assert_equal 100, @registry.all.length + end + + def test_thread_safety_for_require_path_lookup + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + errors = [] + threads = 100.times.map do + Thread.new do + result = @registry.integrations_for_require_path("openai") + errors << "Got nil" if result.nil? 
+ errors << "Wrong length: #{result.length}" unless result.length == 1 + rescue => e + errors << e.message + end + end + + threads.each(&:join) + + assert_equal [], errors + end + + def test_clear_removes_all_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + + @registry.register(openai) + assert_equal 1, @registry.all.length + + @registry.clear! + assert_equal 0, @registry.all.length + assert_nil @registry[:openai] + end +end diff --git a/test/braintrust/contrib_test.rb b/test/braintrust/contrib_test.rb new file mode 100644 index 0000000..525927b --- /dev/null +++ b/test/braintrust/contrib_test.rb @@ -0,0 +1,200 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::ContribTest < Minitest::Test + # --- Braintrust.instrument! delegation --- + + def test_braintrust_instrument_delegates_to_contrib + called_with = nil + + Braintrust::Contrib.stub(:instrument!, ->(*args, **kwargs) { called_with = [args, kwargs] }) do + Braintrust.instrument!(:openai, tracer_provider: "test-provider") + end + + assert_equal [[:openai], {tracer_provider: "test-provider"}], called_with + end + + # --- registry --- + + def test_registry_returns_registry_instance + mock_registry = Object.new + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + assert_same mock_registry, Braintrust::Contrib.registry + end + end + + # --- init --- + + def test_init_sets_default_tracer_provider + mock_provider = Object.new + original = Braintrust::Contrib.instance_variable_get(:@default_tracer_provider) + + Braintrust::Contrib.init(tracer_provider: mock_provider) + assert_same mock_provider, Braintrust::Contrib.default_tracer_provider + ensure + Braintrust::Contrib.instance_variable_set(:@default_tracer_provider, original) + end + + # --- instrument! 
--- + + def test_instrument_delegates_to_integration + mock_integration = Minitest::Mock.new + mock_integration.expect(:instrument!, true, [], tracer_provider: "test-provider") + + mock_registry = {test_integration: mock_integration} + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.instrument!(:test_integration, tracer_provider: "test-provider") + end + + mock_integration.verify + end + + def test_instrument_logs_error_for_unknown_integration + mock_registry = {} + logged_error = nil + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Log.stub(:error, ->(msg) { logged_error = msg }) do + Braintrust::Contrib.instrument!(:unknown_integration) + end + end + + assert_match(/No integration for 'unknown_integration'/, logged_error) + end + + def test_instrument_passes_target_option + mock_integration = Minitest::Mock.new + target = Object.new + mock_integration.expect(:instrument!, true, [], target: target) + + mock_registry = {openai: mock_integration} + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.instrument!(:openai, target: target) + end + + mock_integration.verify + end + + # --- default_tracer_provider --- + + def test_default_tracer_provider_falls_back_to_opentelemetry + mock_otel_provider = Object.new + original = Braintrust::Contrib.instance_variable_get(:@default_tracer_provider) + Braintrust::Contrib.instance_variable_set(:@default_tracer_provider, nil) + + OpenTelemetry.stub(:tracer_provider, mock_otel_provider) do + assert_same mock_otel_provider, Braintrust::Contrib.default_tracer_provider + end + ensure + Braintrust::Contrib.instance_variable_set(:@default_tracer_provider, original) + end + + # --- context_for --- + + def test_context_for_delegates_to_context_from + target = Object.new + mock_context = Object.new + + Braintrust::Contrib::Context.stub(:from, ->(t) { (t == target) ? 
mock_context : nil }) do + assert_same mock_context, Braintrust::Contrib.context_for(target) + end + end + + def test_context_for_returns_nil_when_no_context + target = Object.new + + Braintrust::Contrib::Context.stub(:from, nil) do + assert_nil Braintrust::Contrib.context_for(target) + end + end + + # --- tracer_provider_for --- + + def test_tracer_provider_for_returns_context_provider_when_present + target = Object.new + context_provider = Object.new + mock_context = Minitest::Mock.new + mock_context.expect(:[], context_provider, [:tracer_provider]) + + Braintrust::Contrib::Context.stub(:from, mock_context) do + assert_same context_provider, Braintrust::Contrib.tracer_provider_for(target) + end + + mock_context.verify + end + + def test_tracer_provider_for_falls_back_to_default_when_no_context + target = Object.new + default_provider = Object.new + + Braintrust::Contrib::Context.stub(:from, nil) do + Braintrust::Contrib.stub(:default_tracer_provider, default_provider) do + assert_same default_provider, Braintrust::Contrib.tracer_provider_for(target) + end + end + end + + def test_tracer_provider_for_falls_back_when_context_has_no_provider + target = Object.new + default_provider = Object.new + mock_context = Minitest::Mock.new + mock_context.expect(:[], nil, [:tracer_provider]) + + Braintrust::Contrib::Context.stub(:from, mock_context) do + Braintrust::Contrib.stub(:default_tracer_provider, default_provider) do + assert_same default_provider, Braintrust::Contrib.tracer_provider_for(target) + end + end + + mock_context.verify + end + + # --- tracer_for --- + + def test_tracer_for_gets_tracer_from_provider + target = Object.new + mock_tracer = Object.new + mock_provider = Minitest::Mock.new + mock_provider.expect(:tracer, mock_tracer, ["braintrust"]) + + Braintrust::Contrib.stub(:tracer_provider_for, mock_provider) do + assert_same mock_tracer, Braintrust::Contrib.tracer_for(target) + end + + mock_provider.verify + end + + def test_tracer_for_uses_custom_name + 
target = Object.new + mock_tracer = Object.new + mock_provider = Minitest::Mock.new + mock_provider.expect(:tracer, mock_tracer, ["custom-tracer"]) + + Braintrust::Contrib.stub(:tracer_provider_for, mock_provider) do + assert_same mock_tracer, Braintrust::Contrib.tracer_for(target, name: "custom-tracer") + end + + mock_provider.verify + end + + def test_tracer_for_uses_target_context_provider + target = Object.new + mock_tracer = Object.new + context_provider = Minitest::Mock.new + context_provider.expect(:tracer, mock_tracer, ["braintrust"]) + + mock_context = Minitest::Mock.new + mock_context.expect(:[], context_provider, [:tracer_provider]) + + Braintrust::Contrib::Context.stub(:from, mock_context) do + assert_same mock_tracer, Braintrust::Contrib.tracer_for(target) + end + + context_provider.verify + mock_context.verify + end +end diff --git a/test/support/assert_helper.rb b/test/support/assert_helper.rb index 7b943cb..8d1d725 100644 --- a/test/support/assert_helper.rb +++ b/test/support/assert_helper.rb @@ -1,47 +1,6 @@ module Test module Support module AssertHelper - # Build a chain of mocks for nested method calls. - # Useful for testing code that traverses object hierarchies like `obj.foo.bar.baz`. - # - # Creates intermediate mocks that return each other in sequence, with the final - # mock returning the specified terminal value. All intermediate mocks are verified - # after the block. The caller is responsible for verifying the terminal value if needed. 
- # - # @param methods [Array] chain of method names to mock - # @param returns [Object] the value returned by the final method (can be a Class, Mock, or any object) - # @yield [root] the root mock (entry point to the chain) - # - # @example Testing a method chain with a Class as terminal - # fake_singleton = Class.new { include SomeModule } - # mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| - # assert SomePatcher.patched?(target: client) - # end - # - # @example Testing a method chain with a Mock as terminal - # terminal = Minitest::Mock.new - # terminal.expect(:include, true, [SomeModule]) - # mock_chain(:foo, :bar, returns: terminal) do |root| - # SomeCode.do_something(root) - # end - # terminal.verify - # - def mock_chain(*methods, returns:) - current = returns - mocks = [] - - methods.reverse_each do |method| - mock = Minitest::Mock.new - mock.expect(method, current) - mocks.unshift(mock) - current = mock - end - - yield(mocks.first) - - mocks.each(&:verify) - end - # Runs the test inside a fork, to isolate its side-effects from the main process. # Similar in purpose to https://docs.ruby-lang.org/en/master/Ruby/Box.html#class-Ruby::Box # diff --git a/test/support/log_helper.rb b/test/support/log_helper.rb new file mode 100644 index 0000000..4d8bf72 --- /dev/null +++ b/test/support/log_helper.rb @@ -0,0 +1,22 @@ +module Test + module Support + module LogHelper + # Suppress log output during block execution. + # Use for tests that deliberately cause errors/warnings. + # + # @yield Block to execute with logging suppressed + # @return Result of the block + # + # @example + # suppress_logs { failing_patcher.patch! 
} + # + def suppress_logs + original_logger = Braintrust::Log.logger + Braintrust::Log.logger = Logger.new(File::NULL) + yield + ensure + Braintrust::Log.logger = original_logger + end + end + end +end diff --git a/test/support/mock_helper.rb b/test/support/mock_helper.rb new file mode 100644 index 0000000..1cefdcc --- /dev/null +++ b/test/support/mock_helper.rb @@ -0,0 +1,46 @@ +module Test + module Support + module MockHelper + # Build a chain of mocks for nested method calls. + # Useful for testing code that traverses object hierarchies like `obj.foo.bar.baz`. + # + # Creates intermediate mocks that return each other in sequence, with the final + # mock returning the specified terminal value. All intermediate mocks are verified + # after the block. The caller is responsible for verifying the terminal value if needed. + # + # @param methods [Array] chain of method names to mock + # @param returns [Object] the value returned by the final method (can be a Class, Mock, or any object) + # @yield [root] the root mock (entry point to the chain) + # + # @example Testing a method chain with a Class as terminal + # fake_singleton = Class.new { include SomeModule } + # mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + # assert SomePatcher.patched?(target: client) + # end + # + # @example Testing a method chain with a Mock as terminal + # terminal = Minitest::Mock.new + # terminal.expect(:include, true, [SomeModule]) + # mock_chain(:foo, :bar, returns: terminal) do |root| + # SomeCode.do_something(root) + # end + # terminal.verify + # + def mock_chain(*methods, returns:) + current = returns + mocks = [] + + methods.reverse_each do |method| + mock = Minitest::Mock.new + mock.expect(method, current) + mocks.unshift(mock) + current = mock + end + + yield(mocks.first) + + mocks.each(&:verify) + end + end + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 45d085b..7ef9f68 100644 --- a/test/test_helper.rb +++ 
b/test/test_helper.rb @@ -66,6 +66,8 @@ require_relative "support/assert_helper" require_relative "support/braintrust_helper" require_relative "support/fixture_helper" +require_relative "support/log_helper" +require_relative "support/mock_helper" require_relative "support/provider_helper" require_relative "support/tracing_helper" @@ -74,6 +76,8 @@ class Minitest::Test include ::Test::Support::AssertHelper include ::Test::Support::BraintrustHelper include ::Test::Support::FixtureHelper + include ::Test::Support::LogHelper + include ::Test::Support::MockHelper include ::Test::Support::ProviderHelper include ::Test::Support::TracingHelper From a54d1c912aca46b51fa23b0cfbba2f8da06ecdac Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 15 Dec 2025 12:15:35 -0500 Subject: [PATCH 04/27] Added: Integration generator script --- CONTRIBUTING.md | 135 +++++++++++++++++----- Rakefile | 121 +++++++++++++++++++ templates/contrib/integration.rb.erb | 59 ++++++++++ templates/contrib/integration_test.rb.erb | 50 ++++++++ templates/contrib/patcher.rb.erb | 39 +++++++ templates/contrib/patcher_test.rb.erb | 32 +++++ 6 files changed, 408 insertions(+), 28 deletions(-) create mode 100644 templates/contrib/integration.rb.erb create mode 100644 templates/contrib/integration_test.rb.erb create mode 100644 templates/contrib/patcher.rb.erb create mode 100644 templates/contrib/patcher_test.rb.erb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f4bea9a..258fb38 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -79,65 +79,145 @@ All of our common dev tasks are in rake. rake -T ``` +We use VCR for making http tests fast. You can run tests with these enabled, +off, etc. If you add new tests you'll need to record new cassettes. See this +for more details. + +```bash +rake -T test:vcr +``` + ### Adding new integrations for AI Libraries -To add instrumentation support for a new AI library, follow these steps: -#### 1. 
Define the Integration +To add instrumentation support for a new library, use the integration generator: + +```bash +rake contrib:generate NAME=trustybrain_llm AUTO_REGISTER=true +``` + +This will create the integration structure and optionally register it. You can also specify additional options: + +```bash +rake contrib:generate NAME=trustybrain_llm \ + GEM_NAMES=trustybrain_llm,trustybrain \ + REQUIRE_PATHS=trustybrain \ + MIN_VERSION=1.0.0 \ + MAX_VERSION=2.0.0 \ + AUTO_REGISTER=true +``` + +#### Manual Setup + +If you prefer to create the integration manually, follow these steps: -Create a new file in `lib/braintrust/contrib/`: +##### 1. Create the integration directory structure + +```bash +mkdir -p lib/braintrust/contrib/trustybrain_llm +mkdir -p test/braintrust/contrib/trustybrain_llm +``` + +##### 2. Define the integration + +Create `lib/braintrust/contrib/trustybrain_llm/integration.rb`: ```ruby -# lib/braintrust/contrib/trustybrain_llm.rb -module Braintrust::Contrib - class TrustybrainLLM - include Integration +# frozen_string_literal: true - def self.integration_name - :trustybrain_llm - end +require_relative "../integration" - def self.gem_names - ["trustybrain_llm"] - end +module Braintrust + module Contrib + module TrustybrainLLM + class Integration + include Braintrust::Contrib::Integration + + def self.integration_name + :trustybrain_llm + end + + def self.gem_names + ["trustybrain_llm"] + end - def self.patcher - TrustybrainLLMPatcher + def self.loaded? + defined?(::TrustybrainLLM::Client) ? true : false + end + + def self.patchers + require_relative "patcher" + [Patcher] + end + end end end +end +``` + +##### 3. 
Create a patcher - class TrustybrainLLMPatcher < Patcher - def self.perform_patch(context) - # Add your instrumentation here - # context.tracer_provider gives you access to the tracer +Create `lib/braintrust/contrib/trustybrain_llm/patcher.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "../patcher" + +module Braintrust + module Contrib + module TrustybrainLLM + class Patcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::TrustybrainLLM::Client) + end + + def perform_patch(**options) + ::TrustybrainLLM::Client.prepend(Instrumentation) + end + end + + module Instrumentation + def chat(*args, **kwargs, &block) + Braintrust::Contrib.tracer_for(self).in_span("trustybrain_llm.chat") do + super + end + end + end + end end end end ``` -#### 2. Register It +##### 4. Register it Add to `lib/braintrust/contrib.rb`: ```ruby -require_relative "contrib/trustybrain_llm" +require_relative "contrib/trustybrain_llm/integration" # At the bottom: -Contrib::TrustybrainLLM.register! +Contrib::TrustybrainLLM::Integration.register! ``` -#### 3. Write Tests +##### 5. 
Add tests -Create `test/braintrust/contrib/trustybrain_llm_test.rb`: +Create test files in `test/braintrust/contrib/trustybrain_llm/`: ```ruby +# test/braintrust/contrib/trustybrain_llm/integration_test.rb require "test_helper" -class Braintrust::Contrib::TrustybrainLLMTest < Minitest::Test +class Braintrust::Contrib::TrustybrainLLM::IntegrationTest < Minitest::Test def test_integration_basics - assert_equal :trustybrain_llm, TrustybrainLLM.integration_name - assert_equal ["trustybrain_llm"], TrustybrainLLM.gem_names + integration = Braintrust::Contrib::TrustybrainLLM::Integration + assert_equal :trustybrain_llm, integration.integration_name + assert_equal ["trustybrain_llm"], integration.gem_names end + + # TODO: Add tests for patchers, availability, compatibility, and instrumentation end ``` @@ -177,4 +257,3 @@ mise use ruby@3.4 ruby --version # => 3.4.x bundle exec rake test ``` - diff --git a/Rakefile b/Rakefile index dde2310..9af3066 100644 --- a/Rakefile +++ b/Rakefile @@ -251,6 +251,127 @@ task release: ["release:publish", "release:github"] do puts "✓ Release completed successfully!" 
end +# Contrib tasks +namespace :contrib do + desc "Generate a new integration (NAME=name [GEM_NAMES=gem1,gem2] [REQUIRE_PATHS=path1,path2] [MIN_VERSION=1.0.0] [MAX_VERSION=2.0.0] [AUTO_REGISTER=true])" + task :generate do + require "erb" + require "fileutils" + + # Parse parameters + name = ENV["NAME"] + unless name + puts "Error: NAME is required" + puts "Usage: rake contrib:generate NAME=trustybrain_llm [GEM_NAMES=trustybrain_llm] [AUTO_REGISTER=true]" + exit 1 + end + + # Convert name to snake_case if it's PascalCase + snake_case_name = name.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .downcase + + # Convert to PascalCase for module name + module_name = snake_case_name.split("_").map(&:capitalize).join + + integration_name = snake_case_name.to_sym + + # Parse optional parameters + gem_names = ENV["GEM_NAMES"]&.split(",") || [snake_case_name] + require_paths = ENV["REQUIRE_PATHS"]&.split(",") || gem_names + min_version = ENV["MIN_VERSION"] + max_version = ENV["MAX_VERSION"] + auto_register = ENV.fetch("AUTO_REGISTER", "false").downcase == "true" + + # Display what will be generated + puts "\n=== Generating Integration ===" + puts "Name: #{module_name}" + puts "Integration name: :#{integration_name}" + puts "Gem names: #{gem_names.inspect}" + puts "Require paths: #{require_paths.inspect}" if require_paths != gem_names + puts "Min version: #{min_version}" if min_version + puts "Max version: #{max_version}" if max_version + puts + + # Template binding + template_binding = binding + + # Paths + integration_dir = "lib/braintrust/contrib/#{snake_case_name}" + test_dir = "test/braintrust/contrib/#{snake_case_name}" + + # Create directories + FileUtils.mkdir_p(integration_dir) + FileUtils.mkdir_p(test_dir) + + # Generate files + templates = { + "templates/contrib/integration.rb.erb" => "#{integration_dir}/integration.rb", + "templates/contrib/patcher.rb.erb" => "#{integration_dir}/patcher.rb", + 
"templates/contrib/integration_test.rb.erb" => "#{test_dir}/integration_test.rb", + "templates/contrib/patcher_test.rb.erb" => "#{test_dir}/patcher_test.rb" + } + + templates.each do |template_path, output_path| + template = ERB.new(File.read(template_path), trim_mode: "-") + content = template.result(template_binding) + File.write(output_path, content) + puts "✓ Created #{output_path}" + end + + # Auto-register if requested + if auto_register + contrib_file = "lib/braintrust/contrib.rb" + contrib_content = File.read(contrib_file) + + # Find the position to insert (before the last "end" or after the last require) + insertion_point = if /^# Load integration stubs/.match?(contrib_content) + contrib_content.index("# Load integration stubs") + else + # Insert before the final module end + contrib_content.rindex("end") + end + + require_line = "require_relative \"contrib/#{snake_case_name}/integration\"" + register_line = "Contrib::#{module_name}::Integration.register!" + + # Check if already registered + if contrib_content.include?(require_line) + puts "⚠ #{contrib_file} already contains this integration" + else + lines_to_add = [ + "", + "# #{module_name}", + require_line, + register_line + ].join("\n") + + contrib_content.insert(insertion_point, lines_to_add + "\n") + File.write(contrib_file, contrib_content) + puts "✓ Updated #{contrib_file}" + end + end + + # Display next steps + puts "\n=== Next Steps ===" + unless auto_register + puts "1. Add to lib/braintrust/contrib.rb:" + puts " require_relative \"contrib/#{snake_case_name}/integration\"" + puts " Contrib::#{module_name}::Integration.register!" + puts + end + puts "#{auto_register ? "1" : "2"}. Implement the patcher in:" + puts " #{integration_dir}/patcher.rb" + puts + puts "#{auto_register ? "2" : "3"}. Add tests in:" + puts " #{test_dir}/" + puts + puts "#{auto_register ? "3" : "4"}. 
Run tests:" + puts " bundle exec rake test TEST=#{test_dir}/**/*_test.rb" + puts + end +end + # Version bump tasks def bump_version(type) version_file = "lib/braintrust/version.rb" diff --git a/templates/contrib/integration.rb.erb b/templates/contrib/integration.rb.erb new file mode 100644 index 0000000..864fe69 --- /dev/null +++ b/templates/contrib/integration.rb.erb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require_relative "../integration" + +module Braintrust + module Contrib + module <%= module_name %> + # Integration for <%= gem_names.join(", ") %> + class Integration + include Braintrust::Contrib::Integration + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :<%= integration_name %> + end + + # @return [Array] Gem names this integration supports + def self.gem_names + <%= gem_names.inspect %> + end +<% if require_paths != gem_names %> + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + <%= require_paths.inspect %> + end +<% end %> +<% if min_version %> + + # @return [String] Minimum compatible version + def self.minimum_version + "<%= min_version %>" + end +<% end %> +<% if max_version %> + + # @return [String] Maximum compatible version + def self.maximum_version + "<%= max_version %>" + end +<% end %> + + # Check if the library is actually loaded (constants are defined). + # @return [Boolean] true if the library module is defined + def self.loaded? + # TODO: Update this to check for your library's main module/class + defined?(::<%= module_name %>) ? true : false + end + + # Lazy-load the patcher only when actually patching. 
+ # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [Patcher] + end + end + end + end +end diff --git a/templates/contrib/integration_test.rb.erb b/templates/contrib/integration_test.rb.erb new file mode 100644 index 0000000..f7ce19d --- /dev/null +++ b/templates/contrib/integration_test.rb.erb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::<%= module_name %>::IntegrationTest < Minitest::Test + def setup + @integration = Braintrust::Contrib::<%= module_name %>::Integration + end + + def test_integration_name + assert_equal :<%= integration_name %>, @integration.integration_name + end + + def test_gem_names + assert_equal <%= gem_names.inspect %>, @integration.gem_names + end +<% if require_paths != gem_names %> + + def test_require_paths + assert_equal <%= require_paths.inspect %>, @integration.require_paths + end +<% end %> + + def test_minimum_version +<% if min_version %> + assert_equal "<%= min_version %>", @integration.minimum_version +<% else %> + assert_nil @integration.minimum_version +<% end %> + end + + def test_maximum_version +<% if max_version %> + assert_equal "<%= max_version %>", @integration.maximum_version +<% else %> + assert_nil @integration.maximum_version +<% end %> + end + + def test_loaded + # loaded? should return true/false based on whether the library is loaded + assert [true, false].include?(@integration.loaded?) 
+ end + + def test_patchers + patchers = @integration.patchers + assert_kind_of Array, patchers + assert_includes patchers, Braintrust::Contrib::<%= module_name %>::Patcher + end +end diff --git a/templates/contrib/patcher.rb.erb b/templates/contrib/patcher.rb.erb new file mode 100644 index 0000000..81a9043 --- /dev/null +++ b/templates/contrib/patcher.rb.erb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require_relative "../patcher" + +module Braintrust + module Contrib + module <%= module_name %> + # Patcher for <%= gem_names.join(", ") %> + class Patcher < Braintrust::Contrib::Patcher + class << self + # Check if this patcher should apply. + # @return [Boolean] true if target library is available + def applicable? + # TODO: Update to check if target library classes are defined + # Example: defined?(::SomeLibrary::Client) + true + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @return [void] + def perform_patch(**options) + # TODO: Add your instrumentation here + # Example: ::SomeLibrary::Client.prepend(Instrumentation) + end + end + + # Example instrumentation module (uncomment and modify for your integration) + # module Instrumentation + # def some_method(*args, **kwargs, &block) + # Braintrust::Contrib.tracer_for(self).in_span("somelibrary.some_method") do + # super + # end + # end + # end + end + end + end +end diff --git a/templates/contrib/patcher_test.rb.erb b/templates/contrib/patcher_test.rb.erb new file mode 100644 index 0000000..89b43f8 --- /dev/null +++ b/templates/contrib/patcher_test.rb.erb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require "test_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/<%= snake_case_name %>/patcher" + +class Braintrust::Contrib::<%= module_name %>::PatcherTest < Minitest::Test + def setup + @patcher = Braintrust::Contrib::<%= module_name %>::Patcher + @patcher.reset! 
+ end + + def teardown + @patcher.reset! + end + + def test_inherits_from_base_patcher + assert @patcher < Braintrust::Contrib::Patcher + end + + def test_implements_perform_patch + assert @patcher.respond_to?(:perform_patch) + end + + def test_applicable + # TODO: Update once applicable? checks for target library + assert @patcher.applicable? + end + + # TODO: Add tests for instrumented code +end From 21c71d1fe5681947261ec13525eca2e82c245605 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 10:07:32 -0500 Subject: [PATCH 05/27] Added: `test:contrib` task to run integration tests --- Rakefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Rakefile b/Rakefile index 9af3066..f9e3885 100644 --- a/Rakefile +++ b/Rakefile @@ -98,6 +98,11 @@ task default: :ci # Test-related tasks namespace :test do + desc "Run only contrib framework tests" + task :contrib do + sh "bundle exec ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib{_test.rb,/**/*_test.rb}').each { |f| require_relative f }\"" + end + desc "Run tests with verbose timing output" task :verbose do ENV["MT_VERBOSE"] = "1" From 4728a178ccb838004842dbad9a457fd67e574855 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 8 Jan 2026 13:26:47 -0500 Subject: [PATCH 06/27] Added: Internal::Time#measure for accurately measuring time --- lib/braintrust/internal/time.rb | 44 +++++++++++ test/braintrust/internal/time_test.rb | 103 ++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 lib/braintrust/internal/time.rb create mode 100644 test/braintrust/internal/time_test.rb diff --git a/lib/braintrust/internal/time.rb b/lib/braintrust/internal/time.rb new file mode 100644 index 0000000..3020627 --- /dev/null +++ b/lib/braintrust/internal/time.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Braintrust + module Internal + # Time utilities using the monotonic clock for accurate duration measurements. 
+ # + # Unlike Time.now, the monotonic clock is not affected by system clock adjustments + # (NTP updates, daylight saving, manual changes) and provides accurate elapsed time. + # + # @see https://blog.dnsimple.com/2018/03/elapsed-time-with-ruby-the-right-way/ + module Time + # Measure elapsed time using the monotonic clock. + # + # Three modes of operation: + # + # 1. With a block: executes the block and returns elapsed time in seconds + # elapsed = Time.measure { some_operation } + # + # 2. Without arguments: returns the current monotonic time (for later comparison) + # start = Time.measure + # # ... later ... + # elapsed = Time.measure(start) + # + # 3. With a start_time argument: returns elapsed time since start_time + # start = Time.measure + # elapsed = Time.measure(start) + # + # @param start_time [Float, nil] Optional start time from a previous measure call + # @yield Optional block to measure + # @return [Float] Elapsed time in seconds, or current monotonic time if no args/block + def self.measure(start_time = nil) + if block_given? 
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC) + yield + Process.clock_gettime(Process::CLOCK_MONOTONIC) - start + elsif start_time + Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time + else + Process.clock_gettime(Process::CLOCK_MONOTONIC) + end + end + end + end +end diff --git a/test/braintrust/internal/time_test.rb b/test/braintrust/internal/time_test.rb new file mode 100644 index 0000000..d049149 --- /dev/null +++ b/test/braintrust/internal/time_test.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/internal/time" + +class Braintrust::Internal::TimeTest < Minitest::Test + Time = Braintrust::Internal::Time + + # --- measure with block --- + + def test_measure_with_block_returns_elapsed_time + elapsed = Time.measure { sleep 0.01 } + + assert_kind_of Float, elapsed + assert elapsed >= 0.01, "Expected elapsed >= 0.01, got #{elapsed}" + assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" + end + + def test_measure_with_block_executes_block + executed = false + Time.measure { executed = true } + + assert executed, "Block should have been executed" + end + + def test_measure_with_block_returns_time_not_block_result + result = Time.measure { "block result" } + + assert_kind_of Float, result + refute_equal "block result", result + end + + # --- measure without arguments --- + + def test_measure_without_args_returns_monotonic_time + time1 = Time.measure + time2 = Time.measure + + assert_kind_of Float, time1 + assert_kind_of Float, time2 + assert time2 >= time1, "Monotonic time should not go backwards" + end + + def test_measure_without_args_returns_positive_value + time = Time.measure + + assert time > 0, "Monotonic time should be positive" + end + + # --- measure with start_time --- + + def test_measure_with_start_time_returns_elapsed + start = Time.measure + sleep 0.01 + elapsed = Time.measure(start) + + assert_kind_of Float, elapsed + assert elapsed >= 0.01, "Expected elapsed >= 0.01, 
got #{elapsed}" + assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" + end + + def test_measure_with_start_time_returns_non_negative + start = Time.measure + elapsed = Time.measure(start) + + assert elapsed >= 0, "Elapsed time should be non-negative" + end + + # --- monotonic behavior --- + + def test_measure_is_monotonic + times = 10.times.map { Time.measure } + + times.each_cons(2) do |t1, t2| + assert t2 >= t1, "Monotonic time should never decrease" + end + end + + def test_measure_returns_seconds_as_float + start = Time.measure + sleep 0.05 + elapsed = Time.measure(start) + + # Should be roughly 0.05 seconds, not 50 milliseconds or 50_000_000 nanoseconds + assert elapsed >= 0.04, "Expected seconds, got #{elapsed} (too small, might be wrong unit)" + assert elapsed < 0.2, "Expected seconds, got #{elapsed} (too large, might be wrong unit)" + end + + # --- equivalence of block and start_time modes --- + + def test_block_and_start_time_modes_equivalent + # Both modes should measure approximately the same duration + block_elapsed = Time.measure { sleep 0.02 } + + start = Time.measure + sleep 0.02 + manual_elapsed = Time.measure(start) + + # Both should be close to 0.02 seconds + assert_in_delta block_elapsed, manual_elapsed, 0.01, + "Block and start_time modes should produce similar results" + end +end From 6076afe1e97f152f6f113ac0bf70bc1f6a6040fb Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 18 Dec 2025 19:47:11 -0500 Subject: [PATCH 07/27] Changed: Migrated openai to new integration API --- Appraisals | 6 + README.md | 5 +- Rakefile | 7 + .../{openai.rb => contrib/openai/basic.rb} | 11 +- examples/contrib/openai/deprecated.rb | 72 + examples/contrib/openai/instance.rb | 57 + examples/contrib/openai/streaming.rb | 77 ++ .../{openai.rb => contrib/openai/golden.rb} | 8 +- examples/internal/eval-parallel-benchmark.rb | 4 +- examples/internal/kitchen-sink.rb | 4 +- gemfiles/openai_ruby_openai.gemfile | 10 + lib/braintrust/contrib.rb | 5 +- 
lib/braintrust/contrib/openai/deprecated.rb | 22 + .../contrib/openai/instrumentation/chat.rb | 296 ++++ .../contrib/openai/instrumentation/common.rb | 164 +++ .../openai/instrumentation/responses.rb | 185 +++ lib/braintrust/contrib/openai/integration.rb | 58 + lib/braintrust/contrib/openai/patcher.rb | 130 ++ lib/braintrust/trace/contrib/openai.rb | 611 --------- .../contrib/openai/deprecated_test.rb | 43 + .../openai/instrumentation/chat_test.rb | 477 +++++++ .../openai/instrumentation/common_test.rb | 184 +++ .../openai/instrumentation/responses_test.rb | 323 +++++ .../contrib/openai/instrumentation_test.rb | 161 +++ .../contrib/openai/integration_helper.rb | 33 + .../contrib/openai/integration_test.rb | 64 + .../braintrust/contrib/openai/patcher_test.rb | 180 +++ .../contrib/openai/ruby-openai_test.rb | 69 + test/braintrust/trace/openai_test.rb | 1221 ----------------- .../openai/responses_create_error.yml | 93 ++ .../openai/responses_stream_error.yml | 93 ++ .../openai/responses_with_metadata.yml | 171 +++ .../responses_with_previous_response_id.yml | 333 +++++ .../openai/responses_with_reasoning.yml | 173 +++ .../openai/responses_with_truncation.yml | 168 +++ .../openai/streaming_chat_completions.yml | 130 ++ .../openai/streaming_multiple_choices.yml | 125 ++ 37 files changed, 3925 insertions(+), 1848 deletions(-) rename examples/{openai.rb => contrib/openai/basic.rb} (86%) create mode 100644 examples/contrib/openai/deprecated.rb create mode 100644 examples/contrib/openai/instance.rb create mode 100644 examples/contrib/openai/streaming.rb rename examples/internal/{openai.rb => contrib/openai/golden.rb} (98%) create mode 100644 gemfiles/openai_ruby_openai.gemfile create mode 100644 lib/braintrust/contrib/openai/deprecated.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/chat.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/responses.rb create mode 
100644 lib/braintrust/contrib/openai/integration.rb create mode 100644 lib/braintrust/contrib/openai/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/openai.rb create mode 100644 test/braintrust/contrib/openai/deprecated_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation/chat_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation/common_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation/responses_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation_test.rb create mode 100644 test/braintrust/contrib/openai/integration_helper.rb create mode 100644 test/braintrust/contrib/openai/integration_test.rb create mode 100644 test/braintrust/contrib/openai/patcher_test.rb create mode 100644 test/braintrust/contrib/openai/ruby-openai_test.rb delete mode 100644 test/braintrust/trace/openai_test.rb create mode 100644 test/fixtures/vcr_cassettes/openai/responses_create_error.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_stream_error.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml create mode 100644 test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml create mode 100644 test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml diff --git a/Appraisals b/Appraisals index 022a2fb..9f1df2d 100644 --- a/Appraisals +++ b/Appraisals @@ -63,3 +63,9 @@ appraise "opentelemetry-latest" do gem "opentelemetry-sdk", ">= 1.10" gem "opentelemetry-exporter-otlp", ">= 0.31" end + +# Both OpenAI gems installed - tests that loaded? 
correctly distinguishes them +appraise "openai-ruby-openai" do + gem "openai", ">= 0.34" + gem "ruby-openai", ">= 8.0" +end diff --git a/README.md b/README.md index 5da0707..13c8f91 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,10 @@ Braintrust.init client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::OpenAI.wrap(client) +# Instrument all clients +Braintrust.instrument!(:openai) +# OR instrument a single client +Braintrust.instrument!(:openai, target: client) tracer = OpenTelemetry.tracer_provider.tracer("openai-app") root_span = nil diff --git a/Rakefile b/Rakefile index f9e3885..8bd2b54 100644 --- a/Rakefile +++ b/Rakefile @@ -103,6 +103,13 @@ namespace :test do sh "bundle exec ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib{_test.rb,/**/*_test.rb}').each { |f| require_relative f }\"" end + namespace :contrib do + desc "Run OpenAI contrib tests" + task :openai do + sh "bundle exec appraisal openai ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib/openai/**/*_test.rb').each { |f| require_relative f }\"" + end + end + desc "Run tests with verbose timing output" task :verbose do ENV["MT_VERBOSE"] = "1" diff --git a/examples/openai.rb b/examples/contrib/openai/basic.rb similarity index 86% rename from examples/openai.rb rename to examples/contrib/openai/basic.rb index 246ff20..d32371b 100644 --- a/examples/openai.rb +++ b/examples/contrib/openai/basic.rb @@ -12,10 +12,10 @@ # # Note: The openai gem is a development dependency. To run this example: # 1. Install dependencies: bundle install -# 2. Run from the SDK root: bundle exec ruby examples/openai.rb +# 2. 
Run from the SDK root: bundle exec ruby examples/contrib/openai/basic.rb # # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/openai.rb +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/basic.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -24,21 +24,20 @@ exit 1 end +# Instrument OpenAI Braintrust.init(blocking_login: true) +Braintrust.instrument!(:openai) # Create OpenAI client client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -# Wrap the client with Braintrust tracing -Braintrust::Trace::OpenAI.wrap(client) - # Create a root span to capture the entire operation tracer = OpenTelemetry.tracer_provider.tracer("openai-example") root_span = nil # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI..." -response = tracer.in_span("examples/openai.rb") do |span| +response = tracer.in_span("examples/contrib/openai/basic.rb") do |span| root_span = span client.chat.completions.create( diff --git a/examples/contrib/openai/deprecated.rb b/examples/contrib/openai/deprecated.rb new file mode 100644 index 0000000..872242f --- /dev/null +++ b/examples/contrib/openai/deprecated.rb @@ -0,0 +1,72 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: OpenAI chat completion with Braintrust tracing +# +# This example demonstrates how to automatically trace OpenAI API calls with Braintrust. +# +# Note: The openai gem is a development dependency. To run this example: +# 1. Install dependencies: bundle install +# 2. 
Run from the SDK root: bundle exec ruby examples/contrib/openai/deprecated.rb +# +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/deprecated.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create OpenAI client +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Wrap the client with Braintrust tracing +# DEPRECATED: Use `Braintrust.instrument!(:openai, target: client)` instead +Braintrust::Trace::OpenAI.wrap(client) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") +root_span = nil + +# Make a chat completion request (automatically traced!) +puts "Sending chat completion request to OpenAI..." +response = tracer.in_span("examples/contrib/openai/deprecated.rb") do |span| + root_span = span + + client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 + ) +end + +# Print the response +puts "\n✓ Response received!" +puts "\nAssistant: #{response.choices[0].message.content}" + +# Print usage stats +puts "\nToken usage:" +puts " Prompt tokens: #{response.usage.prompt_tokens}" +puts " Completion tokens: #{response.usage.completion_tokens}" +puts " Total tokens: #{response.usage.total_tokens}" + +# Print permalink to view this trace in Braintrust +puts "\n✓ View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n✓ Trace sent to Braintrust!" 
diff --git a/examples/contrib/openai/instance.rb b/examples/contrib/openai/instance.rb new file mode 100644 index 0000000..6516557 --- /dev/null +++ b/examples/contrib/openai/instance.rb @@ -0,0 +1,57 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: Instance-level OpenAI instrumentation +# +# This shows how to instrument a specific client instance rather than +# all OpenAI clients globally. +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/instance.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create two client instances +client_traced = ::OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +client_untraced = ::OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Only instrument one of them +Braintrust.instrument!(:openai, target: client_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") +root_span = nil + +tracer.in_span("examples/contrib/openai/instance.rb") do |span| + root_span = span + + puts "Calling traced client..." + response1 = client_traced.chat.completions.create( + messages: [{role: "user", content: "Say 'traced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + puts "Traced response: #{response1.choices[0].message.content}" + + puts "\nCalling untraced client..." 
+ response2 = client_untraced.chat.completions.create( + messages: [{role: "user", content: "Say 'untraced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + puts "Untraced response: #{response2.choices[0].message.content}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first client call should have an openai.chat span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/openai/streaming.rb b/examples/contrib/openai/streaming.rb new file mode 100644 index 0000000..5c4dd89 --- /dev/null +++ b/examples/contrib/openai/streaming.rb @@ -0,0 +1,77 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: OpenAI streaming with Braintrust tracing +# +# This example demonstrates how to trace streaming OpenAI API calls. +# Shows two streaming patterns: +# 1. Chat completions with stream_raw +# 2. Responses API with stream +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/streaming.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:openai) + +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("openai-streaming-example") +root_span = nil + +tracer.in_span("examples/contrib/openai/streaming.rb") do |span| + root_span = span + + # Pattern 1: Chat completions streaming with stream_raw + # Returns an iterator that yields chunks + puts "=== Pattern 1: Chat completions with stream_raw ===" + print "Assistant: " + + stream = client.chat.completions.stream_raw( + model: "gpt-4o-mini", + max_tokens: 100, + messages: [{role: "user", content: "Count from 1 to 5, one number per line."}], + stream_options: {include_usage: true} + ) + + 
stream.each do |chunk| + if chunk.choices&.first&.delta&.content + print chunk.choices.first.delta.content + end + end + puts "\n" + + # Pattern 2: Responses API streaming + # Returns an iterator that yields events + puts "=== Pattern 2: Responses API with stream ===" + print "Assistant: " + + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Name 3 colors." + ) + + stream.each do |event| + if event.type == :"response.output_text.delta" + print event.delta + end + end + puts "\n" +end + +puts "\nView this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown + +puts "\nTrace sent to Braintrust!" diff --git a/examples/internal/openai.rb b/examples/internal/contrib/openai/golden.rb similarity index 98% rename from examples/internal/openai.rb rename to examples/internal/contrib/openai/golden.rb index 0c66b12..0e2ed98 100755 --- a/examples/internal/openai.rb +++ b/examples/internal/contrib/openai/golden.rb @@ -27,7 +27,7 @@ # This example validates that the Ruby SDK captures the same data as TypeScript/Go SDKs. 
# # Usage: -# OPENAI_API_KEY=key bundle exec ruby examples/internal/openai.rb +# OPENAI_API_KEY=key bundle exec ruby examples/internal/contrib/openai/golden.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -42,16 +42,16 @@ # Get a tracer for this example tracer = OpenTelemetry.tracer_provider.tracer("openai-comprehensive-example") -# Create OpenAI client and wrap it +# Create OpenAI client and instrument it client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::OpenAI.wrap(client) +Braintrust.instrument!(:openai) puts "OpenAI Comprehensive Features Example" puts "=" * 50 # Wrap all examples under a single parent trace root_span = nil -tracer.in_span("examples/internal/openai.rb") do |span| +tracer.in_span("examples/internal/contrib/openai/golden.rb") do |span| root_span = span # Example 1: Vision - Image Understanding puts "\n1. Vision (Image Understanding)" diff --git a/examples/internal/eval-parallel-benchmark.rb b/examples/internal/eval-parallel-benchmark.rb index ba10525..c6a1683 100644 --- a/examples/internal/eval-parallel-benchmark.rb +++ b/examples/internal/eval-parallel-benchmark.rb @@ -105,9 +105,9 @@ def self.test_cases # Initialize Braintrust Braintrust.init(blocking_login: true) -# Create OpenAI client and wrap for tracing +# Create OpenAI client and instrument for tracing client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::OpenAI.wrap(client) +Braintrust.instrument!(:openai) # Task: Call OpenAI to classify fruit/vegetable task = ->(input) { diff --git a/examples/internal/kitchen-sink.rb b/examples/internal/kitchen-sink.rb index 34d44ea..45ecd93 100755 --- a/examples/internal/kitchen-sink.rb +++ b/examples/internal/kitchen-sink.rb @@ -29,8 +29,8 @@ # Create OpenAI client openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -# Wrap the client with Braintrust tracing -Braintrust::Trace::OpenAI.wrap(openai_client) +# Instrument OpenAI with 
Braintrust tracing +Braintrust.instrument!(:openai) puts "Kitchen Sink Eval Example" puts "=" * 60 diff --git a/gemfiles/openai_ruby_openai.gemfile b/gemfiles/openai_ruby_openai.gemfile new file mode 100644 index 0000000..a8db595 --- /dev/null +++ b/gemfiles/openai_ruby_openai.gemfile @@ -0,0 +1,10 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" +gem "openai", ">= 0.34" +gem "ruby-openai", ">= 8.0" + +gemspec path: "../" diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index f19c4fb..66746dc 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -99,4 +99,7 @@ def tracer_for(target, name: "braintrust") end # Load integration stubs (eager load minimal metadata). -# These will be added in subsequent milestones. +require_relative "contrib/openai/integration" + +# Register integrations +Braintrust::Contrib::OpenAI::Integration.register! diff --git a/lib/braintrust/contrib/openai/deprecated.rb b/lib/braintrust/contrib/openai/deprecated.rb new file mode 100644 index 0000000..ab361cf --- /dev/null +++ b/lib/braintrust/contrib/openai/deprecated.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old OpenAI integration API. +# This file now just delegates to the new API. + +module Braintrust + module Trace + module OpenAI + # Wrap an OpenAI::Client to automatically create spans for chat completions and responses. + # This is the legacy API - delegates to the new contrib framework. 
+ # + # @param client [OpenAI::Client] the OpenAI client to wrap + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) + # @return [OpenAI::Client] the wrapped client + def self.wrap(client, tracer_provider: nil) + Log.warn("Braintrust::Trace::OpenAI.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:openai, target: client, tracer_provider: tracer_provider) + client + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/chat.rb b/lib/braintrust/contrib/openai/instrumentation/chat.rb new file mode 100644 index 0000000..50ea6e2 --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/chat.rb @@ -0,0 +1,296 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Chat completions instrumentation for OpenAI. + # Wraps create(), stream(), and stream_raw() methods to create spans. 
+ module Chat + module Completions + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model frequency_penalty logit_bias logprobs max_tokens n + presence_penalty response_format seed service_tier stop + stream stream_options temperature top_p top_logprobs + tools tool_choice parallel_tool_calls user functions function_call + ].freeze + + module InstanceMethods + # Wrap create method for non-streaming completions + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("Chat Completion") do |span| + metadata = build_metadata(params) + + set_input(span, params) + + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super + end + + set_output(span, response) + set_metrics(span, response, time_to_first_token) + finalize_metadata(span, metadata, response) + + response + end + end + + # Wrap stream_raw for streaming chat completions (returns Internal::Stream) + # Stores context on stream object for span creation during consumption + def stream_raw(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + stream_obj = super + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + metadata: metadata, + completions_instance: self, + stream_type: :raw) + stream_obj + end + + # Wrap stream for streaming chat completions (returns ChatCompletionStream) + # Stores context on stream object for span creation during consumption + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + stream_obj = super + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + 
metadata: metadata, + completions_instance: self, + stream_type: :chat_completion) + stream_obj + end + + private + + def build_metadata(params, stream: false) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/chat/completions" + } + metadata["stream"] = true if stream + Completions::METADATA_FIELDS.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata + end + + def set_input(span, params) + return unless params[:messages] + + messages_array = params[:messages].map(&:to_h) + Common.set_json_attr(span, "braintrust.input_json", messages_array) + end + + def set_output(span, response) + return unless response.respond_to?(:choices) && response.choices&.any? + + choices_array = response.choices.map(&:to_h) + Common.set_json_attr(span, "braintrust.output_json", choices_array) + end + + def set_metrics(span, response, time_to_first_token) + metrics = {} + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response) + metadata["id"] = response.id if response.respond_to?(:id) && response.id + metadata["created"] = response.created if response.respond_to?(:created) && response.created + metadata["model"] = response.model if response.respond_to?(:model) && response.model + metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint + metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + + # Instrumentation for ChatCompletionStream (returned by stream()) + # Uses current_completion_snapshot for accumulated output + module ChatCompletionStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + trace_consumption(ctx) { super(&block) } + end + + def text + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + original_enum = super + Enumerator.new do |y| + trace_consumption(ctx) do + original_enum.each { |t| y << t } + end + end + end + + private + + def trace_consumption(ctx) + ctx[:consumed] = true + + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + completions_instance = ctx[:completions_instance] + start_time = Braintrust::Internal::Time.measure + + tracer.in_span("Chat Completion") do |span| + completions_instance.send(:set_input, span, params) + Common.set_json_attr(span, "braintrust.metadata", metadata) + + yield + + finalize_stream_span(span, start_time, metadata, completions_instance) + end + end + + def finalize_stream_span(span, start_time, metadata, 
completions_instance) + time_to_first_token = Braintrust::Internal::Time.measure(start_time) + + begin + snapshot = current_completion_snapshot + return unless snapshot + + # Set output from accumulated choices + if snapshot.choices&.any? + choices_array = snapshot.choices.map(&:to_h) + Common.set_json_attr(span, "braintrust.output_json", choices_array) + end + + # Set metrics + metrics = {} + if snapshot.usage + metrics = Common.parse_usage_tokens(snapshot.usage) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + + # Update metadata with response fields + metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id + metadata["created"] = snapshot.created if snapshot.respond_to?(:created) && snapshot.created + metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model + metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint + metadata["service_tier"] = snapshot.service_tier if snapshot.respond_to?(:service_tier) && snapshot.service_tier + Common.set_json_attr(span, "braintrust.metadata", metadata) + rescue => e + Braintrust::Log.debug("Failed to get completion snapshot: #{e.message}") + end + end + end + end + + # Instrumentation for Internal::Stream (returned by stream_raw()) + # Aggregates chunks manually since Internal::Stream has no built-in accumulation + module InternalStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + # Only trace if context present and is for chat completions (not other endpoints) + return super unless ctx&.[](:tracer) && !ctx[:consumed] && ctx[:stream_type] == :raw + + ctx[:consumed] = true + + tracer = ctx[:tracer] + 
params = ctx[:params] + metadata = ctx[:metadata] + completions_instance = ctx[:completions_instance] + aggregated_chunks = [] + start_time = Braintrust::Internal::Time.measure + time_to_first_token = nil + + tracer.in_span("Chat Completion") do |span| + completions_instance.send(:set_input, span, params) + Common.set_json_attr(span, "braintrust.metadata", metadata) + + super do |chunk| + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + aggregated_chunks << chunk.to_h + block&.call(chunk) + end + + finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) + end + end + + private + + def finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) + return if aggregated_chunks.empty? + + aggregated_output = Common.aggregate_streaming_chunks(aggregated_chunks) + Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) + + # Set metrics + metrics = {} + if aggregated_output[:usage] + metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ + # Update metadata with response fields + metadata["id"] = aggregated_output[:id] if aggregated_output[:id] + metadata["created"] = aggregated_output[:created] if aggregated_output[:created] + metadata["model"] = aggregated_output[:model] if aggregated_output[:model] + metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] + metadata["service_tier"] = aggregated_output[:service_tier] if aggregated_output[:service_tier] + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/common.rb b/lib/braintrust/contrib/openai/instrumentation/common.rb new file mode 100644 index 0000000..039353b --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/common.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +require "json" + +require_relative "../../../trace/tokens" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Chat completions instrumentation for OpenAI. + # Provides modules that can be prepended to OpenAI::Client to instrument chat.completions API. 
+ module Common + # Helper to safely set a JSON attribute on a span + # Only sets the attribute if obj is present + # @param span [OpenTelemetry::Trace::Span] the span to set attribute on + # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") + # @param obj [Object] the object to serialize to JSON + # @return [void] + def self.set_json_attr(span, attr_name, obj) + return unless obj + span.set_attribute(attr_name, JSON.generate(obj)) + end + + # Parse usage tokens from OpenAI API response + # @param usage [Hash, Object] usage object from OpenAI response + # @return [Hash] metrics hash with normalized names + def self.parse_usage_tokens(usage) + Braintrust::Trace.parse_openai_usage_tokens(usage) + end + + # Aggregate streaming chunks into a single response structure + # Follows the Go SDK logic for aggregating deltas + # @param chunks [Array] array of chunk hashes from stream + # @return [Hash] aggregated response with choices, usage, etc. + def self.aggregate_streaming_chunks(chunks) + return {} if chunks.empty? 
+ + # Initialize aggregated structure + aggregated = { + id: nil, + created: nil, + model: nil, + system_fingerprint: nil, + choices: [], + usage: nil + } + + # Track aggregated content and tool_calls for each choice index + choice_data = {} + + chunks.each do |chunk| + # Capture top-level fields from any chunk that has them + aggregated[:id] ||= chunk[:id] + aggregated[:created] ||= chunk[:created] + aggregated[:model] ||= chunk[:model] + aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] + + # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) + if chunk[:usage] + aggregated[:usage] = chunk[:usage] + end + + # Process choices + next unless chunk[:choices].is_a?(Array) + chunk[:choices].each do |choice| + index = choice[:index] || 0 + choice_data[index] ||= { + index: index, + role: nil, + content: +"", + tool_calls: [], + finish_reason: nil + } + + delta = choice[:delta] || {} + + # Aggregate role (set once from first delta that has it) + choice_data[index][:role] ||= delta[:role] + + # Aggregate content + if delta[:content] + choice_data[index][:content] << delta[:content] + end + + # Aggregate tool_calls (similar to Go SDK logic) + if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? + delta[:tool_calls].each do |tool_call_delta| + # Check if this is a new tool call or continuation + if tool_call_delta[:id] && !tool_call_delta[:id].empty? + # New tool call (dup strings to avoid mutating input) + choice_data[index][:tool_calls] << { + id: tool_call_delta[:id], + type: tool_call_delta[:type], + function: { + name: +(tool_call_delta.dig(:function, :name) || ""), + arguments: +(tool_call_delta.dig(:function, :arguments) || "") + } + } + elsif choice_data[index][:tool_calls].any? 
+ # Continuation - append arguments to last tool call + last_tool_call = choice_data[index][:tool_calls].last + if tool_call_delta.dig(:function, :arguments) + last_tool_call[:function][:arguments] << tool_call_delta[:function][:arguments] + end + end + end + end + + # Capture finish_reason + if choice[:finish_reason] + choice_data[index][:finish_reason] = choice[:finish_reason] + end + end + end + + # Build final choices array + aggregated[:choices] = choice_data.values.sort_by { |c| c[:index] }.map do |choice| + message = { + role: choice[:role], + content: choice[:content].empty? ? nil : choice[:content] + } + + # Add tool_calls to message if any + message[:tool_calls] = choice[:tool_calls] if choice[:tool_calls].any? + + { + index: choice[:index], + message: message, + finish_reason: choice[:finish_reason] + } + end + + aggregated + end + + # Aggregate responses streaming events into a single response structure + # Follows similar logic to Python SDK's _postprocess_streaming_results + # @param events [Array] array of event objects from stream + # @return [Hash] aggregated response with output, usage, etc. + def self.aggregate_responses_events(events) + return {} if events.empty? + + # Find the response.completed event which has the final response + completed_event = events.find { |e| e.respond_to?(:type) && e.type == :"response.completed" } + + if completed_event&.respond_to?(:response) + response = completed_event.response + # Convert the response object to a hash-like structure for logging + return { + id: response.respond_to?(:id) ? response.id : nil, + output: response.respond_to?(:output) ? response.output : nil, + usage: response.respond_to?(:usage) ? 
response.usage : nil + } + end + + # Fallback if no completed event found + {} + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/responses.rb b/lib/braintrust/contrib/openai/instrumentation/responses.rb new file mode 100644 index 0000000..909b7f4 --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/responses.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Responses API instrumentation for OpenAI. + # Wraps create() and stream() methods to create spans. + module Responses + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model instructions modalities tools parallel_tool_calls + tool_choice temperature max_tokens top_p frequency_penalty + presence_penalty seed user metadata store response_format + reasoning previous_response_id truncation + ].freeze + + module InstanceMethods + # Wrap non-streaming create method + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("openai.responses.create") do |span| + metadata = build_metadata(params) + + set_input(span, params) + + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super + end + + set_output(span, response) + set_metrics(span, response, time_to_first_token) + finalize_metadata(span, metadata, response) + + response + end + end + + # Wrap streaming method + # Stores context on stream object for span creation during consumption + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + 
stream_obj = super + + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + metadata: metadata, + responses_instance: self) + stream_obj + end + + private + + def build_metadata(params, stream: false) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/responses" + } + metadata["stream"] = true if stream + Responses::METADATA_FIELDS.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata + end + + def set_input(span, params) + return unless params[:input] + + Common.set_json_attr(span, "braintrust.input_json", params[:input]) + end + + def set_output(span, response) + return unless response.respond_to?(:output) && response.output + + Common.set_json_attr(span, "braintrust.output_json", response.output) + end + + def set_metrics(span, response, time_to_first_token) + metrics = {} + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response) + metadata["id"] = response.id if response.respond_to?(:id) && response.id + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + + # Instrumentation for ResponseStream (returned by stream()) + # Aggregates events and creates span lazily when consumed + module ResponseStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + ctx[:consumed] = true + + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + responses_instance = ctx[:responses_instance] + aggregated_events = [] + start_time = Braintrust::Internal::Time.measure + time_to_first_token = nil + + tracer.in_span("openai.responses.create") do |span| + responses_instance.send(:set_input, span, params) + Common.set_json_attr(span, "braintrust.metadata", metadata) + + begin + super do |event| + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + aggregated_events << event + block&.call(event) + end + rescue => e + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") + raise + end + + finalize_stream_span(span, aggregated_events, time_to_first_token, metadata) + end + end + + private + + def finalize_stream_span(span, aggregated_events, time_to_first_token, metadata) + return if aggregated_events.empty? 
+ + aggregated_output = Common.aggregate_responses_events(aggregated_events) + Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] + + # Set metrics + metrics = {} + if aggregated_output[:usage] + metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + + # Update metadata with response fields + metadata["id"] = aggregated_output[:id] if aggregated_output[:id] + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/integration.rb b/lib/braintrust/contrib/openai/integration.rb new file mode 100644 index 0000000..95de580 --- /dev/null +++ b/lib/braintrust/contrib/openai/integration.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require_relative "../integration" +require_relative "deprecated" + +module Braintrust + module Contrib + module OpenAI + # OpenAI integration for automatic instrumentation. + # Instruments the official openai gem (not ruby-openai). + class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "0.1.0" + + GEM_NAMES = ["openai"].freeze + REQUIRE_PATHS = ["openai"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :openai + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if official openai gem is available + def self.loaded? + # Check if the official openai gem is loaded (not ruby-openai). + # The ruby-openai gem also uses "require 'openai'", so we need to distinguish them. 
+ + # This module is defined ONLY in the official OpenAI gem + defined?(::OpenAI::Internal) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. + # @return [Class] The patcher class + def self.patchers + require_relative "patcher" + [ChatPatcher, ResponsesPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/patcher.rb b/lib/braintrust/contrib/openai/patcher.rb new file mode 100644 index 0000000..529482f --- /dev/null +++ b/lib/braintrust/contrib/openai/patcher.rb @@ -0,0 +1,130 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/chat" +require_relative "instrumentation/responses" + +module Braintrust + module Contrib + module OpenAI + # Patcher for OpenAI integration - implements class-level patching. + # All new OpenAI::Client instances created after patch! will be automatically instrumented. + class ChatPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) + end + + def patched?(**options) + # Use the target's singleton class if provided, otherwise check the base class. + target_class = get_singleton_class(options[:target]) || ::OpenAI::Resources::Chat::Completions + + Instrumentation::Chat::Completions.applied?(target_class) + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + # Stream classes are shared across all clients, patch at class level. + # The instrumentation short-circuits when no context is present, + # so uninstrumented clients' streams pass through unaffected. 
+ patch_stream_classes + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Chat::Completions) + else + # Class-level (for all clients) + ::OpenAI::Resources::Chat::Completions.include(Instrumentation::Chat::Completions) + end + end + + def patch_stream_classes + # Patch ChatCompletionStream for stream() method + if defined?(::OpenAI::Helpers::Streaming::ChatCompletionStream) + unless Instrumentation::Chat::ChatCompletionStream.applied?(::OpenAI::Helpers::Streaming::ChatCompletionStream) + ::OpenAI::Helpers::Streaming::ChatCompletionStream.include(Instrumentation::Chat::ChatCompletionStream) + end + end + + # Patch Internal::Stream for stream_raw() method + if defined?(::OpenAI::Internal::Stream) + unless Instrumentation::Chat::InternalStream.applied?(::OpenAI::Internal::Stream) + ::OpenAI::Internal::Stream.include(Instrumentation::Chat::InternalStream) + end + end + end + + private + + def get_singleton_class(client) + client&.chat&.completions&.singleton_class + end + end + end + + # Patcher for OpenAI integration - implements class-level patching. + # All new OpenAI::Client instances created after patch! will be automatically instrumented. + class ResponsesPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) && ::OpenAI::Client.instance_methods.include?(:responses) + end + + def patched?(**options) + # Use the target's singleton class if provided, otherwise check the base class. + target_class = get_singleton_class(options[:target]) || ::OpenAI::Resources::Responses + + Instrumentation::Responses.applied?(target_class) + end + + # Perform the actual patching. 
+ # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + # Stream class is shared across all clients, patch at class level. + # The instrumentation short-circuits when no context is present, + # so uninstrumented clients' streams pass through unaffected. + patch_response_stream + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Responses) + else + # Class-level (for all clients) + ::OpenAI::Resources::Responses.include(Instrumentation::Responses) + end + end + + def patch_response_stream + # Patch ResponseStream for stream() method + if defined?(::OpenAI::Helpers::Streaming::ResponseStream) + unless Instrumentation::ResponseStream.applied?(::OpenAI::Helpers::Streaming::ResponseStream) + ::OpenAI::Helpers::Streaming::ResponseStream.include(Instrumentation::ResponseStream) + end + end + end + + private + + def get_singleton_class(client) + client&.responses&.singleton_class + end + end + end + end + end +end diff --git a/lib/braintrust/trace/contrib/openai.rb b/lib/braintrust/trace/contrib/openai.rb deleted file mode 100644 index 5eb0a29..0000000 --- a/lib/braintrust/trace/contrib/openai.rb +++ /dev/null @@ -1,611 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../tokens" - -module Braintrust - module Trace - module OpenAI - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., 
"braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from OpenAI API response - # @param usage [Hash, Object] usage object from OpenAI response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_openai_usage_tokens(usage) - end - - # Aggregate streaming chunks into a single response structure - # Follows the Go SDK logic for aggregating deltas - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with choices, usage, etc. - def self.aggregate_streaming_chunks(chunks) - return {} if chunks.empty? - - # Initialize aggregated structure - aggregated = { - id: nil, - created: nil, - model: nil, - system_fingerprint: nil, - choices: [], - usage: nil - } - - # Track aggregated content and tool_calls for each choice index - choice_data = {} - - chunks.each do |chunk| - # Capture top-level fields from any chunk that has them - aggregated[:id] ||= chunk[:id] - aggregated[:created] ||= chunk[:created] - aggregated[:model] ||= chunk[:model] - aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] - - # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) - if chunk[:usage] - aggregated[:usage] = chunk[:usage] - end - - # Process choices - next unless chunk[:choices].is_a?(Array) - chunk[:choices].each do |choice| - index = choice[:index] || 0 - choice_data[index] ||= { - index: index, - role: nil, - content: +"", - tool_calls: [], - finish_reason: nil - } - - delta = choice[:delta] || {} - - # Aggregate role (set once from first delta that has it) - choice_data[index][:role] ||= delta[:role] - - # Aggregate content - if delta[:content] - choice_data[index][:content] << delta[:content] - end - - # Aggregate tool_calls (similar to Go SDK 
logic) - if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? - delta[:tool_calls].each do |tool_call_delta| - # Check if this is a new tool call or continuation - if tool_call_delta[:id] && !tool_call_delta[:id].empty? - # New tool call - choice_data[index][:tool_calls] << { - id: tool_call_delta[:id], - type: tool_call_delta[:type], - function: { - name: tool_call_delta.dig(:function, :name) || +"", - arguments: tool_call_delta.dig(:function, :arguments) || +"" - } - } - elsif choice_data[index][:tool_calls].any? - # Continuation - append arguments to last tool call - last_tool_call = choice_data[index][:tool_calls].last - if tool_call_delta.dig(:function, :arguments) - last_tool_call[:function][:arguments] << tool_call_delta[:function][:arguments] - end - end - end - end - - # Capture finish_reason - if choice[:finish_reason] - choice_data[index][:finish_reason] = choice[:finish_reason] - end - end - end - - # Build final choices array - aggregated[:choices] = choice_data.values.sort_by { |c| c[:index] }.map do |choice| - message = { - role: choice[:role], - content: choice[:content].empty? ? nil : choice[:content] - } - - # Add tool_calls to message if any - message[:tool_calls] = choice[:tool_calls] if choice[:tool_calls].any? 
- - { - index: choice[:index], - message: message, - finish_reason: choice[:finish_reason] - } - end - - aggregated - end - - # Wrap an OpenAI::Client to automatically create spans for chat completions and responses - # Supports both synchronous and streaming requests - # @param client [OpenAI::Client] the OpenAI client to wrap - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(client, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # Wrap chat completions - wrap_chat_completions(client, tracer_provider) - - # Wrap responses API if available - wrap_responses(client, tracer_provider) if client.respond_to?(:responses) - - client - end - - # Wrap chat completions API - # @param client [OpenAI::Client] the OpenAI client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_chat_completions(client, tracer_provider) - # Create a wrapper module that intercepts chat.completions.create - wrapper = Module.new do - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("Chat Completion") do |span| - # Track start time for time_to_first_token - start_time = Time.now - - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input messages as JSON - # Pass through all message fields to preserve tool_calls, tool_call_id, name, etc. 
- if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Call the original method - response = super(**params) - - # Calculate time to first token - time_to_first_token = Time.now - start_time - - # Set output (choices) as JSON - # Use to_h to get the raw structure with all fields (including tool_calls) - if response.respond_to?(:choices) && response.choices&.any? - choices_array = response.choices.map(&:to_h) - span.set_attribute("braintrust.output_json", JSON.generate(choices_array)) - end - - # Set metrics (token usage with advanced details) - metrics = {} - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? - - # Add response metadata fields - metadata["id"] = response.id if response.respond_to?(:id) && response.id - metadata["created"] = response.created if response.respond_to?(:created) && response.created - metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint - metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - - # Wrap stream_raw for streaming chat completions - define_method(:stream_raw) do |**params| - tracer = tracer_provider.tracer("braintrust") - aggregated_chunks = [] - start_time = Time.now - time_to_first_token = nil - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Start span with proper context (will be child of current span if any) - span = 
tracer.start_span("Chat Completion") - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - metadata["stream"] = true # Explicitly mark as streaming - - # Set input messages as JSON - if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream_raw method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Wrap the stream to aggregate chunks - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |chunk| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - aggregated_chunks << chunk.to_h - block&.call(chunk) - end - rescue => e - # Record exception if streaming fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Always aggregate whatever chunks we collected and finish span - # This runs on normal completion, break, or exception - unless aggregated_chunks.empty? 
- aggregated_output = Braintrust::Trace::OpenAI.aggregate_streaming_chunks(aggregated_chunks) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) - - # Set metrics if usage is included (requires stream_options.include_usage) - metrics = {} - if aggregated_output[:usage] - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage]) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token || 0.0 - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - - # Update metadata with response fields - metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - metadata["created"] = aggregated_output[:created] if aggregated_output[:created] - metadata["model"] = aggregated_output[:model] if aggregated_output[:model] - metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metadata", metadata) - end - - span.finish - end - - stream - end - - # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods) - define_method(:stream) do |**params| - tracer = tracer_provider.tracer("braintrust") - start_time = Time.now - time_to_first_token = nil - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Start span with proper context (will be child of current span if any) - span = tracer.start_span("Chat Completion") - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - 
metadata["stream"] = true # Explicitly mark as streaming - - # Set input messages as JSON - if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Local helper for setting JSON attributes - set_json_attr = ->(attr_name, obj) { Braintrust::Trace::OpenAI.set_json_attr(span, attr_name, obj) } - - # Helper to extract metadata from SDK's internal snapshot - extract_stream_metadata = lambda do - # Access the SDK's internal accumulated completion snapshot - snapshot = stream.current_completion_snapshot - return unless snapshot - - # Set output from accumulated choices - if snapshot.choices&.any? - choices_array = snapshot.choices.map(&:to_h) - set_json_attr.call("braintrust.output_json", choices_array) - end - - # Set metrics if usage is available - metrics = {} - if snapshot.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token || 0.0 - set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty? 
- - # Update metadata with response fields - metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id - metadata["created"] = snapshot.created if snapshot.respond_to?(:created) && snapshot.created - metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model - metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint - set_json_attr.call("braintrust.metadata", metadata) - end - - # Prevent double-finish of span - finish_braintrust_span = lambda do - return if stream.instance_variable_get(:@braintrust_span_finished) - stream.instance_variable_set(:@braintrust_span_finished, true) - extract_stream_metadata.call - span.finish - end - - # Wrap .each() method - this is the core consumption method - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |chunk| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - block&.call(chunk) - end - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - finish_braintrust_span.call - end - - # Wrap .text() method - returns enumerable for text deltas - original_text = stream.method(:text) - stream.define_singleton_method(:text) do - text_enum = original_text.call - # Wrap the returned enumerable's .each method - original_text_each = text_enum.method(:each) - text_enum.define_singleton_method(:each) do |&block| - original_text_each.call do |delta| - # Capture time to first token on first delta - time_to_first_token ||= Time.now - start_time - block&.call(delta) - end - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - finish_braintrust_span.call - end - text_enum - end - - stream - end - end - - # Prepend the wrapper to the completions 
resource - client.chat.completions.singleton_class.prepend(wrapper) - end - - # Wrap responses API - # @param client [OpenAI::Client] the OpenAI client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_responses(client, tracer_provider) - # Create a wrapper module that intercepts responses.create and responses.stream - wrapper = Module.new do - # Wrap non-streaming create method - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("openai.responses.create") do |span| - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses" - } - - # Capture request metadata fields - metadata_fields = %i[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user metadata store response_format - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input as JSON - if params[:input] - span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) - end - - # Call the original method - response = super(**params) - - # Set output as JSON - if response.respond_to?(:output) && response.output - span.set_attribute("braintrust.output_json", JSON.generate(response.output)) - end - - # Set metrics (token usage) - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage) - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? 
- end - - # Add response metadata fields - metadata["id"] = response.id if response.respond_to?(:id) && response.id - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - - # Wrap streaming method - define_method(:stream) do |**params| - tracer = tracer_provider.tracer("braintrust") - aggregated_events = [] - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses", - "stream" => true - } - - # Start span with proper context - span = tracer.start_span("openai.responses.create") - - # Capture request metadata fields - metadata_fields = %i[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user metadata store response_format - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input as JSON - if params[:input] - span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Wrap the stream to aggregate events - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |event| - # Store the actual event object (not converted to hash) - aggregated_events << event - block&.call(event) - end - rescue => e - # Record exception if streaming fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Always aggregate whatever events we collected and finish span - unless 
aggregated_events.empty? - aggregated_output = Braintrust::Trace::OpenAI.aggregate_responses_events(aggregated_events) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] - - # Set metrics if usage is included - if aggregated_output[:usage] - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage]) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - end - - # Update metadata with response fields - metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metadata", metadata) - end - - span.finish - end - - stream - end - end - - # Prepend the wrapper to the responses resource - client.responses.singleton_class.prepend(wrapper) - end - - # Aggregate responses streaming events into a single response structure - # Follows similar logic to Python SDK's _postprocess_streaming_results - # @param events [Array] array of event objects from stream - # @return [Hash] aggregated response with output, usage, etc. - def self.aggregate_responses_events(events) - return {} if events.empty? - - # Find the response.completed event which has the final response - completed_event = events.find { |e| e.respond_to?(:type) && e.type == :"response.completed" } - - if completed_event&.respond_to?(:response) - response = completed_event.response - # Convert the response object to a hash-like structure for logging - return { - id: response.respond_to?(:id) ? response.id : nil, - output: response.respond_to?(:output) ? response.output : nil, - usage: response.respond_to?(:usage) ? 
response.usage : nil - } - end - - # Fallback if no completed event found - {} - end - end - end -end diff --git a/test/braintrust/contrib/openai/deprecated_test.rb b/test/braintrust/contrib/openai/deprecated_test.rb new file mode 100644 index 0000000..32c0c74 --- /dev/null +++ b/test/braintrust/contrib/openai/deprecated_test.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/openai/deprecated" + +class Braintrust::Contrib::OpenAI::DeprecatedTest < Minitest::Test + def test_wrap_delegates_to_instrument + mock_client = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::OpenAI.wrap(mock_client, tracer_provider: mock_tracer) } + end + + assert_equal :openai, captured_args[0] + assert_same mock_client, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_logs_deprecation_warning + mock_client = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::OpenAI.wrap(mock_client) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, warning_message) + end + + def test_wrap_returns_client + mock_client = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::OpenAI.wrap(mock_client) } + assert_same mock_client, result + end + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/chat_test.rb b/test/braintrust/contrib/openai/instrumentation/chat_test.rb new file mode 100644 index 0000000..3dfa0eb --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation/chat_test.rb @@ -0,0 +1,477 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require 
"braintrust/contrib/openai/instrumentation/chat" + +class Braintrust::Contrib::OpenAI::Instrumentation::Chat::CompletionsTest < Minitest::Test + Completions = Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Completions::InstanceMethods]) + mock.expect(:prepend, nil, [Completions::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Completions.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + # Should not raise or double-prepend + Completions.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Completions::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Completions.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + assert Completions.applied?(base) + end +end + +# E2E tests for Chat::Completions instrumentation +class Braintrust::Contrib::OpenAI::Instrumentation::Chat::CompletionsE2ETest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + end + + # --- #create --- + + def test_create_creates_span_with_correct_attributes + VCR.use_cassette("openai/chat_completions") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.chat.completions.create( + messages: [ + {role: "system", content: "You are a test assistant."}, + {role: "user", content: "Say 'test'"} + ], + model: "gpt-4o-mini", + max_tokens: 10, + temperature: 0.5 + ) + + refute_nil response + refute_nil response.choices[0].message.content + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 2, input.length + assert_equal "system", input[0]["role"] + assert_equal "user", input[1]["role"] + + # Verify output + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal "assistant", output[0]["message"]["role"] + + # Verify metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/chat/completions", metadata["endpoint"] + # Model is overwritten with response value (includes version suffix) + assert_match(/\Agpt-4o-mini/, metadata["model"]) + + # Verify metrics + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics.key?("time_to_first_token") + end + end + + def test_create_parses_advanced_token_metrics + VCR.use_cassette("openai/advanced_tokens") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.chat.completions.create( + messages: [{role: "user", content: "test"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + + refute_nil response + + span = rig.drain_one + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + + # Basic metrics should always be present + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + + # If the response includes token_details, verify they're parsed with correct naming + # - prompt_tokens_details.cached_tokens → prompt_cached_tokens + # - completion_tokens_details.reasoning_tokens → completion_reasoning_tokens + if response.usage.respond_to?(:prompt_tokens_details) && response.usage.prompt_tokens_details + details = response.usage.prompt_tokens_details + if details.respond_to?(:cached_tokens) && details.cached_tokens + assert metrics.key?("prompt_cached_tokens") + assert_equal details.cached_tokens, metrics["prompt_cached_tokens"] + end + end + + if response.usage.respond_to?(:completion_tokens_details) && response.usage.completion_tokens_details + details = response.usage.completion_tokens_details + if details.respond_to?(:reasoning_tokens) && details.reasoning_tokens + assert metrics.key?("completion_reasoning_tokens") + assert_equal details.reasoning_tokens, metrics["completion_reasoning_tokens"] + end + end + end + end + + def test_create_handles_vision_messages + VCR.use_cassette("openai/vision") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.chat.completions.create( + messages: [ + { + role: "user", + content: [ + {type: "text", text: "What color is this image?"}, + {type: "image_url", image_url: {url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/320px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"}} + ] + } + ], + model: "gpt-4o-mini", + max_tokens: 50 + ) + + refute_nil response + + span = rig.drain_one + input = JSON.parse(span.attributes["braintrust.input_json"]) + + assert_instance_of Array, input[0]["content"] + assert_equal "text", input[0]["content"][0]["type"] + assert_equal "image_url", input[0]["content"][1]["type"] + end + end + + def test_create_handles_tool_messages + VCR.use_cassette("openai/tool_messages") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + + tools = [{ + type: "function", + function: { + name: "get_weather", + description: "Get the current weather", + parameters: {type: "object", properties: {location: {type: "string"}}, required: ["location"]} + } + }] + + first_response = client.chat.completions.create( + messages: [{role: "user", content: "What's the weather in Paris?"}], + model: "gpt-4o-mini", + tools: tools, + max_tokens: 100 + ) + + tool_call = first_response.choices[0].message.tool_calls&.first + skip "Model didn't call tool" unless tool_call + + client.chat.completions.create( + messages: [ + {role: "user", content: "What's the weather in Paris?"}, + {role: "assistant", content: nil, tool_calls: [{id: tool_call.id, type: "function", function: {name: tool_call.function.name, arguments: tool_call.function.arguments}}]}, + {role: "tool", tool_call_id: tool_call.id, content: "Sunny, 22°C"} + ], + model: "gpt-4o-mini", + tools: tools, + max_tokens: 100 + ) + + spans = rig.drain 
+ span = spans[1] + input = JSON.parse(span.attributes["braintrust.input_json"]) + + assert_equal "tool", input[2]["role"] + assert_equal tool_call.id, input[2]["tool_call_id"] + end + end + + def test_create_records_exception_on_error + VCR.use_cassette("openai/create_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + client.chat.completions.create( + messages: [{role: "user", content: "test"}], + model: "gpt-4o-mini" + ) + end + + span = rig.drain_one + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + assert span.events.any? { |e| e.name == "exception" } + end + end + + # --- #stream_raw --- + + def test_stream_raw_aggregates_chunks + VCR.use_cassette("openai/streaming") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Count from 1 to 3"}], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: {include_usage: true} + ) + + full_content = "" + stream.each do |chunk| + delta_content = chunk.choices[0]&.delta&.content + full_content += delta_content if delta_content + end + + refute_empty full_content + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"].length > 0 + + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal true, metadata["stream"] + + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics.key?("time_to_first_token") + end + end + + def test_stream_raw_handles_multiple_choices + VCR.use_cassette("openai/streaming_multiple_choices") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Say either 'hello' or 'hi' in one word"}], + model: "gpt-4o-mini", + max_tokens: 10, + n: 2, + stream_options: {include_usage: true} + ) + + stream.each { |chunk| } # consume all chunks + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + + assert_equal 2, output.length, "Expected 2 choices in output" + assert output[0]["message"]["content"].length > 0 + assert output[1]["message"]["content"].length > 0 + + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal 2, metadata["n"] + end + end + + def test_stream_raw_closes_span_on_partial_consumption + VCR.use_cassette("openai/partial_stream") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Count from 1 to 10"}], + model: "gpt-4o-mini", + max_tokens: 50 + ) + + chunk_count = 0 + begin + stream.each do |chunk| + chunk_count += 1 + break if chunk_count >= 2 + end + rescue StopIteration + end + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + assert span.attributes.key?("braintrust.input_json") + end + end + + def test_stream_raw_records_exception_on_error + VCR.use_cassette("openai/stream_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "test"}], + model: "gpt-4o-mini" + ) + stream.each { |chunk| } + end + + # No span created when stream fails before consumption + spans = rig.drain + assert_empty spans, "No span should be created when stream fails before consumption" + end + end + + # --- #stream --- + + def test_stream_with_text_method + VCR.use_cassette("openai/streaming_text") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream( + messages: [{role: "user", content: "Count from 1 to 3"}], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: {include_usage: true} + ) + + full_text = "" + stream.text.each { |delta| full_text += delta } + + refute_empty full_text + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"].length > 0 + end + end + + def test_stream_with_get_final_completion + VCR.use_cassette("openai/streaming_get_final_completion") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream( + messages: [{role: "user", content: "Say hello"}], + model: "gpt-4o-mini", + max_tokens: 20, + stream_options: {include_usage: true} + ) + + completion = stream.get_final_completion + + refute_nil completion + refute_nil completion.choices[0].message.content + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"] + end + end + + def test_stream_with_get_output_text + VCR.use_cassette("openai/streaming_get_output_text") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream( + messages: [{role: "user", content: "Say hello"}], + model: "gpt-4o-mini", + max_tokens: 20, + stream_options: {include_usage: true} + ) + + output_text = stream.get_output_text + + refute_empty output_text + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"] + end + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/common_test.rb b/test/braintrust/contrib/openai/instrumentation/common_test.rb new file mode 100644 index 0000000..a40bc3e --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation/common_test.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/openai/instrumentation/common" + +class Braintrust::Contrib::OpenAI::Instrumentation::CommonTest < Minitest::Test + Common = Braintrust::Contrib::OpenAI::Instrumentation::Common + + # --- .set_json_attr --- + + def test_set_json_attr_sets_attribute_with_json + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.output", '{"foo":"bar"}']) + + Common.set_json_attr(span, "braintrust.output", {foo: "bar"}) + + span.verify + end + + def test_set_json_attr_skips_nil_object + span = Minitest::Mock.new + # No expectations - set_attribute should not be called + + Common.set_json_attr(span, "braintrust.output", nil) + + span.verify + end + + def test_set_json_attr_handles_array + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.input", "[1,2,3]"]) + + Common.set_json_attr(span, "braintrust.input", [1, 2, 3]) + + span.verify + end + + # --- .parse_usage_tokens --- + + def test_parse_usage_tokens_delegates_to_trace + usage = {prompt_tokens: 10, completion_tokens: 5} + expected_result = {"prompt_tokens" => 10, "completion_tokens" => 
5, "tokens" => 15} + + mock = Minitest::Mock.new + mock.expect(:call, expected_result, [usage]) + + Braintrust::Trace.stub(:parse_openai_usage_tokens, mock) do + result = Common.parse_usage_tokens(usage) + assert_equal expected_result, result + end + + mock.verify + end + + # --- .aggregate_streaming_chunks --- + + def test_aggregate_streaming_chunks_returns_empty_hash_for_empty_array + result = Common.aggregate_streaming_chunks([]) + + assert_equal({}, result) + end + + def test_aggregate_streaming_chunks_captures_top_level_fields + chunks = [ + {id: "chatcmpl-123", created: 1234567890, model: "gpt-4", system_fingerprint: "fp_abc", choices: []} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal "chatcmpl-123", result[:id] + assert_equal 1234567890, result[:created] + assert_equal "gpt-4", result[:model] + assert_equal "fp_abc", result[:system_fingerprint] + end + + def test_aggregate_streaming_chunks_aggregates_content + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", content: "Hello"}}]}, + {choices: [{index: 0, delta: {content: " world"}}]}, + {choices: [{index: 0, delta: {content: "!"}, finish_reason: "stop"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result[:choices].length + assert_equal 0, result[:choices][0][:index] + assert_equal "assistant", result[:choices][0][:message][:role] + assert_equal "Hello world!", result[:choices][0][:message][:content] + assert_equal "stop", result[:choices][0][:finish_reason] + end + + def test_aggregate_streaming_chunks_aggregates_tool_calls + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", tool_calls: [{id: "call_123", type: "function", function: {name: "get_weather", arguments: '{"loc'}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: 'ation":'}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: '"NYC"}'}}]}, finish_reason: "tool_calls"}]} + ] + + result = 
Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result[:choices].length + tool_calls = result[:choices][0][:message][:tool_calls] + assert_equal 1, tool_calls.length + assert_equal "call_123", tool_calls[0][:id] + assert_equal "function", tool_calls[0][:type] + assert_equal "get_weather", tool_calls[0][:function][:name] + assert_equal '{"location":"NYC"}', tool_calls[0][:function][:arguments] + end + + def test_aggregate_streaming_chunks_captures_usage + chunks = [ + {choices: [{index: 0, delta: {content: "Hi"}}]}, + {choices: [{index: 0, delta: {}, finish_reason: "stop"}], usage: {prompt_tokens: 10, completion_tokens: 2, total_tokens: 12}} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal({prompt_tokens: 10, completion_tokens: 2, total_tokens: 12}, result[:usage]) + end + + def test_aggregate_streaming_chunks_handles_multiple_choices + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", content: "A"}}, {index: 1, delta: {role: "assistant", content: "B"}}]}, + {choices: [{index: 0, delta: {content: "1"}}, {index: 1, delta: {content: "2"}}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 2, result[:choices].length + assert_equal "A1", result[:choices][0][:message][:content] + assert_equal "B2", result[:choices][1][:message][:content] + end + + def test_aggregate_streaming_chunks_sets_nil_content_when_empty + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", tool_calls: [{id: "call_1", type: "function", function: {name: "foo", arguments: "{}"}}]}, finish_reason: "tool_calls"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_nil result[:choices][0][:message][:content] + end + + # --- .aggregate_responses_events --- + + def test_aggregate_responses_events_returns_empty_hash_for_empty_array + result = Common.aggregate_responses_events([]) + + assert_equal({}, result) + end + + def test_aggregate_responses_events_extracts_from_completed_event + response = 
Struct.new(:id, :output, :usage).new("resp_123", [{type: "message", content: "Hello"}], {input_tokens: 5, output_tokens: 3}) + completed_event = Struct.new(:type, :response).new(:"response.completed", response) + events = [completed_event] + + result = Common.aggregate_responses_events(events) + + assert_equal "resp_123", result[:id] + assert_equal [{type: "message", content: "Hello"}], result[:output] + assert_equal({input_tokens: 5, output_tokens: 3}, result[:usage]) + end + + def test_aggregate_responses_events_ignores_non_completed_events + other_event = Struct.new(:type).new(:"response.created") + events = [other_event] + + result = Common.aggregate_responses_events(events) + + assert_equal({}, result) + end + + def test_aggregate_responses_events_handles_missing_response_fields + response = Struct.new(:id, :output, :usage).new(nil, nil, nil) + completed_event = Struct.new(:type, :response).new(:"response.completed", response) + events = [completed_event] + + result = Common.aggregate_responses_events(events) + + assert_nil result[:id] + assert_nil result[:output] + assert_nil result[:usage] + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/responses_test.rb b/test/braintrust/contrib/openai/instrumentation/responses_test.rb new file mode 100644 index 0000000..26a3694 --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation/responses_test.rb @@ -0,0 +1,323 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/openai/instrumentation/responses" + +class Braintrust::Contrib::OpenAI::Instrumentation::ResponsesTest < Minitest::Test + Responses = Braintrust::Contrib::OpenAI::Instrumentation::Responses + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Responses::InstanceMethods]) + mock.expect(:prepend, nil, [Responses::InstanceMethods]) + + 
base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Responses.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + # Should not raise or double-prepend + Responses.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Responses::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Responses.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + assert Responses.applied?(base) + end +end + +# E2E tests for Responses instrumentation +class Braintrust::Contrib::OpenAI::Instrumentation::ResponsesE2ETest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + end + + # --- #create --- + + def test_create_creates_span_with_correct_attributes + VCR.use_cassette("openai_responses_create_non_streaming") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "What is 2+2?" 
+ ) + + refute_nil response + refute_nil response.output + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Verify input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "What is 2+2?", input + + # Verify output + assert span.attributes.key?("braintrust.output_json") + + # Verify metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal "gpt-4o-mini", metadata["model"] + assert_equal "You are a helpful assistant.", metadata["instructions"] + + # Verify metrics include time_to_first_token + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_create_captures_metadata_field + VCR.use_cassette("openai/responses_with_metadata") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "gpt-4o-mini", + input: "Say hello", + metadata: {user_id: "test-123", session: "abc"} + ) + + refute_nil response + + span = rig.drain_one + span_metadata = JSON.parse(span.attributes["braintrust.metadata"]) + + assert_equal({"user_id" => "test-123", "session" => "abc"}, span_metadata["metadata"]) + end + end + + def test_create_captures_truncation_field + VCR.use_cassette("openai/responses_with_truncation") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "gpt-4o-mini", + input: "Say hello", + truncation: "auto" + ) + + refute_nil response + + span = rig.drain_one + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + + assert_equal "auto", metadata["truncation"] + end + end + + def test_create_captures_reasoning_field + VCR.use_cassette("openai/responses_with_reasoning") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "o4-mini", + input: "What is 2+2?", + reasoning: {effort: "low"} + ) + + refute_nil response + + span = rig.drain_one + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + + assert_equal({"effort" => "low"}, metadata["reasoning"]) + end + end + + def test_create_captures_previous_response_id_field + VCR.use_cassette("openai/responses_with_previous_response_id") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + + # First request to get a response ID + first_response = client.responses.create( + model: "gpt-4o-mini", + input: "Remember the number 42" + ) + + # Second request referencing the first + response = client.responses.create( + model: "gpt-4o-mini", + input: "What number did I ask you to remember?", + previous_response_id: first_response.id + ) + + refute_nil response + + spans = rig.drain + assert_equal 2, spans.length + + # Check the second span has previous_response_id in metadata + metadata = JSON.parse(spans[1].attributes["braintrust.metadata"]) + assert_equal first_response.id, metadata["previous_response_id"] + end + end + + def test_create_records_exception_on_error + VCR.use_cassette("openai/responses_create_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + client.responses.create( + model: "gpt-4o-mini", + input: "test" + ) + end + + span = rig.drain_one + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + assert span.events.any? { |e| e.name == "exception" } + end + end + + # --- #stream --- + + def test_stream_aggregates_events + VCR.use_cassette("openai_responses_create_streaming") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Count from 1 to 3" + ) + + event_count = 0 + stream.each { |event| event_count += 1 } + + assert event_count > 0 + + # Single span created during consumption + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Span has input + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "Count from 1 to 3", input + + # Span has output + assert span.attributes.key?("braintrust.output_json") + + # Verify metadata + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal true, metadata["stream"] + + # Verify metrics include time_to_first_token + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_stream_closes_span_on_partial_consumption + VCR.use_cassette("openai_responses_stream_partial") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Count from 1 to 10" + ) + + event_count = 0 + begin + stream.each do |event| + event_count += 1 + break if event_count >= 3 + end + rescue StopIteration + end + + # Single span created during consumption + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + assert span.attributes.key?("braintrust.input_json") + end + end + + def test_stream_records_exception_on_error + VCR.use_cassette("openai/responses_stream_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "test" + ) + stream.each { |event| } + end + + # No span created when stream fails before consumption + spans = rig.drain + assert_empty spans, "No span should be created when stream fails before consumption" + end + end +end diff --git a/test/braintrust/contrib/openai/instrumentation_test.rb b/test/braintrust/contrib/openai/instrumentation_test.rb new file mode 100644 index 0000000..48b9b53 --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation_test.rb @@ -0,0 +1,161 @@ +# frozen_string_literal: true + +require "test_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/openai/patcher" + +# Cross-cutting tests that verify chat and responses instrumentation work together +class Braintrust::Contrib::OpenAI::InstrumentationTest < Minitest::Test + def setup + if Gem.loaded_specs["ruby-openai"] + skip "openai gem not available (found ruby-openai gem instead)" + elsif !Gem.loaded_specs["openai"] + skip "openai gem not available" + end + + require "openai" unless defined?(OpenAI) + end + + # --- Instance-level instrumentation 
--- + + def test_instance_instrumentation_only_patches_target_client + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::OpenAI::ChatPatcher.patched? + + rig = setup_otel_test_rig + + # Create two clients BEFORE any patching + client_traced = OpenAI::Client.new(api_key: "test-key") + client_untraced = OpenAI::Client.new(api_key: "test-key") + + # Verify neither client is patched initially + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should not be patched initially" + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?, + "class should not be patched initially" + + # Instrument only one client (instance-level) + Braintrust.instrument!(:openai, target: client_traced, tracer_provider: rig.tracer_provider) + + # Only the traced client should be patched + assert Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should be patched after instrument!" + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should NOT be patched after instrument!" 
+ + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end + + # --- Chat and Responses cross-cutting tests --- + + def test_chat_and_responses_do_not_interfere + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + VCR.use_cassette("openai_chat_and_responses_no_interference") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: get_openai_key) + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + # Make a chat completion request + chat_response = client.chat.completions.create( + messages: [{role: "user", content: "Say hello"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + refute_nil chat_response + + # Make a responses API request + responses_response = client.responses.create( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "Say goodbye" + ) + refute_nil responses_response + refute_nil responses_response.output + + # Verify both spans are correct + spans = rig.drain + assert_equal 2, spans.length + + chat_span = spans[0] + assert_equal "Chat Completion", chat_span.name + chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + assert_equal "/v1/chat/completions", chat_metadata["endpoint"] + + chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) + assert_instance_of Array, chat_input + assert_equal "user", chat_input[0]["role"] + + responses_span = spans[1] + assert_equal "openai.responses.create", responses_span.name + responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) + assert_equal "/v1/responses", responses_metadata["endpoint"] + + responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) + assert_equal "Say goodbye", responses_input + 
end + end + + def test_streaming_chat_and_responses_do_not_interfere + VCR.use_cassette("openai_streaming_chat_and_responses_no_interference") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: get_openai_key) + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + # Make a streaming chat completion request + chat_content = "" + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Count from 1 to 3"}], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: {include_usage: true} + ) + stream.each do |chunk| + delta_content = chunk.choices[0]&.delta&.content + chat_content += delta_content if delta_content + end + refute_empty chat_content + + # Make a streaming responses API request + responses_event_count = 0 + responses_stream = client.responses.stream( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "Say hello" + ) + responses_stream.each { |event| responses_event_count += 1 } + assert responses_event_count > 0 + + # Verify spans are correct (1 per streaming operation: created during consumption) + spans = rig.drain + assert_equal 2, spans.length + + # Chat streaming: single span created during consumption + chat_span = spans.find { |s| s.name == "Chat Completion" } + assert chat_span, "Expected chat span" + + chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + assert_equal true, chat_metadata["stream"] + + chat_output = JSON.parse(chat_span.attributes["braintrust.output_json"]) + assert chat_output[0]["message"]["content"].length > 0 + + # Responses streaming: single span created during consumption + responses_span = spans.find { |s| s.name == "openai.responses.create" } + assert responses_span, "Expected responses span" + + responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) + assert_equal true, responses_metadata["stream"] 
+ end + end +end diff --git a/test/braintrust/contrib/openai/integration_helper.rb b/test/braintrust/contrib/openai/integration_helper.rb new file mode 100644 index 0000000..0b4075d --- /dev/null +++ b/test/braintrust/contrib/openai/integration_helper.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +# Test helpers for OpenAI integration tests. +# Provides gem loading helpers that handle the official openai gem vs ruby-openai disambiguation. + +module Braintrust + module Contrib + module OpenAI + module IntegrationHelper + # Skip test unless official openai gem is available (not ruby-openai). + # Loads the gem if available. + def skip_unless_openai! + if Gem.loaded_specs["ruby-openai"] + skip "openai gem not available (found ruby-openai gem instead)" + end + + unless Gem.loaded_specs["openai"] + skip "openai gem not available" + end + + require "openai" unless defined?(::OpenAI) + end + + # Load official openai gem if available (doesn't skip, for tests that handle both states). + def load_openai_if_available + if Gem.loaded_specs["openai"] && !Gem.loaded_specs["ruby-openai"] + require "openai" unless defined?(::OpenAI) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/openai/integration_test.rb b/test/braintrust/contrib/openai/integration_test.rb new file mode 100644 index 0000000..d13945d --- /dev/null +++ b/test/braintrust/contrib/openai/integration_test.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +class Braintrust::Contrib::OpenAI::IntegrationTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + load_openai_if_available + @integration = Braintrust::Contrib::OpenAI::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :openai, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["openai"], @integration.gem_names + end + + # --- 
.require_paths --- + + def test_require_paths + assert_equal ["openai"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "0.1.0", @integration.minimum_version + end + + # --- .loaded? --- + + def test_loaded_returns_true_when_openai_internal_defined + skip "OpenAI gem not loaded" unless defined?(::OpenAI::Internal) + + assert @integration.loaded?, "Should be loaded when ::OpenAI::Internal is defined" + end + + def test_loaded_returns_false_when_openai_internal_not_defined + skip "OpenAI gem is loaded" if defined?(::OpenAI::Internal) + + refute @integration.loaded?, "Should not be loaded when ::OpenAI::Internal is not defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/openai/patcher_test.rb b/test/braintrust/contrib/openai/patcher_test.rb new file mode 100644 index 0000000..4b0f7af --- /dev/null +++ b/test/braintrust/contrib/openai/patcher_test.rb @@ -0,0 +1,180 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/openai/patcher" + +class Braintrust::Contrib::OpenAI::ChatPatcherTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_openai_client_defined + assert Braintrust::Contrib::OpenAI::ChatPatcher.applicable? + end + + # --- .patched? 
--- + + def test_patched_returns_false_when_module_not_included + fake_completions = Class.new + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + fake_completions = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + assert Braintrust::Contrib::OpenAI::ChatPatcher.patched? + end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_completions = Minitest::Mock.new + fake_completions.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions]) + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch + fake_completions.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions]) + + mock_chain(:chat, :completions, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch(target: client) + end + terminal.verify + 
end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end + +class Braintrust::Contrib::OpenAI::ResponsesPatcherTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_responses_method_exists + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_module_not_included + fake_responses = Class.new + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + refute Braintrust::Contrib::OpenAI::ResponsesPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + fake_responses = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.patched? 
+ end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::OpenAI::ResponsesPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_responses = Minitest::Mock.new + fake_responses.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Responses]) + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch + fake_responses.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Responses]) + + mock_chain(:responses, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end diff --git a/test/braintrust/contrib/openai/ruby-openai_test.rb b/test/braintrust/contrib/openai/ruby-openai_test.rb new file mode 100644 index 0000000..1041358 --- /dev/null +++ b/test/braintrust/contrib/openai/ruby-openai_test.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + 
+require "test_helper" +require_relative "integration_helper" +require_relative "../ruby_openai/integration_helper" + +# This test verifies that the OpenAI integration correctly identifies when the +# official `openai` gem is loaded vs other OpenAI-like gems (e.g., ruby-openai). +# +# Tests are grouped by which gem scenario they verify: +# OfficialOpenAIGemTest - runs with: appraisal openai +# RubyOpenAIGemTest - runs with: appraisal ruby-openai + +# Tests that verify behavior when the official openai gem is available +class Braintrust::Contrib::OpenAI::OfficialOpenAIGemTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + Integration = Braintrust::Contrib::OpenAI::Integration + + def setup + skip_unless_openai! + end + + def test_loaded_returns_true + assert Integration.loaded?, + "loaded? should return true when official openai gem is loaded" + end + + def test_instrument_succeeds + result = Braintrust.instrument!(:openai) + + # instrument! returns true if patching succeeded or was already done + assert result, "instrument! should return truthy for official gem" + + # At least one patcher should be in patched state + any_patched = Integration.patchers.any?(&:patched?) + assert any_patched, "at least one patcher should be patched for official gem" + end + + def test_openai_internal_is_defined + # OpenAI::Internal should be defined for official gem + assert defined?(::OpenAI::Internal), + "OpenAI::Internal should be defined when official gem is loaded" + end +end + +# Tests that verify behavior when ruby-openai gem is loaded (not official openai) +class Braintrust::Contrib::OpenAI::RubyOpenAIGemTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + Integration = Braintrust::Contrib::OpenAI::Integration + + def setup + skip_unless_ruby_openai! 
+ end + + def test_loaded_returns_false + # When ruby-openai is loaded, the OpenAI integration should NOT be loaded + # because OpenAI::Internal is not defined + refute Integration.loaded?, + "loaded? should return false when ruby-openai gem is loaded" + end + + def test_not_available + # The OpenAI integration should NOT be available when only ruby-openai is loaded + refute Integration.available?, + "Should NOT be available when only ruby-openai gem is loaded" + end +end diff --git a/test/braintrust/trace/openai_test.rb b/test/braintrust/trace/openai_test.rb deleted file mode 100644 index 22e3233..0000000 --- a/test/braintrust/trace/openai_test.rb +++ /dev/null @@ -1,1221 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" - -class Braintrust::Trace::OpenAITest < Minitest::Test - def setup - # Skip all OpenAI tests if the gem is not available - skip "OpenAI gem not available" unless defined?(OpenAI) - - # Check which gem is loaded by looking at Gem.loaded_specs - # ruby-openai has gem name "ruby-openai" - # official openai gem has gem name "openai" - if Gem.loaded_specs["ruby-openai"] - skip "openai gem not available (found ruby-openai gem instead)" - elsif !Gem.loaded_specs["openai"] - skip "Could not determine which OpenAI gem is loaded" - end - end - - def test_wrap_creates_span_for_chat_completions - VCR.use_cassette("openai/chat_completions") do - require "openai" - - # Set up test rig (includes Braintrust processor) - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it with Braintrust tracing - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a simple chat completion request with additional params to test metadata capture - response = client.chat.completions.create( - messages: [ - {role: "system", content: "You are a test assistant."}, - {role: "user", content: "Say 'test'"} - ], - model: "gpt-4o-mini", - max_tokens: 10, - temperature: 0.5 
- ) - - # Verify response - refute_nil response - refute_nil response.choices[0].message.content - - # Drain and verify span - span = rig.drain_one - - # Verify span name matches Go SDK - assert_equal "Chat Completion", span.name - - # Verify braintrust.input_json contains messages - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 2, input.length - assert_equal "system", input[0]["role"] - assert_equal "You are a test assistant.", input[0]["content"] - assert_equal "user", input[1]["role"] - assert_equal "Say 'test'", input[1]["content"] - - # Verify braintrust.output_json contains choices - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - refute_nil output[0]["message"]["content"] - refute_nil output[0]["finish_reason"] - - # Verify braintrust.metadata contains request and response metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/chat/completions", metadata["endpoint"] - assert_equal "gpt-4o-mini", metadata["model"] - assert_equal 10, metadata["max_tokens"] - assert_equal 0.5, metadata["temperature"] - refute_nil metadata["id"] - refute_nil metadata["created"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token 
metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_handles_vision_messages_with_image_url - VCR.use_cassette("openai/vision") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a vision request with content array (image_url + text) - response = client.chat.completions.create( - messages: [ - { - role: "user", - content: [ - {type: "text", text: "What color is this image?"}, - { - type: "image_url", - image_url: { - url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/320px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - } - } - ] - } - ], - model: "gpt-4o-mini", - max_tokens: 50 - ) - - # Verify response - refute_nil response - refute_nil response.choices[0].message.content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify braintrust.input_json contains messages with content array - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - - # Content should be an array, not a string - assert_instance_of Array, input[0]["content"] - assert_equal 2, input[0]["content"].length - - # First element should be text - assert_equal "text", input[0]["content"][0]["type"] - assert_equal "What color is this image?", input[0]["content"][0]["text"] - - # Second element should be image_url - assert_equal "image_url", input[0]["content"][1]["type"] - assert input[0]["content"][1]["image_url"].key?("url") - assert_match(/wikimedia/, input[0]["content"][1]["image_url"]["url"]) - - # Verify output still works - assert 
span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - refute_nil output[0]["message"]["content"] - end - end - - def test_wrap_handles_tool_messages_with_tool_call_id - VCR.use_cassette("openai/tool_messages") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # First request - model will use a tool - tools = [ - { - type: "function", - function: { - name: "get_weather", - description: "Get the current weather", - parameters: { - type: "object", - properties: { - location: {type: "string", description: "City name"} - }, - required: ["location"] - } - } - } - ] - - first_response = client.chat.completions.create( - messages: [ - {role: "user", content: "What's the weather in Paris?"} - ], - model: "gpt-4o-mini", - tools: tools, - max_tokens: 100 - ) - - # Get the tool call from response - tool_call = first_response.choices[0].message.tool_calls&.first - skip "Model didn't call tool" unless tool_call - - # Second request - provide tool result with tool_call_id - second_response = client.chat.completions.create( - messages: [ - {role: "user", content: "What's the weather in Paris?"}, - { - role: "assistant", - content: nil, - tool_calls: [ - { - id: tool_call.id, - type: "function", - function: { - name: tool_call.function.name, - arguments: tool_call.function.arguments - } - } - ] - }, - { - role: "tool", - tool_call_id: tool_call.id, - content: "Sunny, 22°C" - } - ], - model: "gpt-4o-mini", - tools: tools, - max_tokens: 100 - ) - - # Verify response - refute_nil second_response - refute_nil second_response.choices[0].message.content - - # Drain all spans (we have 2: first request + second request) - spans = rig.drain - assert_equal 2, spans.length, "Should have 2 spans (one for 
each request)" - - # We're testing the second span (second request) - span = spans[1] - - # Verify braintrust.input_json contains all message fields - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 3, input.length - - # First message: user - assert_equal "user", input[0]["role"] - assert_equal "What's the weather in Paris?", input[0]["content"] - - # Second message: assistant with tool_calls - assert_equal "assistant", input[1]["role"] - assert input[1].key?("tool_calls"), "assistant message should have tool_calls" - assert_equal 1, input[1]["tool_calls"].length - assert_equal tool_call.id, input[1]["tool_calls"][0]["id"] - assert_equal "function", input[1]["tool_calls"][0]["type"] - assert_equal tool_call.function.name, input[1]["tool_calls"][0]["function"]["name"] - - # Third message: tool response with tool_call_id - assert_equal "tool", input[2]["role"] - assert_equal tool_call.id, input[2]["tool_call_id"], "tool message should preserve tool_call_id" - assert_equal "Sunny, 22°C", input[2]["content"] - - # Verify output contains tool_calls - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - refute_nil output[0]["message"]["content"] - end - end - - def test_wrap_parses_advanced_token_metrics - VCR.use_cassette("openai/advanced_tokens") do - require "openai" - - # This test verifies that we properly parse token_details fields - # Note: We're testing with a mock since we can't control what OpenAI returns - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request (ideally with a model that returns detailed metrics) - # For now, we'll just make a normal request and verify the metrics structure - 
response = client.chat.completions.create( - messages: [ - {role: "user", content: "test"} - ], - model: "gpt-4o-mini", - max_tokens: 10 - ) - - # Verify response - refute_nil response - - # Drain and verify span - span = rig.drain_one - - # Verify braintrust.metrics exists - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - - # Basic metrics should always be present - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - - # If the response includes token_details, they should be parsed with correct naming - # The response.usage object may have: - # - prompt_tokens_details.cached_tokens → prompt_cached_tokens - # - prompt_tokens_details.audio_tokens → prompt_audio_tokens - # - completion_tokens_details.reasoning_tokens → completion_reasoning_tokens - # - completion_tokens_details.audio_tokens → completion_audio_tokens - # - # We can't force OpenAI to return these, but if they exist, we verify the naming - - if response.usage.respond_to?(:prompt_tokens_details) && response.usage.prompt_tokens_details - details = response.usage.prompt_tokens_details - if details.respond_to?(:cached_tokens) && details.cached_tokens - assert metrics.key?("prompt_cached_tokens"), "Should have prompt_cached_tokens" - assert_equal details.cached_tokens, metrics["prompt_cached_tokens"] - end - end - - if response.usage.respond_to?(:completion_tokens_details) && response.usage.completion_tokens_details - details = response.usage.completion_tokens_details - if details.respond_to?(:reasoning_tokens) && details.reasoning_tokens - assert metrics.key?("completion_reasoning_tokens"), "Should have completion_reasoning_tokens" - assert_equal details.reasoning_tokens, metrics["completion_reasoning_tokens"] - end - end - end - end - - def test_wrap_handles_streaming_chat_completions - VCR.use_cassette("openai/streaming") do - require "openai" - - # Set up test rig - rig = 
setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request - stream = client.chat.completions.stream_raw( - messages: [ - {role: "user", content: "Count from 1 to 3"} - ], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: { - include_usage: true # Request usage stats in stream - } - ) - - # Consume the stream - full_content = "" - stream.each do |chunk| - delta_content = chunk.choices[0]&.delta&.content - full_content += delta_content if delta_content - end - - # Verify we got content - refute_empty full_content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - assert_equal "Count from 1 to 3", input[0]["content"] - - # Verify output was aggregated from stream - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - assert output[0]["message"]["content"], "Should have aggregated content" - assert output[0]["message"]["content"].length > 0, "Content should not be empty" - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal true, metadata["stream"] - assert_match(/gpt-4o-mini/, metadata["model"]) # Model may include version suffix - - # Verify metrics include time_to_first_token and usage tokens - assert span.attributes.key?("braintrust.metrics"), "Should 
have braintrust.metrics" - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - - # Verify usage metrics are present (when stream_options.include_usage is set) - assert metrics.key?("prompt_tokens"), "Should have prompt_tokens metric" - assert metrics["prompt_tokens"] > 0, "prompt_tokens should be > 0" - assert metrics.key?("completion_tokens"), "Should have completion_tokens metric" - assert metrics["completion_tokens"] > 0, "completion_tokens should be > 0" - assert metrics.key?("tokens"), "Should have tokens metric" - assert metrics["tokens"] > 0, "tokens should be > 0" - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - end - end - - def test_wrap_closes_span_for_partially_consumed_stream - VCR.use_cassette("openai/partial_stream") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request - stream = client.chat.completions.stream_raw( - messages: [ - {role: "user", content: "Count from 1 to 10"} - ], - model: "gpt-4o-mini", - max_tokens: 50 - ) - - # Consume only part of the stream - chunk_count = 0 - begin - stream.each do |chunk| - chunk_count += 1 - break if chunk_count >= 2 # Stop after 2 chunks - end - rescue StopIteration - # Expected when breaking out of iteration - end - - # Span should be finished even though we didn't consume all chunks - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - - # Note: output 
will be partially aggregated - end - end - - def test_wrap_records_exception_for_create_errors - VCR.use_cassette("openai/create_error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key to trigger an error - client = OpenAI::Client.new(api_key: "invalid_key") - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request that will fail - error = assert_raises do - client.chat.completions.create( - messages: [ - {role: "user", content: "test"} - ], - model: "gpt-4o-mini" - ) - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured in status description - refute_nil span.status.description - assert span.status.description.length > 0 - - # Verify exception event was recorded - assert span.events.any? 
{ |event| event.name == "exception" }, "Should have an exception event" - - exception_event = span.events.find { |event| event.name == "exception" } - assert exception_event.attributes.key?("exception.type"), "Should have exception type" - assert exception_event.attributes.key?("exception.message"), "Should have exception message" - end - end - - def test_wrap_records_exception_for_stream_errors - VCR.use_cassette("openai/stream_error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key to trigger an error - client = OpenAI::Client.new(api_key: "invalid_key") - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request that will fail - error = assert_raises do - stream = client.chat.completions.stream_raw( - messages: [ - {role: "user", content: "test"} - ], - model: "gpt-4o-mini" - ) - - # Error occurs when we try to consume the stream - stream.each do |chunk| - # Won't get here - end - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured in status description - refute_nil span.status.description - - # Verify exception event was recorded - assert span.events.any? 
{ |event| event.name == "exception" }, "Should have an exception event" - end - end - - def test_wrap_responses_create_non_streaming - require "openai" - - VCR.use_cassette("openai_responses_create_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # Make a non-streaming responses.create request - response = client.responses.create( - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "What is 2+2?" - ) - - # Verify response - refute_nil response - refute_nil response.output - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify braintrust.input_json contains input - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "What is 2+2?", input - - # Verify braintrust.output_json contains output - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains request metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal "gpt-4o-mini", metadata["model"] - assert_equal "You are a helpful assistant.", metadata["instructions"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["tokens"] > 0 if metrics["tokens"] - end - end - - 
def test_wrap_responses_create_streaming - require "openai" - - VCR.use_cassette("openai_responses_create_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # Make a streaming responses request using .stream method - stream = client.responses.stream( - model: "gpt-4o-mini", - input: "Count from 1 to 3" - ) - - # Consume the stream - event_count = 0 - stream.each do |event| - event_count += 1 - end - - # Verify we got events - assert event_count > 0, "Should have received streaming events" - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "Count from 1 to 3", input - - # Verify output was aggregated from stream - assert span.attributes.key?("braintrust.output_json"), "Missing braintrust.output_json. 
Keys: #{span.attributes.keys}" - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output, "Output is nil: #{output.inspect}" - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal true, metadata["stream"] - - # Verify metrics were captured if available - if span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["tokens"] > 0 if metrics["tokens"] - end - end - end - - def test_wrap_responses_stream_partial_consumption - require "openai" - - VCR.use_cassette("openai_responses_stream_partial") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # Make a streaming request - stream = client.responses.stream( - model: "gpt-4o-mini", - input: "Count from 1 to 10" - ) - - # Consume only part of the stream - event_count = 0 - begin - stream.each do |event| - event_count += 1 - break if event_count >= 3 # Stop after 3 events - end - rescue StopIteration - # Expected when breaking out of iteration - end - - # Span should be finished even though we didn't consume all events - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - end - end - - def test_chat_and_responses_do_not_interfere - require "openai" - - # This test verifies that chat completions and responses API can coexist - # without interfering with each other when 
both wrappers are active - VCR.use_cassette("openai_chat_and_responses_no_interference") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it (wraps BOTH chat and responses) - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # First, make a chat completion request - chat_response = client.chat.completions.create( - messages: [{role: "user", content: "Say hello"}], - model: "gpt-4o-mini", - max_tokens: 10 - ) - refute_nil chat_response - - # Then, make a responses API request - # This is where the bug would manifest if the wrappers interfere - responses_response = client.responses.create( - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Say goodbye" - ) - refute_nil responses_response - refute_nil responses_response.output - - # Drain both spans - spans = rig.drain - assert_equal 2, spans.length, "Should have 2 spans (chat + responses)" - - # Verify first span is for chat completions - chat_span = spans[0] - assert_equal "Chat Completion", chat_span.name - chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - assert_equal "/v1/chat/completions", chat_metadata["endpoint"] - assert_equal "gpt-4o-mini", chat_metadata["model"] - - # Verify input is messages array (chat API structure) - chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) - assert_instance_of Array, chat_input - assert_equal "user", chat_input[0]["role"] - assert_equal "Say hello", chat_input[0]["content"] - - responses_span = spans[1] - assert_equal "openai.responses.create", responses_span.name - - responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) - assert_equal "/v1/responses", responses_metadata["endpoint"] - assert_equal "gpt-4o-mini", 
responses_metadata["model"] - assert_equal "You are a helpful assistant.", responses_metadata["instructions"] - - responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) - assert_equal "Say goodbye", responses_input - end - end - - def test_streaming_chat_and_responses_do_not_interfere - require "openai" - - # This test verifies that streaming for both chat completions and responses API - # work correctly without interfering when both streaming wrappers are active. - # This is critical because streaming uses different aggregation mechanisms. - VCR.use_cassette("openai_streaming_chat_and_responses_no_interference") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it (wraps BOTH chat and responses) - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # First, make a STREAMING chat completion request - chat_content = "" - stream = client.chat.completions.stream_raw( - messages: [{role: "user", content: "Count from 1 to 3"}], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: {include_usage: true} - ) - stream.each do |chunk| - delta_content = chunk.choices[0]&.delta&.content - chat_content += delta_content if delta_content - end - refute_empty chat_content - - # Then, make a STREAMING responses API request - # This is where the bug would manifest if streaming wrappers interfere - responses_event_count = 0 - responses_stream = client.responses.stream( - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Say hello" - ) - responses_stream.each do |event| - responses_event_count += 1 - end - assert responses_event_count > 0, "Should have received streaming events from responses API" - - # Drain both spans - spans = rig.drain - assert_equal 2, 
spans.length, "Should have 2 spans (chat streaming + responses streaming)" - - # Verify first span is for STREAMING chat completions - chat_span = spans[0] - assert_equal "Chat Completion", chat_span.name - chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - assert_equal "/v1/chat/completions", chat_metadata["endpoint"] - assert_equal true, chat_metadata["stream"], "Chat span should have stream flag" - assert_match(/gpt-4o-mini/, chat_metadata["model"]) - - # Verify chat input is messages array (chat API structure) - chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) - assert_instance_of Array, chat_input - assert_equal "user", chat_input[0]["role"] - assert_equal "Count from 1 to 3", chat_input[0]["content"] - - # Verify chat output was aggregated from stream chunks - chat_output = JSON.parse(chat_span.attributes["braintrust.output_json"]) - assert_equal 1, chat_output.length - assert_equal "assistant", chat_output[0]["message"]["role"] - refute_nil chat_output[0]["message"]["content"] - assert chat_output[0]["message"]["content"].length > 0, "Chat content should be aggregated" - - responses_span = spans[1] - assert_equal "openai.responses.create", responses_span.name - - responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) - assert_equal "/v1/responses", responses_metadata["endpoint"] - assert_equal true, responses_metadata["stream"] - assert_match(/gpt-4o-mini/, responses_metadata["model"]) - assert_equal "You are a helpful assistant.", responses_metadata["instructions"] - - responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) - assert_equal "Say hello", responses_input - - assert responses_span.attributes.key?("braintrust.output_json") - responses_output = JSON.parse(responses_span.attributes["braintrust.output_json"]) - refute_nil responses_output - end - end - - def test_traced_vs_raw_chat_completions_non_streaming - require "openai" - - # This test verifies 
that tracing doesn't mutate the response - # by comparing output from a raw client vs a traced client - VCR.use_cassette("openai_traced_vs_raw_chat_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Make identical requests - params = { - messages: [{role: "user", content: "Say hello"}], - model: "gpt-4o-mini", - max_tokens: 10 - } - - raw_response = raw_client.chat.completions.create(**params) - traced_response = traced_client.chat.completions.create(**params) - - assert_match(/gpt-4o-mini/, raw_response.model) - assert_match(/gpt-4o-mini/, traced_response.model) - assert_equal raw_response.choices.length, traced_response.choices.length - assert_equal raw_response.choices[0].message.role, traced_response.choices[0].message.role - refute_nil raw_response.choices[0].message.content - refute_nil traced_response.choices[0].message.content - assert_equal raw_response.choices[0].finish_reason, traced_response.choices[0].finish_reason - assert_operator raw_response.usage.total_tokens, :>, 0 - assert_operator traced_response.usage.total_tokens, :>, 0 - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "Chat Completion", spans[0].name - end - end - - def test_traced_vs_raw_chat_completions_streaming - require "openai" - - # This test verifies that tracing doesn't mutate streaming responses - VCR.use_cassette("openai_traced_vs_raw_chat_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Make identical streaming requests - 
params = { - messages: [{role: "user", content: "Count from 1 to 3"}], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: {include_usage: true} - } - - raw_chunks = [] - raw_stream = raw_client.chat.completions.stream_raw(**params) - raw_stream.each do |chunk| - raw_chunks << chunk - end - - traced_chunks = [] - traced_stream = traced_client.chat.completions.stream_raw(**params) - traced_stream.each do |chunk| - traced_chunks << chunk - end - - assert_operator (raw_chunks.length - traced_chunks.length).abs, :<=, 2 - if raw_chunks[0].respond_to?(:model) && traced_chunks[0].respond_to?(:model) - assert_match(/gpt-4o-mini/, raw_chunks[0].model) - assert_match(/gpt-4o-mini/, traced_chunks[0].model) - end - - raw_content = raw_chunks.map { |c| c.choices[0]&.delta&.content }.compact.join - traced_content = traced_chunks.map { |c| c.choices[0]&.delta&.content }.compact.join - refute_empty raw_content - refute_empty traced_content - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "Chat Completion", spans[0].name - end - end - - def test_traced_vs_raw_responses_non_streaming - require "openai" - - # This test verifies that tracing doesn't mutate responses API output - VCR.use_cassette("openai_traced_vs_raw_responses_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless raw_client.respond_to?(:responses) - - # Make identical requests - params = { - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Say hello" - } - - raw_response = raw_client.responses.create(**params) - traced_response = traced_client.responses.create(**params) - - 
assert_match(/gpt-4o-mini/, raw_response.model) - assert_match(/gpt-4o-mini/, traced_response.model) - assert_equal raw_response.output.length, traced_response.output.length - - raw_output = raw_response.output.first.content.first - traced_output = traced_response.output.first.content.first - assert_equal raw_output[:type], traced_output[:type] - refute_nil raw_output[:text] - refute_nil traced_output[:text] - assert_operator raw_output[:text].length, :>, 0 - assert_operator traced_output[:text].length, :>, 0 - - assert_operator raw_response.usage.total_tokens, :>, 0 - assert_operator traced_response.usage.total_tokens, :>, 0 - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "openai.responses.create", spans[0].name - end - end - - def test_traced_vs_raw_responses_streaming - require "openai" - - # This test verifies that tracing doesn't mutate responses API streaming - VCR.use_cassette("openai_traced_vs_raw_responses_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless raw_client.respond_to?(:responses) - - params = { - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Count from 1 to 3" - } - - raw_events = [] - raw_stream = raw_client.responses.stream(**params) - raw_stream.each do |event| - raw_events << event - end - - traced_events = [] - traced_stream = traced_client.responses.stream(**params) - traced_stream.each do |event| - traced_events << event - end - - assert_equal raw_events.length, traced_events.length - - raw_event_types = raw_events.map(&:type) - traced_event_types = traced_events.map(&:type) - assert_equal raw_event_types, 
traced_event_types - - raw_completed = raw_events.find { |e| e.type == "response.completed" } - traced_completed = traced_events.find { |e| e.type == "response.completed" } - - if raw_completed && traced_completed - assert_match(/gpt-4o-mini/, raw_completed.response.model) - assert_match(/gpt-4o-mini/, traced_completed.response.model) - assert_operator raw_completed.response.usage.total_tokens, :>, 0 - assert_operator traced_completed.response.usage.total_tokens, :>, 0 - end - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "openai.responses.create", spans[0].name - end - end - - def test_wrap_handles_streaming_with_text - VCR.use_cassette("openai/streaming_text") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request using .stream (not .stream_raw) - stream = client.chat.completions.stream( - messages: [ - {role: "user", content: "Count from 1 to 3"} - ], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: { - include_usage: true - } - ) - - # Consume the stream using .text() method - full_text = "" - stream.text.each do |delta| - full_text += delta - end - - # Verify we got content - refute_empty full_text - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - assert_equal "Count from 1 to 3", input[0]["content"] - - # Verify output was aggregated from stream - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, 
output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - assert output[0]["message"]["content"], "Should have aggregated content" - assert output[0]["message"]["content"].length > 0, "Content should not be empty" - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal true, metadata["stream"] - assert_match(/gpt-4o-mini/, metadata["model"]) - - # Verify metrics were captured (if include_usage was respected) - assert span.attributes.key?("braintrust.metrics"), "Should have braintrust.metrics" - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["tokens"] > 0 if metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_handles_streaming_with_get_final_completion - VCR.use_cassette("openai/streaming_get_final_completion") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request using .stream - stream = client.chat.completions.stream( - messages: [ - {role: "user", content: "Say hello"} - ], - model: "gpt-4o-mini", - max_tokens: 20, - stream_options: { - include_usage: true - } - ) - - # Use .get_final_completion() to block and get final result - completion = stream.get_final_completion - - # Verify we got a completion - refute_nil completion - refute_nil completion.choices - assert completion.choices.length > 0 - refute_nil completion.choices[0].message.content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - 
assert_equal "Chat Completion", span.name - - # Verify output was captured - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert output[0]["message"]["content"], "Should have captured content" - end - end - - def test_wrap_handles_streaming_with_get_output_text - VCR.use_cassette("openai/streaming_get_output_text") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request using .stream - stream = client.chat.completions.stream( - messages: [ - {role: "user", content: "Say hello"} - ], - model: "gpt-4o-mini", - max_tokens: 20, - stream_options: { - include_usage: true - } - ) - - # Use .get_output_text() to block and get final text - output_text = stream.get_output_text - - # Verify we got text - refute_nil output_text - refute_empty output_text - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify output was captured - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert output[0]["message"]["content"], "Should have captured content" - end - end -end diff --git a/test/fixtures/vcr_cassettes/openai/responses_create_error.yml b/test/fixtures/vcr_cassettes/openai/responses_create_error.yml new file mode 100644 index 0000000..a962d94 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_create_error.yml @@ -0,0 +1,93 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"test"}' + headers: + Accept-Encoding: + - 
gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer invalid_key + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '38' + response: + status: + code: 401 + message: Unauthorized + headers: + Date: + - Fri, 26 Dec 2025 15:54:02 GMT + Content-Type: + - application/json + Content-Length: + - '240' + Connection: + - keep-alive + Www-Authenticate: + - Bearer realm="OpenAI API" + Openai-Version: + - '2020-10-01' + X-Request-Id: + - req_f36e8b127fb944aa8668c38d9d94dc8d + Openai-Processing-Ms: + - '15' + X-Envoy-Upstream-Service-Time: + - '17' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=B8TFIln4qDneeipgm74c7YA75ZPAbWQ87zY.SAUlQ.4-1766764442-1.0.1.1-Hu_z7RRJr3dOLcUcYpBBb3fYopeWNngzBv3NCnzRXfa0TewdfqnAycEqX0yELTqRK51a1fgi.iSEbjuSv_jC5wUSFrM1AVcYNXiKRP5nlxU; + path=/; expires=Fri, 26-Dec-25 16:24:02 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=syD8zCWt82.PG7gWWOPB37IQRmsBX2vdKNxztHEhuKg-1766764442726-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41c1266d0598b9-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "error": { + "message": "Incorrect API key provided: invalid_key. 
You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + recorded_at: Fri, 26 Dec 2025 15:54:02 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_stream_error.yml b/test/fixtures/vcr_cassettes/openai/responses_stream_error.yml new file mode 100644 index 0000000..76d43aa --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_stream_error.yml @@ -0,0 +1,93 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"test","stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer invalid_key + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '52' + response: + status: + code: 401 + message: Unauthorized + headers: + Date: + - Fri, 26 Dec 2025 15:54:12 GMT + Content-Type: + - application/json + Content-Length: + - '240' + Connection: + - keep-alive + Www-Authenticate: + - Bearer realm="OpenAI API" + Openai-Version: + - '2020-10-01' + X-Request-Id: + - req_fde4ec8f349e4459bd295a8db0cfa8a2 + Openai-Processing-Ms: + - '26' + X-Envoy-Upstream-Service-Time: + - '28' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=cxJtiBq.wJ3ile9vOTZrTzpfwJKbqg6tQ5IKTv._xfk-1766764452-1.0.1.1-ilLZ9aJZ15NL_MIplarRpfiSS.dPFBDZVu4nvQg7dIQZkiH6qSyhWCCJtQWdZ0f6dtoEm4CLuNKo9b1FdVt618gVy7h6gmwgN4rYSPJhNXU; + path=/; expires=Fri, 26-Dec-25 16:24:12 GMT; domain=.api.openai.com; HttpOnly; + Secure; 
SameSite=None + - _cfuvid=2vDmgGJFXrFGUm5KeF6m2Rhx53MraieXxTmZWA8P3.M-1766764452532-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41c1638d933eb5-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "error": { + "message": "Incorrect API key provided: invalid_key. You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + recorded_at: Fri, 26 Dec 2025 15:54:12 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml b/test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml new file mode 100644 index 0000000..96feafe --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml @@ -0,0 +1,171 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Say hello","metadata":{"user_id":"test-123","session":"abc"}}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '93' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:12:44 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + 
X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999972' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_fd0f3f2417fe43dfa46bae14cc3aff2e + Openai-Processing-Ms: + - '596' + X-Envoy-Upstream-Service-Time: + - '600' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=8bz9W7X41Jibw8fCNQfsasrLzokbP5lzjAVlTNUJ5BE-1766765564-1.0.1.1-zbm7guE6Dgs_WOnz_iHMAlHUwNxmn3FPBZlU1OuBbr0.qZYm.5PThX4ibEGvc5uBdLluQDO7wQmGPp7kolHqg2Gnx4ATuicDhNk7UpferyA; + path=/; expires=Fri, 26-Dec-25 16:42:44 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=IXkv1khkAHUPUq6E.JuJmqDQITcVjyprVvTBPz6Q3.c-1766765564377-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41dc852f9ed836-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_0764b673f1c2ba2200694eb3fbc1408197a9541aab17fa4a38", + "object": "response", + "created_at": 1766765563, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765564, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_0764b673f1c2ba2200694eb3fc16688197886a7c5b366adb22", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "Hello! How can I assist you today?" 
+ } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 9, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 10, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 19 + }, + "user": null, + "metadata": { + "user_id": "test-123", + "session": "abc" + } + } + recorded_at: Fri, 26 Dec 2025 16:12:44 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml b/test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml new file mode 100644 index 0000000..a87331c --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml @@ -0,0 +1,333 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Remember the number 42"}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '56' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:09:45 
GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999967' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_0ab64f5a094b914093bf296cdca77497 + Openai-Processing-Ms: + - '1136' + X-Envoy-Upstream-Service-Time: + - '1139' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=SMkDI5BdoHSsZz4hLNunLabg2lxz85BmCv8tEyQH4NQ-1766765385-1.0.1.1-42idQ1Rhrx7PotV3B187x2UBY3hboPghf.6fNZUjuov8mFtjC16nBad7X2nYxAj72jpxRSJqmXkobwumWVUDoX0C4NZeyfG26OKYf5_yCkU; + path=/; expires=Fri, 26-Dec-25 16:39:45 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=qlWmlIGXmgynnOfHtkxc74AkUtqXYIPbz0PpIqPgVvs-1766765385465-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41d8214a900044-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_0c60fcf96b8d6d2700694eb3484ea48193ac4cceb658fcd486", + "object": "response", + "created_at": 1766765384, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765385, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_0c60fcf96b8d6d2700694eb3490d888193ab0b3a2727ce65f3", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": 
"Got it! The number 42 is noted. How can I assist you with it?" + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 12, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 19, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 31 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:09:45 GMT +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"What number did I ask you to remember?","previous_response_id":"resp_0c60fcf96b8d6d2700694eb3484ea48193ac4cceb658fcd486"}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '153' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:09:46 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + 
X-Ratelimit-Remaining-Tokens: + - '149999932' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_46d4877214f246738e58d31fab959770 + Openai-Processing-Ms: + - '1061' + X-Envoy-Upstream-Service-Time: + - '1063' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=ocCldRJ.Lx8YzZMYhiezJrjNEO99zk5raYwhuvBokSo-1766765386-1.0.1.1-TWrYTuD.wS61zLZ7gzUcNW7O.HUr7hyi2l_Koe2XkyMJSSnww.j29twJQ_wJ8lyF4BprHU60hxghSkGBd8HUXgpdt2MHW_0X5EeuQIXEwd8; + path=/; expires=Fri, 26-Dec-25 16:39:46 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=KqrEnHpW_rouxgseTBUOd6ltl0yL7_UFioqmp7m6s7U-1766765386985-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41d82b3d6b0044-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_0c60fcf96b8d6d2700694eb349e6c08193ab164d7790c02afd", + "object": "response", + "created_at": 1766765385, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765386, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_0c60fcf96b8d6d2700694eb34aad6c81939c250bd6c98f4729", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "You asked me to remember the number 42." 
+ } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": "resp_0c60fcf96b8d6d2700694eb3484ea48193ac4cceb658fcd486", + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 47, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 11, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 58 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:09:46 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml b/test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml new file mode 100644 index 0000000..8f3a8a4 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml @@ -0,0 +1,173 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"o4-mini","input":"What is 2+2?","reasoning":{"effort":"low"}}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '71' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:11:39 GMT + 
Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999780' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_93d774acb37d492a94abb8d1777c10e4 + Openai-Processing-Ms: + - '2675' + X-Envoy-Upstream-Service-Time: + - '2680' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=hfpoec9m9b4G1l8P0FMILfD.LxdBjOW0Ihj2hgy0a6w-1766765499-1.0.1.1-Waas3S9_hag4CT.yQVaFXf.YUeJd22FhvjB0e4D1tGIRMvEkEkXjNZQmlMpFW5uomY35IoheQ1ZjMZ.H2hIxTLs.6wTpM914GeRsVi0JxTc; + path=/; expires=Fri, 26-Dec-25 16:41:39 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=pfpF5kd2yEhd79IRBBZwP_YK0LolT.iGYo_bHWz1t3g-1766765499630-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41dae38a7a233c-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_05c9b376eaef5df500694eb3b8f0488190aad671821487d718", + "object": "response", + "created_at": 1766765496, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765499, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "o4-mini-2025-04-16", + "output": [ + { + "id": "rs_05c9b376eaef5df500694eb3baeb3c819098d8a271d979e4e5", + "type": "reasoning", + "summary": [] + }, + { + "id": "msg_05c9b376eaef5df500694eb3bb32508190be0d4fba1f18ed2b", + "type": "message", + "status": 
"completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "2 + 2 = 4." + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": "low", + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 13, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 14, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 27 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:11:39 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml b/test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml new file mode 100644 index 0000000..8201003 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml @@ -0,0 +1,168 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Say hello","truncation":"auto"}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '63' + response: + status: + code: 200 + message: 
OK + headers: + Date: + - Fri, 26 Dec 2025 16:08:31 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999972' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_6e9eb60fab3543edb2d0accafecb1573 + Openai-Processing-Ms: + - '1633' + X-Envoy-Upstream-Service-Time: + - '1635' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=qSGY6qtNUcYKuk4_4m3mYxKoLDyIbTKA1nFX0tTt39s-1766765311-1.0.1.1-NbC6iV7rcg2BeUAxyB.HEi.9ZjvaxVWs8JiY2O_dpB7NUyPuDVuOPzv.YBxFPzm5D8NgCe9MYxkt6d2G8V4OjULe4Xqi206YOkY1WnO_zSc; + path=/; expires=Fri, 26-Dec-25 16:38:31 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=NMb7jI4HKKmKeilvSZD.bb1QxYZnK1BJHRdBcWxwVLI-1766765311632-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41d650cc89e239-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_081844c3e063c0a200694eb2fdfa708197a76ed12cff1fe0f3", + "object": "response", + "created_at": 1766765310, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765311, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_081844c3e063c0a200694eb2ff0f908197b778bf59e1cc197d", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", 
+ "annotations": [], + "logprobs": [], + "text": "Hello! How can I assist you today?" + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "auto", + "usage": { + "input_tokens": 9, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 10, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 19 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:08:31 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml b/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml new file mode 100644 index 0000000..8701555 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml @@ -0,0 +1,130 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini","max_tokens":20,"stream_options":{"include_usage":true},"stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + 
- '144' + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 17 Dec 2025 21:39:31 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '770' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '783' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999995' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_ec79554aeb544044b6c2cacda6bf5deb + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=SULTPs8wSyJcfdzyW7AfYgAxNgFQhBWBh.cql5YgWK0-1766007571-1.0.1.1-dQJHZ9TQEcAnlZRHvdg1XfsuT75SbqxOy7CshY.xN_PW51.1TgtNQ9fBUxZb6_8NQvDeFYe3EjwY3BuSvB4Tx.xZYCofVzH1yOnByqls5oU; + path=/; expires=Wed, 17-Dec-25 22:09:31 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=jTnwp.LHHLW48BnvmEnO2LcqYuzDdW.89mY1nRyPvtU-1766007571885-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9af992d469010010-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KaaErWmpF"} + + data: 
{"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Hf0SpV"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GGciGwweQs"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KOLqgQ8"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xKM2bpu"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"o0w7wEHVB"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"pIQQ"} + + data: 
{"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"0PBQOon"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"gtUVW"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"tvwmRpNgk7"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"72xRr"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[],"usage":{"prompt_tokens":9,"completion_tokens":9,"total_tokens":18,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"NF9h1hYqo9hk"} + + data: [DONE] + + recorded_at: Wed, 17 Dec 2025 21:39:32 GMT +recorded_with: VCR 6.3.1 +... 
diff --git a/test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml b/test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml new file mode 100644 index 0000000..b923abd --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml @@ -0,0 +1,125 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"messages":[{"role":"user","content":"Say either ''hello'' or ''hi'' + in one word"}],"model":"gpt-4o-mini","max_tokens":10,"n":2,"stream_options":{"include_usage":true},"stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '179' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 15:53:38 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '203' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '412' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999987' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_ec522a5cd1a244368302f316ad41173d + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - 
DYNAMIC + Set-Cookie: + - __cf_bm=hHlghp8HNFh4QUbUZbVcH72blcdZfpsMhy06JGp1I4Q-1766764418-1.0.1.1-OYsXT2WSAV4eQlgZBCF7AOq6VVRFuN3YUeztTY49tignIkkaN.gmMmdRmNQID92B1JGCDv5VYmYFXUAUiZQzkugmVd.MQd0ULVBS2uBWC1I; + path=/; expires=Fri, 26-Dec-25 16:23:38 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=O6M5xgfBX7r6bd848UUsi71Vfrdorj5aqqUkSzNa5gY-1766764418274-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41c0891aec11fd-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KzDrhZVz8"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"bOwYV2"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"3AwjpG0rsH"} + + data: 
{"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ZsYMs6apS"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"InGNn4"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"s3ED3sFS7z"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"5qR7E"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"k9EbC"} + + data: 
{"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":19,"completion_tokens":4,"total_tokens":23,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"Dh4cs1gqrFe"} + + data: [DONE] + + recorded_at: Fri, 26 Dec 2025 15:53:38 GMT +recorded_with: VCR 6.3.1 +... From d52999568c4a07cc0a0f5ff8fc263c10e0e3be19 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 09:55:58 -0500 Subject: [PATCH 08/27] Changed: Share common implementation between OpenAI integrations --- .../contrib/openai/instrumentation/chat.rb | 32 ++-- .../contrib/openai/instrumentation/common.rb | 54 ++---- .../openai/instrumentation/responses.rb | 22 +-- lib/braintrust/contrib/support/openai.rb | 72 ++++++++ lib/braintrust/contrib/support/otel.rb | 23 +++ lib/braintrust/trace/tokens.rb | 60 ------- test/braintrust/contrib/openai/common_test.rb | 156 ++++++++++++++++++ .../openai/instrumentation/common_test.rb | 46 ------ .../braintrust/contrib/support/openai_test.rb | 91 ++++++++++ test/braintrust/contrib/support/otel_test.rb | 35 ++++ test/braintrust/trace/tokens_test.rb | 78 --------- 11 files changed, 418 insertions(+), 251 deletions(-) create mode 100644 lib/braintrust/contrib/support/openai.rb create mode 100644 lib/braintrust/contrib/support/otel.rb create mode 100644 test/braintrust/contrib/openai/common_test.rb create mode 100644 test/braintrust/contrib/support/openai_test.rb create mode 100644 test/braintrust/contrib/support/otel_test.rb diff --git a/lib/braintrust/contrib/openai/instrumentation/chat.rb b/lib/braintrust/contrib/openai/instrumentation/chat.rb index 50ea6e2..ac06229 100644 --- a/lib/braintrust/contrib/openai/instrumentation/chat.rb +++ 
b/lib/braintrust/contrib/openai/instrumentation/chat.rb @@ -5,6 +5,8 @@ require_relative "common" require_relative "../../../internal/time" +require_relative "../../support/otel" +require_relative "../../support/openai" module Braintrust module Contrib @@ -105,23 +107,23 @@ def set_input(span, params) return unless params[:messages] messages_array = params[:messages].map(&:to_h) - Common.set_json_attr(span, "braintrust.input_json", messages_array) + Support::OTel.set_json_attr(span, "braintrust.input_json", messages_array) end def set_output(span, response) return unless response.respond_to?(:choices) && response.choices&.any? choices_array = response.choices.map(&:to_h) - Common.set_json_attr(span, "braintrust.output_json", choices_array) + Support::OTel.set_json_attr(span, "braintrust.output_json", choices_array) end def set_metrics(span, response, time_to_first_token) metrics = {} if response.respond_to?(:usage) && response.usage - metrics = Common.parse_usage_tokens(response.usage) + metrics = Support::OpenAI.parse_usage_tokens(response.usage) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
end def finalize_metadata(span, metadata, response) @@ -130,7 +132,7 @@ def finalize_metadata(span, metadata, response) metadata["model"] = response.model if response.respond_to?(:model) && response.model metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end @@ -179,7 +181,7 @@ def trace_consumption(ctx) tracer.in_span("Chat Completion") do |span| completions_instance.send(:set_input, span, params) - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) yield @@ -197,16 +199,16 @@ def finalize_stream_span(span, start_time, metadata, completions_instance) # Set output from accumulated choices if snapshot.choices&.any? choices_array = snapshot.choices.map(&:to_h) - Common.set_json_attr(span, "braintrust.output_json", choices_array) + Support::OTel.set_json_attr(span, "braintrust.output_json", choices_array) end # Set metrics metrics = {} if snapshot.usage - metrics = Common.parse_usage_tokens(snapshot.usage) + metrics = Support::OpenAI.parse_usage_tokens(snapshot.usage) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
# Update metadata with response fields metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id @@ -214,7 +216,7 @@ def finalize_stream_span(span, start_time, metadata, completions_instance) metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint metadata["service_tier"] = snapshot.service_tier if snapshot.respond_to?(:service_tier) && snapshot.service_tier - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) rescue => e Braintrust::Log.debug("Failed to get completion snapshot: #{e.message}") end @@ -251,7 +253,7 @@ def each(&block) tracer.in_span("Chat Completion") do |span| completions_instance.send(:set_input, span, params) - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) super do |chunk| time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) @@ -269,15 +271,15 @@ def finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) return if aggregated_chunks.empty? aggregated_output = Common.aggregate_streaming_chunks(aggregated_chunks) - Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) + Support::OTel.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) # Set metrics metrics = {} if aggregated_output[:usage] - metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + metrics = Support::OpenAI.parse_usage_tokens(aggregated_output[:usage]) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
# Update metadata with response fields metadata["id"] = aggregated_output[:id] if aggregated_output[:id] @@ -285,7 +287,7 @@ def finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) metadata["model"] = aggregated_output[:model] if aggregated_output[:model] metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] metadata["service_tier"] = aggregated_output[:service_tier] if aggregated_output[:service_tier] - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end diff --git a/lib/braintrust/contrib/openai/instrumentation/common.rb b/lib/braintrust/contrib/openai/instrumentation/common.rb index 039353b..4c2f112 100644 --- a/lib/braintrust/contrib/openai/instrumentation/common.rb +++ b/lib/braintrust/contrib/openai/instrumentation/common.rb @@ -1,38 +1,16 @@ # frozen_string_literal: true -require "json" - -require_relative "../../../trace/tokens" - module Braintrust module Contrib module OpenAI module Instrumentation - # Chat completions instrumentation for OpenAI. - # Provides modules that can be prepended to OpenAI::Client to instrument chat.completions API. + # Aggregation utilities for official OpenAI SDK instrumentation. + # These are specific to the official openai gem's data structures (symbol keys, SDK objects). 
module Common - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from OpenAI API response - # @param usage [Hash, Object] usage object from OpenAI response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_openai_usage_tokens(usage) - end - - # Aggregate streaming chunks into a single response structure - # Follows the Go SDK logic for aggregating deltas - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with choices, usage, etc. + # Aggregate streaming chunks into a single response structure. + # Specific to official OpenAI SDK which uses symbol keys and SDK objects. + # @param chunks [Array] array of chunk hashes from stream (symbol keys) + # @return [Hash] aggregated response with choices, usage, etc. (symbol keys) def self.aggregate_streaming_chunks(chunks) return {} if chunks.empty? 
@@ -57,9 +35,7 @@ def self.aggregate_streaming_chunks(chunks) aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) - if chunk[:usage] - aggregated[:usage] = chunk[:usage] - end + aggregated[:usage] = chunk[:usage] if chunk[:usage] # Process choices next unless chunk[:choices].is_a?(Array) @@ -79,14 +55,11 @@ def self.aggregate_streaming_chunks(chunks) choice_data[index][:role] ||= delta[:role] # Aggregate content - if delta[:content] - choice_data[index][:content] << delta[:content] - end + choice_data[index][:content] << delta[:content] if delta[:content] - # Aggregate tool_calls (similar to Go SDK logic) + # Aggregate tool_calls if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? delta[:tool_calls].each do |tool_call_delta| - # Check if this is a new tool call or continuation if tool_call_delta[:id] && !tool_call_delta[:id].empty? # New tool call (dup strings to avoid mutating input) choice_data[index][:tool_calls] << { @@ -108,9 +81,7 @@ def self.aggregate_streaming_chunks(chunks) end # Capture finish_reason - if choice[:finish_reason] - choice_data[index][:finish_reason] = choice[:finish_reason] - end + choice_data[index][:finish_reason] = choice[:finish_reason] if choice[:finish_reason] end end @@ -134,8 +105,8 @@ def self.aggregate_streaming_chunks(chunks) aggregated end - # Aggregate responses streaming events into a single response structure - # Follows similar logic to Python SDK's _postprocess_streaming_results + # Aggregate responses streaming events into a single response structure. + # Specific to official OpenAI SDK which returns typed event objects. # @param events [Array] array of event objects from stream # @return [Hash] aggregated response with output, usage, etc. 
def self.aggregate_responses_events(events) @@ -146,7 +117,6 @@ def self.aggregate_responses_events(events) if completed_event&.respond_to?(:response) response = completed_event.response - # Convert the response object to a hash-like structure for logging return { id: response.respond_to?(:id) ? response.id : nil, output: response.respond_to?(:output) ? response.output : nil, diff --git a/lib/braintrust/contrib/openai/instrumentation/responses.rb b/lib/braintrust/contrib/openai/instrumentation/responses.rb index 909b7f4..5bb4da6 100644 --- a/lib/braintrust/contrib/openai/instrumentation/responses.rb +++ b/lib/braintrust/contrib/openai/instrumentation/responses.rb @@ -5,6 +5,8 @@ require_relative "common" require_relative "../../../internal/time" +require_relative "../../support/otel" +require_relative "../../support/openai" module Braintrust module Contrib @@ -86,27 +88,27 @@ def build_metadata(params, stream: false) def set_input(span, params) return unless params[:input] - Common.set_json_attr(span, "braintrust.input_json", params[:input]) + Support::OTel.set_json_attr(span, "braintrust.input_json", params[:input]) end def set_output(span, response) return unless response.respond_to?(:output) && response.output - Common.set_json_attr(span, "braintrust.output_json", response.output) + Support::OTel.set_json_attr(span, "braintrust.output_json", response.output) end def set_metrics(span, response, time_to_first_token) metrics = {} if response.respond_to?(:usage) && response.usage - metrics = Common.parse_usage_tokens(response.usage) + metrics = Support::OpenAI.parse_usage_tokens(response.usage) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
end def finalize_metadata(span, metadata, response) metadata["id"] = response.id if response.respond_to?(:id) && response.id - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end @@ -139,7 +141,7 @@ def each(&block) tracer.in_span("openai.responses.create") do |span| responses_instance.send(:set_input, span, params) - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) begin super do |event| @@ -163,19 +165,19 @@ def finalize_stream_span(span, aggregated_events, time_to_first_token, metadata) return if aggregated_events.empty? aggregated_output = Common.aggregate_responses_events(aggregated_events) - Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] + Support::OTel.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] # Set metrics metrics = {} if aggregated_output[:usage] - metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + metrics = Support::OpenAI.parse_usage_tokens(aggregated_output[:usage]) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
# Update metadata with response fields metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end diff --git a/lib/braintrust/contrib/support/openai.rb b/lib/braintrust/contrib/support/openai.rb new file mode 100644 index 0000000..27581b5 --- /dev/null +++ b/lib/braintrust/contrib/support/openai.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Support + # OpenAI API-specific utilities shared across OpenAI integrations. + # These work with normalized data structures (hashes), not library-specific objects. + module OpenAI + # Parse OpenAI usage tokens into normalized Braintrust metrics. + # Handles standard fields and *_tokens_details nested objects. + # Works with both Hash objects and SDK response objects (via to_h). + # @param usage [Hash, Object] usage object from OpenAI response + # @return [Hash] normalized metrics + def self.parse_usage_tokens(usage) + metrics = {} + return metrics unless usage + + usage_hash = usage.respond_to?(:to_h) ? 
usage.to_h : usage + return metrics unless usage_hash.is_a?(Hash) + + # Field mappings: OpenAI → Braintrust + # Supports both Chat Completions API (prompt_tokens, completion_tokens) + # and Responses API (input_tokens, output_tokens) + field_map = { + "prompt_tokens" => "prompt_tokens", + "completion_tokens" => "completion_tokens", + "total_tokens" => "tokens", + # Responses API uses different field names + "input_tokens" => "prompt_tokens", + "output_tokens" => "completion_tokens" + } + + # Prefix mappings for *_tokens_details + prefix_map = { + "prompt" => "prompt", + "completion" => "completion", + # Responses API uses input/output prefixes + "input" => "prompt", + "output" => "completion" + } + + usage_hash.each do |key, value| + key_str = key.to_s + + if value.is_a?(Numeric) + target = field_map[key_str] + metrics[target] = value.to_i if target + elsif key_str.end_with?("_tokens_details") + # Convert to hash if it's an object (OpenAI SDK returns objects) + details_hash = value.respond_to?(:to_h) ? value.to_h : value + next unless details_hash.is_a?(Hash) + + raw_prefix = key_str.sub(/_tokens_details$/, "") + prefix = prefix_map[raw_prefix] || raw_prefix + details_hash.each do |detail_key, detail_value| + next unless detail_value.is_a?(Numeric) + metrics["#{prefix}_#{detail_key}"] = detail_value.to_i + end + end + end + + # Calculate total if missing + if !metrics.key?("tokens") && metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") + metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] + end + + metrics + end + end + end + end +end diff --git a/lib/braintrust/contrib/support/otel.rb b/lib/braintrust/contrib/support/otel.rb new file mode 100644 index 0000000..d073337 --- /dev/null +++ b/lib/braintrust/contrib/support/otel.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "json" + +module Braintrust + module Contrib + module Support + # OpenTelemetry utilities shared across all integrations. 
+ module OTel + # Helper to safely set a JSON attribute on a span + # Only sets the attribute if obj is present + # @param span [OpenTelemetry::Trace::Span] the span to set attribute on + # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") + # @param obj [Object] the object to serialize to JSON + # @return [void] + def self.set_json_attr(span, attr_name, obj) + return unless obj + span.set_attribute(attr_name, JSON.generate(obj)) + end + end + end + end +end diff --git a/lib/braintrust/trace/tokens.rb b/lib/braintrust/trace/tokens.rb index d40b5d8..679df12 100644 --- a/lib/braintrust/trace/tokens.rb +++ b/lib/braintrust/trace/tokens.rb @@ -2,66 +2,6 @@ module Braintrust module Trace - # Parse OpenAI usage tokens into normalized Braintrust metrics. - # Handles standard fields and *_tokens_details nested objects. - # @param usage [Hash, Object] usage object from OpenAI response - # @return [Hash] normalized metrics - def self.parse_openai_usage_tokens(usage) - metrics = {} - return metrics unless usage - - usage_hash = usage.respond_to?(:to_h) ? 
usage.to_h : usage - return metrics unless usage_hash.is_a?(Hash) - - # Field mappings: OpenAI → Braintrust - # Supports both Chat Completions API (prompt_tokens, completion_tokens) - # and Responses API (input_tokens, output_tokens) - field_map = { - "prompt_tokens" => "prompt_tokens", - "completion_tokens" => "completion_tokens", - "total_tokens" => "tokens", - # Responses API uses different field names - "input_tokens" => "prompt_tokens", - "output_tokens" => "completion_tokens" - } - - # Prefix mappings for *_tokens_details - prefix_map = { - "prompt" => "prompt", - "completion" => "completion", - # Responses API uses input/output prefixes - "input" => "prompt", - "output" => "completion" - } - - usage_hash.each do |key, value| - key_str = key.to_s - - if value.is_a?(Numeric) - target = field_map[key_str] - metrics[target] = value.to_i if target - elsif key_str.end_with?("_tokens_details") - # Convert to hash if it's an object (OpenAI SDK returns objects) - details_hash = value.respond_to?(:to_h) ? value.to_h : value - next unless details_hash.is_a?(Hash) - - raw_prefix = key_str.sub(/_tokens_details$/, "") - prefix = prefix_map[raw_prefix] || raw_prefix - details_hash.each do |detail_key, detail_value| - next unless detail_value.is_a?(Numeric) - metrics["#{prefix}_#{detail_key}"] = detail_value.to_i - end - end - end - - # Calculate total if missing - if !metrics.key?("tokens") && metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") - metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] - end - - metrics - end - # Parse Anthropic usage tokens into normalized Braintrust metrics. # Accumulates cache tokens into prompt_tokens and calculates total. 
# @param usage [Hash, Object] usage object from Anthropic response diff --git a/test/braintrust/contrib/openai/common_test.rb b/test/braintrust/contrib/openai/common_test.rb new file mode 100644 index 0000000..8152c89 --- /dev/null +++ b/test/braintrust/contrib/openai/common_test.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/openai/instrumentation/common" + +class Braintrust::Contrib::OpenAI::Instrumentation::CommonTest < Minitest::Test + # ============================ + # aggregate_streaming_chunks + # ============================ + + def test_aggregate_streaming_chunks_empty + assert_equal({}, Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks([])) + end + + def test_aggregate_streaming_chunks_basic + chunks = [ + {id: "chatcmpl-123", model: "gpt-4", choices: [{index: 0, delta: {role: "assistant"}}]}, + {choices: [{index: 0, delta: {content: "Hello"}}]}, + {choices: [{index: 0, delta: {content: " world"}, finish_reason: "stop"}]}, + {usage: {prompt_tokens: 10, completion_tokens: 5}} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal "chatcmpl-123", result[:id] + assert_equal "gpt-4", result[:model] + assert_equal 1, result[:choices].length + assert_equal "assistant", result[:choices][0][:message][:role] + assert_equal "Hello world", result[:choices][0][:message][:content] + assert_equal "stop", result[:choices][0][:finish_reason] + assert_equal 10, result[:usage][:prompt_tokens] + end + + def test_aggregate_streaming_chunks_captures_system_fingerprint + chunks = [ + {id: "chatcmpl-123", system_fingerprint: "fp_abc123", choices: [{index: 0, delta: {role: "assistant"}}]}, + {choices: [{index: 0, delta: {content: "Hi"}, finish_reason: "stop"}]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal "fp_abc123", result[:system_fingerprint] + 
end + + def test_aggregate_streaming_chunks_with_tool_calls + chunks = [ + {id: "chatcmpl-123", choices: [{index: 0, delta: {role: "assistant", tool_calls: [{id: "call_abc", type: "function", function: {name: "get_weather", arguments: ""}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: '{"loc'}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: 'ation":"NYC"}'}}]}, finish_reason: "tool_calls"}]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result[:choices].length + assert_equal 1, result[:choices][0][:message][:tool_calls].length + assert_equal "call_abc", result[:choices][0][:message][:tool_calls][0][:id] + assert_equal "get_weather", result[:choices][0][:message][:tool_calls][0][:function][:name] + assert_equal '{"location":"NYC"}', result[:choices][0][:message][:tool_calls][0][:function][:arguments] + end + + def test_aggregate_streaming_chunks_multiple_choices + chunks = [ + {id: "chatcmpl-123", choices: [ + {index: 0, delta: {role: "assistant"}}, + {index: 1, delta: {role: "assistant"}} + ]}, + {choices: [ + {index: 0, delta: {content: "First"}}, + {index: 1, delta: {content: "Second"}} + ]}, + {choices: [ + {index: 0, delta: {}, finish_reason: "stop"}, + {index: 1, delta: {}, finish_reason: "stop"} + ]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal 2, result[:choices].length + assert_equal "First", result[:choices][0][:message][:content] + assert_equal "Second", result[:choices][1][:message][:content] + end + + def test_aggregate_streaming_chunks_empty_content_is_nil + chunks = [ + {id: "chatcmpl-123", choices: [{index: 0, delta: {role: "assistant"}}]}, + {choices: [{index: 0, delta: {}, finish_reason: "stop"}]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_nil 
result[:choices][0][:message][:content] + end + + # ============================ + # aggregate_responses_events + # ============================ + + def test_aggregate_responses_events_empty + assert_equal({}, Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events([])) + end + + def test_aggregate_responses_events_with_completed_event + # Create mock event objects that respond to :type and :response + completed_response = Struct.new(:id, :output, :usage, keyword_init: true).new( + id: "resp_123", + output: [{"type" => "message", "content" => "Hello"}], + usage: {input_tokens: 10, output_tokens: 5} + ) + + completed_event = Struct.new(:type, :response, keyword_init: true).new( + type: :"response.completed", + response: completed_response + ) + + events = [ + Struct.new(:type).new(:"response.created"), + Struct.new(:type).new(:"response.in_progress"), + completed_event + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events(events) + + assert_equal "resp_123", result[:id] + assert_equal [{"type" => "message", "content" => "Hello"}], result[:output] + assert_equal({input_tokens: 10, output_tokens: 5}, result[:usage]) + end + + def test_aggregate_responses_events_without_completed_event + events = [ + Struct.new(:type).new(:"response.created"), + Struct.new(:type).new(:"response.in_progress") + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events(events) + + assert_equal({}, result) + end + + def test_aggregate_responses_events_handles_missing_response_fields + completed_response = Struct.new(:id, keyword_init: true).new(id: "resp_123") + + completed_event = Struct.new(:type, :response, keyword_init: true).new( + type: :"response.completed", + response: completed_response + ) + + events = [completed_event] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events(events) + + assert_equal "resp_123", result[:id] + assert_nil 
result[:output] + assert_nil result[:usage] + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/common_test.rb b/test/braintrust/contrib/openai/instrumentation/common_test.rb index a40bc3e..8132280 100644 --- a/test/braintrust/contrib/openai/instrumentation/common_test.rb +++ b/test/braintrust/contrib/openai/instrumentation/common_test.rb @@ -6,52 +6,6 @@ class Braintrust::Contrib::OpenAI::Instrumentation::CommonTest < Minitest::Test Common = Braintrust::Contrib::OpenAI::Instrumentation::Common - # --- .set_json_attr --- - - def test_set_json_attr_sets_attribute_with_json - span = Minitest::Mock.new - span.expect(:set_attribute, nil, ["braintrust.output", '{"foo":"bar"}']) - - Common.set_json_attr(span, "braintrust.output", {foo: "bar"}) - - span.verify - end - - def test_set_json_attr_skips_nil_object - span = Minitest::Mock.new - # No expectations - set_attribute should not be called - - Common.set_json_attr(span, "braintrust.output", nil) - - span.verify - end - - def test_set_json_attr_handles_array - span = Minitest::Mock.new - span.expect(:set_attribute, nil, ["braintrust.input", "[1,2,3]"]) - - Common.set_json_attr(span, "braintrust.input", [1, 2, 3]) - - span.verify - end - - # --- .parse_usage_tokens --- - - def test_parse_usage_tokens_delegates_to_trace - usage = {prompt_tokens: 10, completion_tokens: 5} - expected_result = {"prompt_tokens" => 10, "completion_tokens" => 5, "tokens" => 15} - - mock = Minitest::Mock.new - mock.expect(:call, expected_result, [usage]) - - Braintrust::Trace.stub(:parse_openai_usage_tokens, mock) do - result = Common.parse_usage_tokens(usage) - assert_equal expected_result, result - end - - mock.verify - end - # --- .aggregate_streaming_chunks --- def test_aggregate_streaming_chunks_returns_empty_hash_for_empty_array diff --git a/test/braintrust/contrib/support/openai_test.rb b/test/braintrust/contrib/support/openai_test.rb new file mode 100644 index 0000000..fd5fd28 --- /dev/null +++ 
b/test/braintrust/contrib/support/openai_test.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/support/openai" + +class Braintrust::Contrib::Support::OpenAITest < Minitest::Test + # =================== + # parse_usage_tokens + # =================== + + def test_maps_standard_fields + usage = {"prompt_tokens" => 10, "completion_tokens" => 20, "total_tokens" => 30} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + assert_equal 30, metrics["tokens"] + end + + def test_handles_symbol_keys + usage = {prompt_tokens: 10, completion_tokens: 20, total_tokens: 30} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + end + + def test_handles_prompt_tokens_details + usage = { + "prompt_tokens" => 100, + "prompt_tokens_details" => {"cached_tokens" => 80} + } + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 80, metrics["prompt_cached_tokens"] + end + + def test_handles_completion_tokens_details + usage = { + "completion_tokens" => 50, + "completion_tokens_details" => {"reasoning_tokens" => 30} + } + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 30, metrics["completion_reasoning_tokens"] + end + + def test_handles_object_style_token_details + # OpenAI SDK returns objects with to_h, not plain hashes + details_object = Struct.new(:cached_tokens, :audio_tokens, keyword_init: true) + usage_object = Struct.new(:prompt_tokens, :completion_tokens, :total_tokens, :prompt_tokens_details, keyword_init: true) + + usage = usage_object.new( + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + prompt_tokens_details: details_object.new(cached_tokens: 80, audio_tokens: 0) + ) + + metrics = 
Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 80, metrics["prompt_cached_tokens"] + assert_equal 0, metrics["prompt_audio_tokens"] + end + + def test_returns_empty_hash_for_nil + assert_equal({}, Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(nil)) + end + + def test_calculates_total_if_missing + usage = {"prompt_tokens" => 10, "completion_tokens" => 20} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 30, metrics["tokens"] + end + + def test_handles_responses_api_field_names + # Responses API uses input_tokens/output_tokens + usage = {"input_tokens" => 10, "output_tokens" => 20} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + assert_equal 30, metrics["tokens"] + end +end diff --git a/test/braintrust/contrib/support/otel_test.rb b/test/braintrust/contrib/support/otel_test.rb new file mode 100644 index 0000000..4971b7b --- /dev/null +++ b/test/braintrust/contrib/support/otel_test.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/support/otel" + +class Braintrust::Contrib::Support::OTelTest < Minitest::Test + # --- .set_json_attr --- + + def test_set_json_attr_sets_attribute_with_json + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.output", '{"foo":"bar"}']) + + Braintrust::Contrib::Support::OTel.set_json_attr(span, "braintrust.output", {foo: "bar"}) + + span.verify + end + + def test_set_json_attr_skips_nil_object + span = Minitest::Mock.new + # No expectations - set_attribute should not be called + + Braintrust::Contrib::Support::OTel.set_json_attr(span, "braintrust.output", nil) + + span.verify + end + + def test_set_json_attr_handles_array + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.input", "[1,2,3]"]) + + 
Braintrust::Contrib::Support::OTel.set_json_attr(span, "braintrust.input", [1, 2, 3]) + + span.verify + end +end diff --git a/test/braintrust/trace/tokens_test.rb b/test/braintrust/trace/tokens_test.rb index cd4e7e3..c990c58 100644 --- a/test/braintrust/trace/tokens_test.rb +++ b/test/braintrust/trace/tokens_test.rb @@ -4,84 +4,6 @@ require "braintrust/trace/tokens" class TokensTest < Minitest::Test - # =================== - # OpenAI Token Parser - # =================== - - def test_openai_maps_standard_fields - usage = {"prompt_tokens" => 10, "completion_tokens" => 20, "total_tokens" => 30} - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 10, metrics["prompt_tokens"] - assert_equal 20, metrics["completion_tokens"] - assert_equal 30, metrics["tokens"] - end - - def test_openai_handles_symbol_keys - usage = {prompt_tokens: 10, completion_tokens: 20, total_tokens: 30} - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 10, metrics["prompt_tokens"] - end - - def test_openai_handles_prompt_tokens_details - usage = { - "prompt_tokens" => 100, - "prompt_tokens_details" => {"cached_tokens" => 80} - } - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 80, metrics["prompt_cached_tokens"] - end - - def test_openai_handles_completion_tokens_details - usage = { - "completion_tokens" => 50, - "completion_tokens_details" => {"reasoning_tokens" => 30} - } - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 30, metrics["completion_reasoning_tokens"] - end - - def test_openai_handles_object_style_token_details - # OpenAI SDK returns objects with to_h, not plain hashes - details_object = Struct.new(:cached_tokens, :audio_tokens, keyword_init: true) - usage_object = Struct.new(:prompt_tokens, :completion_tokens, :total_tokens, :prompt_tokens_details, keyword_init: true) - - usage = usage_object.new( - prompt_tokens: 100, - completion_tokens: 50, - total_tokens: 
150, - prompt_tokens_details: details_object.new(cached_tokens: 80, audio_tokens: 0) - ) - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 80, metrics["prompt_cached_tokens"] - assert_equal 0, metrics["prompt_audio_tokens"] - end - - def test_openai_returns_empty_hash_for_nil - assert_equal({}, Braintrust::Trace.parse_openai_usage_tokens(nil)) - end - - def test_openai_calculates_total_if_missing - usage = {"prompt_tokens" => 10, "completion_tokens" => 20} - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 30, metrics["tokens"] - end - - # ======================= - # Anthropic Token Parser - # ======================= - def test_anthropic_maps_input_output_tokens usage = {"input_tokens" => 10, "output_tokens" => 20} From 3b037f0035e7507f6e83ea48d073dfb302a3807f Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 09:47:37 -0500 Subject: [PATCH 09/27] Changed: Migrated ruby-openai to new integration API --- Rakefile | 16 +- examples/contrib/ruby_openai/basic.rb | 78 +++ .../ruby_openai/deprecated.rb} | 11 +- examples/contrib/ruby_openai/instance.rb | 61 ++ examples/contrib/ruby_openai/streaming.rb | 80 +++ .../ruby_openai/golden.rb} | 8 +- lib/braintrust/contrib.rb | 2 + .../contrib/ruby_openai/deprecated.rb | 24 + .../ruby_openai/instrumentation/chat.rb | 149 +++++ .../ruby_openai/instrumentation/common.rb | 138 ++++ .../ruby_openai/instrumentation/responses.rb | 146 +++++ .../contrib/ruby_openai/integration.rb | 58 ++ lib/braintrust/contrib/ruby_openai/patcher.rb | 85 +++ lib/braintrust/trace.rb | 26 - .../alexrudall/ruby-openai/ruby-openai.rb | 377 ----------- .../contrib/ruby_openai/deprecated_test.rb | 45 ++ .../ruby_openai/instrumentation/chat_test.rb | 362 ++++++++++ .../instrumentation/common_test.rb | 149 +++++ .../instrumentation/responses_test.rb | 232 +++++++ .../ruby_openai/instrumentation_test.rb | 52 ++ .../contrib/ruby_openai/integration_helper.rb | 33 + 
.../contrib/ruby_openai/integration_test.rb | 65 ++ .../contrib/ruby_openai/openai_test.rb | 92 +++ .../contrib/ruby_openai/patcher_test.rb | 206 ++++++ .../contrib/alexrudall_ruby_openai_test.rb | 620 ------------------ .../responses_with_metadata.yml | 151 +++++ 26 files changed, 2233 insertions(+), 1033 deletions(-) create mode 100644 examples/contrib/ruby_openai/basic.rb rename examples/{alexrudall_openai.rb => contrib/ruby_openai/deprecated.rb} (80%) create mode 100644 examples/contrib/ruby_openai/instance.rb create mode 100644 examples/contrib/ruby_openai/streaming.rb rename examples/internal/{alexrudall_ruby_openai.rb => contrib/ruby_openai/golden.rb} (98%) create mode 100644 lib/braintrust/contrib/ruby_openai/deprecated.rb create mode 100644 lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb create mode 100644 lib/braintrust/contrib/ruby_openai/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb create mode 100644 lib/braintrust/contrib/ruby_openai/integration.rb create mode 100644 lib/braintrust/contrib/ruby_openai/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb create mode 100644 test/braintrust/contrib/ruby_openai/deprecated_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/integration_helper.rb create mode 100644 test/braintrust/contrib/ruby_openai/integration_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/openai_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/patcher_test.rb delete mode 100644 test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb 
create mode 100644 test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml diff --git a/Rakefile b/Rakefile index 8bd2b54..84f03cd 100644 --- a/Rakefile +++ b/Rakefile @@ -104,9 +104,19 @@ namespace :test do end namespace :contrib do - desc "Run OpenAI contrib tests" - task :openai do - sh "bundle exec appraisal openai ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib/openai/**/*_test.rb').each { |f| require_relative f }\"" + # Tasks per integration + [ + {name: :openai}, + {name: :ruby_openai, appraisal: "ruby-openai"} + ].each do |integration| + name = integration[:name] + appraisal = integration[:appraisal] || integration[:name] + folder = integration[:name] + + desc "Run #{name} contrib tests" + task name do + sh "bundle exec appraisal #{appraisal} ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib/#{folder}/**/*_test.rb').each { |f| require_relative f }\"" + end end end diff --git a/examples/contrib/ruby_openai/basic.rb b/examples/contrib/ruby_openai/basic.rb new file mode 100644 index 0000000..8d58f4f --- /dev/null +++ b/examples/contrib/ruby_openai/basic.rb @@ -0,0 +1,78 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: ruby-openai (alexrudall) chat completion with Braintrust tracing (class-level) +# +# This example demonstrates how to automatically trace OpenAI API calls using +# the ruby-openai gem (by alexrudall) with Braintrust class-level instrumentation. +# All OpenAI clients created after instrument! is called will be traced. +# +# Note: ruby-openai is an optional development dependency. To run this example: +# 1. Install ruby-openai: gem install ruby-openai +# 2. 
Run from the SDK root: ruby examples/contrib/ruby_openai/basic.rb +# +# Usage: +# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/basic.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Initialize Braintrust and instrument all ruby-openai clients +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_openai) + +# Create OpenAI client (ruby-openai uses access_token parameter) +# This client is automatically instrumented because of the class-level instrument! call +client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") +root_span = nil + +# Make a chat completion request (automatically traced!) +puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." +response = tracer.in_span("examples/contrib/ruby_openai/basic.rb") do |span| + root_span = span + + # ruby-openai uses: client.chat(parameters: {...}) + client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + max_tokens: 100 + } + ) +end + +# Print the response (ruby-openai returns a hash) +puts "\n Response received!" 
+puts "\nAssistant: #{response.dig("choices", 0, "message", "content")}" + +# Print usage stats +if response["usage"] + puts "\nToken usage:" + puts " Prompt tokens: #{response["usage"]["prompt_tokens"]}" + puts " Completion tokens: #{response["usage"]["completion_tokens"]}" + puts " Total tokens: #{response["usage"]["total_tokens"]}" +end + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n Trace sent to Braintrust!" diff --git a/examples/alexrudall_openai.rb b/examples/contrib/ruby_openai/deprecated.rb similarity index 80% rename from examples/alexrudall_openai.rb rename to examples/contrib/ruby_openai/deprecated.rb index 78782a2..370208d 100755 --- a/examples/alexrudall_openai.rb +++ b/examples/contrib/ruby_openai/deprecated.rb @@ -8,15 +8,20 @@ # Example: ruby-openai (alexrudall) chat completion with Braintrust tracing # +# DEPRECATED: This example uses the old wrap() API which is deprecated. +# Please use the new instrument!() API instead: +# - examples/contrib/ruby_openai/basic.rb - Class-level instrumentation (all clients) +# - examples/contrib/ruby_openai/instance.rb - Instance-level instrumentation (specific client) +# # This example demonstrates how to automatically trace OpenAI API calls using # the ruby-openai gem (by alexrudall) with Braintrust. # # Note: ruby-openai is an optional development dependency. To run this example: # 1. Install ruby-openai: gem install ruby-openai -# 2. Run from the SDK root: ruby examples/alexrudall_openai.rb +# 2. 
Run from the SDK root: ruby examples/contrib/ruby_openai/deprecated.rb # # Usage: -# OPENAI_API_KEY=your-openai-key ruby examples/alexrudall_openai.rb +# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/deprecated.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -39,7 +44,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." -response = tracer.in_span("examples/alexrudall_openai.rb") do |span| +response = tracer.in_span("examples/contrib/ruby_openai/deprecated.rb") do |span| root_span = span # ruby-openai uses: client.chat(parameters: {...}) diff --git a/examples/contrib/ruby_openai/instance.rb b/examples/contrib/ruby_openai/instance.rb new file mode 100644 index 0000000..97ce7ab --- /dev/null +++ b/examples/contrib/ruby_openai/instance.rb @@ -0,0 +1,61 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: Instance-level ruby-openai (alexrudall) instrumentation +# +# This shows how to instrument a specific client instance rather than +# all OpenAI clients globally. +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_openai ruby examples/contrib/ruby_openai/instance.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create two client instances (ruby-openai uses access_token) +client_traced = ::OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) +client_untraced = ::OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) + +# Only instrument one of them +Braintrust.instrument!(:ruby_openai, target: client_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") +root_span = nil + +tracer.in_span("examples/contrib/ruby_openai/instance.rb") do |span| + root_span = span + + puts "Calling traced client..." 
+ response1 = client_traced.chat( + parameters: { + messages: [{role: "user", content: "Say 'traced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + } + ) + puts "Traced response: #{response1.dig("choices", 0, "message", "content")}" + + puts "\nCalling untraced client..." + response2 = client_untraced.chat( + parameters: { + messages: [{role: "user", content: "Say 'untraced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + } + ) + puts "Untraced response: #{response2.dig("choices", 0, "message", "content")}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first client call should have an openai.chat span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_openai/streaming.rb b/examples/contrib/ruby_openai/streaming.rb new file mode 100644 index 0000000..b958744 --- /dev/null +++ b/examples/contrib/ruby_openai/streaming.rb @@ -0,0 +1,80 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: ruby-openai (alexrudall) streaming with Braintrust tracing +# +# This example demonstrates how to trace streaming OpenAI API calls +# using the ruby-openai gem. Shows the callback-based streaming pattern. 
+# +# Usage: +# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/ruby_openai/streaming.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_openai) + +# ruby-openai uses access_token parameter +client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-streaming-example") +root_span = nil + +tracer.in_span("examples/contrib/ruby_openai/streaming.rb") do |span| + root_span = span + + # Pattern 1: Chat streaming with callback proc + # Pass a proc to the stream parameter to receive chunks + puts "=== Pattern 1: Chat streaming with callback ===" + print "Assistant: " + + client.chat( + parameters: { + model: "gpt-4o-mini", + max_tokens: 100, + messages: [{role: "user", content: "Count from 1 to 5, one number per line."}], + stream_options: {include_usage: true}, + stream: proc do |chunk, _bytesize| + content = chunk.dig("choices", 0, "delta", "content") + print content if content + end + } + ) + puts "\n" + + # Pattern 2: Responses API streaming (if available) + if client.respond_to?(:responses) + puts "=== Pattern 2: Responses API streaming ===" + print "Assistant: " + + client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "Name 3 colors.", + stream: proc do |chunk, _event| + if chunk["type"] == "response.output_text.delta" + print chunk["delta"] + end + end + } + ) + puts "\n" + else + puts "=== Pattern 2: Responses API (skipped - not available in this version) ===" + end +end + +puts "\nView this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown + +puts "\nTrace sent to Braintrust!" 
diff --git a/examples/internal/alexrudall_ruby_openai.rb b/examples/internal/contrib/ruby_openai/golden.rb similarity index 98% rename from examples/internal/alexrudall_ruby_openai.rb rename to examples/internal/contrib/ruby_openai/golden.rb index a66b15f..4528e3b 100755 --- a/examples/internal/alexrudall_ruby_openai.rb +++ b/examples/internal/contrib/ruby_openai/golden.rb @@ -25,7 +25,7 @@ # as the openai gem integration for identical inputs. # # Usage: -# OPENAI_API_KEY=key bundle exec appraisal ruby-openai ruby examples/internal/alexrudall_ruby_openai.rb +# OPENAI_API_KEY=key bundle exec appraisal ruby-openai ruby examples/internal/contrib/ruby_openai/golden.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -37,16 +37,16 @@ # Get a tracer for this example tracer = OpenTelemetry.tracer_provider.tracer("alexrudall-ruby-openai-comprehensive-example") -# Create OpenAI client (ruby-openai style) and wrap it +# Create OpenAI client (ruby-openai style) and instrument it client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client) +Braintrust.instrument!(:ruby_openai, target: client) puts "ruby-openai (alexrudall) Comprehensive Features Example" puts "=" * 60 # Wrap all examples under a single parent trace root_span = nil -tracer.in_span("examples/internal/alexrudall_ruby_openai.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_openai/golden.rb") do |span| root_span = span # Example 1: Vision - Image Understanding [SKIPPED] diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index 66746dc..ddf85ed 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -100,6 +100,8 @@ def tracer_for(target, name: "braintrust") # Load integration stubs (eager load minimal metadata). 
require_relative "contrib/openai/integration" +require_relative "contrib/ruby_openai/integration" # Register integrations Braintrust::Contrib::OpenAI::Integration.register! +Braintrust::Contrib::RubyOpenAI::Integration.register! diff --git a/lib/braintrust/contrib/ruby_openai/deprecated.rb b/lib/braintrust/contrib/ruby_openai/deprecated.rb new file mode 100644 index 0000000..225a7e8 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/deprecated.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old ruby-openai integration API. +# This file now just delegates to the new API. + +module Braintrust + module Trace + module AlexRudall + module RubyOpenAI + # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans. + # This is the legacy API - delegates to the new contrib framework. + # + # @param client [OpenAI::Client] the OpenAI client to wrap + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) + # @return [OpenAI::Client] the wrapped client + def self.wrap(client, tracer_provider: nil) + Log.warn("Braintrust::Trace::AlexRudall::RubyOpenAI.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: tracer_provider) + client + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb b/lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb new file mode 100644 index 0000000..e40f1cf --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../support/otel" +require_relative "../../support/openai" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module RubyOpenAI + module Instrumentation + # Chat 
completions instrumentation for ruby-openai. + # Provides module that can be prepended to OpenAI::Client to instrument the chat method. + module Chat + def self.included(base) + # Guard against double-wrapping: Check if patch is already in the ancestor chain. + # This prevents double instrumentation if class-level patching was already applied, + # and this patch is being applied to a singleton-class. (Special case.) + # + # Ruby's prepend() doesn't check the full inheritance chain, so without this guard, + # the instrumentation could be added twice. + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model frequency_penalty logit_bias logprobs max_tokens n + presence_penalty response_format seed service_tier stop + stream stream_options temperature top_p top_logprobs + tools tool_choice parallel_tool_calls user functions function_call + ].freeze + + module InstanceMethods + # Wrap chat method for ruby-openai gem + # ruby-openai API: client.chat(parameters: {...}) + def chat(parameters:) + tracer = Braintrust::Contrib.tracer_for(self) + + tracer.in_span("Chat Completion") do |span| + is_streaming = streaming?(parameters) + metadata = build_metadata(parameters) + set_input(span, parameters) + + aggregated_chunks = [] + time_to_first_token = nil + response = nil + response_data = {} + + if is_streaming + # Setup a time measurement for the first chunk from the stream + start_time = nil + parameters = wrap_stream_callback(parameters, aggregated_chunks) do + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + end + start_time = Braintrust::Internal::Time.measure + + # Then initiate the stream + response = super(parameters: parameters) + + if !aggregated_chunks.empty? 
+ response_data = Common.aggregate_streaming_chunks(aggregated_chunks) + end + else + # Make a time measurement synchronously around the API call + time_to_first_token = Braintrust::Internal::Time.measure do + response = super(parameters: parameters) + response_data = response if response + end + end + + set_output(span, response_data) + set_metrics(span, response_data, time_to_first_token) + finalize_metadata(span, metadata, response_data) + + response + end + end + + private + + def streaming?(parameters) + parameters.key?(:stream) && parameters[:stream].is_a?(Proc) + end + + def wrap_stream_callback(parameters, aggregated_chunks) + original_stream_proc = parameters[:stream] + parameters = parameters.dup + + parameters[:stream] = proc do |chunk, bytesize| + yield if aggregated_chunks.empty? + aggregated_chunks << chunk + original_stream_proc.call(chunk, bytesize) + end + + parameters + end + + def build_metadata(parameters) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/chat/completions" + } + + Chat::METADATA_FIELDS.each do |field| + next unless parameters.key?(field) + # Stream param is a Proc - just mark as true + metadata[field.to_s] = (field == :stream) ? true : parameters[field] + end + + metadata + end + + def set_input(span, parameters) + return unless parameters[:messages] + Support::OTel.set_json_attr(span, "braintrust.input_json", parameters[:messages]) + end + + def set_output(span, response_data) + choices = response_data[:choices] || response_data["choices"] + return unless choices&.any? + Support::OTel.set_json_attr(span, "braintrust.output_json", choices) + end + + def set_metrics(span, response_data, time_to_first_token) + usage = response_data[:usage] || response_data["usage"] + metrics = usage ? Support::OpenAI.parse_usage_tokens(usage) : {} + metrics["time_to_first_token"] = time_to_first_token || 0.0 + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response_data) + %w[id created model system_fingerprint service_tier].each do |field| + value = response_data[field.to_sym] || response_data[field] + metadata[field] = value if value + end + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/instrumentation/common.rb b/lib/braintrust/contrib/ruby_openai/instrumentation/common.rb new file mode 100644 index 0000000..3fff4bf --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/instrumentation/common.rb @@ -0,0 +1,138 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module RubyOpenAI + module Instrumentation + # Aggregation utilities for ruby-openai gem instrumentation. + # These are specific to the ruby-openai gem's data structures (string keys, plain hashes). + module Common + # Aggregate streaming chunks into a single response structure. + # Specific to ruby-openai gem which uses string keys and plain hashes. + # @param chunks [Array] array of chunk hashes from stream (string keys) + # @return [Hash] aggregated response with choices, usage, etc. (string keys) + def self.aggregate_streaming_chunks(chunks) + return {} if chunks.empty? 
+ + # Initialize aggregated structure + aggregated = { + "id" => nil, + "created" => nil, + "model" => nil, + "system_fingerprint" => nil, + "usage" => nil, + "choices" => [] + } + + # Track aggregated content and tool_calls for each choice index + choice_data = {} + + chunks.each do |chunk| + # Capture top-level fields from any chunk that has them + aggregated["id"] ||= chunk["id"] + aggregated["created"] ||= chunk["created"] + aggregated["model"] ||= chunk["model"] + aggregated["system_fingerprint"] ||= chunk["system_fingerprint"] + + # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) + aggregated["usage"] = chunk["usage"] if chunk["usage"] + + # Process choices + choices = chunk["choices"] + next unless choices.is_a?(Array) + + choices.each do |choice| + index = choice["index"] || 0 + choice_data[index] ||= { + "index" => index, + "role" => nil, + "content" => +"", + "tool_calls" => [], + "finish_reason" => nil + } + + delta = choice["delta"] || {} + + # Aggregate role (set once from first delta that has it) + choice_data[index]["role"] ||= delta["role"] + + # Aggregate content + choice_data[index]["content"] << delta["content"] if delta["content"] + + # Aggregate tool_calls + tool_calls = delta["tool_calls"] + if tool_calls.is_a?(Array) && tool_calls.any? + tool_calls.each do |tool_call_delta| + tc_id = tool_call_delta["id"] + if tc_id && !tc_id.empty? + # New tool call + choice_data[index]["tool_calls"] << { + "id" => tc_id, + "type" => tool_call_delta["type"], + "function" => { + "name" => +(tool_call_delta.dig("function", "name") || ""), + "arguments" => +(tool_call_delta.dig("function", "arguments") || "") + } + } + elsif choice_data[index]["tool_calls"].any? 
+ # Continuation - append arguments to last tool call + last_tool_call = choice_data[index]["tool_calls"].last + if tool_call_delta.dig("function", "arguments") + last_tool_call["function"]["arguments"] << tool_call_delta["function"]["arguments"] + end + end + end + end + + # Capture finish_reason + choice_data[index]["finish_reason"] = choice["finish_reason"] if choice["finish_reason"] + end + end + + # Build final choices array + aggregated["choices"] = choice_data.values.sort_by { |c| c["index"] }.map do |choice| + message = { + "role" => choice["role"], + "content" => choice["content"].empty? ? nil : choice["content"] + } + + # Add tool_calls to message if any + message["tool_calls"] = choice["tool_calls"] if choice["tool_calls"].any? + + { + "index" => choice["index"], + "message" => message, + "finish_reason" => choice["finish_reason"] + } + end + + aggregated + end + + # Aggregate responses streaming chunks into a single response structure. + # Specific to ruby-openai gem which uses string keys and plain hashes. + # @param chunks [Array] array of chunk hashes from stream (string keys) + # @return [Hash] aggregated response with output, usage, id (string keys) + def self.aggregate_responses_chunks(chunks) + return {} if chunks.empty? 
+ + # Find the response.completed event which has the final response + completed_chunk = chunks.find { |c| c["type"] == "response.completed" } + + if completed_chunk && completed_chunk["response"] + response = completed_chunk["response"] + return { + "id" => response["id"], + "output" => response["output"], + "usage" => response["usage"] + } + end + + # Fallback if no completed event found + {} + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb b/lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb new file mode 100644 index 0000000..d2768a0 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb @@ -0,0 +1,146 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../support/otel" +require_relative "../../support/openai" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module RubyOpenAI + module Instrumentation + # Responses API instrumentation for ruby-openai. + # Provides module that can be prepended to OpenAI::Responses to instrument the create method. + module Responses + def self.included(base) + # Guard against double-wrapping: Check if patch is already in the ancestor chain. + # This prevents double instrumentation if class-level patching was already applied, + # and this patch is being applied to a singleton-class. (Special case.) + # + # Ruby's prepend() doesn't check the full inheritance chain, so without this guard, + # the instrumentation could be added twice. 
+ base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model instructions modalities tools parallel_tool_calls + tool_choice temperature max_tokens top_p frequency_penalty + presence_penalty seed user metadata store response_format + reasoning previous_response_id truncation + ].freeze + + module InstanceMethods + # Wrap create method for ruby-openai responses API + # ruby-openai API: client.responses.create(parameters: {...}) + def create(parameters:) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("openai.responses.create") do |span| + is_streaming = streaming?(parameters) + metadata = build_metadata(parameters) + set_input(span, parameters) + + aggregated_chunks = [] + time_to_first_token = nil + response = nil + response_data = {} + + if is_streaming + # Setup a time measurement for the first chunk from the stream + start_time = nil + parameters = wrap_stream_callback(parameters, aggregated_chunks) do + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + end + start_time = Braintrust::Internal::Time.measure + + # Then initiate the stream + response = super(parameters: parameters) + + if !aggregated_chunks.empty? 
+ response_data = Common.aggregate_responses_chunks(aggregated_chunks) + end + else + # Make a time measurement synchronously around the API call + time_to_first_token = Braintrust::Internal::Time.measure do + response = super(parameters: parameters) + response_data = response if response + end + end + + set_output(span, response_data) + set_metrics(span, response_data, time_to_first_token) + finalize_metadata(span, metadata, response_data) + + response + end + end + + private + + def streaming?(parameters) + parameters.key?(:stream) && parameters[:stream].is_a?(Proc) + end + + def wrap_stream_callback(parameters, aggregated_chunks) + original_stream_proc = parameters[:stream] + parameters = parameters.dup + + parameters[:stream] = proc do |chunk, event| + yield if aggregated_chunks.empty? + aggregated_chunks << chunk + original_stream_proc.call(chunk, event) + end + + parameters + end + + def build_metadata(parameters) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/responses" + } + + Responses::METADATA_FIELDS.each do |field| + metadata[field.to_s] = parameters[field] if parameters.key?(field) + end + + metadata["stream"] = true if streaming?(parameters) + metadata + end + + def set_input(span, parameters) + return unless parameters[:input] + Support::OTel.set_json_attr(span, "braintrust.input_json", parameters[:input]) + end + + def set_output(span, response_data) + output = response_data["output"] + return unless output + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + end + + def set_metrics(span, response_data, time_to_first_token) + usage = response_data["usage"] + metrics = usage ? Support::OpenAI.parse_usage_tokens(usage) : {} + metrics["time_to_first_token"] = time_to_first_token || 0.0 + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response_data) + metadata["id"] = response_data["id"] if response_data["id"] + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/integration.rb b/lib/braintrust/contrib/ruby_openai/integration.rb new file mode 100644 index 0000000..4255aa2 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/integration.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require_relative "../integration" +require_relative "deprecated" + +module Braintrust + module Contrib + module RubyOpenAI + # RubyOpenAI integration for automatic instrumentation. + # Instruments the alexrudall ruby-openai gem (not the official openai gem). + class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "7.0.0" + + GEM_NAMES = ["ruby-openai"].freeze + REQUIRE_PATHS = ["openai"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :ruby_openai + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if ruby-openai gem is available (not official openai gem) + def self.loaded? + # Check if ruby-openai gem is loaded (not the official openai gem). + # Both gems use "require 'openai'", so we need to distinguish them. + # + # OpenAI::Internal is defined ONLY in the official OpenAI gem + (defined?(::OpenAI::Client) && !defined?(::OpenAI::Internal)) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. 
+ # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [ChatPatcher, ResponsesPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/patcher.rb b/lib/braintrust/contrib/ruby_openai/patcher.rb new file mode 100644 index 0000000..9699787 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/patcher.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/chat" +require_relative "instrumentation/responses" + +module Braintrust + module Contrib + module RubyOpenAI + # Patcher for ruby-openai chat completions. + # Instruments OpenAI::Client#chat method. + class ChatPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) + end + + def patched?(**options) + target_class = options[:target]&.singleton_class || ::OpenAI::Client + Instrumentation::Chat.applied?(target_class) + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + options[:target].singleton_class.include(Instrumentation::Chat) + else + # Class-level (for all clients) + ::OpenAI::Client.include(Instrumentation::Chat) + end + end + end + end + + # Patcher for ruby-openai responses API. + # Instruments OpenAI::Responses#create method. + class ResponsesPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? 
+ defined?(::OpenAI::Client) && ::OpenAI::Client.method_defined?(:responses) + end + + def patched?(**options) + if options[:target] + responses_obj = options[:target].responses + Instrumentation::Responses.applied?(responses_obj.singleton_class) + else + # For class-level, check if the responses class is patched + defined?(::OpenAI::Responses) && Instrumentation::Responses.applied?(::OpenAI::Responses) + end + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + responses_obj = options[:target].responses + responses_obj.singleton_class.include(Instrumentation::Responses) + else + # Class-level (for all clients) + ::OpenAI::Responses.include(Instrumentation::Responses) + end + end + end + end + end + end +end diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb index e29694d..6bf9434 100644 --- a/lib/braintrust/trace.rb +++ b/lib/braintrust/trace.rb @@ -6,32 +6,6 @@ require_relative "trace/span_filter" require_relative "logger" -# OpenAI integrations - both ruby-openai and openai gems use require "openai" -# so we detect which one actually loaded the code and require the appropriate integration -begin - require "openai" - - # Check which OpenAI gem's code is actually loaded by inspecting $LOADED_FEATURES - # (both gems can be in Gem.loaded_specs, but only one's code can be loaded) - openai_load_path = $LOADED_FEATURES.find { |f| f.end_with?("/openai.rb") } - - if openai_load_path&.include?("ruby-openai") - # alexrudall/ruby-openai gem (path contains 
"ruby-openai-X.Y.Z") - require_relative "trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai" - elsif openai_load_path&.include?("/openai-") - # Official openai gem (path contains "openai-X.Y.Z") - require_relative "trace/contrib/openai" - elsif Gem.loaded_specs["ruby-openai"] - # Fallback: ruby-openai in loaded_specs (for unusual installation paths) - require_relative "trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai" - elsif Gem.loaded_specs["openai"] - # Fallback: official openai in loaded_specs (for unusual installation paths) - require_relative "trace/contrib/openai" - end -rescue LoadError - # No OpenAI gem installed - integration will not be available -end - # Anthropic integration is optional - automatically loaded if anthropic gem is available begin require "anthropic" diff --git a/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb b/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb deleted file mode 100644 index c2dfe7a..0000000 --- a/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb +++ /dev/null @@ -1,377 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../../../../tokens" - -module Braintrust - module Trace - module Contrib - module Github - module Alexrudall - module RubyOpenAI - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from OpenAI API response - # @param usage [Hash] usage hash from OpenAI response - # @return [Hash] metrics hash with normalized names - def 
self.parse_usage_tokens(usage) - Braintrust::Trace.parse_openai_usage_tokens(usage) - end - - # Aggregate streaming chunks into a single response structure - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with choices, usage, id, created, model - def self.aggregate_streaming_chunks(chunks) - return {} if chunks.empty? - - # Initialize aggregated structure - aggregated = { - "id" => nil, - "created" => nil, - "model" => nil, - "usage" => nil, - "choices" => [] - } - - # Track aggregated content for the first choice - role = nil - content = +"" - - chunks.each do |chunk| - # Capture top-level fields from any chunk that has them - aggregated["id"] ||= chunk["id"] - aggregated["created"] ||= chunk["created"] - aggregated["model"] ||= chunk["model"] - - # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) - aggregated["usage"] = chunk["usage"] if chunk["usage"] - - # Aggregate content from first choice - if chunk.dig("choices", 0, "delta", "role") - role ||= chunk.dig("choices", 0, "delta", "role") - end - if chunk.dig("choices", 0, "delta", "content") - content << chunk.dig("choices", 0, "delta", "content") - end - end - - # Build aggregated choices array - aggregated["choices"] = [ - { - "index" => 0, - "message" => { - "role" => role || "assistant", - "content" => content - }, - "finish_reason" => chunks.dig(-1, "choices", 0, "finish_reason") - } - ] - - aggregated - end - - # Aggregate responses streaming chunks into a single response structure - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with output, usage, id - def self.aggregate_responses_chunks(chunks) - return {} if chunks.empty? 
- - # Find the response.completed event which has the final response - completed_chunk = chunks.find { |c| c["type"] == "response.completed" } - - if completed_chunk && completed_chunk["response"] - response = completed_chunk["response"] - return { - "id" => response["id"], - "output" => response["output"], - "usage" => response["usage"] - } - end - - # Fallback if no completed event found - {} - end - - # Set span attributes from response data (works for both streaming and non-streaming) - # @param span [OpenTelemetry::Trace::Span] the span to set attributes on - # @param response_data [Hash] response hash with keys: choices, usage, id, created, model, system_fingerprint, service_tier - # @param time_to_first_token [Float] time to first token in seconds - # @param metadata [Hash] metadata hash to update with response fields - def self.set_span_attributes(span, response_data, time_to_first_token, metadata) - # Set output (choices) as JSON - if response_data["choices"]&.any? - set_json_attr(span, "braintrust.output_json", response_data["choices"]) - end - - # Set metrics (token usage + time_to_first_token) - metrics = {} - if response_data["usage"] - metrics = parse_usage_tokens(response_data["usage"]) - end - metrics["time_to_first_token"] = time_to_first_token || 0.0 - set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- - # Update metadata with response fields - %w[id created model system_fingerprint service_tier].each do |field| - metadata[field] = response_data[field] if response_data[field] - end - end - - # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans - # Supports both synchronous and streaming requests - # @param client [OpenAI::Client] the OpenAI client to wrap - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(client, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # Store tracer provider on the client for use by wrapper modules - client.instance_variable_set(:@braintrust_tracer_provider, tracer_provider) - - # Wrap chat completions - wrap_chat(client) - - # Wrap responses API if available - wrap_responses(client) if client.respond_to?(:responses) - - client - end - - # Wrap chat API - # @param client [OpenAI::Client] the OpenAI client - def self.wrap_chat(client) - client.singleton_class.prepend(ChatWrapper) - end - - # Wrap responses API - # @param client [OpenAI::Client] the OpenAI client - def self.wrap_responses(client) - # Store tracer provider on the responses object for use by wrapper module - responses_obj = client.responses - responses_obj.instance_variable_set(:@braintrust_tracer_provider, client.instance_variable_get(:@braintrust_tracer_provider)) - responses_obj.singleton_class.prepend(ResponsesCreateWrapper) - end - - # Wrapper module for chat completions - module ChatWrapper - def chat(parameters:) - tracer_provider = @braintrust_tracer_provider - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("Chat Completion") do |span| - # Track start time for time_to_first_token - start_time = Time.now - time_to_first_token = nil - is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc) - - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - 
- # Capture request metadata fields - metadata_fields = %w[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - field_sym = field.to_sym - if parameters.key?(field_sym) - # Special handling for stream parameter (it's a Proc) - metadata[field] = if field == "stream" - true # Just mark as streaming - else - parameters[field_sym] - end - end - end - - # Set input messages as JSON - if parameters[:messages] - RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:messages]) - end - - # Wrap streaming callback if present to capture time to first token and aggregate chunks - aggregated_chunks = [] - if is_streaming - original_stream_proc = parameters[:stream] - parameters = parameters.dup - parameters[:stream] = proc do |chunk, bytesize| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - # Aggregate chunks for later processing - aggregated_chunks << chunk - # Call original callback - original_stream_proc.call(chunk, bytesize) - end - end - - begin - # Call the original method - response = super(parameters: parameters) - - # Calculate time to first token for non-streaming - time_to_first_token ||= Time.now - start_time unless is_streaming - - # Process response data - if is_streaming && !aggregated_chunks.empty? 
- # Aggregate streaming chunks into response-like structure - aggregated_response = RubyOpenAI.aggregate_streaming_chunks(aggregated_chunks) - RubyOpenAI.set_span_attributes(span, aggregated_response, time_to_first_token, metadata) - else - # Non-streaming: use response object directly - RubyOpenAI.set_span_attributes(span, response || {}, time_to_first_token, metadata) - end - - # Set metadata ONCE at the end with complete hash - RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata) - - response - rescue => e - # Record exception in span - span.record_exception(e) - span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}") - raise - end - end - end - end - - # Wrapper module for responses API create method - module ResponsesCreateWrapper - def create(parameters:) - tracer_provider = @braintrust_tracer_provider - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("openai.responses.create") do |span| - # Track start time for time_to_first_token - start_time = Time.now - time_to_first_token = nil - is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc) - - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses" - } - - # Capture request metadata fields - metadata_fields = %w[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user store response_format - reasoning previous_response_id truncation - ] - - metadata_fields.each do |field| - field_sym = field.to_sym - if parameters.key?(field_sym) - metadata[field] = parameters[field_sym] - end - end - - # Mark as streaming if applicable - metadata["stream"] = true if is_streaming - - # Set input as JSON - if parameters[:input] - RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:input]) - end - - # Wrap streaming callback if present to capture time to first token and aggregate chunks - 
aggregated_chunks = [] - if is_streaming - original_stream_proc = parameters[:stream] - parameters = parameters.dup - parameters[:stream] = proc do |chunk, event| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - # Aggregate chunks for later processing - aggregated_chunks << chunk - # Call original callback - original_stream_proc.call(chunk, event) - end - end - - begin - # Call the original method - response = super(parameters: parameters) - - # Calculate time to first token for non-streaming - time_to_first_token ||= Time.now - start_time unless is_streaming - - # Process response data - if is_streaming && !aggregated_chunks.empty? - # Aggregate streaming chunks into response-like structure - aggregated_response = RubyOpenAI.aggregate_responses_chunks(aggregated_chunks) - - # Set output as JSON - if aggregated_response["output"] - RubyOpenAI.set_json_attr(span, "braintrust.output_json", aggregated_response["output"]) - end - - # Set metrics (token usage + time_to_first_token) - metrics = {} - if aggregated_response["usage"] - metrics = RubyOpenAI.parse_usage_tokens(aggregated_response["usage"]) - end - metrics["time_to_first_token"] = time_to_first_token || 0.0 - RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - - # Update metadata with response fields - metadata["id"] = aggregated_response["id"] if aggregated_response["id"] - else - # Non-streaming: use response object directly - if response && response["output"] - RubyOpenAI.set_json_attr(span, "braintrust.output_json", response["output"]) - end - - # Set metrics (token usage + time_to_first_token) - metrics = {} - if response && response["usage"] - metrics = RubyOpenAI.parse_usage_tokens(response["usage"]) - end - metrics["time_to_first_token"] = time_to_first_token || 0.0 - RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- - # Update metadata with response fields - metadata["id"] = response["id"] if response && response["id"] - end - - # Set metadata ONCE at the end with complete hash - RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata) - - response - rescue => e - # Record exception in span - span.record_exception(e) - span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}") - raise - end - end - end - end - end - end - end - end - - # Backwards compatibility: this module was originally at Braintrust::Trace::AlexRudall::RubyOpenAI - module AlexRudall - RubyOpenAI = Contrib::Github::Alexrudall::RubyOpenAI - end - end -end diff --git a/test/braintrust/contrib/ruby_openai/deprecated_test.rb b/test/braintrust/contrib/ruby_openai/deprecated_test.rb new file mode 100644 index 0000000..0ab85ad --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/deprecated_test.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/ruby_openai/deprecated" + +class Braintrust::Contrib::RubyOpenAI::DeprecatedTest < Minitest::Test + # --- .wrap --- + + def test_wrap_delegates_to_instrument + mock_client = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(mock_client, tracer_provider: mock_tracer) } + end + + assert_equal :ruby_openai, captured_args[0] + assert_same mock_client, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_logs_deprecation_warning + mock_client = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(mock_client) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, 
warning_message) + end + + def test_wrap_returns_client + mock_client = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(mock_client) } + assert_same mock_client, result + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb new file mode 100644 index 0000000..90f068b --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb @@ -0,0 +1,362 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/ruby_openai/instrumentation/chat" + +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ChatTest < Minitest::Test + Chat = Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Chat::InstanceMethods]) + mock.expect(:prepend, nil, [Chat::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Chat.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + # Should not raise or double-prepend + Chat.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Chat::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? 
--- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Chat.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + assert Chat.applied?(base) + end +end + +# E2E tests for Chat instrumentation +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ChatE2ETest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + @api_key = ENV["OPENAI_API_KEY"] || "test-api-key" + end + + # --- #chat --- + + def test_chat_creates_span_with_correct_attributes + VCR.use_cassette("alexrudall_ruby_openai/chat_completions") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a test assistant."}, + {role: "user", content: "Say 'test'"} + ], + max_tokens: 10, + temperature: 0.5 + } + ) + + refute_nil response + refute_nil response.dig("choices", 0, "message", "content") + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify braintrust.input_json contains messages with content + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 2, input.length + assert_equal "system", input[0]["role"] + assert_equal "You are a test assistant.", input[0]["content"] + assert_equal "user", input[1]["role"] + assert_equal "Say 'test'", input[1]["content"] + + # Verify braintrust.output_json contains choices with full details + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal 0, output[0]["index"] + assert_equal "assistant", 
output[0]["message"]["role"] + refute_nil output[0]["message"]["content"] + refute_nil output[0]["finish_reason"] + + # Verify braintrust.metadata contains request and response metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/chat/completions", metadata["endpoint"] + assert_match(/\Agpt-4o-mini/, metadata["model"]) + assert_equal 10, metadata["max_tokens"] + assert_equal 0.5, metadata["temperature"] + refute_nil metadata["id"] + refute_nil metadata["created"] + + # Verify braintrust.metrics contains token usage + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_chat_handles_tool_calls + VCR.use_cassette("alexrudall_ruby_openai/tool_calls") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + tools = [ + { + type: "function", + function: { + name: "get_weather", + description: "Get the current weather", + parameters: { + type: "object", + properties: { + location: {type: "string", description: "City name"} + }, + required: ["location"] + } + } + } + ] + + response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "What's the weather in Paris?"} + ], + tools: tools, + max_tokens: 100 + } + ) + + refute_nil response + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = 
JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + assert_equal "user", input[0]["role"] + + # Verify output contains tool calls + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + + # Verify metadata includes tools + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + refute_nil metadata["tools"] + end + end + + def test_chat_handles_multi_turn_tool_calls + VCR.use_cassette("alexrudall_ruby_openai/multi_turn_tools") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + tools = [ + { + type: "function", + function: { + name: "calculate", + description: "Perform a calculation", + parameters: { + type: "object", + properties: { + operation: {type: "string"}, + a: {type: "number"}, + b: {type: "number"} + } + } + } + } + ] + + # First request - model calls tool + first_response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "What is 127 multiplied by 49?"} + ], + tools: tools, + max_tokens: 100 + } + ) + + tool_call = first_response.dig("choices", 0, "message", "tool_calls", 0) + skip "Model didn't call tool" unless tool_call + + # Second request - provide tool result with tool_call_id + second_response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "What is 127 multiplied by 49?"}, + first_response.dig("choices", 0, "message"), # Assistant message with tool_calls + { + role: "tool", + tool_call_id: tool_call["id"], + content: "6223" + } + ], + tools: tools, + max_tokens: 100 + } + ) + + # Verify response + refute_nil second_response + + # Drain spans (we have 2: first request + second request) + spans = rig.drain + assert_equal 2, spans.length + + # 
Verify second span contains tool_call_id in input + span = spans[1] + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 3, input.length + + # Third message should be tool response with tool_call_id + assert_equal "tool", input[2]["role"] + assert_equal tool_call["id"], input[2]["tool_call_id"] + assert_equal "6223", input[2]["content"] + end + end + + def test_chat_handles_streaming + VCR.use_cassette("alexrudall_ruby_openai/streaming") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + full_content = "" + client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "Count from 1 to 3"} + ], + max_tokens: 50, + stream_options: {include_usage: true}, + stream: proc do |chunk, _bytesize| + delta_content = chunk.dig("choices", 0, "delta", "content") + full_content += delta_content if delta_content + end + } + ) + + refute_empty full_content + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + assert_equal "user", input[0]["role"] + + # Verify output was captured and aggregated + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal 0, output[0]["index"] + assert_equal "assistant", output[0]["message"]["role"] + refute_nil output[0]["message"]["content"] + refute_empty output[0]["message"]["content"] + + # Verify metadata includes stream flag + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal true, metadata["stream"] + + # Verify metrics include 
time_to_first_token and usage tokens + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + end + end + + def test_chat_records_exception_on_error + VCR.use_cassette("alexrudall_ruby_openai/error") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: "invalid_key") + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + error = assert_raises do + client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [{role: "user", content: "test"}] + } + ) + end + + refute_nil error + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + + # Verify error message is captured + refute_nil span.status.description + assert span.status.description.length > 0 + + # Verify exception event was recorded + assert span.events.any? 
{ |event| event.name == "exception" } + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb new file mode 100644 index 0000000..4053f64 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/ruby_openai/instrumentation/common" + +class Braintrust::Contrib::RubyOpenAI::Instrumentation::CommonTest < Minitest::Test + Common = Braintrust::Contrib::RubyOpenAI::Instrumentation::Common + + # --- .aggregate_streaming_chunks --- + + def test_aggregate_streaming_chunks_returns_empty_hash_for_empty_input + assert_equal({}, Common.aggregate_streaming_chunks([])) + end + + def test_aggregate_streaming_chunks_aggregates_basic_chunks + chunks = [ + {"id" => "chatcmpl-123", "model" => "gpt-4", "choices" => [{"index" => 0, "delta" => {"role" => "assistant"}}]}, + {"choices" => [{"index" => 0, "delta" => {"content" => "Hello"}}]}, + {"choices" => [{"index" => 0, "delta" => {"content" => " world"}, "finish_reason" => "stop"}]}, + {"usage" => {"prompt_tokens" => 10, "completion_tokens" => 5}} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal "chatcmpl-123", result["id"] + assert_equal "gpt-4", result["model"] + assert_equal 1, result["choices"].length + assert_equal "assistant", result["choices"][0]["message"]["role"] + assert_equal "Hello world", result["choices"][0]["message"]["content"] + assert_equal "stop", result["choices"][0]["finish_reason"] + assert_equal({"prompt_tokens" => 10, "completion_tokens" => 5}, result["usage"]) + end + + def test_aggregate_streaming_chunks_captures_system_fingerprint + chunks = [ + {"id" => "chatcmpl-123", "system_fingerprint" => "fp_abc123", "choices" => [{"index" => 0, "delta" => {"role" => "assistant"}}]}, + {"choices" => [{"index" => 0, "delta" => {"content" => "Hi"}, 
"finish_reason" => "stop"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal "fp_abc123", result["system_fingerprint"] + end + + def test_aggregate_streaming_chunks_aggregates_tool_calls + chunks = [ + {"id" => "chatcmpl-123", "choices" => [{"index" => 0, "delta" => {"role" => "assistant", "tool_calls" => [{"id" => "call_abc", "type" => "function", "function" => {"name" => "get_weather", "arguments" => ""}}]}}]}, + {"choices" => [{"index" => 0, "delta" => {"tool_calls" => [{"function" => {"arguments" => '{"loc'}}]}}]}, + {"choices" => [{"index" => 0, "delta" => {"tool_calls" => [{"function" => {"arguments" => 'ation":"NYC"}'}}]}, "finish_reason" => "tool_calls"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result["choices"].length + assert_equal 1, result["choices"][0]["message"]["tool_calls"].length + assert_equal "call_abc", result["choices"][0]["message"]["tool_calls"][0]["id"] + assert_equal "get_weather", result["choices"][0]["message"]["tool_calls"][0]["function"]["name"] + assert_equal '{"location":"NYC"}', result["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"] + end + + def test_aggregate_streaming_chunks_handles_multiple_choices + chunks = [ + {"id" => "chatcmpl-123", "choices" => [ + {"index" => 0, "delta" => {"role" => "assistant"}}, + {"index" => 1, "delta" => {"role" => "assistant"}} + ]}, + {"choices" => [ + {"index" => 0, "delta" => {"content" => "First"}}, + {"index" => 1, "delta" => {"content" => "Second"}} + ]}, + {"choices" => [ + {"index" => 0, "delta" => {}, "finish_reason" => "stop"}, + {"index" => 1, "delta" => {}, "finish_reason" => "stop"} + ]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 2, result["choices"].length + assert_equal "First", result["choices"][0]["message"]["content"] + assert_equal "Second", result["choices"][1]["message"]["content"] + end + + def test_aggregate_streaming_chunks_returns_nil_content_when_empty + 
chunks = [ + {"id" => "chatcmpl-123", "choices" => [{"index" => 0, "delta" => {"role" => "assistant"}}]}, + {"choices" => [{"index" => 0, "delta" => {}, "finish_reason" => "stop"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_nil result["choices"][0]["message"]["content"] + end + + # --- .aggregate_responses_chunks --- + + def test_aggregate_responses_chunks_returns_empty_hash_for_empty_input + assert_equal({}, Common.aggregate_responses_chunks([])) + end + + def test_aggregate_responses_chunks_extracts_completed_event_data + chunks = [ + {"type" => "response.created"}, + {"type" => "response.in_progress"}, + { + "type" => "response.completed", + "response" => { + "id" => "resp_123", + "output" => [{"type" => "message", "content" => "Hello"}], + "usage" => {"input_tokens" => 10, "output_tokens" => 5} + } + } + ] + + result = Common.aggregate_responses_chunks(chunks) + + assert_equal "resp_123", result["id"] + assert_equal [{"type" => "message", "content" => "Hello"}], result["output"] + assert_equal({"input_tokens" => 10, "output_tokens" => 5}, result["usage"]) + end + + def test_aggregate_responses_chunks_returns_empty_without_completed_event + chunks = [ + {"type" => "response.created"}, + {"type" => "response.in_progress"} + ] + + result = Common.aggregate_responses_chunks(chunks) + + assert_equal({}, result) + end + + def test_aggregate_responses_chunks_handles_missing_response_fields + chunks = [ + { + "type" => "response.completed", + "response" => { + "id" => "resp_123" + } + } + ] + + result = Common.aggregate_responses_chunks(chunks) + + assert_equal "resp_123", result["id"] + assert_nil result["output"] + assert_nil result["usage"] + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb new file mode 100644 index 0000000..a0ef98e --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb @@ 
-0,0 +1,232 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/ruby_openai/instrumentation/responses" + +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ResponsesTest < Minitest::Test + Responses = Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Responses::InstanceMethods]) + mock.expect(:prepend, nil, [Responses::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Responses.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + # Should not raise or double-prepend + Responses.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Responses::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Responses.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + assert Responses.applied?(base) + end +end + +# E2E tests for Responses instrumentation +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ResponsesE2ETest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! 
+ @api_key = ENV["OPENAI_API_KEY"] || "test-api-key" + end + + # --- #create --- + + def test_create_creates_span_with_correct_attributes + VCR.use_cassette("alexrudall_ruby_openai/responses_non_streaming") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + response = client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "What is 2+2? Reply with just the number." + } + ) + + refute_nil response + refute_nil response["output"] + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Verify braintrust.input_json contains input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "What is 2+2? Reply with just the number.", input + + # Verify braintrust.output_json contains output + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + refute_nil output + + # Verify braintrust.metadata contains request metadata + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal "gpt-4o-mini", metadata["model"] + refute_nil metadata["id"] + + # Verify braintrust.metrics contains token usage + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_create_handles_streaming + VCR.use_cassette("alexrudall_ruby_openai/responses_streaming") do + rig = 
setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + chunks = [] + client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "Count from 1 to 3", + stream: proc do |chunk, _event| + chunks << chunk + end + } + ) + + refute_empty chunks + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Verify braintrust.input_json contains input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "Count from 1 to 3", input + + # Verify braintrust.output_json contains aggregated output + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + refute_nil output + + # Verify braintrust.metadata contains request metadata with stream flag + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal true, metadata["stream"] + + # Verify braintrust.metrics contains time_to_first_token + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_create_captures_metadata_parameter + VCR.use_cassette("alexrudall_ruby_openai/responses_with_metadata") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + response = client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "Say hello", + metadata: { + "test_key" => "test_value", + "user_id" => "user_123" + } + } 
+ ) + + refute_nil response + + span = rig.drain_one + + # Verify metadata parameter is captured in span metadata + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert metadata.key?("metadata"), "metadata parameter should be captured" + assert_equal "test_value", metadata["metadata"]["test_key"] + assert_equal "user_123", metadata["metadata"]["user_id"] + end + end + + def test_create_records_exception_on_error + VCR.use_cassette("alexrudall_ruby_openai/responses_error") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: "invalid_key") + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + error = assert_raises do + client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "test" + } + ) + end + + refute_nil error + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + + # Verify error message is captured + refute_nil span.status.description + assert span.status.description.length > 0 + + # Verify exception event was recorded + assert span.events.any? 
{ |event| event.name == "exception" } + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation_test.rb new file mode 100644 index 0000000..a2469c6 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation_test.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_openai/patcher" + +# Tests for instance-level instrumentation behavior +class Braintrust::Contrib::RubyOpenAI::InstrumentationTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + end + + # --- Instance-level instrumentation --- + + def test_instance_instrumentation_only_patches_target_client + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched? 
+ + rig = setup_otel_test_rig + + # Create two clients BEFORE any patching + client_traced = OpenAI::Client.new(access_token: "test-key") + client_untraced = OpenAI::Client.new(access_token: "test-key") + + # Verify neither client is patched initially + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should not be patched initially" + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?, + "class should not be patched initially" + + # Instrument only one client (instance-level) + Braintrust.instrument!(:ruby_openai, target: client_traced, tracer_provider: rig.tracer_provider) + + # Only the traced client should be patched + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should be patched after instrument!" + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should NOT be patched after instrument!" + + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end +end diff --git a/test/braintrust/contrib/ruby_openai/integration_helper.rb b/test/braintrust/contrib/ruby_openai/integration_helper.rb new file mode 100644 index 0000000..c9d44ad --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/integration_helper.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +# Test helpers for ruby-openai integration tests. +# Provides gem loading helpers that handle the ruby-openai gem vs official openai disambiguation. + +module Braintrust + module Contrib + module RubyOpenAI + module IntegrationHelper + # Skip test unless ruby-openai gem is available (not official openai). 
+ # Loads the gem if available. + def skip_unless_ruby_openai! + if Gem.loaded_specs["openai"] + skip "ruby-openai gem not available (found official openai gem instead)" + end + + unless Gem.loaded_specs["ruby-openai"] + skip "ruby-openai gem not available" + end + + require "openai" unless defined?(::OpenAI) + end + + # Load ruby-openai gem if available (doesn't skip, for tests that handle both states). + def load_ruby_openai_if_available + if Gem.loaded_specs["ruby-openai"] && !Gem.loaded_specs["openai"] + require "openai" unless defined?(::OpenAI) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/integration_test.rb b/test/braintrust/contrib/ruby_openai/integration_test.rb new file mode 100644 index 0000000..489347d --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/integration_test.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +class Braintrust::Contrib::RubyOpenAI::IntegrationTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + load_ruby_openai_if_available + @integration = Braintrust::Contrib::RubyOpenAI::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :ruby_openai, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["ruby-openai"], @integration.gem_names + end + + # --- .require_paths --- + + def test_require_paths + assert_equal ["openai"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "7.0.0", @integration.minimum_version + end + + # --- .loaded? 
--- + + def test_loaded_returns_true_when_openai_client_defined_without_internal + skip "Official OpenAI gem is loaded (has ::OpenAI::Internal)" if defined?(::OpenAI::Internal) + skip "OpenAI module not defined" unless defined?(::OpenAI::Client) + + assert @integration.loaded?, "Should be loaded when ::OpenAI::Client is defined without ::OpenAI::Internal" + end + + def test_loaded_returns_false_when_openai_internal_defined + skip "Official OpenAI gem not loaded" unless defined?(::OpenAI::Internal) + + refute @integration.loaded?, "Should not be loaded when ::OpenAI::Internal is defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/openai_test.rb b/test/braintrust/contrib/ruby_openai/openai_test.rb new file mode 100644 index 0000000..b147db8 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/openai_test.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +# require "test_helper" + +# # Load the openai gem to define OpenAI::Client (and OpenAI::Internal if official gem) +# # This must happen before tests run so the gem detection logic works +# begin +# require "openai" +# rescue LoadError +# # Gem not available in this appraisal +# end + +# This test verifies that the RubyOpenAI integration correctly identifies when the +# `ruby-openai` gem is loaded vs the official `openai` gem. 
+# +# The test is designed to work with different appraisals: +# bundle exec appraisal ruby-openai rake test:contrib # ruby-openai only - tests run +# bundle exec appraisal openai-ruby-openai rake test:contrib # Both gems - tests skipped (ruby-openai loads) +# bundle exec appraisal openai rake test:contrib # Official gem only - tests skipped +# +# The test verifies: +# 1. When `ruby-openai` gem is loaded, loaded? returns true +# 2. When `ruby-openai` gem is loaded, instrument! succeeds and patchers are applied + +class Braintrust::Contrib::RubyOpenAI::OpenAITest < Minitest::Test + Integration = Braintrust::Contrib::RubyOpenAI::Integration + + # def ruby_openai_available? + # # OpenAI::Internal is only defined in the official openai gem + # !Gem.loaded_specs["ruby-openai"].nil? + # end + + # def official_openai_available? + # # OpenAI::Internal is only defined in the official openai gem + # !Gem.loaded_specs["openai"].nil? + # end + + def ruby_openai_loaded? + # Check if ruby-openai gem is loaded (not the official openai gem). + # Both gems use "require 'openai'", so we need to distinguish them. + # + # OpenAI::Internal is defined ONLY in the official OpenAI gem + (defined?(::OpenAI::Client) && !defined?(::OpenAI::Internal)) ? true : false + end + + # --- .loaded? --- + + def test_loaded_returns_true_for_ruby_openai_gem + skip "ruby-openai gem not loaded" unless ruby_openai_loaded? + + assert Integration.loaded?, + "loaded? should return true when ruby-openai gem is loaded" + end + + def test_loaded_returns_false_when_ruby_openai_gem_not_loaded + skip "ruby-openai gem is loaded" if ruby_openai_loaded? + + refute Integration.loaded?, + "loaded? should return false when ruby-openai gem is not loaded" + end + + # --- Braintrust.instrument! --- + + def test_instrument_succeeds_for_ruby_openai_gem + skip "ruby-openai gem not loaded" unless ruby_openai_loaded? + + result = Braintrust.instrument!(:ruby_openai) + + assert result, "instrument! 
should return truthy for ruby-openai gem" + + any_patched = Integration.patchers.any?(&:patched?) + assert any_patched, "at least one patcher should be patched for ruby-openai gem" + end + + # --- OpenAI::Internal --- + + def test_openai_internal_not_defined_for_ruby_openai_gem + skip "ruby-openai gem not loaded" unless ruby_openai_loaded? + + refute defined?(::OpenAI::Internal), + "OpenAI::Internal should not be defined when ruby-openai gem is loaded" + end + + # --- .available? --- + + def test_not_available_when_official_openai_gem_loaded + skip "ruby-openai gem is available" if Gem.loaded_specs["ruby-openai"] + skip "Official openai gem not loaded" unless Gem.loaded_specs["openai"] + + refute Integration.available?, "Should NOT be available when only official openai gem is loaded" + end +end diff --git a/test/braintrust/contrib/ruby_openai/patcher_test.rb b/test/braintrust/contrib/ruby_openai/patcher_test.rb new file mode 100644 index 0000000..5566682 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/patcher_test.rb @@ -0,0 +1,206 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_openai/patcher" + +class Braintrust::Contrib::RubyOpenAI::ChatPatcherTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_openai_client_defined + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + fake_client_class = Class.new + + OpenAI.stub_const(:Client, fake_client_class) do + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched? 
+ end + end + + def test_patched_returns_true_when_module_included + fake_client_class = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + OpenAI.stub_const(:Client, fake_client_class) do + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched? + end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + mock_chain(:singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_client_class = Minitest::Mock.new + fake_client_class.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat]) + + OpenAI.stub_const(:Client, fake_client_class) do + Braintrust::Contrib::RubyOpenAI::ChatPatcher.perform_patch + fake_client_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat]) + + mock_chain(:singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::RubyOpenAI::ChatPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::RubyOpenAI::ChatPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end + +class 
Braintrust::Contrib::RubyOpenAI::ResponsesPatcherTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_responses_method_exists + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + assert Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.applicable? + end + + def test_applicable_returns_false_when_responses_method_missing + fake_client_class = Class.new + + OpenAI.stub_const(:Client, fake_client_class) do + refute Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.applicable? + end + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + skip "OpenAI::Responses not defined" unless defined?(::OpenAI::Responses) + + fake_responses_class = Class.new + + OpenAI.stub_const(:Responses, fake_responses_class) do + refute Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + skip "OpenAI::Responses not defined" unless defined?(::OpenAI::Responses) + + fake_responses_class = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + OpenAI.stub_const(:Responses, fake_responses_class) do + assert Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched? 
+ end + end + + def test_patched_returns_false_for_unpatched_instance + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + fake_singleton = Class.new + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + fake_singleton = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + skip "OpenAI::Responses not defined" unless defined?(::OpenAI::Responses) + + fake_responses_class = Minitest::Mock.new + fake_responses_class.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses]) + + OpenAI.stub_const(:Responses, fake_responses_class) do + Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.perform_patch + fake_responses_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses]) + + mock_chain(:responses, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + skip "Responses API not available" unless 
OpenAI::Client.method_defined?(:responses) + + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end diff --git a/test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb b/test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb deleted file mode 100644 index c736c12..0000000 --- a/test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb +++ /dev/null @@ -1,620 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" -require_relative "../../../../lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai" - -class Braintrust::Trace::AlexRudall::RubyOpenAITest < Minitest::Test - def setup - # Skip all ruby-openai tests if the gem is not available - # Note: ruby-openai gem is required as "openai" (same as the openai gem) - # We detect ruby-openai by checking the gem name in loaded specs - begin - require "openai" - - # Check which gem is loaded by looking at Gem.loaded_specs - # ruby-openai has gem name "ruby-openai" - # official openai gem has gem name "openai" - if Gem.loaded_specs["ruby-openai"] - @using_ruby_openai = true - elsif Gem.loaded_specs["openai"] - skip "ruby-openai gem not available (found openai gem instead)" - else - skip "Could not determine which OpenAI gem is loaded" - end - - @gem_available = true - rescue LoadError - @gem_available = false - skip "ruby-openai gem not available" - end - - @api_key = ENV["OPENAI_API_KEY"] || "test-api-key" - @original_api_key = ENV["OPENAI_API_KEY"] - end - - def teardown - if @original_api_key - ENV["OPENAI_API_KEY"] = @original_api_key - else - ENV.delete("OPENAI_API_KEY") - end - end - - def test_wrap_creates_span_for_chat_completions - VCR.use_cassette("alexrudall_ruby_openai/chat_completions") do - require "openai" - - # Set up test rig (includes Braintrust processor) - rig = 
setup_otel_test_rig - - # Create OpenAI client using ruby-openai's API and wrap it with Braintrust tracing - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a simple chat completion request with additional params to test metadata capture - # ruby-openai uses: client.chat(parameters: {...}) - response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "system", content: "You are a test assistant."}, - {role: "user", content: "Say 'test'"} - ], - max_tokens: 10, - temperature: 0.5 - } - ) - - # Verify response - refute_nil response - refute_nil response.dig("choices", 0, "message", "content") - - # Drain and verify span - span = rig.drain_one - - # Verify span name matches pattern - assert_equal "Chat Completion", span.name - - # Verify braintrust.input_json contains messages - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 2, input.length - assert_equal "system", input[0]["role"] - assert_equal "You are a test assistant.", input[0]["content"] - assert_equal "user", input[1]["role"] - assert_equal "Say 'test'", input[1]["content"] - - # Verify braintrust.output_json contains choices - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - refute_nil output[0]["message"]["content"] - refute_nil output[0]["finish_reason"] - - # Verify braintrust.metadata contains request and response metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/chat/completions", metadata["endpoint"] - assert_match(/\Agpt-4o-mini/, metadata["model"]) - 
assert_equal 10, metadata["max_tokens"] - assert_equal 0.5, metadata["temperature"] - refute_nil metadata["id"] - refute_nil metadata["created"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_handles_tool_calls - VCR.use_cassette("alexrudall_ruby_openai/tool_calls") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request that will trigger a tool call - tools = [ - { - type: "function", - function: { - name: "get_weather", - description: "Get the current weather", - parameters: { - type: "object", - properties: { - location: {type: "string", description: "City name"} - }, - required: ["location"] - } - } - } - ] - - response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "What's the weather in Paris?"} - ], - tools: tools, - max_tokens: 100 - } - ) - - # Verify response - refute_nil response - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - - # Verify 
output contains tool calls - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - - # Verify metadata includes tools - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - refute_nil metadata["tools"] - end - end - - def test_wrap_handles_multi_turn_tool_calls_with_tool_call_id - VCR.use_cassette("alexrudall_ruby_openai/multi_turn_tools") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - tools = [ - { - type: "function", - function: { - name: "calculate", - description: "Perform a calculation", - parameters: { - type: "object", - properties: { - operation: {type: "string"}, - a: {type: "number"}, - b: {type: "number"} - } - } - } - } - ] - - # First request - model calls tool - first_response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "What is 127 multiplied by 49?"} - ], - tools: tools, - max_tokens: 100 - } - ) - - tool_call = first_response.dig("choices", 0, "message", "tool_calls", 0) - skip "Model didn't call tool" unless tool_call - - # Second request - provide tool result with tool_call_id - second_response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "What is 127 multiplied by 49?"}, - first_response.dig("choices", 0, "message"), # Assistant message with tool_calls - { - role: "tool", - tool_call_id: tool_call["id"], - content: "6223" - } - ], - tools: tools, - max_tokens: 100 - } - ) - - # Verify response - refute_nil second_response - - # Drain spans (we have 2: first request + second request) - spans = rig.drain - assert_equal 2, 
spans.length - - # Verify second span contains tool_call_id in input - span = spans[1] - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 3, input.length - - # Third message should be tool response with tool_call_id - assert_equal "tool", input[2]["role"] - assert_equal tool_call["id"], input[2]["tool_call_id"] - assert_equal "6223", input[2]["content"] - end - end - - def test_wrap_handles_streaming_chat_completions - VCR.use_cassette("alexrudall_ruby_openai/streaming") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request with stream_options to get usage metrics - full_content = "" - client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "Count from 1 to 3"} - ], - max_tokens: 50, - stream_options: {include_usage: true}, - stream: proc do |chunk, _bytesize| - delta_content = chunk.dig("choices", 0, "delta", "content") - full_content += delta_content if delta_content - end - } - ) - - # Verify we got content - refute_empty full_content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - - # Verify output was captured and aggregated - assert span.attributes.key?("braintrust.output_json"), "Should have braintrust.output_json" - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - refute_nil 
output[0]["message"]["content"] - refute_empty output[0]["message"]["content"] - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal true, metadata["stream"] - - # Verify metrics include time_to_first_token and usage tokens - assert span.attributes.key?("braintrust.metrics"), "Should have braintrust.metrics" - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - - # Verify usage metrics are present (when stream_options.include_usage is set) - assert metrics.key?("prompt_tokens"), "Should have prompt_tokens metric" - assert metrics["prompt_tokens"] > 0, "prompt_tokens should be > 0" - assert metrics.key?("completion_tokens"), "Should have completion_tokens metric" - assert metrics["completion_tokens"] > 0, "completion_tokens should be > 0" - assert metrics.key?("tokens"), "Should have tokens metric" - assert metrics["tokens"] > 0, "tokens should be > 0" - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - end - end - - def test_wrap_handles_embeddings - VCR.use_cassette("alexrudall_ruby_openai/embeddings") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make an embeddings request - response = client.embeddings( - parameters: { - model: "text-embedding-3-small", - input: "The quick brown fox" - } - ) - - # Verify response - refute_nil response - refute_nil response.dig("data", 0, "embedding") - - # Embeddings are not traced yet (no wrapper implemented) - # So this should not 
create a span for embeddings - # Only verify the request succeeded - assert response.dig("data", 0, "embedding").length > 0 - end - end - - def test_wrap_handles_completions - VCR.use_cassette("alexrudall_ruby_openai/completions") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a completions request - response = client.completions( - parameters: { - model: "gpt-3.5-turbo-instruct", - prompt: "Say hello:", - max_tokens: 10 - } - ) - - # Verify response - refute_nil response - refute_nil response.dig("choices", 0, "text") - - # Completions are not traced yet (no wrapper implemented) - # Only verify the request succeeded - refute_empty response.dig("choices", 0, "text") - end - end - - def test_wrap_records_exception_for_errors - VCR.use_cassette("alexrudall_ruby_openai/error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key - client = OpenAI::Client.new(access_token: "invalid_key") - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request that will fail - error = assert_raises do - client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "test"} - ] - } - ) - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured - refute_nil span.status.description - assert span.status.description.length > 0 - - # Verify exception event was recorded - assert span.events.any? 
{ |event| event.name == "exception" } - end - end - - def test_wrap_responses_create_non_streaming - VCR.use_cassette("alexrudall_ruby_openai/responses_non_streaming") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this ruby-openai gem version" unless client.respond_to?(:responses) - - # Make a non-streaming responses request - # ruby-openai 8.x uses: client.responses.create(parameters: {...}) - response = client.responses.create( - parameters: { - model: "gpt-4o-mini", - input: "What is 2+2? Reply with just the number." - } - ) - - # Verify response - refute_nil response - refute_nil response["output"] - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify braintrust.input_json contains input - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "What is 2+2? 
Reply with just the number.", input - - # Verify braintrust.output_json contains output - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains request metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal "gpt-4o-mini", metadata["model"] - refute_nil metadata["id"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_responses_create_streaming - VCR.use_cassette("alexrudall_ruby_openai/responses_streaming") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this ruby-openai gem version" unless client.respond_to?(:responses) - - # Make a streaming responses request - # ruby-openai 8.x uses client.responses.create with stream: proc - chunks = [] - client.responses.create( - parameters: { - model: "gpt-4o-mini", - input: "Count from 1 to 3", - stream: proc do |chunk, _event| - chunks << chunk - end - } - ) - - # Verify 
we got chunks - refute_empty chunks, "Should have received streaming chunks" - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify braintrust.input_json contains input - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "Count from 1 to 3", input - - # Verify braintrust.output_json contains aggregated output - assert span.attributes.key?("braintrust.output_json"), "Missing braintrust.output_json" - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains request metadata with stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal true, metadata["stream"] - - # Verify braintrust.metrics contains time_to_first_token - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_responses_records_exception_for_errors - VCR.use_cassette("alexrudall_ruby_openai/responses_error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key - client = OpenAI::Client.new(access_token: "invalid_key") - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this ruby-openai gem version" unless client.respond_to?(:responses) - - # Make a request that will fail - error = assert_raises do - client.responses.create( - parameters: { - model: 
"gpt-4o-mini", - input: "test" - } - ) - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured - refute_nil span.status.description - assert span.status.description.length > 0 - - # Verify exception event was recorded - assert span.events.any? { |event| event.name == "exception" } - end - end -end diff --git a/test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml b/test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml new file mode 100644 index 0000000..79ff38d --- /dev/null +++ b/test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml @@ -0,0 +1,151 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Say hello","metadata":{"test_key":"test_value","user_id":"user_123"}}' + headers: + Content-Type: + - application/json + Authorization: + - Bearer + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + User-Agent: + - Ruby + response: + status: + code: 200 + message: OK + headers: + Date: + - Sat, 27 Dec 2025 03:59:59 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999972' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - 
req_371f05e8dd259faeaf7d6de7207f36b4 + Openai-Processing-Ms: + - '2566' + X-Envoy-Upstream-Service-Time: + - '2570' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=W3oofKUUmIA0dWOD3ZuVkOhOJxKZ5YyM4F2ntLlkxAE-1766807999-1.0.1.1-UTg_ztOFr0MwhTvC4mztsGvA28HStEtGampvNLxcHfj4vWNVeryxI7Ef_dlZXkInHIgsW9vjlEUU7zA06k59B_qHuO_L0ykjU0WKrE1z7RM; + path=/; expires=Sat, 27-Dec-25 04:29:59 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=_LPwVwPzQE7iEbOQOsGKJvu4vI.xP_G338aY9YYAqa8-1766807999838-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b45e87c58691477-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_07bbee4b8f5626d900694f59bd3fd88196a83738018d7106b8", + "object": "response", + "created_at": 1766807997, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766807999, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_07bbee4b8f5626d900694f59bf7d7c8196bc9d82703845a129", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "Hello! How can I assist you today?" 
+ } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 9, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 10, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 19 + }, + "user": null, + "metadata": { + "test_key": "test_value", + "user_id": "user_123" + } + } + recorded_at: Sat, 27 Dec 2025 03:59:59 GMT +recorded_with: VCR 6.3.1 From 131149a22068712fe9c742c478f743ff1864f857 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sun, 28 Dec 2025 19:24:49 -0500 Subject: [PATCH 10/27] Changed: Migrated ruby_llm to new integration API --- README.md | 4 +- Rakefile | 1 + examples/contrib/ruby_llm/basic.rb | 42 ++ examples/contrib/ruby_llm/instance.rb | 53 ++ examples/contrib/ruby_llm/streaming.rb | 42 ++ examples/contrib/ruby_llm/tool_usage.rb | 371 ++++++++++ .../ruby_llm/golden.rb} | 6 +- examples/ruby_llm.rb | 63 -- lib/braintrust/contrib.rb | 2 + lib/braintrust/contrib/ruby_llm/deprecated.rb | 45 ++ .../contrib/ruby_llm/instrumentation/chat.rb | 464 +++++++++++++ .../ruby_llm/instrumentation/common.rb | 58 ++ .../contrib/ruby_llm/integration.rb | 54 ++ lib/braintrust/contrib/ruby_llm/patcher.rb | 44 ++ lib/braintrust/trace.rb | 17 - .../contrib/github.com/crmne/ruby_llm.rb | 631 ------------------ .../contrib/ruby_llm/deprecated_test.rb | 134 ++++ .../ruby_llm/instrumentation/chat_test.rb} | 453 ++++++------- .../contrib/ruby_llm/instrumentation_test.rb | 57 ++ .../contrib/ruby_llm/integration_helper.rb | 29 + .../contrib/ruby_llm/integration_test.rb | 
58 ++ .../contrib/ruby_llm/patcher_test.rb | 93 +++ .../contrib/ruby_llm/basic_chat.yml | 119 ++++ .../contrib/ruby_llm/direct_complete.yml | 119 ++++ .../contrib/ruby_llm/streaming_chat.yml | 109 +++ .../contrib/ruby_llm/tool_calling.yml | 220 ++++++ 26 files changed, 2328 insertions(+), 960 deletions(-) create mode 100644 examples/contrib/ruby_llm/basic.rb create mode 100644 examples/contrib/ruby_llm/instance.rb create mode 100644 examples/contrib/ruby_llm/streaming.rb create mode 100644 examples/contrib/ruby_llm/tool_usage.rb rename examples/internal/{ruby_llm.rb => contrib/ruby_llm/golden.rb} (98%) delete mode 100644 examples/ruby_llm.rb create mode 100644 lib/braintrust/contrib/ruby_llm/deprecated.rb create mode 100644 lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb create mode 100644 lib/braintrust/contrib/ruby_llm/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/ruby_llm/integration.rb create mode 100644 lib/braintrust/contrib/ruby_llm/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb create mode 100644 test/braintrust/contrib/ruby_llm/deprecated_test.rb rename test/braintrust/{trace/ruby_llm_test.rb => contrib/ruby_llm/instrumentation/chat_test.rb} (59%) create mode 100644 test/braintrust/contrib/ruby_llm/instrumentation_test.rb create mode 100644 test/braintrust/contrib/ruby_llm/integration_helper.rb create mode 100644 test/braintrust/contrib/ruby_llm/integration_test.rb create mode 100644 test/braintrust/contrib/ruby_llm/patcher_test.rb create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml diff --git a/README.md b/README.md index 13c8f91..0d71497 100644 --- a/README.md +++ b/README.md @@ -182,8 +182,8 @@ require "ruby_llm" 
Braintrust.init -# Wrap RubyLLM globally (wraps all Chat instances) -Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap +# Instrument all RubyLLM Chat instances +Braintrust.instrument!(:ruby_llm) tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-app") root_span = nil diff --git a/Rakefile b/Rakefile index 84f03cd..74788e8 100644 --- a/Rakefile +++ b/Rakefile @@ -107,6 +107,7 @@ namespace :test do # Tasks per integration [ {name: :openai}, + {name: :ruby_llm}, {name: :ruby_openai, appraisal: "ruby-openai"} ].each do |integration| name = integration[:name] diff --git a/examples/contrib/ruby_llm/basic.rb b/examples/contrib/ruby_llm/basic.rb new file mode 100644 index 0000000..cc35fbb --- /dev/null +++ b/examples/contrib/ruby_llm/basic.rb @@ -0,0 +1,42 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Basic RubyLLM chat with Braintrust tracing +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/basic.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_llm) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +puts "Sending chat request..." 
+response = tracer.in_span("examples/contrib/ruby_llm/basic.rb") do |span| + root_span = span + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.ask("What is the capital of France?") +end + +puts "\nAssistant: #{response.content}" +puts "\nToken usage:" +puts " Input: #{response.to_h[:input_tokens]}" +puts " Output: #{response.to_h[:output_tokens]}" +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm/instance.rb b/examples/contrib/ruby_llm/instance.rb new file mode 100644 index 0000000..5feeb52 --- /dev/null +++ b/examples/contrib/ruby_llm/instance.rb @@ -0,0 +1,53 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Instance-level RubyLLM instrumentation +# +# This shows how to instrument a specific chat instance rather than +# all RubyLLM chats globally. +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/instance.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +# Create two chat instances +chat_traced = RubyLLM.chat(model: "gpt-4o-mini") +chat_untraced = RubyLLM.chat(model: "gpt-4o-mini") + +# Only instrument one of them +Braintrust.instrument!(:ruby_llm, target: chat_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +tracer.in_span("examples/contrib/ruby_llm/instance.rb") do |span| + root_span = span + + puts "Calling traced chat..." + response1 = chat_traced.ask("Say 'traced'") + puts "Traced response: #{response1.content}" + + puts "\nCalling untraced chat..." 
+ response2 = chat_untraced.ask("Say 'untraced'") + puts "Untraced response: #{response2.content}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first chat call should have a ruby_llm.chat span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm/streaming.rb b/examples/contrib/ruby_llm/streaming.rb new file mode 100644 index 0000000..179064f --- /dev/null +++ b/examples/contrib/ruby_llm/streaming.rb @@ -0,0 +1,42 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Streaming RubyLLM chat with Braintrust tracing +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/streaming.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_llm) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +puts "Streaming response..." 
+print "\nAssistant: " + +tracer.in_span("examples/contrib/ruby_llm/streaming.rb") do |span| + root_span = span + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.ask("Write a haiku about programming") do |chunk| + print chunk.content + end +end + +puts "\n\nView trace: #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm/tool_usage.rb b/examples/contrib/ruby_llm/tool_usage.rb new file mode 100644 index 0000000..fa154ea --- /dev/null +++ b/examples/contrib/ruby_llm/tool_usage.rb @@ -0,0 +1,371 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: RubyLLM tool calling with Braintrust tracing +# +# This demonstrates how tool calls create nested spans with a sophisticated +# travel planning scenario featuring: +# - Multiple tools with complex nested parameters +# - Tools that return deeply nested response structures +# - Tools that create their own internal spans (simulating API calls) +# - Multi-turn conversation with multiple tool invocations +# +# Trace hierarchy will look like: +# examples/contrib/ruby_llm/tool_usage.rb +# └── ruby_llm.chat +# ├── ruby_llm.tool.search_flights +# │ └── flight_api.search (internal span) +# ├── ruby_llm.tool.search_hotels +# │ └── hotel_api.search (internal span) +# └── ruby_llm.tool.get_destination_info +# ├── weather_api.forecast (internal span) +# └── events_api.search (internal span) +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/tool_usage.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +# Flight search tool with complex nested parameters +class SearchFlightsTool < RubyLLM::Tool + description "Search for flights between cities with flexible date options and passenger details" + + param :origin, type: :string, desc: "Origin 
airport code (e.g., SFO, JFK)" + param :destination, type: :string, desc: "Destination airport code" + param :departure_date, type: :string, desc: "Departure date (YYYY-MM-DD)" + param :return_date, type: :string, desc: "Return date for round trip (YYYY-MM-DD), optional" + param :passengers, type: :integer, desc: "Number of passengers (default: 1)" + param :cabin_class, type: :string, desc: "Cabin class: economy, premium_economy, business, first" + param :flexible_dates, type: :boolean, desc: "Search +/- 3 days from specified dates" + + def execute(origin:, destination:, departure_date:, return_date: nil, passengers: 1, cabin_class: "economy", flexible_dates: false) + tracer = OpenTelemetry.tracer_provider.tracer("flight-service") + + # Simulate an internal API call with its own span + tracer.in_span("flight_api.search", attributes: { + "flight.origin" => origin, + "flight.destination" => destination, + "flight.passengers" => passengers + }) do + sleep(0.05) # Simulate API latency + + # Return deeply nested flight results + { + search_id: "FL-#{rand(100000..999999)}", + query: { + route: {origin: origin, destination: destination}, + dates: { + departure: departure_date, + return: return_date, + flexible: flexible_dates + }, + travelers: {count: passengers, cabin: cabin_class} + }, + results: [ + { + flight_id: "UA#{rand(100..999)}", + airline: {code: "UA", name: "United Airlines"}, + segments: [ + { + departure: {airport: origin, terminal: "2", time: "#{departure_date}T08:30:00", gate: "A12"}, + arrival: {airport: destination, terminal: "B", time: "#{departure_date}T11:45:00"}, + aircraft: {type: "Boeing 737-900", wifi: true, power_outlets: true}, + duration_minutes: 195 + } + ], + pricing: { + base_fare: 299.00 * passengers, + taxes: 45.50 * passengers, + fees: {carrier_fee: 25.00, booking_fee: 0}, + total: (299.00 + 45.50 + 25.00) * passengers, + currency: "USD", + fare_class: cabin_class, + refundable: false, + change_fee: 75.00 + }, + availability: 
{seats_remaining: rand(2..9)} + }, + { + flight_id: "DL#{rand(100..999)}", + airline: {code: "DL", name: "Delta Air Lines"}, + segments: [ + { + departure: {airport: origin, terminal: "1", time: "#{departure_date}T14:15:00", gate: "C8"}, + arrival: {airport: destination, terminal: "A", time: "#{departure_date}T17:30:00"}, + aircraft: {type: "Airbus A320", wifi: true, power_outlets: true}, + duration_minutes: 195 + } + ], + pricing: { + base_fare: 279.00 * passengers, + taxes: 42.00 * passengers, + fees: {carrier_fee: 30.00, booking_fee: 0}, + total: (279.00 + 42.00 + 30.00) * passengers, + currency: "USD", + fare_class: cabin_class, + refundable: false, + change_fee: 0 + }, + availability: {seats_remaining: rand(2..9)} + } + ], + metadata: { + search_time_ms: rand(150..300), + providers_queried: %w[amadeus sabre travelport], + cache_hit: false + } + } + end + end +end + +# Hotel search tool with nested room and amenity preferences +class SearchHotelsTool < RubyLLM::Tool + description "Search for hotels with detailed room preferences and amenity filters" + + param :city, type: :string, desc: "City name for hotel search" + param :check_in, type: :string, desc: "Check-in date (YYYY-MM-DD)" + param :check_out, type: :string, desc: "Check-out date (YYYY-MM-DD)" + param :guests, type: :integer, desc: "Number of guests" + param :rooms, type: :integer, desc: "Number of rooms needed" + param :star_rating_min, type: :integer, desc: "Minimum star rating (1-5)" + param :amenities, type: :string, desc: "Comma-separated amenities: pool,gym,spa,parking,wifi,breakfast" + + def execute(city:, check_in:, check_out:, guests: 2, rooms: 1, star_rating_min: 3, amenities: "wifi") + tracer = OpenTelemetry.tracer_provider.tracer("hotel-service") + requested_amenities = amenities.split(",").map(&:strip) + + tracer.in_span("hotel_api.search", attributes: { + "hotel.city" => city, + "hotel.guests" => guests, + "hotel.rooms" => rooms + }) do + sleep(0.05) + + { + search_id: 
"HT-#{rand(100000..999999)}", + query: { + location: {city: city, radius_km: 10, center: "downtown"}, + stay: {check_in: check_in, check_out: check_out, nights: 3}, + occupancy: {guests: guests, rooms: rooms}, + filters: {min_stars: star_rating_min, amenities: requested_amenities} + }, + results: [ + { + hotel_id: "HTL-#{rand(10000..99999)}", + name: "Grand #{city} Hotel & Spa", + brand: {name: "Luxury Collection", loyalty_program: "Marriott Bonvoy"}, + location: { + address: {street: "123 Main Street", city: city, postal_code: "94102", country: "USA"}, + coordinates: {latitude: 37.7749, longitude: -122.4194}, + neighborhood: "Downtown", + transit: {nearest_metro: "Powell St", distance_km: 0.3} + }, + rating: { + stars: 5, + guest_score: 4.7, + reviews: {count: 2341, recent_sentiment: "excellent"} + }, + rooms: [ + { + room_id: "RM-#{rand(1000..9999)}", + type: "Deluxe King", + description: "Spacious room with city views", + bedding: {beds: [{type: "king", count: 1}], max_occupancy: 2}, + size: {sqft: 450, sqm: 42}, + features: %w[city_view work_desk minibar safe], + pricing: { + per_night: 289.00, + total: 867.00, + taxes: 130.05, + fees: {resort_fee: 45.00, cleaning_fee: 0}, + grand_total: 1042.05, + currency: "USD", + cancellation: {free_until: check_in, policy: "free_cancellation"} + } + } + ], + amenities: { + wellness: {pool: {indoor: true, outdoor: false, hours: "6am-10pm"}, gym: true, spa: {available: true, appointment_required: true}}, + dining: {restaurants: 2, room_service: {available: true, hours: "24/7"}, breakfast: {included: false, price: 35.00}}, + business: {wifi: {free: true, speed: "high-speed"}, business_center: true, meeting_rooms: 5}, + parking: {available: true, valet: true, self_park: 45.00, valet_price: 65.00} + }, + policies: { + check_in_time: "15:00", + check_out_time: "11:00", + pets: {allowed: true, fee: 75.00, restrictions: "dogs under 25lbs"}, + smoking: false + } + } + ], + metadata: {search_time_ms: rand(200..400), 
availability_as_of: Time.now.utc.iso8601} + } + end + end +end + +# Destination info tool that aggregates multiple data sources +class GetDestinationInfoTool < RubyLLM::Tool + description "Get comprehensive destination information including weather forecast and local events" + + param :city, type: :string, desc: "City name" + param :travel_dates, type: :string, desc: "Travel date range (YYYY-MM-DD to YYYY-MM-DD)" + param :interests, type: :string, desc: "Comma-separated interests: food,art,music,sports,nature,nightlife" + + def execute(city:, travel_dates:, interests: "food,art") + tracer = OpenTelemetry.tracer_provider.tracer("destination-service") + interest_list = interests.split(",").map(&:strip) + dates = travel_dates.split(" to ") + start_date = dates[0] + end_date = dates[1] || start_date + + # This tool makes multiple internal API calls, each with their own spans + weather_data = tracer.in_span("weather_api.forecast", attributes: {"weather.city" => city}) do + sleep(0.03) + { + location: {city: city, timezone: "America/Los_Angeles"}, + forecast: [ + {date: start_date, high_f: 68, low_f: 54, conditions: "Partly Cloudy", precipitation_chance: 10, humidity: 65, wind: {speed_mph: 12, direction: "W"}}, + {date: end_date, high_f: 72, low_f: 56, conditions: "Sunny", precipitation_chance: 0, humidity: 55, wind: {speed_mph: 8, direction: "NW"}} + ], + alerts: [], + best_times: {outdoor_activities: "10am-4pm", photography: "golden_hour"} + } + end + + events_data = tracer.in_span("events_api.search", attributes: {"events.city" => city, "events.interests" => interests}) do + sleep(0.03) + { + events: [ + { + event_id: "EVT-#{rand(10000..99999)}", + name: "#{city} Food & Wine Festival", + category: "food", + venue: {name: "Civic Center Plaza", address: "123 Civic Center", capacity: 5000}, + schedule: {date: start_date, time: "11:00", duration_hours: 8}, + pricing: {general_admission: 45.00, vip: 125.00, currency: "USD"}, + highlights: ["50+ local restaurants", "Wine 
tastings", "Cooking demos"] + }, + { + event_id: "EVT-#{rand(10000..99999)}", + name: "Modern Art Exhibition", + category: "art", + venue: {name: "#{city} Museum of Modern Art", address: "456 Art Street"}, + schedule: {date: start_date, time: "10:00", duration_hours: 6}, + pricing: {general_admission: 25.00, student: 15.00, currency: "USD"}, + highlights: ["New installations", "Interactive exhibits", "Guided tours available"] + } + ], + total_matching: 12, + filtered_by: interest_list + } + end + + { + destination: { + city: city, + overview: { + description: "A vibrant city known for its culture, cuisine, and scenic beauty", + population: "800,000+", + language: "English", + currency: {code: "USD", symbol: "$"}, + time_zone: "Pacific Time (PT)" + }, + travel_advisory: {level: "normal", last_updated: Time.now.utc.iso8601} + }, + weather: weather_data, + events: events_data, + recommendations: { + neighborhoods: [ + {name: "Downtown", vibe: "bustling", best_for: %w[dining shopping nightlife]}, + {name: "Waterfront", vibe: "scenic", best_for: %w[walking photography seafood]} + ], + tips: [ + "Book popular restaurants at least 2 weeks in advance", + "Public transit is efficient - consider a visitor pass", + "Many museums offer free admission on first Thursdays" + ] + }, + metadata: {generated_at: Time.now.utc.iso8601, data_sources: %w[weather_api events_api local_guides]} + } + end +end + +# Initialize Braintrust and instrument RubyLLM +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_llm) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +puts "=" * 70 +puts "Travel Planning Assistant with Multi-Tool Orchestration" +puts "=" * 70 +puts +puts "This example demonstrates deeply nested tracing with multiple tools." +puts "Watch the trace to see how tool calls create hierarchical spans." 
+puts + +response = tracer.in_span("examples/contrib/ruby_llm/tool_usage.rb") do |span| + root_span = span + + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.with_tools(SearchFlightsTool, SearchHotelsTool, GetDestinationInfoTool) + + puts "User: I'm planning a trip from San Francisco (SFO) to New York (JFK)" + puts " for December 15-18, 2025. Can you help me find flights, a nice" + puts " hotel downtown, and tell me what's happening in the city? I'm" + puts " interested in food and art." + puts + puts "Assistant is thinking and calling tools..." + puts "(This may take a moment as multiple tools are invoked)" + puts + + chat.ask(<<~PROMPT) + I'm planning a trip from San Francisco (SFO) to New York (JFK) for December 15-18, 2025. + I need: + 1. Flight options (economy class, 1 passenger) + 2. A nice hotel downtown (4+ stars, need wifi and gym) + 3. What's the weather forecast and any food/art events happening? + + Please search for all of this and give me a summary of the best options. + PROMPT +end + +puts "-" * 70 +puts "Assistant Response:" +puts "-" * 70 +puts response.content +puts +puts "=" * 70 +puts "Trace Information" +puts "=" * 70 +puts +puts "View the full trace: #{Braintrust::Trace.permalink(root_span)}" +puts +puts "Expected span hierarchy:" +puts " └── examples/contrib/ruby_llm/tool_usage.rb" +puts " └── ruby_llm.chat" +puts " ├── ruby_llm.tool.search_flights" +puts " │ └── flight_api.search" +puts " ├── ruby_llm.tool.search_hotels" +puts " │ └── hotel_api.search" +puts " └── ruby_llm.tool.get_destination_info" +puts " ├── weather_api.forecast" +puts " └── events_api.search" +puts + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/internal/ruby_llm.rb b/examples/internal/contrib/ruby_llm/golden.rb similarity index 98% rename from examples/internal/ruby_llm.rb rename to examples/internal/contrib/ruby_llm/golden.rb index 523e983..edf9dff 100644 --- a/examples/internal/ruby_llm.rb +++ b/examples/internal/contrib/ruby_llm/golden.rb @@ -30,8 
+30,8 @@ blocking_login: true ) -# Wrap RubyLLM module with Braintrust tracing (applies to all instances) -Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap +# Instrument all RubyLLM chats with Braintrust tracing +Braintrust.instrument!(:ruby_llm) # Configure RubyLLM with both providers RubyLLM.configure do |config| @@ -81,7 +81,7 @@ def execute(operation:, a:, b:) puts "Running comprehensive RubyLLM integration tests..." puts "Testing features across OpenAI and Anthropic providers" -tracer.in_span("examples/internal/ruby_llm.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_llm/golden.rb") do |span| root_span = span # Feature 1: Basic Chat diff --git a/examples/ruby_llm.rb b/examples/ruby_llm.rb deleted file mode 100644 index 65dd666..0000000 --- a/examples/ruby_llm.rb +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "bundler/setup" -require "braintrust" -require "ruby_llm" -require "opentelemetry/sdk" - -# Example: RubyLLM chat with Braintrust tracing -# -# This example demonstrates how to automatically trace RubyLLM API calls with Braintrust. -# -# Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/ruby_llm.rb - -# Check for API keys -unless ENV["OPENAI_API_KEY"] - puts "Error: OPENAI_API_KEY environment variable is required" - puts "Get your API key from: https://platform.openai.com/api-keys" - exit 1 -end - -Braintrust.init( - default_project: "ruby-sdk-examples", - blocking_login: true -) - -# Wrap RubyLLM module with Braintrust tracing -Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap - -RubyLLM.configure do |config| - config.openai_api_key = ENV["OPENAI_API_KEY"] -end - -chat = RubyLLM.chat(model: "gpt-4o-mini") - -tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") -root_span = nil - -# Make a chat request (automatically traced!) -puts "Sending chat request to RubyLLM..." 
-response = tracer.in_span("examples/ruby_llm.rb") do |span| - root_span = span - chat.ask("What is the capital of France?") -end - -# Print the response -puts "\n✓ Response received!" -puts "\nAssistant: #{response.content}" - -# Print usage stats -puts "\nToken usage:" -puts " Input tokens: #{response.to_h[:input_tokens]}" -puts " Output tokens: #{response.to_h[:output_tokens]}" - -# Print permalink to view this trace in Braintrust -puts "\n✓ View this trace in Braintrust:" -puts " #{Braintrust::Trace.permalink(root_span)}" - -# Shutdown to flush spans to Braintrust -OpenTelemetry.tracer_provider.shutdown - -puts "\n✓ Trace sent to Braintrust!" diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index ddf85ed..2b24890 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -101,7 +101,9 @@ def tracer_for(target, name: "braintrust") # Load integration stubs (eager load minimal metadata). require_relative "contrib/openai/integration" require_relative "contrib/ruby_openai/integration" +require_relative "contrib/ruby_llm/integration" # Register integrations Braintrust::Contrib::OpenAI::Integration.register! Braintrust::Contrib::RubyOpenAI::Integration.register! +Braintrust::Contrib::RubyLLM::Integration.register! diff --git a/lib/braintrust/contrib/ruby_llm/deprecated.rb b/lib/braintrust/contrib/ruby_llm/deprecated.rb new file mode 100644 index 0000000..8b2cafe --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/deprecated.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old ruby_llm integration API. +# This file now just delegates to the new API. + +module Braintrust + module Trace + module Contrib + module Github + module Crmne + module RubyLLM + # Wrap RubyLLM to automatically create spans for chat requests. + # This is the legacy API - delegates to the new contrib framework. 
+ # + # @param chat [RubyLLM::Chat, nil] the chat instance to wrap (if nil, wraps the class) + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider + # @return [RubyLLM::Chat, nil] the wrapped chat instance + def self.wrap(chat = nil, tracer_provider: nil) + Log.warn("Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: tracer_provider) + chat + end + + # Unwrap RubyLLM to disable Braintrust tracing. + # This is the legacy API - uses the Context pattern to disable tracing. + # + # Note: Prepended modules cannot be truly removed in Ruby. + # This method sets `enabled: false` in the Context, which the + # instrumentation checks before creating spans. + # + # @param chat [RubyLLM::Chat, nil] the chat instance to unwrap (if nil, unwraps the class) + # @return [RubyLLM::Chat, nil] the chat instance + def self.unwrap(chat = nil) + Log.warn("Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap() is deprecated and will be removed in a future version.") + + target = chat || ::RubyLLM::Chat + Braintrust::Contrib::Context.set!(target, enabled: false) + chat + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb b/lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb new file mode 100644 index 0000000..00a68f2 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb @@ -0,0 +1,464 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../support/otel" +require_relative "../../../internal/encoding" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module RubyLLM + module Instrumentation + # Chat instrumentation for RubyLLM. + # Wraps complete() and execute_tool() methods to create spans. 
+ module Chat + def self.included(base) + # Guard against double-wrapping + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + # Wrap complete() to trace chat completions. + # Each call creates a span - recursive calls from tool execution + # create nested spans (each is a separate API call). + def complete(&block) + return block ? super : super() unless tracing_enabled? + + tracer = Braintrust::Contrib.tracer_for(self) + + tracer.in_span("ruby_llm.chat") do |span| + if block + # Streaming: pass a block that calls super() with the wrapper + handle_streaming_complete(span, block) do |&wrapper| + super(&wrapper) + end + else + # Non-streaming: pass a block that calls super() + handle_non_streaming_complete(span) do + super() + end + end + end + end + + private + + # Wrap execute_tool() to trace tool executions. + # This is a private method in RubyLLM - wrapping it avoids + # conflicting with user-registered on_tool_call/on_tool_result callbacks. + def execute_tool(tool_call) + return super unless tracing_enabled? + + tracer = Braintrust::Contrib.tracer_for(self) + + tracer.in_span("ruby_llm.tool.#{tool_call.name}") do |span| + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "tool"}) + span.set_attribute("tool.name", tool_call.name) + span.set_attribute("tool.call_id", tool_call.id) + + Support::OTel.set_json_attr(span, "braintrust.input_json", { + "name" => tool_call.name, + "arguments" => tool_call.arguments + }) + + result = super + + Support::OTel.set_json_attr(span, "braintrust.output_json", result) + result + end + end + + # DEPRECATED: Support legacy unwrap() + # Checks Context for enabled: false on instance or class. + # This will be removed in a future version. + def tracing_enabled? 
+ ctx = Braintrust::Contrib.context_for(self) + class_ctx = Braintrust::Contrib.context_for(self.class) + ctx&.[](:enabled) != false && class_ctx&.[](:enabled) != false + end + + # Handle streaming complete request with tracing. + # Calls the provided block with a wrapper that aggregates chunks. + # @param span [OpenTelemetry::Trace::Span] the span to record to + # @param user_block [Proc] the streaming block from user + # @param super_caller [Proc] block that calls super(&wrapper) + def handle_streaming_complete(span, user_block, &super_caller) + aggregated_chunks = [] + metadata = extract_metadata(stream: true) + input_messages = build_input_messages + start_time = Braintrust::Internal::Time.measure + time_to_first_token = nil + + Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + + # Wrapper block that RubyLLM calls once per chunk. + # Aggregates chunks for span recording and forwards to user's block. + wrapper = proc do |chunk| + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + aggregated_chunks << chunk + user_block.call(chunk) + end + + begin + result = super_caller.call(&wrapper) + + capture_streaming_output(span, aggregated_chunks, result, time_to_first_token) + result + rescue => e + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("RubyLLM error: #{e.message}") + raise + end + end + + # Handle non-streaming complete request with tracing. + # Calls the provided block to invoke super() and returns the response. + # @param span [OpenTelemetry::Trace::Span] the span to record to + # @param super_caller [Proc] block that calls super() + def handle_non_streaming_complete(span, &super_caller) + metadata = extract_metadata + input_messages = build_input_messages + Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? 
+ + messages_before_count = messages&.length || 0 + + begin + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super_caller.call + end + + capture_non_streaming_output(span, response, messages_before_count, time_to_first_token) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + + response + rescue => e + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("RubyLLM error: #{e.message}") + raise + end + end + + # Extract metadata from chat instance (provider, model, tools, stream flag) + def extract_metadata(stream: false) + metadata = {"provider" => "ruby_llm"} + metadata["stream"] = true if stream + + # Extract model + if respond_to?(:model) && model + model_id = model.respond_to?(:id) ? model.id : model.to_s + metadata["model"] = model_id + end + + # Extract tools (only for non-streaming) + if !stream && respond_to?(:tools) && tools&.any? + metadata["tools"] = extract_tools_metadata + end + + metadata + end + + # Extract tools metadata from chat instance + def extract_tools_metadata + provider = instance_variable_get(:@provider) if instance_variable_defined?(:@provider) + + tools.map do |_name, tool| + format_tool_schema(tool, provider) + end + end + + # Format a tool into OpenAI-compatible schema + def format_tool_schema(tool, provider) + tool_schema = nil + + # Use provider-specific tool_for method if available + if provider + begin + tool_schema = if provider.is_a?(::RubyLLM::Providers::OpenAI) + ::RubyLLM::Providers::OpenAI::Tools.tool_for(tool) + elsif defined?(::RubyLLM::Providers::Anthropic) && provider.is_a?(::RubyLLM::Providers::Anthropic) + ::RubyLLM::Providers::Anthropic::Tools.tool_for(tool) + elsif tool.respond_to?(:params_schema) && tool.params_schema + build_basic_tool_schema(tool) + else + build_minimal_tool_schema(tool) + end + rescue NameError, ArgumentError => e + Braintrust::Log.debug("Failed to extract tool schema using provider-specific method: 
#{e.class.name}: #{e.message}") + tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) + end + else + tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) + end + + # Strip RubyLLM-specific fields to match native OpenAI format + function_key = tool_schema&.key?(:function) ? :function : "function" + if tool_schema && tool_schema[function_key] + tool_params = tool_schema[function_key][:parameters] || tool_schema[function_key]["parameters"] + if tool_params.is_a?(Hash) + tool_params = tool_params.dup if tool_params.frozen? + tool_params.delete("strict") + tool_params.delete(:strict) + tool_params.delete("additionalProperties") + tool_params.delete(:additionalProperties) + params_key = tool_schema[function_key].key?(:parameters) ? :parameters : "parameters" + tool_schema[function_key][params_key] = tool_params + end + end + + tool_schema + end + + # Build a basic tool schema with parameters + def build_basic_tool_schema(tool) + { + "type" => "function", + "function" => { + "name" => tool.name.to_s, + "description" => tool.description, + "parameters" => tool.params_schema + } + } + end + + # Build a minimal tool schema without parameters + def build_minimal_tool_schema(tool) + { + "type" => "function", + "function" => { + "name" => tool.name.to_s, + "description" => tool.description, + "parameters" => {} + } + } + end + + # Build input messages array from chat history + def build_input_messages + return [] unless respond_to?(:messages) && messages&.any? 
+ + messages.map { |m| format_message_for_input(m) } + end + + # Format a RubyLLM message to OpenAI-compatible format + def format_message_for_input(msg) + formatted = {"role" => msg.role.to_s} + + # Handle content + if msg.respond_to?(:content) && msg.content + raw_content = msg.content + + # Check if content is a Content object with attachments (issue #71) + formatted["content"] = if raw_content.respond_to?(:text) && raw_content.respond_to?(:attachments) && raw_content.attachments&.any? + format_multipart_content(raw_content) + else + format_simple_content(raw_content, msg.role.to_s) + end + end + + # Handle tool_calls for assistant messages + if msg.respond_to?(:tool_calls) && msg.tool_calls&.any? + formatted["tool_calls"] = format_tool_calls(msg.tool_calls) + formatted["content"] = nil + end + + # Handle tool_call_id for tool result messages + if msg.respond_to?(:tool_call_id) && msg.tool_call_id + formatted["tool_call_id"] = msg.tool_call_id + end + + formatted + end + + # Format multipart content with text and attachments + # @param content_obj [Object] Content object with text and attachments + # @return [Array] array of content parts + def format_multipart_content(content_obj) + content_parts = [] + + # Add text part + content_parts << {"type" => "text", "text" => content_obj.text} if content_obj.text + + # Add attachment parts (convert to Braintrust format) + content_obj.attachments.each do |attachment| + content_parts << format_attachment_for_input(attachment) + end + + content_parts + end + + # Format simple text content + # @param raw_content [Object] String or Content object with text + # @param role [String] the message role + # @return [String] formatted text content + def format_simple_content(raw_content, role) + content = raw_content + content = content.text if content.respond_to?(:text) + + # Convert Ruby hash string to JSON for tool results + if role == "tool" && content.is_a?(String) && content.start_with?("{:") + begin + content = 
content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":") + rescue + # Keep original if conversion fails + end + end + + content + end + + # Format a RubyLLM attachment to OpenAI-compatible format + # @param attachment [Object] the RubyLLM attachment + # @return [Hash] OpenAI image_url format for consistency with other integrations + def format_attachment_for_input(attachment) + # RubyLLM Attachment has: source (Pathname), filename, mime_type + if attachment.respond_to?(:source) && attachment.source + begin + data = File.binread(attachment.source.to_s) + encoded = Braintrust::Internal::Encoding::Base64.strict_encode64(data) + mime_type = attachment.respond_to?(:mime_type) ? attachment.mime_type : "application/octet-stream" + + # Use OpenAI's image_url format for consistency + { + "type" => "image_url", + "image_url" => { + "url" => "data:#{mime_type};base64,#{encoded}" + } + } + rescue => e + Braintrust::Log.debug("Failed to read attachment file: #{e.message}") + # Return a placeholder if we can't read the file + {"type" => "text", "text" => "[attachment: #{attachment.respond_to?(:filename) ? attachment.filename : "unknown"}]"} + end + elsif attachment.respond_to?(:to_h) + # Try to use attachment's own serialization + attachment.to_h + else + {"type" => "text", "text" => "[attachment]"} + end + end + + # Format tool calls into OpenAI format + def format_tool_calls(tool_calls) + tool_calls.map do |_id, tc| + args = tc.arguments + args_string = args.is_a?(String) ? args : JSON.generate(args) + + { + "id" => tc.id, + "type" => "function", + "function" => { + "name" => tc.name, + "arguments" => args_string + } + } + end + end + + # Capture streaming output and metrics + def capture_streaming_output(span, aggregated_chunks, result, time_to_first_token) + return if aggregated_chunks.empty? 
+ + # Aggregate content from chunks + # Extract text from Content objects if present (issue #71) + aggregated_content = aggregated_chunks.map { |c| + content = c.respond_to?(:content) ? c.content : c.to_s + content = content.text if content.respond_to?(:text) + content + }.join + + output = [{ + role: "assistant", + content: aggregated_content + }] + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + + # Set metrics (token usage + time_to_first_token) + # RubyLLM stores tokens directly in the response hash, not in a nested usage object + metrics = {} + if result.respond_to?(:to_h) + result_hash = result.to_h + usage = { + "input_tokens" => result_hash[:input_tokens], + "output_tokens" => result_hash[:output_tokens], + "cached_tokens" => result_hash[:cached_tokens], + "cache_creation_tokens" => result_hash[:cache_creation_tokens] + }.compact + + unless usage.empty? + metrics = Common.parse_usage_tokens(usage) + end + end + metrics["time_to_first_token"] = time_to_first_token if time_to_first_token + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + end + + # Capture non-streaming output and metrics + def capture_non_streaming_output(span, response, messages_before_count, time_to_first_token) + return unless response + + message = { + "role" => "assistant", + "content" => nil + } + + # Add content if it's a simple text response + # Extract text from Content objects if present (issue #71) + if response.respond_to?(:content) && response.content && !response.content.empty? + content = response.content + content = content.text if content.respond_to?(:text) + message["content"] = content + end + + # Check if there are tool calls in the messages history + if respond_to?(:messages) && messages + assistant_msg = messages[messages_before_count..]&.find { |m| + m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any? + } + + if assistant_msg&.tool_calls&.any? 
+ message["tool_calls"] = format_tool_calls(assistant_msg.tool_calls) + message["content"] = nil + end + end + + output = [{ + "index" => 0, + "message" => message, + "finish_reason" => message["tool_calls"] ? "tool_calls" : "stop" + }] + + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + + # Set metrics (token usage + time_to_first_token) + metrics = {} + if response.respond_to?(:to_h) + response_hash = response.to_h + usage = { + "input_tokens" => response_hash[:input_tokens], + "output_tokens" => response_hash[:output_tokens], + "cached_tokens" => response_hash[:cached_tokens], + "cache_creation_tokens" => response_hash[:cache_creation_tokens] + }.compact + + unless usage.empty? + metrics = Common.parse_usage_tokens(usage) + end + end + metrics["time_to_first_token"] = time_to_first_token if time_to_first_token + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/instrumentation/common.rb b/lib/braintrust/contrib/ruby_llm/instrumentation/common.rb new file mode 100644 index 0000000..e1df858 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/instrumentation/common.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module RubyLLM + module Instrumentation + # Common utilities for RubyLLM instrumentation. + module Common + # Parse RubyLLM usage tokens into normalized Braintrust metrics. + # RubyLLM normalizes token fields from all providers (OpenAI, Anthropic, etc.) 
+ # into a consistent format: + # - input_tokens: prompt tokens sent + # - output_tokens: completion tokens received + # - cached_tokens: tokens read from cache + # - cache_creation_tokens: tokens written to cache + # + # @param usage [Hash, Object] usage object from RubyLLM response + # @return [Hash] normalized metrics for Braintrust + def self.parse_usage_tokens(usage) + metrics = {} + return metrics unless usage + + usage_hash = usage.respond_to?(:to_h) ? usage.to_h : usage + return metrics unless usage_hash.is_a?(Hash) + + # RubyLLM normalized field mappings → Braintrust metrics + field_map = { + "input_tokens" => "prompt_tokens", + "output_tokens" => "completion_tokens", + "cached_tokens" => "prompt_cached_tokens", + "cache_creation_tokens" => "prompt_cache_creation_tokens" + } + + usage_hash.each do |key, value| + next unless value.is_a?(Numeric) + key_str = key.to_s + target = field_map[key_str] + metrics[target] = value.to_i if target + end + + # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs) + prompt_tokens = (metrics["prompt_tokens"] || 0) + + (metrics["prompt_cached_tokens"] || 0) + + (metrics["prompt_cache_creation_tokens"] || 0) + metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0 + + # Calculate total + if metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") + metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] + end + + metrics + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/integration.rb b/lib/braintrust/contrib/ruby_llm/integration.rb new file mode 100644 index 0000000..69561b5 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/integration.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require_relative "../integration" +require_relative "deprecated" + +module Braintrust + module Contrib + module RubyLLM + # RubyLLM integration for automatic instrumentation. + # Instruments the crmne/ruby_llm gem. 
+ class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "1.8.0" + + GEM_NAMES = ["ruby_llm"].freeze + REQUIRE_PATHS = ["ruby_llm"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :ruby_llm + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if ruby_llm gem is available + def self.loaded? + defined?(::RubyLLM::Chat) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. + # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [ChatPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/patcher.rb b/lib/braintrust/contrib/ruby_llm/patcher.rb new file mode 100644 index 0000000..a4fde88 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/patcher.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/chat" + +module Braintrust + module Contrib + module RubyLLM + # Patcher for RubyLLM chat completions. + # Instruments RubyLLM::Chat#complete and #execute_tool methods. + class ChatPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::RubyLLM::Chat) + end + + def patched?(**options) + target_class = options[:target]&.singleton_class || ::RubyLLM::Chat + Instrumentation::Chat.applied?(target_class) + end + + # Perform the actual patching. 
+ # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + if options[:target] + # Instance-level (for only this chat instance) + raise ArgumentError, "target must be a kind of ::RubyLLM::Chat" unless options[:target].is_a?(::RubyLLM::Chat) + + options[:target].singleton_class.include(Instrumentation::Chat) + else + # Class-level (for all chat instances) + ::RubyLLM::Chat.include(Instrumentation::Chat) + end + end + end + end + end + end +end diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb index 6bf9434..b87d804 100644 --- a/lib/braintrust/trace.rb +++ b/lib/braintrust/trace.rb @@ -14,23 +14,6 @@ # Anthropic gem not installed - integration will not be available end -# RubyLLM integration is optional - automatically loaded if ruby_llm gem is available -# -# Usage: -# # Wrap the class once (affects all instances): -# Braintrust::Trace::RubyLLM.wrap -# -# # Or wrap a specific instance: -# chat = RubyLLM.chat(model: "gpt-4o-mini") -# Braintrust::Trace::RubyLLM.wrap(chat) -# -begin - require "ruby_llm" - require_relative "trace/contrib/github.com/crmne/ruby_llm" -rescue LoadError - # RubyLLM gem not installed - integration will not be available -end - module Braintrust module Trace # Set up OpenTelemetry tracing with Braintrust diff --git a/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb b/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb deleted file mode 100644 index 4d3db6e..0000000 --- a/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb +++ /dev/null @@ -1,631 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../../../tokens" -require_relative "../../../../logger" -require_relative 
"../../../../internal/encoding" - -module Braintrust - module Trace - module Contrib - module Github - module Crmne - module RubyLLM - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - rescue => e - Log.debug("Failed to serialize #{attr_name}: #{e.message}") - end - - # Parse usage tokens from RubyLLM response - # RubyLLM uses Anthropic-style field naming (input_tokens, output_tokens) - # @param usage [Hash, Object] usage object from RubyLLM response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_anthropic_usage_tokens(usage) - end - - # Wrap RubyLLM to automatically create spans for chat requests - # Supports both synchronous and streaming requests - # - # Usage: - # # Wrap the class once (affects all future instances): - # Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap - # - # # Or wrap a specific instance: - # chat = RubyLLM.chat(model: "gpt-4o-mini") - # Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat) - # - # @param chat [RubyLLM::Chat, nil] the RubyLLM chat instance to wrap (if nil, wraps the class) - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(chat = nil, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # If no chat instance provided, wrap the class globally via initialize hook - if chat.nil? 
- return if defined?(::RubyLLM::Chat) && ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) - - # Create module that wraps initialize to auto-wrap each new instance - wrapper_module = Module.new do - define_method(:initialize) do |*args, **kwargs, &block| - super(*args, **kwargs, &block) - # Auto-wrap this instance during initialization - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(self, tracer_provider: tracer_provider) - self - end - end - - # Store reference to wrapper module for cleanup - ::RubyLLM::Chat.instance_variable_set(:@braintrust_wrapper_module, wrapper_module) - ::RubyLLM::Chat.prepend(wrapper_module) - return nil - end - - # Check if already wrapped to make this idempotent - return chat if chat.instance_variable_get(:@braintrust_wrapped) - - # Create a wrapper module that intercepts chat.complete - wrapper = create_wrapper_module(tracer_provider) - - # Mark as wrapped and prepend the wrapper to the chat instance - chat.instance_variable_set(:@braintrust_wrapped, true) - chat.singleton_class.prepend(wrapper) - - # Register tool callbacks for tool span creation - register_tool_callbacks(chat, tracer_provider) - - chat - end - - # Register callbacks for tool execution tracing - # @param chat [RubyLLM::Chat] the chat instance - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.register_tool_callbacks(chat, tracer_provider) - tracer = tracer_provider.tracer("braintrust") - - # Track tool spans by tool_call_id - tool_spans = {} - - # Start tool span when tool is called - chat.on_tool_call do |tool_call| - span = tracer.start_span("ruby_llm.tool.#{tool_call.name}") - set_json_attr(span, "braintrust.span_attributes", {type: "tool"}) - span.set_attribute("tool.name", tool_call.name) - span.set_attribute("tool.call_id", tool_call.id) - - # Store tool input - input = { - "name" => tool_call.name, - "arguments" => tool_call.arguments - } - set_json_attr(span, 
"braintrust.input_json", input) - - tool_spans[tool_call.id] = span - end - - # End tool span when result is received - chat.on_tool_result do |result| - # Find the most recent tool span (RubyLLM doesn't pass tool_call_id to on_tool_result) - # The spans are processed in order, so we can use the first unfinished one - tool_call_id, span = tool_spans.find { |_id, s| s } - if span - # Store tool output - set_json_attr(span, "braintrust.output_json", result) - span.finish - tool_spans.delete(tool_call_id) - end - end - end - - # Unwrap RubyLLM to remove Braintrust tracing - # For class-level unwrapping, removes the initialize override from the wrapper module - # For instance-level unwrapping, clears the wrapped flag - # - # @param chat [RubyLLM::Chat, nil] the RubyLLM chat instance to unwrap (if nil, unwraps the class) - def self.unwrap(chat = nil) - # If no chat instance provided, unwrap the class globally - if chat.nil? - if defined?(::RubyLLM::Chat) && ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) - wrapper_module = ::RubyLLM::Chat.instance_variable_get(:@braintrust_wrapper_module) - # Redefine initialize to just call super (disables auto-wrapping) - # We can't actually remove a prepended module, so we make it a no-op - wrapper_module.module_eval do - define_method(:initialize) do |*args, **kwargs, &block| - super(*args, **kwargs, &block) - end - end - ::RubyLLM::Chat.remove_instance_variable(:@braintrust_wrapper_module) - end - return nil - end - - # Unwrap instance - chat.remove_instance_variable(:@braintrust_wrapped) if chat.instance_variable_defined?(:@braintrust_wrapped) - chat - end - - # Wrap the RubyLLM::Chat class globally - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_class(tracer_provider) - return unless defined?(::RubyLLM::Chat) - - wrapper = create_wrapper_module(tracer_provider) - ::RubyLLM::Chat.prepend(wrapper) - end - - # Create the wrapper module that intercepts 
chat.complete - # We wrap complete() instead of ask() because: - # - ask() internally calls complete() for the actual API call - # - ActiveRecord integration (acts_as_chat) calls complete() directly - # - This ensures all LLM calls are traced regardless of entry point - # - # Important: RubyLLM's complete() calls itself recursively for tool execution. - # We only create a span for the outermost call to avoid duplicate spans. - # Tool execution is traced separately via on_tool_call/on_tool_result callbacks. - # - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - # @return [Module] the wrapper module - def self.create_wrapper_module(tracer_provider) - Module.new do - define_method(:complete) do |&block| - # Check if we're already inside a traced complete() call - # If so, just call super without creating a new span - if @braintrust_in_complete - if block - return super(&block) - else - return super() - end - end - - tracer = tracer_provider.tracer("braintrust") - - # Mark that we're inside a complete() call - @braintrust_in_complete = true - - begin - if block - # Handle streaming request - wrapped_block = proc do |chunk| - block.call(chunk) - end - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_complete(self, tracer, block) do |aggregated_chunks| - super(&proc do |chunk| - aggregated_chunks << chunk - wrapped_block.call(chunk) - end) - end - else - # Handle non-streaming request - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_complete(self, tracer) do - super() - end - end - ensure - @braintrust_in_complete = false - end - end - end - end - - # Handle streaming complete request with tracing - # @param chat [RubyLLM::Chat] the chat instance - # @param tracer [OpenTelemetry::Trace::Tracer] the tracer - # @param block [Proc] the streaming block - def self.handle_streaming_complete(chat, tracer, block) - # Start span immediately for accurate timing - span = 
tracer.start_span("ruby_llm.chat") - - aggregated_chunks = [] - - # Extract metadata and build input messages - # For complete(), messages are already in chat history (no prompt param) - metadata = extract_metadata(chat, stream: true) - input_messages = build_input_messages(chat, nil) - - # Set input and metadata - set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? - set_json_attr(span, "braintrust.metadata", metadata) - - # Call original method, passing aggregated_chunks to the block - begin - result = yield aggregated_chunks - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("RubyLLM error: #{e.message}") - span.finish - raise - end - - # Set output and metrics from aggregated chunks - capture_streaming_output(span, aggregated_chunks, result) - span.finish - result - end - - # Handle non-streaming complete request with tracing - # @param chat [RubyLLM::Chat] the chat instance - # @param tracer [OpenTelemetry::Trace::Tracer] the tracer - def self.handle_non_streaming_complete(chat, tracer) - # Start span immediately for accurate timing - span = tracer.start_span("ruby_llm.chat") - - begin - # Extract metadata and build input messages - # For complete(), messages are already in chat history (no prompt param) - metadata = extract_metadata(chat) - input_messages = build_input_messages(chat, nil) - set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? - - # Remember message count before the call (for tool call detection) - messages_before_count = (chat.respond_to?(:messages) && chat.messages) ? 
chat.messages.length : 0 - - # Call the original method - response = yield - - # Capture output and metrics - capture_non_streaming_output(span, chat, response, messages_before_count) - - # Set metadata - set_json_attr(span, "braintrust.metadata", metadata) - - response - ensure - span.finish - end - end - - # Extract metadata from chat instance (provider, model, tools, stream flag) - # @param chat [RubyLLM::Chat] the chat instance - # @param stream [Boolean] whether this is a streaming request - # @return [Hash] metadata hash - def self.extract_metadata(chat, stream: false) - metadata = {"provider" => "ruby_llm"} - metadata["stream"] = true if stream - - # Extract model - if chat.respond_to?(:model) && chat.model - model = chat.model.respond_to?(:id) ? chat.model.id : chat.model.to_s - metadata["model"] = model - end - - # Extract tools (only for non-streaming) - if !stream && chat.respond_to?(:tools) && chat.tools&.any? - metadata["tools"] = extract_tools_metadata(chat) - end - - metadata - end - - # Extract tools metadata from chat instance - # @param chat [RubyLLM::Chat] the chat instance - # @return [Array] array of tool schemas - def self.extract_tools_metadata(chat) - provider = chat.instance_variable_get(:@provider) if chat.instance_variable_defined?(:@provider) - - chat.tools.map do |_name, tool| - format_tool_schema(tool, provider) - end - end - - # Format a tool into OpenAI-compatible schema - # @param tool [Object] the tool object - # @param provider [Object, nil] the provider instance - # @return [Hash] tool schema - def self.format_tool_schema(tool, provider) - tool_schema = nil - - # Use provider-specific tool_for method if available - if provider - begin - tool_schema = if provider.is_a?(::RubyLLM::Providers::OpenAI) - ::RubyLLM::Providers::OpenAI::Tools.tool_for(tool) - elsif defined?(::RubyLLM::Providers::Anthropic) && provider.is_a?(::RubyLLM::Providers::Anthropic) - ::RubyLLM::Providers::Anthropic::Tools.tool_for(tool) - elsif 
tool.respond_to?(:params_schema) && tool.params_schema - build_basic_tool_schema(tool) - else - build_minimal_tool_schema(tool) - end - rescue NameError, ArgumentError => e - # If provider-specific tool_for fails, fall back to basic format - Log.debug("Failed to extract tool schema using provider-specific method: #{e.class.name}: #{e.message}") - tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) - end - else - # No provider, use basic format with params_schema if available - tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) - end - - # Strip RubyLLM-specific fields to match native OpenAI format - # Handle both symbol and string keys - function_key = tool_schema&.key?(:function) ? :function : "function" - if tool_schema && tool_schema[function_key] - tool_params = tool_schema[function_key][:parameters] || tool_schema[function_key]["parameters"] - if tool_params.is_a?(Hash) - # Create a mutable copy if the hash is frozen - tool_params = tool_params.dup if tool_params.frozen? - tool_params.delete("strict") - tool_params.delete(:strict) - tool_params.delete("additionalProperties") - tool_params.delete(:additionalProperties) - # Assign the modified copy back - params_key = tool_schema[function_key].key?(:parameters) ? 
:parameters : "parameters" - tool_schema[function_key][params_key] = tool_params - end - end - - tool_schema - end - - # Build a basic tool schema with parameters - # @param tool [Object] the tool object - # @return [Hash] tool schema - def self.build_basic_tool_schema(tool) - { - "type" => "function", - "function" => { - "name" => tool.name.to_s, - "description" => tool.description, - "parameters" => tool.params_schema - } - } - end - - # Build a minimal tool schema without parameters - # @param tool [Object] the tool object - # @return [Hash] tool schema - def self.build_minimal_tool_schema(tool) - { - "type" => "function", - "function" => { - "name" => tool.name.to_s, - "description" => tool.description, - "parameters" => {} - } - } - end - - # Build input messages array from chat history and prompt - # Formats messages to match OpenAI's message format - # @param chat [RubyLLM::Chat] the chat instance - # @param prompt [String, nil] the user prompt - # @return [Array] array of message hashes - def self.build_input_messages(chat, prompt) - input_messages = [] - - # Add conversation history, formatting each message to OpenAI format - if chat.respond_to?(:messages) && chat.messages&.any? - input_messages = chat.messages.map { |m| format_message_for_input(m) } - end - - # Add current prompt - input_messages << {"role" => "user", "content" => prompt} if prompt - - input_messages - end - - # Format a RubyLLM message to OpenAI-compatible format - # @param msg [Object] the RubyLLM message - # @return [Hash] OpenAI-formatted message - def self.format_message_for_input(msg) - formatted = { - "role" => msg.role.to_s - } - - # Handle content - if msg.respond_to?(:content) && msg.content - raw_content = msg.content - - # Check if content is a Content object with attachments (issue #71) - formatted["content"] = if raw_content.respond_to?(:text) && raw_content.respond_to?(:attachments) && raw_content.attachments&.any? 
- format_multipart_content(raw_content) - else - format_simple_content(raw_content, msg.role.to_s) - end - end - - # Handle tool_calls for assistant messages - if msg.respond_to?(:tool_calls) && msg.tool_calls&.any? - formatted["tool_calls"] = format_tool_calls(msg.tool_calls) - formatted["content"] = nil - end - - # Handle tool_call_id for tool result messages - if msg.respond_to?(:tool_call_id) && msg.tool_call_id - formatted["tool_call_id"] = msg.tool_call_id - end - - formatted - end - - # Format multipart content with text and attachments - # @param content_obj [Object] Content object with text and attachments - # @return [Array] array of content parts - def self.format_multipart_content(content_obj) - content_parts = [] - - # Add text part - content_parts << {"type" => "text", "text" => content_obj.text} if content_obj.text - - # Add attachment parts (convert to Braintrust format) - content_obj.attachments.each do |attachment| - content_parts << format_attachment_for_input(attachment) - end - - content_parts - end - - # Format simple text content - # @param raw_content [Object] String or Content object with text - # @param role [String] the message role - # @return [String] formatted text content - def self.format_simple_content(raw_content, role) - content = raw_content - content = content.text if content.respond_to?(:text) - - # Convert Ruby hash string to JSON for tool results - if role == "tool" && content.is_a?(String) && content.start_with?("{:") - begin - content = content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":") - rescue - # Keep original if conversion fails - end - end - - content - end - - # Format a RubyLLM attachment to OpenAI-compatible format - # @param attachment [Object] the RubyLLM attachment - # @return [Hash] OpenAI image_url format for consistency with other integrations - def self.format_attachment_for_input(attachment) - # RubyLLM Attachment has: source (Pathname), filename, mime_type - if attachment.respond_to?(:source) && 
attachment.source - begin - data = File.binread(attachment.source.to_s) - encoded = Internal::Encoding::Base64.strict_encode64(data) - mime_type = attachment.respond_to?(:mime_type) ? attachment.mime_type : "application/octet-stream" - - # Use OpenAI's image_url format for consistency - { - "type" => "image_url", - "image_url" => { - "url" => "data:#{mime_type};base64,#{encoded}" - } - } - rescue => e - Log.debug("Failed to read attachment file: #{e.message}") - # Return a placeholder if we can't read the file - {"type" => "text", "text" => "[attachment: #{attachment.respond_to?(:filename) ? attachment.filename : "unknown"}]"} - end - elsif attachment.respond_to?(:to_h) - # Try to use attachment's own serialization - attachment.to_h - else - {"type" => "text", "text" => "[attachment]"} - end - end - - # Capture streaming output and metrics - # @param span [OpenTelemetry::Trace::Span] the span - # @param aggregated_chunks [Array] the aggregated chunks - # @param result [Object] the result object - def self.capture_streaming_output(span, aggregated_chunks, result) - return if aggregated_chunks.empty? - - # Aggregate content from chunks - # Extract text from Content objects if present (issue #71) - aggregated_content = aggregated_chunks.map { |c| - content = c.respond_to?(:content) ? c.content : c.to_s - content = content.text if content.respond_to?(:text) - content - }.join - - output = [{ - role: "assistant", - content: aggregated_content - }] - set_json_attr(span, "braintrust.output_json", output) - - # Try to extract usage from the result - if result.respond_to?(:usage) && result.usage - metrics = parse_usage_tokens(result.usage) - set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- end - end - - # Capture non-streaming output and metrics - # @param span [OpenTelemetry::Trace::Span] the span - # @param chat [RubyLLM::Chat] the chat instance - # @param response [Object] the response object - # @param messages_before_count [Integer] message count before the call - def self.capture_non_streaming_output(span, chat, response, messages_before_count) - return unless response - - # Build message object from response - message = { - "role" => "assistant", - "content" => nil - } - - # Add content if it's a simple text response - # Extract text from Content objects if present (issue #71) - if response.respond_to?(:content) && response.content && !response.content.empty? - content = response.content - content = content.text if content.respond_to?(:text) - message["content"] = content - end - - # Check if there are tool calls in the messages history - # Look at messages added during this complete() call - if chat.respond_to?(:messages) && chat.messages - assistant_msg = chat.messages[messages_before_count..].find { |m| - m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any? - } - - if assistant_msg&.tool_calls&.any? - message["tool_calls"] = format_tool_calls(assistant_msg.tool_calls) - message["content"] = nil - end - end - - # Format as OpenAI choices[] structure - output = [{ - "index" => 0, - "message" => message, - "finish_reason" => message["tool_calls"] ? "tool_calls" : "stop" - }] - - set_json_attr(span, "braintrust.output_json", output) - - # Set metrics (token usage) - if response.respond_to?(:to_h) - response_hash = response.to_h - usage = { - "input_tokens" => response_hash[:input_tokens], - "output_tokens" => response_hash[:output_tokens], - "cached_tokens" => response_hash[:cached_tokens], - "cache_creation_tokens" => response_hash[:cache_creation_tokens] - }.compact - - unless usage.empty? - metrics = parse_usage_tokens(usage) - set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- end - end - end - - # Format tool calls into OpenAI format - # @param tool_calls [Hash, Array] the tool calls - # @return [Array] formatted tool calls - def self.format_tool_calls(tool_calls) - tool_calls.map do |_id, tc| - # Ensure arguments is a JSON string (OpenAI format) - args = tc.arguments - args_string = args.is_a?(String) ? args : JSON.generate(args) - - { - "id" => tc.id, - "type" => "function", - "function" => { - "name" => tc.name, - "arguments" => args_string - } - } - end - end - end - end - end - end - end -end diff --git a/test/braintrust/contrib/ruby_llm/deprecated_test.rb b/test/braintrust/contrib/ruby_llm/deprecated_test.rb new file mode 100644 index 0000000..0abf3af --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/deprecated_test.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/ruby_llm/deprecated" + +class Braintrust::Contrib::RubyLLM::DeprecatedTest < Minitest::Test + # --- .wrap --- + + def test_wrap_delegates_to_instrument + mock_chat = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(mock_chat, tracer_provider: mock_tracer) } + end + + assert_equal :ruby_llm, captured_args[0] + assert_same mock_chat, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_with_nil_target_instruments_class + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap } + end + + assert_equal :ruby_llm, captured_args[0] + assert_nil captured_args[1][:target] + end + + def test_wrap_logs_deprecation_warning + mock_chat = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message 
= msg }) do + Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(mock_chat) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, warning_message) + end + + def test_wrap_returns_chat + mock_chat = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(mock_chat) } + assert_same mock_chat, result + end + end + + # --- .unwrap --- + + def test_unwrap_sets_context_enabled_false + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + mock_chat = Object.new + + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(mock_chat) } + + ctx = Braintrust::Contrib::Context.from(mock_chat) + assert_equal false, ctx[:enabled] + end + + def test_unwrap_with_nil_target_sets_class_context + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap } + + ctx = Braintrust::Contrib::Context.from(::RubyLLM::Chat) + assert_equal false, ctx[:enabled] + + # Clean up + Braintrust::Contrib::Context.set!(::RubyLLM::Chat, enabled: true) + end + + def test_unwrap_logs_deprecation_warning + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + mock_chat = Object.new + + warning_message = nil + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(mock_chat) + end + + assert_match(/deprecated/, warning_message) + end + + def test_unwrap_returns_chat + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + mock_chat = Object.new + + result = suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(mock_chat) } + assert_same mock_chat, result + end + + # --- E2E: unwrap disables instrumentation --- + + def test_unwrap_disables_instrumentation_for_subsequent_requests + skip "RubyLLM gem not available" unless 
defined?(::RubyLLM::Chat) + + VCR.use_cassette("contrib/ruby_llm/basic_chat") do + rig = setup_otel_test_rig + + RubyLLM.configure do |config| + config.openai_api_key = get_openai_key + end + + chat = RubyLLM.chat(model: "gpt-4o-mini") + + # Instrument the chat instance + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + + # First request should create a span + chat.ask("Say 'test'") + spans_before = rig.drain + assert_equal 1, spans_before.length, "Expected 1 span before unwrap" + + # Now unwrap the chat instance + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(chat) } + + # Second request should NOT create a span + chat.ask("Say 'test'") + spans_after = rig.drain + assert_equal 0, spans_after.length, "Expected 0 spans after unwrap" + end + end +end diff --git a/test/braintrust/trace/ruby_llm_test.rb b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb similarity index 59% rename from test/braintrust/trace/ruby_llm_test.rb rename to test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb index f5dc38a..c0d1f5a 100644 --- a/test/braintrust/trace/ruby_llm_test.rb +++ b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb @@ -1,17 +1,65 @@ # frozen_string_literal: true require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/ruby_llm/instrumentation/chat" + +class Braintrust::Contrib::RubyLLM::Instrumentation::ChatTest < Minitest::Test + Chat = Braintrust::Contrib::RubyLLM::Instrumentation::Chat + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Chat::InstanceMethods]) + mock.expect(:prepend, nil, [Chat::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Chat.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied 
+ base = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + # Should not raise or double-prepend + Chat.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Chat::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Chat.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + assert Chat.applied?(base) + end +end # Define test tool classes for different ruby_llm versions -# Must be defined at module level for RubyLLM to find them properly if defined?(RubyLLM) - # Check ruby_llm version to determine which API to use RUBY_LLM_VERSION = Gem.loaded_specs["ruby_llm"]&.version || Gem::Version.new("0.0.0") SUPPORTS_PARAMS_DSL = Gem::Version.new("1.9.0") <= RUBY_LLM_VERSION - # Tool for ruby_llm 1.8 (using param - singular) - # This syntax works in 1.8+ including 1.9+ for backward compatibility - class WeatherTestToolV18 < RubyLLM::Tool + # Tool for ruby_llm 1.8+ (using param - singular) + class WeatherTestTool < RubyLLM::Tool description "Get the current weather for a location" param :location, type: :string, desc: "The city and state, e.g. San Francisco, CA" @@ -21,59 +69,37 @@ def execute(location:, unit: "fahrenheit") {location: location, temperature: 72, unit: unit, conditions: "sunny"} end end - - # Tool for ruby_llm 1.9+ (using params - plural) - # This new DSL only works in 1.9+ - if SUPPORTS_PARAMS_DSL - class WeatherTestToolV19 < RubyLLM::Tool - description "Get the current weather for a location" - - params do - string :location, description: "The city and state, e.g. 
San Francisco, CA" - string :unit, description: "Temperature unit (celsius or fahrenheit)" - end - - def execute(location:, unit: "fahrenheit") - {location: location, temperature: 72, unit: unit, conditions: "sunny"} - end - end - end end -class RubyLLMIntegrationTest < Minitest::Test +# E2E tests for Chat instrumentation +class Braintrust::Contrib::RubyLLM::Instrumentation::ChatE2ETest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + def setup - # Skip all RubyLLM tests if the gem is not available - skip "RubyLLM gem not available" unless defined?(RubyLLM) + skip_unless_ruby_llm! end - def test_wrap_creates_span_for_basic_chat - VCR.use_cassette("ruby_llm/basic_chat") do - require "ruby_llm" + # --- #complete (non-streaming) --- - # Set up test rig (includes Braintrust processor) + def test_complete_creates_span_with_correct_attributes + VCR.use_cassette("contrib/ruby_llm/basic_chat") do rig = setup_otel_test_rig - # Configure RubyLLM (use real key for recording, fake key for playback) RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it with Braintrust tracing chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - # Make a simple chat request response = chat.ask("Say 'test'") - # Verify response refute_nil response refute_nil response.content assert response.content.length > 0 - # Drain and verify span span = rig.drain_one - # Verify span name assert_equal "ruby_llm.chat", span.name # Verify braintrust.input_json contains messages @@ -99,38 +125,35 @@ def test_wrap_creates_span_for_basic_chat assert metrics["prompt_tokens"] > 0 assert metrics["completion_tokens"] > 0 assert metrics["tokens"] > 0 + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), 
"Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_creates_span_for_streaming_chat - VCR.use_cassette("ruby_llm/streaming_chat") do - require "ruby_llm" + # --- #complete (streaming) --- - # Set up test rig + def test_streaming_complete_creates_span + VCR.use_cassette("contrib/ruby_llm/streaming_chat") do rig = setup_otel_test_rig - # Configure RubyLLM (use real key for recording, fake key for playback) RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - # Make a streaming chat request chunks = [] chat.ask("Count to 3") do |chunk| chunks << chunk end - # Verify chunks were received refute_empty chunks - # Drain and verify span span = rig.drain_one - # Verify span name (same for streaming and non-streaming) assert_equal "ruby_llm.chat", span.name # Verify input @@ -141,64 +164,76 @@ def test_wrap_creates_span_for_streaming_chat output = JSON.parse(span.attributes["braintrust.output_json"]) refute_nil output - # Verify metadata + # Verify metadata has stream flag assert span.attributes.key?("braintrust.metadata") metadata = JSON.parse(span.attributes["braintrust.metadata"]) assert_equal "ruby_llm", metadata["provider"] assert_equal true, metadata["stream"] + + # Verify metrics are present + assert span.attributes.key?("braintrust.metrics"), "Should have metrics attribute" + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + + # Verify time_to_first_token + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" + + # Verify token usage metrics + 
assert metrics["prompt_tokens"] > 0, "Should have prompt_tokens > 0" + assert metrics["completion_tokens"] > 0, "Should have completion_tokens > 0" + assert metrics["tokens"] > 0, "Should have total tokens > 0" end end - def test_wrap_creates_span_for_tool_calling - skip "Tool calling test requires ruby_llm >= 1.9" unless Gem::Version.new("1.9.0") <= RUBY_LLM_VERSION + # --- Tool calling --- - VCR.use_cassette("ruby_llm/tool_calling") do - require "ruby_llm" + def test_tool_calling_creates_nested_spans + skip "Tool calling test requires ruby_llm >= 1.9" unless defined?(SUPPORTS_PARAMS_DSL) && SUPPORTS_PARAMS_DSL - # Set up test rig + VCR.use_cassette("contrib/ruby_llm/tool_calling") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) - chat.with_tool(WeatherTestToolV19) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + chat.with_tool(WeatherTestTool) - # Make a chat request that should trigger tool usage response = chat.ask("What's the weather like in San Francisco?") - # Verify response refute_nil response refute_nil response.content - # Tool calling creates sibling spans: - # 1. ruby_llm.chat - the LLM conversation span - # 2. 
ruby_llm.tool.* - tool execution span(s) + # Should have multiple spans: chat + tool + possibly another chat for follow-up spans = rig.drain assert spans.length >= 2, "Expected at least 2 spans for tool calling (chat + tool)" # Find the chat span and tool span - chat_span = spans.find { |s| s.name == "ruby_llm.chat" } - tool_span = spans.find { |s| s.name.start_with?("ruby_llm.tool.") } + chat_spans = spans.select { |s| s.name == "ruby_llm.chat" } + tool_spans = spans.select { |s| s.name.start_with?("ruby_llm.tool.") } + + assert chat_spans.length >= 1, "Expected at least one ruby_llm.chat span" + assert tool_spans.length >= 1, "Expected at least one ruby_llm.tool.* span" - refute_nil chat_span, "Expected a ruby_llm.chat span" - refute_nil tool_span, "Expected a ruby_llm.tool.* span" + # Verify tool span has correct attributes + tool_span = tool_spans.first + assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" + span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) + assert_equal "tool", span_attrs["type"] + assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" + assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" - # Verify chat span has metadata with tools - assert chat_span.attributes.key?("braintrust.metadata") + # Verify chat span metadata contains exact tool schema (OpenAI format) + chat_span = chat_spans.first metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - assert_equal "ruby_llm", metadata["provider"] - # Assert exact tool schema (OpenAI format with RubyLLM-specific fields stripped) expected_tools = [ { "type" => "function", "function" => { - "name" => "weather_test_tool_v19", + "name" => "weather_test", "description" => "Get the current weather for a location", "parameters" => { "type" => "object", @@ -218,53 +253,32 @@ def test_wrap_creates_span_for_tool_calling } ] assert_equal 
expected_tools, metadata["tools"] - - # Verify chat span has output - assert chat_span.attributes.key?("braintrust.output_json") - output = JSON.parse(chat_span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify tool span has correct attributes - assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" - span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) - assert_equal "tool", span_attrs["type"] - assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" - assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" end end - # Test for GitHub issue #39: ActiveRecord integration calls complete() directly - # This simulates how acts_as_chat uses RubyLLM - it adds messages to chat history - # and then calls complete() directly, bypassing ask() - def test_wrap_creates_span_for_direct_complete - VCR.use_cassette("ruby_llm/direct_complete") do - require "ruby_llm" + # --- Direct complete() --- - # Set up test rig + def test_direct_complete_creates_span + VCR.use_cassette("contrib/ruby_llm/direct_complete") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) # Simulate ActiveRecord integration: add message directly, then call complete() chat.add_message(role: :user, content: "Say 'hello'") response = chat.complete - # Verify response refute_nil response refute_nil response.content assert response.content.length > 0 - # Drain and verify span span = rig.drain_one - # Verify span name assert_equal "ruby_llm.chat", span.name # Verify braintrust.input_json contains the message we added @@ 
-272,200 +286,87 @@ def test_wrap_creates_span_for_direct_complete input = JSON.parse(span.attributes["braintrust.input_json"]) assert input.is_a?(Array) assert input.length > 0 - # The user message should be in the input user_msg = input.find { |m| m["role"] == "user" } refute_nil user_msg assert_includes user_msg["content"], "hello" - - # Verify braintrust.output_json contains response - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains provider and model info - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "ruby_llm", metadata["provider"] - assert_equal "gpt-4o-mini", metadata["model"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 end end - def test_wrap_is_idempotent - VCR.use_cassette("ruby_llm/basic_chat") do - require "ruby_llm" + # --- Idempotency --- - # Set up test rig + def test_wrapping_is_idempotent + VCR.use_cassette("contrib/ruby_llm/basic_chat") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance chat = RubyLLM.chat(model: "gpt-4o-mini") - # Wrap it twice - should not cause issues - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + # Wrap twice - should not cause issues + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - # Make a chat 
request response = chat.ask("Say 'test'") - # Verify response refute_nil response # Drain and verify - should only have ONE span (not double-wrapped) span = rig.drain_one - # Verify span name assert_equal "ruby_llm.chat", span.name - - # Verify the wrapped flag is set - assert chat.instance_variable_get(:@braintrust_wrapped) end end - def test_wrap_module_creates_spans_for_all_instances - VCR.use_cassette("ruby_llm/basic_chat") do - require "ruby_llm" + # --- Class-level instrumentation --- - # Set up test rig + def test_class_level_instrumentation_traces_new_instances + VCR.use_cassette("contrib/ruby_llm/basic_chat") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Wrap the module (not an instance) - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(tracer_provider: rig.tracer_provider) - - # Verify the wrapper module is stored - assert ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) - - # Create multiple chat instances AFTER wrapping - they should all be traced - chat1 = RubyLLM.chat(model: "gpt-4o-mini") - chat2 = RubyLLM.chat(model: "gpt-4o-mini") - chat3 = RubyLLM.chat(model: "gpt-4o-mini") + # Set the default tracer provider for class-level instrumentation + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) - # Make chat requests with each instance - response1 = chat1.ask("Say 'test'") - response2 = chat2.ask("Say 'test'") - response3 = chat3.ask("Say 'test'") + # Instrument at the class level (no target) + Braintrust.instrument!(:ruby_llm) - # Verify responses - refute_nil response1 - refute_nil response2 - refute_nil response3 - - # Drain and verify we got 3 spans (one per request) - spans = rig.drain - assert_equal 3, spans.length, "Expected 3 spans (one per request)" - - # Verify all spans have the correct name and attributes - spans.each do |span| - assert_equal "ruby_llm.chat", span.name - assert 
span.attributes.key?("braintrust.input_json") - assert span.attributes.key?("braintrust.output_json") - assert span.attributes.key?("braintrust.metadata") - assert span.attributes.key?("braintrust.metrics") - end - - # Verify exporter is empty after drain - assert_equal 0, rig.drain.length, "Exporter should be empty after drain" - - # Now unwrap the module - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap - - # Verify the wrapper module reference is removed - refute ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) + # Create a NEW chat instance AFTER class-level instrumentation + # This instance should automatically be traced without explicit instrumentation + chat = RubyLLM.chat(model: "gpt-4o-mini") - # Create new chat instances AFTER unwrapping - they should NOT be traced - chat4 = RubyLLM.chat(model: "gpt-4o-mini") - chat5 = RubyLLM.chat(model: "gpt-4o-mini") + # Make a request - should be traced automatically + response = chat.ask("Say 'test'") - # Verify they are NOT wrapped - refute chat4.instance_variable_defined?(:@braintrust_wrapped), "chat4 should not be wrapped" - refute chat5.instance_variable_defined?(:@braintrust_wrapped), "chat5 should not be wrapped" + refute_nil response + refute_nil response.content - # Make chat requests with the unwrapped instances - response4 = chat4.ask("Say 'test'") - response5 = chat5.ask("Say 'test'") + # Verify span was created (proving class-level instrumentation works) + span = rig.drain_one - # Verify responses still work (RubyLLM still functions) - refute_nil response4 - refute_nil response5 + assert_equal "ruby_llm.chat", span.name + assert span.attributes.key?("braintrust.input_json") + assert span.attributes.key?("braintrust.output_json") + assert span.attributes.key?("braintrust.metadata") - # Verify NO new spans were created (unwrap disabled tracing) - spans_after_unwrap = rig.drain - assert_equal 0, spans_after_unwrap.length, "Expected 0 spans after unwrap" + metadata = 
JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "ruby_llm", metadata["provider"] + assert_equal "gpt-4o-mini", metadata["model"] end end - # Test for frozen Hash bug in format_tool_schema - # This reproduces the issue where tool_params from provider's tool_for method - # returns a frozen hash, causing FrozenError when trying to delete keys - def test_format_tool_schema_handles_frozen_hash - # Create a mock tool object - mock_tool = Object.new - def mock_tool.name - "test_tool" - end - - def mock_tool.description - "A test tool" - end - - def mock_tool.params_schema - { - "type" => "object", - "properties" => {}, - "required" => [], - "additionalProperties" => false, - "strict" => true - }.freeze - end - - def mock_tool.respond_to?(method) - [:name, :description, :params_schema].include?(method) - end - - # Test with basic tool schema (no provider) - # This will use build_basic_tool_schema which creates a tool schema - # with the frozen params_schema from the tool - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_tool_schema(mock_tool, nil) - - # Verify the result is returned (not raising FrozenError) - refute_nil result - assert_equal "function", result["type"] - assert_equal "test_tool", result["function"]["name"] - assert_equal "A test tool", result["function"]["description"] - - # Verify the parameters don't contain RubyLLM-specific fields - params = result["function"]["parameters"] - refute params.key?("strict"), "strict should be removed" - refute params.key?(:strict), "strict (symbol) should be removed" - refute params.key?("additionalProperties"), "additionalProperties should be removed" - refute params.key?(:additionalProperties), "additionalProperties (symbol) should be removed" - - # Verify the expected fields are still present - assert_equal "object", params["type"] - assert_equal({}, params["properties"]) - assert_equal [], params["required"] - end + # --- Attachment handling (GitHub issue #71) --- # Test for GitHub 
issue #71: Content object not properly serialized # When a message has attachments, RubyLLM returns a Content object instead of a string # The SDK should include both text and attachments in the trace def test_format_message_for_input_handles_content_object_with_attachments + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + with_tmp_file(data: "This is test content") do |tmpfile| # Create a Content object with an attachment (this triggers the Content object return) content = ::RubyLLM::Content.new("Hello, this is the actual text content") @@ -478,8 +379,14 @@ def test_format_message_for_input_handles_content_object_with_attachments assert msg.content.is_a?(::RubyLLM::Content), "Precondition failed: Expected Content object when message has attachments" + # Create a minimal chat-like object to test the helper method + chat_class = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + chat = chat_class.new + # Call the method under test - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_message_for_input(msg) + result = chat.send(:format_message_for_input, msg) # When attachments are present, content should be a multipart array assert_equal "user", result["role"] @@ -506,6 +413,8 @@ def test_format_message_for_input_handles_content_object_with_attachments # Test for GitHub issue #71: Content object with image attachment # Verifies attachments are properly included in traces (similar to braintrust-go-sdk) def test_format_message_for_input_handles_image_attachment + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + with_png_file do |tmpfile| # Create a Content object with an image attachment content = ::RubyLLM::Content.new("What's in this image?") @@ -520,8 +429,14 @@ def test_format_message_for_input_handles_image_attachment assert_equal 1, msg.content.attachments.count, "Expected one attachment" + # Create a minimal chat-like object to test the helper method + chat_class = Class.new do + include 
Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + chat = chat_class.new + # Call the method under test - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_message_for_input(msg) + result = chat.send(:format_message_for_input, msg) # Verify multipart content array is returned assert_equal "user", result["role"] @@ -550,6 +465,8 @@ def test_format_message_for_input_handles_image_attachment # Test for GitHub issue #71: build_input_messages works with Content objects # Verifies the full flow works end-to-end with attachments def test_build_input_messages_handles_content_objects_with_attachments + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + with_png_file do |tmpfile| # Configure RubyLLM ::RubyLLM.configure do |config| @@ -564,8 +481,11 @@ def test_build_input_messages_handles_content_objects_with_attachments content.add_attachment(tmpfile.path) chat.add_message(role: :user, content: content) + # Instrument the chat to get access to the helper method + Braintrust.instrument!(:ruby_llm, target: chat) + # Call the method under test - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.build_input_messages(chat, nil) + result = chat.send(:build_input_messages) # Verify the result assert_equal 1, result.length, "Expected one message" @@ -592,4 +512,49 @@ def test_build_input_messages_handles_content_objects_with_attachments "Result should not contain Content object reference" end end + + # --- Frozen hash handling --- + + def test_format_tool_schema_handles_frozen_hash + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + + # Create a mock tool object with frozen params_schema + mock_tool = Object.new + def mock_tool.name + "test_tool" + end + + def mock_tool.description + "A test tool" + end + + def mock_tool.params_schema + { + "type" => "object", + "properties" => {}, + "required" => [], + "additionalProperties" => false, + "strict" => true + }.freeze + end + + # Create a minimal chat-like object to test the 
helper method + chat_class = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + chat = chat_class.new + + # Test format_tool_schema (via send since it's private) + result = chat.send(:format_tool_schema, mock_tool, nil) + + # Verify the result is returned (not raising FrozenError) + refute_nil result + assert_equal "function", result["type"] + assert_equal "test_tool", result["function"]["name"] + + # Verify the parameters don't contain RubyLLM-specific fields + params = result["function"]["parameters"] + refute params.key?("strict"), "strict should be removed" + refute params.key?("additionalProperties"), "additionalProperties should be removed" + end end diff --git a/test/braintrust/contrib/ruby_llm/instrumentation_test.rb b/test/braintrust/contrib/ruby_llm/instrumentation_test.rb new file mode 100644 index 0000000..c0a93c5 --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/instrumentation_test.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_llm/patcher" + +# Tests for instance-level instrumentation behavior +class Braintrust::Contrib::RubyLLM::InstrumentationTest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + + def setup + skip_unless_ruby_llm! + end + + # --- Instance-level instrumentation --- + + def test_instance_instrumentation_only_patches_target_chat + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::RubyLLM::ChatPatcher.patched? 
+ + rig = setup_otel_test_rig + + # Configure RubyLLM with a test key + RubyLLM.configure do |config| + config.openai_api_key = "test-api-key" + end + + # Create two chat instances BEFORE any patching + chat_traced = RubyLLM::Chat.new(model: "gpt-4o-mini") + chat_untraced = RubyLLM::Chat.new(model: "gpt-4o-mini") + + # Verify neither client is patched initially + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_traced), + "chat_traced should not be patched initially" + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_untraced), + "chat_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?, + "class should not be patched initially" + + # Instrument only one chat (instance-level) + Braintrust.instrument!(:ruby_llm, target: chat_traced, tracer_provider: rig.tracer_provider) + + # Only the traced chat should be patched + assert Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_traced), + "chat_traced should be patched after instrument!" + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_untraced), + "chat_untraced should NOT be patched after instrument!" + + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end +end diff --git a/test/braintrust/contrib/ruby_llm/integration_helper.rb b/test/braintrust/contrib/ruby_llm/integration_helper.rb new file mode 100644 index 0000000..c2a9788 --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/integration_helper.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +# Test helpers for RubyLLM integration tests. +# Provides gem loading helpers for the ruby_llm gem. + +module Braintrust + module Contrib + module RubyLLM + module IntegrationHelper + # Skip test unless ruby_llm gem is available. + # Loads the gem if available. 
+ def skip_unless_ruby_llm! + unless Gem.loaded_specs["ruby_llm"] + skip "ruby_llm gem not available" + end + + require "ruby_llm" unless defined?(::RubyLLM) + end + + # Load ruby_llm gem if available (doesn't skip, for tests that handle both states). + def load_ruby_llm_if_available + if Gem.loaded_specs["ruby_llm"] + require "ruby_llm" unless defined?(::RubyLLM) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/ruby_llm/integration_test.rb b/test/braintrust/contrib/ruby_llm/integration_test.rb new file mode 100644 index 0000000..b2e9296 --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/integration_test.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +class Braintrust::Contrib::RubyLLM::IntegrationTest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + + def setup + load_ruby_llm_if_available + @integration = Braintrust::Contrib::RubyLLM::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :ruby_llm, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["ruby_llm"], @integration.gem_names + end + + # --- .require_paths --- + + def test_require_paths + assert_equal ["ruby_llm"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "1.8.0", @integration.minimum_version + end + + # --- .loaded? 
--- + + def test_loaded_returns_true_when_ruby_llm_chat_defined + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + assert @integration.loaded?, "Should be loaded when ::RubyLLM::Chat is defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/ruby_llm/patcher_test.rb b/test/braintrust/contrib/ruby_llm/patcher_test.rb new file mode 100644 index 0000000..eaaca0a --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/patcher_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_llm/patcher" + +class Braintrust::Contrib::RubyLLM::ChatPatcherTest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + + def setup + skip_unless_ruby_llm! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_ruby_llm_chat_defined + assert Braintrust::Contrib::RubyLLM::ChatPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + fake_chat_class = Class.new + + ::RubyLLM.stub_const(:Chat, fake_chat_class) do + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + fake_chat_class = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + ::RubyLLM.stub_const(:Chat, fake_chat_class) do + assert Braintrust::Contrib::RubyLLM::ChatPatcher.patched? 
+ end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:singleton_class, returns: fake_singleton) do |chat| + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + mock_chain(:singleton_class, returns: fake_singleton) do |chat| + assert Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_chat_class = Minitest::Mock.new + fake_chat_class.expect(:include, true, [Braintrust::Contrib::RubyLLM::Instrumentation::Chat]) + + ::RubyLLM.stub_const(:Chat, fake_chat_class) do + Braintrust::Contrib::RubyLLM::ChatPatcher.perform_patch + fake_chat_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::RubyLLM::Instrumentation::Chat]) + + mock_chain(:singleton_class, returns: terminal) do |chat| + chat.expect(:is_a?, true, [::RubyLLM::Chat]) + Braintrust::Contrib::RubyLLM::ChatPatcher.perform_patch(target: chat) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_chat = Minitest::Mock.new + fake_chat.expect(:is_a?, false, [::RubyLLM::Chat]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::RubyLLM::ChatPatcher.perform_patch(target: fake_chat) + end + + fake_chat.verify + end +end diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml new file mode 100644 index 0000000..cd554b7 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml @@ -0,0 +1,119 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + 
body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Say ''test''"}],"stream":false}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:18:01 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '268' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '423' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999995' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_79db98b8ed3a4623be07fcb826bb94f6 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=DzV_AEA6fZZyC.vKkRS.2DlX8b8W3.X4Xihg1bIYEKQ-1766967481-1.0.1.1-kUeLmejj.yghCo2m1hgTl0RsTL9XtF8khPwq0uqeYgTZmbQYMPIJd7VP3VERW_LHMmespaoTxNQuFnZW1uMc1sUUtM1bHeW0coVVcF2re14; + path=/; expires=Mon, 29-Dec-25 00:48:01 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=5oPQZmzv6H6BNQsGIuLakdFOwrudVV7ZCQTSgweYwmw-1766967481726-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e236b7fb680-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CrvFRNGbE66JUzZgXuLlWUo1QUxHn", + "object": "chat.completion", + "created": 1766967481, + "model": 
"gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 11, + "completion_tokens": 2, + "total_tokens": 13, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_c4585b5b9c" + } + recorded_at: Mon, 29 Dec 2025 00:18:01 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml new file mode 100644 index 0000000..a856010 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml @@ -0,0 +1,119 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Say ''hello''"}],"stream":false}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:17:58 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '383' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '532' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + 
X-Ratelimit-Remaining-Tokens: + - '149999995' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_a9c1c29e88f34223a39f05e8b3c1b915 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=yzF9Fvu_9Mya4Ityhcgf9YdquuMEoME3B2h33f8FDrU-1766967478-1.0.1.1-CdeI5dr56iEtuJkKE.q2BWtd8sDdKPbjAvjkF.xy.RVyTnv1nlwynLwm3gzR58Xd2FKD_WIM5EpgYmroiDPudBzp2XPwsCdFezjeR3Vmxfs; + path=/; expires=Mon, 29-Dec-25 00:47:58 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=gDhAm9ipn0fkMChqAy5vOjbKVUKvGVsm1gakEJ43s7M-1766967478363-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e0dddd2f84d-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CrvFNePrBoNzJZTdgA7GdlfyXPt3w", + "object": "chat.completion", + "created": 1766967477, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! 
How can I assist you today?", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 11, + "completion_tokens": 9, + "total_tokens": 20, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_db64b059be" + } + recorded_at: Mon, 29 Dec 2025 00:17:58 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml new file mode 100644 index 0000000..8aef2b8 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml @@ -0,0 +1,109 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Count + to 3"}],"stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:17:59 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '158' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '297' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999995' + 
X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_4cb567fe754a470bbd96de2e66c5ba17 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=y0wQ2ZTC80M.9WLZ8o_1mgv_GucWdtWUo9Pi98ugrdA-1766967479-1.0.1.1-0VkVwdOBB8LYcuFKzfk3XOi54oa_hKgcFO8WODbUU1sq..QemE7aph58Uyry4r9wq0PiJmITYE50IS8r7L7.a1lLsWj5hncSgh2iyiy5BIk; + path=/; expires=Mon, 29-Dec-25 00:47:59 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=_.nRIr0a48TKL8OnhL3EVqWlAbd1YcnaJfsajCYRKwc-1766967479987-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e142b1610d5-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mGeH3kiRC"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"qhpHse2qUP"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"FkuOCIZAL6"} + + data: 
{"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"18PrtDgh4V"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"m00xds2zeu"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"IjniYU966s"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wslCqvFoLt"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mkF5IQIdqQ"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5gWlzsmQme"} + + data: 
{"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"NHMuR"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":11,"completion_tokens":8,"total_tokens":19,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"U0r0eiGyTnn"} + + data: [DONE] + + recorded_at: Mon, 29 Dec 2025 00:18:00 GMT +recorded_with: VCR 6.3.1 +... diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml new file mode 100644 index 0000000..8812a53 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml @@ -0,0 +1,220 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"What''s + the weather like in San Francisco?"}],"stream":false,"tools":[{"type":"function","function":{"name":"weather_test","description":"Get + the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","description":"Temperature + unit (celsius or fahrenheit)"}},"required":["location","unit"]}}}]}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:18:03 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '1090' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '1255' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999987' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_5db84c9eccd54c4381517371d916d2d7 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=o7gXcsyJykTSc3AQEOeH2F2Zd76Qmrj7P5ShIxZSAZI-1766967483-1.0.1.1-wBpt4tXwupbboQBP9wbjfJgSKaekk9fkllt4JQxktoyyeRUDFvGMphoD2NERvZd3aw..QuedwPBoRcxvYTpo_kD7U0tJ5FUB3bjwFjKyKqA; + path=/; expires=Mon, 29-Dec-25 00:48:03 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=F1nn6Pe4lVvPtXyAQ6p3IsEVl1AhcR50tVr4LFPyXsc-1766967483487-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e29185613a0-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CrvFSUW7ygZkMNlUHRDxcEUObHIHP", + "object": "chat.completion", + "created": 1766967482, + "model": 
"gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_iw7sDBQxXqVqAnfid7in9Xti", + "type": "function", + "function": { + "name": "weather_test", + "arguments": "{\"location\":\"San Francisco, CA\",\"unit\":\"celsius\"}" + } + } + ], + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 80, + "completion_tokens": 22, + "total_tokens": 102, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_8bbc38b4db" + } + recorded_at: Mon, 29 Dec 2025 00:18:03 GMT +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"What''s + the weather like in San Francisco?"},{"role":"assistant","tool_calls":[{"id":"call_iw7sDBQxXqVqAnfid7in9Xti","type":"function","function":{"name":"weather_test","arguments":"{\"location\":\"San + Francisco, CA\",\"unit\":\"celsius\"}"}}]},{"role":"tool","content":"{:location=>\"San + Francisco, CA\", :temperature=>72, :unit=>\"celsius\", :conditions=>\"sunny\"}","tool_call_id":"call_iw7sDBQxXqVqAnfid7in9Xti"}],"stream":false,"tools":[{"type":"function","function":{"name":"weather_test","description":"Get + the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","description":"Temperature + unit (celsius or fahrenheit)"}},"required":["location","unit"]}}}]}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:18:04 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '579' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '592' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999962' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_2ed1ffa90e494f06a5339cdcf266983d + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=ogfj4fYYFkRY6zM7zXWJGX8QhM1dJRVtmGji0Ky56_Q-1766967484-1.0.1.1-Pookt2Yg.wmTZyFEfUixlRnu2Ue9EOAOw3IKpQm4UuN9uRTfhgnYF53WQetZHsXQvRR7wtyrMI_khsq2G.ebtYAaz8m_Pjqbf.FTtpAqsbE; + path=/; expires=Mon, 29-Dec-25 00:48:04 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=JDJQV_HRTMBJyhD7n_pWhKWZXE0SI38XFZLZLLNktQE-1766967484184-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e3409791045-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: !binary |- + 
ewogICJpZCI6ICJjaGF0Y21wbC1DcnZGVDVDbTRsWUx2R295WHVqSFNOUXVoRXZDayIsCiAgIm9iamVjdCI6ICJjaGF0LmNvbXBsZXRpb24iLAogICJjcmVhdGVkIjogMTc2Njk2NzQ4MywKICAibW9kZWwiOiAiZ3B0LTRvLW1pbmktMjAyNC0wNy0xOCIsCiAgImNob2ljZXMiOiBbCiAgICB7CiAgICAgICJpbmRleCI6IDAsCiAgICAgICJtZXNzYWdlIjogewogICAgICAgICJyb2xlIjogImFzc2lzdGFudCIsCiAgICAgICAgImNvbnRlbnQiOiAiVGhlIHdlYXRoZXIgaW4gU2FuIEZyYW5jaXNjbywgQ0EgaXMgY3VycmVudGx5IHN1bm55IHdpdGggYSB0ZW1wZXJhdHVyZSBvZiA3MsKwQy4iLAogICAgICAgICJyZWZ1c2FsIjogbnVsbCwKICAgICAgICAiYW5ub3RhdGlvbnMiOiBbXQogICAgICB9LAogICAgICAibG9ncHJvYnMiOiBudWxsLAogICAgICAiZmluaXNoX3JlYXNvbiI6ICJzdG9wIgogICAgfQogIF0sCiAgInVzYWdlIjogewogICAgInByb21wdF90b2tlbnMiOiAxMzUsCiAgICAiY29tcGxldGlvbl90b2tlbnMiOiAxOSwKICAgICJ0b3RhbF90b2tlbnMiOiAxNTQsCiAgICAicHJvbXB0X3Rva2Vuc19kZXRhaWxzIjogewogICAgICAiY2FjaGVkX3Rva2VucyI6IDAsCiAgICAgICJhdWRpb190b2tlbnMiOiAwCiAgICB9LAogICAgImNvbXBsZXRpb25fdG9rZW5zX2RldGFpbHMiOiB7CiAgICAgICJyZWFzb25pbmdfdG9rZW5zIjogMCwKICAgICAgImF1ZGlvX3Rva2VucyI6IDAsCiAgICAgICJhY2NlcHRlZF9wcmVkaWN0aW9uX3Rva2VucyI6IDAsCiAgICAgICJyZWplY3RlZF9wcmVkaWN0aW9uX3Rva2VucyI6IDAKICAgIH0KICB9LAogICJzZXJ2aWNlX3RpZXIiOiAiZGVmYXVsdCIsCiAgInN5c3RlbV9maW5nZXJwcmludCI6ICJmcF84YmJjMzhiNGRiIgp9Cg== + recorded_at: Mon, 29 Dec 2025 00:18:04 GMT +recorded_with: VCR 6.3.1 From c654952401771a1befc806c45217602992f2a0f3 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sun, 28 Dec 2025 23:20:24 -0500 Subject: [PATCH 11/27] Changed: Migrated anthropic to new integration API --- README.md | 6 +- Rakefile | 1 + .../anthropic/basic.rb} | 15 +- examples/contrib/anthropic/deprecated.rb | 69 ++++ examples/contrib/anthropic/instance.rb | 57 ++++ examples/contrib/anthropic/streaming.rb | 75 +++++ .../anthropic/golden.rb} | 2 +- .../anthropic/kitchen-sink.rb} | 2 +- lib/braintrust/contrib.rb | 2 + .../contrib/anthropic/deprecated.rb | 24 ++ .../anthropic/instrumentation/common.rb | 53 +++ .../anthropic/instrumentation/messages.rb | 232 +++++++++++++ .../contrib/anthropic/integration.rb | 53 +++ 
lib/braintrust/contrib/anthropic/patcher.rb | 62 ++++ lib/braintrust/trace.rb | 8 - lib/braintrust/trace/contrib/anthropic.rb | 316 ------------------ lib/braintrust/trace/tokens.rb | 49 --- .../contrib/anthropic/deprecated_test.rb | 58 ++++ .../anthropic/instrumentation/common_test.rb | 102 ++++++ .../instrumentation/messages_test.rb} | 235 +++++++------ .../contrib/anthropic/instrumentation_test.rb | 111 ++++++ .../contrib/anthropic/integration_helper.rb | 29 ++ .../contrib/anthropic/integration_test.rb | 59 ++++ .../contrib/anthropic/patcher_test.rb | 93 ++++++ test/braintrust/trace/tokens_test.rb | 78 ----- 25 files changed, 1222 insertions(+), 569 deletions(-) rename examples/{anthropic.rb => contrib/anthropic/basic.rb} (81%) create mode 100644 examples/contrib/anthropic/deprecated.rb create mode 100644 examples/contrib/anthropic/instance.rb create mode 100644 examples/contrib/anthropic/streaming.rb rename examples/internal/{anthropic_golden.rb => contrib/anthropic/golden.rb} (99%) rename examples/internal/{anthropic.rb => contrib/anthropic/kitchen-sink.rb} (99%) create mode 100644 lib/braintrust/contrib/anthropic/deprecated.rb create mode 100644 lib/braintrust/contrib/anthropic/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/anthropic/instrumentation/messages.rb create mode 100644 lib/braintrust/contrib/anthropic/integration.rb create mode 100644 lib/braintrust/contrib/anthropic/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/anthropic.rb delete mode 100644 lib/braintrust/trace/tokens.rb create mode 100644 test/braintrust/contrib/anthropic/deprecated_test.rb create mode 100644 test/braintrust/contrib/anthropic/instrumentation/common_test.rb rename test/braintrust/{trace/anthropic_test.rb => contrib/anthropic/instrumentation/messages_test.rb} (79%) create mode 100644 test/braintrust/contrib/anthropic/instrumentation_test.rb create mode 100644 test/braintrust/contrib/anthropic/integration_helper.rb create mode 100644 
test/braintrust/contrib/anthropic/integration_test.rb create mode 100644 test/braintrust/contrib/anthropic/patcher_test.rb delete mode 100644 test/braintrust/trace/tokens_test.rb diff --git a/README.md b/README.md index 0d71497..2eef3cf 100644 --- a/README.md +++ b/README.md @@ -146,11 +146,11 @@ require "braintrust" require "anthropic" Braintrust.init +Braintrust.instrument!(:anthropic) +# Instrument Anthropic (instance-level) client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -Braintrust::Trace::Anthropic.wrap(client) - tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app") root_span = nil @@ -268,7 +268,7 @@ Check out the [`examples/`](./examples/) directory for complete working examples - [trace.rb](./examples/trace.rb) - Manual span creation and tracing - [openai.rb](./examples/openai.rb) - Automatically trace OpenAI API calls - [alexrudall_openai.rb](./examples/alexrudall_openai.rb) - Automatically trace ruby-openai gem API calls -- [anthropic.rb](./examples/anthropic.rb) - Automatically trace Anthropic API calls +- [contrib/anthropic/basic.rb](./examples/contrib/anthropic/basic.rb) - Automatically trace Anthropic API calls - [ruby_llm.rb](./examples/ruby_llm.rb) - Automatically trace RubyLLM API calls - [trace/trace_attachments.rb](./examples/trace/trace_attachments.rb) - Log attachments (images, PDFs) in traces - [eval/dataset.rb](./examples/eval/dataset.rb) - Run evaluations using datasets stored in Braintrust diff --git a/Rakefile b/Rakefile index 74788e8..64c43e5 100644 --- a/Rakefile +++ b/Rakefile @@ -106,6 +106,7 @@ namespace :test do namespace :contrib do # Tasks per integration [ + {name: :anthropic}, {name: :openai}, {name: :ruby_llm}, {name: :ruby_openai, appraisal: "ruby-openai"} diff --git a/examples/anthropic.rb b/examples/contrib/anthropic/basic.rb similarity index 81% rename from examples/anthropic.rb rename to examples/contrib/anthropic/basic.rb index 03884c7..b453b0b 100644 --- a/examples/anthropic.rb +++ 
b/examples/contrib/anthropic/basic.rb @@ -11,7 +11,7 @@ # This example demonstrates how to automatically trace Anthropic API calls with Braintrust. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/anthropic.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/basic.rb # Check for API keys unless ENV["ANTHROPIC_API_KEY"] @@ -20,21 +20,20 @@ exit 1 end +# Instrument Anthropic (class-level, affects all clients) Braintrust.init(blocking_login: true) +Braintrust.instrument!(:anthropic) # Create Anthropic client client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -# Wrap the client with Braintrust tracing -Braintrust::Trace::Anthropic.wrap(client) - # Create a root span to capture the entire operation tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") root_span = nil # Make a message request (automatically traced!) puts "Sending message request to Anthropic..." -message = tracer.in_span("examples/anthropic.rb") do |span| +message = tracer.in_span("examples/contrib/anthropic/basic.rb") do |span| root_span = span client.messages.create( @@ -48,7 +47,7 @@ end # Print the response -puts "\n✓ Response received!" +puts "\n Response received!" puts "\nClaude: #{message.content[0].text}" # Print usage stats @@ -57,10 +56,10 @@ puts " Output tokens: #{message.usage.output_tokens}" # Print permalink to view this trace in Braintrust -puts "\n✓ View this trace in Braintrust:" +puts "\n View this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust OpenTelemetry.tracer_provider.shutdown -puts "\n✓ Trace sent to Braintrust!" +puts "\n Trace sent to Braintrust!" 
diff --git a/examples/contrib/anthropic/deprecated.rb b/examples/contrib/anthropic/deprecated.rb new file mode 100644 index 0000000..d1761e6 --- /dev/null +++ b/examples/contrib/anthropic/deprecated.rb @@ -0,0 +1,69 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "braintrust/contrib/anthropic/deprecated" +require "anthropic" +require "opentelemetry/sdk" + +# Example: Anthropic message creation with deprecated wrap() API +# +# This example demonstrates the old API for backward compatibility. +# For new code, use `Braintrust.instrument!(:anthropic)` instead. +# +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/deprecated.rb + +# Check for API keys +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + puts "Get your API key from: https://console.anthropic.com/" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create Anthropic client +client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Wrap the client with Braintrust tracing +# DEPRECATED: Use `Braintrust.instrument!(:anthropic, target: client)` instead +Braintrust::Trace::Anthropic.wrap(client) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") +root_span = nil + +# Make a message request (automatically traced!) +puts "Sending message request to Anthropic..." +message = tracer.in_span("examples/contrib/anthropic/deprecated.rb") do |span| + root_span = span + + client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 100, + system: "You are a helpful assistant.", + messages: [ + {role: "user", content: "Say hello and tell me a short joke."} + ] + ) +end + +# Print the response +puts "\n Response received!" 
+puts "\nClaude: #{message.content[0].text}" + +# Print usage stats +puts "\nToken usage:" +puts " Input tokens: #{message.usage.input_tokens}" +puts " Output tokens: #{message.usage.output_tokens}" + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n Trace sent to Braintrust!" diff --git a/examples/contrib/anthropic/instance.rb b/examples/contrib/anthropic/instance.rb new file mode 100644 index 0000000..8b49458 --- /dev/null +++ b/examples/contrib/anthropic/instance.rb @@ -0,0 +1,57 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "anthropic" +require "opentelemetry/sdk" + +# Example: Instance-level Anthropic instrumentation +# +# This shows how to instrument a specific client instance rather than +# all Anthropic clients globally. +# +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/instance.rb + +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create two client instances +client_traced = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +client_untraced = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Only instrument one of them +Braintrust.instrument!(:anthropic, target: client_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") +root_span = nil + +tracer.in_span("examples/contrib/anthropic/instance.rb") do |span| + root_span = span + + puts "Calling traced client..." + response1 = client_traced.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 10, + messages: [{role: "user", content: "Say 'traced'"}] + ) + puts "Traced response: #{response1.content[0].text}" + + puts "\nCalling untraced client..." 
+ response2 = client_untraced.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 10, + messages: [{role: "user", content: "Say 'untraced'"}] + ) + puts "Untraced response: #{response2.content[0].text}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first client call should have an anthropic.messages span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/anthropic/streaming.rb b/examples/contrib/anthropic/streaming.rb new file mode 100644 index 0000000..ea42074 --- /dev/null +++ b/examples/contrib/anthropic/streaming.rb @@ -0,0 +1,75 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "anthropic" +require "opentelemetry/sdk" + +# Example: Anthropic streaming with Braintrust tracing +# +# This example demonstrates how to trace streaming Anthropic API calls. +# Shows two different streaming patterns: +# 1. Iterator-based streaming with .each (full event access) +# 2. 
Text-only streaming with .text (simplified) +# +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/streaming.rb + +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + puts "Get your API key from: https://console.anthropic.com/" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:anthropic) + +client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-streaming-example") +root_span = nil + +tracer.in_span("examples/contrib/anthropic/streaming.rb") do |span| + root_span = span + + # Pattern 1: Iterator-based streaming with .each + # Returns a MessageStream, iterate to get individual events + puts "=== Pattern 1: Iterator with .each ===" + print "Claude: " + + stream = client.messages.stream( + model: "claude-3-haiku-20240307", + max_tokens: 100, + messages: [{role: "user", content: "Count from 1 to 5, one number per line."}] + ) + + stream.each do |event| + if event.type == :content_block_delta && event.delta.respond_to?(:text) + print event.delta.text + end + end + puts "\n" + + # Pattern 2: Text-only streaming with .text + # Returns an Enumerator that yields only text chunks (simpler) + puts "=== Pattern 2: Text-only with .text ===" + print "Claude: " + + stream = client.messages.stream( + model: "claude-3-haiku-20240307", + max_tokens: 100, + messages: [{role: "user", content: "Name 3 colors."}] + ) + + stream.text.each do |text_chunk| + print text_chunk + end + puts "\n" +end + +puts "\nView this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown + +puts "\nTrace sent to Braintrust!" 
diff --git a/examples/internal/anthropic_golden.rb b/examples/internal/contrib/anthropic/golden.rb similarity index 99% rename from examples/internal/anthropic_golden.rb rename to examples/internal/contrib/anthropic/golden.rb index badd3ad..93e6b55 100755 --- a/examples/internal/anthropic_golden.rb +++ b/examples/internal/contrib/anthropic/golden.rb @@ -28,7 +28,7 @@ # Create an Anthropic client with tracing client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -Braintrust::Trace::Anthropic.wrap(client) +Braintrust.instrument!(:anthropic) # Get a tracer instance tracer = OpenTelemetry.tracer_provider.tracer("anthropic-golden") diff --git a/examples/internal/anthropic.rb b/examples/internal/contrib/anthropic/kitchen-sink.rb similarity index 99% rename from examples/internal/anthropic.rb rename to examples/internal/contrib/anthropic/kitchen-sink.rb index 6dfbad2..3fc483d 100644 --- a/examples/internal/anthropic.rb +++ b/examples/internal/contrib/anthropic/kitchen-sink.rb @@ -29,7 +29,7 @@ # Create an Anthropic client with tracing client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -Braintrust::Trace::Anthropic.wrap(client) +Braintrust.instrument!(:anthropic) # Get a tracer instance tracer = OpenTelemetry.tracer_provider.tracer("anthropic-examples") diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index 2b24890..713d536 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -102,8 +102,10 @@ def tracer_for(target, name: "braintrust") require_relative "contrib/openai/integration" require_relative "contrib/ruby_openai/integration" require_relative "contrib/ruby_llm/integration" +require_relative "contrib/anthropic/integration" # Register integrations Braintrust::Contrib::OpenAI::Integration.register! Braintrust::Contrib::RubyOpenAI::Integration.register! Braintrust::Contrib::RubyLLM::Integration.register! +Braintrust::Contrib::Anthropic::Integration.register! 
diff --git a/lib/braintrust/contrib/anthropic/deprecated.rb b/lib/braintrust/contrib/anthropic/deprecated.rb new file mode 100644 index 0000000..1ef5fb3 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/deprecated.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old Anthropic integration API. +# This file now just delegates to the new API. + +require_relative "../../../braintrust" + +module Braintrust + module Trace + module Anthropic + # Wrap an Anthropic::Client to automatically create spans for messages. + # This is the legacy API - delegates to the new contrib framework. + # + # @param client [Anthropic::Client] the Anthropic client to wrap + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider + # @return [Anthropic::Client] the wrapped client + def self.wrap(client, tracer_provider: nil) + Log.warn("Braintrust::Trace::Anthropic.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:anthropic, target: client, tracer_provider: tracer_provider) + client + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/instrumentation/common.rb b/lib/braintrust/contrib/anthropic/instrumentation/common.rb new file mode 100644 index 0000000..710a413 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/instrumentation/common.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Anthropic + module Instrumentation + # Common utilities for Anthropic SDK instrumentation. + module Common + # Parse Anthropic SDK usage tokens into normalized Braintrust metrics. + # Accumulates cache tokens into prompt_tokens and calculates total. + # Works with both Hash objects and SDK response objects (via to_h). 
+ # @param usage [Hash, Object] usage object from Anthropic response + # @return [Hash] normalized metrics + def self.parse_usage_tokens(usage) + metrics = {} + return metrics unless usage + + usage_hash = usage.respond_to?(:to_h) ? usage.to_h : usage + return metrics unless usage_hash.is_a?(Hash) + + # Anthropic SDK field mappings → Braintrust metrics + field_map = { + "input_tokens" => "prompt_tokens", + "output_tokens" => "completion_tokens", + "cache_read_input_tokens" => "prompt_cached_tokens", + "cache_creation_input_tokens" => "prompt_cache_creation_tokens" + } + + usage_hash.each do |key, value| + next unless value.is_a?(Numeric) + key_str = key.to_s + target = field_map[key_str] + metrics[target] = value.to_i if target + end + + # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs) + prompt_tokens = (metrics["prompt_tokens"] || 0) + + (metrics["prompt_cached_tokens"] || 0) + + (metrics["prompt_cache_creation_tokens"] || 0) + metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0 + + # Calculate total + if metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") + metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] + end + + metrics + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/instrumentation/messages.rb b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb new file mode 100644 index 0000000..5512c07 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb @@ -0,0 +1,232 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" +require_relative "../../support/otel" +require_relative "common" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module Anthropic + module Instrumentation + # Messages instrumentation for Anthropic. + # Wraps create() and stream() methods to create spans. 
+ module Messages + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + METADATA_FIELDS = %i[ + model max_tokens temperature top_p top_k stop_sequences + stream tools tool_choice thinking metadata service_tier + ].freeze + + # Wrap synchronous messages.create + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("anthropic.messages.create") do |span| + metadata = build_metadata(params) + set_input(span, params) + + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super(**params) + end + + set_output(span, response) + set_metrics(span, response, time_to_first_token) + finalize_metadata(span, metadata, response) + + response + end + end + + # Wrap streaming messages.stream + # Stores context on stream object for span creation during consumption + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + stream_obj = super + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + metadata: metadata, + messages_instance: self, + start_time: Braintrust::Internal::Time.measure) + stream_obj + end + + private + + def finalize_stream_span(span, stream_obj, metadata, time_to_first_token) + if stream_obj.respond_to?(:accumulated_message) + begin + msg = stream_obj.accumulated_message + set_output(span, msg) + set_metrics(span, msg, time_to_first_token) + metadata["stop_reason"] = msg.stop_reason if msg.respond_to?(:stop_reason) && msg.stop_reason + metadata["model"] = msg.model if msg.respond_to?(:model) && msg.model + rescue => e + Braintrust::Log.debug("Failed to get accumulated message: #{e.message}") + end + end + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) 
+ end + + def build_metadata(params, stream: false) + metadata = { + "provider" => "anthropic", + "endpoint" => "/v1/messages" + } + metadata["stream"] = true if stream + METADATA_FIELDS.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata + end + + def set_input(span, params) + input_messages = [] + + if params[:system] + system_content = params[:system] + if system_content.is_a?(Array) + system_text = system_content.map { |blk| + blk.is_a?(Hash) ? blk[:text] : blk + }.join("\n") + input_messages << {role: "system", content: system_text} + else + input_messages << {role: "system", content: system_content} + end + end + + if params[:messages] + messages_array = params[:messages].map(&:to_h) + input_messages.concat(messages_array) + end + + Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? + end + + def set_output(span, response) + return unless response.respond_to?(:content) && response.content + + content_array = response.content.map(&:to_h) + output = [{ + role: response.respond_to?(:role) ? response.role : "assistant", + content: content_array + }] + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + end + + def set_metrics(span, response, time_to_first_token) + metrics = {} + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + end + metrics["time_to_first_token"] = time_to_first_token if time_to_first_token + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response) + metadata["stop_reason"] = response.stop_reason if response.respond_to?(:stop_reason) && response.stop_reason + metadata["stop_sequence"] = response.stop_sequence if response.respond_to?(:stop_sequence) && response.stop_sequence + metadata["model"] = response.model if response.respond_to?(:model) && response.model + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + + # MessageStream instrumentation for Anthropic. + # Prepended to Anthropic::Helpers::Streaming::MessageStream to create spans on consumption. + module MessageStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + trace_consumption(ctx) do + super do |*args| + ctx[:time_to_first_token] ||= Braintrust::Internal::Time.measure(ctx[:start_time]) + block.call(*args) + end + end + end + + def text + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + original_text_enum = super + Enumerator.new do |output| + trace_consumption(ctx) do + original_text_enum.each do |text_chunk| + ctx[:time_to_first_token] ||= Braintrust::Internal::Time.measure(ctx[:start_time]) + output << text_chunk + end + end + end + end + + def close + ctx = Braintrust::Contrib::Context.from(self) + if ctx&.[](:tracer) && !ctx[:consumed] + # Stream closed without consumption - create minimal span + ctx[:consumed] = true + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + messages_instance = ctx[:messages_instance] + + tracer.in_span("anthropic.messages.create") do |span| + messages_instance.send(:set_input, span, params) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + 
super + end + + private + + def trace_consumption(ctx) + # Mark as consumed to prevent re-entry (accumulated_message calls each internally) + ctx[:consumed] = true + + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + messages_instance = ctx[:messages_instance] + + tracer.in_span("anthropic.messages.create") do |span| + messages_instance.send(:set_input, span, params) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + + yield + + messages_instance.send(:finalize_stream_span, span, self, metadata, ctx[:time_to_first_token]) + end + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/integration.rb b/lib/braintrust/contrib/anthropic/integration.rb new file mode 100644 index 0000000..42961a9 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/integration.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require_relative "../integration" + +module Braintrust + module Contrib + module Anthropic + # Anthropic integration for automatic instrumentation. + # Instruments the anthropic gem (https://github.com/anthropics/anthropic-sdk-ruby). + class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "0.3.0" + + GEM_NAMES = ["anthropic"].freeze + REQUIRE_PATHS = ["anthropic"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :anthropic + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if anthropic gem is available + def self.loaded? + defined?(::Anthropic::Client) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. 
+ # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [MessagesPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/patcher.rb b/lib/braintrust/contrib/anthropic/patcher.rb new file mode 100644 index 0000000..4f02a1f --- /dev/null +++ b/lib/braintrust/contrib/anthropic/patcher.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/messages" + +module Braintrust + module Contrib + module Anthropic + # Patcher for Anthropic messages. + # Instruments Anthropic::Messages#create and #stream methods. + class MessagesPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::Anthropic::Client) + end + + def patched?(**options) + target_class = get_singleton_class(options[:target]) || ::Anthropic::Resources::Messages + Instrumentation::Messages.applied?(target_class) + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + # MessageStream is shared across all clients, so patch at class level. + # The instrumentation short-circuits when no context is present, + # so uninstrumented clients' streams pass through unaffected. 
+ patch_message_stream + + if options[:target] + # Instance-level (for only this client instance) + raise ArgumentError, "target must be a kind of ::Anthropic::Client" unless options[:target].is_a?(::Anthropic::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Messages) + else + # Class-level (for all client instances) + ::Anthropic::Resources::Messages.include(Instrumentation::Messages) + end + end + + private + + def get_singleton_class(client) + client&.messages&.singleton_class + end + + def patch_message_stream + return unless defined?(::Anthropic::Helpers::Streaming::MessageStream) + return if Instrumentation::MessageStream.applied?(::Anthropic::Helpers::Streaming::MessageStream) + + ::Anthropic::Helpers::Streaming::MessageStream.include(Instrumentation::MessageStream) + end + end + end + end + end +end diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb index b87d804..578f194 100644 --- a/lib/braintrust/trace.rb +++ b/lib/braintrust/trace.rb @@ -6,14 +6,6 @@ require_relative "trace/span_filter" require_relative "logger" -# Anthropic integration is optional - automatically loaded if anthropic gem is available -begin - require "anthropic" - require_relative "trace/contrib/anthropic" -rescue LoadError - # Anthropic gem not installed - integration will not be available -end - module Braintrust module Trace # Set up OpenTelemetry tracing with Braintrust diff --git a/lib/braintrust/trace/contrib/anthropic.rb b/lib/braintrust/trace/contrib/anthropic.rb deleted file mode 100644 index 533eb8a..0000000 --- a/lib/braintrust/trace/contrib/anthropic.rb +++ /dev/null @@ -1,316 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../tokens" - -module Braintrust - module Trace - module Anthropic - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name 
[String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from Anthropic API response - # @param usage [Hash, Object] usage object from Anthropic response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_anthropic_usage_tokens(usage) - end - - # Wrap an Anthropic::Client to automatically create spans for messages and responses - # Supports both synchronous and streaming requests - # @param client [Anthropic::Client] the Anthropic client to wrap - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(client, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # Wrap messages.create - wrap_messages_create(client, tracer_provider) - - # Wrap messages.stream (Anthropic SDK always has this method) - wrap_messages_stream(client, tracer_provider) - - client - end - - # Wrap messages.create API - # @param client [Anthropic::Client] the Anthropic client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_messages_create(client, tracer_provider) - # Create a wrapper module that intercepts messages.create - wrapper = Module.new do - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("anthropic.messages.create") do |span| - # Initialize metadata hash - metadata = { - "provider" => "anthropic", - "endpoint" => "/v1/messages" - } - - # Capture request metadata fields - metadata_fields = %i[ - model max_tokens temperature top_p top_k stop_sequences - stream tools tool_choice thinking metadata service_tier - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if 
params.key?(field) - end - - # Build input messages array, prepending system prompt if present - input_messages = [] - - # Prepend system prompt as a message if present - if params[:system] - # System can be a string or array of text blocks - system_content = params[:system] - if system_content.is_a?(Array) - # Extract text from array of text blocks - system_text = system_content.map { |block| - block.is_a?(Hash) ? block[:text] : block - }.join("\n") - input_messages << {role: "system", content: system_text} - else - input_messages << {role: "system", content: system_content} - end - end - - # Add user/assistant messages - if params[:messages] - messages_array = params[:messages].map(&:to_h) - input_messages.concat(messages_array) - end - - # Set input messages as JSON - if input_messages.any? - span.set_attribute("braintrust.input_json", JSON.generate(input_messages)) - end - - # Call the original method - response = super(**params) - - # Format output as array of messages (same format as input) - if response.respond_to?(:content) && response.content - content_array = response.content.map(&:to_h) - output = [{ - role: response.respond_to?(:role) ? response.role : "assistant", - content: content_array - }] - span.set_attribute("braintrust.output_json", JSON.generate(output)) - end - - # Set metrics (token usage with Anthropic-specific cache tokens) - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(response.usage) - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? 
- end - - # Add response metadata fields - if response.respond_to?(:stop_reason) && response.stop_reason - metadata["stop_reason"] = response.stop_reason - end - if response.respond_to?(:stop_sequence) && response.stop_sequence - metadata["stop_sequence"] = response.stop_sequence - end - # Update model if present in response (in case it was resolved from "latest") - if response.respond_to?(:model) && response.model - metadata["model"] = response.model - end - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - end - - # Prepend the wrapper to the messages resource - client.messages.singleton_class.prepend(wrapper) - end - - # Wrap messages.stream API - # @param client [Anthropic::Client] the Anthropic client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_messages_stream(client, tracer_provider) - # Create a wrapper module that intercepts messages.stream - wrapper = Module.new do - define_method(:stream) do |**params, &block| - tracer = tracer_provider.tracer("braintrust") - - metadata = { - "provider" => "anthropic", - "endpoint" => "/v1/messages", - "stream" => true - } - - # Start span with proper context - span = tracer.start_span("anthropic.messages.create") - - # Capture request metadata fields - metadata_fields = %i[ - model max_tokens temperature top_p top_k stop_sequences - tools tool_choice thinking metadata service_tier - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Build input messages array, prepending system prompt if present - input_messages = [] - - if params[:system] - system_content = params[:system] - if system_content.is_a?(Array) - system_text = system_content.map { |block| - block.is_a?(Hash) ? 
block[:text] : block - }.join("\n") - input_messages << {role: "system", content: system_text} - else - input_messages << {role: "system", content: system_content} - end - end - - if params[:messages] - messages_array = params[:messages].map(&:to_h) - input_messages.concat(messages_array) - end - - if input_messages.any? - span.set_attribute("braintrust.input_json", JSON.generate(input_messages)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method WITHOUT passing the block - # We'll handle the block ourselves to aggregate events - begin - stream = super(**params) - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Anthropic API error: #{e.message}") - span.finish - raise - end - - # Store references on the stream object itself for the wrapper - stream.instance_variable_set(:@braintrust_span, span) - stream.instance_variable_set(:@braintrust_metadata, metadata) - stream.instance_variable_set(:@braintrust_span_finished, false) - - # Local helper for brevity - set_json_attr = ->(attr_name, obj) { Braintrust::Trace::Anthropic.set_json_attr(span, attr_name, obj) } - - # Helper lambda to extract stream data and set span attributes - # This is DRY - used by both .each() and .text() wrappers - extract_stream_metadata = lambda do - # Extract the SDK's internal accumulated message (built during streaming) - acc_msg = stream.instance_variable_get(:@accumated_message_snapshot) - return unless acc_msg - - # Set output from accumulated message - if acc_msg.respond_to?(:content) && acc_msg.content - content_array = acc_msg.content.map(&:to_h) - output = [{ - role: acc_msg.respond_to?(:role) ? 
acc_msg.role : "assistant", - content: content_array - }] - set_json_attr.call("braintrust.output_json", output) - end - - # Set metrics from accumulated message - if acc_msg.respond_to?(:usage) && acc_msg.usage - metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(acc_msg.usage) - set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty? - end - - # Update metadata with response fields - if acc_msg.respond_to?(:stop_reason) && acc_msg.stop_reason - metadata["stop_reason"] = acc_msg.stop_reason - end - if acc_msg.respond_to?(:model) && acc_msg.model - metadata["model"] = acc_msg.model - end - set_json_attr.call("braintrust.metadata", metadata) - end - - # Helper lambda to finish span (prevents double-finishing via closure) - finish_braintrust_span = lambda do - return if stream.instance_variable_get(:@braintrust_span_finished) - stream.instance_variable_set(:@braintrust_span_finished, true) - - extract_stream_metadata.call - span.finish - end - - # Wrap .each() to ensure span finishes after consumption - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&user_block| - # Consume stream, calling user's block for each event - # The SDK builds @accumated_message_snapshot internally - original_each.call(&user_block) - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Extract accumulated message and finish span - finish_braintrust_span.call - end - - # Wrap .text() to return an Enumerable that ensures span finishes - original_text = stream.method(:text) - stream.define_singleton_method(:text) do - text_enum = original_text.call - - # Return wrapper Enumerable that finishes span after consumption - Enumerator.new do |y| - # Consume text enumerable (this consumes underlying stream) - # The SDK builds @accumated_message_snapshot internally - text_enum.each { |text| y << text } - rescue => e - span.record_exception(e) - 
span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Extract accumulated message and finish span - finish_braintrust_span.call - end - end - - # Wrap .close() to ensure span finishes even if stream not consumed - original_close = stream.method(:close) - stream.define_singleton_method(:close) do - original_close.call - ensure - # Finish span even if stream was closed early - finish_braintrust_span.call - end - - # If a block was provided to stream(), call each with it immediately - if block - stream.each(&block) - end - - stream - end - end - - # Prepend the wrapper to the messages resource - client.messages.singleton_class.prepend(wrapper) - end - end - end -end diff --git a/lib/braintrust/trace/tokens.rb b/lib/braintrust/trace/tokens.rb deleted file mode 100644 index 679df12..0000000 --- a/lib/braintrust/trace/tokens.rb +++ /dev/null @@ -1,49 +0,0 @@ -# frozen_string_literal: true - -module Braintrust - module Trace - # Parse Anthropic usage tokens into normalized Braintrust metrics. - # Accumulates cache tokens into prompt_tokens and calculates total. - # @param usage [Hash, Object] usage object from Anthropic response - # @return [Hash] normalized metrics - def self.parse_anthropic_usage_tokens(usage) - metrics = {} - return metrics unless usage - - usage_hash = usage.respond_to?(:to_h) ? 
usage.to_h : usage - return metrics unless usage_hash.is_a?(Hash) - - # Field mappings: Anthropic → Braintrust - # Also handles RubyLLM's simplified cache field names - field_map = { - "input_tokens" => "prompt_tokens", - "output_tokens" => "completion_tokens", - "cache_read_input_tokens" => "prompt_cached_tokens", - "cache_creation_input_tokens" => "prompt_cache_creation_tokens", - # RubyLLM uses simplified names - "cached_tokens" => "prompt_cached_tokens", - "cache_creation_tokens" => "prompt_cache_creation_tokens" - } - - usage_hash.each do |key, value| - next unless value.is_a?(Numeric) - key_str = key.to_s - target = field_map[key_str] - metrics[target] = value.to_i if target - end - - # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs) - prompt_tokens = (metrics["prompt_tokens"] || 0) + - (metrics["prompt_cached_tokens"] || 0) + - (metrics["prompt_cache_creation_tokens"] || 0) - metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0 - - # Calculate total - if metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") - metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] - end - - metrics - end - end -end diff --git a/test/braintrust/contrib/anthropic/deprecated_test.rb b/test/braintrust/contrib/anthropic/deprecated_test.rb new file mode 100644 index 0000000..9fda545 --- /dev/null +++ b/test/braintrust/contrib/anthropic/deprecated_test.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/anthropic/integration" +require "braintrust/contrib/anthropic/deprecated" + +class Braintrust::Contrib::Anthropic::DeprecatedTest < Minitest::Test + # --- .wrap --- + + def test_wrap_delegates_to_instrument + mock_client = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Anthropic.wrap(mock_client, tracer_provider: mock_tracer) } + end 
+ + assert_equal :anthropic, captured_args[0] + assert_same mock_client, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_logs_deprecation_warning + mock_client = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::Anthropic.wrap(mock_client) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, warning_message) + end + + def test_wrap_returns_client + mock_client = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::Anthropic.wrap(mock_client) } + assert_same mock_client, result + end + end + + def test_wrap_uses_default_tracer_when_not_provided + mock_client = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Anthropic.wrap(mock_client) } + end + + assert_equal :anthropic, captured_args[0] + assert_nil captured_args[1][:tracer_provider] + end +end diff --git a/test/braintrust/contrib/anthropic/instrumentation/common_test.rb b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb new file mode 100644 index 0000000..a530424 --- /dev/null +++ b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/anthropic/instrumentation/common" + +class Braintrust::Contrib::Anthropic::Instrumentation::CommonTest < Minitest::Test + Common = Braintrust::Contrib::Anthropic::Instrumentation::Common + + # =================== + # parse_usage_tokens + # =================== + + def test_maps_standard_fields + usage = {"input_tokens" => 10, "output_tokens" => 20} + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, 
metrics["completion_tokens"] + assert_equal 30, metrics["tokens"] + end + + def test_handles_symbol_keys + usage = {input_tokens: 10, output_tokens: 20} + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + end + + def test_handles_cache_read_tokens + usage = { + "input_tokens" => 100, + "output_tokens" => 50, + "cache_read_input_tokens" => 80 + } + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 80, metrics["prompt_cached_tokens"] + # prompt_tokens should include cache tokens + assert_equal 180, metrics["prompt_tokens"] + end + + def test_handles_cache_creation_tokens + usage = { + "input_tokens" => 100, + "output_tokens" => 50, + "cache_creation_input_tokens" => 20 + } + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 20, metrics["prompt_cache_creation_tokens"] + # prompt_tokens should include cache creation tokens + assert_equal 120, metrics["prompt_tokens"] + end + + def test_handles_object_with_to_h + # SDK returns objects with to_h method + usage_object = Struct.new(:input_tokens, :output_tokens, keyword_init: true) + usage = usage_object.new(input_tokens: 10, output_tokens: 20) + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + end + + def test_returns_empty_hash_for_nil + assert_equal({}, Common.parse_usage_tokens(nil)) + end + + def test_returns_empty_hash_for_non_hash + assert_equal({}, Common.parse_usage_tokens("invalid")) + end + + def test_calculates_total + usage = {"input_tokens" => 10, "output_tokens" => 20} + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 30, metrics["tokens"] + end + + def test_total_includes_cache_tokens + usage = { + "input_tokens" => 100, + "output_tokens" => 50, + "cache_read_input_tokens" => 80, + "cache_creation_input_tokens" => 20 + } + + metrics = Common.parse_usage_tokens(usage) + + # Total = 
prompt_tokens (which includes cache) + completion_tokens + # prompt_tokens = 100 + 80 + 20 = 200 + # tokens = 200 + 50 = 250 + assert_equal 250, metrics["tokens"] + end +end diff --git a/test/braintrust/trace/anthropic_test.rb b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb similarity index 79% rename from test/braintrust/trace/anthropic_test.rb rename to test/braintrust/contrib/anthropic/instrumentation/messages_test.rb index 918f010..d9b00b7 100644 --- a/test/braintrust/trace/anthropic_test.rb +++ b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb @@ -1,23 +1,26 @@ # frozen_string_literal: true require "test_helper" +require_relative "../integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/anthropic/patcher" + +class Braintrust::Contrib::Anthropic::Instrumentation::MessagesTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper -class Braintrust::Trace::AnthropicTest < Minitest::Test def setup - # Skip all Anthropic tests if the gem is not available - skip "Anthropic gem not available" unless defined?(Anthropic) + skip_unless_anthropic! 
end - def test_wrap_creates_span_for_basic_message + def test_creates_span_for_basic_message VCR.use_cassette("anthropic/basic_message") do - require "anthropic" - # Set up test rig (includes Braintrust processor) rig = setup_otel_test_rig - # Create Anthropic client and wrap it with Braintrust tracing + # Create Anthropic client and instrument it client = Anthropic::Client.new(api_key: get_anthropic_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a simple message request message = client.messages.create( @@ -68,19 +71,60 @@ def test_wrap_creates_span_for_basic_message assert metrics["completion_tokens"] > 0 assert metrics["tokens"] > 0 assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_system_prompt - VCR.use_cassette("anthropic/system_prompt") do - require "anthropic" + def test_creates_span_with_class_level_patching + VCR.use_cassette("anthropic/basic_message") do + # Set up test rig + rig = setup_otel_test_rig + + # For class-level patching, set the default tracer provider via Braintrust.init + # (instance-level patching uses target: which stores tracer_provider in context) + Braintrust.init(tracer_provider: rig.tracer_provider) + + # Instrument at class level (no target:) - patches Anthropic::Resources::Messages + Braintrust.instrument!(:anthropic) + # Create client AFTER class-level instrumentation + client = Anthropic::Client.new(api_key: get_anthropic_key) + + # Make a simple message request + message = client.messages.create( + model: "claude-3-5-sonnet-20241022", + max_tokens: 10, + messages: [ + {role: "user", 
content: "Say 'test'"} + ] + ) + + # Verify response + refute_nil message + + # Drain and verify span was created + span = rig.drain_one + + # Verify span name and key attributes + assert_equal "anthropic.messages.create", span.name + assert span.attributes.key?("braintrust.input_json") + assert span.attributes.key?("braintrust.output_json") + assert span.attributes.key?("braintrust.metadata") + assert span.attributes.key?("braintrust.metrics") + end + end + + def test_handles_system_prompt + VCR.use_cassette("anthropic/system_prompt") do # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with system prompt message = client.messages.create( @@ -122,16 +166,14 @@ def test_wrap_handles_system_prompt end end - def test_wrap_handles_tool_use + def test_handles_tool_use VCR.use_cassette("anthropic/tool_use") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with tools message = client.messages.create( @@ -191,16 +233,14 @@ def test_wrap_handles_tool_use end end - def test_wrap_handles_streaming + def test_handles_streaming VCR.use_cassette("anthropic/streaming") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = 
Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request stream = client.messages.stream( @@ -216,13 +256,12 @@ def test_wrap_handles_streaming # Just consume events end - # Drain and verify span was created + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name - # Verify input captured + # Verify input captured on span assert span.attributes.key?("braintrust.input_json") input = JSON.parse(span.attributes["braintrust.input_json"]) assert_equal 1, input.length @@ -232,24 +271,17 @@ def test_wrap_handles_streaming assert span.attributes.key?("braintrust.metadata") metadata = JSON.parse(span.attributes["braintrust.metadata"]) assert_equal true, metadata["stream"] - - # Note: Full output aggregation testing requires live API calls - # VCR doesn't perfectly replay streaming SSE responses - # The streaming wrapper is implemented and works with real API calls end end - def test_wrap_handles_streaming_output_aggregation - # This test demonstrates the bug: streaming output is not aggregated + def test_handles_streaming_output_aggregation VCR.use_cassette("anthropic/streaming_aggregation") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request collected_text = "" @@ -271,10 +303,9 @@ def 
test_wrap_handles_streaming_output_aggregation # Verify we got some text refute_empty collected_text, "Should have received text from stream" - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -303,19 +334,21 @@ def test_wrap_handles_streaming_output_aggregation assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" assert metrics["tokens"], "Should have total tokens" assert metrics["tokens"] > 0, "Total tokens should be greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_vision_with_base64 + def test_handles_vision_with_base64 VCR.use_cassette("anthropic/vision_base64") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Small 1x1 red pixel PNG as base64 test_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" @@ -369,16 +402,14 @@ def test_wrap_handles_vision_with_base64 end end - def test_wrap_handles_reasoning_thinking_blocks + def test_handles_reasoning_thinking_blocks VCR.use_cassette("anthropic/reasoning") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - 
Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with reasoning enabled message = client.messages.create( @@ -429,16 +460,14 @@ def test_wrap_handles_reasoning_thinking_blocks end end - def test_wrap_handles_multi_turn_conversation + def test_handles_multi_turn_conversation VCR.use_cassette("anthropic/multi_turn") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a multi-turn conversation request message = client.messages.create( @@ -483,16 +512,14 @@ def test_wrap_handles_multi_turn_conversation end end - def test_wrap_handles_temperature_and_stop_sequences + def test_handles_temperature_and_stop_sequences VCR.use_cassette("anthropic/temperature_stop") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with temperature and stop sequences message = client.messages.create( @@ -533,16 +560,14 @@ def test_wrap_handles_temperature_and_stop_sequences end end - def test_wrap_handles_tool_use_multi_turn + def 
test_handles_tool_use_multi_turn VCR.use_cassette("anthropic/tool_use_multi_turn") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # First request - model should use tool first_message = client.messages.create( @@ -652,16 +677,14 @@ def test_wrap_handles_tool_use_multi_turn end end - def test_wrap_handles_streaming_with_text_each + def test_handles_streaming_with_text_each VCR.use_cassette("anthropic/streaming_text_each") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request using .text.each collected_text = "" @@ -687,10 +710,9 @@ def test_wrap_handles_streaming_with_text_each assert_match(/2/, collected_text, "Should contain 2") assert_match(/3/, collected_text, "Should contain 3") - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -715,19 +737,21 @@ def test_wrap_handles_streaming_with_text_each assert metrics["prompt_tokens"] > 0, "Prompt tokens should be greater than 0" assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" assert metrics["tokens"] > 0, "Total tokens should be 
greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_streaming_with_accumulated_text + def test_handles_streaming_with_accumulated_text VCR.use_cassette("anthropic/streaming_accumulated_text") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request using .accumulated_text stream = client.messages.stream( @@ -750,10 +774,9 @@ def test_wrap_handles_streaming_with_accumulated_text assert_match(/Hello/, accumulated_text, "Should contain greeting") assert_match(/help/, accumulated_text, "Should offer help") - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -774,19 +797,21 @@ def test_wrap_handles_streaming_with_accumulated_text assert metrics["prompt_tokens"] > 0, "Prompt tokens should be greater than 0" assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" assert metrics["tokens"] > 0, "Total tokens should be greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_streaming_with_accumulated_message + def 
test_handles_streaming_with_accumulated_message VCR.use_cassette("anthropic/streaming_accumulated_message") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request using .accumulated_message stream = client.messages.stream( @@ -805,10 +830,9 @@ def test_wrap_handles_streaming_with_accumulated_message refute_nil message.content, "Message should have content" refute_empty message.content, "Message content should not be empty" - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -827,19 +851,21 @@ def test_wrap_handles_streaming_with_accumulated_message assert metrics["prompt_tokens"] > 0, "Prompt tokens should be greater than 0" assert metrics["completion_tokens"], "Should have completion_tokens" assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_streaming_with_close + def test_handles_streaming_with_close VCR.use_cassette("anthropic/streaming_close") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create 
Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request and close early without consuming stream = client.messages.stream( @@ -853,13 +879,12 @@ def test_wrap_handles_streaming_with_close # Close the stream early (before consuming) stream.close - # Drain and verify span was finished + # Single span created on close span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name - # Verify input was captured + # Verify input was captured on span assert span.attributes.key?("braintrust.input_json") input = JSON.parse(span.attributes["braintrust.input_json"]) assert_equal 1, input.length diff --git a/test/braintrust/contrib/anthropic/instrumentation_test.rb b/test/braintrust/contrib/anthropic/instrumentation_test.rb new file mode 100644 index 0000000..2932d1c --- /dev/null +++ b/test/braintrust/contrib/anthropic/instrumentation_test.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/anthropic/patcher" +require "braintrust/contrib/anthropic/deprecated" + +# Tests for instance-level instrumentation behavior and backward compatibility +class Braintrust::Contrib::Anthropic::InstrumentationTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper + + def setup + skip_unless_anthropic! + end + + # --- Instance-level instrumentation --- + + def test_instance_instrumentation_only_patches_target_client + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::Anthropic::MessagesPatcher.patched? 
+ + rig = setup_otel_test_rig + + # Create two client instances BEFORE any patching + client_traced = Anthropic::Client.new(api_key: "test-api-key") + client_untraced = Anthropic::Client.new(api_key: "test-api-key") + + # Verify neither client is patched initially + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_traced), + "client_traced should not be patched initially" + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_untraced), + "client_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?, + "class should not be patched initially" + + # Instrument only one client (instance-level) + Braintrust.instrument!(:anthropic, target: client_traced, tracer_provider: rig.tracer_provider) + + # Only the traced client should be patched + assert Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_traced), + "client_traced should be patched after instrument!" + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_untraced), + "client_untraced should NOT be patched after instrument!" 
+ + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end + + # --- Backward compatibility: wrap() vs instrument!() --- + + def test_wrap_and_instrument_produce_identical_spans + VCR.use_cassette("anthropic/basic_message", allow_playback_repeats: true) do + # Test with old wrap() API + rig_wrap = setup_otel_test_rig + client_wrap = Anthropic::Client.new(api_key: get_anthropic_key) + suppress_logs { Braintrust::Trace::Anthropic.wrap(client_wrap, tracer_provider: rig_wrap.tracer_provider) } + + client_wrap.messages.create( + model: "claude-3-5-sonnet-20241022", + max_tokens: 10, + messages: [{role: "user", content: "Say 'test'"}] + ) + span_wrap = rig_wrap.drain_one + + # Test with new instrument!() API + rig_instrument = setup_otel_test_rig + client_instrument = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client_instrument, tracer_provider: rig_instrument.tracer_provider) + + client_instrument.messages.create( + model: "claude-3-5-sonnet-20241022", + max_tokens: 10, + messages: [{role: "user", content: "Say 'test'"}] + ) + span_instrument = rig_instrument.drain_one + + # Both spans should have identical structure + assert_equal span_wrap.name, span_instrument.name, + "span names should match" + assert_equal span_wrap.attributes["braintrust.input_json"], + span_instrument.attributes["braintrust.input_json"], + "input attributes should match" + assert_equal span_wrap.attributes["braintrust.output_json"], + span_instrument.attributes["braintrust.output_json"], + "output attributes should match" + + # Compare token metrics (timing metrics like time_to_first_token may vary between calls) + metrics_wrap = JSON.parse(span_wrap.attributes["braintrust.metrics"]) + metrics_instrument = JSON.parse(span_instrument.attributes["braintrust.metrics"]) + %w[prompt_tokens 
completion_tokens tokens].each do |key| + assert_equal metrics_wrap[key], metrics_instrument[key], + "metrics #{key} should match" + end + + # Both should have time_to_first_token (but values may differ) + assert metrics_wrap.key?("time_to_first_token"), "wrap metrics should have time_to_first_token" + assert metrics_instrument.key?("time_to_first_token"), "instrument metrics should have time_to_first_token" + + # Metadata should have same keys (values may differ slightly for timestamps) + metadata_wrap = JSON.parse(span_wrap.attributes["braintrust.metadata"]) + metadata_instrument = JSON.parse(span_instrument.attributes["braintrust.metadata"]) + assert_equal metadata_wrap.keys.sort, metadata_instrument.keys.sort, + "metadata keys should match" + end + end +end diff --git a/test/braintrust/contrib/anthropic/integration_helper.rb b/test/braintrust/contrib/anthropic/integration_helper.rb new file mode 100644 index 0000000..a7ccb7a --- /dev/null +++ b/test/braintrust/contrib/anthropic/integration_helper.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +# Test helpers for Anthropic integration tests. +# Provides gem loading helpers for the anthropic gem. + +module Braintrust + module Contrib + module Anthropic + module IntegrationHelper + # Skip test unless anthropic gem is available. + # Loads the gem if available. + def skip_unless_anthropic! + unless Gem.loaded_specs["anthropic"] + skip "anthropic gem not available" + end + + require "anthropic" unless defined?(::Anthropic) + end + + # Load anthropic gem if available (doesn't skip, for tests that handle both states). 
+ def load_anthropic_if_available + if Gem.loaded_specs["anthropic"] + require "anthropic" unless defined?(::Anthropic) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/anthropic/integration_test.rb b/test/braintrust/contrib/anthropic/integration_test.rb new file mode 100644 index 0000000..0a77fe2 --- /dev/null +++ b/test/braintrust/contrib/anthropic/integration_test.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" +require "braintrust/contrib/anthropic/integration" + +class Braintrust::Contrib::Anthropic::IntegrationTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper + + def setup + load_anthropic_if_available + @integration = Braintrust::Contrib::Anthropic::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :anthropic, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["anthropic"], @integration.gem_names + end + + # --- .require_paths --- + + def test_require_paths + assert_equal ["anthropic"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "0.3.0", @integration.minimum_version + end + + # --- .loaded? 
--- + + def test_loaded_returns_true_when_anthropic_client_defined + skip "Anthropic gem not available" unless defined?(::Anthropic::Client) + + assert @integration.loaded?, "Should be loaded when ::Anthropic::Client is defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/anthropic/patcher_test.rb b/test/braintrust/contrib/anthropic/patcher_test.rb new file mode 100644 index 0000000..ba5888d --- /dev/null +++ b/test/braintrust/contrib/anthropic/patcher_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/anthropic/patcher" + +class Braintrust::Contrib::Anthropic::MessagesPatcherTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper + + def setup + skip_unless_anthropic! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_anthropic_client_defined + assert Braintrust::Contrib::Anthropic::MessagesPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + fake_messages_class = Class.new + + ::Anthropic::Resources.stub_const(:Messages, fake_messages_class) do + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched? 
+ end + end + + def test_patched_returns_true_when_module_included + fake_messages_class = Class.new do + include Braintrust::Contrib::Anthropic::Instrumentation::Messages + end + + ::Anthropic::Resources.stub_const(:Messages, fake_messages_class) do + assert Braintrust::Contrib::Anthropic::MessagesPatcher.patched? + end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:messages, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::Anthropic::Instrumentation::Messages + end + + mock_chain(:messages, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_messages_class = Minitest::Mock.new + fake_messages_class.expect(:include, true, [Braintrust::Contrib::Anthropic::Instrumentation::Messages]) + + ::Anthropic::Resources.stub_const(:Messages, fake_messages_class) do + Braintrust::Contrib::Anthropic::MessagesPatcher.perform_patch + fake_messages_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::Anthropic::Instrumentation::Messages]) + + mock_chain(:messages, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::Anthropic::Client]) + Braintrust::Contrib::Anthropic::MessagesPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::Anthropic::Client]) + + assert_raises(ArgumentError) do + 
Braintrust::Contrib::Anthropic::MessagesPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end diff --git a/test/braintrust/trace/tokens_test.rb b/test/braintrust/trace/tokens_test.rb deleted file mode 100644 index c990c58..0000000 --- a/test/braintrust/trace/tokens_test.rb +++ /dev/null @@ -1,78 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" -require "braintrust/trace/tokens" - -class TokensTest < Minitest::Test - def test_anthropic_maps_input_output_tokens - usage = {"input_tokens" => 10, "output_tokens" => 20} - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - assert_equal 10, metrics["prompt_tokens"] - assert_equal 20, metrics["completion_tokens"] - assert_equal 30, metrics["tokens"] - end - - def test_anthropic_maps_cache_tokens - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cache_read_input_tokens" => 50, - "cache_creation_input_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - assert_equal 50, metrics["prompt_cached_tokens"] - assert_equal 5, metrics["prompt_cache_creation_tokens"] - end - - def test_anthropic_accumulates_cache_into_prompt_tokens - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cache_read_input_tokens" => 50, - "cache_creation_input_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - # prompt_tokens = 10 + 50 + 5 = 65 - assert_equal 65, metrics["prompt_tokens"] - end - - def test_anthropic_calculates_total_with_cache - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cache_read_input_tokens" => 50, - "cache_creation_input_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - # tokens = (10 + 50 + 5) + 20 = 85 - assert_equal 85, metrics["tokens"] - end - - def test_anthropic_returns_empty_hash_for_nil - assert_equal({}, Braintrust::Trace.parse_anthropic_usage_tokens(nil)) - end - - def 
test_anthropic_handles_ruby_llm_simplified_cache_fields - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cached_tokens" => 50, - "cache_creation_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - assert_equal 50, metrics["prompt_cached_tokens"] - assert_equal 5, metrics["prompt_cache_creation_tokens"] - assert_equal 65, metrics["prompt_tokens"] - assert_equal 85, metrics["tokens"] - end -end From 475b390c7049d13c3be48d08273c96b2b661acfc Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:52:14 -0500 Subject: [PATCH 12/27] Added: Internal::Env for managing environment variables --- lib/braintrust/internal/env.rb | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 lib/braintrust/internal/env.rb diff --git a/lib/braintrust/internal/env.rb b/lib/braintrust/internal/env.rb new file mode 100644 index 0000000..ba17b85 --- /dev/null +++ b/lib/braintrust/internal/env.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Braintrust + module Internal + # Environment variable utilities. + module Env + # Parse a comma-separated environment variable into an array of symbols. 
+ # @param key [String] The environment variable name + # @return [Array, nil] Array of symbols, or nil if not set + def self.parse_list(key) + value = ENV[key] + return nil unless value + value.split(",").map(&:strip).map(&:to_sym) + end + end + end +end From 6393b7e0fbc5c85f36815500417550b5fd2f6378 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sun, 28 Dec 2025 20:22:59 -0500 Subject: [PATCH 13/27] Added: ClimateControl for testing with env vars --- Gemfile | 1 + Gemfile.lock | 2 ++ gemfiles/anthropic.gemfile | 1 + gemfiles/anthropic_1_11.gemfile | 1 + gemfiles/anthropic_1_12.gemfile | 1 + gemfiles/anthropic_uninstalled.gemfile | 1 + gemfiles/openai.gemfile | 1 + gemfiles/openai_0_33.gemfile | 1 + gemfiles/openai_0_34.gemfile | 1 + gemfiles/openai_ruby_openai.gemfile | 1 + gemfiles/openai_uninstalled.gemfile | 1 + gemfiles/opentelemetry_latest.gemfile | 1 + gemfiles/opentelemetry_min.gemfile | 1 + gemfiles/ruby_llm.gemfile | 1 + gemfiles/ruby_llm_1_8.gemfile | 1 + gemfiles/ruby_llm_1_9.gemfile | 1 + gemfiles/ruby_llm_uninstalled.gemfile | 1 + gemfiles/ruby_openai.gemfile | 1 + gemfiles/ruby_openai_7_0.gemfile | 1 + gemfiles/ruby_openai_8_0.gemfile | 1 + gemfiles/ruby_openai_uninstalled.gemfile | 1 + test/test_helper.rb | 1 + 22 files changed, 23 insertions(+) diff --git a/Gemfile b/Gemfile index eeccddf..56cd531 100644 --- a/Gemfile +++ b/Gemfile @@ -8,3 +8,4 @@ gemspec # Additional development tools not in gemspec (optional/convenience) gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" diff --git a/Gemfile.lock b/Gemfile.lock index 169bf43..d2ff082 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,6 +19,7 @@ GEM ast (2.4.3) bigdecimal (4.0.1) builder (3.3.0) + climate_control (1.2.0) crack (1.0.1) bigdecimal rexml @@ -126,6 +127,7 @@ PLATFORMS DEPENDENCIES appraisal (~> 2.5) braintrust! 
+ climate_control (~> 1.2) kramdown (~> 2.0) minitest (~> 5.0) minitest-reporters (~> 1.6) diff --git a/gemfiles/anthropic.gemfile b/gemfiles/anthropic.gemfile index 56e98f0..d558f46 100644 --- a/gemfiles/anthropic.gemfile +++ b/gemfiles/anthropic.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "anthropic", ">= 1.11" gemspec path: "../" diff --git a/gemfiles/anthropic_1_11.gemfile b/gemfiles/anthropic_1_11.gemfile index ed708b3..f4e3486 100644 --- a/gemfiles/anthropic_1_11.gemfile +++ b/gemfiles/anthropic_1_11.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "anthropic", "~> 1.11.0" gemspec path: "../" diff --git a/gemfiles/anthropic_1_12.gemfile b/gemfiles/anthropic_1_12.gemfile index 29b1b93..5d59bc9 100644 --- a/gemfiles/anthropic_1_12.gemfile +++ b/gemfiles/anthropic_1_12.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "anthropic", "~> 1.12.0" gemspec path: "../" diff --git a/gemfiles/anthropic_uninstalled.gemfile b/gemfiles/anthropic_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/anthropic_uninstalled.gemfile +++ b/gemfiles/anthropic_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/gemfiles/openai.gemfile b/gemfiles/openai.gemfile index dcc2e81..694be81 100644 --- a/gemfiles/openai.gemfile +++ b/gemfiles/openai.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", ">= 0.34" gem "base64" diff --git a/gemfiles/openai_0_33.gemfile 
b/gemfiles/openai_0_33.gemfile index 6104332..2d1b22b 100644 --- a/gemfiles/openai_0_33.gemfile +++ b/gemfiles/openai_0_33.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", "~> 0.33.0" gem "base64" diff --git a/gemfiles/openai_0_34.gemfile b/gemfiles/openai_0_34.gemfile index d8969d3..774901c 100644 --- a/gemfiles/openai_0_34.gemfile +++ b/gemfiles/openai_0_34.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", "~> 0.34.0" gem "base64" diff --git a/gemfiles/openai_ruby_openai.gemfile b/gemfiles/openai_ruby_openai.gemfile index a8db595..24b7d0b 100644 --- a/gemfiles/openai_ruby_openai.gemfile +++ b/gemfiles/openai_ruby_openai.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", ">= 0.34" gem "ruby-openai", ">= 8.0" diff --git a/gemfiles/openai_uninstalled.gemfile b/gemfiles/openai_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/openai_uninstalled.gemfile +++ b/gemfiles/openai_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/gemfiles/opentelemetry_latest.gemfile b/gemfiles/opentelemetry_latest.gemfile index d309a6b..717db79 100644 --- a/gemfiles/opentelemetry_latest.gemfile +++ b/gemfiles/opentelemetry_latest.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "opentelemetry-sdk", ">= 1.10" gem "opentelemetry-exporter-otlp", ">= 0.31" diff --git a/gemfiles/opentelemetry_min.gemfile b/gemfiles/opentelemetry_min.gemfile index 
8edfd5d..4eeaf7a 100644 --- a/gemfiles/opentelemetry_min.gemfile +++ b/gemfiles/opentelemetry_min.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "opentelemetry-sdk", "~> 1.3.0" gem "opentelemetry-exporter-otlp", "~> 0.28.0" diff --git a/gemfiles/ruby_llm.gemfile b/gemfiles/ruby_llm.gemfile index 6f2c658..84c240e 100644 --- a/gemfiles/ruby_llm.gemfile +++ b/gemfiles/ruby_llm.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby_llm", ">= 1.9" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_8.gemfile b/gemfiles/ruby_llm_1_8.gemfile index 5a5a016..ce2ffb9 100644 --- a/gemfiles/ruby_llm_1_8.gemfile +++ b/gemfiles/ruby_llm_1_8.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby_llm", "~> 1.8.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_9.gemfile b/gemfiles/ruby_llm_1_9.gemfile index deee1d0..3334794 100644 --- a/gemfiles/ruby_llm_1_9.gemfile +++ b/gemfiles/ruby_llm_1_9.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby_llm", "~> 1.9.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_uninstalled.gemfile b/gemfiles/ruby_llm_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/ruby_llm_uninstalled.gemfile +++ b/gemfiles/ruby_llm_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/gemfiles/ruby_openai.gemfile b/gemfiles/ruby_openai.gemfile index 29caf96..1f7f2f7 100644 --- a/gemfiles/ruby_openai.gemfile +++ b/gemfiles/ruby_openai.gemfile @@ 
-4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby-openai", ">= 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_7_0.gemfile b/gemfiles/ruby_openai_7_0.gemfile index 4c9e319..d509a12 100644 --- a/gemfiles/ruby_openai_7_0.gemfile +++ b/gemfiles/ruby_openai_7_0.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby-openai", "~> 7.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_8_0.gemfile b/gemfiles/ruby_openai_8_0.gemfile index 3044fea..da5eb28 100644 --- a/gemfiles/ruby_openai_8_0.gemfile +++ b/gemfiles/ruby_openai_8_0.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby-openai", "~> 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_uninstalled.gemfile b/gemfiles/ruby_openai_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/ruby_openai_uninstalled.gemfile +++ b/gemfiles/ruby_openai_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/test/test_helper.rb b/test/test_helper.rb index 7ef9f68..e03950d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -18,6 +18,7 @@ require "minitest/autorun" require "minitest/stub_const" +require "climate_control" # Show test timings when MT_VERBOSE is set if ENV["MT_VERBOSE"] From 5421567f6a2cee9b3d639c8123695af1c9cc082f Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:50:26 -0500 Subject: [PATCH 14/27] Added: Contrib#auto_instrument! 
--- lib/braintrust/contrib.rb | 88 ++++++++++++++- lib/braintrust/internal/env.rb | 16 +++ test/braintrust/contrib_test.rb | 191 ++++++++++++++++++++++++++++++++ 3 files changed, 292 insertions(+), 3 deletions(-) diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index 713d536..c81b36d 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -6,6 +6,62 @@ require_relative "contrib/context" module Braintrust + # Auto-instrument available integrations. + # + # Discovers which integrations have their target libraries loaded + # and instruments them automatically. This is the primary public API + # for enabling auto-instrumentation. + # + # @param config [nil, Boolean, Hash] Auto-instrumentation configuration: + # - nil: use BRAINTRUST_AUTO_INSTRUMENT env var (defaults to enabled) + # - true: explicitly enable auto-instrumentation + # - false: explicitly disable auto-instrumentation + # - Hash with :only or :except keys for filtering integrations + # + # Environment variables: + # - BRAINTRUST_AUTO_INSTRUMENT: set to "false" to disable (default: enabled) + # - BRAINTRUST_INSTRUMENT_ONLY: comma-separated list of integrations to include + # - BRAINTRUST_INSTRUMENT_EXCEPT: comma-separated list of integrations to exclude + # + # @return [Array, nil] names of instrumented integrations, or nil if disabled + # + # @example Enable with defaults + # Braintrust.auto_instrument! 
+ # + # @example Explicitly enable + # Braintrust.auto_instrument!(true) + # + # @example Disable + # Braintrust.auto_instrument!(false) + # + # @example Only specific integrations + # Braintrust.auto_instrument!(only: [:openai, :anthropic]) + # + # @example Exclude specific integrations + # Braintrust.auto_instrument!(except: [:ruby_llm]) + def self.auto_instrument!(config = nil) + should_instrument = case config + when nil + Internal::Env.auto_instrument + when false + false + when true, Hash + true + end + + return unless should_instrument + + only = Internal::Env.instrument_only + except = Internal::Env.instrument_except + + if config.is_a?(Hash) + only = config[:only] || only + except = config[:except] || except + end + + Contrib.auto_instrument!(only: only, except: except) + end + # Instrument a registered integration by name. # This is the main entry point for activating integrations. # @@ -22,7 +78,7 @@ module Braintrust # client = OpenAI::Client.new # Braintrust.instrument!(:openai, target: client, tracer_provider: my_provider) def self.instrument!(name, **options) - Braintrust::Contrib.instrument!(name, **options) + Contrib.instrument!(name, **options) end # Contrib framework for auto-instrumentation integrations. @@ -43,14 +99,39 @@ def init(tracer_provider: nil) @default_tracer_provider = tracer_provider end + # Auto-instrument available integrations. + # Discovers which integrations have their target libraries loaded + # and instruments them automatically. + # + # @param only [Array] whitelist - only instrument these + # @param except [Array] blacklist - skip these + # @return [Array] names of integrations that were instrumented + # + # @example Instrument all available + # Braintrust::Contrib.auto_instrument! 
+ # + # @example Only specific integrations + # Braintrust::Contrib.auto_instrument!(only: [:openai, :anthropic]) + # + # @example Exclude specific integrations + # Braintrust::Contrib.auto_instrument!(except: [:ruby_llm]) + def auto_instrument!(only: nil, except: nil) + targets = registry.available + targets = targets.select { |i| only.include?(i.integration_name) } if only + targets = targets.reject { |i| except.include?(i.integration_name) } if except + + targets.each_with_object([]) do |integration, instrumented| + instrumented << integration.integration_name if instrument!(integration.integration_name) + end + end + # Instrument a registered integration by name. - # This is the main entry point for activating integrations. # # @param name [Symbol] The integration name (e.g., :openai, :anthropic) # @param options [Hash] Optional configuration # @option options [Object] :target Optional target instance to instrument specifically # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider - # @return [void] + # @return [Boolean] true if instrumentation succeeded # # @example Instrument all OpenAI clients # Braintrust::Contrib.instrument!(:openai) @@ -63,6 +144,7 @@ def instrument!(name, **options) integration.instrument!(**options) else Braintrust::Log.error("No integration for '#{name}' is defined!") + false end end diff --git a/lib/braintrust/internal/env.rb b/lib/braintrust/internal/env.rb index ba17b85..cf0fa57 100644 --- a/lib/braintrust/internal/env.rb +++ b/lib/braintrust/internal/env.rb @@ -4,6 +4,22 @@ module Braintrust module Internal # Environment variable utilities. 
module Env + ENV_AUTO_INSTRUMENT = "BRAINTRUST_AUTO_INSTRUMENT" + ENV_INSTRUMENT_EXCEPT = "BRAINTRUST_INSTRUMENT_EXCEPT" + ENV_INSTRUMENT_ONLY = "BRAINTRUST_INSTRUMENT_ONLY" + + def self.auto_instrument + ENV[ENV_AUTO_INSTRUMENT] != "false" + end + + def self.instrument_except + parse_list(ENV_INSTRUMENT_EXCEPT) + end + + def self.instrument_only + parse_list(ENV_INSTRUMENT_ONLY) + end + # Parse a comma-separated environment variable into an array of symbols. # @param key [String] The environment variable name # @return [Array, nil] Array of symbols, or nil if not set diff --git a/test/braintrust/contrib_test.rb b/test/braintrust/contrib_test.rb index 525927b..4e19da1 100644 --- a/test/braintrust/contrib_test.rb +++ b/test/braintrust/contrib_test.rb @@ -3,6 +3,189 @@ require "test_helper" class Braintrust::ContribTest < Minitest::Test + # --- Braintrust.auto_instrument! --- + + def test_auto_instrument_enabled_by_default + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end + + assert_equal({only: nil, except: nil}, called_with) + end + + def test_auto_instrument_disabled_when_config_false + called = false + + Braintrust::Contrib.stub(:auto_instrument!, -> { called = true }) do + Braintrust.auto_instrument!(false) + end + + refute called + end + + def test_auto_instrument_enabled_when_config_true + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(true) + end + + assert_equal({only: nil, except: nil}, called_with) + end + + def test_auto_instrument_disabled_via_env_var + called = false + + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "false") do + Braintrust::Contrib.stub(:auto_instrument!, -> { called = true }) do + Braintrust.auto_instrument! 
+ end + end + + refute called + end + + def test_auto_instrument_passes_only_from_hash + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(only: [:openai, :anthropic]) + end + + assert_equal({only: [:openai, :anthropic], except: nil}, called_with) + end + + def test_auto_instrument_passes_except_from_hash + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(except: [:ruby_llm]) + end + + assert_equal({only: nil, except: [:ruby_llm]}, called_with) + end + + def test_auto_instrument_reads_only_from_env + called_with = nil + + ClimateControl.modify(BRAINTRUST_INSTRUMENT_ONLY: "openai,anthropic") do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end + end + + assert_equal({only: [:openai, :anthropic], except: nil}, called_with) + end + + def test_auto_instrument_reads_except_from_env + called_with = nil + + ClimateControl.modify(BRAINTRUST_INSTRUMENT_EXCEPT: "ruby_llm") do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end + end + + assert_equal({only: nil, except: [:ruby_llm]}, called_with) + end + + def test_auto_instrument_hash_overrides_env + called_with = nil + + ClimateControl.modify(BRAINTRUST_INSTRUMENT_ONLY: "from_env", BRAINTRUST_INSTRUMENT_EXCEPT: "from_env") do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(only: [:from_hash], except: [:also_from_hash]) + end + end + + assert_equal({only: [:from_hash], except: [:also_from_hash]}, called_with) + end + + # --- Contrib.auto_instrument! 
--- + + def test_contrib_auto_instrument_instruments_available_integrations + integration1 = stub_integration(:openai) + integration2 = stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + instrumented_names = [] + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { + instrumented_names << name + true + }) do + result = Braintrust::Contrib.auto_instrument! + assert_equal [:openai, :anthropic], result + end + end + + assert_equal [:openai, :anthropic], instrumented_names + end + + def test_contrib_auto_instrument_filters_with_only + integration1 = stub_integration(:openai) + integration2 = stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + instrumented_names = [] + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { + instrumented_names << name + true + }) do + result = Braintrust::Contrib.auto_instrument!(only: [:openai]) + assert_equal [:openai], result + end + end + + assert_equal [:openai], instrumented_names + end + + def test_contrib_auto_instrument_filters_with_except + integration1 = stub_integration(:openai) + integration2 = stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + instrumented_names = [] + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { + instrumented_names << name + true + }) do + result = Braintrust::Contrib.auto_instrument!(except: [:anthropic]) + assert_equal [:openai], result + end + end + + assert_equal [:openai], instrumented_names + end + + def test_contrib_auto_instrument_excludes_failed_instrumentations + integration1 = stub_integration(:openai) + integration2 
= stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { name == :openai }) do + result = Braintrust::Contrib.auto_instrument! + assert_equal [:openai], result + end + end + end + # --- Braintrust.instrument! delegation --- def test_braintrust_instrument_delegates_to_contrib @@ -197,4 +380,12 @@ def test_tracer_for_uses_target_context_provider context_provider.verify mock_context.verify end + + private + + def stub_integration(name) + Object.new.tap do |obj| + obj.define_singleton_method(:integration_name) { name } + end + end end From 28a449851ed5c36294afa5e97c38172cf54358b2 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:53:03 -0500 Subject: [PATCH 15/27] Added: auto_instrument to Braintrust.init --- Appraisals | 10 +++ examples/setup/init.rb | 126 +++++++++++++++++++++++++++++++++ examples/setup/init_minimal.rb | 48 +++++++++++++ gemfiles/contrib.gemfile | 13 ++++ lib/braintrust.rb | 10 ++- test/braintrust_test.rb | 20 ++++++ 6 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 examples/setup/init.rb create mode 100644 examples/setup/init_minimal.rb create mode 100644 gemfiles/contrib.gemfile diff --git a/Appraisals b/Appraisals index 9f1df2d..5d062e5 100644 --- a/Appraisals +++ b/Appraisals @@ -69,3 +69,13 @@ appraise "openai-ruby-openai" do gem "openai", ">= 0.34" gem "ruby-openai", ">= 8.0" end + +# LLM libraries that can coexist - for demos that use multiple LLM SDKs +# Note: ruby-openai is excluded because it conflicts with the official openai gem +# (they share the same namespace). Use openai-ruby-openai appraisal to test both. 
+appraise "contrib" do + gem "openai", ">= 0.34" + gem "anthropic", ">= 1.11" + gem "ruby_llm", ">= 1.9" + gem "base64" # needed for openai gem on Ruby 3.4+ +end diff --git a/examples/setup/init.rb b/examples/setup/init.rb new file mode 100644 index 0000000..2c64a29 --- /dev/null +++ b/examples/setup/init.rb @@ -0,0 +1,126 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Auto-instrumentation via Braintrust.init +# +# This example demonstrates the auto_instrument feature introduced in Braintrust.init. +# Instead of manually calling Braintrust.instrument! for each LLM library, a single +# Braintrust.init call automatically detects and instruments all supported libraries. +# +# Supported libraries (when installed): +# - openai (OpenAI's official Ruby gem) +# - ruby-openai (alexrudall's ruby-openai gem) * +# - anthropic (Anthropic's official Ruby gem) +# - ruby_llm (RubyLLM unified interface) +# +# * Note: openai and ruby-openai share the same require path ("openai"), so only one +# can be loaded at a time. This demo uses the official openai gem. +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/init.rb + +# Check for required API keys +missing_keys = [] +missing_keys << "OPENAI_API_KEY" unless ENV["OPENAI_API_KEY"] +missing_keys << "ANTHROPIC_API_KEY" unless ENV["ANTHROPIC_API_KEY"] + +unless missing_keys.empty? + puts "Error: Missing required environment variables: #{missing_keys.join(", ")}" + puts "" + puts "Get your API keys from:" + puts " OpenAI: https://platform.openai.com/api-keys" + puts " Anthropic: https://console.anthropic.com/" + exit 1 +end + +puts "Auto-Instrumentation Demo" +puts "=" * 50 +puts "" +puts "Calling Braintrust.init..." +puts "This automatically detects and instruments all supported LLM libraries." 
+puts "" + +# Configure RubyLLM before init (it needs API keys configured) +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +# Single init call - auto_instrument is enabled by default! +# This detects openai, anthropic, and ruby_llm are loaded and instruments them. +Braintrust.init + +# Brief pause to allow async Braintrust login to complete +# (Not necessary in production, just for this short lived example) +sleep 0.5 + +puts "Libraries will be instrumented as API calls are made." +puts "" + +# Create clients for each library +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Create a tracer and root span to capture all operations +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-demo") +root_span = nil + +puts "Making API calls with each library..." +puts "-" * 50 + +tracer.in_span("examples/setup/init.rb") do |span| + root_span = span + + # 1. OpenAI (official gem) + puts "" + puts "[1/3] OpenAI (official gem)..." + openai_response = openai_client.chat.completions.create( + model: "gpt-4o-mini", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from OpenAI!' in exactly 5 words."} + ] + ) + puts " Response: #{openai_response.choices[0].message.content}" + + # 2. Anthropic + puts "" + puts "[2/3] Anthropic..." + anthropic_response = anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from Anthropic!' in exactly 5 words."} + ] + ) + puts " Response: #{anthropic_response.content[0].text}" + + # 3. RubyLLM (using OpenAI provider) + puts "" + puts "[3/3] RubyLLM (via OpenAI)..." + chat = RubyLLM.chat(model: "gpt-4o-mini") + ruby_llm_response = chat.ask("Say 'Hello from RubyLLM!' 
in exactly 5 words.") + puts " Response: #{ruby_llm_response.content}" +end + +puts "" +puts "-" * 50 +puts "" +puts "All API calls complete!" +puts "" +puts "View trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" +puts "" + +# Shutdown to flush spans +# (Not necessary in production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown + +puts "Trace sent to Braintrust!" diff --git a/examples/setup/init_minimal.rb b/examples/setup/init_minimal.rb new file mode 100644 index 0000000..4210dc1 --- /dev/null +++ b/examples/setup/init_minimal.rb @@ -0,0 +1,48 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# Example: Minimal auto-instrumentation via Braintrust.init +# +# Single Braintrust.init call auto-detects and instruments all supported LLM libraries. +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/init_minimal.rb + +require "bundler/setup" +require "braintrust" +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] } + +Braintrust.init + +# Brief pause to allow async Braintrust login to complete +# (Not necessary in production, just for this short lived example) +sleep 0.5 + +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-demo") + +tracer.in_span("examples/setup/init_minimal.rb") do + openai_client.chat.completions.create( + model: "gpt-4o-mini", + messages: [{role: "user", content: "Hello from OpenAI"}] + ) + + anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [{role: "user", content: "Hello from Anthropic"}] + ) + + RubyLLM.chat(model: "gpt-4o-mini").ask("Hello from RubyLLM") +end + +# Shutdown to flush spans +# (Not necessary in 
production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown diff --git a/gemfiles/contrib.gemfile b/gemfiles/contrib.gemfile new file mode 100644 index 0000000..3d96dac --- /dev/null +++ b/gemfiles/contrib.gemfile @@ -0,0 +1,13 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" +gem "openai", ">= 0.34" +gem "anthropic", ">= 1.11" +gem "ruby_llm", ">= 1.9" +gem "base64" + +gemspec path: "../" diff --git a/lib/braintrust.rb b/lib/braintrust.rb index 979651e..ba023fc 100644 --- a/lib/braintrust.rb +++ b/lib/braintrust.rb @@ -6,6 +6,7 @@ require_relative "braintrust/trace" require_relative "braintrust/api" require_relative "braintrust/internal/experiments" +require_relative "braintrust/internal/env" require_relative "braintrust/eval" require_relative "braintrust/contrib" @@ -41,8 +42,13 @@ class Error < StandardError; end # @param filter_ai_spans [Boolean, nil] Enable AI span filtering (overrides BRAINTRUST_OTEL_FILTER_AI_SPANS env var) # @param span_filter_funcs [Array, nil] Custom span filter functions # @param exporter [Exporter, nil] Optional exporter override (for testing) + # @param auto_instrument [Boolean, Hash, nil] Auto-instrumentation config: + # - nil (default): use BRAINTRUST_AUTO_INSTRUMENT env var, default true if not set + # - true: explicitly enable + # - false: explicitly disable + # - Hash with :only or :except keys for filtering # @return [State] the created state - def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, set_global: true, blocking_login: false, enable_tracing: true, tracer_provider: nil, filter_ai_spans: nil, span_filter_funcs: nil, exporter: nil) + def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, set_global: true, blocking_login: false, enable_tracing: true, tracer_provider: nil, 
filter_ai_spans: nil, span_filter_funcs: nil, exporter: nil, auto_instrument: nil) state = State.from_env( api_key: api_key, org_name: org_name, @@ -59,6 +65,8 @@ def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, a State.global = state if set_global + auto_instrument!(auto_instrument) + state end diff --git a/test/braintrust_test.rb b/test/braintrust_test.rb index e083be6..1d3bbd7 100644 --- a/test/braintrust_test.rb +++ b/test/braintrust_test.rb @@ -179,4 +179,24 @@ def test_default_project_from_parameter_overrides_env span_data = spans.first assert_equal "project_name:param-project", span_data.attributes["braintrust.parent"] end + + def test_init_calls_auto_instrument_with_config + called_with = :not_called + + Braintrust.stub(:auto_instrument!, ->(config) { called_with = config }) do + Braintrust.init(api_key: "test-api-key", auto_instrument: {only: [:openai]}) + end + + assert_equal({only: [:openai]}, called_with) + end + + def test_init_calls_auto_instrument_with_nil_by_default + called_with = :not_called + + Braintrust.stub(:auto_instrument!, ->(config) { called_with = config }) do + Braintrust.init(api_key: "test-api-key") + end + + assert_nil called_with + end end From 2ddb2d2148b8a895e527b761bdacc9ac27cf3d0b Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 2 Jan 2026 11:27:36 -0500 Subject: [PATCH 16/27] Changed: README to reflect new Braintrust.init auto instrumentation --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 2eef3cf..5920aa1 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,25 @@ gem install braintrust ## Quick Start -### Set up your API key +1. **Set up your API key** -```bash -export BRAINTRUST_API_KEY="your-api-key" -``` + ```bash + export BRAINTRUST_API_KEY="your-api-key" + ``` + +2. 
**Add `Braintrust.init` to your application** + + For Rails applications, we suggest adding this to an initializer file. + + ```ruby + require "braintrust" + + Braintrust.init + ``` + + This sets up Braintrust and auto-instruments your application with LLM tracing. + +## How to use... ### Evals @@ -67,7 +81,9 @@ Braintrust::Eval.run( ) ``` -### Tracing +## Tracing + +Supported LLM libraries are automatically instrumented with `Braintrust.init`. If you wish to manually instrument your LLM clients (for greater customization) you can use OpenTelemetry and `instrument!` to make your own custom LLM traces. ```ruby require "braintrust" @@ -101,18 +117,19 @@ OpenTelemetry.tracer_provider.shutdown puts "View trace in Braintrust!" ``` -### OpenAI Tracing +### OpenAI ```ruby require "braintrust" require "openai" -Braintrust.init +Braintrust.init(auto_instrument: false) client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) # Instrument all clients Braintrust.instrument!(:openai) + # OR instrument a single client Braintrust.instrument!(:openai, target: client) @@ -139,18 +156,23 @@ puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" OpenTelemetry.tracer_provider.shutdown ``` -### Anthropic Tracing +### Anthropic ```ruby require "braintrust" require "anthropic" -Braintrust.init -Braintrust.instrument!(:anthropic) +Braintrust.init(auto_instrument: false) # Instrument Anthropic (instance-level) client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +# Instrument all clients +Braintrust.instrument!(:anthropic) + +# OR instrument a single client +Braintrust.instrument!(:anthropic, target: client) + tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app") root_span = nil @@ -174,24 +196,28 @@ puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" OpenTelemetry.tracer_provider.shutdown ``` -### RubyLLM Tracing +### RubyLLM ```ruby require "braintrust" require "ruby_llm" -Braintrust.init +Braintrust.init(auto_instrument: false) + +chat = 
RubyLLM.chat(model: "gpt-4o-mini") -# Instrument all RubyLLM Chat instances +# Instrument all clients Braintrust.instrument!(:ruby_llm) +# OR instrument a single client +Braintrust.instrument!(:ruby_llm, target: chat) + tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-app") root_span = nil response = tracer.in_span("chat") do |span| root_span = span - chat = RubyLLM.chat(model: "gpt-4o-mini") chat.ask("Say hello!") end From c50583325d8111ce26e5dd4a651bd018bcc8ac92 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:54:55 -0500 Subject: [PATCH 17/27] Added: braintrust/setup.rb for automatically setting up the SDK --- Appraisals | 6 + examples/setup/require.rb | 128 ++++++ examples/setup/require_minimal.rb | 47 +++ gemfiles/rails.gemfile | 11 + lib/braintrust/contrib/rails/railtie.rb | 16 + lib/braintrust/contrib/registry.rb | 33 +- lib/braintrust/contrib/setup.rb | 69 +++ lib/braintrust/setup.rb | 45 ++ .../contrib/rails/integration_helper.rb | 25 ++ test/braintrust/contrib/rails/railtie_test.rb | 115 +++++ test/braintrust/contrib/registry_test.rb | 11 - test/braintrust/contrib/setup_test.rb | 396 ++++++++++++++++++ test/braintrust/setup_test.rb | 130 ++++++ test/support/mock_helper.rb | 32 ++ 14 files changed, 1044 insertions(+), 20 deletions(-) create mode 100644 examples/setup/require.rb create mode 100644 examples/setup/require_minimal.rb create mode 100644 gemfiles/rails.gemfile create mode 100644 lib/braintrust/contrib/rails/railtie.rb create mode 100644 lib/braintrust/contrib/setup.rb create mode 100644 lib/braintrust/setup.rb create mode 100644 test/braintrust/contrib/rails/integration_helper.rb create mode 100644 test/braintrust/contrib/rails/railtie_test.rb create mode 100644 test/braintrust/contrib/setup_test.rb create mode 100644 test/braintrust/setup_test.rb diff --git a/Appraisals b/Appraisals index 5d062e5..5681396 100644 --- a/Appraisals +++ b/Appraisals @@ -79,3 +79,9 @@ appraise "contrib" do gem "ruby_llm", ">= 1.9" gem 
"base64" # needed for openai gem on Ruby 3.4+ end + +# Rails integration testing (minimal dependencies) +appraise "rails" do + gem "activesupport", "~> 8.0" + gem "railties", "~> 8.0" +end diff --git a/examples/setup/require.rb b/examples/setup/require.rb new file mode 100644 index 0000000..69feb48 --- /dev/null +++ b/examples/setup/require.rb @@ -0,0 +1,128 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" + +# Setup via require +# +# This example demonstrates the simplest way to auto-instrument LLM libraries: +# just require "braintrust/setup" BEFORE your LLM libraries. +# +# This approach: +# - Hooks into Ruby's require mechanism +# - Automatically patches LLM libraries as they are loaded +# - Calls Braintrust.init internally (no manual init needed) +# +# For Rails apps, add to your Gemfile: +# gem "braintrust", require: "braintrust/setup" +# +# Supported libraries (instrumented automatically when required): +# - openai (OpenAI's official Ruby gem) +# - ruby-openai (alexrudall's ruby-openai gem) * +# - anthropic (Anthropic's official Ruby gem) +# - ruby_llm (RubyLLM unified interface) +# +# * Note: openai and ruby-openai share the same require path ("openai"), so only one +# can be loaded at a time. This demo uses the official openai gem. +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/require.rb + +# Step 1: Require braintrust/setup FIRST (before any LLM libraries) +require "braintrust/setup" + +# Step 2: Now require your LLM libraries - they will be instrumented automatically! +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +# Brief pause to allow async Braintrust login to complete +sleep 0.5 + +# Check for required API keys +missing_keys = [] +missing_keys << "OPENAI_API_KEY" unless ENV["OPENAI_API_KEY"] +missing_keys << "ANTHROPIC_API_KEY" unless ENV["ANTHROPIC_API_KEY"] + +unless missing_keys.empty? 
+ puts "Error: Missing required environment variables: #{missing_keys.join(", ")}" + puts "" + puts "Get your API keys from:" + puts " OpenAI: https://platform.openai.com/api-keys" + puts " Anthropic: https://console.anthropic.com/" + exit 1 +end + +puts "Auto-Instrumentation via Require Demo" +puts "=" * 50 +puts "" +puts "Libraries were instrumented automatically when required!" +puts "No Braintrust.init call needed." +puts "" + +# Configure RubyLLM +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +# Create clients for each library +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Create a tracer and root span to capture all operations +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-require-demo") +root_span = nil + +puts "Making API calls with each library..." +puts "-" * 50 + +tracer.in_span("examples/setup/require.rb") do |span| + root_span = span + + # 1. OpenAI (official gem) + puts "" + puts "[1/3] OpenAI (official gem)..." + openai_response = openai_client.chat.completions.create( + model: "gpt-4o-mini", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from OpenAI!' in exactly 5 words."} + ] + ) + puts " Response: #{openai_response.choices[0].message.content}" + + # 2. Anthropic + puts "" + puts "[2/3] Anthropic..." + anthropic_response = anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from Anthropic!' in exactly 5 words."} + ] + ) + puts " Response: #{anthropic_response.content[0].text}" + + # 3. RubyLLM (using OpenAI provider) + puts "" + puts "[3/3] RubyLLM (via OpenAI)..." + chat = RubyLLM.chat(model: "gpt-4o-mini") + ruby_llm_response = chat.ask("Say 'Hello from RubyLLM!' 
in exactly 5 words.") + puts " Response: #{ruby_llm_response.content}" +end + +puts "" +puts "-" * 50 +puts "" +puts "All API calls complete!" +puts "" +puts "View trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" +puts "" + +# Shutdown to flush spans (not necessary in production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown + +puts "Trace sent to Braintrust!" diff --git a/examples/setup/require_minimal.rb b/examples/setup/require_minimal.rb new file mode 100644 index 0000000..9b65f56 --- /dev/null +++ b/examples/setup/require_minimal.rb @@ -0,0 +1,47 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# Example: Minimal setup via require +# +# Require "braintrust/setup" BEFORE your LLM libraries to auto-instrument them. +# For Rails, add to Gemfile: gem "braintrust", require: "braintrust/setup" +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/require_minimal.rb + +require "bundler/setup" +require "braintrust/setup" +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +# Brief pause to allow async Braintrust login to complete +# (Not necessary in production, just for this short lived example) +sleep 0.5 + +RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] } + +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-require-demo") + +tracer.in_span("examples/setup/require_minimal.rb") do + openai_client.chat.completions.create( + model: "gpt-4o-mini", + messages: [{role: "user", content: "Hello from OpenAI"}] + ) + + anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [{role: "user", content: "Hello from Anthropic"}] + ) + + RubyLLM.chat(model: "gpt-4o-mini").ask("Hello from RubyLLM") +end 
+ +# Shutdown to flush spans +# (Not necessary in production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown diff --git a/gemfiles/rails.gemfile b/gemfiles/rails.gemfile new file mode 100644 index 0000000..eb90b15 --- /dev/null +++ b/gemfiles/rails.gemfile @@ -0,0 +1,11 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" +gem "activesupport", "~> 8.0" +gem "railties", "~> 8.0" + +gemspec path: "../" diff --git a/lib/braintrust/contrib/rails/railtie.rb b/lib/braintrust/contrib/rails/railtie.rb new file mode 100644 index 0000000..fd9749a --- /dev/null +++ b/lib/braintrust/contrib/rails/railtie.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + class Railtie < ::Rails::Railtie + config.after_initialize do + Braintrust.auto_instrument!( + only: Braintrust::Internal::Env.instrument_only, + except: Braintrust::Internal::Env.instrument_except + ) + end + end + end + end +end diff --git a/lib/braintrust/contrib/registry.rb b/lib/braintrust/contrib/registry.rb index 77a3d67..658a171 100644 --- a/lib/braintrust/contrib/registry.rb +++ b/lib/braintrust/contrib/registry.rb @@ -60,22 +60,37 @@ def integrations_for_require_path(path) @require_path_map ||= build_require_path_map end end - basename = File.basename(path.to_s, ".rb") - map.fetch(basename, EMPTY_ARRAY) - end - # Clear all registrations (primarily for testing). - def clear! - @mutex.synchronize do - @integrations.clear - @require_path_map = nil - end + path_str = path.to_s + basename = File.basename(path_str, ".rb") + + # Quick check: is this basename even in our map? + return EMPTY_ARRAY unless map.key?(basename) + + # Only match top-level requires or gem entry points. + # Avoid matching internal subpaths (e.g., ruby_llm/providers/anthropic). 
+ return EMPTY_ARRAY unless gem_entry_point?(path_str, basename) + + map.fetch(basename, EMPTY_ARRAY) end private EMPTY_ARRAY = [].freeze + # Check if this is a gem entry point require. + # @param path [String] Full require path + # @param basename [String] File basename without extension + # @return [Boolean] + def gem_entry_point?(path, basename) + # Direct require like `require 'anthropic'` - no directory separators + return true unless path.include?("/") + + # Full path to gem entry point: /gems/anthropic-1.0.0/lib/anthropic.rb + # The basename appears in both the gem directory name AND as the final file + path.match?(%r{/#{Regexp.escape(basename)}[^/]*/lib/#{Regexp.escape(basename)}(\.rb)?$}) + end + def build_require_path_map map = {} @integrations.each_value do |integration| diff --git a/lib/braintrust/contrib/setup.rb b/lib/braintrust/contrib/setup.rb new file mode 100644 index 0000000..27ebd40 --- /dev/null +++ b/lib/braintrust/contrib/setup.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +require_relative "../internal/env" + +module Braintrust + module Contrib + module Setup + class << self + def run! + return if @setup_complete + @setup_complete = true + + if Internal::Env.auto_instrument + # Set up deferred patching for libraries loaded later + if rails_environment? + setup_rails_hook! + else + setup_require_hook! + end + end + end + + def rails_environment? + # Check for Rails::Railtie which is defined during gem loading, + # before Rails.application exists + defined?(::Rails::Railtie) + end + + def setup_rails_hook! + require_relative "rails/railtie" + end + + def setup_require_hook! 
+ registry = Contrib::Registry.instance + only = Internal::Env.instrument_only + except = Internal::Env.instrument_except + + # Store original require + original_require = Kernel.method(:require) + + Kernel.define_method(:require) do |path| + result = original_require.call(path) + + # Reentrancy guard + unless Thread.current[:braintrust_in_require_hook] + begin + Thread.current[:braintrust_in_require_hook] = true + + # Check if any integration matches this require path + registry.integrations_for_require_path(path).each do |integration| + next unless integration.available? && integration.compatible? + next if only && !only.include?(integration.integration_name) + next if except&.include?(integration.integration_name) + integration.patch! + end + rescue => e + Braintrust::Log.error("Failed to auto-instrument on `require`: #{e.message}") + ensure + Thread.current[:braintrust_in_require_hook] = false + end + end + + result + end + end + end + end + end +end diff --git a/lib/braintrust/setup.rb b/lib/braintrust/setup.rb new file mode 100644 index 0000000..5feb180 --- /dev/null +++ b/lib/braintrust/setup.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +# Setup file for automatic SDK initialization and instrumentation. +# Load this file to automatically initialize Braintrust and instrument all available LLM libraries. +# +# Usage: +# # Gemfile +# gem "braintrust", require: "braintrust/setup" +# +# # Or in code +# require "braintrust/setup" +# +# Environment variables: +# BRAINTRUST_API_KEY - Required for tracing to work +# BRAINTRUST_AUTO_INSTRUMENT - Set to "false" to disable (default: true) +# BRAINTRUST_INSTRUMENT_ONLY - Comma-separated whitelist +# BRAINTRUST_INSTRUMENT_EXCEPT - Comma-separated blacklist + +require_relative "../braintrust" +require_relative "contrib/setup" + +module Braintrust + module Setup + class << self + def run! 
module Braintrust
  # One-shot bootstrap invoked when `braintrust/setup` is required.
  module Setup
    class << self
      # Initialize the SDK once, then hand off to Contrib setup for
      # third-party integrations. Init failures are logged, never raised,
      # so a missing API key cannot crash the host application.
      def run!
        return if @setup_complete

        @setup_complete = true
        safe_init
        Contrib::Setup.run!
      end

      private

      # Call Braintrust.init, logging (but swallowing) any StandardError.
      def safe_init
        Braintrust.init
      rescue => e
        Braintrust::Log.error("Failed to automatically setup Braintrust: #{e.message}")
      end
    end
  end
end

# Auto-setup when required
Braintrust::Setup.run!
def test_railtie_passes_only_from_env
  skip_unless_rails!

  assert_in_fork do
    ENV["BRAINTRUST_INSTRUMENT_ONLY"] = "openai,anthropic"

    require "active_support"
    require "rails/railtie"

    # Intercept after_initialize so we can capture the railtie's callback.
    deferred_callback = nil
    saved_after_initialize = ::Rails::Railtie::Configuration.instance_method(:after_initialize)
    ::Rails::Railtie::Configuration.define_method(:after_initialize) do |&blk|
      deferred_callback = blk
    end

    require "braintrust/contrib/rails/railtie"

    # Put the real hook back before invoking the captured callback.
    ::Rails::Railtie::Configuration.define_method(:after_initialize, saved_after_initialize)

    received_kwargs = nil
    Braintrust.stub(:auto_instrument!, ->(**kwargs) { received_kwargs = kwargs }) do
      deferred_callback.call
    end

    expected = {only: [:openai, :anthropic], except: nil}
    if received_kwargs == expected
      puts "only_env_test:passed"
    else
      puts "only_env_test:failed - expected #{expected.inspect}, got #{received_kwargs.inspect}"
      exit 1
    end
  end
end
def test_railtie_passes_except_from_env
  skip_unless_rails!

  assert_in_fork do
    ENV["BRAINTRUST_INSTRUMENT_EXCEPT"] = "ruby_llm"

    require "active_support"
    require "rails/railtie"

    # Intercept after_initialize so we can capture the railtie's callback.
    deferred_callback = nil
    saved_after_initialize = ::Rails::Railtie::Configuration.instance_method(:after_initialize)
    ::Rails::Railtie::Configuration.define_method(:after_initialize) do |&blk|
      deferred_callback = blk
    end

    require "braintrust/contrib/rails/railtie"

    # Put the real hook back before invoking the captured callback.
    ::Rails::Railtie::Configuration.define_method(:after_initialize, saved_after_initialize)

    received_kwargs = nil
    Braintrust.stub(:auto_instrument!, ->(**kwargs) { received_kwargs = kwargs }) do
      deferred_callback.call
    end

    expected = {only: nil, except: [:ruby_llm]}
    if received_kwargs == expected
      puts "except_env_test:passed"
    else
      puts "except_env_test:failed - expected #{expected.inspect}, got #{received_kwargs.inspect}"
      exit 1
    end
  end
end
def test_run_is_idempotent
  skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie)

  invocations = 0
  counter = -> { invocations += 1 }

  # Repeated run! calls must install the require hook exactly once.
  Braintrust::Contrib::Setup.stub(:setup_require_hook!, counter) do
    3.times { Braintrust::Contrib::Setup.run! }
  end

  assert_equal 1, invocations, "run! should only execute once"
end
def test_rails_environment_returns_false_when_rails_application_nil
  # rails_environment? only checks `defined?(::Rails::Railtie)`, so a
  # Rails constant WITHOUT a Railtie (e.g. partial Rails load, no
  # railties) must not count as a Rails environment. The previous mock
  # stubbed Rails.application/respond_to?, which rails_environment?
  # never consults — that machinery was dead code and implied the wrong
  # detection mechanism.
  mock_rails = Module.new

  Object.stub_const(:Rails, mock_rails) do
    refute Braintrust::Contrib::Setup.rails_environment?
  end
end
def test_setup_require_hook_instruments_matching_library
  assert_in_fork do
    with_tmp_file(data: "# test file\n", filename: "test_integration_lib", extension: ".rb") do |test_file|
      patched = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      integration = mock_integration_object(
        name: :test_lib,
        on_patch: -> { patched = true }
      )
      registry = mock_registry_for(lib_name => [integration])

      # Requiring the temp lib through the hook should trigger patch!.
      Braintrust::Contrib::Registry.stub(:instance, registry) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if patched
        puts "instrument_test:passed"
      else
        puts "instrument_test:failed - patch! was not called"
        exit 1
      end
    end
  end
end
def test_setup_require_hook_respects_except_filter
  assert_in_fork do
    with_tmp_file(data: "# test file\n", filename: "excluded_lib", extension: ".rb") do |test_file|
      patched = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      integration = mock_integration_object(
        name: :excluded_lib,
        on_patch: -> { patched = true }
      )
      registry = mock_registry_for(lib_name => [integration])

      # Denylist the integration before installing the hook.
      ENV["BRAINTRUST_INSTRUMENT_EXCEPT"] = "excluded_lib"

      Braintrust::Contrib::Registry.stub(:instance, registry) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if patched
        puts "except_filter_test:failed - patch! should not be called for excluded lib"
        exit 1
      else
        puts "except_filter_test:passed"
      end
    end
  end
end
def test_setup_require_hook_skips_incompatible_integration
  assert_in_fork do
    with_tmp_file(data: "# test file\n", filename: "incompatible_lib", extension: ".rb") do |test_file|
      patched = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      # An integration that reports compatible? == false must be skipped.
      integration = mock_integration_object(
        name: :incompatible_lib,
        compatible: false,
        on_patch: -> { patched = true }
      )
      registry = mock_registry_for(lib_name => [integration])

      Braintrust::Contrib::Registry.stub(:instance, registry) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if patched
        puts "incompatible_test:failed - patch! should not be called for incompatible integration"
        exit 1
      else
        puts "incompatible_test:passed"
      end
    end
  end
end
def test_setup_require_hook_logs_errors_without_crashing
  assert_in_fork do
    with_tmp_file(data: "# error lib\n", filename: "error_lib", extension: ".rb") do |test_file|
      logged = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      # A patch! that raises must be caught by the hook and logged.
      integration = mock_integration_object(
        name: :error_lib,
        on_patch: -> { raise "Patch error!" }
      )
      registry = mock_registry_for(lib_name => [integration])

      with_stubs(
        [Braintrust::Contrib::Registry, :instance, registry],
        [Braintrust::Log, :error, ->(_msg) { logged = true }]
      ) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if logged
        puts "error_handling_test:passed"
      else
        puts "error_handling_test:failed - error was not logged"
        exit 1
      end
    end
  end
end
# Build a stand-in registry whose #integrations_for_require_path does a
# substring match of the require path against the supplied lib names.
# @param mappings [Hash] lib_name => [integrations] mapping
def mock_registry_for(mappings)
  fake = Object.new
  fake.define_singleton_method(:integrations_for_require_path) do |path|
    _, integrations = mappings.detect { |lib, _| path.include?(lib) }
    integrations || []
  end
  fake
end
def test_run_logs_error_on_init_failure
  assert_in_fork do
    require "braintrust/contrib/setup"

    captured_errors = []

    with_stubs(
      [Braintrust, :init, ->(**_kwargs) { raise "Test init error" }],
      [Braintrust::Log, :error, ->(msg) { captured_errors << msg }],
      [Braintrust::Contrib::Setup, :run!, -> {}]
    ) do
      require "braintrust/setup"
    end

    # The logged message must mention both the setup failure and the cause.
    matched = captured_errors.any? do |msg|
      msg.include?("Failed to automatically setup Braintrust") &&
        msg.include?("Test init error")
    end

    if matched
      puts "test_run_logs_error_on_init_failure:passed"
    else
      puts "test_run_logs_error_on_init_failure:failed - logged: #{captured_errors.inspect}"
      exit 1
    end
  end
end
# Apply multiple stubs without deep nesting.
# Each stub is an array of [object, method, value]; stubs are applied
# left-to-right (outermost first) around the block.
#
# @param stubs [Array] list of [object, method, value] tuples
# @yield block to execute with all stubs applied
#
# @example Single stub
#   with_stubs([Braintrust, :init, -> { }]) do
#     # code runs with Braintrust.init stubbed
#   end
#
# @example Multiple stubs (no nesting required)
#   with_stubs(
#     [Braintrust, :init, -> { init_called = true }],
#     [Braintrust::Contrib::Setup, :run!, -> {}],
#     [Braintrust::Log, :error, ->(msg) { errors << msg }]
#   ) do
#     require "braintrust/setup"
#     # assertions here - only one level deep
#   end
#
def with_stubs(*stubs, &block)
  return yield if stubs.empty?

  (target, method_name, replacement), *rest = stubs
  target.stub(method_name, replacement) do
    with_stubs(*rest, &block)
  end
end
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Automatic Braintrust SDK setup via braintrust exec
#
# This example demonstrates the zero-code-changes approach to auto-instrumentation.
# Instead of modifying your application code, you simply run it with `braintrust exec`.
#
# The CLI:
# - Injects auto-setup via RUBYOPT
# - Automatically patches LLM libraries as they are loaded
# - Initializes Braintrust tracing
#
# This is ideal for:
# - Existing applications you don't want to modify
# - Quick testing and debugging
# - Production deployments via process supervisors
#
# Supported libraries (instrumented automatically):
# - openai (OpenAI's official Ruby gem)
# - ruby-openai (alexrudall's ruby-openai gem) *
# - anthropic (Anthropic's official Ruby gem)
# - ruby_llm (RubyLLM unified interface)
#
# * Note: openai and ruby-openai share the same require path ("openai"), so only one
#   can be loaded at a time. This demo uses the official openai gem.
#
# Usage:
#   OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \
#     bundle exec appraisal contrib braintrust exec -- \
#     ruby examples/setup/exec.rb
#
# With filtering:
#   braintrust exec --only openai,anthropic -- ruby examples/setup/exec.rb
#   braintrust exec --except ruby_llm -- ruby examples/setup/exec.rb

# Notice: NO braintrust require needed! The CLI handles everything.

require "openai"
require "anthropic"
require "ruby_llm"
require "opentelemetry-sdk"

# Check for required API keys (idiomatic reject instead of manual appends).
missing_keys = %w[OPENAI_API_KEY ANTHROPIC_API_KEY].reject { |key| ENV[key] }

unless missing_keys.empty?
  puts "Error: Missing required environment variables: #{missing_keys.join(", ")}"
  puts ""
  puts "Get your API keys from:"
  puts "  OpenAI: https://platform.openai.com/api-keys"
  puts "  Anthropic: https://console.anthropic.com/"
  exit 1
end

puts "auto-setup via braintrust exec Demo"
puts "=" * 50
puts ""
puts "This script has NO braintrust imports!"
puts "The CLI injected auto-setup via RUBYOPT."
puts ""

# Brief pause to allow async Braintrust login to complete
# (Not necessary in production, just for this short lived example)
sleep 0.5

# Configure RubyLLM
RubyLLM.configure do |config|
  config.openai_api_key = ENV["OPENAI_API_KEY"]
end

# Create clients for each library
openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])
anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"])

# Create a tracer and root span to capture all operations
tracer = OpenTelemetry.tracer_provider.tracer("setup-exec-demo")
root_span = nil

puts "Making API calls with each library..."
puts "-" * 50

tracer.in_span("examples/setup/exec.rb") do |span|
  root_span = span

  # 1. OpenAI (official gem)
  puts ""
  puts "[1/3] OpenAI (official gem)..."
  openai_response = openai_client.chat.completions.create(
    model: "gpt-4o-mini",
    max_tokens: 50,
    messages: [
      {role: "user", content: "Say 'Hello from OpenAI!' in exactly 5 words."}
    ]
  )
  puts "  Response: #{openai_response.choices[0].message.content}"

  # 2. Anthropic
  puts ""
  puts "[2/3] Anthropic..."
  anthropic_response = anthropic_client.messages.create(
    model: "claude-3-haiku-20240307",
    max_tokens: 50,
    messages: [
      {role: "user", content: "Say 'Hello from Anthropic!' in exactly 5 words."}
    ]
  )
  puts "  Response: #{anthropic_response.content[0].text}"

  # 3. RubyLLM (using OpenAI provider)
  puts ""
  puts "[3/3] RubyLLM (via OpenAI)..."
  chat = RubyLLM.chat(model: "gpt-4o-mini")
  ruby_llm_response = chat.ask("Say 'Hello from RubyLLM!' in exactly 5 words.")
  puts "  Response: #{ruby_llm_response.content}"
end

puts ""
puts "-" * 50
puts ""
puts "All API calls complete!"
puts ""
puts "View trace in Braintrust:"
puts "  #{Braintrust::Trace.permalink(root_span)}"
puts ""

# Shutdown to flush spans (not necessary in production, just for this short lived example)
OpenTelemetry.tracer_provider.shutdown

puts "Trace sent to Braintrust!"
#!/usr/bin/env ruby
# frozen_string_literal: true

# Minimal demo: zero Braintrust code in this process — `braintrust exec`
# injects the auto-setup via RUBYOPT before this script runs.
require "openai"
require "anthropic"
require "ruby_llm"
require "opentelemetry-sdk"

# Give the async Braintrust login a moment to finish.
# (Not necessary in production, just for this short lived example)
sleep 0.5

RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] }

openai = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])
anthropic = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"])
demo_tracer = OpenTelemetry.tracer_provider.tracer("setup-exec-demo")

demo_tracer.in_span("examples/setup/exec_minimal.rb") do
  openai.chat.completions.create(
    model: "gpt-4o-mini",
    messages: [{role: "user", content: "Hello from OpenAI"}]
  )

  anthropic.messages.create(
    model: "claude-3-haiku-20240307",
    max_tokens: 50,
    messages: [{role: "user", content: "Hello from Anthropic"}]
  )

  RubyLLM.chat(model: "gpt-4o-mini").ask("Hello from RubyLLM")
end

# Shutdown to flush spans
# (Not necessary in production, just for this short lived example)
OpenTelemetry.tracer_provider.shutdown
# Parse CLI arguments into @options / @remaining_args and return the
# command name as a Symbol (e.g. :exec), :help, or nil when no command
# was given.
#
# Everything after a literal "--" is left untouched in @remaining_args;
# only the portion before it is option-parsed.
#
# @param args [Array<String>] raw ARGV
# @return [Symbol, nil] command symbol, :help, or nil
def parse_args(args)
  @options = {}
  @remaining_args = []

  # Find -- separator: options/command before it, target command after it
  separator_index = args.index("--")
  if separator_index
    to_parse = args[0...separator_index]
    @remaining_args = args[(separator_index + 1)..]
  else
    to_parse = args.dup
  end

  parser = OptionParser.new do |opts|
    opts.banner = "Usage: braintrust [options] -- COMMAND"

    opts.on("--only INTEGRATIONS", "Only instrument these (comma-separated)") do |v|
      @options[:only] = v
    end

    opts.on("--except INTEGRATIONS", "Skip these integrations (comma-separated)") do |v|
      @options[:except] = v
    end

    opts.on("-h", "--help", "Show this help") do
      # `return` here exits parse_args itself (the handler is a proc
      # created in this method's scope), reporting :help to the caller.
      return :help
    end

    opts.on("-v", "--version", "Show version") do
      require_relative "../lib/braintrust/version"
      puts "braintrust #{Braintrust::VERSION}"
      exit 0
    end
  end

  begin
    parser.parse!(to_parse)
  rescue OptionParser::ParseError => e
    # Previously an unknown/malformed option raised and dumped a Ruby
    # backtrace at the user; fail cleanly with the usage text instead.
    warn "braintrust: #{e.message}"
    warn parser.help
    exit 1
  end

  return nil if to_parse.empty?
  to_parse.first.to_sym
end
# Replace the current process with the target command, translating exec
# failures into shell-style exit codes (127 = command not found,
# 126 = found but not executable).
def exec_with_error_handling(args)
  Kernel.exec(*args)
rescue Errno::ENOENT, Errno::EACCES, Errno::ENOEXEC => e
  Kernel.warn "braintrust exec failed: #{e.class.name} #{e.message}"
  status = e.is_a?(Errno::ENOENT) ? 127 : 126
  Kernel.exit status
end
+ +*NOTE: This SDK is currently in BETA status and APIs may change between minor versions.* + +- [Quick Start](#quick-start) +- [Installation](#installation) + - [Setup script](#setup-script) + - [CLI Command](#cli-command) + - [Braintrust.init](#braintrustinit) + - [Environment variables](#environment-variables) +- [Tracing](#tracing) + - [Supported providers](#supported-providers) + - [Manually applying instrumentation](#manually-applying-instrumentation) + - [Creating custom spans](#creating-custom-spans) + - [Attachments](#attachments) + - [Viewing traces](#viewing-traces) +- [Evals](#evals) + - [Datasets](#datasets) + - [Remote scorers](#remote-scorers) +- [Documentation](#documentation) +- [Troubleshooting](#troubleshooting) +- [Contributing](#contributing) +- [License](#license) -This library provides tools for **evaluating** and **tracing** AI applications in [Braintrust](https://www.braintrust.dev). Use it to: - -- **Evaluate** your AI models with custom test cases and scoring functions -- **Trace** LLM calls and monitor AI application performance with OpenTelemetry -- **Integrate** seamlessly with OpenAI and other LLM providers - -This SDK is currently in BETA status and APIs may change. - -## Installation +## Quick Start -Add this line to your application's Gemfile: +Add to your Gemfile: ```ruby -gem 'braintrust' - -# OR if you'd like to activate auto-instrumentation -gem 'braintrust', require 'braintrust/setup' +gem "braintrust", require: "braintrust/setup" ``` -And then execute: +Set your API key and install: ```bash +export BRAINTRUST_API_KEY="your-api-key" bundle install ``` -Or install it yourself as: +Your LLM calls are now automatically traced. View them at [braintrust.dev](https://www.braintrust.dev). + +## Installation + +The SDK also offers additional setup options for a variety of applications. 
+ +| | What it looks like | When to Use | +| -------------------------------------- | ----------------------------------- | --------------------------------------------------------------------------- | +| [**Setup script**](#setup-script) | `require 'braintrust/setup'` | You'd like to automatically setup the SDK at load time. | +| [**CLI command**](#cli-command) | `braintrust exec -- ruby app.rb` | You prefer not to modify the application source code. | +| [**Braintrust.init**](#braintrustinit) | Call `Braintrust.init` in your code | You need to control when setup occurs, or require customized configuration. | + +See [our examples](./examples/setup/README.md) for more detail. + +### Setup script + +For most applications, we recommend adding `require: "braintrust/setup"` to your `Gemfile` or an initializer file in your Ruby application to automatically setup the SDK. This will automatically apply instrumentation to all available LLM libraries. + +You can use [environment variables](#environment-variables) to configure behavior. + +### CLI Command + +You can use this CLI command to instrument any Ruby application without modifying the source code. + +First, make sure the gem is installed on the system: ```bash gem install braintrust ``` -## Quick Start +Then wrap the start up command of any Ruby application to apply: -1. **Set up your API key** +```bash +braintrust exec -- ruby app.rb +braintrust exec -- bundle exec rails server +braintrust exec --only openai -- ruby app.rb +``` - ```bash - export BRAINTRUST_API_KEY="your-api-key" - ``` +You can use [environment variables](#environment-variables) to configure behavior. -2. **Add `Braintrust.init` to your application** +--- - For Rails applications, we suggest adding this to an initializer file. 
+*NOTE: Installing a package at the system-level does not guarantee compatibility with all Ruby applications on that system; conflicts with dependencies can arise.* - ```ruby - require "braintrust" +For stronger assurance of compatibility, we recommend either: - Braintrust.init - ``` +- Installing via the application's `Gemfile` and `bundle install` when possible. +- **OR** for OCI/Docker deployments, `gem install braintrust` when building images in your CI/CD pipeline (and verifying their safe function.) - This sets up Braintrust and auto-instruments your application with LLM tracing. +--- -## How to use... +### Braintrust.init -### Evals +For more control over when auto-instrumentation is applied: ```ruby require "braintrust" Braintrust.init - -# Define task to evaluate -task = ->(input) { input.include?("a") ? "fruit" : "vegetable" } - -# Run evaluation -Braintrust::Eval.run( - project: "my-project", - experiment: "food-classifier", - cases: [ - {input: "apple", expected: "fruit"}, - {input: "carrot", expected: "vegetable"} - ], - task: task, - scorers: [ - ->(input, expected, output) { output == expected ? 1.0 : 0.0 } - ] -) ``` -## Tracing +**Options:** -Supported LLM libraries are automatically instrumented with `Braintrust.init`. If you wish to manually instrument your LLM clients (for greater customization) you can use OpenTelemetry and `instrument!` to make your own custom LLM traces. 
+| Option | Default | Description | +| ----------------- | ---------------------------------------- | --------------------------------------------------------------------------- | +| `api_key` | `ENV['BRAINTRUST_API_KEY']` | API key | +| `auto_instrument` | `true` | `true`, `false`, or Hash with `:only`/`:except` keys to filter integrations | +| `blocking_login` | `false` | Block until login completes (async login when `false`) | +| `default_project` | `ENV['BRAINTRUST_DEFAULT_PROJECT']` | Default project for spans | +| `enable_tracing` | `true` | Enable OpenTelemetry tracing | +| `filter_ai_spans` | `ENV['BRAINTRUST_OTEL_FILTER_AI_SPANS']` | Only export AI-related spans | +| `org_name` | `ENV['BRAINTRUST_ORG_NAME']` | Organization name | +| `set_global` | `true` | Set as global state. Set to `false` for isolated instances | -```ruby -require "braintrust" -require "opentelemetry/sdk" +**Example with options:** -# Initialize Braintrust -Braintrust.init +```ruby +Braintrust.init( + default_project: "my-project", + auto_instrument: { only: [:openai] } +) +``` -# Get a tracer -tracer = OpenTelemetry.tracer_provider.tracer("my-app") +### Environment variables + +| Variable | Description | +| --------------------------------- | ------------------------------------------------------------------------- | +| `BRAINTRUST_API_KEY` | Required. 
Your Braintrust API key | +| `BRAINTRUST_API_URL` | Braintrust API URL (default: `https://api.braintrust.dev`) | +| `BRAINTRUST_APP_URL` | Braintrust app URL (default: `https://www.braintrust.dev`) | +| `BRAINTRUST_AUTO_INSTRUMENT` | Set to `false` to disable auto-instrumentation | +| `BRAINTRUST_DEBUG` | Set to `true` to enable debug logging | +| `BRAINTRUST_DEFAULT_PROJECT` | Default project for spans | +| `BRAINTRUST_INSTRUMENT_EXCEPT` | Comma-separated list of integrations to skip | +| `BRAINTRUST_INSTRUMENT_ONLY` | Comma-separated list of integrations to enable (e.g., `openai,anthropic`) | +| `BRAINTRUST_ORG_NAME` | Organization name | +| `BRAINTRUST_OTEL_FILTER_AI_SPANS` | Set to `true` to only export AI-related spans | -# Create spans to track operations -tracer.in_span("process-data") do |span| - span.set_attribute("user.id", "123") - span.set_attribute("operation.type", "data_processing") +## Tracing - # Your code here - puts "Processing data..." - sleep 0.1 +### Supported providers - # Nested spans are automatically linked - tracer.in_span("nested-operation") do |nested_span| - nested_span.set_attribute("step", "1") - puts "Nested operation..." - end -end +The SDK automatically instruments these LLM libraries: -# Shutdown to flush spans -OpenTelemetry.tracer_provider.shutdown +| Provider | Gem | Versions | Integration Name | Examples | +| --------- | ------------- | -------- | ---------------- | ----------------------------------------------- | +| Anthropic | `anthropic` | >= 0.3.0 | `:anthropic` | [Link](./examples/contrib/anthropic/basic.rb) | +| OpenAI | `openai` | >= 0.1.0 | `:openai` | [Link](./examples/contrib/openai/basic.rb) | +| | `ruby-openai` | >= 7.0.0 | `:ruby_openai` | [Link](./examples/contrib/ruby_openai/basic.rb) | +| Multiple | `ruby_llm` | >= 1.8.0 | `:ruby_llm` | [Link](./examples/contrib/ruby_llm/basic.rb) | -puts "View trace in Braintrust!" 
-``` +### Manually applying instrumentation -### OpenAI +For fine-grained control, disable auto-instrumentation and instrument specific clients: ```ruby require "braintrust" require "openai" -Braintrust.init(auto_instrument: false) - -client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +Braintrust.init(auto_instrument: false) # Or BRAINTRUST_AUTO_INSTRUMENT=false -# Instrument all clients +# Instrument all OpenAI clients Braintrust.instrument!(:openai) # OR instrument a single client +client = OpenAI::Client.new Braintrust.instrument!(:openai, target: client) - -tracer = OpenTelemetry.tracer_provider.tracer("openai-app") -root_span = nil - -response = tracer.in_span("chat-completion") do |span| - root_span = span - - client.chat.completions.create( - messages: [ - {role: "system", content: "You are a helpful assistant."}, - {role: "user", content: "Say hello!"} - ], - model: "gpt-4o-mini", - max_tokens: 100 - ) -end - -puts "Response: #{response.choices[0].message.content}" - -puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" - -OpenTelemetry.tracer_provider.shutdown ``` -### Anthropic +### Creating custom spans + +Wrap business logic in spans to see it in your traces: ```ruby -require "braintrust" -require "anthropic" +tracer = OpenTelemetry.tracer_provider.tracer("my-app") -Braintrust.init(auto_instrument: false) +tracer.in_span("process-request") do |span| + span.set_attribute("user.id", user_id) -# Instrument Anthropic (instance-level) -client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + # LLM calls inside here are automatically nested under this span + response = client.chat.completions.create(...) 
+end +``` -# Instrument all clients -Braintrust.instrument!(:anthropic) +### Attachments -# OR instrument a single client -Braintrust.instrument!(:anthropic, target: client) +Log binary data (images, PDFs, audio) in your traces: -tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app") -root_span = nil +```ruby +require "braintrust/trace/attachment" -message = tracer.in_span("chat-message") do |span| - root_span = span +att = Braintrust::Trace::Attachment.from_file("image/png", "./photo.png") - client.messages.create( - model: "claude-3-haiku-20240307", - max_tokens: 100, - system: "You are a helpful assistant.", - messages: [ - {role: "user", content: "Say hello!"} +# Use in messages (OpenAI/Anthropic format) +messages = [ + { + role: "user", + content: [ + {type: "text", text: "What's in this image?"}, + att.to_h ] - ) -end - -puts "Response: #{message.content[0].text}" + } +] -puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" - -OpenTelemetry.tracer_provider.shutdown +# Log to span +span.set_attribute("braintrust.input_json", JSON.generate(messages)) ``` -### RubyLLM +Create attachments from various sources: ```ruby -require "braintrust" -require "ruby_llm" - -Braintrust.init(auto_instrument: false) +Braintrust::Trace::Attachment.from_bytes("image/jpeg", image_data) +Braintrust::Trace::Attachment.from_file("application/pdf", "./doc.pdf") +Braintrust::Trace::Attachment.from_url("https://example.com/image.png") +``` -chat = RubyLLM.chat(model: "gpt-4o-mini") +See example: [trace_attachments.rb](./examples/trace/trace_attachments.rb) -# Instrument all clients -Braintrust.instrument!(:ruby_llm) +### Viewing traces -# OR instrument a single client -Braintrust.instrument!(:ruby_llm, target: chat) +Get a permalink to any span: -tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-app") -root_span = nil - -response = tracer.in_span("chat") do |span| - root_span = span +```ruby +tracer = OpenTelemetry.tracer_provider.tracer("my-app") - 
chat.ask("Say hello!") +tracer.in_span("my-operation") do |span| + # your code here + puts "View trace at: #{Braintrust::Trace.permalink(span)}" end - -puts "Response: #{response.content}" - -puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" - -OpenTelemetry.tracer_provider.shutdown ``` -### Attachments +## Evals -Attachments allow you to log binary data (images, PDFs, audio, etc.) as part of your traces. This is particularly useful for multimodal AI applications like vision models. +Run evaluations against your AI systems: ```ruby require "braintrust" -require "braintrust/trace/attachment" Braintrust.init -tracer = OpenTelemetry.tracer_provider.tracer("vision-app") - -tracer.in_span("analyze-image") do |span| - # Create attachment from file - att = Braintrust::Trace::Attachment.from_file( - Braintrust::Trace::Attachment::IMAGE_PNG, - "./photo.png" - ) - - # Build message with attachment (OpenAI/Anthropic format) - messages = [ - { - role: "user", - content: [ - {type: "text", text: "What's in this image?"}, - att.to_h # Converts to {"type" => "base64_attachment", "content" => "data:..."} - ] - } +Braintrust::Eval.run( + project: "my-project", + experiment: "classifier-v1", + cases: [ + {input: "apple", expected: "fruit"}, + {input: "carrot", expected: "vegetable"} + ], + task: ->(input) { classify(input) }, + scorers: [ + ->(input, expected, output) { output == expected ? 1.0 : 0.0 } ] +) +``` - # Log to trace - span.set_attribute("braintrust.input_json", JSON.generate(messages)) -end +### Datasets + +Load test cases from a Braintrust dataset: -OpenTelemetry.tracer_provider.shutdown +```ruby +Braintrust::Eval.run( + project: "my-project", + dataset: "my-dataset", + task: ->(input) { classify(input) }, + scorers: [...] 
+) ``` -You can create attachments from bytes, files, or URLs: +### Remote scorers + +Use scoring functions defined in Braintrust: ```ruby -# From bytes -att = Braintrust::Trace::Attachment.from_bytes("image/jpeg", image_data) +Braintrust::Eval.run( + project: "my-project", + cases: [...], + task: ->(input) { ... }, + scorers: [ + Braintrust::Scorer.remote("my-project", "accuracy-scorer") + ] +) +``` -# From file -att = Braintrust::Trace::Attachment.from_file("application/pdf", "./doc.pdf") +See examples: [eval.rb](./examples/eval.rb), [dataset.rb](./examples/eval/dataset.rb), [remote_functions.rb](./examples/eval/remote_functions.rb) -# From URL -att = Braintrust::Trace::Attachment.from_url("https://example.com/image.png") -``` +## Documentation -## Features +- [Braintrust Documentation](https://www.braintrust.dev/docs) +- [API Reference](https://gemdocs.org/gems/braintrust/) -- **Evaluations**: Run systematic evaluations of your AI systems with custom scoring functions -- **Tracing**: Automatic instrumentation for OpenAI and Anthropic API calls with OpenTelemetry -- **Datasets**: Manage and version your evaluation datasets -- **Experiments**: Track different versions and configurations of your AI systems -- **Observability**: Monitor your AI applications in production +## Troubleshooting -## Examples +#### No traces after adding `require 'braintrust/setup'` to the Gemfile -Check out the [`examples/`](./examples/) directory for complete working examples: +First verify there are no errors in your logs after running with `BRAINTRUST_DEBUG=true` set. 
-- [eval.rb](./examples/eval.rb) - Create and run evaluations with custom test cases and scoring functions -- [trace.rb](./examples/trace.rb) - Manual span creation and tracing -- [openai.rb](./examples/openai.rb) - Automatically trace OpenAI API calls -- [alexrudall_openai.rb](./examples/alexrudall_openai.rb) - Automatically trace ruby-openai gem API calls -- [contrib/anthropic/basic.rb](./examples/contrib/anthropic/basic.rb) - Automatically trace Anthropic API calls -- [ruby_llm.rb](./examples/ruby_llm.rb) - Automatically trace RubyLLM API calls -- [trace/trace_attachments.rb](./examples/trace/trace_attachments.rb) - Log attachments (images, PDFs) in traces -- [eval/dataset.rb](./examples/eval/dataset.rb) - Run evaluations using datasets stored in Braintrust -- [eval/remote_functions.rb](./examples/eval/remote_functions.rb) - Use remote scoring functions +Your application needs the following for this to work: -## Documentation +```ruby +require 'bundler/setup' +Bundler.require +``` -- [Braintrust Documentation](https://www.braintrust.dev/docs) -- [API Documentation](https://gemdocs.org/gems/braintrust/) +It is present by default in Rails applications, but may not be in other Sinatra, Rack or other applications. + +Alternatively, you can add `require 'braintrust/setup'` to your application initialization files. ## Contributing -See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup and contribution guidelines. +See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup and guidelines. ## License -This project is licensed under the Apache License 2.0. See the [LICENSE](./LICENSE) file for details. +Apache License 2.0 - see [LICENSE](./LICENSE). 
From 16773446b39d594fd9a910caacf456ced6653d87 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 8 Jan 2026 22:39:12 -0500 Subject: [PATCH 21/27] Added: Debug statements for auto-instrumentation --- lib/braintrust/contrib.rb | 16 +++++++++++-- lib/braintrust/contrib/integration.rb | 21 +++++++++++++++-- lib/braintrust/contrib/setup.rb | 25 ++++++++++++++------- lib/braintrust/setup.rb | 5 +++++ test/braintrust/contrib/integration_test.rb | 2 +- 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index c81b36d..feef9c2 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -116,13 +116,25 @@ def init(tracer_provider: nil) # @example Exclude specific integrations # Braintrust::Contrib.auto_instrument!(except: [:ruby_llm]) def auto_instrument!(only: nil, except: nil) + if only || except + Braintrust::Log.debug("auto_instrument! called (only: #{only.inspect}, except: #{except.inspect})") + else + Braintrust::Log.debug("auto_instrument! called") + end + targets = registry.available + Braintrust::Log.debug("auto_instrument! available: #{targets.map(&:integration_name).inspect}") targets = targets.select { |i| only.include?(i.integration_name) } if only targets = targets.reject { |i| except.include?(i.integration_name) } if except - targets.each_with_object([]) do |integration, instrumented| - instrumented << integration.integration_name if instrument!(integration.integration_name) + results = targets.each_with_object([]) do |integration, instrumented| + result = instrument!(integration.integration_name) + # Note: false means skipped (not applicable) or failed (error logged at lower level) + Braintrust::Log.debug("auto_instrument! :#{integration.integration_name} #{result ? "instrumented" : "skipped"}") + instrumented << integration.integration_name if result end + Braintrust::Log.debug("auto_instrument! 
complete: #{results.inspect}") + results end # Instrument a registered integration by name. diff --git a/lib/braintrust/contrib/integration.rb b/lib/braintrust/contrib/integration.rb index 1791f6f..e338cda 100644 --- a/lib/braintrust/contrib/integration.rb +++ b/lib/braintrust/contrib/integration.rb @@ -100,6 +100,12 @@ def patcher # @example Instance-level instrumentation (specific client) # integration.instrument!(target: client, tracer_provider: my_provider) def instrument!(**options) + if options.empty? + Braintrust::Log.debug("#{integration_name}.instrument! called") + else + Braintrust::Log.debug("#{integration_name}.instrument! called (#{options.keys.join(", ")})") + end + if options[:target] # Configure the target with provided options (exclude :target from context) context_options = options.except(:target) @@ -117,7 +123,18 @@ def instrument!(**options) # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider # @return [Boolean] true if any patching succeeded or was already done def patch!(**options) - return false unless available? && loaded? && compatible? + unless available? + Braintrust::Log.debug("#{integration_name}.patch! skipped: gem not available") + return false + end + unless loaded? + Braintrust::Log.debug("#{integration_name}.patch! skipped: library not loaded") + return false + end + unless compatible? + Braintrust::Log.debug("#{integration_name}.patch! skipped: version not compatible") + return false + end # Try all applicable patchers success = false @@ -129,7 +146,7 @@ def patch!(**options) success = true if patch.patch!(**options) end - Braintrust::Log.debug("No applicable patcher found for #{integration_name}") unless success + Braintrust::Log.debug("#{integration_name}.patch! 
skipped: no applicable patcher") unless success success end diff --git a/lib/braintrust/contrib/setup.rb b/lib/braintrust/contrib/setup.rb index 27ebd40..3fc0ad4 100644 --- a/lib/braintrust/contrib/setup.rb +++ b/lib/braintrust/contrib/setup.rb @@ -10,13 +10,18 @@ def run! return if @setup_complete @setup_complete = true - if Internal::Env.auto_instrument - # Set up deferred patching for libraries loaded later - if rails_environment? - setup_rails_hook! - else - setup_require_hook! - end + unless Internal::Env.auto_instrument + Braintrust::Log.debug("Contrib::Setup.run! auto-instrumentation disabled via environment") + return + end + + # Set up deferred patching for libraries loaded later + if rails_environment? + Braintrust::Log.debug("Contrib::Setup.run! using Rails after_initialize hook") + setup_rails_hook! + else + Braintrust::Log.debug("Contrib::Setup.run! using require hook") + setup_require_hook! end end @@ -47,7 +52,11 @@ def setup_require_hook! Thread.current[:braintrust_in_require_hook] = true # Check if any integration matches this require path - registry.integrations_for_require_path(path).each do |integration| + integrations = registry.integrations_for_require_path(path) + if integrations.any? + Braintrust::Log.debug("require '#{path}' matched integration hook: #{integrations.map(&:integration_name).inspect}") + end + integrations.each do |integration| next unless integration.available? && integration.compatible? next if only && !only.include?(integration.integration_name) next if except&.include?(integration.integration_name) diff --git a/lib/braintrust/setup.rb b/lib/braintrust/setup.rb index 5feb180..29482fb 100644 --- a/lib/braintrust/setup.rb +++ b/lib/braintrust/setup.rb @@ -27,6 +27,8 @@ def run! @setup_complete = true + Braintrust::Log.debug("Braintrust setting up...") + # Initialize Braintrust (silent failure if no API key) begin Braintrust.init @@ -36,10 +38,13 @@ def run! # Setup contrib for 3rd party integrations Contrib::Setup.run! 
+ + Braintrust::Log.debug("Braintrust setup complete. Auto-instrumentation enabled: #{Braintrust::Internal::Env.auto_instrument}") end end end end # Auto-setup when required + Braintrust::Setup.run! diff --git a/test/braintrust/contrib/integration_test.rb b/test/braintrust/contrib/integration_test.rb index 40524d6..c344f9f 100644 --- a/test/braintrust/contrib/integration_test.rb +++ b/test/braintrust/contrib/integration_test.rb @@ -560,7 +560,7 @@ def self.patchers begin integration.patch! # Check that the "no applicable patcher" message was logged - assert captured_logs.any? { |msg| msg.include?("No applicable patcher found") } + assert captured_logs.any? { |msg| msg.include?("no applicable patcher") } ensure Braintrust::Log.logger = original_logger end From 6b5b85d7018d6694abb7c4db6086c85220349107 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 13:50:50 -0500 Subject: [PATCH 22/27] Refactored: Moved most examples to internal, left simplified user examples --- README.md | 10 ++-- examples/contrib/anthropic.rb | 46 ++++++++++++++++++ examples/contrib/openai.rb | 46 ++++++++++++++++++ examples/contrib/ruby-openai.rb | 47 +++++++++++++++++++ examples/contrib/ruby_llm.rb | 40 ++++++++++++++++ .../{ => internal}/contrib/anthropic/basic.rb | 4 +- .../contrib/anthropic/deprecated.rb | 4 +- .../contrib/anthropic/instance.rb | 4 +- .../contrib/anthropic/streaming.rb | 4 +- .../{ => internal}/contrib/openai/basic.rb | 6 +-- .../contrib/openai/deprecated.rb | 6 +-- .../{ => internal}/contrib/openai/instance.rb | 4 +- .../contrib/openai/streaming.rb | 4 +- .../{ => internal}/contrib/ruby_llm/basic.rb | 4 +- .../contrib/ruby_llm/instance.rb | 4 +- .../contrib/ruby_llm/streaming.rb | 4 +- .../contrib/ruby_llm/tool_usage.rb | 8 ++-- .../contrib/ruby_openai/basic.rb | 6 +-- .../contrib/ruby_openai/deprecated.rb | 10 ++-- .../contrib/ruby_openai/instance.rb | 4 +- .../contrib/ruby_openai/streaming.rb | 4 +- 21 files changed, 224 insertions(+), 45 deletions(-) 
create mode 100644 examples/contrib/anthropic.rb create mode 100644 examples/contrib/openai.rb create mode 100644 examples/contrib/ruby-openai.rb create mode 100644 examples/contrib/ruby_llm.rb rename examples/{ => internal}/contrib/anthropic/basic.rb (90%) rename examples/{ => internal}/contrib/anthropic/deprecated.rb (91%) rename examples/{ => internal}/contrib/anthropic/instance.rb (89%) rename examples/{ => internal}/contrib/anthropic/streaming.rb (91%) rename examples/{ => internal}/contrib/openai/basic.rb (87%) rename examples/{ => internal}/contrib/openai/deprecated.rb (87%) rename examples/{ => internal}/contrib/openai/instance.rb (90%) rename examples/{ => internal}/contrib/openai/streaming.rb (92%) rename examples/{ => internal}/contrib/ruby_llm/basic.rb (88%) rename examples/{ => internal}/contrib/ruby_llm/instance.rb (91%) rename examples/{ => internal}/contrib/ruby_llm/streaming.rb (88%) rename examples/{ => internal}/contrib/ruby_llm/tool_usage.rb (98%) rename examples/{ => internal}/contrib/ruby_openai/basic.rb (90%) rename examples/{ => internal}/contrib/ruby_openai/deprecated.rb (83%) rename examples/{ => internal}/contrib/ruby_openai/instance.rb (92%) rename examples/{ => internal}/contrib/ruby_openai/streaming.rb (92%) diff --git a/README.md b/README.md index 8fdb51a..6b641d4 100644 --- a/README.md +++ b/README.md @@ -149,10 +149,10 @@ The SDK automatically instruments these LLM libraries: | Provider | Gem | Versions | Integration Name | Examples | | --------- | ------------- | -------- | ---------------- | ----------------------------------------------- | -| Anthropic | `anthropic` | >= 0.3.0 | `:anthropic` | [Link](./examples/contrib/anthropic/basic.rb) | -| OpenAI | `openai` | >= 0.1.0 | `:openai` | [Link](./examples/contrib/openai/basic.rb) | -| | `ruby-openai` | >= 7.0.0 | `:ruby_openai` | [Link](./examples/contrib/ruby_openai/basic.rb) | -| Multiple | `ruby_llm` | >= 1.8.0 | `:ruby_llm` | [Link](./examples/contrib/ruby_llm/basic.rb) | +| 
Anthropic | `anthropic` | >= 0.3.0 | `:anthropic` | [Link](./examples/contrib/anthropic.rb) | +| OpenAI | `openai` | >= 0.1.0 | `:openai` | [Link](./examples/contrib/openai.rb) | +| | `ruby-openai` | >= 7.0.0 | `:ruby_openai` | [Link](./examples/contrib/ruby-openai.rb) | +| Multiple | `ruby_llm` | >= 1.8.0 | `:ruby_llm` | [Link](./examples/contrib/ruby_llm.rb) | ### Manually applying instrumentation @@ -305,7 +305,7 @@ require 'bundler/setup' Bundler.require ``` -It is present by default in Rails applications, but may not be in other Sinatra, Rack or other applications. +It is present by default in Rails applications, but may not be in Sinatra, Rack, or other applications. Alternatively, you can add `require 'braintrust/setup'` to your application initialization files. diff --git a/examples/contrib/anthropic.rb b/examples/contrib/anthropic.rb new file mode 100644 index 0000000..4f2dd1f --- /dev/null +++ b/examples/contrib/anthropic.rb @@ -0,0 +1,46 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "anthropic" +require "opentelemetry/sdk" + +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec appraisal anthropic ruby examples/contrib/anthropic.rb + +# Check for API keys +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + puts "Get your API key from: https://console.anthropic.com/" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. +Braintrust.init(blocking_login: true) + +# Create Anthropic client +client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Make a message request (automatically traced!) 
+client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 100, + system: "You are a helpful assistant.", + messages: [ + {role: "user", content: "Say hello and tell me a short joke."} + ] +) + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/openai.rb b/examples/contrib/openai.rb new file mode 100644 index 0000000..dbaa27d --- /dev/null +++ b/examples/contrib/openai.rb @@ -0,0 +1,46 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec appraisal openai ruby examples/contrib/openai.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. +Braintrust.init(blocking_login: true) + +# Create OpenAI client +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Make a chat completion request (automatically traced!) 
+client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 +) + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby-openai.rb b/examples/contrib/ruby-openai.rb new file mode 100644 index 0000000..01c7ba7 --- /dev/null +++ b/examples/contrib/ruby-openai.rb @@ -0,0 +1,47 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec appraisal ruby-openai examples/internal/contrib/ruby_openai/basic.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. 
+Braintrust.init(blocking_login: true) + +# Create OpenAI client +client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) + +client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + max_tokens: 100 + } +) + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm.rb b/examples/contrib/ruby_llm.rb new file mode 100644 index 0000000..02acf0f --- /dev/null +++ b/examples/contrib/ruby_llm.rb @@ -0,0 +1,40 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Basic RubyLLM chat with Braintrust tracing +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. +Braintrust.init(blocking_login: true) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +chat = RubyLLM.chat(model: "gpt-4o-mini") +chat.ask("What is the capital of France?") + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. 
+OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/anthropic/basic.rb b/examples/internal/contrib/anthropic/basic.rb similarity index 90% rename from examples/contrib/anthropic/basic.rb rename to examples/internal/contrib/anthropic/basic.rb index b453b0b..d51e98e 100644 --- a/examples/contrib/anthropic/basic.rb +++ b/examples/internal/contrib/anthropic/basic.rb @@ -11,7 +11,7 @@ # This example demonstrates how to automatically trace Anthropic API calls with Braintrust. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/basic.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/basic.rb # Check for API keys unless ENV["ANTHROPIC_API_KEY"] @@ -33,7 +33,7 @@ # Make a message request (automatically traced!) puts "Sending message request to Anthropic..." -message = tracer.in_span("examples/contrib/anthropic/basic.rb") do |span| +message = tracer.in_span("examples/internal/contrib/anthropic/basic.rb") do |span| root_span = span client.messages.create( diff --git a/examples/contrib/anthropic/deprecated.rb b/examples/internal/contrib/anthropic/deprecated.rb similarity index 91% rename from examples/contrib/anthropic/deprecated.rb rename to examples/internal/contrib/anthropic/deprecated.rb index d1761e6..341527a 100644 --- a/examples/contrib/anthropic/deprecated.rb +++ b/examples/internal/contrib/anthropic/deprecated.rb @@ -13,7 +13,7 @@ # For new code, use `Braintrust.instrument!(:anthropic)` instead. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/deprecated.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/deprecated.rb # Check for API keys unless ENV["ANTHROPIC_API_KEY"] @@ -37,7 +37,7 @@ # Make a message request (automatically traced!) puts "Sending message request to Anthropic..." 
-message = tracer.in_span("examples/contrib/anthropic/deprecated.rb") do |span| +message = tracer.in_span("examples/internal/contrib/anthropic/deprecated.rb") do |span| root_span = span client.messages.create( diff --git a/examples/contrib/anthropic/instance.rb b/examples/internal/contrib/anthropic/instance.rb similarity index 89% rename from examples/contrib/anthropic/instance.rb rename to examples/internal/contrib/anthropic/instance.rb index 8b49458..6929e60 100644 --- a/examples/contrib/anthropic/instance.rb +++ b/examples/internal/contrib/anthropic/instance.rb @@ -12,7 +12,7 @@ # all Anthropic clients globally. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/instance.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/instance.rb unless ENV["ANTHROPIC_API_KEY"] puts "Error: ANTHROPIC_API_KEY environment variable is required" @@ -31,7 +31,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") root_span = nil -tracer.in_span("examples/contrib/anthropic/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/anthropic/instance.rb") do |span| root_span = span puts "Calling traced client..." diff --git a/examples/contrib/anthropic/streaming.rb b/examples/internal/contrib/anthropic/streaming.rb similarity index 91% rename from examples/contrib/anthropic/streaming.rb rename to examples/internal/contrib/anthropic/streaming.rb index ea42074..ac70daf 100644 --- a/examples/contrib/anthropic/streaming.rb +++ b/examples/internal/contrib/anthropic/streaming.rb @@ -14,7 +14,7 @@ # 2. 
Text-only streaming with .text (simplified) # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/streaming.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/streaming.rb unless ENV["ANTHROPIC_API_KEY"] puts "Error: ANTHROPIC_API_KEY environment variable is required" @@ -29,7 +29,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("anthropic-streaming-example") root_span = nil -tracer.in_span("examples/contrib/anthropic/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/anthropic/streaming.rb") do |span| root_span = span # Pattern 1: Iterator-based streaming with .each diff --git a/examples/contrib/openai/basic.rb b/examples/internal/contrib/openai/basic.rb similarity index 87% rename from examples/contrib/openai/basic.rb rename to examples/internal/contrib/openai/basic.rb index d32371b..5c1ad0e 100644 --- a/examples/contrib/openai/basic.rb +++ b/examples/internal/contrib/openai/basic.rb @@ -12,10 +12,10 @@ # # Note: The openai gem is a development dependency. To run this example: # 1. Install dependencies: bundle install -# 2. Run from the SDK root: bundle exec ruby examples/contrib/openai/basic.rb +# 2. Run from the SDK root: bundle exec ruby examples/internal/contrib/openai/basic.rb # # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/basic.rb +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/internal/contrib/openai/basic.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -37,7 +37,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI..." 
-response = tracer.in_span("examples/contrib/openai/basic.rb") do |span| +response = tracer.in_span("examples/internal/contrib/openai/basic.rb") do |span| root_span = span client.chat.completions.create( diff --git a/examples/contrib/openai/deprecated.rb b/examples/internal/contrib/openai/deprecated.rb similarity index 87% rename from examples/contrib/openai/deprecated.rb rename to examples/internal/contrib/openai/deprecated.rb index 872242f..c2e6fad 100644 --- a/examples/contrib/openai/deprecated.rb +++ b/examples/internal/contrib/openai/deprecated.rb @@ -12,10 +12,10 @@ # # Note: The openai gem is a development dependency. To run this example: # 1. Install dependencies: bundle install -# 2. Run from the SDK root: bundle exec ruby examples/contrib/openai/deprecated.rb +# 2. Run from the SDK root: bundle exec ruby examples/internal/contrib/openai/deprecated.rb # # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/deprecated.rb +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/internal/contrib/openai/deprecated.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -39,7 +39,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI..." -response = tracer.in_span("examples/contrib/openai/deprecated.rb") do |span| +response = tracer.in_span("examples/internal/contrib/openai/deprecated.rb") do |span| root_span = span client.chat.completions.create( diff --git a/examples/contrib/openai/instance.rb b/examples/internal/contrib/openai/instance.rb similarity index 90% rename from examples/contrib/openai/instance.rb rename to examples/internal/contrib/openai/instance.rb index 6516557..396b765 100644 --- a/examples/contrib/openai/instance.rb +++ b/examples/internal/contrib/openai/instance.rb @@ -12,7 +12,7 @@ # all OpenAI clients globally. 
# # Usage: -# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/instance.rb +# OPENAI_API_KEY=your-key bundle exec ruby examples/internal/contrib/openai/instance.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -31,7 +31,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("openai-example") root_span = nil -tracer.in_span("examples/contrib/openai/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/openai/instance.rb") do |span| root_span = span puts "Calling traced client..." diff --git a/examples/contrib/openai/streaming.rb b/examples/internal/contrib/openai/streaming.rb similarity index 92% rename from examples/contrib/openai/streaming.rb rename to examples/internal/contrib/openai/streaming.rb index 5c4dd89..0dcdc86 100644 --- a/examples/contrib/openai/streaming.rb +++ b/examples/internal/contrib/openai/streaming.rb @@ -14,7 +14,7 @@ # 2. Responses API with stream # # Usage: -# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/streaming.rb +# OPENAI_API_KEY=your-key bundle exec ruby examples/internal/contrib/openai/streaming.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -29,7 +29,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("openai-streaming-example") root_span = nil -tracer.in_span("examples/contrib/openai/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/openai/streaming.rb") do |span| root_span = span # Pattern 1: Chat completions streaming with stream_raw diff --git a/examples/contrib/ruby_llm/basic.rb b/examples/internal/contrib/ruby_llm/basic.rb similarity index 88% rename from examples/contrib/ruby_llm/basic.rb rename to examples/internal/contrib/ruby_llm/basic.rb index cc35fbb..ce18061 100644 --- a/examples/contrib/ruby_llm/basic.rb +++ b/examples/internal/contrib/ruby_llm/basic.rb @@ -9,7 +9,7 @@ # Example: Basic RubyLLM chat with Braintrust tracing # # Usage: -# OPENAI_API_KEY=your-key 
bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/basic.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/internal/contrib/ruby_llm/basic.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -27,7 +27,7 @@ root_span = nil puts "Sending chat request..." -response = tracer.in_span("examples/contrib/ruby_llm/basic.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_llm/basic.rb") do |span| root_span = span chat = RubyLLM.chat(model: "gpt-4o-mini") chat.ask("What is the capital of France?") diff --git a/examples/contrib/ruby_llm/instance.rb b/examples/internal/contrib/ruby_llm/instance.rb similarity index 91% rename from examples/contrib/ruby_llm/instance.rb rename to examples/internal/contrib/ruby_llm/instance.rb index 5feeb52..2dae854 100644 --- a/examples/contrib/ruby_llm/instance.rb +++ b/examples/internal/contrib/ruby_llm/instance.rb @@ -12,7 +12,7 @@ # all RubyLLM chats globally. # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/instance.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/internal/contrib/ruby_llm/instance.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -35,7 +35,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") root_span = nil -tracer.in_span("examples/contrib/ruby_llm/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_llm/instance.rb") do |span| root_span = span puts "Calling traced chat..." 
diff --git a/examples/contrib/ruby_llm/streaming.rb b/examples/internal/contrib/ruby_llm/streaming.rb similarity index 88% rename from examples/contrib/ruby_llm/streaming.rb rename to examples/internal/contrib/ruby_llm/streaming.rb index 179064f..47d504a 100644 --- a/examples/contrib/ruby_llm/streaming.rb +++ b/examples/internal/contrib/ruby_llm/streaming.rb @@ -9,7 +9,7 @@ # Example: Streaming RubyLLM chat with Braintrust tracing # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/streaming.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/internal/contrib/ruby_llm/streaming.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -29,7 +29,7 @@ puts "Streaming response..." print "\nAssistant: " -tracer.in_span("examples/contrib/ruby_llm/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_llm/streaming.rb") do |span| root_span = span chat = RubyLLM.chat(model: "gpt-4o-mini") chat.ask("Write a haiku about programming") do |chunk| diff --git a/examples/contrib/ruby_llm/tool_usage.rb b/examples/internal/contrib/ruby_llm/tool_usage.rb similarity index 98% rename from examples/contrib/ruby_llm/tool_usage.rb rename to examples/internal/contrib/ruby_llm/tool_usage.rb index fa154ea..01e2c00 100644 --- a/examples/contrib/ruby_llm/tool_usage.rb +++ b/examples/internal/contrib/ruby_llm/tool_usage.rb @@ -16,7 +16,7 @@ # - Multi-turn conversation with multiple tool invocations # # Trace hierarchy will look like: -# examples/contrib/ruby_llm/tool_usage.rb +# examples/internal/contrib/ruby_llm/tool_usage.rb # └── ruby_llm.chat # ├── ruby_llm.tool.search_flights # │ └── flight_api.search (internal span) @@ -27,7 +27,7 @@ # └── events_api.search (internal span) # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/tool_usage.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby 
examples/internal/contrib/ruby_llm/tool_usage.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -319,7 +319,7 @@ def execute(city:, travel_dates:, interests: "food,art") puts "Watch the trace to see how tool calls create hierarchical spans." puts -response = tracer.in_span("examples/contrib/ruby_llm/tool_usage.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_llm/tool_usage.rb") do |span| root_span = span chat = RubyLLM.chat(model: "gpt-4o-mini") @@ -357,7 +357,7 @@ def execute(city:, travel_dates:, interests: "food,art") puts "View the full trace: #{Braintrust::Trace.permalink(root_span)}" puts puts "Expected span hierarchy:" -puts " └── examples/contrib/ruby_llm/tool_usage.rb" +puts " └── examples/internal/contrib/ruby_llm/tool_usage.rb" puts " └── ruby_llm.chat" puts " ├── ruby_llm.tool.search_flights" puts " │ └── flight_api.search" diff --git a/examples/contrib/ruby_openai/basic.rb b/examples/internal/contrib/ruby_openai/basic.rb similarity index 90% rename from examples/contrib/ruby_openai/basic.rb rename to examples/internal/contrib/ruby_openai/basic.rb index 8d58f4f..a6179c7 100644 --- a/examples/contrib/ruby_openai/basic.rb +++ b/examples/internal/contrib/ruby_openai/basic.rb @@ -14,10 +14,10 @@ # # Note: ruby-openai is an optional development dependency. To run this example: # 1. Install ruby-openai: gem install ruby-openai -# 2. Run from the SDK root: ruby examples/contrib/ruby_openai/basic.rb +# 2. Run from the SDK root: ruby examples/internal/contrib/ruby_openai/basic.rb # # Usage: -# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/basic.rb +# OPENAI_API_KEY=your-openai-key ruby examples/internal/contrib/ruby_openai/basic.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -40,7 +40,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." 
-response = tracer.in_span("examples/contrib/ruby_openai/basic.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_openai/basic.rb") do |span| root_span = span # ruby-openai uses: client.chat(parameters: {...}) diff --git a/examples/contrib/ruby_openai/deprecated.rb b/examples/internal/contrib/ruby_openai/deprecated.rb similarity index 83% rename from examples/contrib/ruby_openai/deprecated.rb rename to examples/internal/contrib/ruby_openai/deprecated.rb index 370208d..0e1225d 100755 --- a/examples/contrib/ruby_openai/deprecated.rb +++ b/examples/internal/contrib/ruby_openai/deprecated.rb @@ -10,18 +10,18 @@ # # DEPRECATED: This example uses the old wrap() API which is deprecated. # Please use the new instrument!() API instead: -# - examples/contrib/ruby_openai/basic.rb - Class-level instrumentation (all clients) -# - examples/contrib/ruby_openai/instance.rb - Instance-level instrumentation (specific client) +# - examples/internal/contrib/ruby_openai/basic.rb - Class-level instrumentation (all clients) +# - examples/internal/contrib/ruby_openai/instance.rb - Instance-level instrumentation (specific client) # # This example demonstrates how to automatically trace OpenAI API calls using # the ruby-openai gem (by alexrudall) with Braintrust. # # Note: ruby-openai is an optional development dependency. To run this example: # 1. Install ruby-openai: gem install ruby-openai -# 2. Run from the SDK root: ruby examples/contrib/ruby_openai/deprecated.rb +# 2. Run from the SDK root: ruby examples/internal/contrib/ruby_openai/deprecated.rb # # Usage: -# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/deprecated.rb +# OPENAI_API_KEY=your-openai-key ruby examples/internal/contrib/ruby_openai/deprecated.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -44,7 +44,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." 
-response = tracer.in_span("examples/contrib/ruby_openai/deprecated.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_openai/deprecated.rb") do |span| root_span = span # ruby-openai uses: client.chat(parameters: {...}) diff --git a/examples/contrib/ruby_openai/instance.rb b/examples/internal/contrib/ruby_openai/instance.rb similarity index 92% rename from examples/contrib/ruby_openai/instance.rb rename to examples/internal/contrib/ruby_openai/instance.rb index 97ce7ab..c5c78ab 100644 --- a/examples/contrib/ruby_openai/instance.rb +++ b/examples/internal/contrib/ruby_openai/instance.rb @@ -12,7 +12,7 @@ # all OpenAI clients globally. # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_openai ruby examples/contrib/ruby_openai/instance.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_openai ruby examples/internal/contrib/ruby_openai/instance.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -31,7 +31,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") root_span = nil -tracer.in_span("examples/contrib/ruby_openai/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_openai/instance.rb") do |span| root_span = span puts "Calling traced client..." diff --git a/examples/contrib/ruby_openai/streaming.rb b/examples/internal/contrib/ruby_openai/streaming.rb similarity index 92% rename from examples/contrib/ruby_openai/streaming.rb rename to examples/internal/contrib/ruby_openai/streaming.rb index b958744..88b7a7c 100644 --- a/examples/contrib/ruby_openai/streaming.rb +++ b/examples/internal/contrib/ruby_openai/streaming.rb @@ -12,7 +12,7 @@ # using the ruby-openai gem. Shows the callback-based streaming pattern. 
# # Usage: -# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/ruby_openai/streaming.rb +# OPENAI_API_KEY=your-key bundle exec ruby examples/internal/contrib/ruby_openai/streaming.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -28,7 +28,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-streaming-example") root_span = nil -tracer.in_span("examples/contrib/ruby_openai/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_openai/streaming.rb") do |span| root_span = span # Pattern 1: Chat streaming with callback proc From db6cc9f4e54f5fd222550c78b0c80d909649a6a3 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 18:14:38 -0500 Subject: [PATCH 23/27] Fixed: Missing darwin arch in Gemfile.lock --- Gemfile.lock | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Gemfile.lock b/Gemfile.lock index d2ff082..1b9cd16 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -24,6 +24,9 @@ GEM bigdecimal rexml docile (1.4.1) + google-protobuf (4.33.2-arm64-darwin) + bigdecimal + rake (>= 13) google-protobuf (4.33.2-x86_64-linux-gnu) bigdecimal rake (>= 13) @@ -122,6 +125,7 @@ GEM yard (0.9.38) PLATFORMS + arm64-darwin x86_64-linux DEPENDENCIES From f97127a8aa3de77c133edce1ad8cd801f191a363 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 18:55:48 -0500 Subject: [PATCH 24/27] Changed: Make Internal::Time tests less flaky --- test/braintrust/internal/time_test.rb | 67 +++------------------------ 1 file changed, 7 insertions(+), 60 deletions(-) diff --git a/test/braintrust/internal/time_test.rb b/test/braintrust/internal/time_test.rb index d049149..9c1257b 100644 --- a/test/braintrust/internal/time_test.rb +++ b/test/braintrust/internal/time_test.rb @@ -8,12 +8,11 @@ class Braintrust::Internal::TimeTest < Minitest::Test # --- measure with block --- - def test_measure_with_block_returns_elapsed_time - elapsed = Time.measure { sleep 0.01 } + def 
test_measure_with_block_returns_float + elapsed = Time.measure { 1 + 1 } assert_kind_of Float, elapsed - assert elapsed >= 0.01, "Expected elapsed >= 0.01, got #{elapsed}" - assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" + assert elapsed >= 0 end def test_measure_with_block_executes_block @@ -32,72 +31,20 @@ def test_measure_with_block_returns_time_not_block_result # --- measure without arguments --- - def test_measure_without_args_returns_monotonic_time - time1 = Time.measure - time2 = Time.measure - - assert_kind_of Float, time1 - assert_kind_of Float, time2 - assert time2 >= time1, "Monotonic time should not go backwards" - end - - def test_measure_without_args_returns_positive_value + def test_measure_without_args_returns_float time = Time.measure + assert_kind_of Float, time assert time > 0, "Monotonic time should be positive" end # --- measure with start_time --- - def test_measure_with_start_time_returns_elapsed + def test_measure_with_start_time_returns_float start = Time.measure - sleep 0.01 elapsed = Time.measure(start) assert_kind_of Float, elapsed - assert elapsed >= 0.01, "Expected elapsed >= 0.01, got #{elapsed}" - assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" - end - - def test_measure_with_start_time_returns_non_negative - start = Time.measure - elapsed = Time.measure(start) - - assert elapsed >= 0, "Elapsed time should be non-negative" - end - - # --- monotonic behavior --- - - def test_measure_is_monotonic - times = 10.times.map { Time.measure } - - times.each_cons(2) do |t1, t2| - assert t2 >= t1, "Monotonic time should never decrease" - end - end - - def test_measure_returns_seconds_as_float - start = Time.measure - sleep 0.05 - elapsed = Time.measure(start) - - # Should be roughly 0.05 seconds, not 50 milliseconds or 50_000_000 nanoseconds - assert elapsed >= 0.04, "Expected seconds, got #{elapsed} (too small, might be wrong unit)" - assert elapsed < 0.2, "Expected seconds, got #{elapsed} (too large, might be 
wrong unit)" - end - - # --- equivalence of block and start_time modes --- - - def test_block_and_start_time_modes_equivalent - # Both modes should measure approximately the same duration - block_elapsed = Time.measure { sleep 0.02 } - - start = Time.measure - sleep 0.02 - manual_elapsed = Time.measure(start) - - # Both should be close to 0.02 seconds - assert_in_delta block_elapsed, manual_elapsed, 0.01, - "Block and start_time modes should produce similar results" + assert elapsed >= 0 end end From 32ddc6b19c086d49a94893d6c171a558af3404de Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 19:44:07 -0500 Subject: [PATCH 25/27] Fixed: More flake from polluted global state --- .../ruby_llm/instrumentation/chat_test.rb | 165 +++++++++--------- .../contrib/ruby_openai/openai_test.rb | 46 ++--- test/braintrust/contrib/setup_test.rb | 10 +- test/braintrust/contrib_test.rb | 10 +- test/test_helper.rb | 4 + 5 files changed, 127 insertions(+), 108 deletions(-) diff --git a/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb index c0d1f5a..788f36a 100644 --- a/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb +++ b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb @@ -53,27 +53,34 @@ def test_applied_returns_true_when_included end end -# Define test tool classes for different ruby_llm versions -if defined?(RubyLLM) - RUBY_LLM_VERSION = Gem.loaded_specs["ruby_llm"]&.version || Gem::Version.new("0.0.0") - SUPPORTS_PARAMS_DSL = Gem::Version.new("1.9.0") <= RUBY_LLM_VERSION +# E2E tests for Chat instrumentation +class Braintrust::Contrib::RubyLLM::Instrumentation::ChatE2ETest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper - # Tool for ruby_llm 1.8+ (using param - singular) - class WeatherTestTool < RubyLLM::Tool - description "Get the current weather for a location" + def self.supports_tool_calling? 
+ return false unless defined?(RubyLLM) + version = Gem.loaded_specs["ruby_llm"]&.version || Gem::Version.new("0.0.0") + Gem::Version.new("1.9.0") <= version + end - param :location, type: :string, desc: "The city and state, e.g. San Francisco, CA" - param :unit, type: :string, desc: "Temperature unit (celsius or fahrenheit)" + # Creates a test tool class and yields it to the block. + # Uses stub_const to avoid polluting global state. + def with_weather_test_tool + tool_class = Class.new(RubyLLM::Tool) do + description "Get the current weather for a location" - def execute(location:, unit: "fahrenheit") - {location: location, temperature: 72, unit: unit, conditions: "sunny"} + param :location, type: :string, desc: "The city and state, e.g. San Francisco, CA" + param :unit, type: :string, desc: "Temperature unit (celsius or fahrenheit)" + + def execute(location:, unit: "fahrenheit") + {location: location, temperature: 72, unit: unit, conditions: "sunny"} + end end - end -end -# E2E tests for Chat instrumentation -class Braintrust::Contrib::RubyLLM::Instrumentation::ChatE2ETest < Minitest::Test - include Braintrust::Contrib::RubyLLM::IntegrationHelper + Object.stub_const(:WeatherTestTool, tool_class) do + yield WeatherTestTool + end + end def setup skip_unless_ruby_llm! 
@@ -188,71 +195,73 @@ def test_streaming_complete_creates_span # --- Tool calling --- def test_tool_calling_creates_nested_spans - skip "Tool calling test requires ruby_llm >= 1.9" unless defined?(SUPPORTS_PARAMS_DSL) && SUPPORTS_PARAMS_DSL - - VCR.use_cassette("contrib/ruby_llm/tool_calling") do - rig = setup_otel_test_rig - - RubyLLM.configure do |config| - config.openai_api_key = get_openai_key - end - - chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - chat.with_tool(WeatherTestTool) - - response = chat.ask("What's the weather like in San Francisco?") - - refute_nil response - refute_nil response.content - - # Should have multiple spans: chat + tool + possibly another chat for follow-up - spans = rig.drain - assert spans.length >= 2, "Expected at least 2 spans for tool calling (chat + tool)" - - # Find the chat span and tool span - chat_spans = spans.select { |s| s.name == "ruby_llm.chat" } - tool_spans = spans.select { |s| s.name.start_with?("ruby_llm.tool.") } - - assert chat_spans.length >= 1, "Expected at least one ruby_llm.chat span" - assert tool_spans.length >= 1, "Expected at least one ruby_llm.tool.* span" - - # Verify tool span has correct attributes - tool_span = tool_spans.first - assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" - span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) - assert_equal "tool", span_attrs["type"] - assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" - assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" - - # Verify chat span metadata contains exact tool schema (OpenAI format) - chat_span = chat_spans.first - metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - - expected_tools = [ - { - "type" => "function", - "function" => { - "name" => "weather_test", - "description" => 
"Get the current weather for a location", - "parameters" => { - "type" => "object", - "properties" => { - "location" => { - "type" => "string", - "description" => "The city and state, e.g. San Francisco, CA" + skip "Tool calling test requires ruby_llm >= 1.9" unless self.class.supports_tool_calling? + + with_weather_test_tool do |tool_class| + VCR.use_cassette("contrib/ruby_llm/tool_calling") do + rig = setup_otel_test_rig + + RubyLLM.configure do |config| + config.openai_api_key = get_openai_key + end + + chat = RubyLLM.chat(model: "gpt-4o-mini") + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + chat.with_tool(tool_class) + + response = chat.ask("What's the weather like in San Francisco?") + + refute_nil response + refute_nil response.content + + # Should have multiple spans: chat + tool + possibly another chat for follow-up + spans = rig.drain + assert spans.length >= 2, "Expected at least 2 spans for tool calling (chat + tool)" + + # Find the chat span and tool span + chat_spans = spans.select { |s| s.name == "ruby_llm.chat" } + tool_spans = spans.select { |s| s.name.start_with?("ruby_llm.tool.") } + + assert chat_spans.length >= 1, "Expected at least one ruby_llm.chat span" + assert tool_spans.length >= 1, "Expected at least one ruby_llm.tool.* span" + + # Verify tool span has correct attributes + tool_span = tool_spans.first + assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" + span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) + assert_equal "tool", span_attrs["type"] + assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" + assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" + + # Verify chat span metadata contains exact tool schema (OpenAI format) + chat_span = chat_spans.first + metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + + expected_tools = [ 
+ { + "type" => "function", + "function" => { + "name" => "weather_test", + "description" => "Get the current weather for a location", + "parameters" => { + "type" => "object", + "properties" => { + "location" => { + "type" => "string", + "description" => "The city and state, e.g. San Francisco, CA" + }, + "unit" => { + "type" => "string", + "description" => "Temperature unit (celsius or fahrenheit)" + } }, - "unit" => { - "type" => "string", - "description" => "Temperature unit (celsius or fahrenheit)" - } - }, - "required" => ["location", "unit"] + "required" => ["location", "unit"] + } } } - } - ] - assert_equal expected_tools, metadata["tools"] + ] + assert_equal expected_tools, metadata["tools"] + end end end diff --git a/test/braintrust/contrib/ruby_openai/openai_test.rb b/test/braintrust/contrib/ruby_openai/openai_test.rb index b147db8..4575728 100644 --- a/test/braintrust/contrib/ruby_openai/openai_test.rb +++ b/test/braintrust/contrib/ruby_openai/openai_test.rb @@ -1,14 +1,7 @@ # frozen_string_literal: true -# require "test_helper" - -# # Load the openai gem to define OpenAI::Client (and OpenAI::Internal if official gem) -# # This must happen before tests run so the gem detection logic works -# begin -# require "openai" -# rescue LoadError -# # Gem not available in this appraisal -# end +require "test_helper" +require_relative "integration_helper" # This test verifies that the RubyOpenAI integration correctly identifies when the # `ruby-openai` gem is loaded vs the official `openai` gem. @@ -23,17 +16,13 @@ # 2. When `ruby-openai` gem is loaded, instrument! succeeds and patchers are applied class Braintrust::Contrib::RubyOpenAI::OpenAITest < Minitest::Test - Integration = Braintrust::Contrib::RubyOpenAI::Integration + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper - # def ruby_openai_available? - # # OpenAI::Internal is only defined in the official openai gem - # !Gem.loaded_specs["ruby-openai"].nil? 
- # end + Integration = Braintrust::Contrib::RubyOpenAI::Integration - # def official_openai_available? - # # OpenAI::Internal is only defined in the official openai gem - # !Gem.loaded_specs["openai"].nil? - # end + def setup + load_ruby_openai_if_available + end def ruby_openai_loaded? # Check if ruby-openai gem is loaded (not the official openai gem). @@ -64,12 +53,25 @@ def test_loaded_returns_false_when_ruby_openai_gem_not_loaded def test_instrument_succeeds_for_ruby_openai_gem skip "ruby-openai gem not loaded" unless ruby_openai_loaded? - result = Braintrust.instrument!(:ruby_openai) + # Run in fork to avoid polluting global state (class-level patching) + assert_in_fork do + require "openai" + + result = Braintrust.instrument!(:ruby_openai) + + unless result + puts "FAIL: instrument! should return truthy for ruby-openai gem" + exit 1 + end - assert result, "instrument! should return truthy for ruby-openai gem" + any_patched = Integration.patchers.any?(&:patched?) + unless any_patched + puts "FAIL: at least one patcher should be patched for ruby-openai gem" + exit 1 + end - any_patched = Integration.patchers.any?(&:patched?) - assert any_patched, "at least one patcher should be patched for ruby-openai gem" + puts "instrument_succeeds_test:passed" + end end # --- OpenAI::Internal --- diff --git a/test/braintrust/contrib/setup_test.rb b/test/braintrust/contrib/setup_test.rb index 59cb432..238eb0d 100644 --- a/test/braintrust/contrib/setup_test.rb +++ b/test/braintrust/contrib/setup_test.rb @@ -21,10 +21,12 @@ def test_run_is_idempotent call_count = 0 - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { call_count += 1 }) do - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do + Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { call_count += 1 }) do + Braintrust::Contrib::Setup.run! + Braintrust::Contrib::Setup.run! 
+ Braintrust::Contrib::Setup.run! + end end assert_equal 1, call_count, "run! should only execute once" diff --git a/test/braintrust/contrib_test.rb b/test/braintrust/contrib_test.rb index 4e19da1..af867ec 100644 --- a/test/braintrust/contrib_test.rb +++ b/test/braintrust/contrib_test.rb @@ -8,8 +8,10 @@ class Braintrust::ContribTest < Minitest::Test def test_auto_instrument_enabled_by_default called_with = nil - Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do - Braintrust.auto_instrument! + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end end assert_equal({only: nil, except: nil}, called_with) @@ -70,7 +72,7 @@ def test_auto_instrument_passes_except_from_hash def test_auto_instrument_reads_only_from_env called_with = nil - ClimateControl.modify(BRAINTRUST_INSTRUMENT_ONLY: "openai,anthropic") do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil, BRAINTRUST_INSTRUMENT_ONLY: "openai,anthropic") do Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do Braintrust.auto_instrument! end @@ -82,7 +84,7 @@ def test_auto_instrument_reads_only_from_env def test_auto_instrument_reads_except_from_env called_with = nil - ClimateControl.modify(BRAINTRUST_INSTRUMENT_EXCEPT: "ruby_llm") do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil, BRAINTRUST_INSTRUMENT_EXCEPT: "ruby_llm") do Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do Braintrust.auto_instrument! end diff --git a/test/test_helper.rb b/test/test_helper.rb index e03950d..b59dd77 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,5 +1,9 @@ # frozen_string_literal: true +# Disable auto-instrumentation by default in tests to prevent test pollution. +# Tests that specifically need auto-instrumentation can opt-in via auto_instrument: true. 
+ENV["BRAINTRUST_AUTO_INSTRUMENT"] ||= "false" + # Start SimpleCov BEFORE loading any code to track # Skip coverage when running under appraisal (different dependency scenarios) unless ENV["BUNDLE_GEMFILE"]&.include?("gemfiles/") From 9ed8f4bc0a544ec7a698926157f8315e9b13fba9 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 21:27:01 -0500 Subject: [PATCH 26/27] Fixed: `braintrust exec` not working in Rails apps (zeitwerk incompatibility) --- braintrust.gemspec | 1 + lib/braintrust/contrib/setup.rb | 186 ++++++--- lib/braintrust/setup.rb | 4 +- test/braintrust/contrib/setup_test.rb | 570 ++++++++++++++++---------- test/braintrust/setup_test.rb | 35 +- test/support/fixture_helper.rb | 33 +- 6 files changed, 526 insertions(+), 303 deletions(-) diff --git a/braintrust.gemspec b/braintrust.gemspec index 6f1fd68..ba9f5c3 100644 --- a/braintrust.gemspec +++ b/braintrust.gemspec @@ -20,6 +20,7 @@ Gem::Specification.new do |spec| # Specify which files should be added to the gem when it is released. spec.files = Dir.glob(%w[ + exe/* lib/**/*.rb README.md LICENSE diff --git a/lib/braintrust/contrib/setup.rb b/lib/braintrust/contrib/setup.rb index 3fc0ad4..41a18ce 100644 --- a/lib/braintrust/contrib/setup.rb +++ b/lib/braintrust/contrib/setup.rb @@ -4,75 +4,165 @@ module Braintrust module Contrib + # Automatic instrumentation setup for LLM libraries. + # + # Intercepts `require` calls to detect when LLM libraries are loaded, then patches + # them automatically. The main challenge is doing this safely with zeitwerk (Rails' + # autoloader) which also hooks into require. + # + # ## The Zeitwerk Problem + # + # Zeitwerk uses `alias_method :zeitwerk_original_require, :require`. If we prepend + # to Kernel before zeitwerk loads, zeitwerk captures our method as its "original", + # creating an infinite loop. + # + # ## Solution: Two-Phase Hook + # + # ┌─────────────────────────────────────────────────────────────────────────┐ + # │ Setup.run! 
called │ + # └─────────────────────────────────────────────────────────────────────────┘ + # │ + # ┌─────────────────────────┼─────────────────────────┐ + # ▼ ▼ ▼ + # ┌─────────────────┐ ┌─────────────────────┐ ┌─────────────────────┐ + # │ Rails loaded? │ │ Zeitwerk loaded? │ │ Neither loaded yet │ + # │ (Rails::Railtie)│ │ │ │ │ + # └────────┬────────┘ └──────────┬──────────┘ └──────────┬──────────┘ + # │ │ │ + # ▼ ▼ ▼ + # ┌─────────────────┐ ┌─────────────────────┐ ┌─────────────────────┐ + # │ Install Railtie │ │ Install prepend │ │ Install watcher │ + # │ (after_init) │ │ hook directly │ │ hook (alias_method) │ + # └─────────────────┘ └─────────────────────┘ └──────────┬──────────┘ + # │ │ │ + # │ │ ┌──────────┴──────────┐ + # │ │ ▼ ▼ + # │ │ ┌──────────────┐ ┌──────────────────┐ + # │ │ │ Rails loads? │ │ Zeitwerk loads? │ + # │ │ │ → Railtie │ │ → Prepend hook │ + # │ │ └──────────────┘ └──────────────────┘ + # │ │ │ │ + # ▼ ▼ ▼ ▼ + # ┌─────────────────────────────────────────────────────────────────────────┐ + # │ LLM library loads → patch! │ + # └─────────────────────────────────────────────────────────────────────────┘ + # + # The watcher hook uses alias_method which zeitwerk captures harmlessly (alias + # chains work correctly). Once zeitwerk/Rails loads, we upgrade to the better + # approach: prepend (takes precedence, `super` chains through zeitwerk) or + # Railtie (patches after all gems loaded via after_initialize). + # module Setup + REENTRANCY_KEY = :braintrust_in_require_hook + class << self + # Main entry point. Call once per process. def run! - return if @setup_complete - @setup_complete = true - unless Internal::Env.auto_instrument - Braintrust::Log.debug("Contrib::Setup.run! auto-instrumentation disabled via environment") + Braintrust::Log.debug("Contrib::Setup: auto-instrumentation disabled via environment") return end - # Set up deferred patching for libraries loaded later - if rails_environment? - Braintrust::Log.debug("Contrib::Setup.run! 
using Rails after_initialize hook") - setup_rails_hook! + @registry = Contrib::Registry.instance + @only = Internal::Env.instrument_only + @except = Internal::Env.instrument_except + + if defined?(::Rails::Railtie) + Braintrust::Log.debug("Contrib::Setup: using Rails railtie hook") + install_railtie! + elsif defined?(::Zeitwerk) + Braintrust::Log.debug("Contrib::Setup: using require hook (zeitwerk detected)") + install_require_hook! else - Braintrust::Log.debug("Contrib::Setup.run! using require hook") - setup_require_hook! + Braintrust::Log.debug("Contrib::Setup: using watcher hook") + install_watcher_hook! end end - def rails_environment? - # Check for Rails::Railtie which is defined during gem loading, - # before Rails.application exists - defined?(::Rails::Railtie) + # Called after each require to check if we should patch anything. + def on_require(path) + return unless @registry + + @registry.integrations_for_require_path(path).each do |integration| + next unless integration.available? && integration.compatible? + next if @only && !@only.include?(integration.integration_name) + next if @except&.include?(integration.integration_name) + + Braintrust::Log.debug("Contrib::Setup: patching #{integration.integration_name}") + integration.patch! + end + rescue => e + Braintrust::Log.error("Auto-instrument failed: #{e.message}") end - def setup_rails_hook! + # Execute block with reentrancy protection (prevents infinite loops). + def with_reentrancy_guard + return if Thread.current[REENTRANCY_KEY] + Thread.current[REENTRANCY_KEY] = true + yield + rescue => e + Braintrust::Log.error("Auto-instrument failed: #{e.message}") + ensure + Thread.current[REENTRANCY_KEY] = false + end + + def railtie_installed? = @railtie_installed + def require_hook_installed? = @require_hook_installed + + def install_railtie! + return if @railtie_installed + @railtie_installed = true require_relative "rails/railtie" end - def setup_require_hook! 
- registry = Contrib::Registry.instance - only = Internal::Env.instrument_only - except = Internal::Env.instrument_except - - # Store original require - original_require = Kernel.method(:require) - - Kernel.define_method(:require) do |path| - result = original_require.call(path) - - # Reentrancy guard - unless Thread.current[:braintrust_in_require_hook] - begin - Thread.current[:braintrust_in_require_hook] = true - - # Check if any integration matches this require path - integrations = registry.integrations_for_require_path(path) - if integrations.any? - Braintrust::Log.debug("require '#{path}' matched integration hook: #{integrations.map(&:integration_name).inspect}") - end - integrations.each do |integration| - next unless integration.available? && integration.compatible? - next if only && !only.include?(integration.integration_name) - next if except&.include?(integration.integration_name) - integration.patch! - end - rescue => e - Braintrust::Log.error("Failed to auto-instrument on `require`: #{e.message}") - ensure - Thread.current[:braintrust_in_require_hook] = false + def install_require_hook! + return if @require_hook_installed + @require_hook_installed = true + Kernel.prepend(RequireHook) + end + + private + + def install_watcher_hook! + return if Kernel.private_method_defined?(:braintrust_watcher_require) + + Kernel.module_eval do + alias_method :braintrust_watcher_require, :require + + define_method(:require) do |path| + result = braintrust_watcher_require(path) + + # Detect Rails/zeitwerk loading and upgrade hook strategy. + # IMPORTANT: Only check when the gem itself finishes loading (path match), + # not on every require where the constant happens to be defined. + # Installing too early (during gem init) breaks the alias_method chain. + if (path == "rails" || path.include?("railties")) && + defined?(::Rails::Railtie) && !Braintrust::Contrib::Setup.railtie_installed? + Braintrust::Contrib::Setup.install_railtie! 
+ elsif (path == "zeitwerk" || path.end_with?("/zeitwerk.rb")) && + defined?(::Zeitwerk) && !Braintrust::Contrib::Setup.require_hook_installed? + Braintrust::Contrib::Setup.install_require_hook! end - end - result + Braintrust::Contrib::Setup.with_reentrancy_guard do + Braintrust::Contrib::Setup.on_require(path) + end + + result + end end end end end + + # Prepend module for require interception. + # Only installed AFTER zeitwerk loads to avoid alias_method loop. + module RequireHook + def require(path) + result = super + Setup.with_reentrancy_guard { Setup.on_require(path) } + result + end + end end end diff --git a/lib/braintrust/setup.rb b/lib/braintrust/setup.rb index 29482fb..b947ac2 100644 --- a/lib/braintrust/setup.rb +++ b/lib/braintrust/setup.rb @@ -30,13 +30,14 @@ def run! Braintrust::Log.debug("Braintrust setting up...") # Initialize Braintrust (silent failure if no API key) + # Must run in every process - tracer provider doesn't persist across Kernel.exec begin Braintrust.init rescue => e Braintrust::Log.error("Failed to automatically setup Braintrust: #{e.message}") end - # Setup contrib for 3rd party integrations + # Always setup contrib - hooks don't persist across Kernel.exec Contrib::Setup.run! Braintrust::Log.debug("Braintrust setup complete. Auto-instrumentation enabled: #{Braintrust::Internal::Env.auto_instrument}") @@ -46,5 +47,4 @@ def run! end # Auto-setup when required - Braintrust::Setup.run! diff --git a/test/braintrust/contrib/setup_test.rb b/test/braintrust/contrib/setup_test.rb index 238eb0d..e78c8ef 100644 --- a/test/braintrust/contrib/setup_test.rb +++ b/test/braintrust/contrib/setup_test.rb @@ -4,286 +4,462 @@ require "braintrust/contrib/setup" class Braintrust::Contrib::SetupTest < Minitest::Test - def setup - # Reset the setup_complete flag before each test - Braintrust::Contrib::Setup.instance_variable_set(:@setup_complete, nil) + # --- run! 
--- + + def test_run_skips_when_auto_instrument_disabled + assert_in_fork do + hook_installed = false + + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "false") do + with_stubs( + [Braintrust::Contrib::Setup, :install_watcher_hook!, -> { hook_installed = true }], + [Braintrust::Contrib::Setup, :install_require_hook!, -> { hook_installed = true }], + [Braintrust::Contrib::Setup, :install_railtie!, -> { hook_installed = true }] + ) do + Braintrust::Contrib::Setup.run! + end + end + + refute hook_installed, "hooks should not be installed when auto_instrument is disabled" + end end - def teardown - # Reset after each test to not affect other tests - Braintrust::Contrib::Setup.instance_variable_set(:@setup_complete, nil) + def test_run_installs_railtie_when_rails_defined + skip "Test not applicable when Rails is already loaded" if defined?(::Rails::Railtie) + + assert_in_fork do + railtie_installed = false + + with_mock_rails_railtie do + Braintrust::Contrib::Setup.stub(:install_railtie!, -> { railtie_installed = true }) do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do + Braintrust::Contrib::Setup.run! + end + end + end + + assert railtie_installed, "should install railtie when Rails::Railtie is defined" + end end - # --- run! idempotency --- + def test_run_installs_require_hook_when_zeitwerk_defined + skip "Test not applicable when Rails or Zeitwerk is loaded" if defined?(::Rails::Railtie) || defined?(::Zeitwerk) - def test_run_is_idempotent - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + assert_in_fork do + require_hook_installed = false - call_count = 0 + with_mock_zeitwerk do + Braintrust::Contrib::Setup.stub(:install_require_hook!, -> { require_hook_installed = true }) do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do + Braintrust::Contrib::Setup.run! 
+ end + end + end - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { call_count += 1 }) do - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! + assert require_hook_installed, "should install require hook when Zeitwerk is defined" + end + end + + def test_run_installs_watcher_hook_when_neither_rails_nor_zeitwerk + skip "Test not applicable when Rails or Zeitwerk is loaded" if defined?(::Rails::Railtie) || defined?(::Zeitwerk) + + assert_in_fork do + watcher_installed = false + + Braintrust::Contrib::Setup.stub(:install_watcher_hook!, -> { watcher_installed = true }) do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do + Braintrust::Contrib::Setup.run! + end end + + assert watcher_installed, "should install watcher hook when neither Rails nor Zeitwerk is defined" end + end + + # --- on_require --- + + def test_on_require_patches_matching_integration + assert_in_fork do + patch_called = false + + mock_integration = mock_integration_object( + name: :test_lib, + on_patch: -> { patch_called = true } + ) - assert_equal 1, call_count, "run! should only execute once" + mock_registry = mock_registry_for("test_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("test_lib") + + assert patch_called, "on_require should call patch! 
on matching integration" + end end - # --- BRAINTRUST_AUTO_INSTRUMENT env var --- + def test_on_require_respects_only_filter + assert_in_fork do + patch_called = false - def test_run_skips_hooks_when_auto_instrument_disabled - hook_called = false + mock_integration = mock_integration_object( + name: :excluded_lib, + on_patch: -> { patch_called = true } + ) - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "false") do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { hook_called = true }) do - Braintrust::Contrib::Setup.run! - end + mock_registry = mock_registry_for("excluded_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, %i[openai anthropic]) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("excluded_lib") + + refute patch_called, "on_require should respect only filter" + end + end + + def test_on_require_respects_except_filter + assert_in_fork do + patch_called = false + + mock_integration = mock_integration_object( + name: :excluded_lib, + on_patch: -> { patch_called = true } + ) + + mock_registry = mock_registry_for("excluded_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, %i[excluded_lib]) + + Braintrust::Contrib::Setup.on_require("excluded_lib") + + refute patch_called, "on_require should respect except filter" end + end + + def test_on_require_skips_unavailable_integration + assert_in_fork do + patch_called = false + + mock_integration = mock_integration_object( + name: :unavailable_lib, + available: false, + on_patch: -> { patch_called = true } + ) + + mock_registry = mock_registry_for("unavailable_lib" => [mock_integration]) - refute hook_called, "hooks should not be set up when 
auto_instrument is disabled" + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("unavailable_lib") + + refute patch_called, "on_require should skip unavailable integrations" + end end - def test_run_sets_up_hooks_when_auto_instrument_enabled - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + def test_on_require_skips_incompatible_integration + assert_in_fork do + patch_called = false - hook_called = false + mock_integration = mock_integration_object( + name: :incompatible_lib, + compatible: false, + on_patch: -> { patch_called = true } + ) - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { hook_called = true }) do - Braintrust::Contrib::Setup.run! - end + mock_registry = mock_registry_for("incompatible_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("incompatible_lib") + + refute patch_called, "on_require should skip incompatible integrations" end + end + + def test_on_require_returns_early_without_registry + assert_in_fork do + Braintrust::Contrib::Setup.instance_variable_set(:@registry, nil) - assert hook_called, "hooks should be set up when auto_instrument is enabled" + # Should not raise + Braintrust::Contrib::Setup.on_require("anything") + end end - def test_run_sets_up_hooks_by_default - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + def test_on_require_logs_errors_without_crashing + assert_in_fork do + error_logged = false + + mock_integration = mock_integration_object( + name: :error_lib, + on_patch: 
-> { raise "Patch error!" } + ) - hook_called = false + mock_registry = mock_registry_for("error_lib" => [mock_integration]) - # Ensure env var is not set - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { hook_called = true }) do - Braintrust::Contrib::Setup.run! + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Log.stub(:error, ->(_msg) { error_logged = true }) do + Braintrust::Contrib::Setup.on_require("error_lib") end - end - assert hook_called, "hooks should be set up by default" + assert error_logged, "on_require should log errors without crashing" + end end - # --- rails_environment? --- + # --- with_reentrancy_guard --- - def test_rails_environment_returns_false_when_rails_not_defined - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + def test_with_reentrancy_guard_prevents_nested_calls + assert_in_fork do + outer_ran = false + inner_ran = false - refute Braintrust::Contrib::Setup.rails_environment? 
+ Braintrust::Contrib::Setup.with_reentrancy_guard do + outer_ran = true + Braintrust::Contrib::Setup.with_reentrancy_guard do + inner_ran = true + end + end + + assert outer_ran, "outer block should run" + refute inner_ran, "inner block should be skipped due to reentrancy guard" + end end - def test_rails_environment_returns_false_when_rails_application_nil - mock_rails = Module.new do - def self.respond_to?(method) - method == :application || super + def test_with_reentrancy_guard_resets_after_completion + assert_in_fork do + Braintrust::Contrib::Setup.with_reentrancy_guard do + # first call end - def self.application - nil + second_call_ran = false + Braintrust::Contrib::Setup.with_reentrancy_guard do + second_call_ran = true end - end - Object.stub_const(:Rails, mock_rails) do - refute Braintrust::Contrib::Setup.rails_environment? + assert second_call_ran, "second call should run after first completes" end end - def test_rails_environment_returns_true_when_rails_railtie_defined - with_mock_rails_railtie do - assert Braintrust::Contrib::Setup.rails_environment? + def test_with_reentrancy_guard_logs_errors + assert_in_fork do + error_logged = false + + Braintrust::Log.stub(:error, ->(_msg) { error_logged = true }) do + Braintrust::Contrib::Setup.with_reentrancy_guard do + raise "Test error" + end + end + + assert error_logged, "with_reentrancy_guard should log errors" end end - # --- setup_rails_hook! --- + # --- install_railtie! --- - def test_setup_rails_hook_loads_railtie - railtie_loaded = false + def test_install_railtie_is_idempotent + assert_in_fork do + call_count = 0 - Braintrust::Contrib::Setup.stub(:require_relative, ->(path) { railtie_loaded = true if path == "rails/railtie" }) do - Braintrust::Contrib::Setup.setup_rails_hook! - end + Braintrust::Contrib::Setup.stub(:require_relative, ->(path) { call_count += 1 if path == "rails/railtie" }) do + Braintrust::Contrib::Setup.install_railtie! + Braintrust::Contrib::Setup.install_railtie! 
+ Braintrust::Contrib::Setup.install_railtie! + end - assert railtie_loaded, "setup_rails_hook! should load rails/railtie" + assert_equal 1, call_count, "install_railtie! should only load railtie once" + end end - # --- setup_require_hook! --- - # These tests run in a fork to prevent the require hook from leaking into other tests + # --- install_require_hook! --- - def test_setup_require_hook_patches_kernel_require + def test_install_require_hook_is_idempotent assert_in_fork do - Braintrust::Contrib::Setup.setup_require_hook! + call_count = 0 + original_prepend = Kernel.method(:prepend) + + Kernel.define_singleton_method(:prepend) do |mod| + call_count += 1 if mod == Braintrust::Contrib::RequireHook + original_prepend.call(mod) + end - # Check that require still works normally - require "json" # Should work normally + Braintrust::Contrib::Setup.install_require_hook! + Braintrust::Contrib::Setup.install_require_hook! + Braintrust::Contrib::Setup.install_require_hook! - # If we got here without error, the hook is working - puts "require_hook_test:passed" + assert_equal 1, call_count, "install_require_hook! should only prepend once" end end - def test_setup_require_hook_instruments_matching_library + # --- install_watcher_hook! --- + + def test_install_watcher_hook_is_idempotent assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "test_integration_lib", extension: ".rb") do |test_file| - patch_called = false + Braintrust::Contrib::Setup.send(:install_watcher_hook!) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) 
+ + # require still works + require "json" + end + end + + def test_watcher_hook_triggers_on_require + assert_in_fork do + with_tmp_file(data: "# test file\n", filename: "watcher_test_lib", extension: ".rb") do |test_file| + on_require_called = false test_lib_name = File.basename(test_file.path, ".rb") $LOAD_PATH.unshift(File.dirname(test_file.path)) - mock_integration = mock_integration_object( - name: :test_lib, - on_patch: -> { patch_called = true } - ) + Braintrust::Contrib::Setup.instance_variable_set(:@registry, Braintrust::Contrib::Registry.instance) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) - - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name + original_on_require = Braintrust::Contrib::Setup.method(:on_require) + Braintrust::Contrib::Setup.define_singleton_method(:on_require) do |path| + on_require_called = true if path.include?(test_lib_name) + original_on_require.call(path) end - if patch_called - puts "instrument_test:passed" - else - puts "instrument_test:failed - patch! was not called" - exit 1 - end + Braintrust::Contrib::Setup.send(:install_watcher_hook!) 
+ require test_lib_name + + assert on_require_called, "on_require should be called" end end end - def test_setup_require_hook_respects_only_filter - assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "excluded_lib", extension: ".rb") do |test_file| - patch_called = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) + def test_watcher_hook_upgrades_to_require_hook_when_zeitwerk_required + skip "Test not applicable when Zeitwerk is already loaded" if defined?(::Zeitwerk) - mock_integration = mock_integration_object( - name: :excluded_lib, - on_patch: -> { patch_called = true } - ) + assert_in_fork do + # Create a fake zeitwerk.rb that defines the Zeitwerk constant + with_tmp_file(data: "module Zeitwerk; end\n", filename: "zeitwerk", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) - ENV["BRAINTRUST_INSTRUMENT_ONLY"] = "openai,anthropic" + refute Braintrust::Contrib::Setup.require_hook_installed?, "require hook should not be installed yet" - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + require "zeitwerk" - if patch_called - puts "only_filter_test:failed - patch! 
should not be called for excluded lib" - exit 1 - else - puts "only_filter_test:passed" - end + assert Braintrust::Contrib::Setup.require_hook_installed?, "require hook should be installed after zeitwerk loads" end end end - def test_setup_require_hook_respects_except_filter + def test_watcher_hook_does_not_upgrade_for_non_zeitwerk_require_even_if_constant_exists + skip "Test not applicable when Zeitwerk is already loaded" if defined?(::Zeitwerk) + assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "excluded_lib", extension: ".rb") do |test_file| - patch_called = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) + # Define Zeitwerk constant first + Object.const_set(:Zeitwerk, Module.new) - mock_integration = mock_integration_object( - name: :excluded_lib, - on_patch: -> { patch_called = true } - ) + # Create an unrelated file (doesn't need exact_name since we're testing it DOESN'T match) + with_tmp_file(data: "# unrelated\n", filename: "unrelated_lib", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) - ENV["BRAINTRUST_INSTRUMENT_EXCEPT"] = "excluded_lib" + refute Braintrust::Contrib::Setup.require_hook_installed?, "require hook should not be installed yet" - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + require "unrelated_lib" - if patch_called - puts "except_filter_test:failed - patch! 
should not be called for excluded lib" - exit 1 - else - puts "except_filter_test:passed" - end + refute Braintrust::Contrib::Setup.require_hook_installed?, "require hook should NOT be installed for non-zeitwerk require" end end end - def test_setup_require_hook_skips_unavailable_integration + def test_watcher_hook_upgrades_to_railtie_when_rails_required + skip "Test not applicable when Rails is already loaded" if defined?(::Rails::Railtie) + assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "unavailable_lib", extension: ".rb") do |test_file| - patch_called = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) + # Create a fake rails.rb that defines the Rails::Railtie constant + with_tmp_file(data: "module Rails; class Railtie; end; end\n", filename: "rails", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) + + railtie_installed = false + Braintrust::Contrib::Setup.define_singleton_method(:install_railtie!) do + railtie_installed = true + @railtie_installed = true + end - mock_integration = mock_integration_object( - name: :unavailable_lib, - available: false, - on_patch: -> { patch_called = true } - ) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + refute railtie_installed, "railtie should not be installed yet" - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + require "rails" - if patch_called - puts "unavailable_test:failed - patch! 
should not be called for unavailable integration" - exit 1 - else - puts "unavailable_test:passed" + assert railtie_installed, "railtie should be installed after rails loads" + end + end + end + + def test_watcher_hook_does_not_upgrade_for_non_rails_require_even_if_constant_exists + skip "Test not applicable when Rails is already loaded" if defined?(::Rails::Railtie) + + assert_in_fork do + # Define Rails::Railtie constant first + Object.const_set(:Rails, Module.new) + Rails.const_set(:Railtie, Class.new) + + # Create an unrelated file (doesn't need exact_name since we're testing it DOESN'T match) + with_tmp_file(data: "# unrelated\n", filename: "another_lib", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) + + railtie_installed = false + Braintrust::Contrib::Setup.define_singleton_method(:install_railtie!) do + railtie_installed = true + @railtie_installed = true end + + Braintrust::Contrib::Setup.send(:install_watcher_hook!) + + refute railtie_installed, "railtie should not be installed yet" + + require "another_lib" + + refute railtie_installed, "railtie should NOT be installed for non-rails require" end end end - def test_setup_require_hook_skips_incompatible_integration + # --- Integration tests with actual require hook --- + + def test_require_hook_instruments_matching_library assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "incompatible_lib", extension: ".rb") do |test_file| + with_tmp_file(data: "# test file\n", filename: "test_integration_lib", extension: ".rb") do |test_file| patch_called = false test_lib_name = File.basename(test_file.path, ".rb") $LOAD_PATH.unshift(File.dirname(test_file.path)) mock_integration = mock_integration_object( - name: :incompatible_lib, - compatible: false, + name: :test_lib, on_patch: -> { patch_called = true } ) mock_registry = mock_registry_for(test_lib_name => [mock_integration]) - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - 
Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) - if patch_called - puts "incompatible_test:failed - patch! should not be called for incompatible integration" - exit 1 - else - puts "incompatible_test:passed" - end + Braintrust::Contrib::Setup.install_require_hook! + require test_lib_name + + assert patch_called, "patch! should be called" end end end - def test_setup_require_hook_has_reentrancy_guard + def test_require_hook_has_reentrancy_guard assert_in_fork do with_tmp_file(data: "# reentrant lib\n", filename: "reentrant_lib", extension: ".rb") do |reentrant_file| with_tmp_file(data: "# nested lib\n", filename: "nested_lib", extension: ".rb") do |nested_file| @@ -314,50 +490,14 @@ def test_setup_require_hook_has_reentrancy_guard nested_lib_name => [nested_integration] ) - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require reentrant_lib_name - end - - # The nested integration should NOT have been patched due to reentrancy guard - if require_calls_during_patch == [:patch_completed] - puts "reentrancy_test:passed" - else - puts "reentrancy_test:failed - got #{require_calls_during_patch.inspect}" - exit 1 - end - end - end - end - end - - def test_setup_require_hook_logs_errors_without_crashing - assert_in_fork do - with_tmp_file(data: "# error lib\n", filename: "error_lib", extension: ".rb") do |test_file| - error_logged = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) - - mock_integration = mock_integration_object( - name: :error_lib, - on_patch: -> { raise "Patch error!" 
} - ) + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + Braintrust::Contrib::Setup.install_require_hook! + require reentrant_lib_name - with_stubs( - [Braintrust::Contrib::Registry, :instance, mock_registry], - [Braintrust::Log, :error, ->(_msg) { error_logged = true }] - ) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end - - if error_logged - puts "error_handling_test:passed" - else - puts "error_handling_test:failed - error was not logged" - exit 1 + assert_equal [:patch_completed], require_calls_during_patch, "nested integration should not be patched" end end end @@ -376,6 +516,15 @@ def with_mock_rails_railtie end end + # Helper to stub Zeitwerk + def with_mock_zeitwerk + mock_zeitwerk = Module.new + + Object.stub_const(:Zeitwerk, mock_zeitwerk) do + yield + end + end + # Helper to create a mock integration object def mock_integration_object(name:, available: true, compatible: true, on_patch: nil) integration = Object.new @@ -387,7 +536,6 @@ def mock_integration_object(name:, available: true, compatible: true, on_patch: end # Helper to create a mock registry that maps lib names to integrations - # @param mappings [Hash] lib_name => [integrations] mapping def mock_registry_for(mappings) registry = Object.new registry.define_singleton_method(:integrations_for_require_path) do |path| diff --git a/test/braintrust/setup_test.rb b/test/braintrust/setup_test.rb index aca9c5b..a79cd10 100644 --- a/test/braintrust/setup_test.rb +++ b/test/braintrust/setup_test.rb @@ -20,12 +20,7 @@ def test_run_calls_braintrust_init require "braintrust/setup" end - if init_called - puts "test_run_calls_braintrust_init:passed" - else - puts "test_run_calls_braintrust_init:failed - init not called" - exit 1 - end + assert 
init_called, "Braintrust.init should be called" end end @@ -42,12 +37,7 @@ def test_run_calls_contrib_setup_run require "braintrust/setup" end - if contrib_setup_called - puts "test_run_calls_contrib_setup_run:passed" - else - puts "test_run_calls_contrib_setup_run:failed - Contrib::Setup.run! not called" - exit 1 - end + assert contrib_setup_called, "Contrib::Setup.run! should be called" end end @@ -68,12 +58,7 @@ def test_run_is_idempotent Braintrust::Setup.run! end - if call_count == 1 - puts "test_run_is_idempotent:passed" - else - puts "test_run_is_idempotent:failed - call_count=#{call_count}, expected 1" - exit 1 - end + assert_equal 1, call_count, "init should only be called once" end end @@ -96,12 +81,7 @@ def test_run_logs_error_on_init_failure msg.include?("Test init error") end - if error_logged - puts "test_run_logs_error_on_init_failure:passed" - else - puts "test_run_logs_error_on_init_failure:failed - logged: #{logged_messages.inspect}" - exit 1 - end + assert error_logged, "should log error message containing failure details" end end @@ -119,12 +99,7 @@ def test_run_continues_to_contrib_setup_after_init_failure require "braintrust/setup" end - if contrib_setup_called - puts "test_run_continues_to_contrib_setup_after_init_failure:passed" - else - puts "test_run_continues_to_contrib_setup_after_init_failure:failed" - exit 1 - end + assert contrib_setup_called, "Contrib::Setup.run! 
should be called even after init failure"
    end
  end
end
diff --git a/test/support/fixture_helper.rb b/test/support/fixture_helper.rb
index 26e6dcb..5971769 100644
--- a/test/support/fixture_helper.rb
+++ b/test/support/fixture_helper.rb
@@ -25,23 +25,32 @@ def with_png_file(data: PNG_DATA, filename: "test_image", extension: ".png", &bl
   # Create a temporary file and yield to the block
   # File is automatically cleaned up after the block
   # @param data [String] content to write to the file (default: empty)
-  # @param filename [String] prefix for the temp file name
+  # @param filename [String] prefix for the temp file name (or exact name if exact_name: true)
   # @param extension [String] extension for the temp file name
   # @param binary [Boolean] whether to write in binary mode
-  # @yield [Tempfile] the temporary file
-  def with_tmp_file(data: "", filename: "test", extension: ".txt", binary: false)
+  # @param exact_name [Boolean] if true, use exact filename without random suffix
+  # @yield [Tempfile, String] the temporary file (Tempfile) or path (String if exact_name)
+  def with_tmp_file(data: "", filename: "test", extension: ".txt", binary: false, exact_name: false)
    return unless block_given?

-    require "tempfile"
-    tmpfile = Tempfile.new([filename, extension])
-    tmpfile.binmode if binary
-    tmpfile.write(data)
-    tmpfile.close
+    if exact_name
+      Dir.mktmpdir do |dir|
+        path = File.join(dir, "#{filename}#{extension}")
+        File.write(path, data, mode: binary ? 
"wb" : "w") + yield path + end + else + require "tempfile" + tmpfile = Tempfile.new([filename, extension]) + tmpfile.binmode if binary + tmpfile.write(data) + tmpfile.close - begin - yield(tmpfile) - ensure - tmpfile.unlink + begin + yield(tmpfile) + ensure + tmpfile.unlink + end end end end From 9e4fae8c9283c4bf02a9bbbe18d0ff9b97415456 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sat, 10 Jan 2026 01:38:03 -0500 Subject: [PATCH 27/27] Fixed: Broken permalinks in examples --- examples/contrib/anthropic.rb | 28 ++++++++++++++++++---------- examples/contrib/openai.rb | 28 ++++++++++++++++++---------- examples/contrib/ruby-openai.rb | 33 +++++++++++++++++++++------------ examples/contrib/ruby_llm.rb | 15 ++++++++++++--- 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/examples/contrib/anthropic.rb b/examples/contrib/anthropic.rb index 4f2dd1f..43ef642 100644 --- a/examples/contrib/anthropic.rb +++ b/examples/contrib/anthropic.rb @@ -25,18 +25,26 @@ # Create Anthropic client client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -# Make a message request (automatically traced!) -client.messages.create( - model: "claude-3-haiku-20240307", - max_tokens: 100, - system: "You are a helpful assistant.", - messages: [ - {role: "user", content: "Say hello and tell me a short joke."} - ] -) +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") + +root_span = nil +tracer.in_span("examples/contrib/anthropic.rb") do |span| + root_span = span + + # Make a message request (automatically traced!) 
+ client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 100, + system: "You are a helpful assistant.", + messages: [ + {role: "user", content: "Say hello and tell me a short joke."} + ] + ) +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust diff --git a/examples/contrib/openai.rb b/examples/contrib/openai.rb index dbaa27d..eb7d0ee 100644 --- a/examples/contrib/openai.rb +++ b/examples/contrib/openai.rb @@ -25,18 +25,26 @@ # Create OpenAI client client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -# Make a chat completion request (automatically traced!) -client.chat.completions.create( - messages: [ - {role: "system", content: "You are a helpful assistant."}, - {role: "user", content: "Say hello and tell me a short joke."} - ], - model: "gpt-4o-mini", - max_tokens: 100 -) +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") + +root_span = nil +tracer.in_span("examples/contrib/openai.rb") do |span| + root_span = span + + # Make a chat completion request (automatically traced!) 
+ client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 + ) +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust diff --git a/examples/contrib/ruby-openai.rb b/examples/contrib/ruby-openai.rb index 01c7ba7..3072d22 100644 --- a/examples/contrib/ruby-openai.rb +++ b/examples/contrib/ruby-openai.rb @@ -7,7 +7,7 @@ require "opentelemetry/sdk" # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec appraisal ruby-openai examples/internal/contrib/ruby_openai/basic.rb +# OPENAI_API_KEY=your-openai-key bundle exec appraisal ruby-openai ruby examples/contrib/ruby-openai.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -25,19 +25,28 @@ # Create OpenAI client client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) -client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "system", content: "You are a helpful assistant."}, - {role: "user", content: "Say hello and tell me a short joke."} - ], - max_tokens: 100 - } -) +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") + +root_span = nil +tracer.in_span("examples/contrib/ruby-openai.rb") do |span| + root_span = span + + # Make a chat request (automatically traced!) 
+ client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + max_tokens: 100 + } + ) +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust diff --git a/examples/contrib/ruby_llm.rb b/examples/contrib/ruby_llm.rb index 02acf0f..97078f0 100644 --- a/examples/contrib/ruby_llm.rb +++ b/examples/contrib/ruby_llm.rb @@ -26,11 +26,20 @@ config.openai_api_key = ENV["OPENAI_API_KEY"] end -chat = RubyLLM.chat(model: "gpt-4o-mini") -chat.ask("What is the capital of France?") +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-example") + +root_span = nil +tracer.in_span("examples/contrib/ruby_llm.rb") do |span| + root_span = span + + # Make a chat request (automatically traced!) + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.ask("What is the capital of France?") +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust