From d77c8168269249137d87b3652e713fd02e05df8e Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 15:09:44 -0500 Subject: [PATCH 01/27] Added: minitest-stub-const gem for testing --- Gemfile | 1 + Gemfile.lock | 65 ++++++++++-------------- gemfiles/anthropic.gemfile | 1 + gemfiles/anthropic_1.11.gemfile | 14 ----- gemfiles/anthropic_1.12.gemfile | 14 ----- gemfiles/anthropic_1_11.gemfile | 1 + gemfiles/anthropic_1_12.gemfile | 1 + gemfiles/anthropic_uninstalled.gemfile | 1 + gemfiles/openai.gemfile | 1 + gemfiles/openai_0.33.gemfile | 14 ----- gemfiles/openai_0.34.gemfile | 14 ----- gemfiles/openai_0_33.gemfile | 1 + gemfiles/openai_0_34.gemfile | 1 + gemfiles/openai_uninstalled.gemfile | 1 + gemfiles/opentelemetry_latest.gemfile | 1 + gemfiles/opentelemetry_min.gemfile | 1 + gemfiles/ruby_llm.gemfile | 1 + gemfiles/ruby_llm_1_8.gemfile | 1 + gemfiles/ruby_llm_1_9.gemfile | 1 + gemfiles/ruby_llm_uninstalled.gemfile | 1 + gemfiles/ruby_openai.gemfile | 1 + gemfiles/ruby_openai_7_0.gemfile | 1 + gemfiles/ruby_openai_8_0.gemfile | 1 + gemfiles/ruby_openai_uninstalled.gemfile | 1 + test/test_helper.rb | 1 + 25 files changed, 46 insertions(+), 95 deletions(-) delete mode 100644 gemfiles/anthropic_1.11.gemfile delete mode 100644 gemfiles/anthropic_1.12.gemfile delete mode 100644 gemfiles/openai_0.33.gemfile delete mode 100644 gemfiles/openai_0.34.gemfile diff --git a/Gemfile b/Gemfile index 3edd697..eeccddf 100644 --- a/Gemfile +++ b/Gemfile @@ -7,3 +7,4 @@ gemspec # Additional development tools not in gemspec (optional/convenience) gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" diff --git a/Gemfile.lock b/Gemfile.lock index cff826e..169bf43 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -9,47 +9,39 @@ PATH GEM remote: https://rubygems.org/ specs: - addressable (2.8.7) - public_suffix (>= 2.0.2, < 7.0) + addressable (2.8.8) + public_suffix (>= 2.0.2, < 8.0) ansi (1.5.0) appraisal (2.5.0) bundler rake thor (>= 0.14.0) ast (2.4.3) - 
bigdecimal (3.3.1) + bigdecimal (4.0.1) builder (3.3.0) crack (1.0.1) bigdecimal rexml docile (1.4.1) - google-protobuf (4.33.0-aarch64-linux-gnu) - bigdecimal - rake (>= 13) - google-protobuf (4.33.0-arm64-darwin) - bigdecimal - rake (>= 13) - google-protobuf (4.33.0-x64-mingw-ucrt) - bigdecimal - rake (>= 13) - google-protobuf (4.33.0-x86_64-linux-gnu) + google-protobuf (4.33.2-x86_64-linux-gnu) bigdecimal rake (>= 13) googleapis-common-protos-types (1.22.0) google-protobuf (~> 4.26) hashdiff (1.2.1) - json (2.15.1) + json (2.18.0) kramdown (2.5.1) rexml (>= 3.3.9) language_server-protocol (3.17.0.5) lint_roller (1.1.0) - minitest (5.26.0) + minitest (5.27.0) minitest-reporters (1.7.1) ansi builder minitest (>= 5.0) ruby-progressbar - openssl (3.3.1) + minitest-stub-const (0.6) + openssl (3.3.2) opentelemetry-api (1.7.0) opentelemetry-common (0.23.0) opentelemetry-api (~> 1.0) @@ -70,17 +62,17 @@ GEM opentelemetry-semantic_conventions (1.36.0) opentelemetry-api (~> 1.0) parallel (1.27.0) - parser (3.3.9.0) + parser (3.3.10.0) ast (~> 2.4.1) racc - prism (1.6.0) - public_suffix (6.0.2) + prism (1.7.0) + public_suffix (7.0.2) racc (1.8.1) rainbow (3.1.1) - rake (13.3.0) + rake (13.3.1) regexp_parser (2.11.3) rexml (3.4.4) - rubocop (1.80.2) + rubocop (1.81.7) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -88,16 +80,16 @@ GEM parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 2.9.3, < 3.0) - rubocop-ast (>= 1.46.0, < 2.0) + rubocop-ast (>= 1.47.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.47.1) + rubocop-ast (1.49.0) parser (>= 3.3.7.2) - prism (~> 1.4) - rubocop-performance (1.25.0) + prism (~> 1.7) + rubocop-performance (1.26.1) lint_roller (~> 1.1) rubocop (>= 1.75.0, < 2.0) - rubocop-ast (>= 1.38.0, < 2.0) + rubocop-ast (>= 1.47.1, < 2.0) ruby-progressbar (1.13.0) simplecov (0.22.0) docile (~> 1.1) @@ -105,36 +97,30 @@ GEM simplecov_json_formatter (~> 0.1) 
simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) - standard (1.51.1) + standard (1.52.0) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.0) - rubocop (~> 1.80.2) + rubocop (~> 1.81.7) standard-custom (~> 1.0.0) standard-performance (~> 1.8) standard-custom (1.0.2) lint_roller (~> 1.0) rubocop (~> 1.50) - standard-performance (1.8.0) + standard-performance (1.9.0) lint_roller (~> 1.1) - rubocop-performance (~> 1.25.0) + rubocop-performance (~> 1.26.0) thor (1.4.0) unicode-display_width (3.2.0) unicode-emoji (~> 4.1) - unicode-emoji (4.1.0) + unicode-emoji (4.2.0) vcr (6.4.0) - webmock (3.25.1) + webmock (3.26.1) addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) - yard (0.9.37) + yard (0.9.38) PLATFORMS - aarch64-linux - arm64-darwin-23 - arm64-darwin-24 - arm64-darwin-25 - x64-mingw - x64-mingw-ucrt x86_64-linux DEPENDENCIES @@ -143,6 +129,7 @@ DEPENDENCIES kramdown (~> 2.0) minitest (~> 5.0) minitest-reporters (~> 1.6) + minitest-stub-const (~> 0.6) rake (~> 13.0) simplecov (~> 0.22) standard (~> 1.0) diff --git a/gemfiles/anthropic.gemfile b/gemfiles/anthropic.gemfile index ca1632c..56e98f0 100644 --- a/gemfiles/anthropic.gemfile +++ b/gemfiles/anthropic.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "anthropic", ">= 1.11" gemspec path: "../" diff --git a/gemfiles/anthropic_1.11.gemfile b/gemfiles/anthropic_1.11.gemfile deleted file mode 100644 index dc96e4f..0000000 --- a/gemfiles/anthropic_1.11.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "anthropic", "~> 1.11.0" - -gemspec path: "../" diff --git a/gemfiles/anthropic_1.12.gemfile b/gemfiles/anthropic_1.12.gemfile deleted file mode 100644 
index b662fe7..0000000 --- a/gemfiles/anthropic_1.12.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "anthropic", "~> 1.12.0" - -gemspec path: "../" diff --git a/gemfiles/anthropic_1_11.gemfile b/gemfiles/anthropic_1_11.gemfile index 6975b97..ed708b3 100644 --- a/gemfiles/anthropic_1_11.gemfile +++ b/gemfiles/anthropic_1_11.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "anthropic", "~> 1.11.0" gemspec path: "../" diff --git a/gemfiles/anthropic_1_12.gemfile b/gemfiles/anthropic_1_12.gemfile index 67e0b47..29b1b93 100644 --- a/gemfiles/anthropic_1_12.gemfile +++ b/gemfiles/anthropic_1_12.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "anthropic", "~> 1.12.0" gemspec path: "../" diff --git a/gemfiles/anthropic_uninstalled.gemfile b/gemfiles/anthropic_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/anthropic_uninstalled.gemfile +++ b/gemfiles/anthropic_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/gemfiles/openai.gemfile b/gemfiles/openai.gemfile index edfe9d3..dcc2e81 100644 --- a/gemfiles/openai.gemfile +++ b/gemfiles/openai.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "openai", ">= 0.34" gem "base64" diff --git a/gemfiles/openai_0.33.gemfile b/gemfiles/openai_0.33.gemfile deleted file mode 100644 index 1b8d2d2..0000000 --- a/gemfiles/openai_0.33.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - 
-source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "openai", "~> 0.33.0" - -gemspec path: "../" diff --git a/gemfiles/openai_0.34.gemfile b/gemfiles/openai_0.34.gemfile deleted file mode 100644 index 24b39dc..0000000 --- a/gemfiles/openai_0.34.gemfile +++ /dev/null @@ -1,14 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rake", "~> 13.0" -gem "minitest", "~> 5.0" -gem "minitest-reporters", "~> 1.6" -gem "standard", "~> 1.0" -gem "simplecov", "~> 0.22" -gem "vcr", "~> 6.0" -gem "webmock", "~> 3.0" -gem "openai", "~> 0.34.0" - -gemspec path: "../" diff --git a/gemfiles/openai_0_33.gemfile b/gemfiles/openai_0_33.gemfile index a9eff3e..6104332 100644 --- a/gemfiles/openai_0_33.gemfile +++ b/gemfiles/openai_0_33.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "openai", "~> 0.33.0" gem "base64" diff --git a/gemfiles/openai_0_34.gemfile b/gemfiles/openai_0_34.gemfile index 2e2d6ab..d8969d3 100644 --- a/gemfiles/openai_0_34.gemfile +++ b/gemfiles/openai_0_34.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "openai", "~> 0.34.0" gem "base64" diff --git a/gemfiles/openai_uninstalled.gemfile b/gemfiles/openai_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/openai_uninstalled.gemfile +++ b/gemfiles/openai_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/gemfiles/opentelemetry_latest.gemfile b/gemfiles/opentelemetry_latest.gemfile index 9ac76ed..d309a6b 100644 --- a/gemfiles/opentelemetry_latest.gemfile +++ b/gemfiles/opentelemetry_latest.gemfile @@ -3,6 +3,7 @@ source 
"https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "opentelemetry-sdk", ">= 1.10" gem "opentelemetry-exporter-otlp", ">= 0.31" diff --git a/gemfiles/opentelemetry_min.gemfile b/gemfiles/opentelemetry_min.gemfile index 13a4b77..8edfd5d 100644 --- a/gemfiles/opentelemetry_min.gemfile +++ b/gemfiles/opentelemetry_min.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "opentelemetry-sdk", "~> 1.3.0" gem "opentelemetry-exporter-otlp", "~> 0.28.0" diff --git a/gemfiles/ruby_llm.gemfile b/gemfiles/ruby_llm.gemfile index 0199dc1..6f2c658 100644 --- a/gemfiles/ruby_llm.gemfile +++ b/gemfiles/ruby_llm.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby_llm", ">= 1.9" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_8.gemfile b/gemfiles/ruby_llm_1_8.gemfile index 1c1e3eb..5a5a016 100644 --- a/gemfiles/ruby_llm_1_8.gemfile +++ b/gemfiles/ruby_llm_1_8.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby_llm", "~> 1.8.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_9.gemfile b/gemfiles/ruby_llm_1_9.gemfile index f3bd0c2..deee1d0 100644 --- a/gemfiles/ruby_llm_1_9.gemfile +++ b/gemfiles/ruby_llm_1_9.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby_llm", "~> 1.9.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_uninstalled.gemfile b/gemfiles/ruby_llm_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/ruby_llm_uninstalled.gemfile +++ b/gemfiles/ruby_llm_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/gemfiles/ruby_openai.gemfile 
b/gemfiles/ruby_openai.gemfile index d5aabc3..29caf96 100644 --- a/gemfiles/ruby_openai.gemfile +++ b/gemfiles/ruby_openai.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby-openai", ">= 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_7_0.gemfile b/gemfiles/ruby_openai_7_0.gemfile index 6a30459..4c9e319 100644 --- a/gemfiles/ruby_openai_7_0.gemfile +++ b/gemfiles/ruby_openai_7_0.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby-openai", "~> 7.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_8_0.gemfile b/gemfiles/ruby_openai_8_0.gemfile index e9c161e..3044fea 100644 --- a/gemfiles/ruby_openai_8_0.gemfile +++ b/gemfiles/ruby_openai_8_0.gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gem "ruby-openai", "~> 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_uninstalled.gemfile b/gemfiles/ruby_openai_uninstalled.gemfile index 41d4801..7ec6aba 100644 --- a/gemfiles/ruby_openai_uninstalled.gemfile +++ b/gemfiles/ruby_openai_uninstalled.gemfile @@ -3,5 +3,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" gemspec path: "../" diff --git a/test/test_helper.rb b/test/test_helper.rb index ee2c7f2..45d085b 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -17,6 +17,7 @@ require "braintrust" require "minitest/autorun" +require "minitest/stub_const" # Show test timings when MT_VERBOSE is set if ENV["MT_VERBOSE"] From 28b1209580a5753a56e2c635fdf8e8d2cce9ecf4 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 14:24:06 -0500 Subject: [PATCH 02/27] Added: mock_chain to test_helper --- test/support/assert_helper.rb | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/support/assert_helper.rb 
b/test/support/assert_helper.rb index 8d1d725..7b943cb 100644 --- a/test/support/assert_helper.rb +++ b/test/support/assert_helper.rb @@ -1,6 +1,47 @@ module Test module Support module AssertHelper + # Build a chain of mocks for nested method calls. + # Useful for testing code that traverses object hierarchies like `obj.foo.bar.baz`. + # + # Creates intermediate mocks that return each other in sequence, with the final + # mock returning the specified terminal value. All intermediate mocks are verified + # after the block. The caller is responsible for verifying the terminal value if needed. + # + # @param methods [Array] chain of method names to mock + # @param returns [Object] the value returned by the final method (can be a Class, Mock, or any object) + # @yield [root] the root mock (entry point to the chain) + # + # @example Testing a method chain with a Class as terminal + # fake_singleton = Class.new { include SomeModule } + # mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + # assert SomePatcher.patched?(target: client) + # end + # + # @example Testing a method chain with a Mock as terminal + # terminal = Minitest::Mock.new + # terminal.expect(:include, true, [SomeModule]) + # mock_chain(:foo, :bar, returns: terminal) do |root| + # SomeCode.do_something(root) + # end + # terminal.verify + # + def mock_chain(*methods, returns:) + current = returns + mocks = [] + + methods.reverse_each do |method| + mock = Minitest::Mock.new + mock.expect(method, current) + mocks.unshift(mock) + current = mock + end + + yield(mocks.first) + + mocks.each(&:verify) + end + # Runs the test inside a fork, to isolate its side-effects from the main process. 
# Similar in purpose to https://docs.ruby-lang.org/en/master/Ruby/Box.html#class-Ruby::Box # From cebd618c92fbddd457015407c9e0120bd89844af Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 15 Dec 2025 11:41:19 -0500 Subject: [PATCH 03/27] Added: Integration framework API --- CONTRIBUTING.md | 82 ++- lib/braintrust.rb | 1 + lib/braintrust/contrib.rb | 102 ++++ lib/braintrust/contrib/context.rb | 56 ++ lib/braintrust/contrib/integration.rb | 143 +++++ lib/braintrust/contrib/patcher.rb | 76 +++ lib/braintrust/contrib/registry.rb | 92 ++++ lib/braintrust/state.rb | 5 + test/braintrust/contrib/context_test.rb | 93 ++++ test/braintrust/contrib/integration_test.rb | 568 ++++++++++++++++++++ test/braintrust/contrib/patcher_test.rb | 281 ++++++++++ test/braintrust/contrib/registry_test.rb | 241 +++++++++ test/braintrust/contrib_test.rb | 200 +++++++ test/support/assert_helper.rb | 41 -- test/support/log_helper.rb | 22 + test/support/mock_helper.rb | 46 ++ test/test_helper.rb | 4 + 17 files changed, 2009 insertions(+), 44 deletions(-) create mode 100644 lib/braintrust/contrib.rb create mode 100644 lib/braintrust/contrib/context.rb create mode 100644 lib/braintrust/contrib/integration.rb create mode 100644 lib/braintrust/contrib/patcher.rb create mode 100644 lib/braintrust/contrib/registry.rb create mode 100644 test/braintrust/contrib/context_test.rb create mode 100644 test/braintrust/contrib/integration_test.rb create mode 100644 test/braintrust/contrib/patcher_test.rb create mode 100644 test/braintrust/contrib/registry_test.rb create mode 100644 test/braintrust/contrib_test.rb create mode 100644 test/support/log_helper.rb create mode 100644 test/support/mock_helper.rb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1301c3..f4bea9a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,9 +2,18 @@ Thanks for contributing! Follow TDD practices: write tests first, implement minimal code, refactor. 
+- [Quick Setup](#quick-setup) + - [Local development (on host)](#local-development-on-host) + - [Local development (in Docker)](#local-development-in-docker) +- [Development](#development) + - [Running tasks](#running-tasks) + - [Adding new integrations for AI Libraries](#adding-new-integrations-for-ai-libraries) +- [Testing](#testing) + - [With different Ruby versions](#with-different-ruby-versions) + ## Quick Setup -### Option 1: Local Development +### Local development (on host) ```bash # Clone repo @@ -31,7 +40,7 @@ cp .env.example .env rake ``` -### Option 2: Docker Container +### Local development (in Docker) Use the dev container if you prefer not to install Ruby tooling on your host machine. Your IDE edits files on the host; all tools run in the container. @@ -62,12 +71,78 @@ rake ## Development +### Running tasks + All of our common dev tasks are in rake. ```bash rake -T ``` +### Adding new integrations for AI Libraries + +To add instrumentation support for a new AI library, follow these steps: + +#### 1. Define the Integration + +Create a new file in `lib/braintrust/contrib/`: + +```ruby +# lib/braintrust/contrib/trustybrain_llm.rb +module Braintrust::Contrib + class TrustybrainLLM + include Integration + + def self.integration_name + :trustybrain_llm + end + + def self.gem_names + ["trustybrain_llm"] + end + + def self.patcher + TrustybrainLLMPatcher + end + end + + class TrustybrainLLMPatcher < Patcher + def self.perform_patch(context) + # Add your instrumentation here + # context.tracer_provider gives you access to the tracer + end + end +end +``` + +#### 2. Register It + +Add to `lib/braintrust/contrib.rb`: + +```ruby +require_relative "contrib/trustybrain_llm" + +# At the bottom: +Contrib::TrustybrainLLM.register! +``` + +#### 3. 
Write Tests + +Create `test/braintrust/contrib/trustybrain_llm_test.rb`: + +```ruby +require "test_helper" + +class Braintrust::Contrib::TrustybrainLLMTest < Minitest::Test + def test_integration_basics + assert_equal :trustybrain_llm, TrustybrainLLM.integration_name + assert_equal ["trustybrain_llm"], TrustybrainLLM.gem_names + end +end +``` + +See existing tests in `test/braintrust/contrib/` for complete examples of testing integrations, patchers, and the registry. + ## Testing We use VCR for making http tests fast. You can run tests with these enabled, @@ -78,7 +153,7 @@ for more details. rake -T test:vcr ``` -### Testing with Different Ruby Versions +### With different Ruby versions CI tests against Ruby 3.2, 3.3, and 3.4. To test locally with a different Ruby version: @@ -102,3 +177,4 @@ mise use ruby@3.4 ruby --version # => 3.4.x bundle exec rake test ``` + diff --git a/lib/braintrust.rb b/lib/braintrust.rb index 469cde1..979651e 100644 --- a/lib/braintrust.rb +++ b/lib/braintrust.rb @@ -7,6 +7,7 @@ require_relative "braintrust/api" require_relative "braintrust/internal/experiments" require_relative "braintrust/eval" +require_relative "braintrust/contrib" # Braintrust Ruby SDK # diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb new file mode 100644 index 0000000..f19c4fb --- /dev/null +++ b/lib/braintrust/contrib.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require_relative "contrib/registry" +require_relative "contrib/integration" +require_relative "contrib/patcher" +require_relative "contrib/context" + +module Braintrust + # Instrument a registered integration by name. + # This is the main entry point for activating integrations. 
+ # + # @param name [Symbol] The integration name (e.g., :openai, :anthropic) + # @param options [Hash] Optional configuration + # @option options [Object] :target Optional target instance to instrument specifically + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + # + # @example Instrument all OpenAI clients + # Braintrust.instrument!(:openai) + # + # @example Instrument specific OpenAI client instance + # client = OpenAI::Client.new + # Braintrust.instrument!(:openai, target: client, tracer_provider: my_provider) + def self.instrument!(name, **options) + Braintrust::Contrib.instrument!(name, **options) + end + + # Contrib framework for auto-instrumentation integrations. + # Provides a consistent interface for all integrations and enables + # reliable auto-instrumentation in later milestones. + module Contrib + class << self + # Get the global registry instance. + # @return [Registry] + def registry + Registry.instance + end + + # Initialize the contrib framework with optional configuration. + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider, nil] Optional tracer provider + # @return [void] + def init(tracer_provider: nil) + @default_tracer_provider = tracer_provider + end + + # Instrument a registered integration by name. + # This is the main entry point for activating integrations. 
+ # + # @param name [Symbol] The integration name (e.g., :openai, :anthropic) + # @param options [Hash] Optional configuration + # @option options [Object] :target Optional target instance to instrument specifically + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + # + # @example Instrument all OpenAI clients + # Braintrust::Contrib.instrument!(:openai) + # + # @example Instrument specific OpenAI client instance + # client = OpenAI::Client.new + # Braintrust::Contrib.instrument!(:openai, target: client, tracer_provider: my_provider) + def instrument!(name, **options) + if (integration = registry[name]) + integration.instrument!(**options) + else + Braintrust::Log.error("No integration for '#{name}' is defined!") + end + end + + # Get the default tracer provider, falling back to OpenTelemetry global. + # @return [OpenTelemetry::Trace::TracerProvider] + def default_tracer_provider + @default_tracer_provider || ::OpenTelemetry.tracer_provider + end + + # Get the context for a target object. + # @param target [Object] The object to retrieve context from + # @return [Context, nil] The context if found, nil otherwise + def context_for(target) + Context.from(target) + end + + # Get the tracer provider for a target. + # Checks target's context first, then falls back to contrib default. + # @param target [Object] The object to look up tracer provider for + # @return [OpenTelemetry::Trace::TracerProvider] + def tracer_provider_for(target) + context_for(target)&.[](:tracer_provider) || default_tracer_provider + end + + # Get a tracer for a target, using its context's tracer_provider if available. 
+ # @param target [Object] The object to look up context from + # @param name [String] Tracer name + # @return [OpenTelemetry::Trace::Tracer] + def tracer_for(target, name: "braintrust") + tracer_provider_for(target).tracer(name) + end + end + end +end + +# Load integration stubs (eager load minimal metadata). +# These will be added in subsequent milestones. diff --git a/lib/braintrust/contrib/context.rb b/lib/braintrust/contrib/context.rb new file mode 100644 index 0000000..229af0c --- /dev/null +++ b/lib/braintrust/contrib/context.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Per-instance or per-class configuration context. + # Allows attaching generic configuration to specific objects or classes. + class Context + # Set or update context on a target object. + # Creates a new context if one doesn't exist, or updates existing context. + # @param target [Object] The object to attach context to + # @param options [Hash] Configuration options to store + # @return [Context, nil] The existing context if updated, nil if created new or options empty + def self.set!(target, **options) + return nil if options.empty? + + if (ctx = from(target)) + # Update existing context + options.each { |k, v| ctx[k] = v } + else + # Create and attach new context + target.instance_variable_set(:@braintrust_context, new(**options)) + end + + ctx + end + + # Retrieve context from a target. + # @param target [Object] The object to retrieve context from + # @return [Context, nil] The context if found, nil otherwise + def self.from(target) + target&.instance_variable_get(:@braintrust_context) + end + + # @param options [Hash] Configuration options + def initialize(**options) + @options = options + end + + def [](key) + @options[key] + end + + def []=(key, value) + @options[key] = value + end + + # Get an option value with a default fallback. 
+ # @param key [Symbol, String] The option key + # @param default [Object] The default value if key not found + # @return [Object] The option value, or default if not found + def fetch(key, default) + @options.fetch(key, default) + end + end + end +end diff --git a/lib/braintrust/contrib/integration.rb b/lib/braintrust/contrib/integration.rb new file mode 100644 index 0000000..1791f6f --- /dev/null +++ b/lib/braintrust/contrib/integration.rb @@ -0,0 +1,143 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Base module defining the integration contract. + # Include this module in integration classes to define the schema. + # Delegates actual patching to a Patcher subclass. + module Integration + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + # Unique symbol name for this integration (e.g., :openai, :anthropic). + # @return [Symbol] + def integration_name + raise NotImplementedError, "#{self} must implement integration_name" + end + + # Array of gem names this integration supports. + # @return [Array] + def gem_names + raise NotImplementedError, "#{self} must implement gem_names" + end + + # Require paths for auto-instrument detection. + # Default implementation returns gem_names. + # @return [Array] + def require_paths + gem_names + end + + # Is the target library available for loading? + # @return [Boolean] + def available? + gem_names.any? { |name| Gem.loaded_specs.key?(name) } + end + + # Is the target library loaded? + # @return [Boolean] + def loaded? + raise NotImplementedError, "#{self} must implement loaded?" + end + + # Minimum compatible version (optional, inclusive). + # @return [String, nil] + def minimum_version + nil + end + + # Maximum compatible version (optional, inclusive). + # @return [String, nil] + def maximum_version + nil + end + + # Is the library version compatible? + # @return [Boolean] + def compatible? + return false unless available? 
+ + gem_names.each do |name| + spec = Gem.loaded_specs[name] + next unless spec + + version = spec.version + return false if minimum_version && version < Gem::Version.new(minimum_version) + return false if maximum_version && version > Gem::Version.new(maximum_version) + return true + end + false + end + + # Array of patcher classes for this integration. + # Override to return multiple patchers for version-specific logic. + # @return [Array] Array of patcher classes + def patchers + [patcher] # Default: single patcher + end + + # Convenience method for single patcher (existing pattern). + # Override this OR patchers (not both). + # @return [Class] The patcher class + def patcher + raise NotImplementedError, "#{self} must implement patcher or patchers" + end + + # Instrument this integration with optional configuration. + # If a target is provided, configures the target instance specifically. + # Otherwise, applies class-level instrumentation to all instances. + # + # @param options [Hash] Configuration options + # @option options [Object] :target Optional target instance to instrument + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if patching succeeded or was already done + # + # @example Class-level instrumentation (all clients) + # integration.instrument!(tracer_provider: my_provider) + # + # @example Instance-level instrumentation (specific client) + # integration.instrument!(target: client, tracer_provider: my_provider) + def instrument!(**options) + if options[:target] + # Configure the target with provided options (exclude :target from context) + context_options = options.except(:target) + Contrib::Context.set!(options[:target], **context_options) unless context_options.empty? + end + + patch!(**options) + end + + # Apply instrumentation (idempotent). Tries all applicable patchers. + # This method is typically called by instrument! after configuration. 
+ # + # @param options [Hash] Configuration options + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if any patching succeeded or was already done + def patch!(**options) + return false unless available? && loaded? && compatible? + + # Try all applicable patchers + success = false + patchers.each do |patch| + # Check if this patcher is applicable + next unless patch.applicable? + + # Attempt to patch (patcher checks applicable? again under lock) + success = true if patch.patch!(**options) + end + + Braintrust::Log.debug("No applicable patcher found for #{integration_name}") unless success + success + end + + # Register this integration with the global registry. + def register! + Registry.instance.register(self) + end + end + end + end +end diff --git a/lib/braintrust/contrib/patcher.rb b/lib/braintrust/contrib/patcher.rb new file mode 100644 index 0000000..6de99ad --- /dev/null +++ b/lib/braintrust/contrib/patcher.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Base class for all patchers. + # Provides thread-safe, idempotent patching with error handling. + class Patcher + # For thread-safety, a mutex is used to wrap patching. + # Each instance of Patcher should have its own copy of the mutex., + # allowing each Patcher to work in parallel while protecting the + # critical patch section. + @patch_mutex = Mutex.new + + def self.inherited(subclass) + subclass.instance_variable_set(:@patch_mutex, Mutex.new) + end + + class << self + # Has this patcher already been applied? + # @return [Boolean] + def patched?(**options) + @patched == true + end + + # Override in subclasses to check if patcher should apply. + # Called after patcher loads but before perform_patch. + # @return [Boolean] true if this patcher should be applied + def applicable? 
+ true # Default: always applicable + end + + # Apply the patch (thread-safe and idempotent). + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if patching succeeded or was already done + def patch!(**options) + return false unless applicable? + return true if patched?(**options) # Fast path + + @patch_mutex.synchronize do + unless applicable? + Braintrust::Log.debug("Skipping #{name} - not applicable") + return false + end + return true if patched?(**options) # Double-check under lock + + perform_patch(**options) + @patched = true + end + Braintrust::Log.debug("Patched #{name}") + true + rescue => e + Braintrust::Log.error("Failed to patch #{name}: #{e.message}") + false + end + + # Subclasses implement this to perform the actual patching. + # This method is called under lock after applicable? returns true. + # + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + raise NotImplementedError, "#{self} must implement perform_patch" + end + + # Reset patched state (primarily for testing). + def reset! + @patched = false + end + end + end + end +end diff --git a/lib/braintrust/contrib/registry.rb b/lib/braintrust/contrib/registry.rb new file mode 100644 index 0000000..77a3d67 --- /dev/null +++ b/lib/braintrust/contrib/registry.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require "singleton" + +module Braintrust + module Contrib + # Thread-safe singleton registry for integrations. + # Provides registration, lookup, and require-path mapping for auto-instrumentation. 
+ class Registry + include Singleton + + def initialize + @integrations = {} + @require_path_map = nil # Lazy cache + @mutex = Mutex.new + end + + # Register an integration class with the registry. + # @param integration_class [Class] The integration class to register + def register(integration_class) + @mutex.synchronize do + @integrations[integration_class.integration_name] = integration_class + @require_path_map = nil # Invalidate cache + end + end + + # Look up an integration by name. + # @param name [Symbol, String] The integration name + # @return [Class, nil] The integration class, or nil if not found + def [](name) + @integrations[name.to_sym] + end + + # Get all registered integrations. + # @return [Array] All registered integration classes + def all + @integrations.values + end + + # Get all available integrations (target library is loaded). + # @return [Array] Available integration classes + def available + @integrations.values.select(&:available?) + end + + # Iterate over all registered integrations. + # @yield [Class] Each registered integration class + def each(&block) + @integrations.values.each(&block) + end + + # Returns integrations associated with a require path. + # Thread-safe with double-checked locking for performance. + # @param path [String] The require path (e.g., "openai", "anthropic") + # @return [Array] Integrations matching the require path + def integrations_for_require_path(path) + map = @require_path_map + if map.nil? + map = @mutex.synchronize do + @require_path_map ||= build_require_path_map + end + end + basename = File.basename(path.to_s, ".rb") + map.fetch(basename, EMPTY_ARRAY) + end + + # Clear all registrations (primarily for testing). + def clear! 
+ @mutex.synchronize do + @integrations.clear + @require_path_map = nil + end + end + + private + + EMPTY_ARRAY = [].freeze + + def build_require_path_map + map = {} + @integrations.each_value do |integration| + integration.require_paths.each do |req| + map[req] ||= [] + map[req] << integration + end + end + map.each_value(&:freeze) + map.freeze + end + end + end +end diff --git a/lib/braintrust/state.rb b/lib/braintrust/state.rb index 3d6d154..6f20d69 100644 --- a/lib/braintrust/state.rb +++ b/lib/braintrust/state.rb @@ -93,6 +93,11 @@ def initialize(api_key: nil, org_name: nil, org_id: nil, default_project: nil, a if enable_tracing require_relative "trace" Trace.setup(self, tracer_provider, exporter: exporter) + + # Propagate tracer_provider to Contrib if loaded (soft dependency check) + if defined?(Braintrust::Contrib) + Braintrust::Contrib.init(tracer_provider: tracer_provider) + end end end diff --git a/test/braintrust/contrib/context_test.rb b/test/braintrust/contrib/context_test.rb new file mode 100644 index 0000000..c4d147d --- /dev/null +++ b/test/braintrust/contrib/context_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::ContextTest < Minitest::Test + def setup + @target = Object.new + end + + def test_from_returns_nil_for_nil_target + assert_nil Braintrust::Contrib::Context.from(nil) + end + + def test_from_returns_nil_when_no_context_set + assert_nil Braintrust::Contrib::Context.from(@target) + end + + def test_from_returns_context_when_set + Braintrust::Contrib::Context.set!(@target, foo: "bar") + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_instance_of Braintrust::Contrib::Context, ctx + end + + def test_set_creates_context_with_options + Braintrust::Contrib::Context.set!(@target, foo: "bar", baz: 123) + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_equal "bar", ctx[:foo] + assert_equal 123, ctx[:baz] + end + + def 
test_set_returns_nil_when_creating_new_context + result = Braintrust::Contrib::Context.set!(@target, foo: "bar") + + assert_nil result + end + + def test_set_returns_nil_with_empty_options + result = Braintrust::Contrib::Context.set!(@target) + + assert_nil result + assert_nil Braintrust::Contrib::Context.from(@target) + end + + def test_set_updates_existing_context + Braintrust::Contrib::Context.set!(@target, foo: "bar") + Braintrust::Contrib::Context.set!(@target, baz: 123) + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_equal "bar", ctx[:foo] + assert_equal 123, ctx[:baz] + end + + def test_set_overwrites_existing_keys + Braintrust::Contrib::Context.set!(@target, foo: "bar") + Braintrust::Contrib::Context.set!(@target, foo: "updated") + + ctx = Braintrust::Contrib::Context.from(@target) + + assert_equal "updated", ctx[:foo] + end + + def test_context_bracket_access + ctx = Braintrust::Contrib::Context.new(foo: "bar") + + assert_equal "bar", ctx[:foo] + assert_nil ctx[:missing] + end + + def test_context_bracket_assignment + ctx = Braintrust::Contrib::Context.new + + ctx[:foo] = "bar" + + assert_equal "bar", ctx[:foo] + end + + def test_context_fetch_returns_value + ctx = Braintrust::Contrib::Context.new(foo: "bar") + + assert_equal "bar", ctx.fetch(:foo, "default") + end + + def test_context_fetch_returns_default_when_missing + ctx = Braintrust::Contrib::Context.new + + assert_equal "default", ctx.fetch(:missing, "default") + end +end diff --git a/test/braintrust/contrib/integration_test.rb b/test/braintrust/contrib/integration_test.rb new file mode 100644 index 0000000..40524d6 --- /dev/null +++ b/test/braintrust/contrib/integration_test.rb @@ -0,0 +1,568 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::IntegrationTest < Minitest::Test + def setup + # Use anonymous subclass to isolate test state + registry_class = Class.new(Braintrust::Contrib::Registry) + @registry = registry_class.instance + end + + # 
Create a mock patcher class for testing + def create_mock_patcher(should_fail: false, applicable: true) + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :patch_called, :should_fail, :is_applicable + end + + def self.applicable? + @is_applicable + end + + def self.perform_patch(**options) + @patch_called = true + raise "Patch failed" if @should_fail + end + end + + patcher.patch_called = false + patcher.should_fail = should_fail + patcher.is_applicable = applicable + patcher + end + + # Create a full integration class for testing + def create_test_integration( + name:, + gem_names:, + require_paths: nil, + patcher: nil, + min_version: nil, + max_version: nil, + loaded: true + ) + patcher_class = patcher || create_mock_patcher + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_integration_name, :_gem_names, :_require_paths, + :_patcher, :_min_version, :_max_version, :_loaded + end + + def self.integration_name + _integration_name + end + + def self.gem_names + _gem_names + end + + def self.require_paths + _require_paths || _gem_names + end + + def self.minimum_version + _min_version + end + + def self.maximum_version + _max_version + end + + def self.loaded? 
+ _loaded + end + + def self.patcher + _patcher + end + end + + integration._integration_name = name + integration._gem_names = gem_names + integration._require_paths = require_paths + integration._patcher = patcher_class + integration._min_version = min_version + integration._max_version = max_version + integration._loaded = loaded + integration + end + + def test_integration_name_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.integration_name + end + end + + def test_gem_names_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.gem_names + end + end + + def test_patcher_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.patcher + end + end + + def test_loaded_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.loaded? 
+ end + end + + def test_patchers_defaults_to_wrapping_patcher + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + assert_equal [patcher], integration.patchers + end + + def test_require_paths_defaults_to_gem_names + integration = create_test_integration( + name: :test, + gem_names: ["test-gem", "other-gem"] + ) + + assert_equal ["test-gem", "other-gem"], integration.require_paths + end + + def test_require_paths_can_be_overridden + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"], + require_paths: ["custom_path", "another_path"] + ) + + assert_equal ["custom_path", "another_path"], integration.require_paths + end + + def test_minimum_version_defaults_to_nil + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"] + ) + + assert_nil integration.minimum_version + end + + def test_maximum_version_defaults_to_nil + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"] + ) + + assert_nil integration.maximum_version + end + + def test_available_checks_gem_loaded_specs + # Use a gem that is actually loaded (minitest) + integration = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"] + ) + + assert integration.available? + end + + def test_available_returns_false_for_unloaded_gem + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"] + ) + + refute integration.available? + end + + def test_available_with_multiple_gems_any_loaded + # minitest is loaded, fake-gem is not + integration = create_test_integration( + name: :test, + gem_names: ["fake-gem-xyz", "minitest"] + ) + + assert integration.available? + end + + def test_compatible_returns_false_when_not_available + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"] + ) + + refute integration.compatible? 
+ end + + def test_compatible_returns_true_when_no_version_constraints + integration = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"] + ) + + assert integration.compatible? + end + + def test_compatible_checks_minimum_version + # Get the current minitest version + Gem.loaded_specs["minitest"].version + + # Test with a minimum version below current + integration_ok = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + min_version: "1.0.0" + ) + assert integration_ok.compatible? + + # Test with a minimum version above current + integration_too_new = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + min_version: "999.0.0" + ) + refute integration_too_new.compatible? + end + + def test_compatible_checks_maximum_version + # Test with a maximum version above current + integration_ok = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + max_version: "999.0.0" + ) + assert integration_ok.compatible? + + # Test with a maximum version below current + integration_too_old = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + max_version: "0.0.1" + ) + refute integration_too_old.compatible? + end + + def test_patch_delegates_to_patcher + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + result = integration.patch! + + assert result + assert patcher.patch_called + end + + def test_patch_returns_false_when_not_available + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"], + patcher: patcher + ) + + result = integration.patch! 
+ + refute result + refute patcher.patch_called + end + + def test_patch_returns_false_when_not_compatible + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + min_version: "999.0.0", # Too high + patcher: patcher + ) + + result = integration.patch! + + refute result + refute patcher.patch_called + end + + def test_patch_returns_false_when_not_loaded + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + loaded: false, + patcher: patcher + ) + + result = integration.patch! + + refute result + refute patcher.patch_called + end + + def test_patch_passes_tracer_provider + received_options = nil + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :is_applicable + end + + def self.applicable? + @is_applicable + end + + define_singleton_method(:perform_patch) do |**options| + received_options = options + end + end + patcher.is_applicable = true + + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + tracer_provider = Object.new + integration.patch!(tracer_provider: tracer_provider) + + assert_equal tracer_provider, received_options[:tracer_provider] + end + + def test_instrument_passes_target_to_patcher + received_options = nil + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :is_applicable + end + + def self.applicable? 
+ @is_applicable + end + + define_singleton_method(:perform_patch) do |**options| + received_options = options + end + end + patcher.is_applicable = true + + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + target = Object.new + tracer_provider = Object.new + integration.instrument!(target: target, tracer_provider: tracer_provider) + + # CRITICAL: target must be passed through to patcher for instance-level instrumentation + assert_equal target, received_options[:target], "target should be passed to patcher" + assert_equal tracer_provider, received_options[:tracer_provider] + end + + def test_instrument_sets_context_on_target + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + target = Object.new + tracer_provider = Object.new + integration.instrument!(target: target, tracer_provider: tracer_provider) + + # Context should be set on the target + ctx = Braintrust::Contrib::Context.from(target) + assert_equal tracer_provider, ctx[:tracer_provider] + end + + def test_register_adds_to_registry + integration = create_test_integration( + name: :test_integration, + gem_names: ["test-gem"] + ) + + # Mock Registry.instance to verify register! calls it + mock_registry = Minitest::Mock.new + mock_registry.expect(:register, nil, [integration]) + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + integration.register! 
+ end + + mock_registry.verify + end + + def test_patchers_with_multiple_patchers + patcher1 = create_mock_patcher + patcher2 = create_mock_patcher + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patchers + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.patchers + _patchers + end + end + + integration._patchers = [patcher1, patcher2] + + assert_equal [patcher1, patcher2], integration.patchers + end + + def test_patch_tries_all_applicable_patchers + # First patcher is not applicable + patcher1 = create_mock_patcher(applicable: false) + + # Second and third patchers are applicable - both should be tried + patcher2 = create_mock_patcher(applicable: true) + patcher3 = create_mock_patcher(applicable: true) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patchers + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.loaded? + true + end + + def self.patchers + _patchers + end + end + + integration._patchers = [patcher1, patcher2, patcher3] + + result = integration.patch! + + assert result + refute patcher1.patch_called # Not applicable + assert patcher2.patch_called # Applied + assert patcher3.patch_called # Also applied (doesn't stop after patcher2) + end + + def test_patch_skips_non_applicable_patchers + # Create patcher that is not applicable + non_applicable_patcher = create_mock_patcher(applicable: false) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patcher + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.loaded? + true + end + + def self.patchers + [_patcher] + end + end + + integration._patcher = non_applicable_patcher + + result = integration.patch! 
+ + refute result + refute non_applicable_patcher.patch_called + end + + def test_patch_logs_when_no_applicable_patcher + non_applicable_patcher = create_mock_patcher(applicable: false) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patcher + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.loaded? + true + end + + def self.patchers + [_patcher] + end + end + + integration._patcher = non_applicable_patcher + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.level = Logger::DEBUG + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + integration.patch! + # Check that the "no applicable patcher" message was logged + assert captured_logs.any? { |msg| msg.include?("No applicable patcher found") } + ensure + Braintrust::Log.logger = original_logger + end + end +end diff --git a/test/braintrust/contrib/patcher_test.rb b/test/braintrust/contrib/patcher_test.rb new file mode 100644 index 0000000..17ba7ca --- /dev/null +++ b/test/braintrust/contrib/patcher_test.rb @@ -0,0 +1,281 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::PatcherTest < Minitest::Test + def setup + # Create a fresh patcher class for each test to avoid state leakage + @patcher = create_test_patcher + end + + def create_test_patcher(should_fail: false) + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :patch_count, :last_options, :should_fail + end + + def self.perform_patch(**options) + @patch_count ||= 0 + @patch_count += 1 + @last_options = options + raise "Intentional patch failure" if @should_fail + end + end + + patcher.reset! 
+ patcher.patch_count = 0 + patcher.last_options = {} + patcher.should_fail = should_fail + patcher + end + + def test_patch_passes_options + options = {tracer_provider: "test-provider", target: "test-target"} + @patcher.patch!(**options) + + assert_equal "test-provider", @patcher.last_options[:tracer_provider] + assert_equal "test-target", @patcher.last_options[:target] + end + + def test_patched_returns_false_initially + refute @patcher.patched? + end + + def test_patch_sets_patched_to_true + @patcher.patch! + + assert @patcher.patched? + end + + def test_patch_returns_true_on_success + result = @patcher.patch! + + assert result + end + + def test_patch_calls_perform_patch_once + @patcher.patch! + + assert_equal 1, @patcher.patch_count + end + + def test_patch_is_idempotent + @patcher.patch! + @patcher.patch! + @patcher.patch! + + assert_equal 1, @patcher.patch_count + assert @patcher.patched? + end + + def test_patch_returns_true_on_subsequent_calls + first_result = @patcher.patch! + second_result = @patcher.patch! + + assert first_result + assert second_result + end + + def test_patch_passes_options_to_perform_patch + tracer_provider = Object.new + + @patcher.patch!(tracer_provider: tracer_provider) + + assert_instance_of Hash, @patcher.last_options + assert_equal tracer_provider, @patcher.last_options[:tracer_provider] + end + + def test_patch_returns_false_on_error + failing_patcher = create_test_patcher(should_fail: true) + + result = suppress_logs { failing_patcher.patch! } + + refute result + end + + def test_patch_does_not_set_patched_on_error + failing_patcher = create_test_patcher(should_fail: true) + + suppress_logs { failing_patcher.patch! } + + refute failing_patcher.patched? 
+ end + + def test_patch_logs_error_on_failure + failing_patcher = create_test_patcher(should_fail: true) + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + failing_patcher.patch! + # Check that error was logged (can't easily verify content without more setup) + # The main thing is that it doesn't raise + ensure + Braintrust::Log.logger = original_logger + end + end + + def test_reset_allows_repatching + @patcher.patch! + assert @patcher.patched? + + @patcher.reset! + refute @patcher.patched? + + @patcher.patch! + assert @patcher.patched? + assert_equal 2, @patcher.patch_count + end + + def test_perform_patch_raises_not_implemented_in_base_class + assert_raises(NotImplementedError) do + Braintrust::Contrib::Patcher.perform_patch + end + end + + def test_thread_safety_only_patches_once + patcher = create_test_patcher + + threads = 100.times.map do + Thread.new { patcher.patch! } + end + + threads.each(&:join) + + assert_equal 1, patcher.patch_count + assert patcher.patched? + end + + def test_thread_safety_concurrent_patch_calls + patcher = create_test_patcher + + errors = [] + results = [] + mutex = Mutex.new + + threads = 100.times.map do + Thread.new do + result = patcher.patch! + mutex.synchronize { results << result } + rescue => e + mutex.synchronize { errors << e.message } + end + end + + threads.each(&:join) + + assert_equal [], errors + assert results.all? { |r| r == true } + assert_equal 1, patcher.patch_count + end + + def test_applicable_returns_true_by_default + assert @patcher.applicable? + end + + def test_applicable_can_be_overridden + patcher = Class.new(Braintrust::Contrib::Patcher) do + def self.applicable? + false + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! 
+ + refute patcher.applicable? + end + + def test_patch_checks_applicable_under_lock + applicable_calls = [] + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :applicable_calls + end + + def self.applicable? + @applicable_calls ||= [] + @applicable_calls << Thread.current.object_id + true + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! + patcher.applicable_calls = applicable_calls + + patcher.patch! + + # Should be called twice: once before lock (fast path), once under lock (double-check) + assert_equal 2, applicable_calls.length + end + + def test_patch_returns_false_when_not_applicable + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :perform_patch_called + end + + def self.applicable? + false + end + + def self.perform_patch(**options) + @perform_patch_called = true + end + end + patcher.reset! + patcher.perform_patch_called = false + + result = patcher.patch! + + refute result + refute patcher.perform_patch_called + refute patcher.patched? + end + + def test_patch_returns_false_and_does_not_log_when_not_applicable + patcher = Class.new(Braintrust::Contrib::Patcher) do + def self.applicable? + false + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.level = Logger::DEBUG + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + result = patcher.patch! 
+ # Fast path returns false immediately without logging + refute result + assert_empty captured_logs, "Fast path should not log when not applicable" + ensure + Braintrust::Log.logger = original_logger + end + end +end diff --git a/test/braintrust/contrib/registry_test.rb b/test/braintrust/contrib/registry_test.rb new file mode 100644 index 0000000..610815e --- /dev/null +++ b/test/braintrust/contrib/registry_test.rb @@ -0,0 +1,241 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::RegistryTest < Minitest::Test + def setup + # Use anonymous subclass to isolate test state + registry_class = Class.new(Braintrust::Contrib::Registry) + @registry = registry_class.instance + end + + # Mock integration class for testing + def create_mock_integration(name:, gem_names:, require_paths: nil, available: false) + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_integration_name, :_gem_names, :_require_paths, :_available + end + + def self.integration_name + _integration_name + end + + def self.gem_names + _gem_names + end + + def self.require_paths + _require_paths || _gem_names + end + + def self.available? 
+ _available + end + end + + integration._integration_name = name + integration._gem_names = gem_names + integration._require_paths = require_paths + integration._available = available + integration + end + + def test_register_and_lookup + integration = create_mock_integration(name: :openai, gem_names: ["openai"]) + + @registry.register(integration) + + assert_equal integration, @registry[:openai] + assert_equal integration, @registry["openai"] + end + + def test_lookup_returns_nil_for_unregistered + assert_nil @registry[:unknown] + end + + def test_all_returns_all_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + anthropic = create_mock_integration(name: :anthropic, gem_names: ["anthropic"]) + + @registry.register(openai) + @registry.register(anthropic) + + all = @registry.all + assert_equal 2, all.length + assert_includes all, openai + assert_includes all, anthropic + end + + def test_available_filters_by_availability + available_integration = create_mock_integration( + name: :available, + gem_names: ["available-gem"], + available: true + ) + unavailable_integration = create_mock_integration( + name: :unavailable, + gem_names: ["unavailable-gem"], + available: false + ) + + @registry.register(available_integration) + @registry.register(unavailable_integration) + + available = @registry.available + assert_equal 1, available.length + assert_includes available, available_integration + refute_includes available, unavailable_integration + end + + def test_each_iterates_over_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + anthropic = create_mock_integration(name: :anthropic, gem_names: ["anthropic"]) + + @registry.register(openai) + @registry.register(anthropic) + + collected = [] + @registry.each { |i| collected << i } + + assert_equal 2, collected.length + assert_includes collected, openai + assert_includes collected, anthropic + end + + def test_integrations_for_require_path + openai = 
create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + ruby_openai = create_mock_integration( + name: :ruby_openai, + gem_names: ["ruby-openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + @registry.register(ruby_openai) + + integrations = @registry.integrations_for_require_path("openai") + assert_equal 2, integrations.length + assert_includes integrations, openai + assert_includes integrations, ruby_openai + end + + def test_integrations_for_require_path_strips_rb_extension + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + integrations = @registry.integrations_for_require_path("openai.rb") + assert_equal 1, integrations.length + assert_includes integrations, openai + end + + def test_integrations_for_require_path_returns_empty_for_unknown + integrations = @registry.integrations_for_require_path("unknown") + assert_equal [], integrations + assert integrations.frozen? 
+ end + + def test_integrations_for_require_path_caching + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + # First call builds the cache + result1 = @registry.integrations_for_require_path("openai") + + # Second call should return the same frozen array (cached) + result2 = @registry.integrations_for_require_path("openai") + + assert_same result1, result2 + end + + def test_register_invalidates_cache + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + # Build the cache + result1 = @registry.integrations_for_require_path("openai") + assert_equal 1, result1.length + + # Register another integration + another = create_mock_integration( + name: :another, + gem_names: ["another"], + require_paths: ["openai"] + ) + @registry.register(another) + + # Cache should be invalidated + result2 = @registry.integrations_for_require_path("openai") + assert_equal 2, result2.length + end + + def test_thread_safety_for_registration + integrations = 100.times.map do |i| + create_mock_integration(name: :"integration_#{i}", gem_names: ["gem_#{i}"]) + end + + threads = integrations.map do |integration| + Thread.new { @registry.register(integration) } + end + + threads.each(&:join) + + assert_equal 100, @registry.all.length + end + + def test_thread_safety_for_require_path_lookup + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + errors = [] + threads = 100.times.map do + Thread.new do + result = @registry.integrations_for_require_path("openai") + errors << "Got nil" if result.nil? 
+ errors << "Wrong length: #{result.length}" unless result.length == 1 + rescue => e + errors << e.message + end + end + + threads.each(&:join) + + assert_equal [], errors + end + + def test_clear_removes_all_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + + @registry.register(openai) + assert_equal 1, @registry.all.length + + @registry.clear! + assert_equal 0, @registry.all.length + assert_nil @registry[:openai] + end +end diff --git a/test/braintrust/contrib_test.rb b/test/braintrust/contrib_test.rb new file mode 100644 index 0000000..525927b --- /dev/null +++ b/test/braintrust/contrib_test.rb @@ -0,0 +1,200 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::ContribTest < Minitest::Test + # --- Braintrust.instrument! delegation --- + + def test_braintrust_instrument_delegates_to_contrib + called_with = nil + + Braintrust::Contrib.stub(:instrument!, ->(*args, **kwargs) { called_with = [args, kwargs] }) do + Braintrust.instrument!(:openai, tracer_provider: "test-provider") + end + + assert_equal [[:openai], {tracer_provider: "test-provider"}], called_with + end + + # --- registry --- + + def test_registry_returns_registry_instance + mock_registry = Object.new + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + assert_same mock_registry, Braintrust::Contrib.registry + end + end + + # --- init --- + + def test_init_sets_default_tracer_provider + mock_provider = Object.new + original = Braintrust::Contrib.instance_variable_get(:@default_tracer_provider) + + Braintrust::Contrib.init(tracer_provider: mock_provider) + assert_same mock_provider, Braintrust::Contrib.default_tracer_provider + ensure + Braintrust::Contrib.instance_variable_set(:@default_tracer_provider, original) + end + + # --- instrument! 
--- + + def test_instrument_delegates_to_integration + mock_integration = Minitest::Mock.new + mock_integration.expect(:instrument!, true, [], tracer_provider: "test-provider") + + mock_registry = {test_integration: mock_integration} + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.instrument!(:test_integration, tracer_provider: "test-provider") + end + + mock_integration.verify + end + + def test_instrument_logs_error_for_unknown_integration + mock_registry = {} + logged_error = nil + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Log.stub(:error, ->(msg) { logged_error = msg }) do + Braintrust::Contrib.instrument!(:unknown_integration) + end + end + + assert_match(/No integration for 'unknown_integration'/, logged_error) + end + + def test_instrument_passes_target_option + mock_integration = Minitest::Mock.new + target = Object.new + mock_integration.expect(:instrument!, true, [], target: target) + + mock_registry = {openai: mock_integration} + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.instrument!(:openai, target: target) + end + + mock_integration.verify + end + + # --- default_tracer_provider --- + + def test_default_tracer_provider_falls_back_to_opentelemetry + mock_otel_provider = Object.new + original = Braintrust::Contrib.instance_variable_get(:@default_tracer_provider) + Braintrust::Contrib.instance_variable_set(:@default_tracer_provider, nil) + + OpenTelemetry.stub(:tracer_provider, mock_otel_provider) do + assert_same mock_otel_provider, Braintrust::Contrib.default_tracer_provider + end + ensure + Braintrust::Contrib.instance_variable_set(:@default_tracer_provider, original) + end + + # --- context_for --- + + def test_context_for_delegates_to_context_from + target = Object.new + mock_context = Object.new + + Braintrust::Contrib::Context.stub(:from, ->(t) { (t == target) ? 
mock_context : nil }) do + assert_same mock_context, Braintrust::Contrib.context_for(target) + end + end + + def test_context_for_returns_nil_when_no_context + target = Object.new + + Braintrust::Contrib::Context.stub(:from, nil) do + assert_nil Braintrust::Contrib.context_for(target) + end + end + + # --- tracer_provider_for --- + + def test_tracer_provider_for_returns_context_provider_when_present + target = Object.new + context_provider = Object.new + mock_context = Minitest::Mock.new + mock_context.expect(:[], context_provider, [:tracer_provider]) + + Braintrust::Contrib::Context.stub(:from, mock_context) do + assert_same context_provider, Braintrust::Contrib.tracer_provider_for(target) + end + + mock_context.verify + end + + def test_tracer_provider_for_falls_back_to_default_when_no_context + target = Object.new + default_provider = Object.new + + Braintrust::Contrib::Context.stub(:from, nil) do + Braintrust::Contrib.stub(:default_tracer_provider, default_provider) do + assert_same default_provider, Braintrust::Contrib.tracer_provider_for(target) + end + end + end + + def test_tracer_provider_for_falls_back_when_context_has_no_provider + target = Object.new + default_provider = Object.new + mock_context = Minitest::Mock.new + mock_context.expect(:[], nil, [:tracer_provider]) + + Braintrust::Contrib::Context.stub(:from, mock_context) do + Braintrust::Contrib.stub(:default_tracer_provider, default_provider) do + assert_same default_provider, Braintrust::Contrib.tracer_provider_for(target) + end + end + + mock_context.verify + end + + # --- tracer_for --- + + def test_tracer_for_gets_tracer_from_provider + target = Object.new + mock_tracer = Object.new + mock_provider = Minitest::Mock.new + mock_provider.expect(:tracer, mock_tracer, ["braintrust"]) + + Braintrust::Contrib.stub(:tracer_provider_for, mock_provider) do + assert_same mock_tracer, Braintrust::Contrib.tracer_for(target) + end + + mock_provider.verify + end + + def test_tracer_for_uses_custom_name + 
target = Object.new + mock_tracer = Object.new + mock_provider = Minitest::Mock.new + mock_provider.expect(:tracer, mock_tracer, ["custom-tracer"]) + + Braintrust::Contrib.stub(:tracer_provider_for, mock_provider) do + assert_same mock_tracer, Braintrust::Contrib.tracer_for(target, name: "custom-tracer") + end + + mock_provider.verify + end + + def test_tracer_for_uses_target_context_provider + target = Object.new + mock_tracer = Object.new + context_provider = Minitest::Mock.new + context_provider.expect(:tracer, mock_tracer, ["braintrust"]) + + mock_context = Minitest::Mock.new + mock_context.expect(:[], context_provider, [:tracer_provider]) + + Braintrust::Contrib::Context.stub(:from, mock_context) do + assert_same mock_tracer, Braintrust::Contrib.tracer_for(target) + end + + context_provider.verify + mock_context.verify + end +end diff --git a/test/support/assert_helper.rb b/test/support/assert_helper.rb index 7b943cb..8d1d725 100644 --- a/test/support/assert_helper.rb +++ b/test/support/assert_helper.rb @@ -1,47 +1,6 @@ module Test module Support module AssertHelper - # Build a chain of mocks for nested method calls. - # Useful for testing code that traverses object hierarchies like `obj.foo.bar.baz`. - # - # Creates intermediate mocks that return each other in sequence, with the final - # mock returning the specified terminal value. All intermediate mocks are verified - # after the block. The caller is responsible for verifying the terminal value if needed. 
- # - # @param methods [Array] chain of method names to mock - # @param returns [Object] the value returned by the final method (can be a Class, Mock, or any object) - # @yield [root] the root mock (entry point to the chain) - # - # @example Testing a method chain with a Class as terminal - # fake_singleton = Class.new { include SomeModule } - # mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| - # assert SomePatcher.patched?(target: client) - # end - # - # @example Testing a method chain with a Mock as terminal - # terminal = Minitest::Mock.new - # terminal.expect(:include, true, [SomeModule]) - # mock_chain(:foo, :bar, returns: terminal) do |root| - # SomeCode.do_something(root) - # end - # terminal.verify - # - def mock_chain(*methods, returns:) - current = returns - mocks = [] - - methods.reverse_each do |method| - mock = Minitest::Mock.new - mock.expect(method, current) - mocks.unshift(mock) - current = mock - end - - yield(mocks.first) - - mocks.each(&:verify) - end - # Runs the test inside a fork, to isolate its side-effects from the main process. # Similar in purpose to https://docs.ruby-lang.org/en/master/Ruby/Box.html#class-Ruby::Box # diff --git a/test/support/log_helper.rb b/test/support/log_helper.rb new file mode 100644 index 0000000..4d8bf72 --- /dev/null +++ b/test/support/log_helper.rb @@ -0,0 +1,22 @@ +module Test + module Support + module LogHelper + # Suppress log output during block execution. + # Use for tests that deliberately cause errors/warnings. + # + # @yield Block to execute with logging suppressed + # @return Result of the block + # + # @example + # suppress_logs { failing_patcher.patch! 
} + # + def suppress_logs + original_logger = Braintrust::Log.logger + Braintrust::Log.logger = Logger.new(File::NULL) + yield + ensure + Braintrust::Log.logger = original_logger + end + end + end +end diff --git a/test/support/mock_helper.rb b/test/support/mock_helper.rb new file mode 100644 index 0000000..1cefdcc --- /dev/null +++ b/test/support/mock_helper.rb @@ -0,0 +1,46 @@ +module Test + module Support + module MockHelper + # Build a chain of mocks for nested method calls. + # Useful for testing code that traverses object hierarchies like `obj.foo.bar.baz`. + # + # Creates intermediate mocks that return each other in sequence, with the final + # mock returning the specified terminal value. All intermediate mocks are verified + # after the block. The caller is responsible for verifying the terminal value if needed. + # + # @param methods [Array] chain of method names to mock + # @param returns [Object] the value returned by the final method (can be a Class, Mock, or any object) + # @yield [root] the root mock (entry point to the chain) + # + # @example Testing a method chain with a Class as terminal + # fake_singleton = Class.new { include SomeModule } + # mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + # assert SomePatcher.patched?(target: client) + # end + # + # @example Testing a method chain with a Mock as terminal + # terminal = Minitest::Mock.new + # terminal.expect(:include, true, [SomeModule]) + # mock_chain(:foo, :bar, returns: terminal) do |root| + # SomeCode.do_something(root) + # end + # terminal.verify + # + def mock_chain(*methods, returns:) + current = returns + mocks = [] + + methods.reverse_each do |method| + mock = Minitest::Mock.new + mock.expect(method, current) + mocks.unshift(mock) + current = mock + end + + yield(mocks.first) + + mocks.each(&:verify) + end + end + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 45d085b..7ef9f68 100644 --- a/test/test_helper.rb +++ 
b/test/test_helper.rb @@ -66,6 +66,8 @@ require_relative "support/assert_helper" require_relative "support/braintrust_helper" require_relative "support/fixture_helper" +require_relative "support/log_helper" +require_relative "support/mock_helper" require_relative "support/provider_helper" require_relative "support/tracing_helper" @@ -74,6 +76,8 @@ class Minitest::Test include ::Test::Support::AssertHelper include ::Test::Support::BraintrustHelper include ::Test::Support::FixtureHelper + include ::Test::Support::LogHelper + include ::Test::Support::MockHelper include ::Test::Support::ProviderHelper include ::Test::Support::TracingHelper From a54d1c912aca46b51fa23b0cfbba2f8da06ecdac Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 15 Dec 2025 12:15:35 -0500 Subject: [PATCH 04/27] Added: Integration generator script --- CONTRIBUTING.md | 135 +++++++++++++++++----- Rakefile | 121 +++++++++++++++++++ templates/contrib/integration.rb.erb | 59 ++++++++++ templates/contrib/integration_test.rb.erb | 50 ++++++++ templates/contrib/patcher.rb.erb | 39 +++++++ templates/contrib/patcher_test.rb.erb | 32 +++++ 6 files changed, 408 insertions(+), 28 deletions(-) create mode 100644 templates/contrib/integration.rb.erb create mode 100644 templates/contrib/integration_test.rb.erb create mode 100644 templates/contrib/patcher.rb.erb create mode 100644 templates/contrib/patcher_test.rb.erb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f4bea9a..258fb38 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -79,65 +79,145 @@ All of our common dev tasks are in rake. rake -T ``` +We use VCR for making http tests fast. You can run tests with these enabled, +off, etc. If you add new tests you'll need to record new cassettes. See this +for more details. + +```bash +rake -T test:vcr +``` + ### Adding new integrations for AI Libraries -To add instrumentation support for a new AI library, follow these steps: -#### 1. 
Define the Integration +To add instrumentation support for a new library, use the integration generator: + +```bash +rake contrib:generate NAME=trustybrain_llm AUTO_REGISTER=true +``` + +This will create the integration structure and optionally register it. You can also specify additional options: + +```bash +rake contrib:generate NAME=trustybrain_llm \ + GEM_NAMES=trustybrain_llm,trustybrain \ + REQUIRE_PATHS=trustybrain \ + MIN_VERSION=1.0.0 \ + MAX_VERSION=2.0.0 \ + AUTO_REGISTER=true +``` + +#### Manual Setup + +If you prefer to create the integration manually, follow these steps: -Create a new file in `lib/braintrust/contrib/`: +##### 1. Create the integration directory structure + +```bash +mkdir -p lib/braintrust/contrib/trustybrain_llm +mkdir -p test/braintrust/contrib/trustybrain_llm +``` + +##### 2. Define the integration + +Create `lib/braintrust/contrib/trustybrain_llm/integration.rb`: ```ruby -# lib/braintrust/contrib/trustybrain_llm.rb -module Braintrust::Contrib - class TrustybrainLLM - include Integration +# frozen_string_literal: true - def self.integration_name - :trustybrain_llm - end +require_relative "../integration" - def self.gem_names - ["trustybrain_llm"] - end +module Braintrust + module Contrib + module TrustybrainLLM + class Integration + include Braintrust::Contrib::Integration + + def self.integration_name + :trustybrain_llm + end + + def self.gem_names + ["trustybrain_llm"] + end - def self.patcher - TrustybrainLLMPatcher + def self.loaded? + defined?(::TrustybrainLLM::Client) ? true : false + end + + def self.patchers + require_relative "patcher" + [Patcher] + end + end end end +end +``` + +##### 3. 
Create a patcher - class TrustybrainLLMPatcher < Patcher - def self.perform_patch(context) - # Add your instrumentation here - # context.tracer_provider gives you access to the tracer +Create `lib/braintrust/contrib/trustybrain_llm/patcher.rb`: + +```ruby +# frozen_string_literal: true + +require_relative "../patcher" + +module Braintrust + module Contrib + module TrustybrainLLM + class Patcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::TrustybrainLLM::Client) + end + + def perform_patch(**options) + ::TrustybrainLLM::Client.prepend(Instrumentation) + end + end + + module Instrumentation + def chat(*args, **kwargs, &block) + Braintrust::Contrib.tracer_for(self).in_span("trustybrain_llm.chat") do + super + end + end + end + end end end end ``` -#### 2. Register It +##### 4. Register it Add to `lib/braintrust/contrib.rb`: ```ruby -require_relative "contrib/trustybrain_llm" +require_relative "contrib/trustybrain_llm/integration" # At the bottom: -Contrib::TrustybrainLLM.register! +Contrib::TrustybrainLLM::Integration.register! ``` -#### 3. Write Tests +##### 5. 
Add tests -Create `test/braintrust/contrib/trustybrain_llm_test.rb`: +Create test files in `test/braintrust/contrib/trustybrain_llm/`: ```ruby +# test/braintrust/contrib/trustybrain_llm/integration_test.rb require "test_helper" -class Braintrust::Contrib::TrustybrainLLMTest < Minitest::Test +class Braintrust::Contrib::TrustybrainLLM::IntegrationTest < Minitest::Test def test_integration_basics - assert_equal :trustybrain_llm, TrustybrainLLM.integration_name - assert_equal ["trustybrain_llm"], TrustybrainLLM.gem_names + integration = Braintrust::Contrib::TrustybrainLLM::Integration + assert_equal :trustybrain_llm, integration.integration_name + assert_equal ["trustybrain_llm"], integration.gem_names end + + # TODO: Add tests for patchers, availability, compatibility, and instrumentation end ``` @@ -177,4 +257,3 @@ mise use ruby@3.4 ruby --version # => 3.4.x bundle exec rake test ``` - diff --git a/Rakefile b/Rakefile index dde2310..9af3066 100644 --- a/Rakefile +++ b/Rakefile @@ -251,6 +251,127 @@ task release: ["release:publish", "release:github"] do puts "✓ Release completed successfully!" 
end +# Contrib tasks +namespace :contrib do + desc "Generate a new integration (NAME=name [GEM_NAMES=gem1,gem2] [REQUIRE_PATHS=path1,path2] [MIN_VERSION=1.0.0] [MAX_VERSION=2.0.0] [AUTO_REGISTER=true])" + task :generate do + require "erb" + require "fileutils" + + # Parse parameters + name = ENV["NAME"] + unless name + puts "Error: NAME is required" + puts "Usage: rake contrib:generate NAME=trustybrain_llm [GEM_NAMES=trustybrain_llm] [AUTO_REGISTER=true]" + exit 1 + end + + # Convert name to snake_case if it's PascalCase + snake_case_name = name.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .downcase + + # Convert to PascalCase for module name + module_name = snake_case_name.split("_").map(&:capitalize).join + + integration_name = snake_case_name.to_sym + + # Parse optional parameters + gem_names = ENV["GEM_NAMES"]&.split(",") || [snake_case_name] + require_paths = ENV["REQUIRE_PATHS"]&.split(",") || gem_names + min_version = ENV["MIN_VERSION"] + max_version = ENV["MAX_VERSION"] + auto_register = ENV.fetch("AUTO_REGISTER", "false").downcase == "true" + + # Display what will be generated + puts "\n=== Generating Integration ===" + puts "Name: #{module_name}" + puts "Integration name: :#{integration_name}" + puts "Gem names: #{gem_names.inspect}" + puts "Require paths: #{require_paths.inspect}" if require_paths != gem_names + puts "Min version: #{min_version}" if min_version + puts "Max version: #{max_version}" if max_version + puts + + # Template binding + template_binding = binding + + # Paths + integration_dir = "lib/braintrust/contrib/#{snake_case_name}" + test_dir = "test/braintrust/contrib/#{snake_case_name}" + + # Create directories + FileUtils.mkdir_p(integration_dir) + FileUtils.mkdir_p(test_dir) + + # Generate files + templates = { + "templates/contrib/integration.rb.erb" => "#{integration_dir}/integration.rb", + "templates/contrib/patcher.rb.erb" => "#{integration_dir}/patcher.rb", + 
"templates/contrib/integration_test.rb.erb" => "#{test_dir}/integration_test.rb", + "templates/contrib/patcher_test.rb.erb" => "#{test_dir}/patcher_test.rb" + } + + templates.each do |template_path, output_path| + template = ERB.new(File.read(template_path), trim_mode: "-") + content = template.result(template_binding) + File.write(output_path, content) + puts "✓ Created #{output_path}" + end + + # Auto-register if requested + if auto_register + contrib_file = "lib/braintrust/contrib.rb" + contrib_content = File.read(contrib_file) + + # Find the position to insert (before the last "end" or after the last require) + insertion_point = if /^# Load integration stubs/.match?(contrib_content) + contrib_content.index("# Load integration stubs") + else + # Insert before the final module end + contrib_content.rindex("end") + end + + require_line = "require_relative \"contrib/#{snake_case_name}/integration\"" + register_line = "Contrib::#{module_name}::Integration.register!" + + # Check if already registered + if contrib_content.include?(require_line) + puts "⚠ #{contrib_file} already contains this integration" + else + lines_to_add = [ + "", + "# #{module_name}", + require_line, + register_line + ].join("\n") + + contrib_content.insert(insertion_point, lines_to_add + "\n") + File.write(contrib_file, contrib_content) + puts "✓ Updated #{contrib_file}" + end + end + + # Display next steps + puts "\n=== Next Steps ===" + unless auto_register + puts "1. Add to lib/braintrust/contrib.rb:" + puts " require_relative \"contrib/#{snake_case_name}/integration\"" + puts " Contrib::#{module_name}::Integration.register!" + puts + end + puts "#{auto_register ? "1" : "2"}. Implement the patcher in:" + puts " #{integration_dir}/patcher.rb" + puts + puts "#{auto_register ? "2" : "3"}. Add tests in:" + puts " #{test_dir}/" + puts + puts "#{auto_register ? "3" : "4"}. 
Run tests:" + puts " bundle exec rake test TEST=#{test_dir}/**/*_test.rb" + puts + end +end + # Version bump tasks def bump_version(type) version_file = "lib/braintrust/version.rb" diff --git a/templates/contrib/integration.rb.erb b/templates/contrib/integration.rb.erb new file mode 100644 index 0000000..864fe69 --- /dev/null +++ b/templates/contrib/integration.rb.erb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require_relative "../integration" + +module Braintrust + module Contrib + module <%= module_name %> + # Integration for <%= gem_names.join(", ") %> + class Integration + include Braintrust::Contrib::Integration + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :<%= integration_name %> + end + + # @return [Array] Gem names this integration supports + def self.gem_names + <%= gem_names.inspect %> + end +<% if require_paths != gem_names %> + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + <%= require_paths.inspect %> + end +<% end %> +<% if min_version %> + + # @return [String] Minimum compatible version + def self.minimum_version + "<%= min_version %>" + end +<% end %> +<% if max_version %> + + # @return [String] Maximum compatible version + def self.maximum_version + "<%= max_version %>" + end +<% end %> + + # Check if the library is actually loaded (constants are defined). + # @return [Boolean] true if the library module is defined + def self.loaded? + # TODO: Update this to check for your library's main module/class + defined?(::<%= module_name %>) ? true : false + end + + # Lazy-load the patcher only when actually patching. 
+ # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [Patcher] + end + end + end + end +end diff --git a/templates/contrib/integration_test.rb.erb b/templates/contrib/integration_test.rb.erb new file mode 100644 index 0000000..f7ce19d --- /dev/null +++ b/templates/contrib/integration_test.rb.erb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::<%= module_name %>::IntegrationTest < Minitest::Test + def setup + @integration = Braintrust::Contrib::<%= module_name %>::Integration + end + + def test_integration_name + assert_equal :<%= integration_name %>, @integration.integration_name + end + + def test_gem_names + assert_equal <%= gem_names.inspect %>, @integration.gem_names + end +<% if require_paths != gem_names %> + + def test_require_paths + assert_equal <%= require_paths.inspect %>, @integration.require_paths + end +<% end %> + + def test_minimum_version +<% if min_version %> + assert_equal "<%= min_version %>", @integration.minimum_version +<% else %> + assert_nil @integration.minimum_version +<% end %> + end + + def test_maximum_version +<% if max_version %> + assert_equal "<%= max_version %>", @integration.maximum_version +<% else %> + assert_nil @integration.maximum_version +<% end %> + end + + def test_loaded + # loaded? should return true/false based on whether the library is loaded + assert [true, false].include?(@integration.loaded?) 
+ end + + def test_patchers + patchers = @integration.patchers + assert_kind_of Array, patchers + assert_includes patchers, Braintrust::Contrib::<%= module_name %>::Patcher + end +end diff --git a/templates/contrib/patcher.rb.erb b/templates/contrib/patcher.rb.erb new file mode 100644 index 0000000..81a9043 --- /dev/null +++ b/templates/contrib/patcher.rb.erb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require_relative "../patcher" + +module Braintrust + module Contrib + module <%= module_name %> + # Patcher for <%= gem_names.join(", ") %> + class Patcher < Braintrust::Contrib::Patcher + class << self + # Check if this patcher should apply. + # @return [Boolean] true if target library is available + def applicable? + # TODO: Update to check if target library classes are defined + # Example: defined?(::SomeLibrary::Client) + true + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @return [void] + def perform_patch(**options) + # TODO: Add your instrumentation here + # Example: ::SomeLibrary::Client.prepend(Instrumentation) + end + end + + # Example instrumentation module (uncomment and modify for your integration) + # module Instrumentation + # def some_method(*args, **kwargs, &block) + # Braintrust::Contrib.tracer_for(self).in_span("somelibrary.some_method") do + # super + # end + # end + # end + end + end + end +end diff --git a/templates/contrib/patcher_test.rb.erb b/templates/contrib/patcher_test.rb.erb new file mode 100644 index 0000000..89b43f8 --- /dev/null +++ b/templates/contrib/patcher_test.rb.erb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require "test_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/<%= snake_case_name %>/patcher" + +class Braintrust::Contrib::<%= module_name %>::PatcherTest < Minitest::Test + def setup + @patcher = Braintrust::Contrib::<%= module_name %>::Patcher + @patcher.reset! 
+ end + + def teardown + @patcher.reset! + end + + def test_inherits_from_base_patcher + assert @patcher < Braintrust::Contrib::Patcher + end + + def test_implements_perform_patch + assert @patcher.respond_to?(:perform_patch) + end + + def test_applicable + # TODO: Update once applicable? checks for target library + assert @patcher.applicable? + end + + # TODO: Add tests for instrumented code +end From 21c71d1fe5681947261ec13525eca2e82c245605 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 10:07:32 -0500 Subject: [PATCH 05/27] Added: `test:contrib` task to run integration tests --- Rakefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Rakefile b/Rakefile index 9af3066..f9e3885 100644 --- a/Rakefile +++ b/Rakefile @@ -98,6 +98,11 @@ task default: :ci # Test-related tasks namespace :test do + desc "Run only contrib framework tests" + task :contrib do + sh "bundle exec ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib{_test.rb,/**/*_test.rb}').each { |f| require_relative f }\"" + end + desc "Run tests with verbose timing output" task :verbose do ENV["MT_VERBOSE"] = "1" From 4728a178ccb838004842dbad9a457fd67e574855 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 8 Jan 2026 13:26:47 -0500 Subject: [PATCH 06/27] Added: Internal::Time#measure for accurately measuring time --- lib/braintrust/internal/time.rb | 44 +++++++++++ test/braintrust/internal/time_test.rb | 103 ++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 lib/braintrust/internal/time.rb create mode 100644 test/braintrust/internal/time_test.rb diff --git a/lib/braintrust/internal/time.rb b/lib/braintrust/internal/time.rb new file mode 100644 index 0000000..3020627 --- /dev/null +++ b/lib/braintrust/internal/time.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Braintrust + module Internal + # Time utilities using the monotonic clock for accurate duration measurements. 
+ # + # Unlike Time.now, the monotonic clock is not affected by system clock adjustments + # (NTP updates, daylight saving, manual changes) and provides accurate elapsed time. + # + # @see https://blog.dnsimple.com/2018/03/elapsed-time-with-ruby-the-right-way/ + module Time + # Measure elapsed time using the monotonic clock. + # + # Three modes of operation: + # + # 1. With a block: executes the block and returns elapsed time in seconds + # elapsed = Time.measure { some_operation } + # + # 2. Without arguments: returns the current monotonic time (for later comparison) + # start = Time.measure + # # ... later ... + # elapsed = Time.measure(start) + # + # 3. With a start_time argument: returns elapsed time since start_time + # start = Time.measure + # elapsed = Time.measure(start) + # + # @param start_time [Float, nil] Optional start time from a previous measure call + # @yield Optional block to measure + # @return [Float] Elapsed time in seconds, or current monotonic time if no args/block + def self.measure(start_time = nil) + if block_given? 
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC) + yield + Process.clock_gettime(Process::CLOCK_MONOTONIC) - start + elsif start_time + Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time + else + Process.clock_gettime(Process::CLOCK_MONOTONIC) + end + end + end + end +end diff --git a/test/braintrust/internal/time_test.rb b/test/braintrust/internal/time_test.rb new file mode 100644 index 0000000..d049149 --- /dev/null +++ b/test/braintrust/internal/time_test.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/internal/time" + +class Braintrust::Internal::TimeTest < Minitest::Test + Time = Braintrust::Internal::Time + + # --- measure with block --- + + def test_measure_with_block_returns_elapsed_time + elapsed = Time.measure { sleep 0.01 } + + assert_kind_of Float, elapsed + assert elapsed >= 0.01, "Expected elapsed >= 0.01, got #{elapsed}" + assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" + end + + def test_measure_with_block_executes_block + executed = false + Time.measure { executed = true } + + assert executed, "Block should have been executed" + end + + def test_measure_with_block_returns_time_not_block_result + result = Time.measure { "block result" } + + assert_kind_of Float, result + refute_equal "block result", result + end + + # --- measure without arguments --- + + def test_measure_without_args_returns_monotonic_time + time1 = Time.measure + time2 = Time.measure + + assert_kind_of Float, time1 + assert_kind_of Float, time2 + assert time2 >= time1, "Monotonic time should not go backwards" + end + + def test_measure_without_args_returns_positive_value + time = Time.measure + + assert time > 0, "Monotonic time should be positive" + end + + # --- measure with start_time --- + + def test_measure_with_start_time_returns_elapsed + start = Time.measure + sleep 0.01 + elapsed = Time.measure(start) + + assert_kind_of Float, elapsed + assert elapsed >= 0.01, "Expected elapsed >= 0.01, 
got #{elapsed}" + assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" + end + + def test_measure_with_start_time_returns_non_negative + start = Time.measure + elapsed = Time.measure(start) + + assert elapsed >= 0, "Elapsed time should be non-negative" + end + + # --- monotonic behavior --- + + def test_measure_is_monotonic + times = 10.times.map { Time.measure } + + times.each_cons(2) do |t1, t2| + assert t2 >= t1, "Monotonic time should never decrease" + end + end + + def test_measure_returns_seconds_as_float + start = Time.measure + sleep 0.05 + elapsed = Time.measure(start) + + # Should be roughly 0.05 seconds, not 50 milliseconds or 50_000_000 nanoseconds + assert elapsed >= 0.04, "Expected seconds, got #{elapsed} (too small, might be wrong unit)" + assert elapsed < 0.2, "Expected seconds, got #{elapsed} (too large, might be wrong unit)" + end + + # --- equivalence of block and start_time modes --- + + def test_block_and_start_time_modes_equivalent + # Both modes should measure approximately the same duration + block_elapsed = Time.measure { sleep 0.02 } + + start = Time.measure + sleep 0.02 + manual_elapsed = Time.measure(start) + + # Both should be close to 0.02 seconds + assert_in_delta block_elapsed, manual_elapsed, 0.01, + "Block and start_time modes should produce similar results" + end +end From 6076afe1e97f152f6f113ac0bf70bc1f6a6040fb Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 18 Dec 2025 19:47:11 -0500 Subject: [PATCH 07/27] Changed: Migrated openai to new integration API --- Appraisals | 6 + README.md | 5 +- Rakefile | 7 + .../{openai.rb => contrib/openai/basic.rb} | 11 +- examples/contrib/openai/deprecated.rb | 72 + examples/contrib/openai/instance.rb | 57 + examples/contrib/openai/streaming.rb | 77 ++ .../{openai.rb => contrib/openai/golden.rb} | 8 +- examples/internal/eval-parallel-benchmark.rb | 4 +- examples/internal/kitchen-sink.rb | 4 +- gemfiles/openai_ruby_openai.gemfile | 10 + lib/braintrust/contrib.rb | 5 +- 
lib/braintrust/contrib/openai/deprecated.rb | 22 + .../contrib/openai/instrumentation/chat.rb | 296 ++++ .../contrib/openai/instrumentation/common.rb | 164 +++ .../openai/instrumentation/responses.rb | 185 +++ lib/braintrust/contrib/openai/integration.rb | 58 + lib/braintrust/contrib/openai/patcher.rb | 130 ++ lib/braintrust/trace/contrib/openai.rb | 611 --------- .../contrib/openai/deprecated_test.rb | 43 + .../openai/instrumentation/chat_test.rb | 477 +++++++ .../openai/instrumentation/common_test.rb | 184 +++ .../openai/instrumentation/responses_test.rb | 323 +++++ .../contrib/openai/instrumentation_test.rb | 161 +++ .../contrib/openai/integration_helper.rb | 33 + .../contrib/openai/integration_test.rb | 64 + .../braintrust/contrib/openai/patcher_test.rb | 180 +++ .../contrib/openai/ruby-openai_test.rb | 69 + test/braintrust/trace/openai_test.rb | 1221 ----------------- .../openai/responses_create_error.yml | 93 ++ .../openai/responses_stream_error.yml | 93 ++ .../openai/responses_with_metadata.yml | 171 +++ .../responses_with_previous_response_id.yml | 333 +++++ .../openai/responses_with_reasoning.yml | 173 +++ .../openai/responses_with_truncation.yml | 168 +++ .../openai/streaming_chat_completions.yml | 130 ++ .../openai/streaming_multiple_choices.yml | 125 ++ 37 files changed, 3925 insertions(+), 1848 deletions(-) rename examples/{openai.rb => contrib/openai/basic.rb} (86%) create mode 100644 examples/contrib/openai/deprecated.rb create mode 100644 examples/contrib/openai/instance.rb create mode 100644 examples/contrib/openai/streaming.rb rename examples/internal/{openai.rb => contrib/openai/golden.rb} (98%) create mode 100644 gemfiles/openai_ruby_openai.gemfile create mode 100644 lib/braintrust/contrib/openai/deprecated.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/chat.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/responses.rb create mode 
100644 lib/braintrust/contrib/openai/integration.rb create mode 100644 lib/braintrust/contrib/openai/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/openai.rb create mode 100644 test/braintrust/contrib/openai/deprecated_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation/chat_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation/common_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation/responses_test.rb create mode 100644 test/braintrust/contrib/openai/instrumentation_test.rb create mode 100644 test/braintrust/contrib/openai/integration_helper.rb create mode 100644 test/braintrust/contrib/openai/integration_test.rb create mode 100644 test/braintrust/contrib/openai/patcher_test.rb create mode 100644 test/braintrust/contrib/openai/ruby-openai_test.rb delete mode 100644 test/braintrust/trace/openai_test.rb create mode 100644 test/fixtures/vcr_cassettes/openai/responses_create_error.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_stream_error.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml create mode 100644 test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml create mode 100644 test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml create mode 100644 test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml diff --git a/Appraisals b/Appraisals index 022a2fb..9f1df2d 100644 --- a/Appraisals +++ b/Appraisals @@ -63,3 +63,9 @@ appraise "opentelemetry-latest" do gem "opentelemetry-sdk", ">= 1.10" gem "opentelemetry-exporter-otlp", ">= 0.31" end + +# Both OpenAI gems installed - tests that loaded? 
correctly distinguishes them +appraise "openai-ruby-openai" do + gem "openai", ">= 0.34" + gem "ruby-openai", ">= 8.0" +end diff --git a/README.md b/README.md index 5da0707..13c8f91 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,10 @@ Braintrust.init client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::OpenAI.wrap(client) +# Instrument all clients +Braintrust.instrument!(:openai) +# OR instrument a single client +Braintrust.instrument!(:openai, target: client) tracer = OpenTelemetry.tracer_provider.tracer("openai-app") root_span = nil diff --git a/Rakefile b/Rakefile index f9e3885..8bd2b54 100644 --- a/Rakefile +++ b/Rakefile @@ -103,6 +103,13 @@ namespace :test do sh "bundle exec ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib{_test.rb,/**/*_test.rb}').each { |f| require_relative f }\"" end + namespace :contrib do + desc "Run OpenAI contrib tests" + task :openai do + sh "bundle exec appraisal openai ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib/openai/**/*_test.rb').each { |f| require_relative f }\"" + end + end + desc "Run tests with verbose timing output" task :verbose do ENV["MT_VERBOSE"] = "1" diff --git a/examples/openai.rb b/examples/contrib/openai/basic.rb similarity index 86% rename from examples/openai.rb rename to examples/contrib/openai/basic.rb index 246ff20..d32371b 100644 --- a/examples/openai.rb +++ b/examples/contrib/openai/basic.rb @@ -12,10 +12,10 @@ # # Note: The openai gem is a development dependency. To run this example: # 1. Install dependencies: bundle install -# 2. Run from the SDK root: bundle exec ruby examples/openai.rb +# 2. 
Run from the SDK root: bundle exec ruby examples/contrib/openai/basic.rb # # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/openai.rb +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/basic.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -24,21 +24,20 @@ exit 1 end +# Instrument OpenAI Braintrust.init(blocking_login: true) +Braintrust.instrument!(:openai) # Create OpenAI client client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -# Wrap the client with Braintrust tracing -Braintrust::Trace::OpenAI.wrap(client) - # Create a root span to capture the entire operation tracer = OpenTelemetry.tracer_provider.tracer("openai-example") root_span = nil # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI..." -response = tracer.in_span("examples/openai.rb") do |span| +response = tracer.in_span("examples/contrib/openai/basic.rb") do |span| root_span = span client.chat.completions.create( diff --git a/examples/contrib/openai/deprecated.rb b/examples/contrib/openai/deprecated.rb new file mode 100644 index 0000000..872242f --- /dev/null +++ b/examples/contrib/openai/deprecated.rb @@ -0,0 +1,72 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: OpenAI chat completion with Braintrust tracing +# +# This example demonstrates how to automatically trace OpenAI API calls with Braintrust. +# +# Note: The openai gem is a development dependency. To run this example: +# 1. Install dependencies: bundle install +# 2. 
Run from the SDK root: bundle exec ruby examples/contrib/openai/deprecated.rb +# +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/deprecated.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create OpenAI client +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Wrap the client with Braintrust tracing +# DEPRECATED: Use `Braintrust.instrument!(:openai, target: client)` instead +Braintrust::Trace::OpenAI.wrap(client) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") +root_span = nil + +# Make a chat completion request (automatically traced!) +puts "Sending chat completion request to OpenAI..." +response = tracer.in_span("examples/contrib/openai/deprecated.rb") do |span| + root_span = span + + client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 + ) +end + +# Print the response +puts "\n✓ Response received!" +puts "\nAssistant: #{response.choices[0].message.content}" + +# Print usage stats +puts "\nToken usage:" +puts " Prompt tokens: #{response.usage.prompt_tokens}" +puts " Completion tokens: #{response.usage.completion_tokens}" +puts " Total tokens: #{response.usage.total_tokens}" + +# Print permalink to view this trace in Braintrust +puts "\n✓ View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n✓ Trace sent to Braintrust!" 
diff --git a/examples/contrib/openai/instance.rb b/examples/contrib/openai/instance.rb new file mode 100644 index 0000000..6516557 --- /dev/null +++ b/examples/contrib/openai/instance.rb @@ -0,0 +1,57 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: Instance-level OpenAI instrumentation +# +# This shows how to instrument a specific client instance rather than +# all OpenAI clients globally. +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/instance.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create two client instances +client_traced = ::OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +client_untraced = ::OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Only instrument one of them +Braintrust.instrument!(:openai, target: client_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") +root_span = nil + +tracer.in_span("examples/contrib/openai/instance.rb") do |span| + root_span = span + + puts "Calling traced client..." + response1 = client_traced.chat.completions.create( + messages: [{role: "user", content: "Say 'traced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + puts "Traced response: #{response1.choices[0].message.content}" + + puts "\nCalling untraced client..." 
+ response2 = client_untraced.chat.completions.create( + messages: [{role: "user", content: "Say 'untraced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + puts "Untraced response: #{response2.choices[0].message.content}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first client call should have an openai.chat span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/openai/streaming.rb b/examples/contrib/openai/streaming.rb new file mode 100644 index 0000000..5c4dd89 --- /dev/null +++ b/examples/contrib/openai/streaming.rb @@ -0,0 +1,77 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: OpenAI streaming with Braintrust tracing +# +# This example demonstrates how to trace streaming OpenAI API calls. +# Shows two streaming patterns: +# 1. Chat completions with stream_raw +# 2. Responses API with stream +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/streaming.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:openai) + +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("openai-streaming-example") +root_span = nil + +tracer.in_span("examples/contrib/openai/streaming.rb") do |span| + root_span = span + + # Pattern 1: Chat completions streaming with stream_raw + # Returns an iterator that yields chunks + puts "=== Pattern 1: Chat completions with stream_raw ===" + print "Assistant: " + + stream = client.chat.completions.stream_raw( + model: "gpt-4o-mini", + max_tokens: 100, + messages: [{role: "user", content: "Count from 1 to 5, one number per line."}], + stream_options: {include_usage: true} + ) + + 
stream.each do |chunk| + if chunk.choices&.first&.delta&.content + print chunk.choices.first.delta.content + end + end + puts "\n" + + # Pattern 2: Responses API streaming + # Returns an iterator that yields events + puts "=== Pattern 2: Responses API with stream ===" + print "Assistant: " + + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Name 3 colors." + ) + + stream.each do |event| + if event.type == :"response.output_text.delta" + print event.delta + end + end + puts "\n" +end + +puts "\nView this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown + +puts "\nTrace sent to Braintrust!" diff --git a/examples/internal/openai.rb b/examples/internal/contrib/openai/golden.rb similarity index 98% rename from examples/internal/openai.rb rename to examples/internal/contrib/openai/golden.rb index 0c66b12..0e2ed98 100755 --- a/examples/internal/openai.rb +++ b/examples/internal/contrib/openai/golden.rb @@ -27,7 +27,7 @@ # This example validates that the Ruby SDK captures the same data as TypeScript/Go SDKs. 
# # Usage: -# OPENAI_API_KEY=key bundle exec ruby examples/internal/openai.rb +# OPENAI_API_KEY=key bundle exec ruby examples/internal/contrib/openai/golden.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -42,16 +42,16 @@ # Get a tracer for this example tracer = OpenTelemetry.tracer_provider.tracer("openai-comprehensive-example") -# Create OpenAI client and wrap it +# Create OpenAI client and instrument it client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::OpenAI.wrap(client) +Braintrust.instrument!(:openai) puts "OpenAI Comprehensive Features Example" puts "=" * 50 # Wrap all examples under a single parent trace root_span = nil -tracer.in_span("examples/internal/openai.rb") do |span| +tracer.in_span("examples/internal/contrib/openai/golden.rb") do |span| root_span = span # Example 1: Vision - Image Understanding puts "\n1. Vision (Image Understanding)" diff --git a/examples/internal/eval-parallel-benchmark.rb b/examples/internal/eval-parallel-benchmark.rb index ba10525..c6a1683 100644 --- a/examples/internal/eval-parallel-benchmark.rb +++ b/examples/internal/eval-parallel-benchmark.rb @@ -105,9 +105,9 @@ def self.test_cases # Initialize Braintrust Braintrust.init(blocking_login: true) -# Create OpenAI client and wrap for tracing +# Create OpenAI client and instrument for tracing client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::OpenAI.wrap(client) +Braintrust.instrument!(:openai) # Task: Call OpenAI to classify fruit/vegetable task = ->(input) { diff --git a/examples/internal/kitchen-sink.rb b/examples/internal/kitchen-sink.rb index 34d44ea..45ecd93 100755 --- a/examples/internal/kitchen-sink.rb +++ b/examples/internal/kitchen-sink.rb @@ -29,8 +29,8 @@ # Create OpenAI client openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -# Wrap the client with Braintrust tracing -Braintrust::Trace::OpenAI.wrap(openai_client) +# Instrument OpenAI with 
Braintrust tracing +Braintrust.instrument!(:openai) puts "Kitchen Sink Eval Example" puts "=" * 60 diff --git a/gemfiles/openai_ruby_openai.gemfile b/gemfiles/openai_ruby_openai.gemfile new file mode 100644 index 0000000..a8db595 --- /dev/null +++ b/gemfiles/openai_ruby_openai.gemfile @@ -0,0 +1,10 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" +gem "openai", ">= 0.34" +gem "ruby-openai", ">= 8.0" + +gemspec path: "../" diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index f19c4fb..66746dc 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -99,4 +99,7 @@ def tracer_for(target, name: "braintrust") end # Load integration stubs (eager load minimal metadata). -# These will be added in subsequent milestones. +require_relative "contrib/openai/integration" + +# Register integrations +Braintrust::Contrib::OpenAI::Integration.register! diff --git a/lib/braintrust/contrib/openai/deprecated.rb b/lib/braintrust/contrib/openai/deprecated.rb new file mode 100644 index 0000000..ab361cf --- /dev/null +++ b/lib/braintrust/contrib/openai/deprecated.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old OpenAI integration API. +# This file now just delegates to the new API. + +module Braintrust + module Trace + module OpenAI + # Wrap an OpenAI::Client to automatically create spans for chat completions and responses. + # This is the legacy API - delegates to the new contrib framework. 
+ # + # @param client [OpenAI::Client] the OpenAI client to wrap + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) + # @return [OpenAI::Client] the wrapped client + def self.wrap(client, tracer_provider: nil) + Log.warn("Braintrust::Trace::OpenAI.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:openai, target: client, tracer_provider: tracer_provider) + client + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/chat.rb b/lib/braintrust/contrib/openai/instrumentation/chat.rb new file mode 100644 index 0000000..50ea6e2 --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/chat.rb @@ -0,0 +1,296 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Chat completions instrumentation for OpenAI. + # Wraps create(), stream(), and stream_raw() methods to create spans. 
+ module Chat + module Completions + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model frequency_penalty logit_bias logprobs max_tokens n + presence_penalty response_format seed service_tier stop + stream stream_options temperature top_p top_logprobs + tools tool_choice parallel_tool_calls user functions function_call + ].freeze + + module InstanceMethods + # Wrap create method for non-streaming completions + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("Chat Completion") do |span| + metadata = build_metadata(params) + + set_input(span, params) + + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super + end + + set_output(span, response) + set_metrics(span, response, time_to_first_token) + finalize_metadata(span, metadata, response) + + response + end + end + + # Wrap stream_raw for streaming chat completions (returns Internal::Stream) + # Stores context on stream object for span creation during consumption + def stream_raw(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + stream_obj = super + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + metadata: metadata, + completions_instance: self, + stream_type: :raw) + stream_obj + end + + # Wrap stream for streaming chat completions (returns ChatCompletionStream) + # Stores context on stream object for span creation during consumption + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + stream_obj = super + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + 
metadata: metadata, + completions_instance: self, + stream_type: :chat_completion) + stream_obj + end + + private + + def build_metadata(params, stream: false) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/chat/completions" + } + metadata["stream"] = true if stream + Completions::METADATA_FIELDS.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata + end + + def set_input(span, params) + return unless params[:messages] + + messages_array = params[:messages].map(&:to_h) + Common.set_json_attr(span, "braintrust.input_json", messages_array) + end + + def set_output(span, response) + return unless response.respond_to?(:choices) && response.choices&.any? + + choices_array = response.choices.map(&:to_h) + Common.set_json_attr(span, "braintrust.output_json", choices_array) + end + + def set_metrics(span, response, time_to_first_token) + metrics = {} + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response) + metadata["id"] = response.id if response.respond_to?(:id) && response.id + metadata["created"] = response.created if response.respond_to?(:created) && response.created + metadata["model"] = response.model if response.respond_to?(:model) && response.model + metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint + metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + + # Instrumentation for ChatCompletionStream (returned by stream()) + # Uses current_completion_snapshot for accumulated output + module ChatCompletionStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + trace_consumption(ctx) { super(&block) } + end + + def text + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + original_enum = super + Enumerator.new do |y| + trace_consumption(ctx) do + original_enum.each { |t| y << t } + end + end + end + + private + + def trace_consumption(ctx) + ctx[:consumed] = true + + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + completions_instance = ctx[:completions_instance] + start_time = Braintrust::Internal::Time.measure + + tracer.in_span("Chat Completion") do |span| + completions_instance.send(:set_input, span, params) + Common.set_json_attr(span, "braintrust.metadata", metadata) + + yield + + finalize_stream_span(span, start_time, metadata, completions_instance) + end + end + + def finalize_stream_span(span, start_time, metadata, 
completions_instance) + time_to_first_token = Braintrust::Internal::Time.measure(start_time) + + begin + snapshot = current_completion_snapshot + return unless snapshot + + # Set output from accumulated choices + if snapshot.choices&.any? + choices_array = snapshot.choices.map(&:to_h) + Common.set_json_attr(span, "braintrust.output_json", choices_array) + end + + # Set metrics + metrics = {} + if snapshot.usage + metrics = Common.parse_usage_tokens(snapshot.usage) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + + # Update metadata with response fields + metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id + metadata["created"] = snapshot.created if snapshot.respond_to?(:created) && snapshot.created + metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model + metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint + metadata["service_tier"] = snapshot.service_tier if snapshot.respond_to?(:service_tier) && snapshot.service_tier + Common.set_json_attr(span, "braintrust.metadata", metadata) + rescue => e + Braintrust::Log.debug("Failed to get completion snapshot: #{e.message}") + end + end + end + end + + # Instrumentation for Internal::Stream (returned by stream_raw()) + # Aggregates chunks manually since Internal::Stream has no built-in accumulation + module InternalStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + # Only trace if context present and is for chat completions (not other endpoints) + return super unless ctx&.[](:tracer) && !ctx[:consumed] && ctx[:stream_type] == :raw + + ctx[:consumed] = true + + tracer = ctx[:tracer] + 
params = ctx[:params] + metadata = ctx[:metadata] + completions_instance = ctx[:completions_instance] + aggregated_chunks = [] + start_time = Braintrust::Internal::Time.measure + time_to_first_token = nil + + tracer.in_span("Chat Completion") do |span| + completions_instance.send(:set_input, span, params) + Common.set_json_attr(span, "braintrust.metadata", metadata) + + super do |chunk| + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + aggregated_chunks << chunk.to_h + block&.call(chunk) + end + + finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) + end + end + + private + + def finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) + return if aggregated_chunks.empty? + + aggregated_output = Common.aggregate_streaming_chunks(aggregated_chunks) + Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) + + # Set metrics + metrics = {} + if aggregated_output[:usage] + metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ + # Update metadata with response fields + metadata["id"] = aggregated_output[:id] if aggregated_output[:id] + metadata["created"] = aggregated_output[:created] if aggregated_output[:created] + metadata["model"] = aggregated_output[:model] if aggregated_output[:model] + metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] + metadata["service_tier"] = aggregated_output[:service_tier] if aggregated_output[:service_tier] + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/common.rb b/lib/braintrust/contrib/openai/instrumentation/common.rb new file mode 100644 index 0000000..039353b --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/common.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +require "json" + +require_relative "../../../trace/tokens" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Chat completions instrumentation for OpenAI. + # Provides modules that can be prepended to OpenAI::Client to instrument chat.completions API. 
+ module Common + # Helper to safely set a JSON attribute on a span + # Only sets the attribute if obj is present + # @param span [OpenTelemetry::Trace::Span] the span to set attribute on + # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") + # @param obj [Object] the object to serialize to JSON + # @return [void] + def self.set_json_attr(span, attr_name, obj) + return unless obj + span.set_attribute(attr_name, JSON.generate(obj)) + end + + # Parse usage tokens from OpenAI API response + # @param usage [Hash, Object] usage object from OpenAI response + # @return [Hash] metrics hash with normalized names + def self.parse_usage_tokens(usage) + Braintrust::Trace.parse_openai_usage_tokens(usage) + end + + # Aggregate streaming chunks into a single response structure + # Follows the Go SDK logic for aggregating deltas + # @param chunks [Array] array of chunk hashes from stream + # @return [Hash] aggregated response with choices, usage, etc. + def self.aggregate_streaming_chunks(chunks) + return {} if chunks.empty? 
+ + # Initialize aggregated structure + aggregated = { + id: nil, + created: nil, + model: nil, + system_fingerprint: nil, + choices: [], + usage: nil + } + + # Track aggregated content and tool_calls for each choice index + choice_data = {} + + chunks.each do |chunk| + # Capture top-level fields from any chunk that has them + aggregated[:id] ||= chunk[:id] + aggregated[:created] ||= chunk[:created] + aggregated[:model] ||= chunk[:model] + aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] + + # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) + if chunk[:usage] + aggregated[:usage] = chunk[:usage] + end + + # Process choices + next unless chunk[:choices].is_a?(Array) + chunk[:choices].each do |choice| + index = choice[:index] || 0 + choice_data[index] ||= { + index: index, + role: nil, + content: +"", + tool_calls: [], + finish_reason: nil + } + + delta = choice[:delta] || {} + + # Aggregate role (set once from first delta that has it) + choice_data[index][:role] ||= delta[:role] + + # Aggregate content + if delta[:content] + choice_data[index][:content] << delta[:content] + end + + # Aggregate tool_calls (similar to Go SDK logic) + if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? + delta[:tool_calls].each do |tool_call_delta| + # Check if this is a new tool call or continuation + if tool_call_delta[:id] && !tool_call_delta[:id].empty? + # New tool call (dup strings to avoid mutating input) + choice_data[index][:tool_calls] << { + id: tool_call_delta[:id], + type: tool_call_delta[:type], + function: { + name: +(tool_call_delta.dig(:function, :name) || ""), + arguments: +(tool_call_delta.dig(:function, :arguments) || "") + } + } + elsif choice_data[index][:tool_calls].any? 
+ # Continuation - append arguments to last tool call + last_tool_call = choice_data[index][:tool_calls].last + if tool_call_delta.dig(:function, :arguments) + last_tool_call[:function][:arguments] << tool_call_delta[:function][:arguments] + end + end + end + end + + # Capture finish_reason + if choice[:finish_reason] + choice_data[index][:finish_reason] = choice[:finish_reason] + end + end + end + + # Build final choices array + aggregated[:choices] = choice_data.values.sort_by { |c| c[:index] }.map do |choice| + message = { + role: choice[:role], + content: choice[:content].empty? ? nil : choice[:content] + } + + # Add tool_calls to message if any + message[:tool_calls] = choice[:tool_calls] if choice[:tool_calls].any? + + { + index: choice[:index], + message: message, + finish_reason: choice[:finish_reason] + } + end + + aggregated + end + + # Aggregate responses streaming events into a single response structure + # Follows similar logic to Python SDK's _postprocess_streaming_results + # @param events [Array] array of event objects from stream + # @return [Hash] aggregated response with output, usage, etc. + def self.aggregate_responses_events(events) + return {} if events.empty? + + # Find the response.completed event which has the final response + completed_event = events.find { |e| e.respond_to?(:type) && e.type == :"response.completed" } + + if completed_event&.respond_to?(:response) + response = completed_event.response + # Convert the response object to a hash-like structure for logging + return { + id: response.respond_to?(:id) ? response.id : nil, + output: response.respond_to?(:output) ? response.output : nil, + usage: response.respond_to?(:usage) ? 
response.usage : nil + } + end + + # Fallback if no completed event found + {} + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/responses.rb b/lib/braintrust/contrib/openai/instrumentation/responses.rb new file mode 100644 index 0000000..909b7f4 --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/responses.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Responses API instrumentation for OpenAI. + # Wraps create() and stream() methods to create spans. + module Responses + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model instructions modalities tools parallel_tool_calls + tool_choice temperature max_tokens top_p frequency_penalty + presence_penalty seed user metadata store response_format + reasoning previous_response_id truncation + ].freeze + + module InstanceMethods + # Wrap non-streaming create method + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("openai.responses.create") do |span| + metadata = build_metadata(params) + + set_input(span, params) + + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super + end + + set_output(span, response) + set_metrics(span, response, time_to_first_token) + finalize_metadata(span, metadata, response) + + response + end + end + + # Wrap streaming method + # Stores context on stream object for span creation during consumption + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + 
stream_obj = super + + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + metadata: metadata, + responses_instance: self) + stream_obj + end + + private + + def build_metadata(params, stream: false) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/responses" + } + metadata["stream"] = true if stream + Responses::METADATA_FIELDS.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata + end + + def set_input(span, params) + return unless params[:input] + + Common.set_json_attr(span, "braintrust.input_json", params[:input]) + end + + def set_output(span, response) + return unless response.respond_to?(:output) && response.output + + Common.set_json_attr(span, "braintrust.output_json", response.output) + end + + def set_metrics(span, response, time_to_first_token) + metrics = {} + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response) + metadata["id"] = response.id if response.respond_to?(:id) && response.id + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + + # Instrumentation for ResponseStream (returned by stream()) + # Aggregates events and creates span lazily when consumed + module ResponseStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + ctx[:consumed] = true + + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + responses_instance = ctx[:responses_instance] + aggregated_events = [] + start_time = Braintrust::Internal::Time.measure + time_to_first_token = nil + + tracer.in_span("openai.responses.create") do |span| + responses_instance.send(:set_input, span, params) + Common.set_json_attr(span, "braintrust.metadata", metadata) + + begin + super do |event| + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + aggregated_events << event + block&.call(event) + end + rescue => e + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") + raise + end + + finalize_stream_span(span, aggregated_events, time_to_first_token, metadata) + end + end + + private + + def finalize_stream_span(span, aggregated_events, time_to_first_token, metadata) + return if aggregated_events.empty? 
+ + aggregated_output = Common.aggregate_responses_events(aggregated_events) + Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] + + # Set metrics + metrics = {} + if aggregated_output[:usage] + metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + end + metrics["time_to_first_token"] = time_to_first_token + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + + # Update metadata with response fields + metadata["id"] = aggregated_output[:id] if aggregated_output[:id] + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/integration.rb b/lib/braintrust/contrib/openai/integration.rb new file mode 100644 index 0000000..95de580 --- /dev/null +++ b/lib/braintrust/contrib/openai/integration.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require_relative "../integration" +require_relative "deprecated" + +module Braintrust + module Contrib + module OpenAI + # OpenAI integration for automatic instrumentation. + # Instruments the official openai gem (not ruby-openai). + class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "0.1.0" + + GEM_NAMES = ["openai"].freeze + REQUIRE_PATHS = ["openai"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :openai + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if official openai gem is available + def self.loaded? + # Check if the official openai gem is loaded (not ruby-openai). + # The ruby-openai gem also uses "require 'openai'", so we need to distinguish them. 
+ + # This module is defined ONLY in the official OpenAI gem + defined?(::OpenAI::Internal) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. + # @return [Class] The patcher class + def self.patchers + require_relative "patcher" + [ChatPatcher, ResponsesPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/patcher.rb b/lib/braintrust/contrib/openai/patcher.rb new file mode 100644 index 0000000..529482f --- /dev/null +++ b/lib/braintrust/contrib/openai/patcher.rb @@ -0,0 +1,130 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/chat" +require_relative "instrumentation/responses" + +module Braintrust + module Contrib + module OpenAI + # Patcher for OpenAI integration - implements class-level patching. + # All new OpenAI::Client instances created after patch! will be automatically instrumented. + class ChatPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) + end + + def patched?(**options) + # Use the target's singleton class if provided, otherwise check the base class. + target_class = get_singleton_class(options[:target]) || ::OpenAI::Resources::Chat::Completions + + Instrumentation::Chat::Completions.applied?(target_class) + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + # Stream classes are shared across all clients, patch at class level. + # The instrumentation short-circuits when no context is present, + # so uninstrumented clients' streams pass through unaffected. 
+ patch_stream_classes + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Chat::Completions) + else + # Class-level (for all clients) + ::OpenAI::Resources::Chat::Completions.include(Instrumentation::Chat::Completions) + end + end + + def patch_stream_classes + # Patch ChatCompletionStream for stream() method + if defined?(::OpenAI::Helpers::Streaming::ChatCompletionStream) + unless Instrumentation::Chat::ChatCompletionStream.applied?(::OpenAI::Helpers::Streaming::ChatCompletionStream) + ::OpenAI::Helpers::Streaming::ChatCompletionStream.include(Instrumentation::Chat::ChatCompletionStream) + end + end + + # Patch Internal::Stream for stream_raw() method + if defined?(::OpenAI::Internal::Stream) + unless Instrumentation::Chat::InternalStream.applied?(::OpenAI::Internal::Stream) + ::OpenAI::Internal::Stream.include(Instrumentation::Chat::InternalStream) + end + end + end + + private + + def get_singleton_class(client) + client&.chat&.completions&.singleton_class + end + end + end + + # Patcher for OpenAI integration - implements class-level patching. + # All new OpenAI::Client instances created after patch! will be automatically instrumented. + class ResponsesPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) && ::OpenAI::Client.instance_methods.include?(:responses) + end + + def patched?(**options) + # Use the target's singleton class if provided, otherwise check the base class. + target_class = get_singleton_class(options[:target]) || ::OpenAI::Resources::Responses + + Instrumentation::Responses.applied?(target_class) + end + + # Perform the actual patching. 
+ # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + # Stream class is shared across all clients, patch at class level. + # The instrumentation short-circuits when no context is present, + # so uninstrumented clients' streams pass through unaffected. + patch_response_stream + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Responses) + else + # Class-level (for all clients) + ::OpenAI::Resources::Responses.include(Instrumentation::Responses) + end + end + + def patch_response_stream + # Patch ResponseStream for stream() method + if defined?(::OpenAI::Helpers::Streaming::ResponseStream) + unless Instrumentation::ResponseStream.applied?(::OpenAI::Helpers::Streaming::ResponseStream) + ::OpenAI::Helpers::Streaming::ResponseStream.include(Instrumentation::ResponseStream) + end + end + end + + private + + def get_singleton_class(client) + client&.responses&.singleton_class + end + end + end + end + end +end diff --git a/lib/braintrust/trace/contrib/openai.rb b/lib/braintrust/trace/contrib/openai.rb deleted file mode 100644 index 5eb0a29..0000000 --- a/lib/braintrust/trace/contrib/openai.rb +++ /dev/null @@ -1,611 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../tokens" - -module Braintrust - module Trace - module OpenAI - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., 
"braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from OpenAI API response - # @param usage [Hash, Object] usage object from OpenAI response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_openai_usage_tokens(usage) - end - - # Aggregate streaming chunks into a single response structure - # Follows the Go SDK logic for aggregating deltas - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with choices, usage, etc. - def self.aggregate_streaming_chunks(chunks) - return {} if chunks.empty? - - # Initialize aggregated structure - aggregated = { - id: nil, - created: nil, - model: nil, - system_fingerprint: nil, - choices: [], - usage: nil - } - - # Track aggregated content and tool_calls for each choice index - choice_data = {} - - chunks.each do |chunk| - # Capture top-level fields from any chunk that has them - aggregated[:id] ||= chunk[:id] - aggregated[:created] ||= chunk[:created] - aggregated[:model] ||= chunk[:model] - aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] - - # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) - if chunk[:usage] - aggregated[:usage] = chunk[:usage] - end - - # Process choices - next unless chunk[:choices].is_a?(Array) - chunk[:choices].each do |choice| - index = choice[:index] || 0 - choice_data[index] ||= { - index: index, - role: nil, - content: +"", - tool_calls: [], - finish_reason: nil - } - - delta = choice[:delta] || {} - - # Aggregate role (set once from first delta that has it) - choice_data[index][:role] ||= delta[:role] - - # Aggregate content - if delta[:content] - choice_data[index][:content] << delta[:content] - end - - # Aggregate tool_calls (similar to Go SDK 
logic) - if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? - delta[:tool_calls].each do |tool_call_delta| - # Check if this is a new tool call or continuation - if tool_call_delta[:id] && !tool_call_delta[:id].empty? - # New tool call - choice_data[index][:tool_calls] << { - id: tool_call_delta[:id], - type: tool_call_delta[:type], - function: { - name: tool_call_delta.dig(:function, :name) || +"", - arguments: tool_call_delta.dig(:function, :arguments) || +"" - } - } - elsif choice_data[index][:tool_calls].any? - # Continuation - append arguments to last tool call - last_tool_call = choice_data[index][:tool_calls].last - if tool_call_delta.dig(:function, :arguments) - last_tool_call[:function][:arguments] << tool_call_delta[:function][:arguments] - end - end - end - end - - # Capture finish_reason - if choice[:finish_reason] - choice_data[index][:finish_reason] = choice[:finish_reason] - end - end - end - - # Build final choices array - aggregated[:choices] = choice_data.values.sort_by { |c| c[:index] }.map do |choice| - message = { - role: choice[:role], - content: choice[:content].empty? ? nil : choice[:content] - } - - # Add tool_calls to message if any - message[:tool_calls] = choice[:tool_calls] if choice[:tool_calls].any? 
- - { - index: choice[:index], - message: message, - finish_reason: choice[:finish_reason] - } - end - - aggregated - end - - # Wrap an OpenAI::Client to automatically create spans for chat completions and responses - # Supports both synchronous and streaming requests - # @param client [OpenAI::Client] the OpenAI client to wrap - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(client, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # Wrap chat completions - wrap_chat_completions(client, tracer_provider) - - # Wrap responses API if available - wrap_responses(client, tracer_provider) if client.respond_to?(:responses) - - client - end - - # Wrap chat completions API - # @param client [OpenAI::Client] the OpenAI client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_chat_completions(client, tracer_provider) - # Create a wrapper module that intercepts chat.completions.create - wrapper = Module.new do - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("Chat Completion") do |span| - # Track start time for time_to_first_token - start_time = Time.now - - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input messages as JSON - # Pass through all message fields to preserve tool_calls, tool_call_id, name, etc. 
- if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Call the original method - response = super(**params) - - # Calculate time to first token - time_to_first_token = Time.now - start_time - - # Set output (choices) as JSON - # Use to_h to get the raw structure with all fields (including tool_calls) - if response.respond_to?(:choices) && response.choices&.any? - choices_array = response.choices.map(&:to_h) - span.set_attribute("braintrust.output_json", JSON.generate(choices_array)) - end - - # Set metrics (token usage with advanced details) - metrics = {} - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? - - # Add response metadata fields - metadata["id"] = response.id if response.respond_to?(:id) && response.id - metadata["created"] = response.created if response.respond_to?(:created) && response.created - metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint - metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - - # Wrap stream_raw for streaming chat completions - define_method(:stream_raw) do |**params| - tracer = tracer_provider.tracer("braintrust") - aggregated_chunks = [] - start_time = Time.now - time_to_first_token = nil - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Start span with proper context (will be child of current span if any) - span = 
tracer.start_span("Chat Completion") - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - metadata["stream"] = true # Explicitly mark as streaming - - # Set input messages as JSON - if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream_raw method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Wrap the stream to aggregate chunks - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |chunk| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - aggregated_chunks << chunk.to_h - block&.call(chunk) - end - rescue => e - # Record exception if streaming fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Always aggregate whatever chunks we collected and finish span - # This runs on normal completion, break, or exception - unless aggregated_chunks.empty? 
- aggregated_output = Braintrust::Trace::OpenAI.aggregate_streaming_chunks(aggregated_chunks) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) - - # Set metrics if usage is included (requires stream_options.include_usage) - metrics = {} - if aggregated_output[:usage] - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage]) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token || 0.0 - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - - # Update metadata with response fields - metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - metadata["created"] = aggregated_output[:created] if aggregated_output[:created] - metadata["model"] = aggregated_output[:model] if aggregated_output[:model] - metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metadata", metadata) - end - - span.finish - end - - stream - end - - # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods) - define_method(:stream) do |**params| - tracer = tracer_provider.tracer("braintrust") - start_time = Time.now - time_to_first_token = nil - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Start span with proper context (will be child of current span if any) - span = tracer.start_span("Chat Completion") - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - 
metadata["stream"] = true # Explicitly mark as streaming - - # Set input messages as JSON - if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Local helper for setting JSON attributes - set_json_attr = ->(attr_name, obj) { Braintrust::Trace::OpenAI.set_json_attr(span, attr_name, obj) } - - # Helper to extract metadata from SDK's internal snapshot - extract_stream_metadata = lambda do - # Access the SDK's internal accumulated completion snapshot - snapshot = stream.current_completion_snapshot - return unless snapshot - - # Set output from accumulated choices - if snapshot.choices&.any? - choices_array = snapshot.choices.map(&:to_h) - set_json_attr.call("braintrust.output_json", choices_array) - end - - # Set metrics if usage is available - metrics = {} - if snapshot.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token || 0.0 - set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty? 
- - # Update metadata with response fields - metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id - metadata["created"] = snapshot.created if snapshot.respond_to?(:created) && snapshot.created - metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model - metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint - set_json_attr.call("braintrust.metadata", metadata) - end - - # Prevent double-finish of span - finish_braintrust_span = lambda do - return if stream.instance_variable_get(:@braintrust_span_finished) - stream.instance_variable_set(:@braintrust_span_finished, true) - extract_stream_metadata.call - span.finish - end - - # Wrap .each() method - this is the core consumption method - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |chunk| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - block&.call(chunk) - end - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - finish_braintrust_span.call - end - - # Wrap .text() method - returns enumerable for text deltas - original_text = stream.method(:text) - stream.define_singleton_method(:text) do - text_enum = original_text.call - # Wrap the returned enumerable's .each method - original_text_each = text_enum.method(:each) - text_enum.define_singleton_method(:each) do |&block| - original_text_each.call do |delta| - # Capture time to first token on first delta - time_to_first_token ||= Time.now - start_time - block&.call(delta) - end - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - finish_braintrust_span.call - end - text_enum - end - - stream - end - end - - # Prepend the wrapper to the completions 
resource - client.chat.completions.singleton_class.prepend(wrapper) - end - - # Wrap responses API - # @param client [OpenAI::Client] the OpenAI client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_responses(client, tracer_provider) - # Create a wrapper module that intercepts responses.create and responses.stream - wrapper = Module.new do - # Wrap non-streaming create method - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("openai.responses.create") do |span| - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses" - } - - # Capture request metadata fields - metadata_fields = %i[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user metadata store response_format - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input as JSON - if params[:input] - span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) - end - - # Call the original method - response = super(**params) - - # Set output as JSON - if response.respond_to?(:output) && response.output - span.set_attribute("braintrust.output_json", JSON.generate(response.output)) - end - - # Set metrics (token usage) - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage) - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? 
- end - - # Add response metadata fields - metadata["id"] = response.id if response.respond_to?(:id) && response.id - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - - # Wrap streaming method - define_method(:stream) do |**params| - tracer = tracer_provider.tracer("braintrust") - aggregated_events = [] - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses", - "stream" => true - } - - # Start span with proper context - span = tracer.start_span("openai.responses.create") - - # Capture request metadata fields - metadata_fields = %i[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user metadata store response_format - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input as JSON - if params[:input] - span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Wrap the stream to aggregate events - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |event| - # Store the actual event object (not converted to hash) - aggregated_events << event - block&.call(event) - end - rescue => e - # Record exception if streaming fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Always aggregate whatever events we collected and finish span - unless 
aggregated_events.empty? - aggregated_output = Braintrust::Trace::OpenAI.aggregate_responses_events(aggregated_events) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] - - # Set metrics if usage is included - if aggregated_output[:usage] - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage]) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - end - - # Update metadata with response fields - metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metadata", metadata) - end - - span.finish - end - - stream - end - end - - # Prepend the wrapper to the responses resource - client.responses.singleton_class.prepend(wrapper) - end - - # Aggregate responses streaming events into a single response structure - # Follows similar logic to Python SDK's _postprocess_streaming_results - # @param events [Array] array of event objects from stream - # @return [Hash] aggregated response with output, usage, etc. - def self.aggregate_responses_events(events) - return {} if events.empty? - - # Find the response.completed event which has the final response - completed_event = events.find { |e| e.respond_to?(:type) && e.type == :"response.completed" } - - if completed_event&.respond_to?(:response) - response = completed_event.response - # Convert the response object to a hash-like structure for logging - return { - id: response.respond_to?(:id) ? response.id : nil, - output: response.respond_to?(:output) ? response.output : nil, - usage: response.respond_to?(:usage) ? 
response.usage : nil - } - end - - # Fallback if no completed event found - {} - end - end - end -end diff --git a/test/braintrust/contrib/openai/deprecated_test.rb b/test/braintrust/contrib/openai/deprecated_test.rb new file mode 100644 index 0000000..32c0c74 --- /dev/null +++ b/test/braintrust/contrib/openai/deprecated_test.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/openai/deprecated" + +class Braintrust::Contrib::OpenAI::DeprecatedTest < Minitest::Test + def test_wrap_delegates_to_instrument + mock_client = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::OpenAI.wrap(mock_client, tracer_provider: mock_tracer) } + end + + assert_equal :openai, captured_args[0] + assert_same mock_client, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_logs_deprecation_warning + mock_client = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::OpenAI.wrap(mock_client) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, warning_message) + end + + def test_wrap_returns_client + mock_client = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::OpenAI.wrap(mock_client) } + assert_same mock_client, result + end + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/chat_test.rb b/test/braintrust/contrib/openai/instrumentation/chat_test.rb new file mode 100644 index 0000000..3dfa0eb --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation/chat_test.rb @@ -0,0 +1,477 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require 
"braintrust/contrib/openai/instrumentation/chat" + +class Braintrust::Contrib::OpenAI::Instrumentation::Chat::CompletionsTest < Minitest::Test + Completions = Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Completions::InstanceMethods]) + mock.expect(:prepend, nil, [Completions::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Completions.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + # Should not raise or double-prepend + Completions.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Completions::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Completions.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + assert Completions.applied?(base) + end +end + +# E2E tests for Chat::Completions instrumentation +class Braintrust::Contrib::OpenAI::Instrumentation::Chat::CompletionsE2ETest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + end + + # --- #create --- + + def test_create_creates_span_with_correct_attributes + VCR.use_cassette("openai/chat_completions") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.chat.completions.create( + messages: [ + {role: "system", content: "You are a test assistant."}, + {role: "user", content: "Say 'test'"} + ], + model: "gpt-4o-mini", + max_tokens: 10, + temperature: 0.5 + ) + + refute_nil response + refute_nil response.choices[0].message.content + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 2, input.length + assert_equal "system", input[0]["role"] + assert_equal "user", input[1]["role"] + + # Verify output + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal "assistant", output[0]["message"]["role"] + + # Verify metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/chat/completions", metadata["endpoint"] + # Model is overwritten with response value (includes version suffix) + assert_match(/\Agpt-4o-mini/, metadata["model"]) + + # Verify metrics + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics.key?("time_to_first_token") + end + end + + def test_create_parses_advanced_token_metrics + VCR.use_cassette("openai/advanced_tokens") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.chat.completions.create( + messages: [{role: "user", content: "test"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + + refute_nil response + + span = rig.drain_one + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + + # Basic metrics should always be present + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + + # If the response includes token_details, verify they're parsed with correct naming + # - prompt_tokens_details.cached_tokens → prompt_cached_tokens + # - completion_tokens_details.reasoning_tokens → completion_reasoning_tokens + if response.usage.respond_to?(:prompt_tokens_details) && response.usage.prompt_tokens_details + details = response.usage.prompt_tokens_details + if details.respond_to?(:cached_tokens) && details.cached_tokens + assert metrics.key?("prompt_cached_tokens") + assert_equal details.cached_tokens, metrics["prompt_cached_tokens"] + end + end + + if response.usage.respond_to?(:completion_tokens_details) && response.usage.completion_tokens_details + details = response.usage.completion_tokens_details + if details.respond_to?(:reasoning_tokens) && details.reasoning_tokens + assert metrics.key?("completion_reasoning_tokens") + assert_equal details.reasoning_tokens, metrics["completion_reasoning_tokens"] + end + end + end + end + + def test_create_handles_vision_messages + VCR.use_cassette("openai/vision") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.chat.completions.create( + messages: [ + { + role: "user", + content: [ + {type: "text", text: "What color is this image?"}, + {type: "image_url", image_url: {url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/320px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"}} + ] + } + ], + model: "gpt-4o-mini", + max_tokens: 50 + ) + + refute_nil response + + span = rig.drain_one + input = JSON.parse(span.attributes["braintrust.input_json"]) + + assert_instance_of Array, input[0]["content"] + assert_equal "text", input[0]["content"][0]["type"] + assert_equal "image_url", input[0]["content"][1]["type"] + end + end + + def test_create_handles_tool_messages + VCR.use_cassette("openai/tool_messages") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + + tools = [{ + type: "function", + function: { + name: "get_weather", + description: "Get the current weather", + parameters: {type: "object", properties: {location: {type: "string"}}, required: ["location"]} + } + }] + + first_response = client.chat.completions.create( + messages: [{role: "user", content: "What's the weather in Paris?"}], + model: "gpt-4o-mini", + tools: tools, + max_tokens: 100 + ) + + tool_call = first_response.choices[0].message.tool_calls&.first + skip "Model didn't call tool" unless tool_call + + client.chat.completions.create( + messages: [ + {role: "user", content: "What's the weather in Paris?"}, + {role: "assistant", content: nil, tool_calls: [{id: tool_call.id, type: "function", function: {name: tool_call.function.name, arguments: tool_call.function.arguments}}]}, + {role: "tool", tool_call_id: tool_call.id, content: "Sunny, 22°C"} + ], + model: "gpt-4o-mini", + tools: tools, + max_tokens: 100 + ) + + spans = rig.drain 
+ span = spans[1] + input = JSON.parse(span.attributes["braintrust.input_json"]) + + assert_equal "tool", input[2]["role"] + assert_equal tool_call.id, input[2]["tool_call_id"] + end + end + + def test_create_records_exception_on_error + VCR.use_cassette("openai/create_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + client.chat.completions.create( + messages: [{role: "user", content: "test"}], + model: "gpt-4o-mini" + ) + end + + span = rig.drain_one + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + assert span.events.any? { |e| e.name == "exception" } + end + end + + # --- #stream_raw --- + + def test_stream_raw_aggregates_chunks + VCR.use_cassette("openai/streaming") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Count from 1 to 3"}], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: {include_usage: true} + ) + + full_content = "" + stream.each do |chunk| + delta_content = chunk.choices[0]&.delta&.content + full_content += delta_content if delta_content + end + + refute_empty full_content + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"].length > 0 + + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal true, metadata["stream"] + + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics.key?("time_to_first_token") + end + end + + def test_stream_raw_handles_multiple_choices + VCR.use_cassette("openai/streaming_multiple_choices") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Say either 'hello' or 'hi' in one word"}], + model: "gpt-4o-mini", + max_tokens: 10, + n: 2, + stream_options: {include_usage: true} + ) + + stream.each { |chunk| } # consume all chunks + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + + assert_equal 2, output.length, "Expected 2 choices in output" + assert output[0]["message"]["content"].length > 0 + assert output[1]["message"]["content"].length > 0 + + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal 2, metadata["n"] + end + end + + def test_stream_raw_closes_span_on_partial_consumption + VCR.use_cassette("openai/partial_stream") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Count from 1 to 10"}], + model: "gpt-4o-mini", + max_tokens: 50 + ) + + chunk_count = 0 + begin + stream.each do |chunk| + chunk_count += 1 + break if chunk_count >= 2 + end + rescue StopIteration + end + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + assert span.attributes.key?("braintrust.input_json") + end + end + + def test_stream_raw_records_exception_on_error + VCR.use_cassette("openai/stream_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "test"}], + model: "gpt-4o-mini" + ) + stream.each { |chunk| } + end + + # No span created when stream fails before consumption + spans = rig.drain + assert_empty spans, "No span should be created when stream fails before consumption" + end + end + + # --- #stream --- + + def test_stream_with_text_method + VCR.use_cassette("openai/streaming_text") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream( + messages: [{role: "user", content: "Count from 1 to 3"}], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: {include_usage: true} + ) + + full_text = "" + stream.text.each { |delta| full_text += delta } + + refute_empty full_text + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"].length > 0 + end + end + + def test_stream_with_get_final_completion + VCR.use_cassette("openai/streaming_get_final_completion") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream( + messages: [{role: "user", content: "Say hello"}], + model: "gpt-4o-mini", + max_tokens: 20, + stream_options: {include_usage: true} + ) + + completion = stream.get_final_completion + + refute_nil completion + refute_nil completion.choices[0].message.content + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"] + end + end + + def test_stream_with_get_output_text + VCR.use_cassette("openai/streaming_get_output_text") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.chat.completions.stream( + messages: [{role: "user", content: "Say hello"}], + model: "gpt-4o-mini", + max_tokens: 20, + stream_options: {include_usage: true} + ) + + output_text = stream.get_output_text + + refute_empty output_text + + # Single span created during consumption + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert output[0]["message"]["content"] + end + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/common_test.rb b/test/braintrust/contrib/openai/instrumentation/common_test.rb new file mode 100644 index 0000000..a40bc3e --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation/common_test.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/openai/instrumentation/common" + +class Braintrust::Contrib::OpenAI::Instrumentation::CommonTest < Minitest::Test + Common = Braintrust::Contrib::OpenAI::Instrumentation::Common + + # --- .set_json_attr --- + + def test_set_json_attr_sets_attribute_with_json + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.output", '{"foo":"bar"}']) + + Common.set_json_attr(span, "braintrust.output", {foo: "bar"}) + + span.verify + end + + def test_set_json_attr_skips_nil_object + span = Minitest::Mock.new + # No expectations - set_attribute should not be called + + Common.set_json_attr(span, "braintrust.output", nil) + + span.verify + end + + def test_set_json_attr_handles_array + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.input", "[1,2,3]"]) + + Common.set_json_attr(span, "braintrust.input", [1, 2, 3]) + + span.verify + end + + # --- .parse_usage_tokens --- + + def test_parse_usage_tokens_delegates_to_trace + usage = {prompt_tokens: 10, completion_tokens: 5} + expected_result = {"prompt_tokens" => 10, "completion_tokens" => 
5, "tokens" => 15} + + mock = Minitest::Mock.new + mock.expect(:call, expected_result, [usage]) + + Braintrust::Trace.stub(:parse_openai_usage_tokens, mock) do + result = Common.parse_usage_tokens(usage) + assert_equal expected_result, result + end + + mock.verify + end + + # --- .aggregate_streaming_chunks --- + + def test_aggregate_streaming_chunks_returns_empty_hash_for_empty_array + result = Common.aggregate_streaming_chunks([]) + + assert_equal({}, result) + end + + def test_aggregate_streaming_chunks_captures_top_level_fields + chunks = [ + {id: "chatcmpl-123", created: 1234567890, model: "gpt-4", system_fingerprint: "fp_abc", choices: []} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal "chatcmpl-123", result[:id] + assert_equal 1234567890, result[:created] + assert_equal "gpt-4", result[:model] + assert_equal "fp_abc", result[:system_fingerprint] + end + + def test_aggregate_streaming_chunks_aggregates_content + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", content: "Hello"}}]}, + {choices: [{index: 0, delta: {content: " world"}}]}, + {choices: [{index: 0, delta: {content: "!"}, finish_reason: "stop"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result[:choices].length + assert_equal 0, result[:choices][0][:index] + assert_equal "assistant", result[:choices][0][:message][:role] + assert_equal "Hello world!", result[:choices][0][:message][:content] + assert_equal "stop", result[:choices][0][:finish_reason] + end + + def test_aggregate_streaming_chunks_aggregates_tool_calls + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", tool_calls: [{id: "call_123", type: "function", function: {name: "get_weather", arguments: '{"loc'}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: 'ation":'}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: '"NYC"}'}}]}, finish_reason: "tool_calls"}]} + ] + + result = 
Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result[:choices].length + tool_calls = result[:choices][0][:message][:tool_calls] + assert_equal 1, tool_calls.length + assert_equal "call_123", tool_calls[0][:id] + assert_equal "function", tool_calls[0][:type] + assert_equal "get_weather", tool_calls[0][:function][:name] + assert_equal '{"location":"NYC"}', tool_calls[0][:function][:arguments] + end + + def test_aggregate_streaming_chunks_captures_usage + chunks = [ + {choices: [{index: 0, delta: {content: "Hi"}}]}, + {choices: [{index: 0, delta: {}, finish_reason: "stop"}], usage: {prompt_tokens: 10, completion_tokens: 2, total_tokens: 12}} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal({prompt_tokens: 10, completion_tokens: 2, total_tokens: 12}, result[:usage]) + end + + def test_aggregate_streaming_chunks_handles_multiple_choices + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", content: "A"}}, {index: 1, delta: {role: "assistant", content: "B"}}]}, + {choices: [{index: 0, delta: {content: "1"}}, {index: 1, delta: {content: "2"}}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 2, result[:choices].length + assert_equal "A1", result[:choices][0][:message][:content] + assert_equal "B2", result[:choices][1][:message][:content] + end + + def test_aggregate_streaming_chunks_sets_nil_content_when_empty + chunks = [ + {choices: [{index: 0, delta: {role: "assistant", tool_calls: [{id: "call_1", type: "function", function: {name: "foo", arguments: "{}"}}]}, finish_reason: "tool_calls"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_nil result[:choices][0][:message][:content] + end + + # --- .aggregate_responses_events --- + + def test_aggregate_responses_events_returns_empty_hash_for_empty_array + result = Common.aggregate_responses_events([]) + + assert_equal({}, result) + end + + def test_aggregate_responses_events_extracts_from_completed_event + response = 
Struct.new(:id, :output, :usage).new("resp_123", [{type: "message", content: "Hello"}], {input_tokens: 5, output_tokens: 3}) + completed_event = Struct.new(:type, :response).new(:"response.completed", response) + events = [completed_event] + + result = Common.aggregate_responses_events(events) + + assert_equal "resp_123", result[:id] + assert_equal [{type: "message", content: "Hello"}], result[:output] + assert_equal({input_tokens: 5, output_tokens: 3}, result[:usage]) + end + + def test_aggregate_responses_events_ignores_non_completed_events + other_event = Struct.new(:type).new(:"response.created") + events = [other_event] + + result = Common.aggregate_responses_events(events) + + assert_equal({}, result) + end + + def test_aggregate_responses_events_handles_missing_response_fields + response = Struct.new(:id, :output, :usage).new(nil, nil, nil) + completed_event = Struct.new(:type, :response).new(:"response.completed", response) + events = [completed_event] + + result = Common.aggregate_responses_events(events) + + assert_nil result[:id] + assert_nil result[:output] + assert_nil result[:usage] + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/responses_test.rb b/test/braintrust/contrib/openai/instrumentation/responses_test.rb new file mode 100644 index 0000000..26a3694 --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation/responses_test.rb @@ -0,0 +1,323 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/openai/instrumentation/responses" + +class Braintrust::Contrib::OpenAI::Instrumentation::ResponsesTest < Minitest::Test + Responses = Braintrust::Contrib::OpenAI::Instrumentation::Responses + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Responses::InstanceMethods]) + mock.expect(:prepend, nil, [Responses::InstanceMethods]) + + 
base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Responses.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + # Should not raise or double-prepend + Responses.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Responses::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Responses.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + assert Responses.applied?(base) + end +end + +# E2E tests for Responses instrumentation +class Braintrust::Contrib::OpenAI::Instrumentation::ResponsesE2ETest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + end + + # --- #create --- + + def test_create_creates_span_with_correct_attributes + VCR.use_cassette("openai_responses_create_non_streaming") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "What is 2+2?" 
+ ) + + refute_nil response + refute_nil response.output + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Verify input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "What is 2+2?", input + + # Verify output + assert span.attributes.key?("braintrust.output_json") + + # Verify metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal "gpt-4o-mini", metadata["model"] + assert_equal "You are a helpful assistant.", metadata["instructions"] + + # Verify metrics include time_to_first_token + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_create_captures_metadata_field + VCR.use_cassette("openai/responses_with_metadata") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "gpt-4o-mini", + input: "Say hello", + metadata: {user_id: "test-123", session: "abc"} + ) + + refute_nil response + + span = rig.drain_one + span_metadata = JSON.parse(span.attributes["braintrust.metadata"]) + + assert_equal({"user_id" => "test-123", "session" => "abc"}, span_metadata["metadata"]) + end + end + + def test_create_captures_truncation_field + VCR.use_cassette("openai/responses_with_truncation") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "gpt-4o-mini", + input: "Say hello", + truncation: "auto" + ) + + refute_nil response + + span = rig.drain_one + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + + assert_equal "auto", metadata["truncation"] + end + end + + def test_create_captures_reasoning_field + VCR.use_cassette("openai/responses_with_reasoning") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! + + client = OpenAI::Client.new(api_key: get_openai_key) + response = client.responses.create( + model: "o4-mini", + input: "What is 2+2?", + reasoning: {effort: "low"} + ) + + refute_nil response + + span = rig.drain_one + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + + assert_equal({"effort" => "low"}, metadata["reasoning"]) + end + end + + def test_create_captures_previous_response_id_field + VCR.use_cassette("openai/responses_with_previous_response_id") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + + # First request to get a response ID + first_response = client.responses.create( + model: "gpt-4o-mini", + input: "Remember the number 42" + ) + + # Second request referencing the first + response = client.responses.create( + model: "gpt-4o-mini", + input: "What number did I ask you to remember?", + previous_response_id: first_response.id + ) + + refute_nil response + + spans = rig.drain + assert_equal 2, spans.length + + # Check the second span has previous_response_id in metadata + metadata = JSON.parse(spans[1].attributes["braintrust.metadata"]) + assert_equal first_response.id, metadata["previous_response_id"] + end + end + + def test_create_records_exception_on_error + VCR.use_cassette("openai/responses_create_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + client.responses.create( + model: "gpt-4o-mini", + input: "test" + ) + end + + span = rig.drain_one + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + assert span.events.any? { |e| e.name == "exception" } + end + end + + # --- #stream --- + + def test_stream_aggregates_events + VCR.use_cassette("openai_responses_create_streaming") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Count from 1 to 3" + ) + + event_count = 0 + stream.each { |event| event_count += 1 } + + assert event_count > 0 + + # Single span created during consumption + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Span has input + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "Count from 1 to 3", input + + # Span has output + assert span.attributes.key?("braintrust.output_json") + + # Verify metadata + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal true, metadata["stream"] + + # Verify metrics include time_to_first_token + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_stream_closes_span_on_partial_consumption + VCR.use_cassette("openai_responses_stream_partial") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + client = OpenAI::Client.new(api_key: get_openai_key) + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Count from 1 to 10" + ) + + event_count = 0 + begin + stream.each do |event| + event_count += 1 + break if event_count >= 3 + end + rescue StopIteration + end + + # Single span created during consumption + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + assert span.attributes.key?("braintrust.input_json") + end + end + + def test_stream_records_exception_on_error + VCR.use_cassette("openai/responses_stream_error") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + assert_raises do + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "test" + ) + stream.each { |event| } + end + + # No span created when stream fails before consumption + spans = rig.drain + assert_empty spans, "No span should be created when stream fails before consumption" + end + end +end diff --git a/test/braintrust/contrib/openai/instrumentation_test.rb b/test/braintrust/contrib/openai/instrumentation_test.rb new file mode 100644 index 0000000..48b9b53 --- /dev/null +++ b/test/braintrust/contrib/openai/instrumentation_test.rb @@ -0,0 +1,161 @@ +# frozen_string_literal: true + +require "test_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/openai/patcher" + +# Cross-cutting tests that verify chat and responses instrumentation work together +class Braintrust::Contrib::OpenAI::InstrumentationTest < Minitest::Test + def setup + if Gem.loaded_specs["ruby-openai"] + skip "openai gem not available (found ruby-openai gem instead)" + elsif !Gem.loaded_specs["openai"] + skip "openai gem not available" + end + + require "openai" unless defined?(OpenAI) + end + + # --- Instance-level instrumentation 
--- + + def test_instance_instrumentation_only_patches_target_client + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::OpenAI::ChatPatcher.patched? + + rig = setup_otel_test_rig + + # Create two clients BEFORE any patching + client_traced = OpenAI::Client.new(api_key: "test-key") + client_untraced = OpenAI::Client.new(api_key: "test-key") + + # Verify neither client is patched initially + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should not be patched initially" + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?, + "class should not be patched initially" + + # Instrument only one client (instance-level) + Braintrust.instrument!(:openai, target: client_traced, tracer_provider: rig.tracer_provider) + + # Only the traced client should be patched + assert Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should be patched after instrument!" + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should NOT be patched after instrument!" 
+ + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end + + # --- Chat and Responses cross-cutting tests --- + + def test_chat_and_responses_do_not_interfere + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + VCR.use_cassette("openai_chat_and_responses_no_interference") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: get_openai_key) + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + # Make a chat completion request + chat_response = client.chat.completions.create( + messages: [{role: "user", content: "Say hello"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + refute_nil chat_response + + # Make a responses API request + responses_response = client.responses.create( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "Say goodbye" + ) + refute_nil responses_response + refute_nil responses_response.output + + # Verify both spans are correct + spans = rig.drain + assert_equal 2, spans.length + + chat_span = spans[0] + assert_equal "Chat Completion", chat_span.name + chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + assert_equal "/v1/chat/completions", chat_metadata["endpoint"] + + chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) + assert_instance_of Array, chat_input + assert_equal "user", chat_input[0]["role"] + + responses_span = spans[1] + assert_equal "openai.responses.create", responses_span.name + responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) + assert_equal "/v1/responses", responses_metadata["endpoint"] + + responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) + assert_equal "Say goodbye", responses_input + 
end + end + + def test_streaming_chat_and_responses_do_not_interfere + VCR.use_cassette("openai_streaming_chat_and_responses_no_interference") do + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + client = OpenAI::Client.new(api_key: get_openai_key) + Braintrust.instrument!(:openai, target: client, tracer_provider: rig.tracer_provider) + + # Make a streaming chat completion request + chat_content = "" + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Count from 1 to 3"}], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: {include_usage: true} + ) + stream.each do |chunk| + delta_content = chunk.choices[0]&.delta&.content + chat_content += delta_content if delta_content + end + refute_empty chat_content + + # Make a streaming responses API request + responses_event_count = 0 + responses_stream = client.responses.stream( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "Say hello" + ) + responses_stream.each { |event| responses_event_count += 1 } + assert responses_event_count > 0 + + # Verify spans are correct (1 per streaming operation: created during consumption) + spans = rig.drain + assert_equal 2, spans.length + + # Chat streaming: single span created during consumption + chat_span = spans.find { |s| s.name == "Chat Completion" } + assert chat_span, "Expected chat span" + + chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + assert_equal true, chat_metadata["stream"] + + chat_output = JSON.parse(chat_span.attributes["braintrust.output_json"]) + assert chat_output[0]["message"]["content"].length > 0 + + # Responses streaming: single span created during consumption + responses_span = spans.find { |s| s.name == "openai.responses.create" } + assert responses_span, "Expected responses span" + + responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) + assert_equal true, responses_metadata["stream"] 
+ end + end +end diff --git a/test/braintrust/contrib/openai/integration_helper.rb b/test/braintrust/contrib/openai/integration_helper.rb new file mode 100644 index 0000000..0b4075d --- /dev/null +++ b/test/braintrust/contrib/openai/integration_helper.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +# Test helpers for OpenAI integration tests. +# Provides gem loading helpers that handle the official openai gem vs ruby-openai disambiguation. + +module Braintrust + module Contrib + module OpenAI + module IntegrationHelper + # Skip test unless official openai gem is available (not ruby-openai). + # Loads the gem if available. + def skip_unless_openai! + if Gem.loaded_specs["ruby-openai"] + skip "openai gem not available (found ruby-openai gem instead)" + end + + unless Gem.loaded_specs["openai"] + skip "openai gem not available" + end + + require "openai" unless defined?(::OpenAI) + end + + # Load official openai gem if available (doesn't skip, for tests that handle both states). + def load_openai_if_available + if Gem.loaded_specs["openai"] && !Gem.loaded_specs["ruby-openai"] + require "openai" unless defined?(::OpenAI) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/openai/integration_test.rb b/test/braintrust/contrib/openai/integration_test.rb new file mode 100644 index 0000000..d13945d --- /dev/null +++ b/test/braintrust/contrib/openai/integration_test.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +class Braintrust::Contrib::OpenAI::IntegrationTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + load_openai_if_available + @integration = Braintrust::Contrib::OpenAI::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :openai, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["openai"], @integration.gem_names + end + + # --- 
.require_paths --- + + def test_require_paths + assert_equal ["openai"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "0.1.0", @integration.minimum_version + end + + # --- .loaded? --- + + def test_loaded_returns_true_when_openai_internal_defined + skip "OpenAI gem not loaded" unless defined?(::OpenAI::Internal) + + assert @integration.loaded?, "Should be loaded when ::OpenAI::Internal is defined" + end + + def test_loaded_returns_false_when_openai_internal_not_defined + skip "OpenAI gem is loaded" if defined?(::OpenAI::Internal) + + refute @integration.loaded?, "Should not be loaded when ::OpenAI::Internal is not defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/openai/patcher_test.rb b/test/braintrust/contrib/openai/patcher_test.rb new file mode 100644 index 0000000..4b0f7af --- /dev/null +++ b/test/braintrust/contrib/openai/patcher_test.rb @@ -0,0 +1,180 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/openai/patcher" + +class Braintrust::Contrib::OpenAI::ChatPatcherTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_openai_client_defined + assert Braintrust::Contrib::OpenAI::ChatPatcher.applicable? + end + + # --- .patched? 
--- + + def test_patched_returns_false_when_module_not_included + fake_completions = Class.new + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + fake_completions = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + assert Braintrust::Contrib::OpenAI::ChatPatcher.patched? + end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions + end + + mock_chain(:chat, :completions, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::OpenAI::ChatPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_completions = Minitest::Mock.new + fake_completions.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions]) + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch + fake_completions.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions]) + + mock_chain(:chat, :completions, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch(target: client) + end + terminal.verify + 
end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end + +class Braintrust::Contrib::OpenAI::ResponsesPatcherTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + def setup + skip_unless_openai! + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_responses_method_exists + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_module_not_included + fake_responses = Class.new + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + refute Braintrust::Contrib::OpenAI::ResponsesPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + fake_responses = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.patched? 
+ end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::OpenAI::ResponsesPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::OpenAI::Instrumentation::Responses + end + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_responses = Minitest::Mock.new + fake_responses.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Responses]) + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch + fake_responses.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Responses]) + + mock_chain(:responses, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end diff --git a/test/braintrust/contrib/openai/ruby-openai_test.rb b/test/braintrust/contrib/openai/ruby-openai_test.rb new file mode 100644 index 0000000..1041358 --- /dev/null +++ b/test/braintrust/contrib/openai/ruby-openai_test.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + 
+require "test_helper" +require_relative "integration_helper" +require_relative "../ruby_openai/integration_helper" + +# This test verifies that the OpenAI integration correctly identifies when the +# official `openai` gem is loaded vs other OpenAI-like gems (e.g., ruby-openai). +# +# Tests are grouped by which gem scenario they verify: +# OfficialOpenAIGemTest - runs with: appraisal openai +# RubyOpenAIGemTest - runs with: appraisal ruby-openai + +# Tests that verify behavior when the official openai gem is available +class Braintrust::Contrib::OpenAI::OfficialOpenAIGemTest < Minitest::Test + include Braintrust::Contrib::OpenAI::IntegrationHelper + + Integration = Braintrust::Contrib::OpenAI::Integration + + def setup + skip_unless_openai! + end + + def test_loaded_returns_true + assert Integration.loaded?, + "loaded? should return true when official openai gem is loaded" + end + + def test_instrument_succeeds + result = Braintrust.instrument!(:openai) + + # instrument! returns true if patching succeeded or was already done + assert result, "instrument! should return truthy for official gem" + + # At least one patcher should be in patched state + any_patched = Integration.patchers.any?(&:patched?) + assert any_patched, "at least one patcher should be patched for official gem" + end + + def test_openai_internal_is_defined + # OpenAI::Internal should be defined for official gem + assert defined?(::OpenAI::Internal), + "OpenAI::Internal should be defined when official gem is loaded" + end +end + +# Tests that verify behavior when ruby-openai gem is loaded (not official openai) +class Braintrust::Contrib::OpenAI::RubyOpenAIGemTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + Integration = Braintrust::Contrib::OpenAI::Integration + + def setup + skip_unless_ruby_openai! 
+ end + + def test_loaded_returns_false + # When ruby-openai is loaded, the OpenAI integration should NOT be loaded + # because OpenAI::Internal is not defined + refute Integration.loaded?, + "loaded? should return false when ruby-openai gem is loaded" + end + + def test_not_available + # The OpenAI integration should NOT be available when only ruby-openai is loaded + refute Integration.available?, + "Should NOT be available when only ruby-openai gem is loaded" + end +end diff --git a/test/braintrust/trace/openai_test.rb b/test/braintrust/trace/openai_test.rb deleted file mode 100644 index 22e3233..0000000 --- a/test/braintrust/trace/openai_test.rb +++ /dev/null @@ -1,1221 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" - -class Braintrust::Trace::OpenAITest < Minitest::Test - def setup - # Skip all OpenAI tests if the gem is not available - skip "OpenAI gem not available" unless defined?(OpenAI) - - # Check which gem is loaded by looking at Gem.loaded_specs - # ruby-openai has gem name "ruby-openai" - # official openai gem has gem name "openai" - if Gem.loaded_specs["ruby-openai"] - skip "openai gem not available (found ruby-openai gem instead)" - elsif !Gem.loaded_specs["openai"] - skip "Could not determine which OpenAI gem is loaded" - end - end - - def test_wrap_creates_span_for_chat_completions - VCR.use_cassette("openai/chat_completions") do - require "openai" - - # Set up test rig (includes Braintrust processor) - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it with Braintrust tracing - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a simple chat completion request with additional params to test metadata capture - response = client.chat.completions.create( - messages: [ - {role: "system", content: "You are a test assistant."}, - {role: "user", content: "Say 'test'"} - ], - model: "gpt-4o-mini", - max_tokens: 10, - temperature: 0.5 
- ) - - # Verify response - refute_nil response - refute_nil response.choices[0].message.content - - # Drain and verify span - span = rig.drain_one - - # Verify span name matches Go SDK - assert_equal "Chat Completion", span.name - - # Verify braintrust.input_json contains messages - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 2, input.length - assert_equal "system", input[0]["role"] - assert_equal "You are a test assistant.", input[0]["content"] - assert_equal "user", input[1]["role"] - assert_equal "Say 'test'", input[1]["content"] - - # Verify braintrust.output_json contains choices - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - refute_nil output[0]["message"]["content"] - refute_nil output[0]["finish_reason"] - - # Verify braintrust.metadata contains request and response metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/chat/completions", metadata["endpoint"] - assert_equal "gpt-4o-mini", metadata["model"] - assert_equal 10, metadata["max_tokens"] - assert_equal 0.5, metadata["temperature"] - refute_nil metadata["id"] - refute_nil metadata["created"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token 
metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_handles_vision_messages_with_image_url - VCR.use_cassette("openai/vision") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a vision request with content array (image_url + text) - response = client.chat.completions.create( - messages: [ - { - role: "user", - content: [ - {type: "text", text: "What color is this image?"}, - { - type: "image_url", - image_url: { - url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/320px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - } - } - ] - } - ], - model: "gpt-4o-mini", - max_tokens: 50 - ) - - # Verify response - refute_nil response - refute_nil response.choices[0].message.content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify braintrust.input_json contains messages with content array - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - - # Content should be an array, not a string - assert_instance_of Array, input[0]["content"] - assert_equal 2, input[0]["content"].length - - # First element should be text - assert_equal "text", input[0]["content"][0]["type"] - assert_equal "What color is this image?", input[0]["content"][0]["text"] - - # Second element should be image_url - assert_equal "image_url", input[0]["content"][1]["type"] - assert input[0]["content"][1]["image_url"].key?("url") - assert_match(/wikimedia/, input[0]["content"][1]["image_url"]["url"]) - - # Verify output still works - assert 
span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - refute_nil output[0]["message"]["content"] - end - end - - def test_wrap_handles_tool_messages_with_tool_call_id - VCR.use_cassette("openai/tool_messages") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # First request - model will use a tool - tools = [ - { - type: "function", - function: { - name: "get_weather", - description: "Get the current weather", - parameters: { - type: "object", - properties: { - location: {type: "string", description: "City name"} - }, - required: ["location"] - } - } - } - ] - - first_response = client.chat.completions.create( - messages: [ - {role: "user", content: "What's the weather in Paris?"} - ], - model: "gpt-4o-mini", - tools: tools, - max_tokens: 100 - ) - - # Get the tool call from response - tool_call = first_response.choices[0].message.tool_calls&.first - skip "Model didn't call tool" unless tool_call - - # Second request - provide tool result with tool_call_id - second_response = client.chat.completions.create( - messages: [ - {role: "user", content: "What's the weather in Paris?"}, - { - role: "assistant", - content: nil, - tool_calls: [ - { - id: tool_call.id, - type: "function", - function: { - name: tool_call.function.name, - arguments: tool_call.function.arguments - } - } - ] - }, - { - role: "tool", - tool_call_id: tool_call.id, - content: "Sunny, 22°C" - } - ], - model: "gpt-4o-mini", - tools: tools, - max_tokens: 100 - ) - - # Verify response - refute_nil second_response - refute_nil second_response.choices[0].message.content - - # Drain all spans (we have 2: first request + second request) - spans = rig.drain - assert_equal 2, spans.length, "Should have 2 spans (one for 
each request)" - - # We're testing the second span (second request) - span = spans[1] - - # Verify braintrust.input_json contains all message fields - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 3, input.length - - # First message: user - assert_equal "user", input[0]["role"] - assert_equal "What's the weather in Paris?", input[0]["content"] - - # Second message: assistant with tool_calls - assert_equal "assistant", input[1]["role"] - assert input[1].key?("tool_calls"), "assistant message should have tool_calls" - assert_equal 1, input[1]["tool_calls"].length - assert_equal tool_call.id, input[1]["tool_calls"][0]["id"] - assert_equal "function", input[1]["tool_calls"][0]["type"] - assert_equal tool_call.function.name, input[1]["tool_calls"][0]["function"]["name"] - - # Third message: tool response with tool_call_id - assert_equal "tool", input[2]["role"] - assert_equal tool_call.id, input[2]["tool_call_id"], "tool message should preserve tool_call_id" - assert_equal "Sunny, 22°C", input[2]["content"] - - # Verify output contains tool_calls - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - refute_nil output[0]["message"]["content"] - end - end - - def test_wrap_parses_advanced_token_metrics - VCR.use_cassette("openai/advanced_tokens") do - require "openai" - - # This test verifies that we properly parse token_details fields - # Note: We're testing with a mock since we can't control what OpenAI returns - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request (ideally with a model that returns detailed metrics) - # For now, we'll just make a normal request and verify the metrics structure - 
response = client.chat.completions.create( - messages: [ - {role: "user", content: "test"} - ], - model: "gpt-4o-mini", - max_tokens: 10 - ) - - # Verify response - refute_nil response - - # Drain and verify span - span = rig.drain_one - - # Verify braintrust.metrics exists - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - - # Basic metrics should always be present - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - - # If the response includes token_details, they should be parsed with correct naming - # The response.usage object may have: - # - prompt_tokens_details.cached_tokens → prompt_cached_tokens - # - prompt_tokens_details.audio_tokens → prompt_audio_tokens - # - completion_tokens_details.reasoning_tokens → completion_reasoning_tokens - # - completion_tokens_details.audio_tokens → completion_audio_tokens - # - # We can't force OpenAI to return these, but if they exist, we verify the naming - - if response.usage.respond_to?(:prompt_tokens_details) && response.usage.prompt_tokens_details - details = response.usage.prompt_tokens_details - if details.respond_to?(:cached_tokens) && details.cached_tokens - assert metrics.key?("prompt_cached_tokens"), "Should have prompt_cached_tokens" - assert_equal details.cached_tokens, metrics["prompt_cached_tokens"] - end - end - - if response.usage.respond_to?(:completion_tokens_details) && response.usage.completion_tokens_details - details = response.usage.completion_tokens_details - if details.respond_to?(:reasoning_tokens) && details.reasoning_tokens - assert metrics.key?("completion_reasoning_tokens"), "Should have completion_reasoning_tokens" - assert_equal details.reasoning_tokens, metrics["completion_reasoning_tokens"] - end - end - end - end - - def test_wrap_handles_streaming_chat_completions - VCR.use_cassette("openai/streaming") do - require "openai" - - # Set up test rig - rig = 
setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request - stream = client.chat.completions.stream_raw( - messages: [ - {role: "user", content: "Count from 1 to 3"} - ], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: { - include_usage: true # Request usage stats in stream - } - ) - - # Consume the stream - full_content = "" - stream.each do |chunk| - delta_content = chunk.choices[0]&.delta&.content - full_content += delta_content if delta_content - end - - # Verify we got content - refute_empty full_content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - assert_equal "Count from 1 to 3", input[0]["content"] - - # Verify output was aggregated from stream - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - assert output[0]["message"]["content"], "Should have aggregated content" - assert output[0]["message"]["content"].length > 0, "Content should not be empty" - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal true, metadata["stream"] - assert_match(/gpt-4o-mini/, metadata["model"]) # Model may include version suffix - - # Verify metrics include time_to_first_token and usage tokens - assert span.attributes.key?("braintrust.metrics"), "Should 
have braintrust.metrics" - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - - # Verify usage metrics are present (when stream_options.include_usage is set) - assert metrics.key?("prompt_tokens"), "Should have prompt_tokens metric" - assert metrics["prompt_tokens"] > 0, "prompt_tokens should be > 0" - assert metrics.key?("completion_tokens"), "Should have completion_tokens metric" - assert metrics["completion_tokens"] > 0, "completion_tokens should be > 0" - assert metrics.key?("tokens"), "Should have tokens metric" - assert metrics["tokens"] > 0, "tokens should be > 0" - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - end - end - - def test_wrap_closes_span_for_partially_consumed_stream - VCR.use_cassette("openai/partial_stream") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request - stream = client.chat.completions.stream_raw( - messages: [ - {role: "user", content: "Count from 1 to 10"} - ], - model: "gpt-4o-mini", - max_tokens: 50 - ) - - # Consume only part of the stream - chunk_count = 0 - begin - stream.each do |chunk| - chunk_count += 1 - break if chunk_count >= 2 # Stop after 2 chunks - end - rescue StopIteration - # Expected when breaking out of iteration - end - - # Span should be finished even though we didn't consume all chunks - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - - # Note: output 
will be partially aggregated - end - end - - def test_wrap_records_exception_for_create_errors - VCR.use_cassette("openai/create_error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key to trigger an error - client = OpenAI::Client.new(api_key: "invalid_key") - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request that will fail - error = assert_raises do - client.chat.completions.create( - messages: [ - {role: "user", content: "test"} - ], - model: "gpt-4o-mini" - ) - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured in status description - refute_nil span.status.description - assert span.status.description.length > 0 - - # Verify exception event was recorded - assert span.events.any? 
{ |event| event.name == "exception" }, "Should have an exception event" - - exception_event = span.events.find { |event| event.name == "exception" } - assert exception_event.attributes.key?("exception.type"), "Should have exception type" - assert exception_event.attributes.key?("exception.message"), "Should have exception message" - end - end - - def test_wrap_records_exception_for_stream_errors - VCR.use_cassette("openai/stream_error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key to trigger an error - client = OpenAI::Client.new(api_key: "invalid_key") - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request that will fail - error = assert_raises do - stream = client.chat.completions.stream_raw( - messages: [ - {role: "user", content: "test"} - ], - model: "gpt-4o-mini" - ) - - # Error occurs when we try to consume the stream - stream.each do |chunk| - # Won't get here - end - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured in status description - refute_nil span.status.description - - # Verify exception event was recorded - assert span.events.any? 
{ |event| event.name == "exception" }, "Should have an exception event" - end - end - - def test_wrap_responses_create_non_streaming - require "openai" - - VCR.use_cassette("openai_responses_create_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # Make a non-streaming responses.create request - response = client.responses.create( - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "What is 2+2?" - ) - - # Verify response - refute_nil response - refute_nil response.output - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify braintrust.input_json contains input - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "What is 2+2?", input - - # Verify braintrust.output_json contains output - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains request metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal "gpt-4o-mini", metadata["model"] - assert_equal "You are a helpful assistant.", metadata["instructions"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["tokens"] > 0 if metrics["tokens"] - end - end - - 
def test_wrap_responses_create_streaming - require "openai" - - VCR.use_cassette("openai_responses_create_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # Make a streaming responses request using .stream method - stream = client.responses.stream( - model: "gpt-4o-mini", - input: "Count from 1 to 3" - ) - - # Consume the stream - event_count = 0 - stream.each do |event| - event_count += 1 - end - - # Verify we got events - assert event_count > 0, "Should have received streaming events" - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "Count from 1 to 3", input - - # Verify output was aggregated from stream - assert span.attributes.key?("braintrust.output_json"), "Missing braintrust.output_json. 
Keys: #{span.attributes.keys}" - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output, "Output is nil: #{output.inspect}" - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal true, metadata["stream"] - - # Verify metrics were captured if available - if span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["tokens"] > 0 if metrics["tokens"] - end - end - end - - def test_wrap_responses_stream_partial_consumption - require "openai" - - VCR.use_cassette("openai_responses_stream_partial") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # Make a streaming request - stream = client.responses.stream( - model: "gpt-4o-mini", - input: "Count from 1 to 10" - ) - - # Consume only part of the stream - event_count = 0 - begin - stream.each do |event| - event_count += 1 - break if event_count >= 3 # Stop after 3 events - end - rescue StopIteration - # Expected when breaking out of iteration - end - - # Span should be finished even though we didn't consume all events - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - end - end - - def test_chat_and_responses_do_not_interfere - require "openai" - - # This test verifies that chat completions and responses API can coexist - # without interfering with each other when 
both wrappers are active - VCR.use_cassette("openai_chat_and_responses_no_interference") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it (wraps BOTH chat and responses) - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # First, make a chat completion request - chat_response = client.chat.completions.create( - messages: [{role: "user", content: "Say hello"}], - model: "gpt-4o-mini", - max_tokens: 10 - ) - refute_nil chat_response - - # Then, make a responses API request - # This is where the bug would manifest if the wrappers interfere - responses_response = client.responses.create( - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Say goodbye" - ) - refute_nil responses_response - refute_nil responses_response.output - - # Drain both spans - spans = rig.drain - assert_equal 2, spans.length, "Should have 2 spans (chat + responses)" - - # Verify first span is for chat completions - chat_span = spans[0] - assert_equal "Chat Completion", chat_span.name - chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - assert_equal "/v1/chat/completions", chat_metadata["endpoint"] - assert_equal "gpt-4o-mini", chat_metadata["model"] - - # Verify input is messages array (chat API structure) - chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) - assert_instance_of Array, chat_input - assert_equal "user", chat_input[0]["role"] - assert_equal "Say hello", chat_input[0]["content"] - - responses_span = spans[1] - assert_equal "openai.responses.create", responses_span.name - - responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) - assert_equal "/v1/responses", responses_metadata["endpoint"] - assert_equal "gpt-4o-mini", 
responses_metadata["model"] - assert_equal "You are a helpful assistant.", responses_metadata["instructions"] - - responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) - assert_equal "Say goodbye", responses_input - end - end - - def test_streaming_chat_and_responses_do_not_interfere - require "openai" - - # This test verifies that streaming for both chat completions and responses API - # work correctly without interfering when both streaming wrappers are active. - # This is critical because streaming uses different aggregation mechanisms. - VCR.use_cassette("openai_streaming_chat_and_responses_no_interference") do - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it (wraps BOTH chat and responses) - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) - - # First, make a STREAMING chat completion request - chat_content = "" - stream = client.chat.completions.stream_raw( - messages: [{role: "user", content: "Count from 1 to 3"}], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: {include_usage: true} - ) - stream.each do |chunk| - delta_content = chunk.choices[0]&.delta&.content - chat_content += delta_content if delta_content - end - refute_empty chat_content - - # Then, make a STREAMING responses API request - # This is where the bug would manifest if streaming wrappers interfere - responses_event_count = 0 - responses_stream = client.responses.stream( - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Say hello" - ) - responses_stream.each do |event| - responses_event_count += 1 - end - assert responses_event_count > 0, "Should have received streaming events from responses API" - - # Drain both spans - spans = rig.drain - assert_equal 2, 
spans.length, "Should have 2 spans (chat streaming + responses streaming)" - - # Verify first span is for STREAMING chat completions - chat_span = spans[0] - assert_equal "Chat Completion", chat_span.name - chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - assert_equal "/v1/chat/completions", chat_metadata["endpoint"] - assert_equal true, chat_metadata["stream"], "Chat span should have stream flag" - assert_match(/gpt-4o-mini/, chat_metadata["model"]) - - # Verify chat input is messages array (chat API structure) - chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) - assert_instance_of Array, chat_input - assert_equal "user", chat_input[0]["role"] - assert_equal "Count from 1 to 3", chat_input[0]["content"] - - # Verify chat output was aggregated from stream chunks - chat_output = JSON.parse(chat_span.attributes["braintrust.output_json"]) - assert_equal 1, chat_output.length - assert_equal "assistant", chat_output[0]["message"]["role"] - refute_nil chat_output[0]["message"]["content"] - assert chat_output[0]["message"]["content"].length > 0, "Chat content should be aggregated" - - responses_span = spans[1] - assert_equal "openai.responses.create", responses_span.name - - responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) - assert_equal "/v1/responses", responses_metadata["endpoint"] - assert_equal true, responses_metadata["stream"] - assert_match(/gpt-4o-mini/, responses_metadata["model"]) - assert_equal "You are a helpful assistant.", responses_metadata["instructions"] - - responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) - assert_equal "Say hello", responses_input - - assert responses_span.attributes.key?("braintrust.output_json") - responses_output = JSON.parse(responses_span.attributes["braintrust.output_json"]) - refute_nil responses_output - end - end - - def test_traced_vs_raw_chat_completions_non_streaming - require "openai" - - # This test verifies 
that tracing doesn't mutate the response - # by comparing output from a raw client vs a traced client - VCR.use_cassette("openai_traced_vs_raw_chat_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Make identical requests - params = { - messages: [{role: "user", content: "Say hello"}], - model: "gpt-4o-mini", - max_tokens: 10 - } - - raw_response = raw_client.chat.completions.create(**params) - traced_response = traced_client.chat.completions.create(**params) - - assert_match(/gpt-4o-mini/, raw_response.model) - assert_match(/gpt-4o-mini/, traced_response.model) - assert_equal raw_response.choices.length, traced_response.choices.length - assert_equal raw_response.choices[0].message.role, traced_response.choices[0].message.role - refute_nil raw_response.choices[0].message.content - refute_nil traced_response.choices[0].message.content - assert_equal raw_response.choices[0].finish_reason, traced_response.choices[0].finish_reason - assert_operator raw_response.usage.total_tokens, :>, 0 - assert_operator traced_response.usage.total_tokens, :>, 0 - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "Chat Completion", spans[0].name - end - end - - def test_traced_vs_raw_chat_completions_streaming - require "openai" - - # This test verifies that tracing doesn't mutate streaming responses - VCR.use_cassette("openai_traced_vs_raw_chat_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Make identical streaming requests - 
params = { - messages: [{role: "user", content: "Count from 1 to 3"}], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: {include_usage: true} - } - - raw_chunks = [] - raw_stream = raw_client.chat.completions.stream_raw(**params) - raw_stream.each do |chunk| - raw_chunks << chunk - end - - traced_chunks = [] - traced_stream = traced_client.chat.completions.stream_raw(**params) - traced_stream.each do |chunk| - traced_chunks << chunk - end - - assert_operator (raw_chunks.length - traced_chunks.length).abs, :<=, 2 - if raw_chunks[0].respond_to?(:model) && traced_chunks[0].respond_to?(:model) - assert_match(/gpt-4o-mini/, raw_chunks[0].model) - assert_match(/gpt-4o-mini/, traced_chunks[0].model) - end - - raw_content = raw_chunks.map { |c| c.choices[0]&.delta&.content }.compact.join - traced_content = traced_chunks.map { |c| c.choices[0]&.delta&.content }.compact.join - refute_empty raw_content - refute_empty traced_content - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "Chat Completion", spans[0].name - end - end - - def test_traced_vs_raw_responses_non_streaming - require "openai" - - # This test verifies that tracing doesn't mutate responses API output - VCR.use_cassette("openai_traced_vs_raw_responses_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless raw_client.respond_to?(:responses) - - # Make identical requests - params = { - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Say hello" - } - - raw_response = raw_client.responses.create(**params) - traced_response = traced_client.responses.create(**params) - - 
assert_match(/gpt-4o-mini/, raw_response.model) - assert_match(/gpt-4o-mini/, traced_response.model) - assert_equal raw_response.output.length, traced_response.output.length - - raw_output = raw_response.output.first.content.first - traced_output = traced_response.output.first.content.first - assert_equal raw_output[:type], traced_output[:type] - refute_nil raw_output[:text] - refute_nil traced_output[:text] - assert_operator raw_output[:text].length, :>, 0 - assert_operator traced_output[:text].length, :>, 0 - - assert_operator raw_response.usage.total_tokens, :>, 0 - assert_operator traced_response.usage.total_tokens, :>, 0 - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "openai.responses.create", spans[0].name - end - end - - def test_traced_vs_raw_responses_streaming - require "openai" - - # This test verifies that tracing doesn't mutate responses API streaming - VCR.use_cassette("openai_traced_vs_raw_responses_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless raw_client.respond_to?(:responses) - - params = { - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Count from 1 to 3" - } - - raw_events = [] - raw_stream = raw_client.responses.stream(**params) - raw_stream.each do |event| - raw_events << event - end - - traced_events = [] - traced_stream = traced_client.responses.stream(**params) - traced_stream.each do |event| - traced_events << event - end - - assert_equal raw_events.length, traced_events.length - - raw_event_types = raw_events.map(&:type) - traced_event_types = traced_events.map(&:type) - assert_equal raw_event_types, 
traced_event_types - - raw_completed = raw_events.find { |e| e.type == "response.completed" } - traced_completed = traced_events.find { |e| e.type == "response.completed" } - - if raw_completed && traced_completed - assert_match(/gpt-4o-mini/, raw_completed.response.model) - assert_match(/gpt-4o-mini/, traced_completed.response.model) - assert_operator raw_completed.response.usage.total_tokens, :>, 0 - assert_operator traced_completed.response.usage.total_tokens, :>, 0 - end - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "openai.responses.create", spans[0].name - end - end - - def test_wrap_handles_streaming_with_text - VCR.use_cassette("openai/streaming_text") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request using .stream (not .stream_raw) - stream = client.chat.completions.stream( - messages: [ - {role: "user", content: "Count from 1 to 3"} - ], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: { - include_usage: true - } - ) - - # Consume the stream using .text() method - full_text = "" - stream.text.each do |delta| - full_text += delta - end - - # Verify we got content - refute_empty full_text - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - assert_equal "Count from 1 to 3", input[0]["content"] - - # Verify output was aggregated from stream - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, 
output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - assert output[0]["message"]["content"], "Should have aggregated content" - assert output[0]["message"]["content"].length > 0, "Content should not be empty" - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal true, metadata["stream"] - assert_match(/gpt-4o-mini/, metadata["model"]) - - # Verify metrics were captured (if include_usage was respected) - assert span.attributes.key?("braintrust.metrics"), "Should have braintrust.metrics" - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["tokens"] > 0 if metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_handles_streaming_with_get_final_completion - VCR.use_cassette("openai/streaming_get_final_completion") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request using .stream - stream = client.chat.completions.stream( - messages: [ - {role: "user", content: "Say hello"} - ], - model: "gpt-4o-mini", - max_tokens: 20, - stream_options: { - include_usage: true - } - ) - - # Use .get_final_completion() to block and get final result - completion = stream.get_final_completion - - # Verify we got a completion - refute_nil completion - refute_nil completion.choices - assert completion.choices.length > 0 - refute_nil completion.choices[0].message.content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - 
assert_equal "Chat Completion", span.name - - # Verify output was captured - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert output[0]["message"]["content"], "Should have captured content" - end - end - - def test_wrap_handles_streaming_with_get_output_text - VCR.use_cassette("openai/streaming_get_output_text") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request using .stream - stream = client.chat.completions.stream( - messages: [ - {role: "user", content: "Say hello"} - ], - model: "gpt-4o-mini", - max_tokens: 20, - stream_options: { - include_usage: true - } - ) - - # Use .get_output_text() to block and get final text - output_text = stream.get_output_text - - # Verify we got text - refute_nil output_text - refute_empty output_text - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify output was captured - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert output[0]["message"]["content"], "Should have captured content" - end - end -end diff --git a/test/fixtures/vcr_cassettes/openai/responses_create_error.yml b/test/fixtures/vcr_cassettes/openai/responses_create_error.yml new file mode 100644 index 0000000..a962d94 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_create_error.yml @@ -0,0 +1,93 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"test"}' + headers: + Accept-Encoding: + - 
gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer invalid_key + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '38' + response: + status: + code: 401 + message: Unauthorized + headers: + Date: + - Fri, 26 Dec 2025 15:54:02 GMT + Content-Type: + - application/json + Content-Length: + - '240' + Connection: + - keep-alive + Www-Authenticate: + - Bearer realm="OpenAI API" + Openai-Version: + - '2020-10-01' + X-Request-Id: + - req_f36e8b127fb944aa8668c38d9d94dc8d + Openai-Processing-Ms: + - '15' + X-Envoy-Upstream-Service-Time: + - '17' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=B8TFIln4qDneeipgm74c7YA75ZPAbWQ87zY.SAUlQ.4-1766764442-1.0.1.1-Hu_z7RRJr3dOLcUcYpBBb3fYopeWNngzBv3NCnzRXfa0TewdfqnAycEqX0yELTqRK51a1fgi.iSEbjuSv_jC5wUSFrM1AVcYNXiKRP5nlxU; + path=/; expires=Fri, 26-Dec-25 16:24:02 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=syD8zCWt82.PG7gWWOPB37IQRmsBX2vdKNxztHEhuKg-1766764442726-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41c1266d0598b9-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "error": { + "message": "Incorrect API key provided: invalid_key. 
You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + recorded_at: Fri, 26 Dec 2025 15:54:02 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_stream_error.yml b/test/fixtures/vcr_cassettes/openai/responses_stream_error.yml new file mode 100644 index 0000000..76d43aa --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_stream_error.yml @@ -0,0 +1,93 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"test","stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer invalid_key + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '52' + response: + status: + code: 401 + message: Unauthorized + headers: + Date: + - Fri, 26 Dec 2025 15:54:12 GMT + Content-Type: + - application/json + Content-Length: + - '240' + Connection: + - keep-alive + Www-Authenticate: + - Bearer realm="OpenAI API" + Openai-Version: + - '2020-10-01' + X-Request-Id: + - req_fde4ec8f349e4459bd295a8db0cfa8a2 + Openai-Processing-Ms: + - '26' + X-Envoy-Upstream-Service-Time: + - '28' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=cxJtiBq.wJ3ile9vOTZrTzpfwJKbqg6tQ5IKTv._xfk-1766764452-1.0.1.1-ilLZ9aJZ15NL_MIplarRpfiSS.dPFBDZVu4nvQg7dIQZkiH6qSyhWCCJtQWdZ0f6dtoEm4CLuNKo9b1FdVt618gVy7h6gmwgN4rYSPJhNXU; + path=/; expires=Fri, 26-Dec-25 16:24:12 GMT; domain=.api.openai.com; HttpOnly; + Secure; 
SameSite=None + - _cfuvid=2vDmgGJFXrFGUm5KeF6m2Rhx53MraieXxTmZWA8P3.M-1766764452532-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41c1638d933eb5-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "error": { + "message": "Incorrect API key provided: invalid_key. You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + recorded_at: Fri, 26 Dec 2025 15:54:12 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml b/test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml new file mode 100644 index 0000000..96feafe --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_metadata.yml @@ -0,0 +1,171 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Say hello","metadata":{"user_id":"test-123","session":"abc"}}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '93' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:12:44 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + 
X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999972' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_fd0f3f2417fe43dfa46bae14cc3aff2e + Openai-Processing-Ms: + - '596' + X-Envoy-Upstream-Service-Time: + - '600' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=8bz9W7X41Jibw8fCNQfsasrLzokbP5lzjAVlTNUJ5BE-1766765564-1.0.1.1-zbm7guE6Dgs_WOnz_iHMAlHUwNxmn3FPBZlU1OuBbr0.qZYm.5PThX4ibEGvc5uBdLluQDO7wQmGPp7kolHqg2Gnx4ATuicDhNk7UpferyA; + path=/; expires=Fri, 26-Dec-25 16:42:44 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=IXkv1khkAHUPUq6E.JuJmqDQITcVjyprVvTBPz6Q3.c-1766765564377-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41dc852f9ed836-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_0764b673f1c2ba2200694eb3fbc1408197a9541aab17fa4a38", + "object": "response", + "created_at": 1766765563, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765564, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_0764b673f1c2ba2200694eb3fc16688197886a7c5b366adb22", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "Hello! How can I assist you today?" 
+ } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 9, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 10, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 19 + }, + "user": null, + "metadata": { + "user_id": "test-123", + "session": "abc" + } + } + recorded_at: Fri, 26 Dec 2025 16:12:44 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml b/test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml new file mode 100644 index 0000000..a87331c --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_previous_response_id.yml @@ -0,0 +1,333 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Remember the number 42"}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '56' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:09:45 
GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999967' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_0ab64f5a094b914093bf296cdca77497 + Openai-Processing-Ms: + - '1136' + X-Envoy-Upstream-Service-Time: + - '1139' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=SMkDI5BdoHSsZz4hLNunLabg2lxz85BmCv8tEyQH4NQ-1766765385-1.0.1.1-42idQ1Rhrx7PotV3B187x2UBY3hboPghf.6fNZUjuov8mFtjC16nBad7X2nYxAj72jpxRSJqmXkobwumWVUDoX0C4NZeyfG26OKYf5_yCkU; + path=/; expires=Fri, 26-Dec-25 16:39:45 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=qlWmlIGXmgynnOfHtkxc74AkUtqXYIPbz0PpIqPgVvs-1766765385465-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41d8214a900044-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_0c60fcf96b8d6d2700694eb3484ea48193ac4cceb658fcd486", + "object": "response", + "created_at": 1766765384, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765385, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_0c60fcf96b8d6d2700694eb3490d888193ab0b3a2727ce65f3", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": 
"Got it! The number 42 is noted. How can I assist you with it?" + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 12, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 19, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 31 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:09:45 GMT +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"What number did I ask you to remember?","previous_response_id":"resp_0c60fcf96b8d6d2700694eb3484ea48193ac4cceb658fcd486"}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '153' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:09:46 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + 
X-Ratelimit-Remaining-Tokens: + - '149999932' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_46d4877214f246738e58d31fab959770 + Openai-Processing-Ms: + - '1061' + X-Envoy-Upstream-Service-Time: + - '1063' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=ocCldRJ.Lx8YzZMYhiezJrjNEO99zk5raYwhuvBokSo-1766765386-1.0.1.1-TWrYTuD.wS61zLZ7gzUcNW7O.HUr7hyi2l_Koe2XkyMJSSnww.j29twJQ_wJ8lyF4BprHU60hxghSkGBd8HUXgpdt2MHW_0X5EeuQIXEwd8; + path=/; expires=Fri, 26-Dec-25 16:39:46 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=KqrEnHpW_rouxgseTBUOd6ltl0yL7_UFioqmp7m6s7U-1766765386985-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41d82b3d6b0044-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_0c60fcf96b8d6d2700694eb349e6c08193ab164d7790c02afd", + "object": "response", + "created_at": 1766765385, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765386, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_0c60fcf96b8d6d2700694eb34aad6c81939c250bd6c98f4729", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "You asked me to remember the number 42." 
+ } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": "resp_0c60fcf96b8d6d2700694eb3484ea48193ac4cceb658fcd486", + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 47, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 11, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 58 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:09:46 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml b/test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml new file mode 100644 index 0000000..8f3a8a4 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_reasoning.yml @@ -0,0 +1,173 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"o4-mini","input":"What is 2+2?","reasoning":{"effort":"low"}}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '71' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 16:11:39 GMT + 
Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999780' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_93d774acb37d492a94abb8d1777c10e4 + Openai-Processing-Ms: + - '2675' + X-Envoy-Upstream-Service-Time: + - '2680' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=hfpoec9m9b4G1l8P0FMILfD.LxdBjOW0Ihj2hgy0a6w-1766765499-1.0.1.1-Waas3S9_hag4CT.yQVaFXf.YUeJd22FhvjB0e4D1tGIRMvEkEkXjNZQmlMpFW5uomY35IoheQ1ZjMZ.H2hIxTLs.6wTpM914GeRsVi0JxTc; + path=/; expires=Fri, 26-Dec-25 16:41:39 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=pfpF5kd2yEhd79IRBBZwP_YK0LolT.iGYo_bHWz1t3g-1766765499630-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41dae38a7a233c-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_05c9b376eaef5df500694eb3b8f0488190aad671821487d718", + "object": "response", + "created_at": 1766765496, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765499, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "o4-mini-2025-04-16", + "output": [ + { + "id": "rs_05c9b376eaef5df500694eb3baeb3c819098d8a271d979e4e5", + "type": "reasoning", + "summary": [] + }, + { + "id": "msg_05c9b376eaef5df500694eb3bb32508190be0d4fba1f18ed2b", + "type": "message", + "status": 
"completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "2 + 2 = 4." + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": "low", + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 13, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 14, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 27 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:11:39 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml b/test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml new file mode 100644 index 0000000..8201003 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/responses_with_truncation.yml @@ -0,0 +1,168 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Say hello","truncation":"auto"}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - application/json + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '63' + response: + status: + code: 200 + message: 
OK + headers: + Date: + - Fri, 26 Dec 2025 16:08:31 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999972' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - req_6e9eb60fab3543edb2d0accafecb1573 + Openai-Processing-Ms: + - '1633' + X-Envoy-Upstream-Service-Time: + - '1635' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=qSGY6qtNUcYKuk4_4m3mYxKoLDyIbTKA1nFX0tTt39s-1766765311-1.0.1.1-NbC6iV7rcg2BeUAxyB.HEi.9ZjvaxVWs8JiY2O_dpB7NUyPuDVuOPzv.YBxFPzm5D8NgCe9MYxkt6d2G8V4OjULe4Xqi206YOkY1WnO_zSc; + path=/; expires=Fri, 26-Dec-25 16:38:31 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=NMb7jI4HKKmKeilvSZD.bb1QxYZnK1BJHRdBcWxwVLI-1766765311632-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41d650cc89e239-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_081844c3e063c0a200694eb2fdfa708197a76ed12cff1fe0f3", + "object": "response", + "created_at": 1766765310, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766765311, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_081844c3e063c0a200694eb2ff0f908197b778bf59e1cc197d", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", 
+ "annotations": [], + "logprobs": [], + "text": "Hello! How can I assist you today?" + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "auto", + "usage": { + "input_tokens": 9, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 10, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 19 + }, + "user": null, + "metadata": {} + } + recorded_at: Fri, 26 Dec 2025 16:08:31 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml b/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml new file mode 100644 index 0000000..8701555 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml @@ -0,0 +1,130 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini","max_tokens":20,"stream_options":{"include_usage":true},"stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + 
- '144' + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 17 Dec 2025 21:39:31 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '770' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '783' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999995' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_ec79554aeb544044b6c2cacda6bf5deb + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=SULTPs8wSyJcfdzyW7AfYgAxNgFQhBWBh.cql5YgWK0-1766007571-1.0.1.1-dQJHZ9TQEcAnlZRHvdg1XfsuT75SbqxOy7CshY.xN_PW51.1TgtNQ9fBUxZb6_8NQvDeFYe3EjwY3BuSvB4Tx.xZYCofVzH1yOnByqls5oU; + path=/; expires=Wed, 17-Dec-25 22:09:31 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=jTnwp.LHHLW48BnvmEnO2LcqYuzDdW.89mY1nRyPvtU-1766007571885-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9af992d469010010-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KaaErWmpF"} + + data: 
{"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Hf0SpV"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GGciGwweQs"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KOLqgQ8"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xKM2bpu"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"o0w7wEHVB"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"pIQQ"} + + data: 
{"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"0PBQOon"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"gtUVW"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"tvwmRpNgk7"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"72xRr"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[],"usage":{"prompt_tokens":9,"completion_tokens":9,"total_tokens":18,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"NF9h1hYqo9hk"} + + data: [DONE] + + recorded_at: Wed, 17 Dec 2025 21:39:32 GMT +recorded_with: VCR 6.3.1 +... 
diff --git a/test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml b/test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml new file mode 100644 index 0000000..b923abd --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/streaming_multiple_choices.yml @@ -0,0 +1,125 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"messages":[{"role":"user","content":"Say either ''hello'' or ''hi'' + in one word"}],"model":"gpt-4o-mini","max_tokens":10,"n":2,"stream_options":{"include_usage":true},"stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '179' + response: + status: + code: 200 + message: OK + headers: + Date: + - Fri, 26 Dec 2025 15:53:38 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '203' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '412' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999987' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_ec522a5cd1a244368302f316ad41173d + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - 
DYNAMIC + Set-Cookie: + - __cf_bm=hHlghp8HNFh4QUbUZbVcH72blcdZfpsMhy06JGp1I4Q-1766764418-1.0.1.1-OYsXT2WSAV4eQlgZBCF7AOq6VVRFuN3YUeztTY49tignIkkaN.gmMmdRmNQID92B1JGCDv5VYmYFXUAUiZQzkugmVd.MQd0ULVBS2uBWC1I; + path=/; expires=Fri, 26-Dec-25 16:23:38 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=O6M5xgfBX7r6bd848UUsi71Vfrdorj5aqqUkSzNa5gY-1766764418274-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b41c0891aec11fd-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KzDrhZVz8"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"bOwYV2"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"3AwjpG0rsH"} + + data: 
{"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ZsYMs6apS"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"InGNn4"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"s3ED3sFS7z"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"5qR7E"} + + data: {"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"k9EbC"} + + data: 
{"id":"chatcmpl-Cr4QEId1Y0l9qcxYwbRcJ5NjRcVSZ","object":"chat.completion.chunk","created":1766764418,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":19,"completion_tokens":4,"total_tokens":23,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"Dh4cs1gqrFe"} + + data: [DONE] + + recorded_at: Fri, 26 Dec 2025 15:53:38 GMT +recorded_with: VCR 6.3.1 +... From d52999568c4a07cc0a0f5ff8fc263c10e0e3be19 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 09:55:58 -0500 Subject: [PATCH 08/27] Changed: Share common implementation between OpenAI integrations --- .../contrib/openai/instrumentation/chat.rb | 32 ++-- .../contrib/openai/instrumentation/common.rb | 54 ++---- .../openai/instrumentation/responses.rb | 22 +-- lib/braintrust/contrib/support/openai.rb | 72 ++++++++ lib/braintrust/contrib/support/otel.rb | 23 +++ lib/braintrust/trace/tokens.rb | 60 ------- test/braintrust/contrib/openai/common_test.rb | 156 ++++++++++++++++++ .../openai/instrumentation/common_test.rb | 46 ------ .../braintrust/contrib/support/openai_test.rb | 91 ++++++++++ test/braintrust/contrib/support/otel_test.rb | 35 ++++ test/braintrust/trace/tokens_test.rb | 78 --------- 11 files changed, 418 insertions(+), 251 deletions(-) create mode 100644 lib/braintrust/contrib/support/openai.rb create mode 100644 lib/braintrust/contrib/support/otel.rb create mode 100644 test/braintrust/contrib/openai/common_test.rb create mode 100644 test/braintrust/contrib/support/openai_test.rb create mode 100644 test/braintrust/contrib/support/otel_test.rb diff --git a/lib/braintrust/contrib/openai/instrumentation/chat.rb b/lib/braintrust/contrib/openai/instrumentation/chat.rb index 50ea6e2..ac06229 100644 --- a/lib/braintrust/contrib/openai/instrumentation/chat.rb +++ 
b/lib/braintrust/contrib/openai/instrumentation/chat.rb @@ -5,6 +5,8 @@ require_relative "common" require_relative "../../../internal/time" +require_relative "../../support/otel" +require_relative "../../support/openai" module Braintrust module Contrib @@ -105,23 +107,23 @@ def set_input(span, params) return unless params[:messages] messages_array = params[:messages].map(&:to_h) - Common.set_json_attr(span, "braintrust.input_json", messages_array) + Support::OTel.set_json_attr(span, "braintrust.input_json", messages_array) end def set_output(span, response) return unless response.respond_to?(:choices) && response.choices&.any? choices_array = response.choices.map(&:to_h) - Common.set_json_attr(span, "braintrust.output_json", choices_array) + Support::OTel.set_json_attr(span, "braintrust.output_json", choices_array) end def set_metrics(span, response, time_to_first_token) metrics = {} if response.respond_to?(:usage) && response.usage - metrics = Common.parse_usage_tokens(response.usage) + metrics = Support::OpenAI.parse_usage_tokens(response.usage) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
end def finalize_metadata(span, metadata, response) @@ -130,7 +132,7 @@ def finalize_metadata(span, metadata, response) metadata["model"] = response.model if response.respond_to?(:model) && response.model metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end @@ -179,7 +181,7 @@ def trace_consumption(ctx) tracer.in_span("Chat Completion") do |span| completions_instance.send(:set_input, span, params) - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) yield @@ -197,16 +199,16 @@ def finalize_stream_span(span, start_time, metadata, completions_instance) # Set output from accumulated choices if snapshot.choices&.any? choices_array = snapshot.choices.map(&:to_h) - Common.set_json_attr(span, "braintrust.output_json", choices_array) + Support::OTel.set_json_attr(span, "braintrust.output_json", choices_array) end # Set metrics metrics = {} if snapshot.usage - metrics = Common.parse_usage_tokens(snapshot.usage) + metrics = Support::OpenAI.parse_usage_tokens(snapshot.usage) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
# Update metadata with response fields metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id @@ -214,7 +216,7 @@ def finalize_stream_span(span, start_time, metadata, completions_instance) metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint metadata["service_tier"] = snapshot.service_tier if snapshot.respond_to?(:service_tier) && snapshot.service_tier - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) rescue => e Braintrust::Log.debug("Failed to get completion snapshot: #{e.message}") end @@ -251,7 +253,7 @@ def each(&block) tracer.in_span("Chat Completion") do |span| completions_instance.send(:set_input, span, params) - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) super do |chunk| time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) @@ -269,15 +271,15 @@ def finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) return if aggregated_chunks.empty? aggregated_output = Common.aggregate_streaming_chunks(aggregated_chunks) - Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) + Support::OTel.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) # Set metrics metrics = {} if aggregated_output[:usage] - metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + metrics = Support::OpenAI.parse_usage_tokens(aggregated_output[:usage]) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
# Update metadata with response fields metadata["id"] = aggregated_output[:id] if aggregated_output[:id] @@ -285,7 +287,7 @@ def finalize_stream_span(span, aggregated_chunks, time_to_first_token, metadata) metadata["model"] = aggregated_output[:model] if aggregated_output[:model] metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] metadata["service_tier"] = aggregated_output[:service_tier] if aggregated_output[:service_tier] - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end diff --git a/lib/braintrust/contrib/openai/instrumentation/common.rb b/lib/braintrust/contrib/openai/instrumentation/common.rb index 039353b..4c2f112 100644 --- a/lib/braintrust/contrib/openai/instrumentation/common.rb +++ b/lib/braintrust/contrib/openai/instrumentation/common.rb @@ -1,38 +1,16 @@ # frozen_string_literal: true -require "json" - -require_relative "../../../trace/tokens" - module Braintrust module Contrib module OpenAI module Instrumentation - # Chat completions instrumentation for OpenAI. - # Provides modules that can be prepended to OpenAI::Client to instrument chat.completions API. + # Aggregation utilities for official OpenAI SDK instrumentation. + # These are specific to the official openai gem's data structures (symbol keys, SDK objects). 
module Common - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from OpenAI API response - # @param usage [Hash, Object] usage object from OpenAI response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_openai_usage_tokens(usage) - end - - # Aggregate streaming chunks into a single response structure - # Follows the Go SDK logic for aggregating deltas - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with choices, usage, etc. + # Aggregate streaming chunks into a single response structure. + # Specific to official OpenAI SDK which uses symbol keys and SDK objects. + # @param chunks [Array] array of chunk hashes from stream (symbol keys) + # @return [Hash] aggregated response with choices, usage, etc. (symbol keys) def self.aggregate_streaming_chunks(chunks) return {} if chunks.empty? 
@@ -57,9 +35,7 @@ def self.aggregate_streaming_chunks(chunks) aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) - if chunk[:usage] - aggregated[:usage] = chunk[:usage] - end + aggregated[:usage] = chunk[:usage] if chunk[:usage] # Process choices next unless chunk[:choices].is_a?(Array) @@ -79,14 +55,11 @@ def self.aggregate_streaming_chunks(chunks) choice_data[index][:role] ||= delta[:role] # Aggregate content - if delta[:content] - choice_data[index][:content] << delta[:content] - end + choice_data[index][:content] << delta[:content] if delta[:content] - # Aggregate tool_calls (similar to Go SDK logic) + # Aggregate tool_calls if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? delta[:tool_calls].each do |tool_call_delta| - # Check if this is a new tool call or continuation if tool_call_delta[:id] && !tool_call_delta[:id].empty? # New tool call (dup strings to avoid mutating input) choice_data[index][:tool_calls] << { @@ -108,9 +81,7 @@ def self.aggregate_streaming_chunks(chunks) end # Capture finish_reason - if choice[:finish_reason] - choice_data[index][:finish_reason] = choice[:finish_reason] - end + choice_data[index][:finish_reason] = choice[:finish_reason] if choice[:finish_reason] end end @@ -134,8 +105,8 @@ def self.aggregate_streaming_chunks(chunks) aggregated end - # Aggregate responses streaming events into a single response structure - # Follows similar logic to Python SDK's _postprocess_streaming_results + # Aggregate responses streaming events into a single response structure. + # Specific to official OpenAI SDK which returns typed event objects. # @param events [Array] array of event objects from stream # @return [Hash] aggregated response with output, usage, etc. 
def self.aggregate_responses_events(events) @@ -146,7 +117,6 @@ def self.aggregate_responses_events(events) if completed_event&.respond_to?(:response) response = completed_event.response - # Convert the response object to a hash-like structure for logging return { id: response.respond_to?(:id) ? response.id : nil, output: response.respond_to?(:output) ? response.output : nil, diff --git a/lib/braintrust/contrib/openai/instrumentation/responses.rb b/lib/braintrust/contrib/openai/instrumentation/responses.rb index 909b7f4..5bb4da6 100644 --- a/lib/braintrust/contrib/openai/instrumentation/responses.rb +++ b/lib/braintrust/contrib/openai/instrumentation/responses.rb @@ -5,6 +5,8 @@ require_relative "common" require_relative "../../../internal/time" +require_relative "../../support/otel" +require_relative "../../support/openai" module Braintrust module Contrib @@ -86,27 +88,27 @@ def build_metadata(params, stream: false) def set_input(span, params) return unless params[:input] - Common.set_json_attr(span, "braintrust.input_json", params[:input]) + Support::OTel.set_json_attr(span, "braintrust.input_json", params[:input]) end def set_output(span, response) return unless response.respond_to?(:output) && response.output - Common.set_json_attr(span, "braintrust.output_json", response.output) + Support::OTel.set_json_attr(span, "braintrust.output_json", response.output) end def set_metrics(span, response, time_to_first_token) metrics = {} if response.respond_to?(:usage) && response.usage - metrics = Common.parse_usage_tokens(response.usage) + metrics = Support::OpenAI.parse_usage_tokens(response.usage) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
end def finalize_metadata(span, metadata, response) metadata["id"] = response.id if response.respond_to?(:id) && response.id - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end @@ -139,7 +141,7 @@ def each(&block) tracer.in_span("openai.responses.create") do |span| responses_instance.send(:set_input, span, params) - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) begin super do |event| @@ -163,19 +165,19 @@ def finalize_stream_span(span, aggregated_events, time_to_first_token, metadata) return if aggregated_events.empty? aggregated_output = Common.aggregate_responses_events(aggregated_events) - Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] + Support::OTel.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] # Set metrics metrics = {} if aggregated_output[:usage] - metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + metrics = Support::OpenAI.parse_usage_tokens(aggregated_output[:usage]) end metrics["time_to_first_token"] = time_to_first_token - Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
# Update metadata with response fields metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - Common.set_json_attr(span, "braintrust.metadata", metadata) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) end end end diff --git a/lib/braintrust/contrib/support/openai.rb b/lib/braintrust/contrib/support/openai.rb new file mode 100644 index 0000000..27581b5 --- /dev/null +++ b/lib/braintrust/contrib/support/openai.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Support + # OpenAI API-specific utilities shared across OpenAI integrations. + # These work with normalized data structures (hashes), not library-specific objects. + module OpenAI + # Parse OpenAI usage tokens into normalized Braintrust metrics. + # Handles standard fields and *_tokens_details nested objects. + # Works with both Hash objects and SDK response objects (via to_h). + # @param usage [Hash, Object] usage object from OpenAI response + # @return [Hash] normalized metrics + def self.parse_usage_tokens(usage) + metrics = {} + return metrics unless usage + + usage_hash = usage.respond_to?(:to_h) ? 
usage.to_h : usage + return metrics unless usage_hash.is_a?(Hash) + + # Field mappings: OpenAI → Braintrust + # Supports both Chat Completions API (prompt_tokens, completion_tokens) + # and Responses API (input_tokens, output_tokens) + field_map = { + "prompt_tokens" => "prompt_tokens", + "completion_tokens" => "completion_tokens", + "total_tokens" => "tokens", + # Responses API uses different field names + "input_tokens" => "prompt_tokens", + "output_tokens" => "completion_tokens" + } + + # Prefix mappings for *_tokens_details + prefix_map = { + "prompt" => "prompt", + "completion" => "completion", + # Responses API uses input/output prefixes + "input" => "prompt", + "output" => "completion" + } + + usage_hash.each do |key, value| + key_str = key.to_s + + if value.is_a?(Numeric) + target = field_map[key_str] + metrics[target] = value.to_i if target + elsif key_str.end_with?("_tokens_details") + # Convert to hash if it's an object (OpenAI SDK returns objects) + details_hash = value.respond_to?(:to_h) ? value.to_h : value + next unless details_hash.is_a?(Hash) + + raw_prefix = key_str.sub(/_tokens_details$/, "") + prefix = prefix_map[raw_prefix] || raw_prefix + details_hash.each do |detail_key, detail_value| + next unless detail_value.is_a?(Numeric) + metrics["#{prefix}_#{detail_key}"] = detail_value.to_i + end + end + end + + # Calculate total if missing + if !metrics.key?("tokens") && metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") + metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] + end + + metrics + end + end + end + end +end diff --git a/lib/braintrust/contrib/support/otel.rb b/lib/braintrust/contrib/support/otel.rb new file mode 100644 index 0000000..d073337 --- /dev/null +++ b/lib/braintrust/contrib/support/otel.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "json" + +module Braintrust + module Contrib + module Support + # OpenTelemetry utilities shared across all integrations. 
+ module OTel + # Helper to safely set a JSON attribute on a span + # Only sets the attribute if obj is present + # @param span [OpenTelemetry::Trace::Span] the span to set attribute on + # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") + # @param obj [Object] the object to serialize to JSON + # @return [void] + def self.set_json_attr(span, attr_name, obj) + return unless obj + span.set_attribute(attr_name, JSON.generate(obj)) + end + end + end + end +end diff --git a/lib/braintrust/trace/tokens.rb b/lib/braintrust/trace/tokens.rb index d40b5d8..679df12 100644 --- a/lib/braintrust/trace/tokens.rb +++ b/lib/braintrust/trace/tokens.rb @@ -2,66 +2,6 @@ module Braintrust module Trace - # Parse OpenAI usage tokens into normalized Braintrust metrics. - # Handles standard fields and *_tokens_details nested objects. - # @param usage [Hash, Object] usage object from OpenAI response - # @return [Hash] normalized metrics - def self.parse_openai_usage_tokens(usage) - metrics = {} - return metrics unless usage - - usage_hash = usage.respond_to?(:to_h) ? 
usage.to_h : usage - return metrics unless usage_hash.is_a?(Hash) - - # Field mappings: OpenAI → Braintrust - # Supports both Chat Completions API (prompt_tokens, completion_tokens) - # and Responses API (input_tokens, output_tokens) - field_map = { - "prompt_tokens" => "prompt_tokens", - "completion_tokens" => "completion_tokens", - "total_tokens" => "tokens", - # Responses API uses different field names - "input_tokens" => "prompt_tokens", - "output_tokens" => "completion_tokens" - } - - # Prefix mappings for *_tokens_details - prefix_map = { - "prompt" => "prompt", - "completion" => "completion", - # Responses API uses input/output prefixes - "input" => "prompt", - "output" => "completion" - } - - usage_hash.each do |key, value| - key_str = key.to_s - - if value.is_a?(Numeric) - target = field_map[key_str] - metrics[target] = value.to_i if target - elsif key_str.end_with?("_tokens_details") - # Convert to hash if it's an object (OpenAI SDK returns objects) - details_hash = value.respond_to?(:to_h) ? value.to_h : value - next unless details_hash.is_a?(Hash) - - raw_prefix = key_str.sub(/_tokens_details$/, "") - prefix = prefix_map[raw_prefix] || raw_prefix - details_hash.each do |detail_key, detail_value| - next unless detail_value.is_a?(Numeric) - metrics["#{prefix}_#{detail_key}"] = detail_value.to_i - end - end - end - - # Calculate total if missing - if !metrics.key?("tokens") && metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") - metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] - end - - metrics - end - # Parse Anthropic usage tokens into normalized Braintrust metrics. # Accumulates cache tokens into prompt_tokens and calculates total. 
# @param usage [Hash, Object] usage object from Anthropic response diff --git a/test/braintrust/contrib/openai/common_test.rb b/test/braintrust/contrib/openai/common_test.rb new file mode 100644 index 0000000..8152c89 --- /dev/null +++ b/test/braintrust/contrib/openai/common_test.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/openai/instrumentation/common" + +class Braintrust::Contrib::OpenAI::Instrumentation::CommonTest < Minitest::Test + # ============================ + # aggregate_streaming_chunks + # ============================ + + def test_aggregate_streaming_chunks_empty + assert_equal({}, Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks([])) + end + + def test_aggregate_streaming_chunks_basic + chunks = [ + {id: "chatcmpl-123", model: "gpt-4", choices: [{index: 0, delta: {role: "assistant"}}]}, + {choices: [{index: 0, delta: {content: "Hello"}}]}, + {choices: [{index: 0, delta: {content: " world"}, finish_reason: "stop"}]}, + {usage: {prompt_tokens: 10, completion_tokens: 5}} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal "chatcmpl-123", result[:id] + assert_equal "gpt-4", result[:model] + assert_equal 1, result[:choices].length + assert_equal "assistant", result[:choices][0][:message][:role] + assert_equal "Hello world", result[:choices][0][:message][:content] + assert_equal "stop", result[:choices][0][:finish_reason] + assert_equal 10, result[:usage][:prompt_tokens] + end + + def test_aggregate_streaming_chunks_captures_system_fingerprint + chunks = [ + {id: "chatcmpl-123", system_fingerprint: "fp_abc123", choices: [{index: 0, delta: {role: "assistant"}}]}, + {choices: [{index: 0, delta: {content: "Hi"}, finish_reason: "stop"}]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal "fp_abc123", result[:system_fingerprint] + 
end + + def test_aggregate_streaming_chunks_with_tool_calls + chunks = [ + {id: "chatcmpl-123", choices: [{index: 0, delta: {role: "assistant", tool_calls: [{id: "call_abc", type: "function", function: {name: "get_weather", arguments: ""}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: '{"loc'}}]}}]}, + {choices: [{index: 0, delta: {tool_calls: [{function: {arguments: 'ation":"NYC"}'}}]}, finish_reason: "tool_calls"}]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result[:choices].length + assert_equal 1, result[:choices][0][:message][:tool_calls].length + assert_equal "call_abc", result[:choices][0][:message][:tool_calls][0][:id] + assert_equal "get_weather", result[:choices][0][:message][:tool_calls][0][:function][:name] + assert_equal '{"location":"NYC"}', result[:choices][0][:message][:tool_calls][0][:function][:arguments] + end + + def test_aggregate_streaming_chunks_multiple_choices + chunks = [ + {id: "chatcmpl-123", choices: [ + {index: 0, delta: {role: "assistant"}}, + {index: 1, delta: {role: "assistant"}} + ]}, + {choices: [ + {index: 0, delta: {content: "First"}}, + {index: 1, delta: {content: "Second"}} + ]}, + {choices: [ + {index: 0, delta: {}, finish_reason: "stop"}, + {index: 1, delta: {}, finish_reason: "stop"} + ]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_equal 2, result[:choices].length + assert_equal "First", result[:choices][0][:message][:content] + assert_equal "Second", result[:choices][1][:message][:content] + end + + def test_aggregate_streaming_chunks_empty_content_is_nil + chunks = [ + {id: "chatcmpl-123", choices: [{index: 0, delta: {role: "assistant"}}]}, + {choices: [{index: 0, delta: {}, finish_reason: "stop"}]} + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_streaming_chunks(chunks) + + assert_nil 
result[:choices][0][:message][:content] + end + + # ============================ + # aggregate_responses_events + # ============================ + + def test_aggregate_responses_events_empty + assert_equal({}, Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events([])) + end + + def test_aggregate_responses_events_with_completed_event + # Create mock event objects that respond to :type and :response + completed_response = Struct.new(:id, :output, :usage, keyword_init: true).new( + id: "resp_123", + output: [{"type" => "message", "content" => "Hello"}], + usage: {input_tokens: 10, output_tokens: 5} + ) + + completed_event = Struct.new(:type, :response, keyword_init: true).new( + type: :"response.completed", + response: completed_response + ) + + events = [ + Struct.new(:type).new(:"response.created"), + Struct.new(:type).new(:"response.in_progress"), + completed_event + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events(events) + + assert_equal "resp_123", result[:id] + assert_equal [{"type" => "message", "content" => "Hello"}], result[:output] + assert_equal({input_tokens: 10, output_tokens: 5}, result[:usage]) + end + + def test_aggregate_responses_events_without_completed_event + events = [ + Struct.new(:type).new(:"response.created"), + Struct.new(:type).new(:"response.in_progress") + ] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events(events) + + assert_equal({}, result) + end + + def test_aggregate_responses_events_handles_missing_response_fields + completed_response = Struct.new(:id, keyword_init: true).new(id: "resp_123") + + completed_event = Struct.new(:type, :response, keyword_init: true).new( + type: :"response.completed", + response: completed_response + ) + + events = [completed_event] + + result = Braintrust::Contrib::OpenAI::Instrumentation::Common.aggregate_responses_events(events) + + assert_equal "resp_123", result[:id] + assert_nil 
result[:output] + assert_nil result[:usage] + end +end diff --git a/test/braintrust/contrib/openai/instrumentation/common_test.rb b/test/braintrust/contrib/openai/instrumentation/common_test.rb index a40bc3e..8132280 100644 --- a/test/braintrust/contrib/openai/instrumentation/common_test.rb +++ b/test/braintrust/contrib/openai/instrumentation/common_test.rb @@ -6,52 +6,6 @@ class Braintrust::Contrib::OpenAI::Instrumentation::CommonTest < Minitest::Test Common = Braintrust::Contrib::OpenAI::Instrumentation::Common - # --- .set_json_attr --- - - def test_set_json_attr_sets_attribute_with_json - span = Minitest::Mock.new - span.expect(:set_attribute, nil, ["braintrust.output", '{"foo":"bar"}']) - - Common.set_json_attr(span, "braintrust.output", {foo: "bar"}) - - span.verify - end - - def test_set_json_attr_skips_nil_object - span = Minitest::Mock.new - # No expectations - set_attribute should not be called - - Common.set_json_attr(span, "braintrust.output", nil) - - span.verify - end - - def test_set_json_attr_handles_array - span = Minitest::Mock.new - span.expect(:set_attribute, nil, ["braintrust.input", "[1,2,3]"]) - - Common.set_json_attr(span, "braintrust.input", [1, 2, 3]) - - span.verify - end - - # --- .parse_usage_tokens --- - - def test_parse_usage_tokens_delegates_to_trace - usage = {prompt_tokens: 10, completion_tokens: 5} - expected_result = {"prompt_tokens" => 10, "completion_tokens" => 5, "tokens" => 15} - - mock = Minitest::Mock.new - mock.expect(:call, expected_result, [usage]) - - Braintrust::Trace.stub(:parse_openai_usage_tokens, mock) do - result = Common.parse_usage_tokens(usage) - assert_equal expected_result, result - end - - mock.verify - end - # --- .aggregate_streaming_chunks --- def test_aggregate_streaming_chunks_returns_empty_hash_for_empty_array diff --git a/test/braintrust/contrib/support/openai_test.rb b/test/braintrust/contrib/support/openai_test.rb new file mode 100644 index 0000000..fd5fd28 --- /dev/null +++ 
b/test/braintrust/contrib/support/openai_test.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/support/openai" + +class Braintrust::Contrib::Support::OpenAITest < Minitest::Test + # =================== + # parse_usage_tokens + # =================== + + def test_maps_standard_fields + usage = {"prompt_tokens" => 10, "completion_tokens" => 20, "total_tokens" => 30} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + assert_equal 30, metrics["tokens"] + end + + def test_handles_symbol_keys + usage = {prompt_tokens: 10, completion_tokens: 20, total_tokens: 30} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + end + + def test_handles_prompt_tokens_details + usage = { + "prompt_tokens" => 100, + "prompt_tokens_details" => {"cached_tokens" => 80} + } + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 80, metrics["prompt_cached_tokens"] + end + + def test_handles_completion_tokens_details + usage = { + "completion_tokens" => 50, + "completion_tokens_details" => {"reasoning_tokens" => 30} + } + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 30, metrics["completion_reasoning_tokens"] + end + + def test_handles_object_style_token_details + # OpenAI SDK returns objects with to_h, not plain hashes + details_object = Struct.new(:cached_tokens, :audio_tokens, keyword_init: true) + usage_object = Struct.new(:prompt_tokens, :completion_tokens, :total_tokens, :prompt_tokens_details, keyword_init: true) + + usage = usage_object.new( + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + prompt_tokens_details: details_object.new(cached_tokens: 80, audio_tokens: 0) + ) + + metrics = 
Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 80, metrics["prompt_cached_tokens"] + assert_equal 0, metrics["prompt_audio_tokens"] + end + + def test_returns_empty_hash_for_nil + assert_equal({}, Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(nil)) + end + + def test_calculates_total_if_missing + usage = {"prompt_tokens" => 10, "completion_tokens" => 20} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 30, metrics["tokens"] + end + + def test_handles_responses_api_field_names + # Responses API uses input_tokens/output_tokens + usage = {"input_tokens" => 10, "output_tokens" => 20} + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + assert_equal 30, metrics["tokens"] + end +end diff --git a/test/braintrust/contrib/support/otel_test.rb b/test/braintrust/contrib/support/otel_test.rb new file mode 100644 index 0000000..4971b7b --- /dev/null +++ b/test/braintrust/contrib/support/otel_test.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/support/otel" + +class Braintrust::Contrib::Support::OTelTest < Minitest::Test + # --- .set_json_attr --- + + def test_set_json_attr_sets_attribute_with_json + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.output", '{"foo":"bar"}']) + + Braintrust::Contrib::Support::OTel.set_json_attr(span, "braintrust.output", {foo: "bar"}) + + span.verify + end + + def test_set_json_attr_skips_nil_object + span = Minitest::Mock.new + # No expectations - set_attribute should not be called + + Braintrust::Contrib::Support::OTel.set_json_attr(span, "braintrust.output", nil) + + span.verify + end + + def test_set_json_attr_handles_array + span = Minitest::Mock.new + span.expect(:set_attribute, nil, ["braintrust.input", "[1,2,3]"]) + + 
Braintrust::Contrib::Support::OTel.set_json_attr(span, "braintrust.input", [1, 2, 3]) + + span.verify + end +end diff --git a/test/braintrust/trace/tokens_test.rb b/test/braintrust/trace/tokens_test.rb index cd4e7e3..c990c58 100644 --- a/test/braintrust/trace/tokens_test.rb +++ b/test/braintrust/trace/tokens_test.rb @@ -4,84 +4,6 @@ require "braintrust/trace/tokens" class TokensTest < Minitest::Test - # =================== - # OpenAI Token Parser - # =================== - - def test_openai_maps_standard_fields - usage = {"prompt_tokens" => 10, "completion_tokens" => 20, "total_tokens" => 30} - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 10, metrics["prompt_tokens"] - assert_equal 20, metrics["completion_tokens"] - assert_equal 30, metrics["tokens"] - end - - def test_openai_handles_symbol_keys - usage = {prompt_tokens: 10, completion_tokens: 20, total_tokens: 30} - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 10, metrics["prompt_tokens"] - end - - def test_openai_handles_prompt_tokens_details - usage = { - "prompt_tokens" => 100, - "prompt_tokens_details" => {"cached_tokens" => 80} - } - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 80, metrics["prompt_cached_tokens"] - end - - def test_openai_handles_completion_tokens_details - usage = { - "completion_tokens" => 50, - "completion_tokens_details" => {"reasoning_tokens" => 30} - } - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 30, metrics["completion_reasoning_tokens"] - end - - def test_openai_handles_object_style_token_details - # OpenAI SDK returns objects with to_h, not plain hashes - details_object = Struct.new(:cached_tokens, :audio_tokens, keyword_init: true) - usage_object = Struct.new(:prompt_tokens, :completion_tokens, :total_tokens, :prompt_tokens_details, keyword_init: true) - - usage = usage_object.new( - prompt_tokens: 100, - completion_tokens: 50, - total_tokens: 
150, - prompt_tokens_details: details_object.new(cached_tokens: 80, audio_tokens: 0) - ) - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 80, metrics["prompt_cached_tokens"] - assert_equal 0, metrics["prompt_audio_tokens"] - end - - def test_openai_returns_empty_hash_for_nil - assert_equal({}, Braintrust::Trace.parse_openai_usage_tokens(nil)) - end - - def test_openai_calculates_total_if_missing - usage = {"prompt_tokens" => 10, "completion_tokens" => 20} - - metrics = Braintrust::Trace.parse_openai_usage_tokens(usage) - - assert_equal 30, metrics["tokens"] - end - - # ======================= - # Anthropic Token Parser - # ======================= - def test_anthropic_maps_input_output_tokens usage = {"input_tokens" => 10, "output_tokens" => 20} From 3b037f0035e7507f6e83ea48d073dfb302a3807f Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 25 Dec 2025 09:47:37 -0500 Subject: [PATCH 09/27] Changed: Migrated ruby-openai to new integration API --- Rakefile | 16 +- examples/contrib/ruby_openai/basic.rb | 78 +++ .../ruby_openai/deprecated.rb} | 11 +- examples/contrib/ruby_openai/instance.rb | 61 ++ examples/contrib/ruby_openai/streaming.rb | 80 +++ .../ruby_openai/golden.rb} | 8 +- lib/braintrust/contrib.rb | 2 + .../contrib/ruby_openai/deprecated.rb | 24 + .../ruby_openai/instrumentation/chat.rb | 149 +++++ .../ruby_openai/instrumentation/common.rb | 138 ++++ .../ruby_openai/instrumentation/responses.rb | 146 +++++ .../contrib/ruby_openai/integration.rb | 58 ++ lib/braintrust/contrib/ruby_openai/patcher.rb | 85 +++ lib/braintrust/trace.rb | 26 - .../alexrudall/ruby-openai/ruby-openai.rb | 377 ----------- .../contrib/ruby_openai/deprecated_test.rb | 45 ++ .../ruby_openai/instrumentation/chat_test.rb | 362 ++++++++++ .../instrumentation/common_test.rb | 149 +++++ .../instrumentation/responses_test.rb | 232 +++++++ .../ruby_openai/instrumentation_test.rb | 52 ++ .../contrib/ruby_openai/integration_helper.rb | 33 + 
.../contrib/ruby_openai/integration_test.rb | 65 ++ .../contrib/ruby_openai/openai_test.rb | 92 +++ .../contrib/ruby_openai/patcher_test.rb | 206 ++++++ .../contrib/alexrudall_ruby_openai_test.rb | 620 ------------------ .../responses_with_metadata.yml | 151 +++++ 26 files changed, 2233 insertions(+), 1033 deletions(-) create mode 100644 examples/contrib/ruby_openai/basic.rb rename examples/{alexrudall_openai.rb => contrib/ruby_openai/deprecated.rb} (80%) create mode 100644 examples/contrib/ruby_openai/instance.rb create mode 100644 examples/contrib/ruby_openai/streaming.rb rename examples/internal/{alexrudall_ruby_openai.rb => contrib/ruby_openai/golden.rb} (98%) create mode 100644 lib/braintrust/contrib/ruby_openai/deprecated.rb create mode 100644 lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb create mode 100644 lib/braintrust/contrib/ruby_openai/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb create mode 100644 lib/braintrust/contrib/ruby_openai/integration.rb create mode 100644 lib/braintrust/contrib/ruby_openai/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb create mode 100644 test/braintrust/contrib/ruby_openai/deprecated_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/instrumentation_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/integration_helper.rb create mode 100644 test/braintrust/contrib/ruby_openai/integration_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/openai_test.rb create mode 100644 test/braintrust/contrib/ruby_openai/patcher_test.rb delete mode 100644 test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb 
create mode 100644 test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml diff --git a/Rakefile b/Rakefile index 8bd2b54..84f03cd 100644 --- a/Rakefile +++ b/Rakefile @@ -104,9 +104,19 @@ namespace :test do end namespace :contrib do - desc "Run OpenAI contrib tests" - task :openai do - sh "bundle exec appraisal openai ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib/openai/**/*_test.rb').each { |f| require_relative f }\"" + # Tasks per integration + [ + {name: :openai}, + {name: :ruby_openai, appraisal: "ruby-openai"} + ].each do |integration| + name = integration[:name] + appraisal = integration[:appraisal] || integration[:name] + folder = integration[:name] + + desc "Run #{name} contrib tests" + task name do + sh "bundle exec appraisal #{appraisal} ruby -Ilib:test -e \"Dir.glob('test/braintrust/contrib/#{folder}/**/*_test.rb').each { |f| require_relative f }\"" + end end end diff --git a/examples/contrib/ruby_openai/basic.rb b/examples/contrib/ruby_openai/basic.rb new file mode 100644 index 0000000..8d58f4f --- /dev/null +++ b/examples/contrib/ruby_openai/basic.rb @@ -0,0 +1,78 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: ruby-openai (alexrudall) chat completion with Braintrust tracing (class-level) +# +# This example demonstrates how to automatically trace OpenAI API calls using +# the ruby-openai gem (by alexrudall) with Braintrust class-level instrumentation. +# All OpenAI clients created after instrument! is called will be traced. +# +# Note: ruby-openai is an optional development dependency. To run this example: +# 1. Install ruby-openai: gem install ruby-openai +# 2. 
Run from the SDK root: ruby examples/contrib/ruby_openai/basic.rb +# +# Usage: +# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/basic.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Initialize Braintrust and instrument all ruby-openai clients +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_openai) + +# Create OpenAI client (ruby-openai uses access_token parameter) +# This client is automatically instrumented because of the class-level instrument! call +client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") +root_span = nil + +# Make a chat completion request (automatically traced!) +puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." +response = tracer.in_span("examples/contrib/ruby_openai/basic.rb") do |span| + root_span = span + + # ruby-openai uses: client.chat(parameters: {...}) + client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + max_tokens: 100 + } + ) +end + +# Print the response (ruby-openai returns a hash) +puts "\n Response received!" 
+puts "\nAssistant: #{response.dig("choices", 0, "message", "content")}" + +# Print usage stats +if response["usage"] + puts "\nToken usage:" + puts " Prompt tokens: #{response["usage"]["prompt_tokens"]}" + puts " Completion tokens: #{response["usage"]["completion_tokens"]}" + puts " Total tokens: #{response["usage"]["total_tokens"]}" +end + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n Trace sent to Braintrust!" diff --git a/examples/alexrudall_openai.rb b/examples/contrib/ruby_openai/deprecated.rb similarity index 80% rename from examples/alexrudall_openai.rb rename to examples/contrib/ruby_openai/deprecated.rb index 78782a2..370208d 100755 --- a/examples/alexrudall_openai.rb +++ b/examples/contrib/ruby_openai/deprecated.rb @@ -8,15 +8,20 @@ # Example: ruby-openai (alexrudall) chat completion with Braintrust tracing # +# DEPRECATED: This example uses the old wrap() API which is deprecated. +# Please use the new instrument!() API instead: +# - examples/contrib/ruby_openai/basic.rb - Class-level instrumentation (all clients) +# - examples/contrib/ruby_openai/instance.rb - Instance-level instrumentation (specific client) +# # This example demonstrates how to automatically trace OpenAI API calls using # the ruby-openai gem (by alexrudall) with Braintrust. # # Note: ruby-openai is an optional development dependency. To run this example: # 1. Install ruby-openai: gem install ruby-openai -# 2. Run from the SDK root: ruby examples/alexrudall_openai.rb +# 2. 
Run from the SDK root: ruby examples/contrib/ruby_openai/deprecated.rb # # Usage: -# OPENAI_API_KEY=your-openai-key ruby examples/alexrudall_openai.rb +# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/deprecated.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -39,7 +44,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." -response = tracer.in_span("examples/alexrudall_openai.rb") do |span| +response = tracer.in_span("examples/contrib/ruby_openai/deprecated.rb") do |span| root_span = span # ruby-openai uses: client.chat(parameters: {...}) diff --git a/examples/contrib/ruby_openai/instance.rb b/examples/contrib/ruby_openai/instance.rb new file mode 100644 index 0000000..97ce7ab --- /dev/null +++ b/examples/contrib/ruby_openai/instance.rb @@ -0,0 +1,61 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: Instance-level ruby-openai (alexrudall) instrumentation +# +# This shows how to instrument a specific client instance rather than +# all OpenAI clients globally. +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_openai ruby examples/contrib/ruby_openai/instance.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create two client instances (ruby-openai uses access_token) +client_traced = ::OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) +client_untraced = ::OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) + +# Only instrument one of them +Braintrust.instrument!(:ruby_openai, target: client_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") +root_span = nil + +tracer.in_span("examples/contrib/ruby_openai/instance.rb") do |span| + root_span = span + + puts "Calling traced client..." 
+ response1 = client_traced.chat( + parameters: { + messages: [{role: "user", content: "Say 'traced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + } + ) + puts "Traced response: #{response1.dig("choices", 0, "message", "content")}" + + puts "\nCalling untraced client..." + response2 = client_untraced.chat( + parameters: { + messages: [{role: "user", content: "Say 'untraced'"}], + model: "gpt-4o-mini", + max_tokens: 10 + } + ) + puts "Untraced response: #{response2.dig("choices", 0, "message", "content")}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first client call should have an openai.chat span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_openai/streaming.rb b/examples/contrib/ruby_openai/streaming.rb new file mode 100644 index 0000000..b958744 --- /dev/null +++ b/examples/contrib/ruby_openai/streaming.rb @@ -0,0 +1,80 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: ruby-openai (alexrudall) streaming with Braintrust tracing +# +# This example demonstrates how to trace streaming OpenAI API calls +# using the ruby-openai gem. Shows the callback-based streaming pattern. 
+# +# Usage: +# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/ruby_openai/streaming.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_openai) + +# ruby-openai uses access_token parameter +client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-streaming-example") +root_span = nil + +tracer.in_span("examples/contrib/ruby_openai/streaming.rb") do |span| + root_span = span + + # Pattern 1: Chat streaming with callback proc + # Pass a proc to the stream parameter to receive chunks + puts "=== Pattern 1: Chat streaming with callback ===" + print "Assistant: " + + client.chat( + parameters: { + model: "gpt-4o-mini", + max_tokens: 100, + messages: [{role: "user", content: "Count from 1 to 5, one number per line."}], + stream_options: {include_usage: true}, + stream: proc do |chunk, _bytesize| + content = chunk.dig("choices", 0, "delta", "content") + print content if content + end + } + ) + puts "\n" + + # Pattern 2: Responses API streaming (if available) + if client.respond_to?(:responses) + puts "=== Pattern 2: Responses API streaming ===" + print "Assistant: " + + client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "Name 3 colors.", + stream: proc do |chunk, _event| + if chunk["type"] == "response.output_text.delta" + print chunk["delta"] + end + end + } + ) + puts "\n" + else + puts "=== Pattern 2: Responses API (skipped - not available in this version) ===" + end +end + +puts "\nView this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown + +puts "\nTrace sent to Braintrust!" 
diff --git a/examples/internal/alexrudall_ruby_openai.rb b/examples/internal/contrib/ruby_openai/golden.rb similarity index 98% rename from examples/internal/alexrudall_ruby_openai.rb rename to examples/internal/contrib/ruby_openai/golden.rb index a66b15f..4528e3b 100755 --- a/examples/internal/alexrudall_ruby_openai.rb +++ b/examples/internal/contrib/ruby_openai/golden.rb @@ -25,7 +25,7 @@ # as the openai gem integration for identical inputs. # # Usage: -# OPENAI_API_KEY=key bundle exec appraisal ruby-openai ruby examples/internal/alexrudall_ruby_openai.rb +# OPENAI_API_KEY=key bundle exec appraisal ruby-openai ruby examples/internal/contrib/ruby_openai/golden.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -37,16 +37,16 @@ # Get a tracer for this example tracer = OpenTelemetry.tracer_provider.tracer("alexrudall-ruby-openai-comprehensive-example") -# Create OpenAI client (ruby-openai style) and wrap it +# Create OpenAI client (ruby-openai style) and instrument it client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client) +Braintrust.instrument!(:ruby_openai, target: client) puts "ruby-openai (alexrudall) Comprehensive Features Example" puts "=" * 60 # Wrap all examples under a single parent trace root_span = nil -tracer.in_span("examples/internal/alexrudall_ruby_openai.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_openai/golden.rb") do |span| root_span = span # Example 1: Vision - Image Understanding [SKIPPED] diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index 66746dc..ddf85ed 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -100,6 +100,8 @@ def tracer_for(target, name: "braintrust") # Load integration stubs (eager load minimal metadata). 
require_relative "contrib/openai/integration" +require_relative "contrib/ruby_openai/integration" # Register integrations Braintrust::Contrib::OpenAI::Integration.register! +Braintrust::Contrib::RubyOpenAI::Integration.register! diff --git a/lib/braintrust/contrib/ruby_openai/deprecated.rb b/lib/braintrust/contrib/ruby_openai/deprecated.rb new file mode 100644 index 0000000..225a7e8 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/deprecated.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old ruby-openai integration API. +# This file now just delegates to the new API. + +module Braintrust + module Trace + module AlexRudall + module RubyOpenAI + # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans. + # This is the legacy API - delegates to the new contrib framework. + # + # @param client [OpenAI::Client] the OpenAI client to wrap + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) + # @return [OpenAI::Client] the wrapped client + def self.wrap(client, tracer_provider: nil) + Log.warn("Braintrust::Trace::AlexRudall::RubyOpenAI.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: tracer_provider) + client + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb b/lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb new file mode 100644 index 0000000..e40f1cf --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/instrumentation/chat.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../support/otel" +require_relative "../../support/openai" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module RubyOpenAI + module Instrumentation + # Chat 
completions instrumentation for ruby-openai. + # Provides module that can be prepended to OpenAI::Client to instrument the chat method. + module Chat + def self.included(base) + # Guard against double-wrapping: Check if patch is already in the ancestor chain. + # This prevents double instrumentation if class-level patching was already applied, + # and this patch is being applied to a singleton-class. (Special case.) + # + # Ruby's prepend() doesn't check the full inheritance chain, so without this guard, + # the instrumentation could be added twice. + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model frequency_penalty logit_bias logprobs max_tokens n + presence_penalty response_format seed service_tier stop + stream stream_options temperature top_p top_logprobs + tools tool_choice parallel_tool_calls user functions function_call + ].freeze + + module InstanceMethods + # Wrap chat method for ruby-openai gem + # ruby-openai API: client.chat(parameters: {...}) + def chat(parameters:) + tracer = Braintrust::Contrib.tracer_for(self) + + tracer.in_span("Chat Completion") do |span| + is_streaming = streaming?(parameters) + metadata = build_metadata(parameters) + set_input(span, parameters) + + aggregated_chunks = [] + time_to_first_token = nil + response = nil + response_data = {} + + if is_streaming + # Setup a time measurement for the first chunk from the stream + start_time = nil + parameters = wrap_stream_callback(parameters, aggregated_chunks) do + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + end + start_time = Braintrust::Internal::Time.measure + + # Then initiate the stream + response = super(parameters: parameters) + + if !aggregated_chunks.empty? 
+ response_data = Common.aggregate_streaming_chunks(aggregated_chunks) + end + else + # Make a time measurement synchronously around the API call + time_to_first_token = Braintrust::Internal::Time.measure do + response = super(parameters: parameters) + response_data = response if response + end + end + + set_output(span, response_data) + set_metrics(span, response_data, time_to_first_token) + finalize_metadata(span, metadata, response_data) + + response + end + end + + private + + def streaming?(parameters) + parameters.key?(:stream) && parameters[:stream].is_a?(Proc) + end + + def wrap_stream_callback(parameters, aggregated_chunks) + original_stream_proc = parameters[:stream] + parameters = parameters.dup + + parameters[:stream] = proc do |chunk, bytesize| + yield if aggregated_chunks.empty? + aggregated_chunks << chunk + original_stream_proc.call(chunk, bytesize) + end + + parameters + end + + def build_metadata(parameters) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/chat/completions" + } + + Chat::METADATA_FIELDS.each do |field| + next unless parameters.key?(field) + # Stream param is a Proc - just mark as true + metadata[field.to_s] = (field == :stream) ? true : parameters[field] + end + + metadata + end + + def set_input(span, parameters) + return unless parameters[:messages] + Support::OTel.set_json_attr(span, "braintrust.input_json", parameters[:messages]) + end + + def set_output(span, response_data) + choices = response_data[:choices] || response_data["choices"] + return unless choices&.any? + Support::OTel.set_json_attr(span, "braintrust.output_json", choices) + end + + def set_metrics(span, response_data, time_to_first_token) + usage = response_data[:usage] || response_data["usage"] + metrics = usage ? Support::OpenAI.parse_usage_tokens(usage) : {} + metrics["time_to_first_token"] = time_to_first_token || 0.0 + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response_data) + %w[id created model system_fingerprint service_tier].each do |field| + value = response_data[field.to_sym] || response_data[field] + metadata[field] = value if value + end + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/instrumentation/common.rb b/lib/braintrust/contrib/ruby_openai/instrumentation/common.rb new file mode 100644 index 0000000..3fff4bf --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/instrumentation/common.rb @@ -0,0 +1,138 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module RubyOpenAI + module Instrumentation + # Aggregation utilities for ruby-openai gem instrumentation. + # These are specific to the ruby-openai gem's data structures (string keys, plain hashes). + module Common + # Aggregate streaming chunks into a single response structure. + # Specific to ruby-openai gem which uses string keys and plain hashes. + # @param chunks [Array] array of chunk hashes from stream (string keys) + # @return [Hash] aggregated response with choices, usage, etc. (string keys) + def self.aggregate_streaming_chunks(chunks) + return {} if chunks.empty? 
+ + # Initialize aggregated structure + aggregated = { + "id" => nil, + "created" => nil, + "model" => nil, + "system_fingerprint" => nil, + "usage" => nil, + "choices" => [] + } + + # Track aggregated content and tool_calls for each choice index + choice_data = {} + + chunks.each do |chunk| + # Capture top-level fields from any chunk that has them + aggregated["id"] ||= chunk["id"] + aggregated["created"] ||= chunk["created"] + aggregated["model"] ||= chunk["model"] + aggregated["system_fingerprint"] ||= chunk["system_fingerprint"] + + # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) + aggregated["usage"] = chunk["usage"] if chunk["usage"] + + # Process choices + choices = chunk["choices"] + next unless choices.is_a?(Array) + + choices.each do |choice| + index = choice["index"] || 0 + choice_data[index] ||= { + "index" => index, + "role" => nil, + "content" => +"", + "tool_calls" => [], + "finish_reason" => nil + } + + delta = choice["delta"] || {} + + # Aggregate role (set once from first delta that has it) + choice_data[index]["role"] ||= delta["role"] + + # Aggregate content + choice_data[index]["content"] << delta["content"] if delta["content"] + + # Aggregate tool_calls + tool_calls = delta["tool_calls"] + if tool_calls.is_a?(Array) && tool_calls.any? + tool_calls.each do |tool_call_delta| + tc_id = tool_call_delta["id"] + if tc_id && !tc_id.empty? + # New tool call + choice_data[index]["tool_calls"] << { + "id" => tc_id, + "type" => tool_call_delta["type"], + "function" => { + "name" => +(tool_call_delta.dig("function", "name") || ""), + "arguments" => +(tool_call_delta.dig("function", "arguments") || "") + } + } + elsif choice_data[index]["tool_calls"].any? 
+ # Continuation - append arguments to last tool call + last_tool_call = choice_data[index]["tool_calls"].last + if tool_call_delta.dig("function", "arguments") + last_tool_call["function"]["arguments"] << tool_call_delta["function"]["arguments"] + end + end + end + end + + # Capture finish_reason + choice_data[index]["finish_reason"] = choice["finish_reason"] if choice["finish_reason"] + end + end + + # Build final choices array + aggregated["choices"] = choice_data.values.sort_by { |c| c["index"] }.map do |choice| + message = { + "role" => choice["role"], + "content" => choice["content"].empty? ? nil : choice["content"] + } + + # Add tool_calls to message if any + message["tool_calls"] = choice["tool_calls"] if choice["tool_calls"].any? + + { + "index" => choice["index"], + "message" => message, + "finish_reason" => choice["finish_reason"] + } + end + + aggregated + end + + # Aggregate responses streaming chunks into a single response structure. + # Specific to ruby-openai gem which uses string keys and plain hashes. + # @param chunks [Array] array of chunk hashes from stream (string keys) + # @return [Hash] aggregated response with output, usage, id (string keys) + def self.aggregate_responses_chunks(chunks) + return {} if chunks.empty? 
+ + # Find the response.completed event which has the final response + completed_chunk = chunks.find { |c| c["type"] == "response.completed" } + + if completed_chunk && completed_chunk["response"] + response = completed_chunk["response"] + return { + "id" => response["id"], + "output" => response["output"], + "usage" => response["usage"] + } + end + + # Fallback if no completed event found + {} + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb b/lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb new file mode 100644 index 0000000..d2768a0 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/instrumentation/responses.rb @@ -0,0 +1,146 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../support/otel" +require_relative "../../support/openai" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module RubyOpenAI + module Instrumentation + # Responses API instrumentation for ruby-openai. + # Provides module that can be prepended to OpenAI::Responses to instrument the create method. + module Responses + def self.included(base) + # Guard against double-wrapping: Check if patch is already in the ancestor chain. + # This prevents double instrumentation if class-level patching was already applied, + # and this patch is being applied to a singleton-class. (Special case.) + # + # Ruby's prepend() doesn't check the full inheritance chain, so without this guard, + # the instrumentation could be added twice. 
+ base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + METADATA_FIELDS = %i[ + model instructions modalities tools parallel_tool_calls + tool_choice temperature max_tokens top_p frequency_penalty + presence_penalty seed user metadata store response_format + reasoning previous_response_id truncation + ].freeze + + module InstanceMethods + # Wrap create method for ruby-openai responses API + # ruby-openai API: client.responses.create(parameters: {...}) + def create(parameters:) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("openai.responses.create") do |span| + is_streaming = streaming?(parameters) + metadata = build_metadata(parameters) + set_input(span, parameters) + + aggregated_chunks = [] + time_to_first_token = nil + response = nil + response_data = {} + + if is_streaming + # Setup a time measurement for the first chunk from the stream + start_time = nil + parameters = wrap_stream_callback(parameters, aggregated_chunks) do + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + end + start_time = Braintrust::Internal::Time.measure + + # Then initiate the stream + response = super(parameters: parameters) + + if !aggregated_chunks.empty? 
+ response_data = Common.aggregate_responses_chunks(aggregated_chunks) + end + else + # Make a time measurement synchronously around the API call + time_to_first_token = Braintrust::Internal::Time.measure do + response = super(parameters: parameters) + response_data = response if response + end + end + + set_output(span, response_data) + set_metrics(span, response_data, time_to_first_token) + finalize_metadata(span, metadata, response_data) + + response + end + end + + private + + def streaming?(parameters) + parameters.key?(:stream) && parameters[:stream].is_a?(Proc) + end + + def wrap_stream_callback(parameters, aggregated_chunks) + original_stream_proc = parameters[:stream] + parameters = parameters.dup + + parameters[:stream] = proc do |chunk, event| + yield if aggregated_chunks.empty? + aggregated_chunks << chunk + original_stream_proc.call(chunk, event) + end + + parameters + end + + def build_metadata(parameters) + metadata = { + "provider" => "openai", + "endpoint" => "/v1/responses" + } + + Responses::METADATA_FIELDS.each do |field| + metadata[field.to_s] = parameters[field] if parameters.key?(field) + end + + metadata["stream"] = true if streaming?(parameters) + metadata + end + + def set_input(span, parameters) + return unless parameters[:input] + Support::OTel.set_json_attr(span, "braintrust.input_json", parameters[:input]) + end + + def set_output(span, response_data) + output = response_data["output"] + return unless output + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + end + + def set_metrics(span, response_data, time_to_first_token) + usage = response_data["usage"] + metrics = usage ? Support::OpenAI.parse_usage_tokens(usage) : {} + metrics["time_to_first_token"] = time_to_first_token || 0.0 + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response_data) + metadata["id"] = response_data["id"] if response_data["id"] + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/integration.rb b/lib/braintrust/contrib/ruby_openai/integration.rb new file mode 100644 index 0000000..4255aa2 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/integration.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require_relative "../integration" +require_relative "deprecated" + +module Braintrust + module Contrib + module RubyOpenAI + # RubyOpenAI integration for automatic instrumentation. + # Instruments the alexrudall ruby-openai gem (not the official openai gem). + class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "7.0.0" + + GEM_NAMES = ["ruby-openai"].freeze + REQUIRE_PATHS = ["openai"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :ruby_openai + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if ruby-openai gem is available (not official openai gem) + def self.loaded? + # Check if ruby-openai gem is loaded (not the official openai gem). + # Both gems use "require 'openai'", so we need to distinguish them. + # + # OpenAI::Internal is defined ONLY in the official OpenAI gem + (defined?(::OpenAI::Client) && !defined?(::OpenAI::Internal)) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. 
+ # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [ChatPatcher, ResponsesPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_openai/patcher.rb b/lib/braintrust/contrib/ruby_openai/patcher.rb new file mode 100644 index 0000000..9699787 --- /dev/null +++ b/lib/braintrust/contrib/ruby_openai/patcher.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/chat" +require_relative "instrumentation/responses" + +module Braintrust + module Contrib + module RubyOpenAI + # Patcher for ruby-openai chat completions. + # Instruments OpenAI::Client#chat method. + class ChatPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) + end + + def patched?(**options) + target_class = options[:target]&.singleton_class || ::OpenAI::Client + Instrumentation::Chat.applied?(target_class) + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + options[:target].singleton_class.include(Instrumentation::Chat) + else + # Class-level (for all clients) + ::OpenAI::Client.include(Instrumentation::Chat) + end + end + end + end + + # Patcher for ruby-openai responses API. + # Instruments OpenAI::Responses#create method. + class ResponsesPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? 
+ defined?(::OpenAI::Client) && ::OpenAI::Client.method_defined?(:responses) + end + + def patched?(**options) + if options[:target] + responses_obj = options[:target].responses + Instrumentation::Responses.applied?(responses_obj.singleton_class) + else + # For class-level, check if the responses class is patched + defined?(::OpenAI::Responses) && Instrumentation::Responses.applied?(::OpenAI::Responses) + end + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + responses_obj = options[:target].responses + responses_obj.singleton_class.include(Instrumentation::Responses) + else + # Class-level (for all clients) + ::OpenAI::Responses.include(Instrumentation::Responses) + end + end + end + end + end + end +end diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb index e29694d..6bf9434 100644 --- a/lib/braintrust/trace.rb +++ b/lib/braintrust/trace.rb @@ -6,32 +6,6 @@ require_relative "trace/span_filter" require_relative "logger" -# OpenAI integrations - both ruby-openai and openai gems use require "openai" -# so we detect which one actually loaded the code and require the appropriate integration -begin - require "openai" - - # Check which OpenAI gem's code is actually loaded by inspecting $LOADED_FEATURES - # (both gems can be in Gem.loaded_specs, but only one's code can be loaded) - openai_load_path = $LOADED_FEATURES.find { |f| f.end_with?("/openai.rb") } - - if openai_load_path&.include?("ruby-openai") - # alexrudall/ruby-openai gem (path contains 
"ruby-openai-X.Y.Z") - require_relative "trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai" - elsif openai_load_path&.include?("/openai-") - # Official openai gem (path contains "openai-X.Y.Z") - require_relative "trace/contrib/openai" - elsif Gem.loaded_specs["ruby-openai"] - # Fallback: ruby-openai in loaded_specs (for unusual installation paths) - require_relative "trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai" - elsif Gem.loaded_specs["openai"] - # Fallback: official openai in loaded_specs (for unusual installation paths) - require_relative "trace/contrib/openai" - end -rescue LoadError - # No OpenAI gem installed - integration will not be available -end - # Anthropic integration is optional - automatically loaded if anthropic gem is available begin require "anthropic" diff --git a/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb b/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb deleted file mode 100644 index c2dfe7a..0000000 --- a/lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai.rb +++ /dev/null @@ -1,377 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../../../../tokens" - -module Braintrust - module Trace - module Contrib - module Github - module Alexrudall - module RubyOpenAI - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from OpenAI API response - # @param usage [Hash] usage hash from OpenAI response - # @return [Hash] metrics hash with normalized names - def 
self.parse_usage_tokens(usage) - Braintrust::Trace.parse_openai_usage_tokens(usage) - end - - # Aggregate streaming chunks into a single response structure - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with choices, usage, id, created, model - def self.aggregate_streaming_chunks(chunks) - return {} if chunks.empty? - - # Initialize aggregated structure - aggregated = { - "id" => nil, - "created" => nil, - "model" => nil, - "usage" => nil, - "choices" => [] - } - - # Track aggregated content for the first choice - role = nil - content = +"" - - chunks.each do |chunk| - # Capture top-level fields from any chunk that has them - aggregated["id"] ||= chunk["id"] - aggregated["created"] ||= chunk["created"] - aggregated["model"] ||= chunk["model"] - - # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) - aggregated["usage"] = chunk["usage"] if chunk["usage"] - - # Aggregate content from first choice - if chunk.dig("choices", 0, "delta", "role") - role ||= chunk.dig("choices", 0, "delta", "role") - end - if chunk.dig("choices", 0, "delta", "content") - content << chunk.dig("choices", 0, "delta", "content") - end - end - - # Build aggregated choices array - aggregated["choices"] = [ - { - "index" => 0, - "message" => { - "role" => role || "assistant", - "content" => content - }, - "finish_reason" => chunks.dig(-1, "choices", 0, "finish_reason") - } - ] - - aggregated - end - - # Aggregate responses streaming chunks into a single response structure - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with output, usage, id - def self.aggregate_responses_chunks(chunks) - return {} if chunks.empty? 
- - # Find the response.completed event which has the final response - completed_chunk = chunks.find { |c| c["type"] == "response.completed" } - - if completed_chunk && completed_chunk["response"] - response = completed_chunk["response"] - return { - "id" => response["id"], - "output" => response["output"], - "usage" => response["usage"] - } - end - - # Fallback if no completed event found - {} - end - - # Set span attributes from response data (works for both streaming and non-streaming) - # @param span [OpenTelemetry::Trace::Span] the span to set attributes on - # @param response_data [Hash] response hash with keys: choices, usage, id, created, model, system_fingerprint, service_tier - # @param time_to_first_token [Float] time to first token in seconds - # @param metadata [Hash] metadata hash to update with response fields - def self.set_span_attributes(span, response_data, time_to_first_token, metadata) - # Set output (choices) as JSON - if response_data["choices"]&.any? - set_json_attr(span, "braintrust.output_json", response_data["choices"]) - end - - # Set metrics (token usage + time_to_first_token) - metrics = {} - if response_data["usage"] - metrics = parse_usage_tokens(response_data["usage"]) - end - metrics["time_to_first_token"] = time_to_first_token || 0.0 - set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- - # Update metadata with response fields - %w[id created model system_fingerprint service_tier].each do |field| - metadata[field] = response_data[field] if response_data[field] - end - end - - # Wrap an OpenAI::Client (ruby-openai gem) to automatically create spans - # Supports both synchronous and streaming requests - # @param client [OpenAI::Client] the OpenAI client to wrap - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(client, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # Store tracer provider on the client for use by wrapper modules - client.instance_variable_set(:@braintrust_tracer_provider, tracer_provider) - - # Wrap chat completions - wrap_chat(client) - - # Wrap responses API if available - wrap_responses(client) if client.respond_to?(:responses) - - client - end - - # Wrap chat API - # @param client [OpenAI::Client] the OpenAI client - def self.wrap_chat(client) - client.singleton_class.prepend(ChatWrapper) - end - - # Wrap responses API - # @param client [OpenAI::Client] the OpenAI client - def self.wrap_responses(client) - # Store tracer provider on the responses object for use by wrapper module - responses_obj = client.responses - responses_obj.instance_variable_set(:@braintrust_tracer_provider, client.instance_variable_get(:@braintrust_tracer_provider)) - responses_obj.singleton_class.prepend(ResponsesCreateWrapper) - end - - # Wrapper module for chat completions - module ChatWrapper - def chat(parameters:) - tracer_provider = @braintrust_tracer_provider - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("Chat Completion") do |span| - # Track start time for time_to_first_token - start_time = Time.now - time_to_first_token = nil - is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc) - - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - 
- # Capture request metadata fields - metadata_fields = %w[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - field_sym = field.to_sym - if parameters.key?(field_sym) - # Special handling for stream parameter (it's a Proc) - metadata[field] = if field == "stream" - true # Just mark as streaming - else - parameters[field_sym] - end - end - end - - # Set input messages as JSON - if parameters[:messages] - RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:messages]) - end - - # Wrap streaming callback if present to capture time to first token and aggregate chunks - aggregated_chunks = [] - if is_streaming - original_stream_proc = parameters[:stream] - parameters = parameters.dup - parameters[:stream] = proc do |chunk, bytesize| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - # Aggregate chunks for later processing - aggregated_chunks << chunk - # Call original callback - original_stream_proc.call(chunk, bytesize) - end - end - - begin - # Call the original method - response = super(parameters: parameters) - - # Calculate time to first token for non-streaming - time_to_first_token ||= Time.now - start_time unless is_streaming - - # Process response data - if is_streaming && !aggregated_chunks.empty? 
- # Aggregate streaming chunks into response-like structure - aggregated_response = RubyOpenAI.aggregate_streaming_chunks(aggregated_chunks) - RubyOpenAI.set_span_attributes(span, aggregated_response, time_to_first_token, metadata) - else - # Non-streaming: use response object directly - RubyOpenAI.set_span_attributes(span, response || {}, time_to_first_token, metadata) - end - - # Set metadata ONCE at the end with complete hash - RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata) - - response - rescue => e - # Record exception in span - span.record_exception(e) - span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}") - raise - end - end - end - end - - # Wrapper module for responses API create method - module ResponsesCreateWrapper - def create(parameters:) - tracer_provider = @braintrust_tracer_provider - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("openai.responses.create") do |span| - # Track start time for time_to_first_token - start_time = Time.now - time_to_first_token = nil - is_streaming = parameters.key?(:stream) && parameters[:stream].is_a?(Proc) - - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses" - } - - # Capture request metadata fields - metadata_fields = %w[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user store response_format - reasoning previous_response_id truncation - ] - - metadata_fields.each do |field| - field_sym = field.to_sym - if parameters.key?(field_sym) - metadata[field] = parameters[field_sym] - end - end - - # Mark as streaming if applicable - metadata["stream"] = true if is_streaming - - # Set input as JSON - if parameters[:input] - RubyOpenAI.set_json_attr(span, "braintrust.input_json", parameters[:input]) - end - - # Wrap streaming callback if present to capture time to first token and aggregate chunks - 
aggregated_chunks = [] - if is_streaming - original_stream_proc = parameters[:stream] - parameters = parameters.dup - parameters[:stream] = proc do |chunk, event| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - # Aggregate chunks for later processing - aggregated_chunks << chunk - # Call original callback - original_stream_proc.call(chunk, event) - end - end - - begin - # Call the original method - response = super(parameters: parameters) - - # Calculate time to first token for non-streaming - time_to_first_token ||= Time.now - start_time unless is_streaming - - # Process response data - if is_streaming && !aggregated_chunks.empty? - # Aggregate streaming chunks into response-like structure - aggregated_response = RubyOpenAI.aggregate_responses_chunks(aggregated_chunks) - - # Set output as JSON - if aggregated_response["output"] - RubyOpenAI.set_json_attr(span, "braintrust.output_json", aggregated_response["output"]) - end - - # Set metrics (token usage + time_to_first_token) - metrics = {} - if aggregated_response["usage"] - metrics = RubyOpenAI.parse_usage_tokens(aggregated_response["usage"]) - end - metrics["time_to_first_token"] = time_to_first_token || 0.0 - RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - - # Update metadata with response fields - metadata["id"] = aggregated_response["id"] if aggregated_response["id"] - else - # Non-streaming: use response object directly - if response && response["output"] - RubyOpenAI.set_json_attr(span, "braintrust.output_json", response["output"]) - end - - # Set metrics (token usage + time_to_first_token) - metrics = {} - if response && response["usage"] - metrics = RubyOpenAI.parse_usage_tokens(response["usage"]) - end - metrics["time_to_first_token"] = time_to_first_token || 0.0 - RubyOpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- - # Update metadata with response fields - metadata["id"] = response["id"] if response && response["id"] - end - - # Set metadata ONCE at the end with complete hash - RubyOpenAI.set_json_attr(span, "braintrust.metadata", metadata) - - response - rescue => e - # Record exception in span - span.record_exception(e) - span.status = OpenTelemetry::Trace::Status.error("Exception: #{e.class} - #{e.message}") - raise - end - end - end - end - end - end - end - end - - # Backwards compatibility: this module was originally at Braintrust::Trace::AlexRudall::RubyOpenAI - module AlexRudall - RubyOpenAI = Contrib::Github::Alexrudall::RubyOpenAI - end - end -end diff --git a/test/braintrust/contrib/ruby_openai/deprecated_test.rb b/test/braintrust/contrib/ruby_openai/deprecated_test.rb new file mode 100644 index 0000000..0ab85ad --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/deprecated_test.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/ruby_openai/deprecated" + +class Braintrust::Contrib::RubyOpenAI::DeprecatedTest < Minitest::Test + # --- .wrap --- + + def test_wrap_delegates_to_instrument + mock_client = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(mock_client, tracer_provider: mock_tracer) } + end + + assert_equal :ruby_openai, captured_args[0] + assert_same mock_client, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_logs_deprecation_warning + mock_client = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(mock_client) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, 
warning_message) + end + + def test_wrap_returns_client + mock_client = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(mock_client) } + assert_same mock_client, result + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb new file mode 100644 index 0000000..90f068b --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation/chat_test.rb @@ -0,0 +1,362 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/ruby_openai/instrumentation/chat" + +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ChatTest < Minitest::Test + Chat = Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Chat::InstanceMethods]) + mock.expect(:prepend, nil, [Chat::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Chat.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + # Should not raise or double-prepend + Chat.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Chat::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? 
--- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Chat.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + assert Chat.applied?(base) + end +end + +# E2E tests for Chat instrumentation +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ChatE2ETest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + @api_key = ENV["OPENAI_API_KEY"] || "test-api-key" + end + + # --- #chat --- + + def test_chat_creates_span_with_correct_attributes + VCR.use_cassette("alexrudall_ruby_openai/chat_completions") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a test assistant."}, + {role: "user", content: "Say 'test'"} + ], + max_tokens: 10, + temperature: 0.5 + } + ) + + refute_nil response + refute_nil response.dig("choices", 0, "message", "content") + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify braintrust.input_json contains messages with content + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 2, input.length + assert_equal "system", input[0]["role"] + assert_equal "You are a test assistant.", input[0]["content"] + assert_equal "user", input[1]["role"] + assert_equal "Say 'test'", input[1]["content"] + + # Verify braintrust.output_json contains choices with full details + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal 0, output[0]["index"] + assert_equal "assistant", 
output[0]["message"]["role"] + refute_nil output[0]["message"]["content"] + refute_nil output[0]["finish_reason"] + + # Verify braintrust.metadata contains request and response metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/chat/completions", metadata["endpoint"] + assert_match(/\Agpt-4o-mini/, metadata["model"]) + assert_equal 10, metadata["max_tokens"] + assert_equal 0.5, metadata["temperature"] + refute_nil metadata["id"] + refute_nil metadata["created"] + + # Verify braintrust.metrics contains token usage + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_chat_handles_tool_calls + VCR.use_cassette("alexrudall_ruby_openai/tool_calls") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + tools = [ + { + type: "function", + function: { + name: "get_weather", + description: "Get the current weather", + parameters: { + type: "object", + properties: { + location: {type: "string", description: "City name"} + }, + required: ["location"] + } + } + } + ] + + response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "What's the weather in Paris?"} + ], + tools: tools, + max_tokens: 100 + } + ) + + refute_nil response + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = 
JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + assert_equal "user", input[0]["role"] + + # Verify output contains tool calls + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + + # Verify metadata includes tools + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + refute_nil metadata["tools"] + end + end + + def test_chat_handles_multi_turn_tool_calls + VCR.use_cassette("alexrudall_ruby_openai/multi_turn_tools") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + tools = [ + { + type: "function", + function: { + name: "calculate", + description: "Perform a calculation", + parameters: { + type: "object", + properties: { + operation: {type: "string"}, + a: {type: "number"}, + b: {type: "number"} + } + } + } + } + ] + + # First request - model calls tool + first_response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "What is 127 multiplied by 49?"} + ], + tools: tools, + max_tokens: 100 + } + ) + + tool_call = first_response.dig("choices", 0, "message", "tool_calls", 0) + skip "Model didn't call tool" unless tool_call + + # Second request - provide tool result with tool_call_id + second_response = client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "What is 127 multiplied by 49?"}, + first_response.dig("choices", 0, "message"), # Assistant message with tool_calls + { + role: "tool", + tool_call_id: tool_call["id"], + content: "6223" + } + ], + tools: tools, + max_tokens: 100 + } + ) + + # Verify response + refute_nil second_response + + # Drain spans (we have 2: first request + second request) + spans = rig.drain + assert_equal 2, spans.length + + # 
Verify second span contains tool_call_id in input + span = spans[1] + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 3, input.length + + # Third message should be tool response with tool_call_id + assert_equal "tool", input[2]["role"] + assert_equal tool_call["id"], input[2]["tool_call_id"] + assert_equal "6223", input[2]["content"] + end + end + + def test_chat_handles_streaming + VCR.use_cassette("alexrudall_ruby_openai/streaming") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + full_content = "" + client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "user", content: "Count from 1 to 3"} + ], + max_tokens: 50, + stream_options: {include_usage: true}, + stream: proc do |chunk, _bytesize| + delta_content = chunk.dig("choices", 0, "delta", "content") + full_content += delta_content if delta_content + end + } + ) + + refute_empty full_content + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + assert_equal "user", input[0]["role"] + + # Verify output was captured and aggregated + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal 0, output[0]["index"] + assert_equal "assistant", output[0]["message"]["role"] + refute_nil output[0]["message"]["content"] + refute_empty output[0]["message"]["content"] + + # Verify metadata includes stream flag + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal true, metadata["stream"] + + # Verify metrics include 
time_to_first_token and usage tokens + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + end + end + + def test_chat_records_exception_on_error + VCR.use_cassette("alexrudall_ruby_openai/error") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: "invalid_key") + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + error = assert_raises do + client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [{role: "user", content: "test"}] + } + ) + end + + refute_nil error + + span = rig.drain_one + + assert_equal "Chat Completion", span.name + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + + # Verify error message is captured + refute_nil span.status.description + assert span.status.description.length > 0 + + # Verify exception event was recorded + assert span.events.any? 
{ |event| event.name == "exception" } + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb new file mode 100644 index 0000000..4053f64 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation/common_test.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/ruby_openai/instrumentation/common" + +class Braintrust::Contrib::RubyOpenAI::Instrumentation::CommonTest < Minitest::Test + Common = Braintrust::Contrib::RubyOpenAI::Instrumentation::Common + + # --- .aggregate_streaming_chunks --- + + def test_aggregate_streaming_chunks_returns_empty_hash_for_empty_input + assert_equal({}, Common.aggregate_streaming_chunks([])) + end + + def test_aggregate_streaming_chunks_aggregates_basic_chunks + chunks = [ + {"id" => "chatcmpl-123", "model" => "gpt-4", "choices" => [{"index" => 0, "delta" => {"role" => "assistant"}}]}, + {"choices" => [{"index" => 0, "delta" => {"content" => "Hello"}}]}, + {"choices" => [{"index" => 0, "delta" => {"content" => " world"}, "finish_reason" => "stop"}]}, + {"usage" => {"prompt_tokens" => 10, "completion_tokens" => 5}} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal "chatcmpl-123", result["id"] + assert_equal "gpt-4", result["model"] + assert_equal 1, result["choices"].length + assert_equal "assistant", result["choices"][0]["message"]["role"] + assert_equal "Hello world", result["choices"][0]["message"]["content"] + assert_equal "stop", result["choices"][0]["finish_reason"] + assert_equal({"prompt_tokens" => 10, "completion_tokens" => 5}, result["usage"]) + end + + def test_aggregate_streaming_chunks_captures_system_fingerprint + chunks = [ + {"id" => "chatcmpl-123", "system_fingerprint" => "fp_abc123", "choices" => [{"index" => 0, "delta" => {"role" => "assistant"}}]}, + {"choices" => [{"index" => 0, "delta" => {"content" => "Hi"}, 
"finish_reason" => "stop"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal "fp_abc123", result["system_fingerprint"] + end + + def test_aggregate_streaming_chunks_aggregates_tool_calls + chunks = [ + {"id" => "chatcmpl-123", "choices" => [{"index" => 0, "delta" => {"role" => "assistant", "tool_calls" => [{"id" => "call_abc", "type" => "function", "function" => {"name" => "get_weather", "arguments" => ""}}]}}]}, + {"choices" => [{"index" => 0, "delta" => {"tool_calls" => [{"function" => {"arguments" => '{"loc'}}]}}]}, + {"choices" => [{"index" => 0, "delta" => {"tool_calls" => [{"function" => {"arguments" => 'ation":"NYC"}'}}]}, "finish_reason" => "tool_calls"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 1, result["choices"].length + assert_equal 1, result["choices"][0]["message"]["tool_calls"].length + assert_equal "call_abc", result["choices"][0]["message"]["tool_calls"][0]["id"] + assert_equal "get_weather", result["choices"][0]["message"]["tool_calls"][0]["function"]["name"] + assert_equal '{"location":"NYC"}', result["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"] + end + + def test_aggregate_streaming_chunks_handles_multiple_choices + chunks = [ + {"id" => "chatcmpl-123", "choices" => [ + {"index" => 0, "delta" => {"role" => "assistant"}}, + {"index" => 1, "delta" => {"role" => "assistant"}} + ]}, + {"choices" => [ + {"index" => 0, "delta" => {"content" => "First"}}, + {"index" => 1, "delta" => {"content" => "Second"}} + ]}, + {"choices" => [ + {"index" => 0, "delta" => {}, "finish_reason" => "stop"}, + {"index" => 1, "delta" => {}, "finish_reason" => "stop"} + ]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_equal 2, result["choices"].length + assert_equal "First", result["choices"][0]["message"]["content"] + assert_equal "Second", result["choices"][1]["message"]["content"] + end + + def test_aggregate_streaming_chunks_returns_nil_content_when_empty + 
chunks = [ + {"id" => "chatcmpl-123", "choices" => [{"index" => 0, "delta" => {"role" => "assistant"}}]}, + {"choices" => [{"index" => 0, "delta" => {}, "finish_reason" => "stop"}]} + ] + + result = Common.aggregate_streaming_chunks(chunks) + + assert_nil result["choices"][0]["message"]["content"] + end + + # --- .aggregate_responses_chunks --- + + def test_aggregate_responses_chunks_returns_empty_hash_for_empty_input + assert_equal({}, Common.aggregate_responses_chunks([])) + end + + def test_aggregate_responses_chunks_extracts_completed_event_data + chunks = [ + {"type" => "response.created"}, + {"type" => "response.in_progress"}, + { + "type" => "response.completed", + "response" => { + "id" => "resp_123", + "output" => [{"type" => "message", "content" => "Hello"}], + "usage" => {"input_tokens" => 10, "output_tokens" => 5} + } + } + ] + + result = Common.aggregate_responses_chunks(chunks) + + assert_equal "resp_123", result["id"] + assert_equal [{"type" => "message", "content" => "Hello"}], result["output"] + assert_equal({"input_tokens" => 10, "output_tokens" => 5}, result["usage"]) + end + + def test_aggregate_responses_chunks_returns_empty_without_completed_event + chunks = [ + {"type" => "response.created"}, + {"type" => "response.in_progress"} + ] + + result = Common.aggregate_responses_chunks(chunks) + + assert_equal({}, result) + end + + def test_aggregate_responses_chunks_handles_missing_response_fields + chunks = [ + { + "type" => "response.completed", + "response" => { + "id" => "resp_123" + } + } + ] + + result = Common.aggregate_responses_chunks(chunks) + + assert_equal "resp_123", result["id"] + assert_nil result["output"] + assert_nil result["usage"] + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb new file mode 100644 index 0000000..a0ef98e --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation/responses_test.rb @@ 
-0,0 +1,232 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/ruby_openai/instrumentation/responses" + +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ResponsesTest < Minitest::Test + Responses = Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Responses::InstanceMethods]) + mock.expect(:prepend, nil, [Responses::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Responses.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + # Should not raise or double-prepend + Responses.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Responses::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Responses.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + assert Responses.applied?(base) + end +end + +# E2E tests for Responses instrumentation +class Braintrust::Contrib::RubyOpenAI::Instrumentation::ResponsesE2ETest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! 
+ @api_key = ENV["OPENAI_API_KEY"] || "test-api-key" + end + + # --- #create --- + + def test_create_creates_span_with_correct_attributes + VCR.use_cassette("alexrudall_ruby_openai/responses_non_streaming") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + response = client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "What is 2+2? Reply with just the number." + } + ) + + refute_nil response + refute_nil response["output"] + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Verify braintrust.input_json contains input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "What is 2+2? Reply with just the number.", input + + # Verify braintrust.output_json contains output + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + refute_nil output + + # Verify braintrust.metadata contains request metadata + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal "gpt-4o-mini", metadata["model"] + refute_nil metadata["id"] + + # Verify braintrust.metrics contains token usage + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_create_handles_streaming + VCR.use_cassette("alexrudall_ruby_openai/responses_streaming") do + rig = 
setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + chunks = [] + client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "Count from 1 to 3", + stream: proc do |chunk, _event| + chunks << chunk + end + } + ) + + refute_empty chunks + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + + # Verify braintrust.input_json contains input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "Count from 1 to 3", input + + # Verify braintrust.output_json contains aggregated output + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + refute_nil output + + # Verify braintrust.metadata contains request metadata with stream flag + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal true, metadata["stream"] + + # Verify braintrust.metrics contains time_to_first_token + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token") + assert metrics["time_to_first_token"] >= 0 + end + end + + def test_create_captures_metadata_parameter + VCR.use_cassette("alexrudall_ruby_openai/responses_with_metadata") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: @api_key) + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + response = client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "Say hello", + metadata: { + "test_key" => "test_value", + "user_id" => "user_123" + } + } 
+ ) + + refute_nil response + + span = rig.drain_one + + # Verify metadata parameter is captured in span metadata + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert metadata.key?("metadata"), "metadata parameter should be captured" + assert_equal "test_value", metadata["metadata"]["test_key"] + assert_equal "user_123", metadata["metadata"]["user_id"] + end + end + + def test_create_records_exception_on_error + VCR.use_cassette("alexrudall_ruby_openai/responses_error") do + rig = setup_otel_test_rig + + client = OpenAI::Client.new(access_token: "invalid_key") + Braintrust.instrument!(:ruby_openai, target: client, tracer_provider: rig.tracer_provider) + + skip "Responses API not available" unless client.respond_to?(:responses) + + error = assert_raises do + client.responses.create( + parameters: { + model: "gpt-4o-mini", + input: "test" + } + ) + end + + refute_nil error + + span = rig.drain_one + + assert_equal "openai.responses.create", span.name + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + + # Verify error message is captured + refute_nil span.status.description + assert span.status.description.length > 0 + + # Verify exception event was recorded + assert span.events.any? 
{ |event| event.name == "exception" } + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/instrumentation_test.rb b/test/braintrust/contrib/ruby_openai/instrumentation_test.rb new file mode 100644 index 0000000..a2469c6 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/instrumentation_test.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_openai/patcher" + +# Tests for instance-level instrumentation behavior +class Braintrust::Contrib::RubyOpenAI::InstrumentationTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + end + + # --- Instance-level instrumentation --- + + def test_instance_instrumentation_only_patches_target_client + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched? 
+ + rig = setup_otel_test_rig + + # Create two clients BEFORE any patching + client_traced = OpenAI::Client.new(access_token: "test-key") + client_untraced = OpenAI::Client.new(access_token: "test-key") + + # Verify neither client is patched initially + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should not be patched initially" + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?, + "class should not be patched initially" + + # Instrument only one client (instance-level) + Braintrust.instrument!(:ruby_openai, target: client_traced, tracer_provider: rig.tracer_provider) + + # Only the traced client should be patched + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_traced), + "client_traced should be patched after instrument!" + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client_untraced), + "client_untraced should NOT be patched after instrument!" + + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end +end diff --git a/test/braintrust/contrib/ruby_openai/integration_helper.rb b/test/braintrust/contrib/ruby_openai/integration_helper.rb new file mode 100644 index 0000000..c9d44ad --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/integration_helper.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +# Test helpers for ruby-openai integration tests. +# Provides gem loading helpers that handle the ruby-openai gem vs official openai disambiguation. + +module Braintrust + module Contrib + module RubyOpenAI + module IntegrationHelper + # Skip test unless ruby-openai gem is available (not official openai). 
+ # Loads the gem if available. + def skip_unless_ruby_openai! + if Gem.loaded_specs["openai"] + skip "ruby-openai gem not available (found official openai gem instead)" + end + + unless Gem.loaded_specs["ruby-openai"] + skip "ruby-openai gem not available" + end + + require "openai" unless defined?(::OpenAI) + end + + # Load ruby-openai gem if available (doesn't skip, for tests that handle both states). + def load_ruby_openai_if_available + if Gem.loaded_specs["ruby-openai"] && !Gem.loaded_specs["openai"] + require "openai" unless defined?(::OpenAI) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/integration_test.rb b/test/braintrust/contrib/ruby_openai/integration_test.rb new file mode 100644 index 0000000..489347d --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/integration_test.rb @@ -0,0 +1,65 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +class Braintrust::Contrib::RubyOpenAI::IntegrationTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + load_ruby_openai_if_available + @integration = Braintrust::Contrib::RubyOpenAI::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :ruby_openai, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["ruby-openai"], @integration.gem_names + end + + # --- .require_paths --- + + def test_require_paths + assert_equal ["openai"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "7.0.0", @integration.minimum_version + end + + # --- .loaded? 
--- + + def test_loaded_returns_true_when_openai_client_defined_without_internal + skip "Official OpenAI gem is loaded (has ::OpenAI::Internal)" if defined?(::OpenAI::Internal) + skip "OpenAI module not defined" unless defined?(::OpenAI::Client) + + assert @integration.loaded?, "Should be loaded when ::OpenAI::Client is defined without ::OpenAI::Internal" + end + + def test_loaded_returns_false_when_openai_internal_defined + skip "Official OpenAI gem not loaded" unless defined?(::OpenAI::Internal) + + refute @integration.loaded?, "Should not be loaded when ::OpenAI::Internal is defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/ruby_openai/openai_test.rb b/test/braintrust/contrib/ruby_openai/openai_test.rb new file mode 100644 index 0000000..b147db8 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/openai_test.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +# require "test_helper" + +# # Load the openai gem to define OpenAI::Client (and OpenAI::Internal if official gem) +# # This must happen before tests run so the gem detection logic works +# begin +# require "openai" +# rescue LoadError +# # Gem not available in this appraisal +# end + +# This test verifies that the RubyOpenAI integration correctly identifies when the +# `ruby-openai` gem is loaded vs the official `openai` gem. 
+# +# The test is designed to work with different appraisals: +# bundle exec appraisal ruby-openai rake test:contrib # ruby-openai only - tests run +# bundle exec appraisal openai-ruby-openai rake test:contrib # Both gems - tests skipped (ruby-openai loads) +# bundle exec appraisal openai rake test:contrib # Official gem only - tests skipped +# +# The test verifies: +# 1. When `ruby-openai` gem is loaded, loaded? returns true +# 2. When `ruby-openai` gem is loaded, instrument! succeeds and patchers are applied + +class Braintrust::Contrib::RubyOpenAI::OpenAITest < Minitest::Test + Integration = Braintrust::Contrib::RubyOpenAI::Integration + + # def ruby_openai_available? + # # OpenAI::Internal is only defined in the official openai gem + # !Gem.loaded_specs["ruby-openai"].nil? + # end + + # def official_openai_available? + # # OpenAI::Internal is only defined in the official openai gem + # !Gem.loaded_specs["openai"].nil? + # end + + def ruby_openai_loaded? + # Check if ruby-openai gem is loaded (not the official openai gem). + # Both gems use "require 'openai'", so we need to distinguish them. + # + # OpenAI::Internal is defined ONLY in the official OpenAI gem + (defined?(::OpenAI::Client) && !defined?(::OpenAI::Internal)) ? true : false + end + + # --- .loaded? --- + + def test_loaded_returns_true_for_ruby_openai_gem + skip "ruby-openai gem not loaded" unless ruby_openai_loaded? + + assert Integration.loaded?, + "loaded? should return true when ruby-openai gem is loaded" + end + + def test_loaded_returns_false_when_ruby_openai_gem_not_loaded + skip "ruby-openai gem is loaded" if ruby_openai_loaded? + + refute Integration.loaded?, + "loaded? should return false when ruby-openai gem is not loaded" + end + + # --- Braintrust.instrument! --- + + def test_instrument_succeeds_for_ruby_openai_gem + skip "ruby-openai gem not loaded" unless ruby_openai_loaded? + + result = Braintrust.instrument!(:ruby_openai) + + assert result, "instrument! 
should return truthy for ruby-openai gem" + + any_patched = Integration.patchers.any?(&:patched?) + assert any_patched, "at least one patcher should be patched for ruby-openai gem" + end + + # --- OpenAI::Internal --- + + def test_openai_internal_not_defined_for_ruby_openai_gem + skip "ruby-openai gem not loaded" unless ruby_openai_loaded? + + refute defined?(::OpenAI::Internal), + "OpenAI::Internal should not be defined when ruby-openai gem is loaded" + end + + # --- .available? --- + + def test_not_available_when_official_openai_gem_loaded + skip "ruby-openai gem is available" if Gem.loaded_specs["ruby-openai"] + skip "Official openai gem not loaded" unless Gem.loaded_specs["openai"] + + refute Integration.available?, "Should NOT be available when only official openai gem is loaded" + end +end diff --git a/test/braintrust/contrib/ruby_openai/patcher_test.rb b/test/braintrust/contrib/ruby_openai/patcher_test.rb new file mode 100644 index 0000000..5566682 --- /dev/null +++ b/test/braintrust/contrib/ruby_openai/patcher_test.rb @@ -0,0 +1,206 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_openai/patcher" + +class Braintrust::Contrib::RubyOpenAI::ChatPatcherTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_openai_client_defined + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + fake_client_class = Class.new + + OpenAI.stub_const(:Client, fake_client_class) do + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched? 
+ end + end + + def test_patched_returns_true_when_module_included + fake_client_class = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + OpenAI.stub_const(:Client, fake_client_class) do + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched? + end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat + end + + mock_chain(:singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::RubyOpenAI::ChatPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_client_class = Minitest::Mock.new + fake_client_class.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat]) + + OpenAI.stub_const(:Client, fake_client_class) do + Braintrust::Contrib::RubyOpenAI::ChatPatcher.perform_patch + fake_client_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Chat]) + + mock_chain(:singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::RubyOpenAI::ChatPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::RubyOpenAI::ChatPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end + +class 
Braintrust::Contrib::RubyOpenAI::ResponsesPatcherTest < Minitest::Test + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper + + def setup + skip_unless_ruby_openai! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_responses_method_exists + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + assert Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.applicable? + end + + def test_applicable_returns_false_when_responses_method_missing + fake_client_class = Class.new + + OpenAI.stub_const(:Client, fake_client_class) do + refute Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.applicable? + end + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + skip "OpenAI::Responses not defined" unless defined?(::OpenAI::Responses) + + fake_responses_class = Class.new + + OpenAI.stub_const(:Responses, fake_responses_class) do + refute Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + skip "OpenAI::Responses not defined" unless defined?(::OpenAI::Responses) + + fake_responses_class = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + OpenAI.stub_const(:Responses, fake_responses_class) do + assert Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched? 
+ end + end + + def test_patched_returns_false_for_unpatched_instance + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + fake_singleton = Class.new + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + fake_singleton = Class.new do + include Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses + end + + mock_chain(:responses, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + skip "OpenAI::Responses not defined" unless defined?(::OpenAI::Responses) + + fake_responses_class = Minitest::Mock.new + fake_responses_class.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses]) + + OpenAI.stub_const(:Responses, fake_responses_class) do + Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.perform_patch + fake_responses_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + skip "Responses API not available" unless OpenAI::Client.method_defined?(:responses) + + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::RubyOpenAI::Instrumentation::Responses]) + + mock_chain(:responses, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::OpenAI::Client]) + Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + skip "Responses API not available" unless 
OpenAI::Client.method_defined?(:responses) + + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::OpenAI::Client]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::RubyOpenAI::ResponsesPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end diff --git a/test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb b/test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb deleted file mode 100644 index c736c12..0000000 --- a/test/braintrust/trace/contrib/alexrudall_ruby_openai_test.rb +++ /dev/null @@ -1,620 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" -require_relative "../../../../lib/braintrust/trace/contrib/github.com/alexrudall/ruby-openai/ruby-openai" - -class Braintrust::Trace::AlexRudall::RubyOpenAITest < Minitest::Test - def setup - # Skip all ruby-openai tests if the gem is not available - # Note: ruby-openai gem is required as "openai" (same as the openai gem) - # We detect ruby-openai by checking the gem name in loaded specs - begin - require "openai" - - # Check which gem is loaded by looking at Gem.loaded_specs - # ruby-openai has gem name "ruby-openai" - # official openai gem has gem name "openai" - if Gem.loaded_specs["ruby-openai"] - @using_ruby_openai = true - elsif Gem.loaded_specs["openai"] - skip "ruby-openai gem not available (found openai gem instead)" - else - skip "Could not determine which OpenAI gem is loaded" - end - - @gem_available = true - rescue LoadError - @gem_available = false - skip "ruby-openai gem not available" - end - - @api_key = ENV["OPENAI_API_KEY"] || "test-api-key" - @original_api_key = ENV["OPENAI_API_KEY"] - end - - def teardown - if @original_api_key - ENV["OPENAI_API_KEY"] = @original_api_key - else - ENV.delete("OPENAI_API_KEY") - end - end - - def test_wrap_creates_span_for_chat_completions - VCR.use_cassette("alexrudall_ruby_openai/chat_completions") do - require "openai" - - # Set up test rig (includes Braintrust processor) - rig = 
setup_otel_test_rig - - # Create OpenAI client using ruby-openai's API and wrap it with Braintrust tracing - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a simple chat completion request with additional params to test metadata capture - # ruby-openai uses: client.chat(parameters: {...}) - response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "system", content: "You are a test assistant."}, - {role: "user", content: "Say 'test'"} - ], - max_tokens: 10, - temperature: 0.5 - } - ) - - # Verify response - refute_nil response - refute_nil response.dig("choices", 0, "message", "content") - - # Drain and verify span - span = rig.drain_one - - # Verify span name matches pattern - assert_equal "Chat Completion", span.name - - # Verify braintrust.input_json contains messages - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 2, input.length - assert_equal "system", input[0]["role"] - assert_equal "You are a test assistant.", input[0]["content"] - assert_equal "user", input[1]["role"] - assert_equal "Say 'test'", input[1]["content"] - - # Verify braintrust.output_json contains choices - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - refute_nil output[0]["message"]["content"] - refute_nil output[0]["finish_reason"] - - # Verify braintrust.metadata contains request and response metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/chat/completions", metadata["endpoint"] - assert_match(/\Agpt-4o-mini/, metadata["model"]) - 
assert_equal 10, metadata["max_tokens"] - assert_equal 0.5, metadata["temperature"] - refute_nil metadata["id"] - refute_nil metadata["created"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_handles_tool_calls - VCR.use_cassette("alexrudall_ruby_openai/tool_calls") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request that will trigger a tool call - tools = [ - { - type: "function", - function: { - name: "get_weather", - description: "Get the current weather", - parameters: { - type: "object", - properties: { - location: {type: "string", description: "City name"} - }, - required: ["location"] - } - } - } - ] - - response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "What's the weather in Paris?"} - ], - tools: tools, - max_tokens: 100 - } - ) - - # Verify response - refute_nil response - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - - # Verify 
output contains tool calls - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - - # Verify metadata includes tools - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - refute_nil metadata["tools"] - end - end - - def test_wrap_handles_multi_turn_tool_calls_with_tool_call_id - VCR.use_cassette("alexrudall_ruby_openai/multi_turn_tools") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - tools = [ - { - type: "function", - function: { - name: "calculate", - description: "Perform a calculation", - parameters: { - type: "object", - properties: { - operation: {type: "string"}, - a: {type: "number"}, - b: {type: "number"} - } - } - } - } - ] - - # First request - model calls tool - first_response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "What is 127 multiplied by 49?"} - ], - tools: tools, - max_tokens: 100 - } - ) - - tool_call = first_response.dig("choices", 0, "message", "tool_calls", 0) - skip "Model didn't call tool" unless tool_call - - # Second request - provide tool result with tool_call_id - second_response = client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "What is 127 multiplied by 49?"}, - first_response.dig("choices", 0, "message"), # Assistant message with tool_calls - { - role: "tool", - tool_call_id: tool_call["id"], - content: "6223" - } - ], - tools: tools, - max_tokens: 100 - } - ) - - # Verify response - refute_nil second_response - - # Drain spans (we have 2: first request + second request) - spans = rig.drain - assert_equal 2, 
spans.length - - # Verify second span contains tool_call_id in input - span = spans[1] - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 3, input.length - - # Third message should be tool response with tool_call_id - assert_equal "tool", input[2]["role"] - assert_equal tool_call["id"], input[2]["tool_call_id"] - assert_equal "6223", input[2]["content"] - end - end - - def test_wrap_handles_streaming_chat_completions - VCR.use_cassette("alexrudall_ruby_openai/streaming") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a streaming request with stream_options to get usage metrics - full_content = "" - client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "Count from 1 to 3"} - ], - max_tokens: 50, - stream_options: {include_usage: true}, - stream: proc do |chunk, _bytesize| - delta_content = chunk.dig("choices", 0, "delta", "content") - full_content += delta_content if delta_content - end - } - ) - - # Verify we got content - refute_empty full_content - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify input was captured - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal 1, input.length - assert_equal "user", input[0]["role"] - - # Verify output was captured and aggregated - assert span.attributes.key?("braintrust.output_json"), "Should have braintrust.output_json" - output = JSON.parse(span.attributes["braintrust.output_json"]) - assert_equal 1, output.length - assert_equal 0, output[0]["index"] - assert_equal "assistant", output[0]["message"]["role"] - refute_nil 
output[0]["message"]["content"] - refute_empty output[0]["message"]["content"] - - # Verify metadata includes stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal true, metadata["stream"] - - # Verify metrics include time_to_first_token and usage tokens - assert span.attributes.key?("braintrust.metrics"), "Should have braintrust.metrics" - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - - # Verify usage metrics are present (when stream_options.include_usage is set) - assert metrics.key?("prompt_tokens"), "Should have prompt_tokens metric" - assert metrics["prompt_tokens"] > 0, "prompt_tokens should be > 0" - assert metrics.key?("completion_tokens"), "Should have completion_tokens metric" - assert metrics["completion_tokens"] > 0, "completion_tokens should be > 0" - assert metrics.key?("tokens"), "Should have tokens metric" - assert metrics["tokens"] > 0, "tokens should be > 0" - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - end - end - - def test_wrap_handles_embeddings - VCR.use_cassette("alexrudall_ruby_openai/embeddings") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make an embeddings request - response = client.embeddings( - parameters: { - model: "text-embedding-3-small", - input: "The quick brown fox" - } - ) - - # Verify response - refute_nil response - refute_nil response.dig("data", 0, "embedding") - - # Embeddings are not traced yet (no wrapper implemented) - # So this should not 
create a span for embeddings - # Only verify the request succeeded - assert response.dig("data", 0, "embedding").length > 0 - end - end - - def test_wrap_handles_completions - VCR.use_cassette("alexrudall_ruby_openai/completions") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a completions request - response = client.completions( - parameters: { - model: "gpt-3.5-turbo-instruct", - prompt: "Say hello:", - max_tokens: 10 - } - ) - - # Verify response - refute_nil response - refute_nil response.dig("choices", 0, "text") - - # Completions are not traced yet (no wrapper implemented) - # Only verify the request succeeded - refute_empty response.dig("choices", 0, "text") - end - end - - def test_wrap_records_exception_for_errors - VCR.use_cassette("alexrudall_ruby_openai/error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key - client = OpenAI::Client.new(access_token: "invalid_key") - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Make a request that will fail - error = assert_raises do - client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "user", content: "test"} - ] - } - ) - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "Chat Completion", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured - refute_nil span.status.description - assert span.status.description.length > 0 - - # Verify exception event was recorded - assert span.events.any? 
{ |event| event.name == "exception" } - end - end - - def test_wrap_responses_create_non_streaming - VCR.use_cassette("alexrudall_ruby_openai/responses_non_streaming") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this ruby-openai gem version" unless client.respond_to?(:responses) - - # Make a non-streaming responses request - # ruby-openai 8.x uses: client.responses.create(parameters: {...}) - response = client.responses.create( - parameters: { - model: "gpt-4o-mini", - input: "What is 2+2? Reply with just the number." - } - ) - - # Verify response - refute_nil response - refute_nil response["output"] - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify braintrust.input_json contains input - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "What is 2+2? 
Reply with just the number.", input - - # Verify braintrust.output_json contains output - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains request metadata - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal "gpt-4o-mini", metadata["model"] - refute_nil metadata["id"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 - assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] - - # Verify time_to_first_token metric is present - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_responses_create_streaming - VCR.use_cassette("alexrudall_ruby_openai/responses_streaming") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client and wrap it - client = OpenAI::Client.new(access_token: @api_key) - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this ruby-openai gem version" unless client.respond_to?(:responses) - - # Make a streaming responses request - # ruby-openai 8.x uses client.responses.create with stream: proc - chunks = [] - client.responses.create( - parameters: { - model: "gpt-4o-mini", - input: "Count from 1 to 3", - stream: proc do |chunk, _event| - chunks << chunk - end - } - ) - - # Verify 
we got chunks - refute_empty chunks, "Should have received streaming chunks" - - # Drain and verify span - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify braintrust.input_json contains input - assert span.attributes.key?("braintrust.input_json") - input = JSON.parse(span.attributes["braintrust.input_json"]) - assert_equal "Count from 1 to 3", input - - # Verify braintrust.output_json contains aggregated output - assert span.attributes.key?("braintrust.output_json"), "Missing braintrust.output_json" - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains request metadata with stream flag - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "openai", metadata["provider"] - assert_equal "/v1/responses", metadata["endpoint"] - assert_equal true, metadata["stream"] - - # Verify braintrust.metrics contains time_to_first_token - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" - assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" - end - end - - def test_wrap_responses_records_exception_for_errors - VCR.use_cassette("alexrudall_ruby_openai/responses_error") do - require "openai" - - # Set up test rig - rig = setup_otel_test_rig - - # Create OpenAI client with invalid API key - client = OpenAI::Client.new(access_token: "invalid_key") - Braintrust::Trace::AlexRudall::RubyOpenAI.wrap(client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this ruby-openai gem version" unless client.respond_to?(:responses) - - # Make a request that will fail - error = assert_raises do - client.responses.create( - parameters: { - model: 
"gpt-4o-mini", - input: "test" - } - ) - end - - # Verify an error was raised - refute_nil error - - # Drain and verify span was created with error information - span = rig.drain_one - - # Verify span name - assert_equal "openai.responses.create", span.name - - # Verify span status indicates an error - assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code - - # Verify error message is captured - refute_nil span.status.description - assert span.status.description.length > 0 - - # Verify exception event was recorded - assert span.events.any? { |event| event.name == "exception" } - end - end -end diff --git a/test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml b/test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml new file mode 100644 index 0000000..79ff38d --- /dev/null +++ b/test/fixtures/vcr_cassettes/alexrudall_ruby_openai/responses_with_metadata.yml @@ -0,0 +1,151 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/responses + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","input":"Say hello","metadata":{"test_key":"test_value","user_id":"user_123"}}' + headers: + Content-Type: + - application/json + Authorization: + - Bearer + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + User-Agent: + - Ruby + response: + status: + code: 200 + message: OK + headers: + Date: + - Sat, 27 Dec 2025 03:59:59 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999972' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + Openai-Version: + - '2020-10-01' + Openai-Organization: + - braintrust-data + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + X-Request-Id: + - 
req_371f05e8dd259faeaf7d6de7207f36b4 + Openai-Processing-Ms: + - '2566' + X-Envoy-Upstream-Service-Time: + - '2570' + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=W3oofKUUmIA0dWOD3ZuVkOhOJxKZ5YyM4F2ntLlkxAE-1766807999-1.0.1.1-UTg_ztOFr0MwhTvC4mztsGvA28HStEtGampvNLxcHfj4vWNVeryxI7Ef_dlZXkInHIgsW9vjlEUU7zA06k59B_qHuO_L0ykjU0WKrE1z7RM; + path=/; expires=Sat, 27-Dec-25 04:29:59 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=_LPwVwPzQE7iEbOQOsGKJvu4vI.xP_G338aY9YYAqa8-1766807999838-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b45e87c58691477-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: |- + { + "id": "resp_07bbee4b8f5626d900694f59bd3fd88196a83738018d7106b8", + "object": "response", + "created_at": 1766807997, + "status": "completed", + "background": false, + "billing": { + "payer": "developer" + }, + "completed_at": 1766807999, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_07bbee4b8f5626d900694f59bf7d7c8196bc9d82703845a129", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "Hello! How can I assist you today?" 
+ } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 9, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 10, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 19 + }, + "user": null, + "metadata": { + "test_key": "test_value", + "user_id": "user_123" + } + } + recorded_at: Sat, 27 Dec 2025 03:59:59 GMT +recorded_with: VCR 6.3.1 From 131149a22068712fe9c742c478f743ff1864f857 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sun, 28 Dec 2025 19:24:49 -0500 Subject: [PATCH 10/27] Changed: Migrated ruby_llm to new integration API --- README.md | 4 +- Rakefile | 1 + examples/contrib/ruby_llm/basic.rb | 42 ++ examples/contrib/ruby_llm/instance.rb | 53 ++ examples/contrib/ruby_llm/streaming.rb | 42 ++ examples/contrib/ruby_llm/tool_usage.rb | 371 ++++++++++ .../ruby_llm/golden.rb} | 6 +- examples/ruby_llm.rb | 63 -- lib/braintrust/contrib.rb | 2 + lib/braintrust/contrib/ruby_llm/deprecated.rb | 45 ++ .../contrib/ruby_llm/instrumentation/chat.rb | 464 +++++++++++++ .../ruby_llm/instrumentation/common.rb | 58 ++ .../contrib/ruby_llm/integration.rb | 54 ++ lib/braintrust/contrib/ruby_llm/patcher.rb | 44 ++ lib/braintrust/trace.rb | 17 - .../contrib/github.com/crmne/ruby_llm.rb | 631 ------------------ .../contrib/ruby_llm/deprecated_test.rb | 134 ++++ .../ruby_llm/instrumentation/chat_test.rb} | 453 ++++++------- .../contrib/ruby_llm/instrumentation_test.rb | 57 ++ .../contrib/ruby_llm/integration_helper.rb | 29 + .../contrib/ruby_llm/integration_test.rb | 
58 ++ .../contrib/ruby_llm/patcher_test.rb | 93 +++ .../contrib/ruby_llm/basic_chat.yml | 119 ++++ .../contrib/ruby_llm/direct_complete.yml | 119 ++++ .../contrib/ruby_llm/streaming_chat.yml | 109 +++ .../contrib/ruby_llm/tool_calling.yml | 220 ++++++ 26 files changed, 2328 insertions(+), 960 deletions(-) create mode 100644 examples/contrib/ruby_llm/basic.rb create mode 100644 examples/contrib/ruby_llm/instance.rb create mode 100644 examples/contrib/ruby_llm/streaming.rb create mode 100644 examples/contrib/ruby_llm/tool_usage.rb rename examples/internal/{ruby_llm.rb => contrib/ruby_llm/golden.rb} (98%) delete mode 100644 examples/ruby_llm.rb create mode 100644 lib/braintrust/contrib/ruby_llm/deprecated.rb create mode 100644 lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb create mode 100644 lib/braintrust/contrib/ruby_llm/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/ruby_llm/integration.rb create mode 100644 lib/braintrust/contrib/ruby_llm/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb create mode 100644 test/braintrust/contrib/ruby_llm/deprecated_test.rb rename test/braintrust/{trace/ruby_llm_test.rb => contrib/ruby_llm/instrumentation/chat_test.rb} (59%) create mode 100644 test/braintrust/contrib/ruby_llm/instrumentation_test.rb create mode 100644 test/braintrust/contrib/ruby_llm/integration_helper.rb create mode 100644 test/braintrust/contrib/ruby_llm/integration_test.rb create mode 100644 test/braintrust/contrib/ruby_llm/patcher_test.rb create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml create mode 100644 test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml diff --git a/README.md b/README.md index 13c8f91..0d71497 100644 --- a/README.md +++ b/README.md @@ -182,8 +182,8 @@ require "ruby_llm" 
Braintrust.init -# Wrap RubyLLM globally (wraps all Chat instances) -Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap +# Instrument all RubyLLM Chat instances +Braintrust.instrument!(:ruby_llm) tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-app") root_span = nil diff --git a/Rakefile b/Rakefile index 84f03cd..74788e8 100644 --- a/Rakefile +++ b/Rakefile @@ -107,6 +107,7 @@ namespace :test do # Tasks per integration [ {name: :openai}, + {name: :ruby_llm}, {name: :ruby_openai, appraisal: "ruby-openai"} ].each do |integration| name = integration[:name] diff --git a/examples/contrib/ruby_llm/basic.rb b/examples/contrib/ruby_llm/basic.rb new file mode 100644 index 0000000..cc35fbb --- /dev/null +++ b/examples/contrib/ruby_llm/basic.rb @@ -0,0 +1,42 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Basic RubyLLM chat with Braintrust tracing +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/basic.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_llm) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +puts "Sending chat request..." 
+response = tracer.in_span("examples/contrib/ruby_llm/basic.rb") do |span| + root_span = span + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.ask("What is the capital of France?") +end + +puts "\nAssistant: #{response.content}" +puts "\nToken usage:" +puts " Input: #{response.to_h[:input_tokens]}" +puts " Output: #{response.to_h[:output_tokens]}" +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm/instance.rb b/examples/contrib/ruby_llm/instance.rb new file mode 100644 index 0000000..5feeb52 --- /dev/null +++ b/examples/contrib/ruby_llm/instance.rb @@ -0,0 +1,53 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Instance-level RubyLLM instrumentation +# +# This shows how to instrument a specific chat instance rather than +# all RubyLLM chats globally. +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/instance.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +# Create two chat instances +chat_traced = RubyLLM.chat(model: "gpt-4o-mini") +chat_untraced = RubyLLM.chat(model: "gpt-4o-mini") + +# Only instrument one of them +Braintrust.instrument!(:ruby_llm, target: chat_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +tracer.in_span("examples/contrib/ruby_llm/instance.rb") do |span| + root_span = span + + puts "Calling traced chat..." + response1 = chat_traced.ask("Say 'traced'") + puts "Traced response: #{response1.content}" + + puts "\nCalling untraced chat..." 
+ response2 = chat_untraced.ask("Say 'untraced'") + puts "Untraced response: #{response2.content}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first chat call should have a ruby_llm.chat span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm/streaming.rb b/examples/contrib/ruby_llm/streaming.rb new file mode 100644 index 0000000..179064f --- /dev/null +++ b/examples/contrib/ruby_llm/streaming.rb @@ -0,0 +1,42 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Streaming RubyLLM chat with Braintrust tracing +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/streaming.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_llm) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +puts "Streaming response..." 
+print "\nAssistant: " + +tracer.in_span("examples/contrib/ruby_llm/streaming.rb") do |span| + root_span = span + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.ask("Write a haiku about programming") do |chunk| + print chunk.content + end +end + +puts "\n\nView trace: #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm/tool_usage.rb b/examples/contrib/ruby_llm/tool_usage.rb new file mode 100644 index 0000000..fa154ea --- /dev/null +++ b/examples/contrib/ruby_llm/tool_usage.rb @@ -0,0 +1,371 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: RubyLLM tool calling with Braintrust tracing +# +# This demonstrates how tool calls create nested spans with a sophisticated +# travel planning scenario featuring: +# - Multiple tools with complex nested parameters +# - Tools that return deeply nested response structures +# - Tools that create their own internal spans (simulating API calls) +# - Multi-turn conversation with multiple tool invocations +# +# Trace hierarchy will look like: +# examples/contrib/ruby_llm/tool_usage.rb +# └── ruby_llm.chat +# ├── ruby_llm.tool.search_flights +# │ └── flight_api.search (internal span) +# ├── ruby_llm.tool.search_hotels +# │ └── hotel_api.search (internal span) +# └── ruby_llm.tool.get_destination_info +# ├── weather_api.forecast (internal span) +# └── events_api.search (internal span) +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/tool_usage.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +# Flight search tool with complex nested parameters +class SearchFlightsTool < RubyLLM::Tool + description "Search for flights between cities with flexible date options and passenger details" + + param :origin, type: :string, desc: "Origin 
airport code (e.g., SFO, JFK)" + param :destination, type: :string, desc: "Destination airport code" + param :departure_date, type: :string, desc: "Departure date (YYYY-MM-DD)" + param :return_date, type: :string, desc: "Return date for round trip (YYYY-MM-DD), optional" + param :passengers, type: :integer, desc: "Number of passengers (default: 1)" + param :cabin_class, type: :string, desc: "Cabin class: economy, premium_economy, business, first" + param :flexible_dates, type: :boolean, desc: "Search +/- 3 days from specified dates" + + def execute(origin:, destination:, departure_date:, return_date: nil, passengers: 1, cabin_class: "economy", flexible_dates: false) + tracer = OpenTelemetry.tracer_provider.tracer("flight-service") + + # Simulate an internal API call with its own span + tracer.in_span("flight_api.search", attributes: { + "flight.origin" => origin, + "flight.destination" => destination, + "flight.passengers" => passengers + }) do + sleep(0.05) # Simulate API latency + + # Return deeply nested flight results + { + search_id: "FL-#{rand(100000..999999)}", + query: { + route: {origin: origin, destination: destination}, + dates: { + departure: departure_date, + return: return_date, + flexible: flexible_dates + }, + travelers: {count: passengers, cabin: cabin_class} + }, + results: [ + { + flight_id: "UA#{rand(100..999)}", + airline: {code: "UA", name: "United Airlines"}, + segments: [ + { + departure: {airport: origin, terminal: "2", time: "#{departure_date}T08:30:00", gate: "A12"}, + arrival: {airport: destination, terminal: "B", time: "#{departure_date}T11:45:00"}, + aircraft: {type: "Boeing 737-900", wifi: true, power_outlets: true}, + duration_minutes: 195 + } + ], + pricing: { + base_fare: 299.00 * passengers, + taxes: 45.50 * passengers, + fees: {carrier_fee: 25.00, booking_fee: 0}, + total: (299.00 + 45.50 + 25.00) * passengers, + currency: "USD", + fare_class: cabin_class, + refundable: false, + change_fee: 75.00 + }, + availability: 
{seats_remaining: rand(2..9)} + }, + { + flight_id: "DL#{rand(100..999)}", + airline: {code: "DL", name: "Delta Air Lines"}, + segments: [ + { + departure: {airport: origin, terminal: "1", time: "#{departure_date}T14:15:00", gate: "C8"}, + arrival: {airport: destination, terminal: "A", time: "#{departure_date}T17:30:00"}, + aircraft: {type: "Airbus A320", wifi: true, power_outlets: true}, + duration_minutes: 195 + } + ], + pricing: { + base_fare: 279.00 * passengers, + taxes: 42.00 * passengers, + fees: {carrier_fee: 30.00, booking_fee: 0}, + total: (279.00 + 42.00 + 30.00) * passengers, + currency: "USD", + fare_class: cabin_class, + refundable: false, + change_fee: 0 + }, + availability: {seats_remaining: rand(2..9)} + } + ], + metadata: { + search_time_ms: rand(150..300), + providers_queried: %w[amadeus sabre travelport], + cache_hit: false + } + } + end + end +end + +# Hotel search tool with nested room and amenity preferences +class SearchHotelsTool < RubyLLM::Tool + description "Search for hotels with detailed room preferences and amenity filters" + + param :city, type: :string, desc: "City name for hotel search" + param :check_in, type: :string, desc: "Check-in date (YYYY-MM-DD)" + param :check_out, type: :string, desc: "Check-out date (YYYY-MM-DD)" + param :guests, type: :integer, desc: "Number of guests" + param :rooms, type: :integer, desc: "Number of rooms needed" + param :star_rating_min, type: :integer, desc: "Minimum star rating (1-5)" + param :amenities, type: :string, desc: "Comma-separated amenities: pool,gym,spa,parking,wifi,breakfast" + + def execute(city:, check_in:, check_out:, guests: 2, rooms: 1, star_rating_min: 3, amenities: "wifi") + tracer = OpenTelemetry.tracer_provider.tracer("hotel-service") + requested_amenities = amenities.split(",").map(&:strip) + + tracer.in_span("hotel_api.search", attributes: { + "hotel.city" => city, + "hotel.guests" => guests, + "hotel.rooms" => rooms + }) do + sleep(0.05) + + { + search_id: 
"HT-#{rand(100000..999999)}", + query: { + location: {city: city, radius_km: 10, center: "downtown"}, + stay: {check_in: check_in, check_out: check_out, nights: 3}, + occupancy: {guests: guests, rooms: rooms}, + filters: {min_stars: star_rating_min, amenities: requested_amenities} + }, + results: [ + { + hotel_id: "HTL-#{rand(10000..99999)}", + name: "Grand #{city} Hotel & Spa", + brand: {name: "Luxury Collection", loyalty_program: "Marriott Bonvoy"}, + location: { + address: {street: "123 Main Street", city: city, postal_code: "94102", country: "USA"}, + coordinates: {latitude: 37.7749, longitude: -122.4194}, + neighborhood: "Downtown", + transit: {nearest_metro: "Powell St", distance_km: 0.3} + }, + rating: { + stars: 5, + guest_score: 4.7, + reviews: {count: 2341, recent_sentiment: "excellent"} + }, + rooms: [ + { + room_id: "RM-#{rand(1000..9999)}", + type: "Deluxe King", + description: "Spacious room with city views", + bedding: {beds: [{type: "king", count: 1}], max_occupancy: 2}, + size: {sqft: 450, sqm: 42}, + features: %w[city_view work_desk minibar safe], + pricing: { + per_night: 289.00, + total: 867.00, + taxes: 130.05, + fees: {resort_fee: 45.00, cleaning_fee: 0}, + grand_total: 1042.05, + currency: "USD", + cancellation: {free_until: check_in, policy: "free_cancellation"} + } + } + ], + amenities: { + wellness: {pool: {indoor: true, outdoor: false, hours: "6am-10pm"}, gym: true, spa: {available: true, appointment_required: true}}, + dining: {restaurants: 2, room_service: {available: true, hours: "24/7"}, breakfast: {included: false, price: 35.00}}, + business: {wifi: {free: true, speed: "high-speed"}, business_center: true, meeting_rooms: 5}, + parking: {available: true, valet: true, self_park: 45.00, valet_price: 65.00} + }, + policies: { + check_in_time: "15:00", + check_out_time: "11:00", + pets: {allowed: true, fee: 75.00, restrictions: "dogs under 25lbs"}, + smoking: false + } + } + ], + metadata: {search_time_ms: rand(200..400), 
availability_as_of: Time.now.utc.iso8601} + } + end + end +end + +# Destination info tool that aggregates multiple data sources +class GetDestinationInfoTool < RubyLLM::Tool + description "Get comprehensive destination information including weather forecast and local events" + + param :city, type: :string, desc: "City name" + param :travel_dates, type: :string, desc: "Travel date range (YYYY-MM-DD to YYYY-MM-DD)" + param :interests, type: :string, desc: "Comma-separated interests: food,art,music,sports,nature,nightlife" + + def execute(city:, travel_dates:, interests: "food,art") + tracer = OpenTelemetry.tracer_provider.tracer("destination-service") + interest_list = interests.split(",").map(&:strip) + dates = travel_dates.split(" to ") + start_date = dates[0] + end_date = dates[1] || start_date + + # This tool makes multiple internal API calls, each with their own spans + weather_data = tracer.in_span("weather_api.forecast", attributes: {"weather.city" => city}) do + sleep(0.03) + { + location: {city: city, timezone: "America/Los_Angeles"}, + forecast: [ + {date: start_date, high_f: 68, low_f: 54, conditions: "Partly Cloudy", precipitation_chance: 10, humidity: 65, wind: {speed_mph: 12, direction: "W"}}, + {date: end_date, high_f: 72, low_f: 56, conditions: "Sunny", precipitation_chance: 0, humidity: 55, wind: {speed_mph: 8, direction: "NW"}} + ], + alerts: [], + best_times: {outdoor_activities: "10am-4pm", photography: "golden_hour"} + } + end + + events_data = tracer.in_span("events_api.search", attributes: {"events.city" => city, "events.interests" => interests}) do + sleep(0.03) + { + events: [ + { + event_id: "EVT-#{rand(10000..99999)}", + name: "#{city} Food & Wine Festival", + category: "food", + venue: {name: "Civic Center Plaza", address: "123 Civic Center", capacity: 5000}, + schedule: {date: start_date, time: "11:00", duration_hours: 8}, + pricing: {general_admission: 45.00, vip: 125.00, currency: "USD"}, + highlights: ["50+ local restaurants", "Wine 
tastings", "Cooking demos"] + }, + { + event_id: "EVT-#{rand(10000..99999)}", + name: "Modern Art Exhibition", + category: "art", + venue: {name: "#{city} Museum of Modern Art", address: "456 Art Street"}, + schedule: {date: start_date, time: "10:00", duration_hours: 6}, + pricing: {general_admission: 25.00, student: 15.00, currency: "USD"}, + highlights: ["New installations", "Interactive exhibits", "Guided tours available"] + } + ], + total_matching: 12, + filtered_by: interest_list + } + end + + { + destination: { + city: city, + overview: { + description: "A vibrant city known for its culture, cuisine, and scenic beauty", + population: "800,000+", + language: "English", + currency: {code: "USD", symbol: "$"}, + time_zone: "Pacific Time (PT)" + }, + travel_advisory: {level: "normal", last_updated: Time.now.utc.iso8601} + }, + weather: weather_data, + events: events_data, + recommendations: { + neighborhoods: [ + {name: "Downtown", vibe: "bustling", best_for: %w[dining shopping nightlife]}, + {name: "Waterfront", vibe: "scenic", best_for: %w[walking photography seafood]} + ], + tips: [ + "Book popular restaurants at least 2 weeks in advance", + "Public transit is efficient - consider a visitor pass", + "Many museums offer free admission on first Thursdays" + ] + }, + metadata: {generated_at: Time.now.utc.iso8601, data_sources: %w[weather_api events_api local_guides]} + } + end +end + +# Initialize Braintrust and instrument RubyLLM +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:ruby_llm) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") +root_span = nil + +puts "=" * 70 +puts "Travel Planning Assistant with Multi-Tool Orchestration" +puts "=" * 70 +puts +puts "This example demonstrates deeply nested tracing with multiple tools." +puts "Watch the trace to see how tool calls create hierarchical spans." 
+puts + +response = tracer.in_span("examples/contrib/ruby_llm/tool_usage.rb") do |span| + root_span = span + + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.with_tools(SearchFlightsTool, SearchHotelsTool, GetDestinationInfoTool) + + puts "User: I'm planning a trip from San Francisco (SFO) to New York (JFK)" + puts " for December 15-18, 2025. Can you help me find flights, a nice" + puts " hotel downtown, and tell me what's happening in the city? I'm" + puts " interested in food and art." + puts + puts "Assistant is thinking and calling tools..." + puts "(This may take a moment as multiple tools are invoked)" + puts + + chat.ask(<<~PROMPT) + I'm planning a trip from San Francisco (SFO) to New York (JFK) for December 15-18, 2025. + I need: + 1. Flight options (economy class, 1 passenger) + 2. A nice hotel downtown (4+ stars, need wifi and gym) + 3. What's the weather forecast and any food/art events happening? + + Please search for all of this and give me a summary of the best options. + PROMPT +end + +puts "-" * 70 +puts "Assistant Response:" +puts "-" * 70 +puts response.content +puts +puts "=" * 70 +puts "Trace Information" +puts "=" * 70 +puts +puts "View the full trace: #{Braintrust::Trace.permalink(root_span)}" +puts +puts "Expected span hierarchy:" +puts " └── examples/contrib/ruby_llm/tool_usage.rb" +puts " └── ruby_llm.chat" +puts " ├── ruby_llm.tool.search_flights" +puts " │ └── flight_api.search" +puts " ├── ruby_llm.tool.search_hotels" +puts " │ └── hotel_api.search" +puts " └── ruby_llm.tool.get_destination_info" +puts " ├── weather_api.forecast" +puts " └── events_api.search" +puts + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/internal/ruby_llm.rb b/examples/internal/contrib/ruby_llm/golden.rb similarity index 98% rename from examples/internal/ruby_llm.rb rename to examples/internal/contrib/ruby_llm/golden.rb index 523e983..edf9dff 100644 --- a/examples/internal/ruby_llm.rb +++ b/examples/internal/contrib/ruby_llm/golden.rb @@ -30,8 
+30,8 @@ blocking_login: true ) -# Wrap RubyLLM module with Braintrust tracing (applies to all instances) -Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap +# Instrument all RubyLLM chats with Braintrust tracing +Braintrust.instrument!(:ruby_llm) # Configure RubyLLM with both providers RubyLLM.configure do |config| @@ -81,7 +81,7 @@ def execute(operation:, a:, b:) puts "Running comprehensive RubyLLM integration tests..." puts "Testing features across OpenAI and Anthropic providers" -tracer.in_span("examples/internal/ruby_llm.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_llm/golden.rb") do |span| root_span = span # Feature 1: Basic Chat diff --git a/examples/ruby_llm.rb b/examples/ruby_llm.rb deleted file mode 100644 index 65dd666..0000000 --- a/examples/ruby_llm.rb +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "bundler/setup" -require "braintrust" -require "ruby_llm" -require "opentelemetry/sdk" - -# Example: RubyLLM chat with Braintrust tracing -# -# This example demonstrates how to automatically trace RubyLLM API calls with Braintrust. -# -# Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/ruby_llm.rb - -# Check for API keys -unless ENV["OPENAI_API_KEY"] - puts "Error: OPENAI_API_KEY environment variable is required" - puts "Get your API key from: https://platform.openai.com/api-keys" - exit 1 -end - -Braintrust.init( - default_project: "ruby-sdk-examples", - blocking_login: true -) - -# Wrap RubyLLM module with Braintrust tracing -Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap - -RubyLLM.configure do |config| - config.openai_api_key = ENV["OPENAI_API_KEY"] -end - -chat = RubyLLM.chat(model: "gpt-4o-mini") - -tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") -root_span = nil - -# Make a chat request (automatically traced!) -puts "Sending chat request to RubyLLM..." 
-response = tracer.in_span("examples/ruby_llm.rb") do |span| - root_span = span - chat.ask("What is the capital of France?") -end - -# Print the response -puts "\n✓ Response received!" -puts "\nAssistant: #{response.content}" - -# Print usage stats -puts "\nToken usage:" -puts " Input tokens: #{response.to_h[:input_tokens]}" -puts " Output tokens: #{response.to_h[:output_tokens]}" - -# Print permalink to view this trace in Braintrust -puts "\n✓ View this trace in Braintrust:" -puts " #{Braintrust::Trace.permalink(root_span)}" - -# Shutdown to flush spans to Braintrust -OpenTelemetry.tracer_provider.shutdown - -puts "\n✓ Trace sent to Braintrust!" diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index ddf85ed..2b24890 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -101,7 +101,9 @@ def tracer_for(target, name: "braintrust") # Load integration stubs (eager load minimal metadata). require_relative "contrib/openai/integration" require_relative "contrib/ruby_openai/integration" +require_relative "contrib/ruby_llm/integration" # Register integrations Braintrust::Contrib::OpenAI::Integration.register! Braintrust::Contrib::RubyOpenAI::Integration.register! +Braintrust::Contrib::RubyLLM::Integration.register! diff --git a/lib/braintrust/contrib/ruby_llm/deprecated.rb b/lib/braintrust/contrib/ruby_llm/deprecated.rb new file mode 100644 index 0000000..8b2cafe --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/deprecated.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old ruby_llm integration API. +# This file now just delegates to the new API. + +module Braintrust + module Trace + module Contrib + module Github + module Crmne + module RubyLLM + # Wrap RubyLLM to automatically create spans for chat requests. + # This is the legacy API - delegates to the new contrib framework. 
+ # + # @param chat [RubyLLM::Chat, nil] the chat instance to wrap (if nil, wraps the class) + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider + # @return [RubyLLM::Chat, nil] the wrapped chat instance + def self.wrap(chat = nil, tracer_provider: nil) + Log.warn("Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: tracer_provider) + chat + end + + # Unwrap RubyLLM to disable Braintrust tracing. + # This is the legacy API - uses the Context pattern to disable tracing. + # + # Note: Prepended modules cannot be truly removed in Ruby. + # This method sets `enabled: false` in the Context, which the + # instrumentation checks before creating spans. + # + # @param chat [RubyLLM::Chat, nil] the chat instance to unwrap (if nil, unwraps the class) + # @return [RubyLLM::Chat, nil] the chat instance + def self.unwrap(chat = nil) + Log.warn("Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap() is deprecated and will be removed in a future version.") + + target = chat || ::RubyLLM::Chat + Braintrust::Contrib::Context.set!(target, enabled: false) + chat + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb b/lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb new file mode 100644 index 0000000..00a68f2 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/instrumentation/chat.rb @@ -0,0 +1,464 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" +require_relative "../../support/otel" +require_relative "../../../internal/encoding" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module RubyLLM + module Instrumentation + # Chat instrumentation for RubyLLM. + # Wraps complete() and execute_tool() methods to create spans. 
+ module Chat + def self.included(base) + # Guard against double-wrapping + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + # Wrap complete() to trace chat completions. + # Each call creates a span - recursive calls from tool execution + # create nested spans (each is a separate API call). + def complete(&block) + return block ? super : super() unless tracing_enabled? + + tracer = Braintrust::Contrib.tracer_for(self) + + tracer.in_span("ruby_llm.chat") do |span| + if block + # Streaming: pass a block that calls super() with the wrapper + handle_streaming_complete(span, block) do |&wrapper| + super(&wrapper) + end + else + # Non-streaming: pass a block that calls super() + handle_non_streaming_complete(span) do + super() + end + end + end + end + + private + + # Wrap execute_tool() to trace tool executions. + # This is a private method in RubyLLM - wrapping it avoids + # conflicting with user-registered on_tool_call/on_tool_result callbacks. + def execute_tool(tool_call) + return super unless tracing_enabled? + + tracer = Braintrust::Contrib.tracer_for(self) + + tracer.in_span("ruby_llm.tool.#{tool_call.name}") do |span| + Support::OTel.set_json_attr(span, "braintrust.span_attributes", {type: "tool"}) + span.set_attribute("tool.name", tool_call.name) + span.set_attribute("tool.call_id", tool_call.id) + + Support::OTel.set_json_attr(span, "braintrust.input_json", { + "name" => tool_call.name, + "arguments" => tool_call.arguments + }) + + result = super + + Support::OTel.set_json_attr(span, "braintrust.output_json", result) + result + end + end + + # DEPRECATED: Support legacy unwrap() + # Checks Context for enabled: false on instance or class. + # This will be removed in a future version. + def tracing_enabled? 
+ ctx = Braintrust::Contrib.context_for(self) + class_ctx = Braintrust::Contrib.context_for(self.class) + ctx&.[](:enabled) != false && class_ctx&.[](:enabled) != false + end + + # Handle streaming complete request with tracing. + # Calls the provided block with a wrapper that aggregates chunks. + # @param span [OpenTelemetry::Trace::Span] the span to record to + # @param user_block [Proc] the streaming block from user + # @param super_caller [Proc] block that calls super(&wrapper) + def handle_streaming_complete(span, user_block, &super_caller) + aggregated_chunks = [] + metadata = extract_metadata(stream: true) + input_messages = build_input_messages + start_time = Braintrust::Internal::Time.measure + time_to_first_token = nil + + Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + + # Wrapper block that RubyLLM calls once per chunk. + # Aggregates chunks for span recording and forwards to user's block. + wrapper = proc do |chunk| + time_to_first_token ||= Braintrust::Internal::Time.measure(start_time) + aggregated_chunks << chunk + user_block.call(chunk) + end + + begin + result = super_caller.call(&wrapper) + + capture_streaming_output(span, aggregated_chunks, result, time_to_first_token) + result + rescue => e + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("RubyLLM error: #{e.message}") + raise + end + end + + # Handle non-streaming complete request with tracing. + # Calls the provided block to invoke super() and returns the response. + # @param span [OpenTelemetry::Trace::Span] the span to record to + # @param super_caller [Proc] block that calls super() + def handle_non_streaming_complete(span, &super_caller) + metadata = extract_metadata + input_messages = build_input_messages + Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? 
+ + messages_before_count = messages&.length || 0 + + begin + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super_caller.call + end + + capture_non_streaming_output(span, response, messages_before_count, time_to_first_token) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + + response + rescue => e + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("RubyLLM error: #{e.message}") + raise + end + end + + # Extract metadata from chat instance (provider, model, tools, stream flag) + def extract_metadata(stream: false) + metadata = {"provider" => "ruby_llm"} + metadata["stream"] = true if stream + + # Extract model + if respond_to?(:model) && model + model_id = model.respond_to?(:id) ? model.id : model.to_s + metadata["model"] = model_id + end + + # Extract tools (only for non-streaming) + if !stream && respond_to?(:tools) && tools&.any? + metadata["tools"] = extract_tools_metadata + end + + metadata + end + + # Extract tools metadata from chat instance + def extract_tools_metadata + provider = instance_variable_get(:@provider) if instance_variable_defined?(:@provider) + + tools.map do |_name, tool| + format_tool_schema(tool, provider) + end + end + + # Format a tool into OpenAI-compatible schema + def format_tool_schema(tool, provider) + tool_schema = nil + + # Use provider-specific tool_for method if available + if provider + begin + tool_schema = if provider.is_a?(::RubyLLM::Providers::OpenAI) + ::RubyLLM::Providers::OpenAI::Tools.tool_for(tool) + elsif defined?(::RubyLLM::Providers::Anthropic) && provider.is_a?(::RubyLLM::Providers::Anthropic) + ::RubyLLM::Providers::Anthropic::Tools.tool_for(tool) + elsif tool.respond_to?(:params_schema) && tool.params_schema + build_basic_tool_schema(tool) + else + build_minimal_tool_schema(tool) + end + rescue NameError, ArgumentError => e + Braintrust::Log.debug("Failed to extract tool schema using provider-specific method: 
#{e.class.name}: #{e.message}") + tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) + end + else + tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) + end + + # Strip RubyLLM-specific fields to match native OpenAI format + function_key = tool_schema&.key?(:function) ? :function : "function" + if tool_schema && tool_schema[function_key] + tool_params = tool_schema[function_key][:parameters] || tool_schema[function_key]["parameters"] + if tool_params.is_a?(Hash) + tool_params = tool_params.dup if tool_params.frozen? + tool_params.delete("strict") + tool_params.delete(:strict) + tool_params.delete("additionalProperties") + tool_params.delete(:additionalProperties) + params_key = tool_schema[function_key].key?(:parameters) ? :parameters : "parameters" + tool_schema[function_key][params_key] = tool_params + end + end + + tool_schema + end + + # Build a basic tool schema with parameters + def build_basic_tool_schema(tool) + { + "type" => "function", + "function" => { + "name" => tool.name.to_s, + "description" => tool.description, + "parameters" => tool.params_schema + } + } + end + + # Build a minimal tool schema without parameters + def build_minimal_tool_schema(tool) + { + "type" => "function", + "function" => { + "name" => tool.name.to_s, + "description" => tool.description, + "parameters" => {} + } + } + end + + # Build input messages array from chat history + def build_input_messages + return [] unless respond_to?(:messages) && messages&.any? 
+ + messages.map { |m| format_message_for_input(m) } + end + + # Format a RubyLLM message to OpenAI-compatible format + def format_message_for_input(msg) + formatted = {"role" => msg.role.to_s} + + # Handle content + if msg.respond_to?(:content) && msg.content + raw_content = msg.content + + # Check if content is a Content object with attachments (issue #71) + formatted["content"] = if raw_content.respond_to?(:text) && raw_content.respond_to?(:attachments) && raw_content.attachments&.any? + format_multipart_content(raw_content) + else + format_simple_content(raw_content, msg.role.to_s) + end + end + + # Handle tool_calls for assistant messages + if msg.respond_to?(:tool_calls) && msg.tool_calls&.any? + formatted["tool_calls"] = format_tool_calls(msg.tool_calls) + formatted["content"] = nil + end + + # Handle tool_call_id for tool result messages + if msg.respond_to?(:tool_call_id) && msg.tool_call_id + formatted["tool_call_id"] = msg.tool_call_id + end + + formatted + end + + # Format multipart content with text and attachments + # @param content_obj [Object] Content object with text and attachments + # @return [Array] array of content parts + def format_multipart_content(content_obj) + content_parts = [] + + # Add text part + content_parts << {"type" => "text", "text" => content_obj.text} if content_obj.text + + # Add attachment parts (convert to Braintrust format) + content_obj.attachments.each do |attachment| + content_parts << format_attachment_for_input(attachment) + end + + content_parts + end + + # Format simple text content + # @param raw_content [Object] String or Content object with text + # @param role [String] the message role + # @return [String] formatted text content + def format_simple_content(raw_content, role) + content = raw_content + content = content.text if content.respond_to?(:text) + + # Convert Ruby hash string to JSON for tool results + if role == "tool" && content.is_a?(String) && content.start_with?("{:") + begin + content = 
content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":") + rescue + # Keep original if conversion fails + end + end + + content + end + + # Format a RubyLLM attachment to OpenAI-compatible format + # @param attachment [Object] the RubyLLM attachment + # @return [Hash] OpenAI image_url format for consistency with other integrations + def format_attachment_for_input(attachment) + # RubyLLM Attachment has: source (Pathname), filename, mime_type + if attachment.respond_to?(:source) && attachment.source + begin + data = File.binread(attachment.source.to_s) + encoded = Braintrust::Internal::Encoding::Base64.strict_encode64(data) + mime_type = attachment.respond_to?(:mime_type) ? attachment.mime_type : "application/octet-stream" + + # Use OpenAI's image_url format for consistency + { + "type" => "image_url", + "image_url" => { + "url" => "data:#{mime_type};base64,#{encoded}" + } + } + rescue => e + Braintrust::Log.debug("Failed to read attachment file: #{e.message}") + # Return a placeholder if we can't read the file + {"type" => "text", "text" => "[attachment: #{attachment.respond_to?(:filename) ? attachment.filename : "unknown"}]"} + end + elsif attachment.respond_to?(:to_h) + # Try to use attachment's own serialization + attachment.to_h + else + {"type" => "text", "text" => "[attachment]"} + end + end + + # Format tool calls into OpenAI format + def format_tool_calls(tool_calls) + tool_calls.map do |_id, tc| + args = tc.arguments + args_string = args.is_a?(String) ? args : JSON.generate(args) + + { + "id" => tc.id, + "type" => "function", + "function" => { + "name" => tc.name, + "arguments" => args_string + } + } + end + end + + # Capture streaming output and metrics + def capture_streaming_output(span, aggregated_chunks, result, time_to_first_token) + return if aggregated_chunks.empty? 
+ + # Aggregate content from chunks + # Extract text from Content objects if present (issue #71) + aggregated_content = aggregated_chunks.map { |c| + content = c.respond_to?(:content) ? c.content : c.to_s + content = content.text if content.respond_to?(:text) + content + }.join + + output = [{ + role: "assistant", + content: aggregated_content + }] + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + + # Set metrics (token usage + time_to_first_token) + # RubyLLM stores tokens directly in the response hash, not in a nested usage object + metrics = {} + if result.respond_to?(:to_h) + result_hash = result.to_h + usage = { + "input_tokens" => result_hash[:input_tokens], + "output_tokens" => result_hash[:output_tokens], + "cached_tokens" => result_hash[:cached_tokens], + "cache_creation_tokens" => result_hash[:cache_creation_tokens] + }.compact + + unless usage.empty? + metrics = Common.parse_usage_tokens(usage) + end + end + metrics["time_to_first_token"] = time_to_first_token if time_to_first_token + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + end + + # Capture non-streaming output and metrics + def capture_non_streaming_output(span, response, messages_before_count, time_to_first_token) + return unless response + + message = { + "role" => "assistant", + "content" => nil + } + + # Add content if it's a simple text response + # Extract text from Content objects if present (issue #71) + if response.respond_to?(:content) && response.content && !response.content.empty? + content = response.content + content = content.text if content.respond_to?(:text) + message["content"] = content + end + + # Check if there are tool calls in the messages history + if respond_to?(:messages) && messages + assistant_msg = messages[messages_before_count..]&.find { |m| + m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any? + } + + if assistant_msg&.tool_calls&.any? 
+ message["tool_calls"] = format_tool_calls(assistant_msg.tool_calls) + message["content"] = nil + end + end + + output = [{ + "index" => 0, + "message" => message, + "finish_reason" => message["tool_calls"] ? "tool_calls" : "stop" + }] + + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + + # Set metrics (token usage + time_to_first_token) + metrics = {} + if response.respond_to?(:to_h) + response_hash = response.to_h + usage = { + "input_tokens" => response_hash[:input_tokens], + "output_tokens" => response_hash[:output_tokens], + "cached_tokens" => response_hash[:cached_tokens], + "cache_creation_tokens" => response_hash[:cache_creation_tokens] + }.compact + + unless usage.empty? + metrics = Common.parse_usage_tokens(usage) + end + end + metrics["time_to_first_token"] = time_to_first_token if time_to_first_token + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/instrumentation/common.rb b/lib/braintrust/contrib/ruby_llm/instrumentation/common.rb new file mode 100644 index 0000000..e1df858 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/instrumentation/common.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module RubyLLM + module Instrumentation + # Common utilities for RubyLLM instrumentation. + module Common + # Parse RubyLLM usage tokens into normalized Braintrust metrics. + # RubyLLM normalizes token fields from all providers (OpenAI, Anthropic, etc.) 
+ # into a consistent format: + # - input_tokens: prompt tokens sent + # - output_tokens: completion tokens received + # - cached_tokens: tokens read from cache + # - cache_creation_tokens: tokens written to cache + # + # @param usage [Hash, Object] usage object from RubyLLM response + # @return [Hash] normalized metrics for Braintrust + def self.parse_usage_tokens(usage) + metrics = {} + return metrics unless usage + + usage_hash = usage.respond_to?(:to_h) ? usage.to_h : usage + return metrics unless usage_hash.is_a?(Hash) + + # RubyLLM normalized field mappings → Braintrust metrics + field_map = { + "input_tokens" => "prompt_tokens", + "output_tokens" => "completion_tokens", + "cached_tokens" => "prompt_cached_tokens", + "cache_creation_tokens" => "prompt_cache_creation_tokens" + } + + usage_hash.each do |key, value| + next unless value.is_a?(Numeric) + key_str = key.to_s + target = field_map[key_str] + metrics[target] = value.to_i if target + end + + # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs) + prompt_tokens = (metrics["prompt_tokens"] || 0) + + (metrics["prompt_cached_tokens"] || 0) + + (metrics["prompt_cache_creation_tokens"] || 0) + metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0 + + # Calculate total + if metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") + metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] + end + + metrics + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/integration.rb b/lib/braintrust/contrib/ruby_llm/integration.rb new file mode 100644 index 0000000..69561b5 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/integration.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require_relative "../integration" +require_relative "deprecated" + +module Braintrust + module Contrib + module RubyLLM + # RubyLLM integration for automatic instrumentation. + # Instruments the crmne/ruby_llm gem. 
+ class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "1.8.0" + + GEM_NAMES = ["ruby_llm"].freeze + REQUIRE_PATHS = ["ruby_llm"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :ruby_llm + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if ruby_llm gem is available + def self.loaded? + defined?(::RubyLLM::Chat) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. + # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [ChatPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/ruby_llm/patcher.rb b/lib/braintrust/contrib/ruby_llm/patcher.rb new file mode 100644 index 0000000..a4fde88 --- /dev/null +++ b/lib/braintrust/contrib/ruby_llm/patcher.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/chat" + +module Braintrust + module Contrib + module RubyLLM + # Patcher for RubyLLM chat completions. + # Instruments RubyLLM::Chat#complete and #execute_tool methods. + class ChatPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::RubyLLM::Chat) + end + + def patched?(**options) + target_class = options[:target]&.singleton_class || ::RubyLLM::Chat + Instrumentation::Chat.applied?(target_class) + end + + # Perform the actual patching. 
+ # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + if options[:target] + # Instance-level (for only this chat instance) + raise ArgumentError, "target must be a kind of ::RubyLLM::Chat" unless options[:target].is_a?(::RubyLLM::Chat) + + options[:target].singleton_class.include(Instrumentation::Chat) + else + # Class-level (for all chat instances) + ::RubyLLM::Chat.include(Instrumentation::Chat) + end + end + end + end + end + end +end diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb index 6bf9434..b87d804 100644 --- a/lib/braintrust/trace.rb +++ b/lib/braintrust/trace.rb @@ -14,23 +14,6 @@ # Anthropic gem not installed - integration will not be available end -# RubyLLM integration is optional - automatically loaded if ruby_llm gem is available -# -# Usage: -# # Wrap the class once (affects all instances): -# Braintrust::Trace::RubyLLM.wrap -# -# # Or wrap a specific instance: -# chat = RubyLLM.chat(model: "gpt-4o-mini") -# Braintrust::Trace::RubyLLM.wrap(chat) -# -begin - require "ruby_llm" - require_relative "trace/contrib/github.com/crmne/ruby_llm" -rescue LoadError - # RubyLLM gem not installed - integration will not be available -end - module Braintrust module Trace # Set up OpenTelemetry tracing with Braintrust diff --git a/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb b/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb deleted file mode 100644 index 4d3db6e..0000000 --- a/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb +++ /dev/null @@ -1,631 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../../../tokens" -require_relative "../../../../logger" -require_relative 
"../../../../internal/encoding" - -module Braintrust - module Trace - module Contrib - module Github - module Crmne - module RubyLLM - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - rescue => e - Log.debug("Failed to serialize #{attr_name}: #{e.message}") - end - - # Parse usage tokens from RubyLLM response - # RubyLLM uses Anthropic-style field naming (input_tokens, output_tokens) - # @param usage [Hash, Object] usage object from RubyLLM response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_anthropic_usage_tokens(usage) - end - - # Wrap RubyLLM to automatically create spans for chat requests - # Supports both synchronous and streaming requests - # - # Usage: - # # Wrap the class once (affects all future instances): - # Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap - # - # # Or wrap a specific instance: - # chat = RubyLLM.chat(model: "gpt-4o-mini") - # Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat) - # - # @param chat [RubyLLM::Chat, nil] the RubyLLM chat instance to wrap (if nil, wraps the class) - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(chat = nil, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # If no chat instance provided, wrap the class globally via initialize hook - if chat.nil? 
- return if defined?(::RubyLLM::Chat) && ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) - - # Create module that wraps initialize to auto-wrap each new instance - wrapper_module = Module.new do - define_method(:initialize) do |*args, **kwargs, &block| - super(*args, **kwargs, &block) - # Auto-wrap this instance during initialization - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(self, tracer_provider: tracer_provider) - self - end - end - - # Store reference to wrapper module for cleanup - ::RubyLLM::Chat.instance_variable_set(:@braintrust_wrapper_module, wrapper_module) - ::RubyLLM::Chat.prepend(wrapper_module) - return nil - end - - # Check if already wrapped to make this idempotent - return chat if chat.instance_variable_get(:@braintrust_wrapped) - - # Create a wrapper module that intercepts chat.complete - wrapper = create_wrapper_module(tracer_provider) - - # Mark as wrapped and prepend the wrapper to the chat instance - chat.instance_variable_set(:@braintrust_wrapped, true) - chat.singleton_class.prepend(wrapper) - - # Register tool callbacks for tool span creation - register_tool_callbacks(chat, tracer_provider) - - chat - end - - # Register callbacks for tool execution tracing - # @param chat [RubyLLM::Chat] the chat instance - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.register_tool_callbacks(chat, tracer_provider) - tracer = tracer_provider.tracer("braintrust") - - # Track tool spans by tool_call_id - tool_spans = {} - - # Start tool span when tool is called - chat.on_tool_call do |tool_call| - span = tracer.start_span("ruby_llm.tool.#{tool_call.name}") - set_json_attr(span, "braintrust.span_attributes", {type: "tool"}) - span.set_attribute("tool.name", tool_call.name) - span.set_attribute("tool.call_id", tool_call.id) - - # Store tool input - input = { - "name" => tool_call.name, - "arguments" => tool_call.arguments - } - set_json_attr(span, 
"braintrust.input_json", input) - - tool_spans[tool_call.id] = span - end - - # End tool span when result is received - chat.on_tool_result do |result| - # Find the most recent tool span (RubyLLM doesn't pass tool_call_id to on_tool_result) - # The spans are processed in order, so we can use the first unfinished one - tool_call_id, span = tool_spans.find { |_id, s| s } - if span - # Store tool output - set_json_attr(span, "braintrust.output_json", result) - span.finish - tool_spans.delete(tool_call_id) - end - end - end - - # Unwrap RubyLLM to remove Braintrust tracing - # For class-level unwrapping, removes the initialize override from the wrapper module - # For instance-level unwrapping, clears the wrapped flag - # - # @param chat [RubyLLM::Chat, nil] the RubyLLM chat instance to unwrap (if nil, unwraps the class) - def self.unwrap(chat = nil) - # If no chat instance provided, unwrap the class globally - if chat.nil? - if defined?(::RubyLLM::Chat) && ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) - wrapper_module = ::RubyLLM::Chat.instance_variable_get(:@braintrust_wrapper_module) - # Redefine initialize to just call super (disables auto-wrapping) - # We can't actually remove a prepended module, so we make it a no-op - wrapper_module.module_eval do - define_method(:initialize) do |*args, **kwargs, &block| - super(*args, **kwargs, &block) - end - end - ::RubyLLM::Chat.remove_instance_variable(:@braintrust_wrapper_module) - end - return nil - end - - # Unwrap instance - chat.remove_instance_variable(:@braintrust_wrapped) if chat.instance_variable_defined?(:@braintrust_wrapped) - chat - end - - # Wrap the RubyLLM::Chat class globally - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_class(tracer_provider) - return unless defined?(::RubyLLM::Chat) - - wrapper = create_wrapper_module(tracer_provider) - ::RubyLLM::Chat.prepend(wrapper) - end - - # Create the wrapper module that intercepts 
chat.complete - # We wrap complete() instead of ask() because: - # - ask() internally calls complete() for the actual API call - # - ActiveRecord integration (acts_as_chat) calls complete() directly - # - This ensures all LLM calls are traced regardless of entry point - # - # Important: RubyLLM's complete() calls itself recursively for tool execution. - # We only create a span for the outermost call to avoid duplicate spans. - # Tool execution is traced separately via on_tool_call/on_tool_result callbacks. - # - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - # @return [Module] the wrapper module - def self.create_wrapper_module(tracer_provider) - Module.new do - define_method(:complete) do |&block| - # Check if we're already inside a traced complete() call - # If so, just call super without creating a new span - if @braintrust_in_complete - if block - return super(&block) - else - return super() - end - end - - tracer = tracer_provider.tracer("braintrust") - - # Mark that we're inside a complete() call - @braintrust_in_complete = true - - begin - if block - # Handle streaming request - wrapped_block = proc do |chunk| - block.call(chunk) - end - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_streaming_complete(self, tracer, block) do |aggregated_chunks| - super(&proc do |chunk| - aggregated_chunks << chunk - wrapped_block.call(chunk) - end) - end - else - # Handle non-streaming request - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.handle_non_streaming_complete(self, tracer) do - super() - end - end - ensure - @braintrust_in_complete = false - end - end - end - end - - # Handle streaming complete request with tracing - # @param chat [RubyLLM::Chat] the chat instance - # @param tracer [OpenTelemetry::Trace::Tracer] the tracer - # @param block [Proc] the streaming block - def self.handle_streaming_complete(chat, tracer, block) - # Start span immediately for accurate timing - span = 
tracer.start_span("ruby_llm.chat") - - aggregated_chunks = [] - - # Extract metadata and build input messages - # For complete(), messages are already in chat history (no prompt param) - metadata = extract_metadata(chat, stream: true) - input_messages = build_input_messages(chat, nil) - - # Set input and metadata - set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? - set_json_attr(span, "braintrust.metadata", metadata) - - # Call original method, passing aggregated_chunks to the block - begin - result = yield aggregated_chunks - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("RubyLLM error: #{e.message}") - span.finish - raise - end - - # Set output and metrics from aggregated chunks - capture_streaming_output(span, aggregated_chunks, result) - span.finish - result - end - - # Handle non-streaming complete request with tracing - # @param chat [RubyLLM::Chat] the chat instance - # @param tracer [OpenTelemetry::Trace::Tracer] the tracer - def self.handle_non_streaming_complete(chat, tracer) - # Start span immediately for accurate timing - span = tracer.start_span("ruby_llm.chat") - - begin - # Extract metadata and build input messages - # For complete(), messages are already in chat history (no prompt param) - metadata = extract_metadata(chat) - input_messages = build_input_messages(chat, nil) - set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? - - # Remember message count before the call (for tool call detection) - messages_before_count = (chat.respond_to?(:messages) && chat.messages) ? 
chat.messages.length : 0 - - # Call the original method - response = yield - - # Capture output and metrics - capture_non_streaming_output(span, chat, response, messages_before_count) - - # Set metadata - set_json_attr(span, "braintrust.metadata", metadata) - - response - ensure - span.finish - end - end - - # Extract metadata from chat instance (provider, model, tools, stream flag) - # @param chat [RubyLLM::Chat] the chat instance - # @param stream [Boolean] whether this is a streaming request - # @return [Hash] metadata hash - def self.extract_metadata(chat, stream: false) - metadata = {"provider" => "ruby_llm"} - metadata["stream"] = true if stream - - # Extract model - if chat.respond_to?(:model) && chat.model - model = chat.model.respond_to?(:id) ? chat.model.id : chat.model.to_s - metadata["model"] = model - end - - # Extract tools (only for non-streaming) - if !stream && chat.respond_to?(:tools) && chat.tools&.any? - metadata["tools"] = extract_tools_metadata(chat) - end - - metadata - end - - # Extract tools metadata from chat instance - # @param chat [RubyLLM::Chat] the chat instance - # @return [Array] array of tool schemas - def self.extract_tools_metadata(chat) - provider = chat.instance_variable_get(:@provider) if chat.instance_variable_defined?(:@provider) - - chat.tools.map do |_name, tool| - format_tool_schema(tool, provider) - end - end - - # Format a tool into OpenAI-compatible schema - # @param tool [Object] the tool object - # @param provider [Object, nil] the provider instance - # @return [Hash] tool schema - def self.format_tool_schema(tool, provider) - tool_schema = nil - - # Use provider-specific tool_for method if available - if provider - begin - tool_schema = if provider.is_a?(::RubyLLM::Providers::OpenAI) - ::RubyLLM::Providers::OpenAI::Tools.tool_for(tool) - elsif defined?(::RubyLLM::Providers::Anthropic) && provider.is_a?(::RubyLLM::Providers::Anthropic) - ::RubyLLM::Providers::Anthropic::Tools.tool_for(tool) - elsif 
tool.respond_to?(:params_schema) && tool.params_schema - build_basic_tool_schema(tool) - else - build_minimal_tool_schema(tool) - end - rescue NameError, ArgumentError => e - # If provider-specific tool_for fails, fall back to basic format - Log.debug("Failed to extract tool schema using provider-specific method: #{e.class.name}: #{e.message}") - tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) - end - else - # No provider, use basic format with params_schema if available - tool_schema = (tool.respond_to?(:params_schema) && tool.params_schema) ? build_basic_tool_schema(tool) : build_minimal_tool_schema(tool) - end - - # Strip RubyLLM-specific fields to match native OpenAI format - # Handle both symbol and string keys - function_key = tool_schema&.key?(:function) ? :function : "function" - if tool_schema && tool_schema[function_key] - tool_params = tool_schema[function_key][:parameters] || tool_schema[function_key]["parameters"] - if tool_params.is_a?(Hash) - # Create a mutable copy if the hash is frozen - tool_params = tool_params.dup if tool_params.frozen? - tool_params.delete("strict") - tool_params.delete(:strict) - tool_params.delete("additionalProperties") - tool_params.delete(:additionalProperties) - # Assign the modified copy back - params_key = tool_schema[function_key].key?(:parameters) ? 
:parameters : "parameters" - tool_schema[function_key][params_key] = tool_params - end - end - - tool_schema - end - - # Build a basic tool schema with parameters - # @param tool [Object] the tool object - # @return [Hash] tool schema - def self.build_basic_tool_schema(tool) - { - "type" => "function", - "function" => { - "name" => tool.name.to_s, - "description" => tool.description, - "parameters" => tool.params_schema - } - } - end - - # Build a minimal tool schema without parameters - # @param tool [Object] the tool object - # @return [Hash] tool schema - def self.build_minimal_tool_schema(tool) - { - "type" => "function", - "function" => { - "name" => tool.name.to_s, - "description" => tool.description, - "parameters" => {} - } - } - end - - # Build input messages array from chat history and prompt - # Formats messages to match OpenAI's message format - # @param chat [RubyLLM::Chat] the chat instance - # @param prompt [String, nil] the user prompt - # @return [Array] array of message hashes - def self.build_input_messages(chat, prompt) - input_messages = [] - - # Add conversation history, formatting each message to OpenAI format - if chat.respond_to?(:messages) && chat.messages&.any? - input_messages = chat.messages.map { |m| format_message_for_input(m) } - end - - # Add current prompt - input_messages << {"role" => "user", "content" => prompt} if prompt - - input_messages - end - - # Format a RubyLLM message to OpenAI-compatible format - # @param msg [Object] the RubyLLM message - # @return [Hash] OpenAI-formatted message - def self.format_message_for_input(msg) - formatted = { - "role" => msg.role.to_s - } - - # Handle content - if msg.respond_to?(:content) && msg.content - raw_content = msg.content - - # Check if content is a Content object with attachments (issue #71) - formatted["content"] = if raw_content.respond_to?(:text) && raw_content.respond_to?(:attachments) && raw_content.attachments&.any? 
- format_multipart_content(raw_content) - else - format_simple_content(raw_content, msg.role.to_s) - end - end - - # Handle tool_calls for assistant messages - if msg.respond_to?(:tool_calls) && msg.tool_calls&.any? - formatted["tool_calls"] = format_tool_calls(msg.tool_calls) - formatted["content"] = nil - end - - # Handle tool_call_id for tool result messages - if msg.respond_to?(:tool_call_id) && msg.tool_call_id - formatted["tool_call_id"] = msg.tool_call_id - end - - formatted - end - - # Format multipart content with text and attachments - # @param content_obj [Object] Content object with text and attachments - # @return [Array] array of content parts - def self.format_multipart_content(content_obj) - content_parts = [] - - # Add text part - content_parts << {"type" => "text", "text" => content_obj.text} if content_obj.text - - # Add attachment parts (convert to Braintrust format) - content_obj.attachments.each do |attachment| - content_parts << format_attachment_for_input(attachment) - end - - content_parts - end - - # Format simple text content - # @param raw_content [Object] String or Content object with text - # @param role [String] the message role - # @return [String] formatted text content - def self.format_simple_content(raw_content, role) - content = raw_content - content = content.text if content.respond_to?(:text) - - # Convert Ruby hash string to JSON for tool results - if role == "tool" && content.is_a?(String) && content.start_with?("{:") - begin - content = content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":") - rescue - # Keep original if conversion fails - end - end - - content - end - - # Format a RubyLLM attachment to OpenAI-compatible format - # @param attachment [Object] the RubyLLM attachment - # @return [Hash] OpenAI image_url format for consistency with other integrations - def self.format_attachment_for_input(attachment) - # RubyLLM Attachment has: source (Pathname), filename, mime_type - if attachment.respond_to?(:source) && 
attachment.source - begin - data = File.binread(attachment.source.to_s) - encoded = Internal::Encoding::Base64.strict_encode64(data) - mime_type = attachment.respond_to?(:mime_type) ? attachment.mime_type : "application/octet-stream" - - # Use OpenAI's image_url format for consistency - { - "type" => "image_url", - "image_url" => { - "url" => "data:#{mime_type};base64,#{encoded}" - } - } - rescue => e - Log.debug("Failed to read attachment file: #{e.message}") - # Return a placeholder if we can't read the file - {"type" => "text", "text" => "[attachment: #{attachment.respond_to?(:filename) ? attachment.filename : "unknown"}]"} - end - elsif attachment.respond_to?(:to_h) - # Try to use attachment's own serialization - attachment.to_h - else - {"type" => "text", "text" => "[attachment]"} - end - end - - # Capture streaming output and metrics - # @param span [OpenTelemetry::Trace::Span] the span - # @param aggregated_chunks [Array] the aggregated chunks - # @param result [Object] the result object - def self.capture_streaming_output(span, aggregated_chunks, result) - return if aggregated_chunks.empty? - - # Aggregate content from chunks - # Extract text from Content objects if present (issue #71) - aggregated_content = aggregated_chunks.map { |c| - content = c.respond_to?(:content) ? c.content : c.to_s - content = content.text if content.respond_to?(:text) - content - }.join - - output = [{ - role: "assistant", - content: aggregated_content - }] - set_json_attr(span, "braintrust.output_json", output) - - # Try to extract usage from the result - if result.respond_to?(:usage) && result.usage - metrics = parse_usage_tokens(result.usage) - set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- end - end - - # Capture non-streaming output and metrics - # @param span [OpenTelemetry::Trace::Span] the span - # @param chat [RubyLLM::Chat] the chat instance - # @param response [Object] the response object - # @param messages_before_count [Integer] message count before the call - def self.capture_non_streaming_output(span, chat, response, messages_before_count) - return unless response - - # Build message object from response - message = { - "role" => "assistant", - "content" => nil - } - - # Add content if it's a simple text response - # Extract text from Content objects if present (issue #71) - if response.respond_to?(:content) && response.content && !response.content.empty? - content = response.content - content = content.text if content.respond_to?(:text) - message["content"] = content - end - - # Check if there are tool calls in the messages history - # Look at messages added during this complete() call - if chat.respond_to?(:messages) && chat.messages - assistant_msg = chat.messages[messages_before_count..].find { |m| - m.role.to_s == "assistant" && m.respond_to?(:tool_calls) && m.tool_calls&.any? - } - - if assistant_msg&.tool_calls&.any? - message["tool_calls"] = format_tool_calls(assistant_msg.tool_calls) - message["content"] = nil - end - end - - # Format as OpenAI choices[] structure - output = [{ - "index" => 0, - "message" => message, - "finish_reason" => message["tool_calls"] ? "tool_calls" : "stop" - }] - - set_json_attr(span, "braintrust.output_json", output) - - # Set metrics (token usage) - if response.respond_to?(:to_h) - response_hash = response.to_h - usage = { - "input_tokens" => response_hash[:input_tokens], - "output_tokens" => response_hash[:output_tokens], - "cached_tokens" => response_hash[:cached_tokens], - "cache_creation_tokens" => response_hash[:cache_creation_tokens] - }.compact - - unless usage.empty? - metrics = parse_usage_tokens(usage) - set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
- end - end - end - - # Format tool calls into OpenAI format - # @param tool_calls [Hash, Array] the tool calls - # @return [Array] formatted tool calls - def self.format_tool_calls(tool_calls) - tool_calls.map do |_id, tc| - # Ensure arguments is a JSON string (OpenAI format) - args = tc.arguments - args_string = args.is_a?(String) ? args : JSON.generate(args) - - { - "id" => tc.id, - "type" => "function", - "function" => { - "name" => tc.name, - "arguments" => args_string - } - } - end - end - end - end - end - end - end -end diff --git a/test/braintrust/contrib/ruby_llm/deprecated_test.rb b/test/braintrust/contrib/ruby_llm/deprecated_test.rb new file mode 100644 index 0000000..0abf3af --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/deprecated_test.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/ruby_llm/deprecated" + +class Braintrust::Contrib::RubyLLM::DeprecatedTest < Minitest::Test + # --- .wrap --- + + def test_wrap_delegates_to_instrument + mock_chat = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(mock_chat, tracer_provider: mock_tracer) } + end + + assert_equal :ruby_llm, captured_args[0] + assert_same mock_chat, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_with_nil_target_instruments_class + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap } + end + + assert_equal :ruby_llm, captured_args[0] + assert_nil captured_args[1][:target] + end + + def test_wrap_logs_deprecation_warning + mock_chat = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message 
= msg }) do + Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(mock_chat) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, warning_message) + end + + def test_wrap_returns_chat + mock_chat = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(mock_chat) } + assert_same mock_chat, result + end + end + + # --- .unwrap --- + + def test_unwrap_sets_context_enabled_false + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + mock_chat = Object.new + + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(mock_chat) } + + ctx = Braintrust::Contrib::Context.from(mock_chat) + assert_equal false, ctx[:enabled] + end + + def test_unwrap_with_nil_target_sets_class_context + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap } + + ctx = Braintrust::Contrib::Context.from(::RubyLLM::Chat) + assert_equal false, ctx[:enabled] + + # Clean up + Braintrust::Contrib::Context.set!(::RubyLLM::Chat, enabled: true) + end + + def test_unwrap_logs_deprecation_warning + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + mock_chat = Object.new + + warning_message = nil + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(mock_chat) + end + + assert_match(/deprecated/, warning_message) + end + + def test_unwrap_returns_chat + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + mock_chat = Object.new + + result = suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(mock_chat) } + assert_same mock_chat, result + end + + # --- E2E: unwrap disables instrumentation --- + + def test_unwrap_disables_instrumentation_for_subsequent_requests + skip "RubyLLM gem not available" unless 
defined?(::RubyLLM::Chat) + + VCR.use_cassette("contrib/ruby_llm/basic_chat") do + rig = setup_otel_test_rig + + RubyLLM.configure do |config| + config.openai_api_key = get_openai_key + end + + chat = RubyLLM.chat(model: "gpt-4o-mini") + + # Instrument the chat instance + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + + # First request should create a span + chat.ask("Say 'test'") + spans_before = rig.drain + assert_equal 1, spans_before.length, "Expected 1 span before unwrap" + + # Now unwrap the chat instance + suppress_logs { Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap(chat) } + + # Second request should NOT create a span + chat.ask("Say 'test'") + spans_after = rig.drain + assert_equal 0, spans_after.length, "Expected 0 spans after unwrap" + end + end +end diff --git a/test/braintrust/trace/ruby_llm_test.rb b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb similarity index 59% rename from test/braintrust/trace/ruby_llm_test.rb rename to test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb index f5dc38a..c0d1f5a 100644 --- a/test/braintrust/trace/ruby_llm_test.rb +++ b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb @@ -1,17 +1,65 @@ # frozen_string_literal: true require "test_helper" +require_relative "../integration_helper" +require "braintrust/contrib/ruby_llm/instrumentation/chat" + +class Braintrust::Contrib::RubyLLM::Instrumentation::ChatTest < Minitest::Test + Chat = Braintrust::Contrib::RubyLLM::Instrumentation::Chat + + # --- .included --- + + def test_included_prepends_instance_methods + base = Class.new + mock = Minitest::Mock.new + mock.expect(:include?, false, [Chat::InstanceMethods]) + mock.expect(:prepend, nil, [Chat::InstanceMethods]) + + base.define_singleton_method(:ancestors) { mock } + base.define_singleton_method(:prepend) { |mod| mock.prepend(mod) } + + Chat.included(base) + + mock.verify + end + + def test_included_skips_prepend_when_already_applied 
+ base = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + # Should not raise or double-prepend + Chat.included(base) + + # InstanceMethods should appear only once in ancestors + count = base.ancestors.count { |a| a == Chat::InstanceMethods } + assert_equal 1, count + end + + # --- .applied? --- + + def test_applied_returns_false_when_not_included + base = Class.new + + refute Chat.applied?(base) + end + + def test_applied_returns_true_when_included + base = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + assert Chat.applied?(base) + end +end # Define test tool classes for different ruby_llm versions -# Must be defined at module level for RubyLLM to find them properly if defined?(RubyLLM) - # Check ruby_llm version to determine which API to use RUBY_LLM_VERSION = Gem.loaded_specs["ruby_llm"]&.version || Gem::Version.new("0.0.0") SUPPORTS_PARAMS_DSL = Gem::Version.new("1.9.0") <= RUBY_LLM_VERSION - # Tool for ruby_llm 1.8 (using param - singular) - # This syntax works in 1.8+ including 1.9+ for backward compatibility - class WeatherTestToolV18 < RubyLLM::Tool + # Tool for ruby_llm 1.8+ (using param - singular) + class WeatherTestTool < RubyLLM::Tool description "Get the current weather for a location" param :location, type: :string, desc: "The city and state, e.g. San Francisco, CA" @@ -21,59 +69,37 @@ def execute(location:, unit: "fahrenheit") {location: location, temperature: 72, unit: unit, conditions: "sunny"} end end - - # Tool for ruby_llm 1.9+ (using params - plural) - # This new DSL only works in 1.9+ - if SUPPORTS_PARAMS_DSL - class WeatherTestToolV19 < RubyLLM::Tool - description "Get the current weather for a location" - - params do - string :location, description: "The city and state, e.g. 
San Francisco, CA" - string :unit, description: "Temperature unit (celsius or fahrenheit)" - end - - def execute(location:, unit: "fahrenheit") - {location: location, temperature: 72, unit: unit, conditions: "sunny"} - end - end - end end -class RubyLLMIntegrationTest < Minitest::Test +# E2E tests for Chat instrumentation +class Braintrust::Contrib::RubyLLM::Instrumentation::ChatE2ETest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + def setup - # Skip all RubyLLM tests if the gem is not available - skip "RubyLLM gem not available" unless defined?(RubyLLM) + skip_unless_ruby_llm! end - def test_wrap_creates_span_for_basic_chat - VCR.use_cassette("ruby_llm/basic_chat") do - require "ruby_llm" + # --- #complete (non-streaming) --- - # Set up test rig (includes Braintrust processor) + def test_complete_creates_span_with_correct_attributes + VCR.use_cassette("contrib/ruby_llm/basic_chat") do rig = setup_otel_test_rig - # Configure RubyLLM (use real key for recording, fake key for playback) RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it with Braintrust tracing chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - # Make a simple chat request response = chat.ask("Say 'test'") - # Verify response refute_nil response refute_nil response.content assert response.content.length > 0 - # Drain and verify span span = rig.drain_one - # Verify span name assert_equal "ruby_llm.chat", span.name # Verify braintrust.input_json contains messages @@ -99,38 +125,35 @@ def test_wrap_creates_span_for_basic_chat assert metrics["prompt_tokens"] > 0 assert metrics["completion_tokens"] > 0 assert metrics["tokens"] > 0 + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), 
"Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_creates_span_for_streaming_chat - VCR.use_cassette("ruby_llm/streaming_chat") do - require "ruby_llm" + # --- #complete (streaming) --- - # Set up test rig + def test_streaming_complete_creates_span + VCR.use_cassette("contrib/ruby_llm/streaming_chat") do rig = setup_otel_test_rig - # Configure RubyLLM (use real key for recording, fake key for playback) RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - # Make a streaming chat request chunks = [] chat.ask("Count to 3") do |chunk| chunks << chunk end - # Verify chunks were received refute_empty chunks - # Drain and verify span span = rig.drain_one - # Verify span name (same for streaming and non-streaming) assert_equal "ruby_llm.chat", span.name # Verify input @@ -141,64 +164,76 @@ def test_wrap_creates_span_for_streaming_chat output = JSON.parse(span.attributes["braintrust.output_json"]) refute_nil output - # Verify metadata + # Verify metadata has stream flag assert span.attributes.key?("braintrust.metadata") metadata = JSON.parse(span.attributes["braintrust.metadata"]) assert_equal "ruby_llm", metadata["provider"] assert_equal true, metadata["stream"] + + # Verify metrics are present + assert span.attributes.key?("braintrust.metrics"), "Should have metrics attribute" + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + + # Verify time_to_first_token + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" + + # Verify token usage metrics + 
assert metrics["prompt_tokens"] > 0, "Should have prompt_tokens > 0" + assert metrics["completion_tokens"] > 0, "Should have completion_tokens > 0" + assert metrics["tokens"] > 0, "Should have total tokens > 0" end end - def test_wrap_creates_span_for_tool_calling - skip "Tool calling test requires ruby_llm >= 1.9" unless Gem::Version.new("1.9.0") <= RUBY_LLM_VERSION + # --- Tool calling --- - VCR.use_cassette("ruby_llm/tool_calling") do - require "ruby_llm" + def test_tool_calling_creates_nested_spans + skip "Tool calling test requires ruby_llm >= 1.9" unless defined?(SUPPORTS_PARAMS_DSL) && SUPPORTS_PARAMS_DSL - # Set up test rig + VCR.use_cassette("contrib/ruby_llm/tool_calling") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) - chat.with_tool(WeatherTestToolV19) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + chat.with_tool(WeatherTestTool) - # Make a chat request that should trigger tool usage response = chat.ask("What's the weather like in San Francisco?") - # Verify response refute_nil response refute_nil response.content - # Tool calling creates sibling spans: - # 1. ruby_llm.chat - the LLM conversation span - # 2. 
ruby_llm.tool.* - tool execution span(s) + # Should have multiple spans: chat + tool + possibly another chat for follow-up spans = rig.drain assert spans.length >= 2, "Expected at least 2 spans for tool calling (chat + tool)" # Find the chat span and tool span - chat_span = spans.find { |s| s.name == "ruby_llm.chat" } - tool_span = spans.find { |s| s.name.start_with?("ruby_llm.tool.") } + chat_spans = spans.select { |s| s.name == "ruby_llm.chat" } + tool_spans = spans.select { |s| s.name.start_with?("ruby_llm.tool.") } + + assert chat_spans.length >= 1, "Expected at least one ruby_llm.chat span" + assert tool_spans.length >= 1, "Expected at least one ruby_llm.tool.* span" - refute_nil chat_span, "Expected a ruby_llm.chat span" - refute_nil tool_span, "Expected a ruby_llm.tool.* span" + # Verify tool span has correct attributes + tool_span = tool_spans.first + assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" + span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) + assert_equal "tool", span_attrs["type"] + assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" + assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" - # Verify chat span has metadata with tools - assert chat_span.attributes.key?("braintrust.metadata") + # Verify chat span metadata contains exact tool schema (OpenAI format) + chat_span = chat_spans.first metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - assert_equal "ruby_llm", metadata["provider"] - # Assert exact tool schema (OpenAI format with RubyLLM-specific fields stripped) expected_tools = [ { "type" => "function", "function" => { - "name" => "weather_test_tool_v19", + "name" => "weather_test", "description" => "Get the current weather for a location", "parameters" => { "type" => "object", @@ -218,53 +253,32 @@ def test_wrap_creates_span_for_tool_calling } ] assert_equal 
expected_tools, metadata["tools"] - - # Verify chat span has output - assert chat_span.attributes.key?("braintrust.output_json") - output = JSON.parse(chat_span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify tool span has correct attributes - assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" - span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) - assert_equal "tool", span_attrs["type"] - assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" - assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" end end - # Test for GitHub issue #39: ActiveRecord integration calls complete() directly - # This simulates how acts_as_chat uses RubyLLM - it adds messages to chat history - # and then calls complete() directly, bypassing ask() - def test_wrap_creates_span_for_direct_complete - VCR.use_cassette("ruby_llm/direct_complete") do - require "ruby_llm" + # --- Direct complete() --- - # Set up test rig + def test_direct_complete_creates_span + VCR.use_cassette("contrib/ruby_llm/direct_complete") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance and wrap it chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) # Simulate ActiveRecord integration: add message directly, then call complete() chat.add_message(role: :user, content: "Say 'hello'") response = chat.complete - # Verify response refute_nil response refute_nil response.content assert response.content.length > 0 - # Drain and verify span span = rig.drain_one - # Verify span name assert_equal "ruby_llm.chat", span.name # Verify braintrust.input_json contains the message we added @@ 
-272,200 +286,87 @@ def test_wrap_creates_span_for_direct_complete input = JSON.parse(span.attributes["braintrust.input_json"]) assert input.is_a?(Array) assert input.length > 0 - # The user message should be in the input user_msg = input.find { |m| m["role"] == "user" } refute_nil user_msg assert_includes user_msg["content"], "hello" - - # Verify braintrust.output_json contains response - assert span.attributes.key?("braintrust.output_json") - output = JSON.parse(span.attributes["braintrust.output_json"]) - refute_nil output - - # Verify braintrust.metadata contains provider and model info - assert span.attributes.key?("braintrust.metadata") - metadata = JSON.parse(span.attributes["braintrust.metadata"]) - assert_equal "ruby_llm", metadata["provider"] - assert_equal "gpt-4o-mini", metadata["model"] - - # Verify braintrust.metrics contains token usage - assert span.attributes.key?("braintrust.metrics") - metrics = JSON.parse(span.attributes["braintrust.metrics"]) - assert metrics["prompt_tokens"] > 0 - assert metrics["completion_tokens"] > 0 - assert metrics["tokens"] > 0 end end - def test_wrap_is_idempotent - VCR.use_cassette("ruby_llm/basic_chat") do - require "ruby_llm" + # --- Idempotency --- - # Set up test rig + def test_wrapping_is_idempotent + VCR.use_cassette("contrib/ruby_llm/basic_chat") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Create chat instance chat = RubyLLM.chat(model: "gpt-4o-mini") - # Wrap it twice - should not cause issues - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(chat, tracer_provider: rig.tracer_provider) + # Wrap twice - should not cause issues + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - # Make a chat 
request response = chat.ask("Say 'test'") - # Verify response refute_nil response # Drain and verify - should only have ONE span (not double-wrapped) span = rig.drain_one - # Verify span name assert_equal "ruby_llm.chat", span.name - - # Verify the wrapped flag is set - assert chat.instance_variable_get(:@braintrust_wrapped) end end - def test_wrap_module_creates_spans_for_all_instances - VCR.use_cassette("ruby_llm/basic_chat") do - require "ruby_llm" + # --- Class-level instrumentation --- - # Set up test rig + def test_class_level_instrumentation_traces_new_instances + VCR.use_cassette("contrib/ruby_llm/basic_chat") do rig = setup_otel_test_rig - # Configure RubyLLM RubyLLM.configure do |config| config.openai_api_key = get_openai_key end - # Wrap the module (not an instance) - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.wrap(tracer_provider: rig.tracer_provider) - - # Verify the wrapper module is stored - assert ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) - - # Create multiple chat instances AFTER wrapping - they should all be traced - chat1 = RubyLLM.chat(model: "gpt-4o-mini") - chat2 = RubyLLM.chat(model: "gpt-4o-mini") - chat3 = RubyLLM.chat(model: "gpt-4o-mini") + # Set the default tracer provider for class-level instrumentation + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) - # Make chat requests with each instance - response1 = chat1.ask("Say 'test'") - response2 = chat2.ask("Say 'test'") - response3 = chat3.ask("Say 'test'") + # Instrument at the class level (no target) + Braintrust.instrument!(:ruby_llm) - # Verify responses - refute_nil response1 - refute_nil response2 - refute_nil response3 - - # Drain and verify we got 3 spans (one per request) - spans = rig.drain - assert_equal 3, spans.length, "Expected 3 spans (one per request)" - - # Verify all spans have the correct name and attributes - spans.each do |span| - assert_equal "ruby_llm.chat", span.name - assert 
span.attributes.key?("braintrust.input_json") - assert span.attributes.key?("braintrust.output_json") - assert span.attributes.key?("braintrust.metadata") - assert span.attributes.key?("braintrust.metrics") - end - - # Verify exporter is empty after drain - assert_equal 0, rig.drain.length, "Exporter should be empty after drain" - - # Now unwrap the module - Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.unwrap - - # Verify the wrapper module reference is removed - refute ::RubyLLM::Chat.instance_variable_defined?(:@braintrust_wrapper_module) + # Create a NEW chat instance AFTER class-level instrumentation + # This instance should automatically be traced without explicit instrumentation + chat = RubyLLM.chat(model: "gpt-4o-mini") - # Create new chat instances AFTER unwrapping - they should NOT be traced - chat4 = RubyLLM.chat(model: "gpt-4o-mini") - chat5 = RubyLLM.chat(model: "gpt-4o-mini") + # Make a request - should be traced automatically + response = chat.ask("Say 'test'") - # Verify they are NOT wrapped - refute chat4.instance_variable_defined?(:@braintrust_wrapped), "chat4 should not be wrapped" - refute chat5.instance_variable_defined?(:@braintrust_wrapped), "chat5 should not be wrapped" + refute_nil response + refute_nil response.content - # Make chat requests with the unwrapped instances - response4 = chat4.ask("Say 'test'") - response5 = chat5.ask("Say 'test'") + # Verify span was created (proving class-level instrumentation works) + span = rig.drain_one - # Verify responses still work (RubyLLM still functions) - refute_nil response4 - refute_nil response5 + assert_equal "ruby_llm.chat", span.name + assert span.attributes.key?("braintrust.input_json") + assert span.attributes.key?("braintrust.output_json") + assert span.attributes.key?("braintrust.metadata") - # Verify NO new spans were created (unwrap disabled tracing) - spans_after_unwrap = rig.drain - assert_equal 0, spans_after_unwrap.length, "Expected 0 spans after unwrap" + metadata = 
JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "ruby_llm", metadata["provider"] + assert_equal "gpt-4o-mini", metadata["model"] end end - # Test for frozen Hash bug in format_tool_schema - # This reproduces the issue where tool_params from provider's tool_for method - # returns a frozen hash, causing FrozenError when trying to delete keys - def test_format_tool_schema_handles_frozen_hash - # Create a mock tool object - mock_tool = Object.new - def mock_tool.name - "test_tool" - end - - def mock_tool.description - "A test tool" - end - - def mock_tool.params_schema - { - "type" => "object", - "properties" => {}, - "required" => [], - "additionalProperties" => false, - "strict" => true - }.freeze - end - - def mock_tool.respond_to?(method) - [:name, :description, :params_schema].include?(method) - end - - # Test with basic tool schema (no provider) - # This will use build_basic_tool_schema which creates a tool schema - # with the frozen params_schema from the tool - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_tool_schema(mock_tool, nil) - - # Verify the result is returned (not raising FrozenError) - refute_nil result - assert_equal "function", result["type"] - assert_equal "test_tool", result["function"]["name"] - assert_equal "A test tool", result["function"]["description"] - - # Verify the parameters don't contain RubyLLM-specific fields - params = result["function"]["parameters"] - refute params.key?("strict"), "strict should be removed" - refute params.key?(:strict), "strict (symbol) should be removed" - refute params.key?("additionalProperties"), "additionalProperties should be removed" - refute params.key?(:additionalProperties), "additionalProperties (symbol) should be removed" - - # Verify the expected fields are still present - assert_equal "object", params["type"] - assert_equal({}, params["properties"]) - assert_equal [], params["required"] - end + # --- Attachment handling (GitHub issue #71) --- # Test for GitHub 
issue #71: Content object not properly serialized # When a message has attachments, RubyLLM returns a Content object instead of a string # The SDK should include both text and attachments in the trace def test_format_message_for_input_handles_content_object_with_attachments + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + with_tmp_file(data: "This is test content") do |tmpfile| # Create a Content object with an attachment (this triggers the Content object return) content = ::RubyLLM::Content.new("Hello, this is the actual text content") @@ -478,8 +379,14 @@ def test_format_message_for_input_handles_content_object_with_attachments assert msg.content.is_a?(::RubyLLM::Content), "Precondition failed: Expected Content object when message has attachments" + # Create a minimal chat-like object to test the helper method + chat_class = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + chat = chat_class.new + # Call the method under test - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_message_for_input(msg) + result = chat.send(:format_message_for_input, msg) # When attachments are present, content should be a multipart array assert_equal "user", result["role"] @@ -506,6 +413,8 @@ def test_format_message_for_input_handles_content_object_with_attachments # Test for GitHub issue #71: Content object with image attachment # Verifies attachments are properly included in traces (similar to braintrust-go-sdk) def test_format_message_for_input_handles_image_attachment + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + with_png_file do |tmpfile| # Create a Content object with an image attachment content = ::RubyLLM::Content.new("What's in this image?") @@ -520,8 +429,14 @@ def test_format_message_for_input_handles_image_attachment assert_equal 1, msg.content.attachments.count, "Expected one attachment" + # Create a minimal chat-like object to test the helper method + chat_class = Class.new do + include 
Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + chat = chat_class.new + # Call the method under test - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_message_for_input(msg) + result = chat.send(:format_message_for_input, msg) # Verify multipart content array is returned assert_equal "user", result["role"] @@ -550,6 +465,8 @@ def test_format_message_for_input_handles_image_attachment # Test for GitHub issue #71: build_input_messages works with Content objects # Verifies the full flow works end-to-end with attachments def test_build_input_messages_handles_content_objects_with_attachments + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + with_png_file do |tmpfile| # Configure RubyLLM ::RubyLLM.configure do |config| @@ -564,8 +481,11 @@ def test_build_input_messages_handles_content_objects_with_attachments content.add_attachment(tmpfile.path) chat.add_message(role: :user, content: content) + # Instrument the chat to get access to the helper method + Braintrust.instrument!(:ruby_llm, target: chat) + # Call the method under test - result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.build_input_messages(chat, nil) + result = chat.send(:build_input_messages) # Verify the result assert_equal 1, result.length, "Expected one message" @@ -592,4 +512,49 @@ def test_build_input_messages_handles_content_objects_with_attachments "Result should not contain Content object reference" end end + + # --- Frozen hash handling --- + + def test_format_tool_schema_handles_frozen_hash + skip "RubyLLM gem not available" unless defined?(::RubyLLM) + + # Create a mock tool object with frozen params_schema + mock_tool = Object.new + def mock_tool.name + "test_tool" + end + + def mock_tool.description + "A test tool" + end + + def mock_tool.params_schema + { + "type" => "object", + "properties" => {}, + "required" => [], + "additionalProperties" => false, + "strict" => true + }.freeze + end + + # Create a minimal chat-like object to test the 
helper method + chat_class = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + chat = chat_class.new + + # Test format_tool_schema (via send since it's private) + result = chat.send(:format_tool_schema, mock_tool, nil) + + # Verify the result is returned (not raising FrozenError) + refute_nil result + assert_equal "function", result["type"] + assert_equal "test_tool", result["function"]["name"] + + # Verify the parameters don't contain RubyLLM-specific fields + params = result["function"]["parameters"] + refute params.key?("strict"), "strict should be removed" + refute params.key?("additionalProperties"), "additionalProperties should be removed" + end end diff --git a/test/braintrust/contrib/ruby_llm/instrumentation_test.rb b/test/braintrust/contrib/ruby_llm/instrumentation_test.rb new file mode 100644 index 0000000..c0a93c5 --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/instrumentation_test.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_llm/patcher" + +# Tests for instance-level instrumentation behavior +class Braintrust::Contrib::RubyLLM::InstrumentationTest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + + def setup + skip_unless_ruby_llm! + end + + # --- Instance-level instrumentation --- + + def test_instance_instrumentation_only_patches_target_chat + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::RubyLLM::ChatPatcher.patched? 
+ + rig = setup_otel_test_rig + + # Configure RubyLLM with a test key + RubyLLM.configure do |config| + config.openai_api_key = "test-api-key" + end + + # Create two chat instances BEFORE any patching + chat_traced = RubyLLM::Chat.new(model: "gpt-4o-mini") + chat_untraced = RubyLLM::Chat.new(model: "gpt-4o-mini") + + # Verify neither client is patched initially + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_traced), + "chat_traced should not be patched initially" + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_untraced), + "chat_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?, + "class should not be patched initially" + + # Instrument only one chat (instance-level) + Braintrust.instrument!(:ruby_llm, target: chat_traced, tracer_provider: rig.tracer_provider) + + # Only the traced chat should be patched + assert Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_traced), + "chat_traced should be patched after instrument!" + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat_untraced), + "chat_untraced should NOT be patched after instrument!" + + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end +end diff --git a/test/braintrust/contrib/ruby_llm/integration_helper.rb b/test/braintrust/contrib/ruby_llm/integration_helper.rb new file mode 100644 index 0000000..c2a9788 --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/integration_helper.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +# Test helpers for RubyLLM integration tests. +# Provides gem loading helpers for the ruby_llm gem. + +module Braintrust + module Contrib + module RubyLLM + module IntegrationHelper + # Skip test unless ruby_llm gem is available. + # Loads the gem if available. 
+ def skip_unless_ruby_llm! + unless Gem.loaded_specs["ruby_llm"] + skip "ruby_llm gem not available" + end + + require "ruby_llm" unless defined?(::RubyLLM) + end + + # Load ruby_llm gem if available (doesn't skip, for tests that handle both states). + def load_ruby_llm_if_available + if Gem.loaded_specs["ruby_llm"] + require "ruby_llm" unless defined?(::RubyLLM) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/ruby_llm/integration_test.rb b/test/braintrust/contrib/ruby_llm/integration_test.rb new file mode 100644 index 0000000..b2e9296 --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/integration_test.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +class Braintrust::Contrib::RubyLLM::IntegrationTest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + + def setup + load_ruby_llm_if_available + @integration = Braintrust::Contrib::RubyLLM::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :ruby_llm, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["ruby_llm"], @integration.gem_names + end + + # --- .require_paths --- + + def test_require_paths + assert_equal ["ruby_llm"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "1.8.0", @integration.minimum_version + end + + # --- .loaded? 
--- + + def test_loaded_returns_true_when_ruby_llm_chat_defined + skip "RubyLLM gem not available" unless defined?(::RubyLLM::Chat) + + assert @integration.loaded?, "Should be loaded when ::RubyLLM::Chat is defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/ruby_llm/patcher_test.rb b/test/braintrust/contrib/ruby_llm/patcher_test.rb new file mode 100644 index 0000000..eaaca0a --- /dev/null +++ b/test/braintrust/contrib/ruby_llm/patcher_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/ruby_llm/patcher" + +class Braintrust::Contrib::RubyLLM::ChatPatcherTest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper + + def setup + skip_unless_ruby_llm! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_ruby_llm_chat_defined + assert Braintrust::Contrib::RubyLLM::ChatPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + fake_chat_class = Class.new + + ::RubyLLM.stub_const(:Chat, fake_chat_class) do + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched? + end + end + + def test_patched_returns_true_when_module_included + fake_chat_class = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + ::RubyLLM.stub_const(:Chat, fake_chat_class) do + assert Braintrust::Contrib::RubyLLM::ChatPatcher.patched? 
+ end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:singleton_class, returns: fake_singleton) do |chat| + refute Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::RubyLLM::Instrumentation::Chat + end + + mock_chain(:singleton_class, returns: fake_singleton) do |chat| + assert Braintrust::Contrib::RubyLLM::ChatPatcher.patched?(target: chat) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_chat_class = Minitest::Mock.new + fake_chat_class.expect(:include, true, [Braintrust::Contrib::RubyLLM::Instrumentation::Chat]) + + ::RubyLLM.stub_const(:Chat, fake_chat_class) do + Braintrust::Contrib::RubyLLM::ChatPatcher.perform_patch + fake_chat_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::RubyLLM::Instrumentation::Chat]) + + mock_chain(:singleton_class, returns: terminal) do |chat| + chat.expect(:is_a?, true, [::RubyLLM::Chat]) + Braintrust::Contrib::RubyLLM::ChatPatcher.perform_patch(target: chat) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_chat = Minitest::Mock.new + fake_chat.expect(:is_a?, false, [::RubyLLM::Chat]) + + assert_raises(ArgumentError) do + Braintrust::Contrib::RubyLLM::ChatPatcher.perform_patch(target: fake_chat) + end + + fake_chat.verify + end +end diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml new file mode 100644 index 0000000..cd554b7 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/basic_chat.yml @@ -0,0 +1,119 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + 
body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Say ''test''"}],"stream":false}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:18:01 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '268' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '423' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999995' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_79db98b8ed3a4623be07fcb826bb94f6 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=DzV_AEA6fZZyC.vKkRS.2DlX8b8W3.X4Xihg1bIYEKQ-1766967481-1.0.1.1-kUeLmejj.yghCo2m1hgTl0RsTL9XtF8khPwq0uqeYgTZmbQYMPIJd7VP3VERW_LHMmespaoTxNQuFnZW1uMc1sUUtM1bHeW0coVVcF2re14; + path=/; expires=Mon, 29-Dec-25 00:48:01 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=5oPQZmzv6H6BNQsGIuLakdFOwrudVV7ZCQTSgweYwmw-1766967481726-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e236b7fb680-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CrvFRNGbE66JUzZgXuLlWUo1QUxHn", + "object": "chat.completion", + "created": 1766967481, + "model": 
"gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 11, + "completion_tokens": 2, + "total_tokens": 13, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_c4585b5b9c" + } + recorded_at: Mon, 29 Dec 2025 00:18:01 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml new file mode 100644 index 0000000..a856010 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/direct_complete.yml @@ -0,0 +1,119 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Say ''hello''"}],"stream":false}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:17:58 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '383' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '532' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + 
X-Ratelimit-Remaining-Tokens: + - '149999995' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_a9c1c29e88f34223a39f05e8b3c1b915 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=yzF9Fvu_9Mya4Ityhcgf9YdquuMEoME3B2h33f8FDrU-1766967478-1.0.1.1-CdeI5dr56iEtuJkKE.q2BWtd8sDdKPbjAvjkF.xy.RVyTnv1nlwynLwm3gzR58Xd2FKD_WIM5EpgYmroiDPudBzp2XPwsCdFezjeR3Vmxfs; + path=/; expires=Mon, 29-Dec-25 00:47:58 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=gDhAm9ipn0fkMChqAy5vOjbKVUKvGVsm1gakEJ43s7M-1766967478363-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e0dddd2f84d-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CrvFNePrBoNzJZTdgA7GdlfyXPt3w", + "object": "chat.completion", + "created": 1766967477, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! 
How can I assist you today?", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 11, + "completion_tokens": 9, + "total_tokens": 20, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_db64b059be" + } + recorded_at: Mon, 29 Dec 2025 00:17:58 GMT +recorded_with: VCR 6.3.1 diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml new file mode 100644 index 0000000..8aef2b8 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/streaming_chat.yml @@ -0,0 +1,109 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Count + to 3"}],"stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:17:59 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '158' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '297' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999995' + 
X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_4cb567fe754a470bbd96de2e66c5ba17 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=y0wQ2ZTC80M.9WLZ8o_1mgv_GucWdtWUo9Pi98ugrdA-1766967479-1.0.1.1-0VkVwdOBB8LYcuFKzfk3XOi54oa_hKgcFO8WODbUU1sq..QemE7aph58Uyry4r9wq0PiJmITYE50IS8r7L7.a1lLsWj5hncSgh2iyiy5BIk; + path=/; expires=Mon, 29-Dec-25 00:47:59 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=_.nRIr0a48TKL8OnhL3EVqWlAbd1YcnaJfsajCYRKwc-1766967479987-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e142b1610d5-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mGeH3kiRC"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"qhpHse2qUP"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"FkuOCIZAL6"} + + data: 
{"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"18PrtDgh4V"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"m00xds2zeu"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"IjniYU966s"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wslCqvFoLt"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mkF5IQIdqQ"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5gWlzsmQme"} + + data: 
{"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"NHMuR"} + + data: {"id":"chatcmpl-CrvFPPOSStxmKC9zUUdnEAAyMAqCs","object":"chat.completion.chunk","created":1766967479,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":11,"completion_tokens":8,"total_tokens":19,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"U0r0eiGyTnn"} + + data: [DONE] + + recorded_at: Mon, 29 Dec 2025 00:18:00 GMT +recorded_with: VCR 6.3.1 +... diff --git a/test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml b/test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml new file mode 100644 index 0000000..8812a53 --- /dev/null +++ b/test/fixtures/vcr_cassettes/contrib/ruby_llm/tool_calling.yml @@ -0,0 +1,220 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"What''s + the weather like in San Francisco?"}],"stream":false,"tools":[{"type":"function","function":{"name":"weather_test","description":"Get + the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","description":"Temperature + unit (celsius or fahrenheit)"}},"required":["location","unit"]}}}]}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:18:03 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '1090' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '1255' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999987' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_5db84c9eccd54c4381517371d916d2d7 + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=o7gXcsyJykTSc3AQEOeH2F2Zd76Qmrj7P5ShIxZSAZI-1766967483-1.0.1.1-wBpt4tXwupbboQBP9wbjfJgSKaekk9fkllt4JQxktoyyeRUDFvGMphoD2NERvZd3aw..QuedwPBoRcxvYTpo_kD7U0tJ5FUB3bjwFjKyKqA; + path=/; expires=Mon, 29-Dec-25 00:48:03 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=F1nn6Pe4lVvPtXyAQ6p3IsEVl1AhcR50tVr4LFPyXsc-1766967483487-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e29185613a0-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CrvFSUW7ygZkMNlUHRDxcEUObHIHP", + "object": "chat.completion", + "created": 1766967482, + "model": 
"gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_iw7sDBQxXqVqAnfid7in9Xti", + "type": "function", + "function": { + "name": "weather_test", + "arguments": "{\"location\":\"San Francisco, CA\",\"unit\":\"celsius\"}" + } + } + ], + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 80, + "completion_tokens": 22, + "total_tokens": 102, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_8bbc38b4db" + } + recorded_at: Mon, 29 Dec 2025 00:18:03 GMT +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"What''s + the weather like in San Francisco?"},{"role":"assistant","tool_calls":[{"id":"call_iw7sDBQxXqVqAnfid7in9Xti","type":"function","function":{"name":"weather_test","arguments":"{\"location\":\"San + Francisco, CA\",\"unit\":\"celsius\"}"}}]},{"role":"tool","content":"{:location=>\"San + Francisco, CA\", :temperature=>72, :unit=>\"celsius\", :conditions=>\"sunny\"}","tool_call_id":"call_iw7sDBQxXqVqAnfid7in9Xti"}],"stream":false,"tools":[{"type":"function","function":{"name":"weather_test","description":"Get + the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","description":"Temperature + unit (celsius or fahrenheit)"}},"required":["location","unit"]}}}]}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Mon, 29 Dec 2025 00:18:04 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '579' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '592' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999962' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_2ed1ffa90e494f06a5339cdcf266983d + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=ogfj4fYYFkRY6zM7zXWJGX8QhM1dJRVtmGji0Ky56_Q-1766967484-1.0.1.1-Pookt2Yg.wmTZyFEfUixlRnu2Ue9EOAOw3IKpQm4UuN9uRTfhgnYF53WQetZHsXQvRR7wtyrMI_khsq2G.ebtYAaz8m_Pjqbf.FTtpAqsbE; + path=/; expires=Mon, 29-Dec-25 00:48:04 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=JDJQV_HRTMBJyhD7n_pWhKWZXE0SI38XFZLZLLNktQE-1766967484184-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9b551e3409791045-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: !binary |- + 
ewogICJpZCI6ICJjaGF0Y21wbC1DcnZGVDVDbTRsWUx2R295WHVqSFNOUXVoRXZDayIsCiAgIm9iamVjdCI6ICJjaGF0LmNvbXBsZXRpb24iLAogICJjcmVhdGVkIjogMTc2Njk2NzQ4MywKICAibW9kZWwiOiAiZ3B0LTRvLW1pbmktMjAyNC0wNy0xOCIsCiAgImNob2ljZXMiOiBbCiAgICB7CiAgICAgICJpbmRleCI6IDAsCiAgICAgICJtZXNzYWdlIjogewogICAgICAgICJyb2xlIjogImFzc2lzdGFudCIsCiAgICAgICAgImNvbnRlbnQiOiAiVGhlIHdlYXRoZXIgaW4gU2FuIEZyYW5jaXNjbywgQ0EgaXMgY3VycmVudGx5IHN1bm55IHdpdGggYSB0ZW1wZXJhdHVyZSBvZiA3MsKwQy4iLAogICAgICAgICJyZWZ1c2FsIjogbnVsbCwKICAgICAgICAiYW5ub3RhdGlvbnMiOiBbXQogICAgICB9LAogICAgICAibG9ncHJvYnMiOiBudWxsLAogICAgICAiZmluaXNoX3JlYXNvbiI6ICJzdG9wIgogICAgfQogIF0sCiAgInVzYWdlIjogewogICAgInByb21wdF90b2tlbnMiOiAxMzUsCiAgICAiY29tcGxldGlvbl90b2tlbnMiOiAxOSwKICAgICJ0b3RhbF90b2tlbnMiOiAxNTQsCiAgICAicHJvbXB0X3Rva2Vuc19kZXRhaWxzIjogewogICAgICAiY2FjaGVkX3Rva2VucyI6IDAsCiAgICAgICJhdWRpb190b2tlbnMiOiAwCiAgICB9LAogICAgImNvbXBsZXRpb25fdG9rZW5zX2RldGFpbHMiOiB7CiAgICAgICJyZWFzb25pbmdfdG9rZW5zIjogMCwKICAgICAgImF1ZGlvX3Rva2VucyI6IDAsCiAgICAgICJhY2NlcHRlZF9wcmVkaWN0aW9uX3Rva2VucyI6IDAsCiAgICAgICJyZWplY3RlZF9wcmVkaWN0aW9uX3Rva2VucyI6IDAKICAgIH0KICB9LAogICJzZXJ2aWNlX3RpZXIiOiAiZGVmYXVsdCIsCiAgInN5c3RlbV9maW5nZXJwcmludCI6ICJmcF84YmJjMzhiNGRiIgp9Cg== + recorded_at: Mon, 29 Dec 2025 00:18:04 GMT +recorded_with: VCR 6.3.1 From c654952401771a1befc806c45217602992f2a0f3 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sun, 28 Dec 2025 23:20:24 -0500 Subject: [PATCH 11/27] Changed: Migrated anthropic to new integration API --- README.md | 6 +- Rakefile | 1 + .../anthropic/basic.rb} | 15 +- examples/contrib/anthropic/deprecated.rb | 69 ++++ examples/contrib/anthropic/instance.rb | 57 ++++ examples/contrib/anthropic/streaming.rb | 75 +++++ .../anthropic/golden.rb} | 2 +- .../anthropic/kitchen-sink.rb} | 2 +- lib/braintrust/contrib.rb | 2 + .../contrib/anthropic/deprecated.rb | 24 ++ .../anthropic/instrumentation/common.rb | 53 +++ .../anthropic/instrumentation/messages.rb | 232 +++++++++++++ .../contrib/anthropic/integration.rb | 53 +++ 
lib/braintrust/contrib/anthropic/patcher.rb | 62 ++++ lib/braintrust/trace.rb | 8 - lib/braintrust/trace/contrib/anthropic.rb | 316 ------------------ lib/braintrust/trace/tokens.rb | 49 --- .../contrib/anthropic/deprecated_test.rb | 58 ++++ .../anthropic/instrumentation/common_test.rb | 102 ++++++ .../instrumentation/messages_test.rb} | 235 +++++++------ .../contrib/anthropic/instrumentation_test.rb | 111 ++++++ .../contrib/anthropic/integration_helper.rb | 29 ++ .../contrib/anthropic/integration_test.rb | 59 ++++ .../contrib/anthropic/patcher_test.rb | 93 ++++++ test/braintrust/trace/tokens_test.rb | 78 ----- 25 files changed, 1222 insertions(+), 569 deletions(-) rename examples/{anthropic.rb => contrib/anthropic/basic.rb} (81%) create mode 100644 examples/contrib/anthropic/deprecated.rb create mode 100644 examples/contrib/anthropic/instance.rb create mode 100644 examples/contrib/anthropic/streaming.rb rename examples/internal/{anthropic_golden.rb => contrib/anthropic/golden.rb} (99%) rename examples/internal/{anthropic.rb => contrib/anthropic/kitchen-sink.rb} (99%) create mode 100644 lib/braintrust/contrib/anthropic/deprecated.rb create mode 100644 lib/braintrust/contrib/anthropic/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/anthropic/instrumentation/messages.rb create mode 100644 lib/braintrust/contrib/anthropic/integration.rb create mode 100644 lib/braintrust/contrib/anthropic/patcher.rb delete mode 100644 lib/braintrust/trace/contrib/anthropic.rb delete mode 100644 lib/braintrust/trace/tokens.rb create mode 100644 test/braintrust/contrib/anthropic/deprecated_test.rb create mode 100644 test/braintrust/contrib/anthropic/instrumentation/common_test.rb rename test/braintrust/{trace/anthropic_test.rb => contrib/anthropic/instrumentation/messages_test.rb} (79%) create mode 100644 test/braintrust/contrib/anthropic/instrumentation_test.rb create mode 100644 test/braintrust/contrib/anthropic/integration_helper.rb create mode 100644 
test/braintrust/contrib/anthropic/integration_test.rb create mode 100644 test/braintrust/contrib/anthropic/patcher_test.rb delete mode 100644 test/braintrust/trace/tokens_test.rb diff --git a/README.md b/README.md index 0d71497..2eef3cf 100644 --- a/README.md +++ b/README.md @@ -146,11 +146,11 @@ require "braintrust" require "anthropic" Braintrust.init +Braintrust.instrument!(:anthropic) +# Instrument Anthropic (instance-level) client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -Braintrust::Trace::Anthropic.wrap(client) - tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app") root_span = nil @@ -268,7 +268,7 @@ Check out the [`examples/`](./examples/) directory for complete working examples - [trace.rb](./examples/trace.rb) - Manual span creation and tracing - [openai.rb](./examples/openai.rb) - Automatically trace OpenAI API calls - [alexrudall_openai.rb](./examples/alexrudall_openai.rb) - Automatically trace ruby-openai gem API calls -- [anthropic.rb](./examples/anthropic.rb) - Automatically trace Anthropic API calls +- [contrib/anthropic/basic.rb](./examples/contrib/anthropic/basic.rb) - Automatically trace Anthropic API calls - [ruby_llm.rb](./examples/ruby_llm.rb) - Automatically trace RubyLLM API calls - [trace/trace_attachments.rb](./examples/trace/trace_attachments.rb) - Log attachments (images, PDFs) in traces - [eval/dataset.rb](./examples/eval/dataset.rb) - Run evaluations using datasets stored in Braintrust diff --git a/Rakefile b/Rakefile index 74788e8..64c43e5 100644 --- a/Rakefile +++ b/Rakefile @@ -106,6 +106,7 @@ namespace :test do namespace :contrib do # Tasks per integration [ + {name: :anthropic}, {name: :openai}, {name: :ruby_llm}, {name: :ruby_openai, appraisal: "ruby-openai"} diff --git a/examples/anthropic.rb b/examples/contrib/anthropic/basic.rb similarity index 81% rename from examples/anthropic.rb rename to examples/contrib/anthropic/basic.rb index 03884c7..b453b0b 100644 --- a/examples/anthropic.rb +++ 
b/examples/contrib/anthropic/basic.rb @@ -11,7 +11,7 @@ # This example demonstrates how to automatically trace Anthropic API calls with Braintrust. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/anthropic.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/basic.rb # Check for API keys unless ENV["ANTHROPIC_API_KEY"] @@ -20,21 +20,20 @@ exit 1 end +# Instrument Anthropic (class-level, affects all clients) Braintrust.init(blocking_login: true) +Braintrust.instrument!(:anthropic) # Create Anthropic client client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -# Wrap the client with Braintrust tracing -Braintrust::Trace::Anthropic.wrap(client) - # Create a root span to capture the entire operation tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") root_span = nil # Make a message request (automatically traced!) puts "Sending message request to Anthropic..." -message = tracer.in_span("examples/anthropic.rb") do |span| +message = tracer.in_span("examples/contrib/anthropic/basic.rb") do |span| root_span = span client.messages.create( @@ -48,7 +47,7 @@ end # Print the response -puts "\n✓ Response received!" +puts "\n Response received!" puts "\nClaude: #{message.content[0].text}" # Print usage stats @@ -57,10 +56,10 @@ puts " Output tokens: #{message.usage.output_tokens}" # Print permalink to view this trace in Braintrust -puts "\n✓ View this trace in Braintrust:" +puts "\n View this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust OpenTelemetry.tracer_provider.shutdown -puts "\n✓ Trace sent to Braintrust!" +puts "\n Trace sent to Braintrust!" 
diff --git a/examples/contrib/anthropic/deprecated.rb b/examples/contrib/anthropic/deprecated.rb new file mode 100644 index 0000000..d1761e6 --- /dev/null +++ b/examples/contrib/anthropic/deprecated.rb @@ -0,0 +1,69 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "braintrust/contrib/anthropic/deprecated" +require "anthropic" +require "opentelemetry/sdk" + +# Example: Anthropic message creation with deprecated wrap() API +# +# This example demonstrates the old API for backward compatibility. +# For new code, use `Braintrust.instrument!(:anthropic)` instead. +# +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/deprecated.rb + +# Check for API keys +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + puts "Get your API key from: https://console.anthropic.com/" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create Anthropic client +client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Wrap the client with Braintrust tracing +# DEPRECATED: Use `Braintrust.instrument!(:anthropic, target: client)` instead +Braintrust::Trace::Anthropic.wrap(client) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") +root_span = nil + +# Make a message request (automatically traced!) +puts "Sending message request to Anthropic..." +message = tracer.in_span("examples/contrib/anthropic/deprecated.rb") do |span| + root_span = span + + client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 100, + system: "You are a helpful assistant.", + messages: [ + {role: "user", content: "Say hello and tell me a short joke."} + ] + ) +end + +# Print the response +puts "\n Response received!" 
+puts "\nClaude: #{message.content[0].text}" + +# Print usage stats +puts "\nToken usage:" +puts " Input tokens: #{message.usage.input_tokens}" +puts " Output tokens: #{message.usage.output_tokens}" + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n Trace sent to Braintrust!" diff --git a/examples/contrib/anthropic/instance.rb b/examples/contrib/anthropic/instance.rb new file mode 100644 index 0000000..8b49458 --- /dev/null +++ b/examples/contrib/anthropic/instance.rb @@ -0,0 +1,57 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "anthropic" +require "opentelemetry/sdk" + +# Example: Instance-level Anthropic instrumentation +# +# This shows how to instrument a specific client instance rather than +# all Anthropic clients globally. +# +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/instance.rb + +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + exit 1 +end + +Braintrust.init(blocking_login: true) + +# Create two client instances +client_traced = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +client_untraced = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Only instrument one of them +Braintrust.instrument!(:anthropic, target: client_traced) + +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") +root_span = nil + +tracer.in_span("examples/contrib/anthropic/instance.rb") do |span| + root_span = span + + puts "Calling traced client..." + response1 = client_traced.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 10, + messages: [{role: "user", content: "Say 'traced'"}] + ) + puts "Traced response: #{response1.content[0].text}" + + puts "\nCalling untraced client..." 
+ response2 = client_untraced.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 10, + messages: [{role: "user", content: "Say 'untraced'"}] + ) + puts "Untraced response: #{response2.content[0].text}" +end + +puts "\nView trace: #{Braintrust::Trace.permalink(root_span)}" +puts "(Only the first client call should have an anthropic.messages span)" + +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/anthropic/streaming.rb b/examples/contrib/anthropic/streaming.rb new file mode 100644 index 0000000..ea42074 --- /dev/null +++ b/examples/contrib/anthropic/streaming.rb @@ -0,0 +1,75 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "anthropic" +require "opentelemetry/sdk" + +# Example: Anthropic streaming with Braintrust tracing +# +# This example demonstrates how to trace streaming Anthropic API calls. +# Shows two different streaming patterns: +# 1. Iterator-based streaming with .each (full event access) +# 2. 
Text-only streaming with .text (simplified) +# +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/streaming.rb + +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + puts "Get your API key from: https://console.anthropic.com/" + exit 1 +end + +Braintrust.init(blocking_login: true) +Braintrust.instrument!(:anthropic) + +client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-streaming-example") +root_span = nil + +tracer.in_span("examples/contrib/anthropic/streaming.rb") do |span| + root_span = span + + # Pattern 1: Iterator-based streaming with .each + # Returns a MessageStream, iterate to get individual events + puts "=== Pattern 1: Iterator with .each ===" + print "Claude: " + + stream = client.messages.stream( + model: "claude-3-haiku-20240307", + max_tokens: 100, + messages: [{role: "user", content: "Count from 1 to 5, one number per line."}] + ) + + stream.each do |event| + if event.type == :content_block_delta && event.delta.respond_to?(:text) + print event.delta.text + end + end + puts "\n" + + # Pattern 2: Text-only streaming with .text + # Returns an Enumerator that yields only text chunks (simpler) + puts "=== Pattern 2: Text-only with .text ===" + print "Claude: " + + stream = client.messages.stream( + model: "claude-3-haiku-20240307", + max_tokens: 100, + messages: [{role: "user", content: "Name 3 colors."}] + ) + + stream.text.each do |text_chunk| + print text_chunk + end + puts "\n" +end + +puts "\nView this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +OpenTelemetry.tracer_provider.shutdown + +puts "\nTrace sent to Braintrust!" 
diff --git a/examples/internal/anthropic_golden.rb b/examples/internal/contrib/anthropic/golden.rb similarity index 99% rename from examples/internal/anthropic_golden.rb rename to examples/internal/contrib/anthropic/golden.rb index badd3ad..93e6b55 100755 --- a/examples/internal/anthropic_golden.rb +++ b/examples/internal/contrib/anthropic/golden.rb @@ -28,7 +28,7 @@ # Create an Anthropic client with tracing client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -Braintrust::Trace::Anthropic.wrap(client) +Braintrust.instrument!(:anthropic) # Get a tracer instance tracer = OpenTelemetry.tracer_provider.tracer("anthropic-golden") diff --git a/examples/internal/anthropic.rb b/examples/internal/contrib/anthropic/kitchen-sink.rb similarity index 99% rename from examples/internal/anthropic.rb rename to examples/internal/contrib/anthropic/kitchen-sink.rb index 6dfbad2..3fc483d 100644 --- a/examples/internal/anthropic.rb +++ b/examples/internal/contrib/anthropic/kitchen-sink.rb @@ -29,7 +29,7 @@ # Create an Anthropic client with tracing client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -Braintrust::Trace::Anthropic.wrap(client) +Braintrust.instrument!(:anthropic) # Get a tracer instance tracer = OpenTelemetry.tracer_provider.tracer("anthropic-examples") diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index 2b24890..713d536 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -102,8 +102,10 @@ def tracer_for(target, name: "braintrust") require_relative "contrib/openai/integration" require_relative "contrib/ruby_openai/integration" require_relative "contrib/ruby_llm/integration" +require_relative "contrib/anthropic/integration" # Register integrations Braintrust::Contrib::OpenAI::Integration.register! Braintrust::Contrib::RubyOpenAI::Integration.register! Braintrust::Contrib::RubyLLM::Integration.register! +Braintrust::Contrib::Anthropic::Integration.register! 
diff --git a/lib/braintrust/contrib/anthropic/deprecated.rb b/lib/braintrust/contrib/anthropic/deprecated.rb new file mode 100644 index 0000000..1ef5fb3 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/deprecated.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +# Backward compatibility shim for the old Anthropic integration API. +# This file now just delegates to the new API. + +require_relative "../../../braintrust" + +module Braintrust + module Trace + module Anthropic + # Wrap an Anthropic::Client to automatically create spans for messages. + # This is the legacy API - delegates to the new contrib framework. + # + # @param client [Anthropic::Client] the Anthropic client to wrap + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider + # @return [Anthropic::Client] the wrapped client + def self.wrap(client, tracer_provider: nil) + Log.warn("Braintrust::Trace::Anthropic.wrap() is deprecated and will be removed in a future version: use Braintrust.instrument!() instead.") + Braintrust.instrument!(:anthropic, target: client, tracer_provider: tracer_provider) + client + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/instrumentation/common.rb b/lib/braintrust/contrib/anthropic/instrumentation/common.rb new file mode 100644 index 0000000..710a413 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/instrumentation/common.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Anthropic + module Instrumentation + # Common utilities for Anthropic SDK instrumentation. + module Common + # Parse Anthropic SDK usage tokens into normalized Braintrust metrics. + # Accumulates cache tokens into prompt_tokens and calculates total. + # Works with both Hash objects and SDK response objects (via to_h). 
+ # @param usage [Hash, Object] usage object from Anthropic response + # @return [Hash] normalized metrics + def self.parse_usage_tokens(usage) + metrics = {} + return metrics unless usage + + usage_hash = usage.respond_to?(:to_h) ? usage.to_h : usage + return metrics unless usage_hash.is_a?(Hash) + + # Anthropic SDK field mappings → Braintrust metrics + field_map = { + "input_tokens" => "prompt_tokens", + "output_tokens" => "completion_tokens", + "cache_read_input_tokens" => "prompt_cached_tokens", + "cache_creation_input_tokens" => "prompt_cache_creation_tokens" + } + + usage_hash.each do |key, value| + next unless value.is_a?(Numeric) + key_str = key.to_s + target = field_map[key_str] + metrics[target] = value.to_i if target + end + + # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs) + prompt_tokens = (metrics["prompt_tokens"] || 0) + + (metrics["prompt_cached_tokens"] || 0) + + (metrics["prompt_cache_creation_tokens"] || 0) + metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0 + + # Calculate total + if metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") + metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] + end + + metrics + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/instrumentation/messages.rb b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb new file mode 100644 index 0000000..5512c07 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/instrumentation/messages.rb @@ -0,0 +1,232 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" +require_relative "../../support/otel" +require_relative "common" +require_relative "../../../internal/time" + +module Braintrust + module Contrib + module Anthropic + module Instrumentation + # Messages instrumentation for Anthropic. + # Wraps create() and stream() methods to create spans. 
+ module Messages + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + METADATA_FIELDS = %i[ + model max_tokens temperature top_p top_k stop_sequences + stream tools tool_choice thinking metadata service_tier + ].freeze + + # Wrap synchronous messages.create + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("anthropic.messages.create") do |span| + metadata = build_metadata(params) + set_input(span, params) + + response = nil + time_to_first_token = Braintrust::Internal::Time.measure do + response = super(**params) + end + + set_output(span, response) + set_metrics(span, response, time_to_first_token) + finalize_metadata(span, metadata, response) + + response + end + end + + # Wrap streaming messages.stream + # Stores context on stream object for span creation during consumption + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + metadata = build_metadata(params, stream: true) + + stream_obj = super + Braintrust::Contrib::Context.set!(stream_obj, + tracer: tracer, + params: params, + metadata: metadata, + messages_instance: self, + start_time: Braintrust::Internal::Time.measure) + stream_obj + end + + private + + def finalize_stream_span(span, stream_obj, metadata, time_to_first_token) + if stream_obj.respond_to?(:accumulated_message) + begin + msg = stream_obj.accumulated_message + set_output(span, msg) + set_metrics(span, msg, time_to_first_token) + metadata["stop_reason"] = msg.stop_reason if msg.respond_to?(:stop_reason) && msg.stop_reason + metadata["model"] = msg.model if msg.respond_to?(:model) && msg.model + rescue => e + Braintrust::Log.debug("Failed to get accumulated message: #{e.message}") + end + end + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) 
+ end + + def build_metadata(params, stream: false) + metadata = { + "provider" => "anthropic", + "endpoint" => "/v1/messages" + } + metadata["stream"] = true if stream + METADATA_FIELDS.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata + end + + def set_input(span, params) + input_messages = [] + + if params[:system] + system_content = params[:system] + if system_content.is_a?(Array) + system_text = system_content.map { |blk| + blk.is_a?(Hash) ? blk[:text] : blk + }.join("\n") + input_messages << {role: "system", content: system_text} + else + input_messages << {role: "system", content: system_content} + end + end + + if params[:messages] + messages_array = params[:messages].map(&:to_h) + input_messages.concat(messages_array) + end + + Support::OTel.set_json_attr(span, "braintrust.input_json", input_messages) if input_messages.any? + end + + def set_output(span, response) + return unless response.respond_to?(:content) && response.content + + content_array = response.content.map(&:to_h) + output = [{ + role: response.respond_to?(:role) ? response.role : "assistant", + content: content_array + }] + Support::OTel.set_json_attr(span, "braintrust.output_json", output) + end + + def set_metrics(span, response, time_to_first_token) + metrics = {} + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + end + metrics["time_to_first_token"] = time_to_first_token if time_to_first_token + Support::OTel.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ end + + def finalize_metadata(span, metadata, response) + metadata["stop_reason"] = response.stop_reason if response.respond_to?(:stop_reason) && response.stop_reason + metadata["stop_sequence"] = response.stop_sequence if response.respond_to?(:stop_sequence) && response.stop_sequence + metadata["model"] = response.model if response.respond_to?(:model) && response.model + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + end + + # MessageStream instrumentation for Anthropic. + # Prepended to Anthropic::Helpers::Streaming::MessageStream to create spans on consumption. + module MessageStream + def self.included(base) + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + def each(&block) + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + trace_consumption(ctx) do + super do |*args| + ctx[:time_to_first_token] ||= Braintrust::Internal::Time.measure(ctx[:start_time]) + block.call(*args) + end + end + end + + def text + ctx = Braintrust::Contrib::Context.from(self) + return super unless ctx&.[](:tracer) && !ctx[:consumed] + + original_text_enum = super + Enumerator.new do |output| + trace_consumption(ctx) do + original_text_enum.each do |text_chunk| + ctx[:time_to_first_token] ||= Braintrust::Internal::Time.measure(ctx[:start_time]) + output << text_chunk + end + end + end + end + + def close + ctx = Braintrust::Contrib::Context.from(self) + if ctx&.[](:tracer) && !ctx[:consumed] + # Stream closed without consumption - create minimal span + ctx[:consumed] = true + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + messages_instance = ctx[:messages_instance] + + tracer.in_span("anthropic.messages.create") do |span| + messages_instance.send(:set_input, span, params) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + end + end + 
super + end + + private + + def trace_consumption(ctx) + # Mark as consumed to prevent re-entry (accumulated_message calls each internally) + ctx[:consumed] = true + + tracer = ctx[:tracer] + params = ctx[:params] + metadata = ctx[:metadata] + messages_instance = ctx[:messages_instance] + + tracer.in_span("anthropic.messages.create") do |span| + messages_instance.send(:set_input, span, params) + Support::OTel.set_json_attr(span, "braintrust.metadata", metadata) + + yield + + messages_instance.send(:finalize_stream_span, span, self, metadata, ctx[:time_to_first_token]) + end + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/integration.rb b/lib/braintrust/contrib/anthropic/integration.rb new file mode 100644 index 0000000..42961a9 --- /dev/null +++ b/lib/braintrust/contrib/anthropic/integration.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require_relative "../integration" + +module Braintrust + module Contrib + module Anthropic + # Anthropic integration for automatic instrumentation. + # Instruments the anthropic gem (https://github.com/anthropics/anthropic-sdk-ruby). + class Integration + include Braintrust::Contrib::Integration + + MINIMUM_VERSION = "0.3.0" + + GEM_NAMES = ["anthropic"].freeze + REQUIRE_PATHS = ["anthropic"].freeze + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :anthropic + end + + # @return [Array] Gem names this integration supports + def self.gem_names + GEM_NAMES + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + REQUIRE_PATHS + end + + # @return [String] Minimum compatible version + def self.minimum_version + MINIMUM_VERSION + end + + # @return [Boolean] true if anthropic gem is available + def self.loaded? + defined?(::Anthropic::Client) ? true : false + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. 
+ # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [MessagesPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/anthropic/patcher.rb b/lib/braintrust/contrib/anthropic/patcher.rb new file mode 100644 index 0000000..4f02a1f --- /dev/null +++ b/lib/braintrust/contrib/anthropic/patcher.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/messages" + +module Braintrust + module Contrib + module Anthropic + # Patcher for Anthropic messages. + # Instruments Anthropic::Messages#create and #stream methods. + class MessagesPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::Anthropic::Client) + end + + def patched?(**options) + target_class = get_singleton_class(options[:target]) || ::Anthropic::Resources::Messages + Instrumentation::Messages.applied?(target_class) + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + # MessageStream is shared across all clients, so patch at class level. + # The instrumentation short-circuits when no context is present, + # so uninstrumented clients' streams pass through unaffected. 
+ patch_message_stream + + if options[:target] + # Instance-level (for only this client instance) + raise ArgumentError, "target must be a kind of ::Anthropic::Client" unless options[:target].is_a?(::Anthropic::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Messages) + else + # Class-level (for all client instances) + ::Anthropic::Resources::Messages.include(Instrumentation::Messages) + end + end + + private + + def get_singleton_class(client) + client&.messages&.singleton_class + end + + def patch_message_stream + return unless defined?(::Anthropic::Helpers::Streaming::MessageStream) + return if Instrumentation::MessageStream.applied?(::Anthropic::Helpers::Streaming::MessageStream) + + ::Anthropic::Helpers::Streaming::MessageStream.include(Instrumentation::MessageStream) + end + end + end + end + end +end diff --git a/lib/braintrust/trace.rb b/lib/braintrust/trace.rb index b87d804..578f194 100644 --- a/lib/braintrust/trace.rb +++ b/lib/braintrust/trace.rb @@ -6,14 +6,6 @@ require_relative "trace/span_filter" require_relative "logger" -# Anthropic integration is optional - automatically loaded if anthropic gem is available -begin - require "anthropic" - require_relative "trace/contrib/anthropic" -rescue LoadError - # Anthropic gem not installed - integration will not be available -end - module Braintrust module Trace # Set up OpenTelemetry tracing with Braintrust diff --git a/lib/braintrust/trace/contrib/anthropic.rb b/lib/braintrust/trace/contrib/anthropic.rb deleted file mode 100644 index 533eb8a..0000000 --- a/lib/braintrust/trace/contrib/anthropic.rb +++ /dev/null @@ -1,316 +0,0 @@ -# frozen_string_literal: true - -require "opentelemetry/sdk" -require "json" -require_relative "../tokens" - -module Braintrust - module Trace - module Anthropic - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name 
[String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from Anthropic API response - # @param usage [Hash, Object] usage object from Anthropic response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_anthropic_usage_tokens(usage) - end - - # Wrap an Anthropic::Client to automatically create spans for messages and responses - # Supports both synchronous and streaming requests - # @param client [Anthropic::Client] the Anthropic client to wrap - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) - def self.wrap(client, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # Wrap messages.create - wrap_messages_create(client, tracer_provider) - - # Wrap messages.stream (Anthropic SDK always has this method) - wrap_messages_stream(client, tracer_provider) - - client - end - - # Wrap messages.create API - # @param client [Anthropic::Client] the Anthropic client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_messages_create(client, tracer_provider) - # Create a wrapper module that intercepts messages.create - wrapper = Module.new do - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("anthropic.messages.create") do |span| - # Initialize metadata hash - metadata = { - "provider" => "anthropic", - "endpoint" => "/v1/messages" - } - - # Capture request metadata fields - metadata_fields = %i[ - model max_tokens temperature top_p top_k stop_sequences - stream tools tool_choice thinking metadata service_tier - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if 
params.key?(field) - end - - # Build input messages array, prepending system prompt if present - input_messages = [] - - # Prepend system prompt as a message if present - if params[:system] - # System can be a string or array of text blocks - system_content = params[:system] - if system_content.is_a?(Array) - # Extract text from array of text blocks - system_text = system_content.map { |block| - block.is_a?(Hash) ? block[:text] : block - }.join("\n") - input_messages << {role: "system", content: system_text} - else - input_messages << {role: "system", content: system_content} - end - end - - # Add user/assistant messages - if params[:messages] - messages_array = params[:messages].map(&:to_h) - input_messages.concat(messages_array) - end - - # Set input messages as JSON - if input_messages.any? - span.set_attribute("braintrust.input_json", JSON.generate(input_messages)) - end - - # Call the original method - response = super(**params) - - # Format output as array of messages (same format as input) - if response.respond_to?(:content) && response.content - content_array = response.content.map(&:to_h) - output = [{ - role: response.respond_to?(:role) ? response.role : "assistant", - content: content_array - }] - span.set_attribute("braintrust.output_json", JSON.generate(output)) - end - - # Set metrics (token usage with Anthropic-specific cache tokens) - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(response.usage) - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? 
- end - - # Add response metadata fields - if response.respond_to?(:stop_reason) && response.stop_reason - metadata["stop_reason"] = response.stop_reason - end - if response.respond_to?(:stop_sequence) && response.stop_sequence - metadata["stop_sequence"] = response.stop_sequence - end - # Update model if present in response (in case it was resolved from "latest") - if response.respond_to?(:model) && response.model - metadata["model"] = response.model - end - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - end - - # Prepend the wrapper to the messages resource - client.messages.singleton_class.prepend(wrapper) - end - - # Wrap messages.stream API - # @param client [Anthropic::Client] the Anthropic client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_messages_stream(client, tracer_provider) - # Create a wrapper module that intercepts messages.stream - wrapper = Module.new do - define_method(:stream) do |**params, &block| - tracer = tracer_provider.tracer("braintrust") - - metadata = { - "provider" => "anthropic", - "endpoint" => "/v1/messages", - "stream" => true - } - - # Start span with proper context - span = tracer.start_span("anthropic.messages.create") - - # Capture request metadata fields - metadata_fields = %i[ - model max_tokens temperature top_p top_k stop_sequences - tools tool_choice thinking metadata service_tier - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Build input messages array, prepending system prompt if present - input_messages = [] - - if params[:system] - system_content = params[:system] - if system_content.is_a?(Array) - system_text = system_content.map { |block| - block.is_a?(Hash) ? 
block[:text] : block - }.join("\n") - input_messages << {role: "system", content: system_text} - else - input_messages << {role: "system", content: system_content} - end - end - - if params[:messages] - messages_array = params[:messages].map(&:to_h) - input_messages.concat(messages_array) - end - - if input_messages.any? - span.set_attribute("braintrust.input_json", JSON.generate(input_messages)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method WITHOUT passing the block - # We'll handle the block ourselves to aggregate events - begin - stream = super(**params) - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Anthropic API error: #{e.message}") - span.finish - raise - end - - # Store references on the stream object itself for the wrapper - stream.instance_variable_set(:@braintrust_span, span) - stream.instance_variable_set(:@braintrust_metadata, metadata) - stream.instance_variable_set(:@braintrust_span_finished, false) - - # Local helper for brevity - set_json_attr = ->(attr_name, obj) { Braintrust::Trace::Anthropic.set_json_attr(span, attr_name, obj) } - - # Helper lambda to extract stream data and set span attributes - # This is DRY - used by both .each() and .text() wrappers - extract_stream_metadata = lambda do - # Extract the SDK's internal accumulated message (built during streaming) - acc_msg = stream.instance_variable_get(:@accumated_message_snapshot) - return unless acc_msg - - # Set output from accumulated message - if acc_msg.respond_to?(:content) && acc_msg.content - content_array = acc_msg.content.map(&:to_h) - output = [{ - role: acc_msg.respond_to?(:role) ? 
acc_msg.role : "assistant", - content: content_array - }] - set_json_attr.call("braintrust.output_json", output) - end - - # Set metrics from accumulated message - if acc_msg.respond_to?(:usage) && acc_msg.usage - metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(acc_msg.usage) - set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty? - end - - # Update metadata with response fields - if acc_msg.respond_to?(:stop_reason) && acc_msg.stop_reason - metadata["stop_reason"] = acc_msg.stop_reason - end - if acc_msg.respond_to?(:model) && acc_msg.model - metadata["model"] = acc_msg.model - end - set_json_attr.call("braintrust.metadata", metadata) - end - - # Helper lambda to finish span (prevents double-finishing via closure) - finish_braintrust_span = lambda do - return if stream.instance_variable_get(:@braintrust_span_finished) - stream.instance_variable_set(:@braintrust_span_finished, true) - - extract_stream_metadata.call - span.finish - end - - # Wrap .each() to ensure span finishes after consumption - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&user_block| - # Consume stream, calling user's block for each event - # The SDK builds @accumated_message_snapshot internally - original_each.call(&user_block) - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Extract accumulated message and finish span - finish_braintrust_span.call - end - - # Wrap .text() to return an Enumerable that ensures span finishes - original_text = stream.method(:text) - stream.define_singleton_method(:text) do - text_enum = original_text.call - - # Return wrapper Enumerable that finishes span after consumption - Enumerator.new do |y| - # Consume text enumerable (this consumes underlying stream) - # The SDK builds @accumated_message_snapshot internally - text_enum.each { |text| y << text } - rescue => e - span.record_exception(e) - 
span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Extract accumulated message and finish span - finish_braintrust_span.call - end - end - - # Wrap .close() to ensure span finishes even if stream not consumed - original_close = stream.method(:close) - stream.define_singleton_method(:close) do - original_close.call - ensure - # Finish span even if stream was closed early - finish_braintrust_span.call - end - - # If a block was provided to stream(), call each with it immediately - if block - stream.each(&block) - end - - stream - end - end - - # Prepend the wrapper to the messages resource - client.messages.singleton_class.prepend(wrapper) - end - end - end -end diff --git a/lib/braintrust/trace/tokens.rb b/lib/braintrust/trace/tokens.rb deleted file mode 100644 index 679df12..0000000 --- a/lib/braintrust/trace/tokens.rb +++ /dev/null @@ -1,49 +0,0 @@ -# frozen_string_literal: true - -module Braintrust - module Trace - # Parse Anthropic usage tokens into normalized Braintrust metrics. - # Accumulates cache tokens into prompt_tokens and calculates total. - # @param usage [Hash, Object] usage object from Anthropic response - # @return [Hash] normalized metrics - def self.parse_anthropic_usage_tokens(usage) - metrics = {} - return metrics unless usage - - usage_hash = usage.respond_to?(:to_h) ? 
usage.to_h : usage - return metrics unless usage_hash.is_a?(Hash) - - # Field mappings: Anthropic → Braintrust - # Also handles RubyLLM's simplified cache field names - field_map = { - "input_tokens" => "prompt_tokens", - "output_tokens" => "completion_tokens", - "cache_read_input_tokens" => "prompt_cached_tokens", - "cache_creation_input_tokens" => "prompt_cache_creation_tokens", - # RubyLLM uses simplified names - "cached_tokens" => "prompt_cached_tokens", - "cache_creation_tokens" => "prompt_cache_creation_tokens" - } - - usage_hash.each do |key, value| - next unless value.is_a?(Numeric) - key_str = key.to_s - target = field_map[key_str] - metrics[target] = value.to_i if target - end - - # Accumulate cache tokens into prompt_tokens (matching TS/Python SDKs) - prompt_tokens = (metrics["prompt_tokens"] || 0) + - (metrics["prompt_cached_tokens"] || 0) + - (metrics["prompt_cache_creation_tokens"] || 0) - metrics["prompt_tokens"] = prompt_tokens if prompt_tokens > 0 - - # Calculate total - if metrics.key?("prompt_tokens") && metrics.key?("completion_tokens") - metrics["tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"] - end - - metrics - end - end -end diff --git a/test/braintrust/contrib/anthropic/deprecated_test.rb b/test/braintrust/contrib/anthropic/deprecated_test.rb new file mode 100644 index 0000000..9fda545 --- /dev/null +++ b/test/braintrust/contrib/anthropic/deprecated_test.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/anthropic/integration" +require "braintrust/contrib/anthropic/deprecated" + +class Braintrust::Contrib::Anthropic::DeprecatedTest < Minitest::Test + # --- .wrap --- + + def test_wrap_delegates_to_instrument + mock_client = Object.new + mock_tracer = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Anthropic.wrap(mock_client, tracer_provider: mock_tracer) } + end 
+ + assert_equal :anthropic, captured_args[0] + assert_same mock_client, captured_args[1][:target] + assert_same mock_tracer, captured_args[1][:tracer_provider] + end + + def test_wrap_logs_deprecation_warning + mock_client = Object.new + + warning_message = nil + Braintrust.stub(:instrument!, ->(*) {}) do + Braintrust::Log.stub(:warn, ->(msg) { warning_message = msg }) do + Braintrust::Trace::Anthropic.wrap(mock_client) + end + end + + assert_match(/deprecated/, warning_message) + assert_match(/Braintrust\.instrument!/, warning_message) + end + + def test_wrap_returns_client + mock_client = Object.new + + Braintrust.stub(:instrument!, ->(*) {}) do + result = suppress_logs { Braintrust::Trace::Anthropic.wrap(mock_client) } + assert_same mock_client, result + end + end + + def test_wrap_uses_default_tracer_when_not_provided + mock_client = Object.new + + captured_args = nil + Braintrust.stub(:instrument!, ->(name, **opts) { captured_args = [name, opts] }) do + suppress_logs { Braintrust::Trace::Anthropic.wrap(mock_client) } + end + + assert_equal :anthropic, captured_args[0] + assert_nil captured_args[1][:tracer_provider] + end +end diff --git a/test/braintrust/contrib/anthropic/instrumentation/common_test.rb b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb new file mode 100644 index 0000000..a530424 --- /dev/null +++ b/test/braintrust/contrib/anthropic/instrumentation/common_test.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/contrib/anthropic/instrumentation/common" + +class Braintrust::Contrib::Anthropic::Instrumentation::CommonTest < Minitest::Test + Common = Braintrust::Contrib::Anthropic::Instrumentation::Common + + # =================== + # parse_usage_tokens + # =================== + + def test_maps_standard_fields + usage = {"input_tokens" => 10, "output_tokens" => 20} + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, 
metrics["completion_tokens"] + assert_equal 30, metrics["tokens"] + end + + def test_handles_symbol_keys + usage = {input_tokens: 10, output_tokens: 20} + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + end + + def test_handles_cache_read_tokens + usage = { + "input_tokens" => 100, + "output_tokens" => 50, + "cache_read_input_tokens" => 80 + } + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 80, metrics["prompt_cached_tokens"] + # prompt_tokens should include cache tokens + assert_equal 180, metrics["prompt_tokens"] + end + + def test_handles_cache_creation_tokens + usage = { + "input_tokens" => 100, + "output_tokens" => 50, + "cache_creation_input_tokens" => 20 + } + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 20, metrics["prompt_cache_creation_tokens"] + # prompt_tokens should include cache creation tokens + assert_equal 120, metrics["prompt_tokens"] + end + + def test_handles_object_with_to_h + # SDK returns objects with to_h method + usage_object = Struct.new(:input_tokens, :output_tokens, keyword_init: true) + usage = usage_object.new(input_tokens: 10, output_tokens: 20) + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 10, metrics["prompt_tokens"] + assert_equal 20, metrics["completion_tokens"] + end + + def test_returns_empty_hash_for_nil + assert_equal({}, Common.parse_usage_tokens(nil)) + end + + def test_returns_empty_hash_for_non_hash + assert_equal({}, Common.parse_usage_tokens("invalid")) + end + + def test_calculates_total + usage = {"input_tokens" => 10, "output_tokens" => 20} + + metrics = Common.parse_usage_tokens(usage) + + assert_equal 30, metrics["tokens"] + end + + def test_total_includes_cache_tokens + usage = { + "input_tokens" => 100, + "output_tokens" => 50, + "cache_read_input_tokens" => 80, + "cache_creation_input_tokens" => 20 + } + + metrics = Common.parse_usage_tokens(usage) + + # Total = 
prompt_tokens (which includes cache) + completion_tokens + # prompt_tokens = 100 + 80 + 20 = 200 + # tokens = 200 + 50 = 250 + assert_equal 250, metrics["tokens"] + end +end diff --git a/test/braintrust/trace/anthropic_test.rb b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb similarity index 79% rename from test/braintrust/trace/anthropic_test.rb rename to test/braintrust/contrib/anthropic/instrumentation/messages_test.rb index 918f010..d9b00b7 100644 --- a/test/braintrust/trace/anthropic_test.rb +++ b/test/braintrust/contrib/anthropic/instrumentation/messages_test.rb @@ -1,23 +1,26 @@ # frozen_string_literal: true require "test_helper" +require_relative "../integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/anthropic/patcher" + +class Braintrust::Contrib::Anthropic::Instrumentation::MessagesTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper -class Braintrust::Trace::AnthropicTest < Minitest::Test def setup - # Skip all Anthropic tests if the gem is not available - skip "Anthropic gem not available" unless defined?(Anthropic) + skip_unless_anthropic! 
end - def test_wrap_creates_span_for_basic_message + def test_creates_span_for_basic_message VCR.use_cassette("anthropic/basic_message") do - require "anthropic" - # Set up test rig (includes Braintrust processor) rig = setup_otel_test_rig - # Create Anthropic client and wrap it with Braintrust tracing + # Create Anthropic client and instrument it client = Anthropic::Client.new(api_key: get_anthropic_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a simple message request message = client.messages.create( @@ -68,19 +71,60 @@ def test_wrap_creates_span_for_basic_message assert metrics["completion_tokens"] > 0 assert metrics["tokens"] > 0 assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_system_prompt - VCR.use_cassette("anthropic/system_prompt") do - require "anthropic" + def test_creates_span_with_class_level_patching + VCR.use_cassette("anthropic/basic_message") do + # Set up test rig + rig = setup_otel_test_rig + + # For class-level patching, set the default tracer provider via Braintrust.init + # (instance-level patching uses target: which stores tracer_provider in context) + Braintrust.init(tracer_provider: rig.tracer_provider) + + # Instrument at class level (no target:) - patches Anthropic::Resources::Messages + Braintrust.instrument!(:anthropic) + # Create client AFTER class-level instrumentation + client = Anthropic::Client.new(api_key: get_anthropic_key) + + # Make a simple message request + message = client.messages.create( + model: "claude-3-5-sonnet-20241022", + max_tokens: 10, + messages: [ + {role: "user", 
content: "Say 'test'"} + ] + ) + + # Verify response + refute_nil message + + # Drain and verify span was created + span = rig.drain_one + + # Verify span name and key attributes + assert_equal "anthropic.messages.create", span.name + assert span.attributes.key?("braintrust.input_json") + assert span.attributes.key?("braintrust.output_json") + assert span.attributes.key?("braintrust.metadata") + assert span.attributes.key?("braintrust.metrics") + end + end + + def test_handles_system_prompt + VCR.use_cassette("anthropic/system_prompt") do # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with system prompt message = client.messages.create( @@ -122,16 +166,14 @@ def test_wrap_handles_system_prompt end end - def test_wrap_handles_tool_use + def test_handles_tool_use VCR.use_cassette("anthropic/tool_use") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with tools message = client.messages.create( @@ -191,16 +233,14 @@ def test_wrap_handles_tool_use end end - def test_wrap_handles_streaming + def test_handles_streaming VCR.use_cassette("anthropic/streaming") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = 
Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request stream = client.messages.stream( @@ -216,13 +256,12 @@ def test_wrap_handles_streaming # Just consume events end - # Drain and verify span was created + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name - # Verify input captured + # Verify input captured on span assert span.attributes.key?("braintrust.input_json") input = JSON.parse(span.attributes["braintrust.input_json"]) assert_equal 1, input.length @@ -232,24 +271,17 @@ def test_wrap_handles_streaming assert span.attributes.key?("braintrust.metadata") metadata = JSON.parse(span.attributes["braintrust.metadata"]) assert_equal true, metadata["stream"] - - # Note: Full output aggregation testing requires live API calls - # VCR doesn't perfectly replay streaming SSE responses - # The streaming wrapper is implemented and works with real API calls end end - def test_wrap_handles_streaming_output_aggregation - # This test demonstrates the bug: streaming output is not aggregated + def test_handles_streaming_output_aggregation VCR.use_cassette("anthropic/streaming_aggregation") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request collected_text = "" @@ -271,10 +303,9 @@ def 
test_wrap_handles_streaming_output_aggregation # Verify we got some text refute_empty collected_text, "Should have received text from stream" - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -303,19 +334,21 @@ def test_wrap_handles_streaming_output_aggregation assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" assert metrics["tokens"], "Should have total tokens" assert metrics["tokens"] > 0, "Total tokens should be greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_vision_with_base64 + def test_handles_vision_with_base64 VCR.use_cassette("anthropic/vision_base64") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Small 1x1 red pixel PNG as base64 test_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" @@ -369,16 +402,14 @@ def test_wrap_handles_vision_with_base64 end end - def test_wrap_handles_reasoning_thinking_blocks + def test_handles_reasoning_thinking_blocks VCR.use_cassette("anthropic/reasoning") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - 
Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with reasoning enabled message = client.messages.create( @@ -429,16 +460,14 @@ def test_wrap_handles_reasoning_thinking_blocks end end - def test_wrap_handles_multi_turn_conversation + def test_handles_multi_turn_conversation VCR.use_cassette("anthropic/multi_turn") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a multi-turn conversation request message = client.messages.create( @@ -483,16 +512,14 @@ def test_wrap_handles_multi_turn_conversation end end - def test_wrap_handles_temperature_and_stop_sequences + def test_handles_temperature_and_stop_sequences VCR.use_cassette("anthropic/temperature_stop") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a request with temperature and stop sequences message = client.messages.create( @@ -533,16 +560,14 @@ def test_wrap_handles_temperature_and_stop_sequences end end - def test_wrap_handles_tool_use_multi_turn + def 
test_handles_tool_use_multi_turn VCR.use_cassette("anthropic/tool_use_multi_turn") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # First request - model should use tool first_message = client.messages.create( @@ -652,16 +677,14 @@ def test_wrap_handles_tool_use_multi_turn end end - def test_wrap_handles_streaming_with_text_each + def test_handles_streaming_with_text_each VCR.use_cassette("anthropic/streaming_text_each") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request using .text.each collected_text = "" @@ -687,10 +710,9 @@ def test_wrap_handles_streaming_with_text_each assert_match(/2/, collected_text, "Should contain 2") assert_match(/3/, collected_text, "Should contain 3") - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -715,19 +737,21 @@ def test_wrap_handles_streaming_with_text_each assert metrics["prompt_tokens"] > 0, "Prompt tokens should be greater than 0" assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" assert metrics["tokens"] > 0, "Total tokens should be 
greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_streaming_with_accumulated_text + def test_handles_streaming_with_accumulated_text VCR.use_cassette("anthropic/streaming_accumulated_text") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request using .accumulated_text stream = client.messages.stream( @@ -750,10 +774,9 @@ def test_wrap_handles_streaming_with_accumulated_text assert_match(/Hello/, accumulated_text, "Should contain greeting") assert_match(/help/, accumulated_text, "Should offer help") - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -774,19 +797,21 @@ def test_wrap_handles_streaming_with_accumulated_text assert metrics["prompt_tokens"] > 0, "Prompt tokens should be greater than 0" assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" assert metrics["tokens"] > 0, "Total tokens should be greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_streaming_with_accumulated_message + def 
test_handles_streaming_with_accumulated_message VCR.use_cassette("anthropic/streaming_accumulated_message") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request using .accumulated_message stream = client.messages.stream( @@ -805,10 +830,9 @@ def test_wrap_handles_streaming_with_accumulated_message refute_nil message.content, "Message should have content" refute_empty message.content, "Message content should not be empty" - # Drain and verify span + # Single span created during consumption span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name # CRITICAL: Verify output was aggregated @@ -827,19 +851,21 @@ def test_wrap_handles_streaming_with_accumulated_message assert metrics["prompt_tokens"] > 0, "Prompt tokens should be greater than 0" assert metrics["completion_tokens"], "Should have completion_tokens" assert metrics["completion_tokens"] > 0, "Completion tokens should be greater than 0" + + # Verify time_to_first_token is present and non-negative + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be non-negative" end end - def test_wrap_handles_streaming_with_close + def test_handles_streaming_with_close VCR.use_cassette("anthropic/streaming_close") do - require "anthropic" - # Set up test rig rig = setup_otel_test_rig - # Create Anthropic client and wrap it - client = Anthropic::Client.new(api_key: @api_key) - Braintrust::Trace::Anthropic.wrap(client, tracer_provider: rig.tracer_provider) + # Create 
Anthropic client and instrument it + client = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client, tracer_provider: rig.tracer_provider) # Make a streaming request and close early without consuming stream = client.messages.stream( @@ -853,13 +879,12 @@ def test_wrap_handles_streaming_with_close # Close the stream early (before consuming) stream.close - # Drain and verify span was finished + # Single span created on close span = rig.drain_one - # Verify span name assert_equal "anthropic.messages.create", span.name - # Verify input was captured + # Verify input was captured on span assert span.attributes.key?("braintrust.input_json") input = JSON.parse(span.attributes["braintrust.input_json"]) assert_equal 1, input.length diff --git a/test/braintrust/contrib/anthropic/instrumentation_test.rb b/test/braintrust/contrib/anthropic/instrumentation_test.rb new file mode 100644 index 0000000..2932d1c --- /dev/null +++ b/test/braintrust/contrib/anthropic/instrumentation_test.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/anthropic/patcher" +require "braintrust/contrib/anthropic/deprecated" + +# Tests for instance-level instrumentation behavior and backward compatibility +class Braintrust::Contrib::Anthropic::InstrumentationTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper + + def setup + skip_unless_anthropic! + end + + # --- Instance-level instrumentation --- + + def test_instance_instrumentation_only_patches_target_client + # Skip if class-level patching already occurred from another test + skip "class already patched by another test" if Braintrust::Contrib::Anthropic::MessagesPatcher.patched? 
+ + rig = setup_otel_test_rig + + # Create two client instances BEFORE any patching + client_traced = Anthropic::Client.new(api_key: "test-api-key") + client_untraced = Anthropic::Client.new(api_key: "test-api-key") + + # Verify neither client is patched initially + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_traced), + "client_traced should not be patched initially" + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_untraced), + "client_untraced should not be patched initially" + + # Verify class is not patched + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?, + "class should not be patched initially" + + # Instrument only one client (instance-level) + Braintrust.instrument!(:anthropic, target: client_traced, tracer_provider: rig.tracer_provider) + + # Only the traced client should be patched + assert Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_traced), + "client_traced should be patched after instrument!" + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client_untraced), + "client_untraced should NOT be patched after instrument!" 
+ + # Class itself should NOT be patched (only the instance) + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?, + "class should NOT be patched when using instance-level instrumentation" + end + + # --- Backward compatibility: wrap() vs instrument!() --- + + def test_wrap_and_instrument_produce_identical_spans + VCR.use_cassette("anthropic/basic_message", allow_playback_repeats: true) do + # Test with old wrap() API + rig_wrap = setup_otel_test_rig + client_wrap = Anthropic::Client.new(api_key: get_anthropic_key) + suppress_logs { Braintrust::Trace::Anthropic.wrap(client_wrap, tracer_provider: rig_wrap.tracer_provider) } + + client_wrap.messages.create( + model: "claude-3-5-sonnet-20241022", + max_tokens: 10, + messages: [{role: "user", content: "Say 'test'"}] + ) + span_wrap = rig_wrap.drain_one + + # Test with new instrument!() API + rig_instrument = setup_otel_test_rig + client_instrument = Anthropic::Client.new(api_key: get_anthropic_key) + Braintrust.instrument!(:anthropic, target: client_instrument, tracer_provider: rig_instrument.tracer_provider) + + client_instrument.messages.create( + model: "claude-3-5-sonnet-20241022", + max_tokens: 10, + messages: [{role: "user", content: "Say 'test'"}] + ) + span_instrument = rig_instrument.drain_one + + # Both spans should have identical structure + assert_equal span_wrap.name, span_instrument.name, + "span names should match" + assert_equal span_wrap.attributes["braintrust.input_json"], + span_instrument.attributes["braintrust.input_json"], + "input attributes should match" + assert_equal span_wrap.attributes["braintrust.output_json"], + span_instrument.attributes["braintrust.output_json"], + "output attributes should match" + + # Compare token metrics (timing metrics like time_to_first_token may vary between calls) + metrics_wrap = JSON.parse(span_wrap.attributes["braintrust.metrics"]) + metrics_instrument = JSON.parse(span_instrument.attributes["braintrust.metrics"]) + %w[prompt_tokens 
completion_tokens tokens].each do |key| + assert_equal metrics_wrap[key], metrics_instrument[key], + "metrics #{key} should match" + end + + # Both should have time_to_first_token (but values may differ) + assert metrics_wrap.key?("time_to_first_token"), "wrap metrics should have time_to_first_token" + assert metrics_instrument.key?("time_to_first_token"), "instrument metrics should have time_to_first_token" + + # Metadata should have same keys (values may differ slightly for timestamps) + metadata_wrap = JSON.parse(span_wrap.attributes["braintrust.metadata"]) + metadata_instrument = JSON.parse(span_instrument.attributes["braintrust.metadata"]) + assert_equal metadata_wrap.keys.sort, metadata_instrument.keys.sort, + "metadata keys should match" + end + end +end diff --git a/test/braintrust/contrib/anthropic/integration_helper.rb b/test/braintrust/contrib/anthropic/integration_helper.rb new file mode 100644 index 0000000..a7ccb7a --- /dev/null +++ b/test/braintrust/contrib/anthropic/integration_helper.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +# Test helpers for Anthropic integration tests. +# Provides gem loading helpers for the anthropic gem. + +module Braintrust + module Contrib + module Anthropic + module IntegrationHelper + # Skip test unless anthropic gem is available. + # Loads the gem if available. + def skip_unless_anthropic! + unless Gem.loaded_specs["anthropic"] + skip "anthropic gem not available" + end + + require "anthropic" unless defined?(::Anthropic) + end + + # Load anthropic gem if available (doesn't skip, for tests that handle both states). 
+ def load_anthropic_if_available + if Gem.loaded_specs["anthropic"] + require "anthropic" unless defined?(::Anthropic) + end + end + end + end + end +end diff --git a/test/braintrust/contrib/anthropic/integration_test.rb b/test/braintrust/contrib/anthropic/integration_test.rb new file mode 100644 index 0000000..0a77fe2 --- /dev/null +++ b/test/braintrust/contrib/anthropic/integration_test.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" +require "braintrust/contrib/anthropic/integration" + +class Braintrust::Contrib::Anthropic::IntegrationTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper + + def setup + load_anthropic_if_available + @integration = Braintrust::Contrib::Anthropic::Integration + end + + # --- .integration_name --- + + def test_integration_name + assert_equal :anthropic, @integration.integration_name + end + + # --- .gem_names --- + + def test_gem_names + assert_equal ["anthropic"], @integration.gem_names + end + + # --- .require_paths --- + + def test_require_paths + assert_equal ["anthropic"], @integration.require_paths + end + + # --- .minimum_version --- + + def test_minimum_version + assert_equal "0.3.0", @integration.minimum_version + end + + # --- .loaded? 
--- + + def test_loaded_returns_true_when_anthropic_client_defined + skip "Anthropic gem not available" unless defined?(::Anthropic::Client) + + assert @integration.loaded?, "Should be loaded when ::Anthropic::Client is defined" + end + + # --- .patchers --- + + def test_patchers_returns_array_of_patcher_classes + patcher_classes = @integration.patchers + + assert_instance_of Array, patcher_classes + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end +end diff --git a/test/braintrust/contrib/anthropic/patcher_test.rb b/test/braintrust/contrib/anthropic/patcher_test.rb new file mode 100644 index 0000000..ba5888d --- /dev/null +++ b/test/braintrust/contrib/anthropic/patcher_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "integration_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/anthropic/patcher" + +class Braintrust::Contrib::Anthropic::MessagesPatcherTest < Minitest::Test + include Braintrust::Contrib::Anthropic::IntegrationHelper + + def setup + skip_unless_anthropic! + end + + # --- .applicable? --- + + def test_applicable_returns_true_when_anthropic_client_defined + assert Braintrust::Contrib::Anthropic::MessagesPatcher.applicable? + end + + # --- .patched? --- + + def test_patched_returns_false_when_not_patched + fake_messages_class = Class.new + + ::Anthropic::Resources.stub_const(:Messages, fake_messages_class) do + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched? 
+ end + end + + def test_patched_returns_true_when_module_included + fake_messages_class = Class.new do + include Braintrust::Contrib::Anthropic::Instrumentation::Messages + end + + ::Anthropic::Resources.stub_const(:Messages, fake_messages_class) do + assert Braintrust::Contrib::Anthropic::MessagesPatcher.patched? + end + end + + def test_patched_returns_false_for_unpatched_instance + fake_singleton = Class.new + + mock_chain(:messages, :singleton_class, returns: fake_singleton) do |client| + refute Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client) + end + end + + def test_patched_returns_true_for_patched_instance + fake_singleton = Class.new do + include Braintrust::Contrib::Anthropic::Instrumentation::Messages + end + + mock_chain(:messages, :singleton_class, returns: fake_singleton) do |client| + assert Braintrust::Contrib::Anthropic::MessagesPatcher.patched?(target: client) + end + end + + # --- .perform_patch --- + + def test_perform_patch_includes_module_for_class_level + fake_messages_class = Minitest::Mock.new + fake_messages_class.expect(:include, true, [Braintrust::Contrib::Anthropic::Instrumentation::Messages]) + + ::Anthropic::Resources.stub_const(:Messages, fake_messages_class) do + Braintrust::Contrib::Anthropic::MessagesPatcher.perform_patch + fake_messages_class.verify + end + end + + def test_perform_patch_includes_module_for_instance_level + terminal = Minitest::Mock.new + terminal.expect(:include, true, [Braintrust::Contrib::Anthropic::Instrumentation::Messages]) + + mock_chain(:messages, :singleton_class, returns: terminal) do |client| + client.expect(:is_a?, true, [::Anthropic::Client]) + Braintrust::Contrib::Anthropic::MessagesPatcher.perform_patch(target: client) + end + terminal.verify + end + + def test_perform_patch_raises_for_invalid_target + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, false, [::Anthropic::Client]) + + assert_raises(ArgumentError) do + 
Braintrust::Contrib::Anthropic::MessagesPatcher.perform_patch(target: fake_client) + end + + fake_client.verify + end +end diff --git a/test/braintrust/trace/tokens_test.rb b/test/braintrust/trace/tokens_test.rb deleted file mode 100644 index c990c58..0000000 --- a/test/braintrust/trace/tokens_test.rb +++ /dev/null @@ -1,78 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" -require "braintrust/trace/tokens" - -class TokensTest < Minitest::Test - def test_anthropic_maps_input_output_tokens - usage = {"input_tokens" => 10, "output_tokens" => 20} - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - assert_equal 10, metrics["prompt_tokens"] - assert_equal 20, metrics["completion_tokens"] - assert_equal 30, metrics["tokens"] - end - - def test_anthropic_maps_cache_tokens - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cache_read_input_tokens" => 50, - "cache_creation_input_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - assert_equal 50, metrics["prompt_cached_tokens"] - assert_equal 5, metrics["prompt_cache_creation_tokens"] - end - - def test_anthropic_accumulates_cache_into_prompt_tokens - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cache_read_input_tokens" => 50, - "cache_creation_input_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - # prompt_tokens = 10 + 50 + 5 = 65 - assert_equal 65, metrics["prompt_tokens"] - end - - def test_anthropic_calculates_total_with_cache - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cache_read_input_tokens" => 50, - "cache_creation_input_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - # tokens = (10 + 50 + 5) + 20 = 85 - assert_equal 85, metrics["tokens"] - end - - def test_anthropic_returns_empty_hash_for_nil - assert_equal({}, Braintrust::Trace.parse_anthropic_usage_tokens(nil)) - end - - def 
test_anthropic_handles_ruby_llm_simplified_cache_fields - usage = { - "input_tokens" => 10, - "output_tokens" => 20, - "cached_tokens" => 50, - "cache_creation_tokens" => 5 - } - - metrics = Braintrust::Trace.parse_anthropic_usage_tokens(usage) - - assert_equal 50, metrics["prompt_cached_tokens"] - assert_equal 5, metrics["prompt_cache_creation_tokens"] - assert_equal 65, metrics["prompt_tokens"] - assert_equal 85, metrics["tokens"] - end -end From 475b390c7049d13c3be48d08273c96b2b661acfc Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:52:14 -0500 Subject: [PATCH 12/27] Added: Internal::Env for managing environment variables --- lib/braintrust/internal/env.rb | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 lib/braintrust/internal/env.rb diff --git a/lib/braintrust/internal/env.rb b/lib/braintrust/internal/env.rb new file mode 100644 index 0000000..ba17b85 --- /dev/null +++ b/lib/braintrust/internal/env.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Braintrust + module Internal + # Environment variable utilities. + module Env + # Parse a comma-separated environment variable into an array of symbols. 
+ # @param key [String] The environment variable name + # @return [Array, nil] Array of symbols, or nil if not set + def self.parse_list(key) + value = ENV[key] + return nil unless value + value.split(",").map(&:strip).map(&:to_sym) + end + end + end +end From 6393b7e0fbc5c85f36815500417550b5fd2f6378 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sun, 28 Dec 2025 20:22:59 -0500 Subject: [PATCH 13/27] Added: ClimateControl for testing with env vars --- Gemfile | 1 + Gemfile.lock | 2 ++ gemfiles/anthropic.gemfile | 1 + gemfiles/anthropic_1_11.gemfile | 1 + gemfiles/anthropic_1_12.gemfile | 1 + gemfiles/anthropic_uninstalled.gemfile | 1 + gemfiles/openai.gemfile | 1 + gemfiles/openai_0_33.gemfile | 1 + gemfiles/openai_0_34.gemfile | 1 + gemfiles/openai_ruby_openai.gemfile | 1 + gemfiles/openai_uninstalled.gemfile | 1 + gemfiles/opentelemetry_latest.gemfile | 1 + gemfiles/opentelemetry_min.gemfile | 1 + gemfiles/ruby_llm.gemfile | 1 + gemfiles/ruby_llm_1_8.gemfile | 1 + gemfiles/ruby_llm_1_9.gemfile | 1 + gemfiles/ruby_llm_uninstalled.gemfile | 1 + gemfiles/ruby_openai.gemfile | 1 + gemfiles/ruby_openai_7_0.gemfile | 1 + gemfiles/ruby_openai_8_0.gemfile | 1 + gemfiles/ruby_openai_uninstalled.gemfile | 1 + test/test_helper.rb | 1 + 22 files changed, 23 insertions(+) diff --git a/Gemfile b/Gemfile index eeccddf..56cd531 100644 --- a/Gemfile +++ b/Gemfile @@ -8,3 +8,4 @@ gemspec # Additional development tools not in gemspec (optional/convenience) gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" diff --git a/Gemfile.lock b/Gemfile.lock index 169bf43..d2ff082 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,6 +19,7 @@ GEM ast (2.4.3) bigdecimal (4.0.1) builder (3.3.0) + climate_control (1.2.0) crack (1.0.1) bigdecimal rexml @@ -126,6 +127,7 @@ PLATFORMS DEPENDENCIES appraisal (~> 2.5) braintrust! 
+ climate_control (~> 1.2) kramdown (~> 2.0) minitest (~> 5.0) minitest-reporters (~> 1.6) diff --git a/gemfiles/anthropic.gemfile b/gemfiles/anthropic.gemfile index 56e98f0..d558f46 100644 --- a/gemfiles/anthropic.gemfile +++ b/gemfiles/anthropic.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "anthropic", ">= 1.11" gemspec path: "../" diff --git a/gemfiles/anthropic_1_11.gemfile b/gemfiles/anthropic_1_11.gemfile index ed708b3..f4e3486 100644 --- a/gemfiles/anthropic_1_11.gemfile +++ b/gemfiles/anthropic_1_11.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "anthropic", "~> 1.11.0" gemspec path: "../" diff --git a/gemfiles/anthropic_1_12.gemfile b/gemfiles/anthropic_1_12.gemfile index 29b1b93..5d59bc9 100644 --- a/gemfiles/anthropic_1_12.gemfile +++ b/gemfiles/anthropic_1_12.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "anthropic", "~> 1.12.0" gemspec path: "../" diff --git a/gemfiles/anthropic_uninstalled.gemfile b/gemfiles/anthropic_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/anthropic_uninstalled.gemfile +++ b/gemfiles/anthropic_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/gemfiles/openai.gemfile b/gemfiles/openai.gemfile index dcc2e81..694be81 100644 --- a/gemfiles/openai.gemfile +++ b/gemfiles/openai.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", ">= 0.34" gem "base64" diff --git a/gemfiles/openai_0_33.gemfile 
b/gemfiles/openai_0_33.gemfile index 6104332..2d1b22b 100644 --- a/gemfiles/openai_0_33.gemfile +++ b/gemfiles/openai_0_33.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", "~> 0.33.0" gem "base64" diff --git a/gemfiles/openai_0_34.gemfile b/gemfiles/openai_0_34.gemfile index d8969d3..774901c 100644 --- a/gemfiles/openai_0_34.gemfile +++ b/gemfiles/openai_0_34.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", "~> 0.34.0" gem "base64" diff --git a/gemfiles/openai_ruby_openai.gemfile b/gemfiles/openai_ruby_openai.gemfile index a8db595..24b7d0b 100644 --- a/gemfiles/openai_ruby_openai.gemfile +++ b/gemfiles/openai_ruby_openai.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "openai", ">= 0.34" gem "ruby-openai", ">= 8.0" diff --git a/gemfiles/openai_uninstalled.gemfile b/gemfiles/openai_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/openai_uninstalled.gemfile +++ b/gemfiles/openai_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/gemfiles/opentelemetry_latest.gemfile b/gemfiles/opentelemetry_latest.gemfile index d309a6b..717db79 100644 --- a/gemfiles/opentelemetry_latest.gemfile +++ b/gemfiles/opentelemetry_latest.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "opentelemetry-sdk", ">= 1.10" gem "opentelemetry-exporter-otlp", ">= 0.31" diff --git a/gemfiles/opentelemetry_min.gemfile b/gemfiles/opentelemetry_min.gemfile index 
8edfd5d..4eeaf7a 100644 --- a/gemfiles/opentelemetry_min.gemfile +++ b/gemfiles/opentelemetry_min.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "opentelemetry-sdk", "~> 1.3.0" gem "opentelemetry-exporter-otlp", "~> 0.28.0" diff --git a/gemfiles/ruby_llm.gemfile b/gemfiles/ruby_llm.gemfile index 6f2c658..84c240e 100644 --- a/gemfiles/ruby_llm.gemfile +++ b/gemfiles/ruby_llm.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby_llm", ">= 1.9" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_8.gemfile b/gemfiles/ruby_llm_1_8.gemfile index 5a5a016..ce2ffb9 100644 --- a/gemfiles/ruby_llm_1_8.gemfile +++ b/gemfiles/ruby_llm_1_8.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby_llm", "~> 1.8.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_1_9.gemfile b/gemfiles/ruby_llm_1_9.gemfile index deee1d0..3334794 100644 --- a/gemfiles/ruby_llm_1_9.gemfile +++ b/gemfiles/ruby_llm_1_9.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby_llm", "~> 1.9.0" gemspec path: "../" diff --git a/gemfiles/ruby_llm_uninstalled.gemfile b/gemfiles/ruby_llm_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/ruby_llm_uninstalled.gemfile +++ b/gemfiles/ruby_llm_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/gemfiles/ruby_openai.gemfile b/gemfiles/ruby_openai.gemfile index 29caf96..1f7f2f7 100644 --- a/gemfiles/ruby_openai.gemfile +++ b/gemfiles/ruby_openai.gemfile @@ 
-4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby-openai", ">= 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_7_0.gemfile b/gemfiles/ruby_openai_7_0.gemfile index 4c9e319..d509a12 100644 --- a/gemfiles/ruby_openai_7_0.gemfile +++ b/gemfiles/ruby_openai_7_0.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby-openai", "~> 7.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_8_0.gemfile b/gemfiles/ruby_openai_8_0.gemfile index 3044fea..da5eb28 100644 --- a/gemfiles/ruby_openai_8_0.gemfile +++ b/gemfiles/ruby_openai_8_0.gemfile @@ -4,6 +4,7 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gem "ruby-openai", "~> 8.0" gemspec path: "../" diff --git a/gemfiles/ruby_openai_uninstalled.gemfile b/gemfiles/ruby_openai_uninstalled.gemfile index 7ec6aba..6dfe324 100644 --- a/gemfiles/ruby_openai_uninstalled.gemfile +++ b/gemfiles/ruby_openai_uninstalled.gemfile @@ -4,5 +4,6 @@ source "https://rubygems.org" gem "minitest-reporters", "~> 1.6" gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" gemspec path: "../" diff --git a/test/test_helper.rb b/test/test_helper.rb index 7ef9f68..e03950d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -18,6 +18,7 @@ require "minitest/autorun" require "minitest/stub_const" +require "climate_control" # Show test timings when MT_VERBOSE is set if ENV["MT_VERBOSE"] From 5421567f6a2cee9b3d639c8123695af1c9cc082f Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:50:26 -0500 Subject: [PATCH 14/27] Added: Contrib#auto_instrument! 
--- lib/braintrust/contrib.rb | 88 ++++++++++++++- lib/braintrust/internal/env.rb | 16 +++ test/braintrust/contrib_test.rb | 191 ++++++++++++++++++++++++++++++++ 3 files changed, 292 insertions(+), 3 deletions(-) diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index 713d536..c81b36d 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -6,6 +6,62 @@ require_relative "contrib/context" module Braintrust + # Auto-instrument available integrations. + # + # Discovers which integrations have their target libraries loaded + # and instruments them automatically. This is the primary public API + # for enabling auto-instrumentation. + # + # @param config [nil, Boolean, Hash] Auto-instrumentation configuration: + # - nil: use BRAINTRUST_AUTO_INSTRUMENT env var (defaults to enabled) + # - true: explicitly enable auto-instrumentation + # - false: explicitly disable auto-instrumentation + # - Hash with :only or :except keys for filtering integrations + # + # Environment variables: + # - BRAINTRUST_AUTO_INSTRUMENT: set to "false" to disable (default: enabled) + # - BRAINTRUST_INSTRUMENT_ONLY: comma-separated list of integrations to include + # - BRAINTRUST_INSTRUMENT_EXCEPT: comma-separated list of integrations to exclude + # + # @return [Array, nil] names of instrumented integrations, or nil if disabled + # + # @example Enable with defaults + # Braintrust.auto_instrument! 
+ # + # @example Explicitly enable + # Braintrust.auto_instrument!(true) + # + # @example Disable + # Braintrust.auto_instrument!(false) + # + # @example Only specific integrations + # Braintrust.auto_instrument!(only: [:openai, :anthropic]) + # + # @example Exclude specific integrations + # Braintrust.auto_instrument!(except: [:ruby_llm]) + def self.auto_instrument!(config = nil) + should_instrument = case config + when nil + Internal::Env.auto_instrument + when false + false + when true, Hash + true + end + + return unless should_instrument + + only = Internal::Env.instrument_only + except = Internal::Env.instrument_except + + if config.is_a?(Hash) + only = config[:only] || only + except = config[:except] || except + end + + Contrib.auto_instrument!(only: only, except: except) + end + # Instrument a registered integration by name. # This is the main entry point for activating integrations. # @@ -22,7 +78,7 @@ module Braintrust # client = OpenAI::Client.new # Braintrust.instrument!(:openai, target: client, tracer_provider: my_provider) def self.instrument!(name, **options) - Braintrust::Contrib.instrument!(name, **options) + Contrib.instrument!(name, **options) end # Contrib framework for auto-instrumentation integrations. @@ -43,14 +99,39 @@ def init(tracer_provider: nil) @default_tracer_provider = tracer_provider end + # Auto-instrument available integrations. + # Discovers which integrations have their target libraries loaded + # and instruments them automatically. + # + # @param only [Array] whitelist - only instrument these + # @param except [Array] blacklist - skip these + # @return [Array] names of integrations that were instrumented + # + # @example Instrument all available + # Braintrust::Contrib.auto_instrument! 
+ # + # @example Only specific integrations + # Braintrust::Contrib.auto_instrument!(only: [:openai, :anthropic]) + # + # @example Exclude specific integrations + # Braintrust::Contrib.auto_instrument!(except: [:ruby_llm]) + def auto_instrument!(only: nil, except: nil) + targets = registry.available + targets = targets.select { |i| only.include?(i.integration_name) } if only + targets = targets.reject { |i| except.include?(i.integration_name) } if except + + targets.each_with_object([]) do |integration, instrumented| + instrumented << integration.integration_name if instrument!(integration.integration_name) + end + end + # Instrument a registered integration by name. - # This is the main entry point for activating integrations. # # @param name [Symbol] The integration name (e.g., :openai, :anthropic) # @param options [Hash] Optional configuration # @option options [Object] :target Optional target instance to instrument specifically # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider - # @return [void] + # @return [Boolean] true if instrumentation succeeded # # @example Instrument all OpenAI clients # Braintrust::Contrib.instrument!(:openai) @@ -63,6 +144,7 @@ def instrument!(name, **options) integration.instrument!(**options) else Braintrust::Log.error("No integration for '#{name}' is defined!") + false end end diff --git a/lib/braintrust/internal/env.rb b/lib/braintrust/internal/env.rb index ba17b85..cf0fa57 100644 --- a/lib/braintrust/internal/env.rb +++ b/lib/braintrust/internal/env.rb @@ -4,6 +4,22 @@ module Braintrust module Internal # Environment variable utilities. 
module Env + ENV_AUTO_INSTRUMENT = "BRAINTRUST_AUTO_INSTRUMENT" + ENV_INSTRUMENT_EXCEPT = "BRAINTRUST_INSTRUMENT_EXCEPT" + ENV_INSTRUMENT_ONLY = "BRAINTRUST_INSTRUMENT_ONLY" + + def self.auto_instrument + ENV[ENV_AUTO_INSTRUMENT] != "false" + end + + def self.instrument_except + parse_list(ENV_INSTRUMENT_EXCEPT) + end + + def self.instrument_only + parse_list(ENV_INSTRUMENT_ONLY) + end + # Parse a comma-separated environment variable into an array of symbols. # @param key [String] The environment variable name # @return [Array, nil] Array of symbols, or nil if not set diff --git a/test/braintrust/contrib_test.rb b/test/braintrust/contrib_test.rb index 525927b..4e19da1 100644 --- a/test/braintrust/contrib_test.rb +++ b/test/braintrust/contrib_test.rb @@ -3,6 +3,189 @@ require "test_helper" class Braintrust::ContribTest < Minitest::Test + # --- Braintrust.auto_instrument! --- + + def test_auto_instrument_enabled_by_default + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end + + assert_equal({only: nil, except: nil}, called_with) + end + + def test_auto_instrument_disabled_when_config_false + called = false + + Braintrust::Contrib.stub(:auto_instrument!, -> { called = true }) do + Braintrust.auto_instrument!(false) + end + + refute called + end + + def test_auto_instrument_enabled_when_config_true + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(true) + end + + assert_equal({only: nil, except: nil}, called_with) + end + + def test_auto_instrument_disabled_via_env_var + called = false + + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "false") do + Braintrust::Contrib.stub(:auto_instrument!, -> { called = true }) do + Braintrust.auto_instrument! 
+ end + end + + refute called + end + + def test_auto_instrument_passes_only_from_hash + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(only: [:openai, :anthropic]) + end + + assert_equal({only: [:openai, :anthropic], except: nil}, called_with) + end + + def test_auto_instrument_passes_except_from_hash + called_with = nil + + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(except: [:ruby_llm]) + end + + assert_equal({only: nil, except: [:ruby_llm]}, called_with) + end + + def test_auto_instrument_reads_only_from_env + called_with = nil + + ClimateControl.modify(BRAINTRUST_INSTRUMENT_ONLY: "openai,anthropic") do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end + end + + assert_equal({only: [:openai, :anthropic], except: nil}, called_with) + end + + def test_auto_instrument_reads_except_from_env + called_with = nil + + ClimateControl.modify(BRAINTRUST_INSTRUMENT_EXCEPT: "ruby_llm") do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end + end + + assert_equal({only: nil, except: [:ruby_llm]}, called_with) + end + + def test_auto_instrument_hash_overrides_env + called_with = nil + + ClimateControl.modify(BRAINTRUST_INSTRUMENT_ONLY: "from_env", BRAINTRUST_INSTRUMENT_EXCEPT: "from_env") do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument!(only: [:from_hash], except: [:also_from_hash]) + end + end + + assert_equal({only: [:from_hash], except: [:also_from_hash]}, called_with) + end + + # --- Contrib.auto_instrument! 
--- + + def test_contrib_auto_instrument_instruments_available_integrations + integration1 = stub_integration(:openai) + integration2 = stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + instrumented_names = [] + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { + instrumented_names << name + true + }) do + result = Braintrust::Contrib.auto_instrument! + assert_equal [:openai, :anthropic], result + end + end + + assert_equal [:openai, :anthropic], instrumented_names + end + + def test_contrib_auto_instrument_filters_with_only + integration1 = stub_integration(:openai) + integration2 = stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + instrumented_names = [] + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { + instrumented_names << name + true + }) do + result = Braintrust::Contrib.auto_instrument!(only: [:openai]) + assert_equal [:openai], result + end + end + + assert_equal [:openai], instrumented_names + end + + def test_contrib_auto_instrument_filters_with_except + integration1 = stub_integration(:openai) + integration2 = stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + instrumented_names = [] + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { + instrumented_names << name + true + }) do + result = Braintrust::Contrib.auto_instrument!(except: [:anthropic]) + assert_equal [:openai], result + end + end + + assert_equal [:openai], instrumented_names + end + + def test_contrib_auto_instrument_excludes_failed_instrumentations + integration1 = stub_integration(:openai) + integration2 
= stub_integration(:anthropic) + + mock_registry = Minitest::Mock.new + mock_registry.expect(:available, [integration1, integration2]) + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + Braintrust::Contrib.stub(:instrument!, ->(name, **_opts) { name == :openai }) do + result = Braintrust::Contrib.auto_instrument! + assert_equal [:openai], result + end + end + end + # --- Braintrust.instrument! delegation --- def test_braintrust_instrument_delegates_to_contrib @@ -197,4 +380,12 @@ def test_tracer_for_uses_target_context_provider context_provider.verify mock_context.verify end + + private + + def stub_integration(name) + Object.new.tap do |obj| + obj.define_singleton_method(:integration_name) { name } + end + end end From 28a449851ed5c36294afa5e97c38172cf54358b2 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:53:03 -0500 Subject: [PATCH 15/27] Added: auto_instrument to Braintrust.init --- Appraisals | 10 +++ examples/setup/init.rb | 126 +++++++++++++++++++++++++++++++++ examples/setup/init_minimal.rb | 48 +++++++++++++ gemfiles/contrib.gemfile | 13 ++++ lib/braintrust.rb | 10 ++- test/braintrust_test.rb | 20 ++++++ 6 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 examples/setup/init.rb create mode 100644 examples/setup/init_minimal.rb create mode 100644 gemfiles/contrib.gemfile diff --git a/Appraisals b/Appraisals index 9f1df2d..5d062e5 100644 --- a/Appraisals +++ b/Appraisals @@ -69,3 +69,13 @@ appraise "openai-ruby-openai" do gem "openai", ">= 0.34" gem "ruby-openai", ">= 8.0" end + +# LLM libraries that can coexist - for demos that use multiple LLM SDKs +# Note: ruby-openai is excluded because it conflicts with the official openai gem +# (they share the same namespace). Use openai-ruby-openai appraisal to test both. 
+appraise "contrib" do + gem "openai", ">= 0.34" + gem "anthropic", ">= 1.11" + gem "ruby_llm", ">= 1.9" + gem "base64" # needed for openai gem on Ruby 3.4+ +end diff --git a/examples/setup/init.rb b/examples/setup/init.rb new file mode 100644 index 0000000..2c64a29 --- /dev/null +++ b/examples/setup/init.rb @@ -0,0 +1,126 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Auto-instrumentation via Braintrust.init +# +# This example demonstrates the auto_instrument feature introduced in Braintrust.init. +# Instead of manually calling Braintrust.instrument! for each LLM library, a single +# Braintrust.init call automatically detects and instruments all supported libraries. +# +# Supported libraries (when installed): +# - openai (OpenAI's official Ruby gem) +# - ruby-openai (alexrudall's ruby-openai gem) * +# - anthropic (Anthropic's official Ruby gem) +# - ruby_llm (RubyLLM unified interface) +# +# * Note: openai and ruby-openai share the same require path ("openai"), so only one +# can be loaded at a time. This demo uses the official openai gem. +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/init.rb + +# Check for required API keys +missing_keys = [] +missing_keys << "OPENAI_API_KEY" unless ENV["OPENAI_API_KEY"] +missing_keys << "ANTHROPIC_API_KEY" unless ENV["ANTHROPIC_API_KEY"] + +unless missing_keys.empty? + puts "Error: Missing required environment variables: #{missing_keys.join(", ")}" + puts "" + puts "Get your API keys from:" + puts " OpenAI: https://platform.openai.com/api-keys" + puts " Anthropic: https://console.anthropic.com/" + exit 1 +end + +puts "Auto-Instrumentation Demo" +puts "=" * 50 +puts "" +puts "Calling Braintrust.init..." +puts "This automatically detects and instruments all supported LLM libraries." 
+puts "" + +# Configure RubyLLM before init (it needs API keys configured) +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +# Single init call - auto_instrument is enabled by default! +# This detects openai, anthropic, and ruby_llm are loaded and instruments them. +Braintrust.init + +# Brief pause to allow async Braintrust login to complete +# (Not necessary in production, just for this short lived example) +sleep 0.5 + +puts "Libraries will be instrumented as API calls are made." +puts "" + +# Create clients for each library +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Create a tracer and root span to capture all operations +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-demo") +root_span = nil + +puts "Making API calls with each library..." +puts "-" * 50 + +tracer.in_span("examples/setup/init.rb") do |span| + root_span = span + + # 1. OpenAI (official gem) + puts "" + puts "[1/3] OpenAI (official gem)..." + openai_response = openai_client.chat.completions.create( + model: "gpt-4o-mini", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from OpenAI!' in exactly 5 words."} + ] + ) + puts " Response: #{openai_response.choices[0].message.content}" + + # 2. Anthropic + puts "" + puts "[2/3] Anthropic..." + anthropic_response = anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from Anthropic!' in exactly 5 words."} + ] + ) + puts " Response: #{anthropic_response.content[0].text}" + + # 3. RubyLLM (using OpenAI provider) + puts "" + puts "[3/3] RubyLLM (via OpenAI)..." + chat = RubyLLM.chat(model: "gpt-4o-mini") + ruby_llm_response = chat.ask("Say 'Hello from RubyLLM!' 
in exactly 5 words.") + puts " Response: #{ruby_llm_response.content}" +end + +puts "" +puts "-" * 50 +puts "" +puts "All API calls complete!" +puts "" +puts "View trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" +puts "" + +# Shutdown to flush spans +# (Not necessary in production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown + +puts "Trace sent to Braintrust!" diff --git a/examples/setup/init_minimal.rb b/examples/setup/init_minimal.rb new file mode 100644 index 0000000..4210dc1 --- /dev/null +++ b/examples/setup/init_minimal.rb @@ -0,0 +1,48 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# Example: Minimal auto-instrumentation via Braintrust.init +# +# Single Braintrust.init call auto-detects and instruments all supported LLM libraries. +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/init_minimal.rb + +require "bundler/setup" +require "braintrust" +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] } + +Braintrust.init + +# Brief pause to allow async Braintrust login to complete +# (Not necessary in production, just for this short lived example) +sleep 0.5 + +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-demo") + +tracer.in_span("examples/setup/init_minimal.rb") do + openai_client.chat.completions.create( + model: "gpt-4o-mini", + messages: [{role: "user", content: "Hello from OpenAI"}] + ) + + anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [{role: "user", content: "Hello from Anthropic"}] + ) + + RubyLLM.chat(model: "gpt-4o-mini").ask("Hello from RubyLLM") +end + +# Shutdown to flush spans +# (Not necessary in 
production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown diff --git a/gemfiles/contrib.gemfile b/gemfiles/contrib.gemfile new file mode 100644 index 0000000..3d96dac --- /dev/null +++ b/gemfiles/contrib.gemfile @@ -0,0 +1,13 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" +gem "openai", ">= 0.34" +gem "anthropic", ">= 1.11" +gem "ruby_llm", ">= 1.9" +gem "base64" + +gemspec path: "../" diff --git a/lib/braintrust.rb b/lib/braintrust.rb index 979651e..ba023fc 100644 --- a/lib/braintrust.rb +++ b/lib/braintrust.rb @@ -6,6 +6,7 @@ require_relative "braintrust/trace" require_relative "braintrust/api" require_relative "braintrust/internal/experiments" +require_relative "braintrust/internal/env" require_relative "braintrust/eval" require_relative "braintrust/contrib" @@ -41,8 +42,13 @@ class Error < StandardError; end # @param filter_ai_spans [Boolean, nil] Enable AI span filtering (overrides BRAINTRUST_OTEL_FILTER_AI_SPANS env var) # @param span_filter_funcs [Array, nil] Custom span filter functions # @param exporter [Exporter, nil] Optional exporter override (for testing) + # @param auto_instrument [Boolean, Hash, nil] Auto-instrumentation config: + # - nil (default): use BRAINTRUST_AUTO_INSTRUMENT env var, default true if not set + # - true: explicitly enable + # - false: explicitly disable + # - Hash with :only or :except keys for filtering # @return [State] the created state - def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, set_global: true, blocking_login: false, enable_tracing: true, tracer_provider: nil, filter_ai_spans: nil, span_filter_funcs: nil, exporter: nil) + def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, set_global: true, blocking_login: false, enable_tracing: true, tracer_provider: nil, 
filter_ai_spans: nil, span_filter_funcs: nil, exporter: nil, auto_instrument: nil) state = State.from_env( api_key: api_key, org_name: org_name, @@ -59,6 +65,8 @@ def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, a State.global = state if set_global + auto_instrument!(auto_instrument) + state end diff --git a/test/braintrust_test.rb b/test/braintrust_test.rb index e083be6..1d3bbd7 100644 --- a/test/braintrust_test.rb +++ b/test/braintrust_test.rb @@ -179,4 +179,24 @@ def test_default_project_from_parameter_overrides_env span_data = spans.first assert_equal "project_name:param-project", span_data.attributes["braintrust.parent"] end + + def test_init_calls_auto_instrument_with_config + called_with = :not_called + + Braintrust.stub(:auto_instrument!, ->(config) { called_with = config }) do + Braintrust.init(api_key: "test-api-key", auto_instrument: {only: [:openai]}) + end + + assert_equal({only: [:openai]}, called_with) + end + + def test_init_calls_auto_instrument_with_nil_by_default + called_with = :not_called + + Braintrust.stub(:auto_instrument!, ->(config) { called_with = config }) do + Braintrust.init(api_key: "test-api-key") + end + + assert_nil called_with + end end From 2ddb2d2148b8a895e527b761bdacc9ac27cf3d0b Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 2 Jan 2026 11:27:36 -0500 Subject: [PATCH 16/27] Changed: README to reflect new Braintrust.init auto instrumentation --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 2eef3cf..5920aa1 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,25 @@ gem install braintrust ## Quick Start -### Set up your API key +1. **Set up your API key** -```bash -export BRAINTRUST_API_KEY="your-api-key" -``` + ```bash + export BRAINTRUST_API_KEY="your-api-key" + ``` + +2. 
**Add `Braintrust.init` to your application** + + For Rails applications, we suggest adding this to an initializer file. + + ```ruby + require "braintrust" + + Braintrust.init + ``` + + This sets up Braintrust and auto-instruments your application with LLM tracing. + +## How to use... ### Evals @@ -67,7 +81,9 @@ Braintrust::Eval.run( ) ``` -### Tracing +## Tracing + +Supported LLM libraries are automatically instrumented with `Braintrust.init`. If you wish to manually instrument your LLM clients (for greater customization) you can use OpenTelemetry and `instrument!` to make your own custom LLM traces. ```ruby require "braintrust" @@ -101,18 +117,19 @@ OpenTelemetry.tracer_provider.shutdown puts "View trace in Braintrust!" ``` -### OpenAI Tracing +### OpenAI ```ruby require "braintrust" require "openai" -Braintrust.init +Braintrust.init(auto_instrument: false) client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) # Instrument all clients Braintrust.instrument!(:openai) + # OR instrument a single client Braintrust.instrument!(:openai, target: client) @@ -139,18 +156,23 @@ puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" OpenTelemetry.tracer_provider.shutdown ``` -### Anthropic Tracing +### Anthropic ```ruby require "braintrust" require "anthropic" -Braintrust.init -Braintrust.instrument!(:anthropic) +Braintrust.init(auto_instrument: false) # Instrument Anthropic (instance-level) client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +# Instrument all clients +Braintrust.instrument!(:anthropic) + +# OR instrument a single client +Braintrust.instrument!(:anthropic, target: client) + tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app") root_span = nil @@ -174,24 +196,28 @@ puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" OpenTelemetry.tracer_provider.shutdown ``` -### RubyLLM Tracing +### RubyLLM ```ruby require "braintrust" require "ruby_llm" -Braintrust.init +Braintrust.init(auto_instrument: false) + +chat = 
RubyLLM.chat(model: "gpt-4o-mini") -# Instrument all RubyLLM Chat instances +# Instrument all clients Braintrust.instrument!(:ruby_llm) +# OR instrument a single client +Braintrust.instrument!(:ruby_llm, target: chat) + tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-app") root_span = nil response = tracer.in_span("chat") do |span| root_span = span - chat = RubyLLM.chat(model: "gpt-4o-mini") chat.ask("Say hello!") end From c50583325d8111ce26e5dd4a651bd018bcc8ac92 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 19 Dec 2025 14:54:55 -0500 Subject: [PATCH 17/27] Added: braintrust/setup.rb for automatically setting up the SDK --- Appraisals | 6 + examples/setup/require.rb | 128 ++++++ examples/setup/require_minimal.rb | 47 +++ gemfiles/rails.gemfile | 11 + lib/braintrust/contrib/rails/railtie.rb | 16 + lib/braintrust/contrib/registry.rb | 33 +- lib/braintrust/contrib/setup.rb | 69 +++ lib/braintrust/setup.rb | 45 ++ .../contrib/rails/integration_helper.rb | 25 ++ test/braintrust/contrib/rails/railtie_test.rb | 115 +++++ test/braintrust/contrib/registry_test.rb | 11 - test/braintrust/contrib/setup_test.rb | 396 ++++++++++++++++++ test/braintrust/setup_test.rb | 130 ++++++ test/support/mock_helper.rb | 32 ++ 14 files changed, 1044 insertions(+), 20 deletions(-) create mode 100644 examples/setup/require.rb create mode 100644 examples/setup/require_minimal.rb create mode 100644 gemfiles/rails.gemfile create mode 100644 lib/braintrust/contrib/rails/railtie.rb create mode 100644 lib/braintrust/contrib/setup.rb create mode 100644 lib/braintrust/setup.rb create mode 100644 test/braintrust/contrib/rails/integration_helper.rb create mode 100644 test/braintrust/contrib/rails/railtie_test.rb create mode 100644 test/braintrust/contrib/setup_test.rb create mode 100644 test/braintrust/setup_test.rb diff --git a/Appraisals b/Appraisals index 5d062e5..5681396 100644 --- a/Appraisals +++ b/Appraisals @@ -79,3 +79,9 @@ appraise "contrib" do gem "ruby_llm", ">= 1.9" gem 
"base64" # needed for openai gem on Ruby 3.4+ end + +# Rails integration testing (minimal dependencies) +appraise "rails" do + gem "activesupport", "~> 8.0" + gem "railties", "~> 8.0" +end diff --git a/examples/setup/require.rb b/examples/setup/require.rb new file mode 100644 index 0000000..69feb48 --- /dev/null +++ b/examples/setup/require.rb @@ -0,0 +1,128 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" + +# Setup via require +# +# This example demonstrates the simplest way to auto-instrument LLM libraries: +# just require "braintrust/setup" BEFORE your LLM libraries. +# +# This approach: +# - Hooks into Ruby's require mechanism +# - Automatically patches LLM libraries as they are loaded +# - Calls Braintrust.init internally (no manual init needed) +# +# For Rails apps, add to your Gemfile: +# gem "braintrust", require: "braintrust/setup" +# +# Supported libraries (instrumented automatically when required): +# - openai (OpenAI's official Ruby gem) +# - ruby-openai (alexrudall's ruby-openai gem) * +# - anthropic (Anthropic's official Ruby gem) +# - ruby_llm (RubyLLM unified interface) +# +# * Note: openai and ruby-openai share the same require path ("openai"), so only one +# can be loaded at a time. This demo uses the official openai gem. +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/require.rb + +# Step 1: Require braintrust/setup FIRST (before any LLM libraries) +require "braintrust/setup" + +# Step 2: Now require your LLM libraries - they will be instrumented automatically! +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +# Brief pause to allow async Braintrust login to complete +sleep 0.5 + +# Check for required API keys +missing_keys = [] +missing_keys << "OPENAI_API_KEY" unless ENV["OPENAI_API_KEY"] +missing_keys << "ANTHROPIC_API_KEY" unless ENV["ANTHROPIC_API_KEY"] + +unless missing_keys.empty? 
+ puts "Error: Missing required environment variables: #{missing_keys.join(", ")}" + puts "" + puts "Get your API keys from:" + puts " OpenAI: https://platform.openai.com/api-keys" + puts " Anthropic: https://console.anthropic.com/" + exit 1 +end + +puts "Auto-Instrumentation via Require Demo" +puts "=" * 50 +puts "" +puts "Libraries were instrumented automatically when required!" +puts "No Braintrust.init call needed." +puts "" + +# Configure RubyLLM +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +# Create clients for each library +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Create a tracer and root span to capture all operations +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-require-demo") +root_span = nil + +puts "Making API calls with each library..." +puts "-" * 50 + +tracer.in_span("examples/setup/require.rb") do |span| + root_span = span + + # 1. OpenAI (official gem) + puts "" + puts "[1/3] OpenAI (official gem)..." + openai_response = openai_client.chat.completions.create( + model: "gpt-4o-mini", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from OpenAI!' in exactly 5 words."} + ] + ) + puts " Response: #{openai_response.choices[0].message.content}" + + # 2. Anthropic + puts "" + puts "[2/3] Anthropic..." + anthropic_response = anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [ + {role: "user", content: "Say 'Hello from Anthropic!' in exactly 5 words."} + ] + ) + puts " Response: #{anthropic_response.content[0].text}" + + # 3. RubyLLM (using OpenAI provider) + puts "" + puts "[3/3] RubyLLM (via OpenAI)..." + chat = RubyLLM.chat(model: "gpt-4o-mini") + ruby_llm_response = chat.ask("Say 'Hello from RubyLLM!' 
in exactly 5 words.") + puts " Response: #{ruby_llm_response.content}" +end + +puts "" +puts "-" * 50 +puts "" +puts "All API calls complete!" +puts "" +puts "View trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" +puts "" + +# Shutdown to flush spans (not necessary in production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown + +puts "Trace sent to Braintrust!" diff --git a/examples/setup/require_minimal.rb b/examples/setup/require_minimal.rb new file mode 100644 index 0000000..9b65f56 --- /dev/null +++ b/examples/setup/require_minimal.rb @@ -0,0 +1,47 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +# Example: Minimal setup via require +# +# Require "braintrust/setup" BEFORE your LLM libraries to auto-instrument them. +# For Rails, add to Gemfile: gem "braintrust", require: "braintrust/setup" +# +# Usage: +# OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \ +# bundle exec appraisal contrib ruby examples/setup/require_minimal.rb + +require "bundler/setup" +require "braintrust/setup" +require "openai" +require "anthropic" +require "ruby_llm" +require "opentelemetry/sdk" + +# Brief pause to allow async Braintrust login to complete +# (Not necessary in production, just for this short lived example) +sleep 0.5 + +RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] } + +openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) +tracer = OpenTelemetry.tracer_provider.tracer("auto-instrument-require-demo") + +tracer.in_span("examples/setup/require_minimal.rb") do + openai_client.chat.completions.create( + model: "gpt-4o-mini", + messages: [{role: "user", content: "Hello from OpenAI"}] + ) + + anthropic_client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 50, + messages: [{role: "user", content: "Hello from Anthropic"}] + ) + + RubyLLM.chat(model: "gpt-4o-mini").ask("Hello from RubyLLM") +end 
+ +# Shutdown to flush spans +# (Not necessary in production, just for this short lived example) +OpenTelemetry.tracer_provider.shutdown diff --git a/gemfiles/rails.gemfile b/gemfiles/rails.gemfile new file mode 100644 index 0000000..eb90b15 --- /dev/null +++ b/gemfiles/rails.gemfile @@ -0,0 +1,11 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" +gem "activesupport", "~> 8.0" +gem "railties", "~> 8.0" + +gemspec path: "../" diff --git a/lib/braintrust/contrib/rails/railtie.rb b/lib/braintrust/contrib/rails/railtie.rb new file mode 100644 index 0000000..fd9749a --- /dev/null +++ b/lib/braintrust/contrib/rails/railtie.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + class Railtie < ::Rails::Railtie + config.after_initialize do + Braintrust.auto_instrument!( + only: Braintrust::Internal::Env.instrument_only, + except: Braintrust::Internal::Env.instrument_except + ) + end + end + end + end +end diff --git a/lib/braintrust/contrib/registry.rb b/lib/braintrust/contrib/registry.rb index 77a3d67..658a171 100644 --- a/lib/braintrust/contrib/registry.rb +++ b/lib/braintrust/contrib/registry.rb @@ -60,22 +60,37 @@ def integrations_for_require_path(path) @require_path_map ||= build_require_path_map end end - basename = File.basename(path.to_s, ".rb") - map.fetch(basename, EMPTY_ARRAY) - end - # Clear all registrations (primarily for testing). - def clear! - @mutex.synchronize do - @integrations.clear - @require_path_map = nil - end + path_str = path.to_s + basename = File.basename(path_str, ".rb") + + # Quick check: is this basename even in our map? + return EMPTY_ARRAY unless map.key?(basename) + + # Only match top-level requires or gem entry points. + # Avoid matching internal subpaths (e.g., ruby_llm/providers/anthropic). 
+ return EMPTY_ARRAY unless gem_entry_point?(path_str, basename) + + map.fetch(basename, EMPTY_ARRAY) end private EMPTY_ARRAY = [].freeze + # Check if this is a gem entry point require. + # @param path [String] Full require path + # @param basename [String] File basename without extension + # @return [Boolean] + def gem_entry_point?(path, basename) + # Direct require like `require 'anthropic'` - no directory separators + return true unless path.include?("/") + + # Full path to gem entry point: /gems/anthropic-1.0.0/lib/anthropic.rb + # The basename appears in both the gem directory name AND as the final file + path.match?(%r{/#{Regexp.escape(basename)}[^/]*/lib/#{Regexp.escape(basename)}(\.rb)?$}) + end + def build_require_path_map map = {} @integrations.each_value do |integration| diff --git a/lib/braintrust/contrib/setup.rb b/lib/braintrust/contrib/setup.rb new file mode 100644 index 0000000..27ebd40 --- /dev/null +++ b/lib/braintrust/contrib/setup.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +require_relative "../internal/env" + +module Braintrust + module Contrib + module Setup + class << self + def run! + return if @setup_complete + @setup_complete = true + + if Internal::Env.auto_instrument + # Set up deferred patching for libraries loaded later + if rails_environment? + setup_rails_hook! + else + setup_require_hook! + end + end + end + + def rails_environment? + # Check for Rails::Railtie which is defined during gem loading, + # before Rails.application exists + defined?(::Rails::Railtie) + end + + def setup_rails_hook! + require_relative "rails/railtie" + end + + def setup_require_hook! 
+ registry = Contrib::Registry.instance + only = Internal::Env.instrument_only + except = Internal::Env.instrument_except + + # Store original require + original_require = Kernel.method(:require) + + Kernel.define_method(:require) do |path| + result = original_require.call(path) + + # Reentrancy guard + unless Thread.current[:braintrust_in_require_hook] + begin + Thread.current[:braintrust_in_require_hook] = true + + # Check if any integration matches this require path + registry.integrations_for_require_path(path).each do |integration| + next unless integration.available? && integration.compatible? + next if only && !only.include?(integration.integration_name) + next if except&.include?(integration.integration_name) + integration.patch! + end + rescue => e + Braintrust::Log.error("Failed to auto-instrument on `require`: #{e.message}") + ensure + Thread.current[:braintrust_in_require_hook] = false + end + end + + result + end + end + end + end + end +end diff --git a/lib/braintrust/setup.rb b/lib/braintrust/setup.rb new file mode 100644 index 0000000..5feb180 --- /dev/null +++ b/lib/braintrust/setup.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +# Setup file for automatic SDK initialization and instrumentation. +# Load this file to automatically initialize Braintrust and instrument all available LLM libraries. +# +# Usage: +# # Gemfile +# gem "braintrust", require: "braintrust/setup" +# +# # Or in code +# require "braintrust/setup" +# +# Environment variables: +# BRAINTRUST_API_KEY - Required for tracing to work +# BRAINTRUST_AUTO_INSTRUMENT - Set to "false" to disable (default: true) +# BRAINTRUST_INSTRUMENT_ONLY - Comma-separated whitelist +# BRAINTRUST_INSTRUMENT_EXCEPT - Comma-separated blacklist + +require_relative "../braintrust" +require_relative "contrib/setup" + +module Braintrust + module Setup + class << self + def run! 
module Braintrust
  # One-shot bootstrap invoked when `braintrust/setup` is required.
  module Setup
    class << self
      # Initialize the SDK once, then hand off to Contrib setup for
      # third-party integrations. Init failures are logged, never raised,
      # so a missing API key cannot crash the host application.
      def run!
        return if @setup_complete

        @setup_complete = true
        safe_init
        Contrib::Setup.run!
      end

      private

      # Call Braintrust.init, logging (but swallowing) any StandardError.
      def safe_init
        Braintrust.init
      rescue => e
        Braintrust::Log.error("Failed to automatically setup Braintrust: #{e.message}")
      end
    end
  end
end

# Auto-setup when required
Braintrust::Setup.run!
def test_railtie_passes_only_from_env
  skip_unless_rails!

  assert_in_fork do
    ENV["BRAINTRUST_INSTRUMENT_ONLY"] = "openai,anthropic"

    require "active_support"
    require "rails/railtie"

    # Intercept after_initialize so we can capture the railtie's callback.
    deferred_callback = nil
    saved_after_initialize = ::Rails::Railtie::Configuration.instance_method(:after_initialize)
    ::Rails::Railtie::Configuration.define_method(:after_initialize) do |&blk|
      deferred_callback = blk
    end

    require "braintrust/contrib/rails/railtie"

    # Put the real hook back before invoking the captured callback.
    ::Rails::Railtie::Configuration.define_method(:after_initialize, saved_after_initialize)

    received_kwargs = nil
    Braintrust.stub(:auto_instrument!, ->(**kwargs) { received_kwargs = kwargs }) do
      deferred_callback.call
    end

    expected = {only: [:openai, :anthropic], except: nil}
    if received_kwargs == expected
      puts "only_env_test:passed"
    else
      puts "only_env_test:failed - expected #{expected.inspect}, got #{received_kwargs.inspect}"
      exit 1
    end
  end
end
def test_railtie_passes_except_from_env
  skip_unless_rails!

  assert_in_fork do
    ENV["BRAINTRUST_INSTRUMENT_EXCEPT"] = "ruby_llm"

    require "active_support"
    require "rails/railtie"

    # Intercept after_initialize so we can capture the railtie's callback.
    deferred_callback = nil
    saved_after_initialize = ::Rails::Railtie::Configuration.instance_method(:after_initialize)
    ::Rails::Railtie::Configuration.define_method(:after_initialize) do |&blk|
      deferred_callback = blk
    end

    require "braintrust/contrib/rails/railtie"

    # Put the real hook back before invoking the captured callback.
    ::Rails::Railtie::Configuration.define_method(:after_initialize, saved_after_initialize)

    received_kwargs = nil
    Braintrust.stub(:auto_instrument!, ->(**kwargs) { received_kwargs = kwargs }) do
      deferred_callback.call
    end

    expected = {only: nil, except: [:ruby_llm]}
    if received_kwargs == expected
      puts "except_env_test:passed"
    else
      puts "except_env_test:failed - expected #{expected.inspect}, got #{received_kwargs.inspect}"
      exit 1
    end
  end
end
def test_run_is_idempotent
  skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie)

  invocations = 0
  counter = -> { invocations += 1 }

  # Repeated run! calls must install the require hook exactly once.
  Braintrust::Contrib::Setup.stub(:setup_require_hook!, counter) do
    3.times { Braintrust::Contrib::Setup.run! }
  end

  assert_equal 1, invocations, "run! should only execute once"
end
def test_rails_environment_returns_false_when_rails_application_nil
  # rails_environment? only checks `defined?(::Rails::Railtie)`, so a
  # Rails constant WITHOUT a Railtie (e.g. partial Rails load, no
  # railties) must not count as a Rails environment. The previous mock
  # stubbed Rails.application/respond_to?, which rails_environment?
  # never consults — that machinery was dead code and implied the wrong
  # detection mechanism.
  mock_rails = Module.new

  Object.stub_const(:Rails, mock_rails) do
    refute Braintrust::Contrib::Setup.rails_environment?
  end
end
def test_setup_require_hook_instruments_matching_library
  assert_in_fork do
    with_tmp_file(data: "# test file\n", filename: "test_integration_lib", extension: ".rb") do |test_file|
      patched = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      integration = mock_integration_object(
        name: :test_lib,
        on_patch: -> { patched = true }
      )
      registry = mock_registry_for(lib_name => [integration])

      # Requiring the temp lib through the hook should trigger patch!.
      Braintrust::Contrib::Registry.stub(:instance, registry) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if patched
        puts "instrument_test:passed"
      else
        puts "instrument_test:failed - patch! was not called"
        exit 1
      end
    end
  end
end
def test_setup_require_hook_respects_except_filter
  assert_in_fork do
    with_tmp_file(data: "# test file\n", filename: "excluded_lib", extension: ".rb") do |test_file|
      patched = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      integration = mock_integration_object(
        name: :excluded_lib,
        on_patch: -> { patched = true }
      )
      registry = mock_registry_for(lib_name => [integration])

      # Denylist the integration before installing the hook.
      ENV["BRAINTRUST_INSTRUMENT_EXCEPT"] = "excluded_lib"

      Braintrust::Contrib::Registry.stub(:instance, registry) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if patched
        puts "except_filter_test:failed - patch! should not be called for excluded lib"
        exit 1
      else
        puts "except_filter_test:passed"
      end
    end
  end
end
def test_setup_require_hook_skips_incompatible_integration
  assert_in_fork do
    with_tmp_file(data: "# test file\n", filename: "incompatible_lib", extension: ".rb") do |test_file|
      patched = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      # An integration that reports compatible? == false must be skipped.
      integration = mock_integration_object(
        name: :incompatible_lib,
        compatible: false,
        on_patch: -> { patched = true }
      )
      registry = mock_registry_for(lib_name => [integration])

      Braintrust::Contrib::Registry.stub(:instance, registry) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if patched
        puts "incompatible_test:failed - patch! should not be called for incompatible integration"
        exit 1
      else
        puts "incompatible_test:passed"
      end
    end
  end
end
def test_setup_require_hook_logs_errors_without_crashing
  assert_in_fork do
    with_tmp_file(data: "# error lib\n", filename: "error_lib", extension: ".rb") do |test_file|
      logged = false
      lib_name = File.basename(test_file.path, ".rb")
      $LOAD_PATH.unshift(File.dirname(test_file.path))

      # A patch! that raises must be caught by the hook and logged.
      integration = mock_integration_object(
        name: :error_lib,
        on_patch: -> { raise "Patch error!" }
      )
      registry = mock_registry_for(lib_name => [integration])

      with_stubs(
        [Braintrust::Contrib::Registry, :instance, registry],
        [Braintrust::Log, :error, ->(_msg) { logged = true }]
      ) do
        Braintrust::Contrib::Setup.setup_require_hook!
        require lib_name
      end

      if logged
        puts "error_handling_test:passed"
      else
        puts "error_handling_test:failed - error was not logged"
        exit 1
      end
    end
  end
end
# Build a stand-in registry whose #integrations_for_require_path does a
# substring match of the require path against the supplied lib names.
# @param mappings [Hash] lib_name => [integrations] mapping
def mock_registry_for(mappings)
  fake = Object.new
  fake.define_singleton_method(:integrations_for_require_path) do |path|
    _, integrations = mappings.detect { |lib, _| path.include?(lib) }
    integrations || []
  end
  fake
end
def test_run_logs_error_on_init_failure
  assert_in_fork do
    require "braintrust/contrib/setup"

    captured_errors = []

    with_stubs(
      [Braintrust, :init, ->(**_kwargs) { raise "Test init error" }],
      [Braintrust::Log, :error, ->(msg) { captured_errors << msg }],
      [Braintrust::Contrib::Setup, :run!, -> {}]
    ) do
      require "braintrust/setup"
    end

    # The logged message must mention both the setup failure and the cause.
    matched = captured_errors.any? do |msg|
      msg.include?("Failed to automatically setup Braintrust") &&
        msg.include?("Test init error")
    end

    if matched
      puts "test_run_logs_error_on_init_failure:passed"
    else
      puts "test_run_logs_error_on_init_failure:failed - logged: #{captured_errors.inspect}"
      exit 1
    end
  end
end
# Apply multiple stubs without deep nesting.
# Each stub is an array of [object, method, value]; stubs are applied
# left-to-right (outermost first) around the block.
#
# @param stubs [Array] list of [object, method, value] tuples
# @yield block to execute with all stubs applied
#
# @example Single stub
#   with_stubs([Braintrust, :init, -> { }]) do
#     # code runs with Braintrust.init stubbed
#   end
#
# @example Multiple stubs (no nesting required)
#   with_stubs(
#     [Braintrust, :init, -> { init_called = true }],
#     [Braintrust::Contrib::Setup, :run!, -> {}],
#     [Braintrust::Log, :error, ->(msg) { errors << msg }]
#   ) do
#     require "braintrust/setup"
#     # assertions here - only one level deep
#   end
#
def with_stubs(*stubs, &block)
  return yield if stubs.empty?

  (target, method_name, replacement), *rest = stubs
  target.stub(method_name, replacement) do
    with_stubs(*rest, &block)
  end
end
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Automatic Braintrust SDK setup via braintrust exec
#
# This example demonstrates the zero-code-changes approach to auto-instrumentation.
# Instead of modifying your application code, you simply run it with `braintrust exec`.
#
# The CLI:
# - Injects auto-setup via RUBYOPT
# - Automatically patches LLM libraries as they are loaded
# - Initializes Braintrust tracing
#
# This is ideal for:
# - Existing applications you don't want to modify
# - Quick testing and debugging
# - Production deployments via process supervisors
#
# Supported libraries (instrumented automatically):
# - openai (OpenAI's official Ruby gem)
# - ruby-openai (alexrudall's ruby-openai gem) *
# - anthropic (Anthropic's official Ruby gem)
# - ruby_llm (RubyLLM unified interface)
#
# * Note: openai and ruby-openai share the same require path ("openai"), so only one
#   can be loaded at a time. This demo uses the official openai gem.
#
# Usage:
#   OPENAI_API_KEY=your-key ANTHROPIC_API_KEY=your-key \
#     bundle exec appraisal contrib braintrust exec -- \
#     ruby examples/setup/exec.rb
#
# With filtering:
#   braintrust exec --only openai,anthropic -- ruby examples/setup/exec.rb
#   braintrust exec --except ruby_llm -- ruby examples/setup/exec.rb

# Notice: NO braintrust require needed! The CLI handles everything.

require "openai"
require "anthropic"
require "ruby_llm"
require "opentelemetry-sdk"

# Check for required API keys (idiomatic reject instead of manual appends).
missing_keys = %w[OPENAI_API_KEY ANTHROPIC_API_KEY].reject { |key| ENV[key] }

unless missing_keys.empty?
  puts "Error: Missing required environment variables: #{missing_keys.join(", ")}"
  puts ""
  puts "Get your API keys from:"
  puts "  OpenAI: https://platform.openai.com/api-keys"
  puts "  Anthropic: https://console.anthropic.com/"
  exit 1
end

puts "auto-setup via braintrust exec Demo"
puts "=" * 50
puts ""
puts "This script has NO braintrust imports!"
puts "The CLI injected auto-setup via RUBYOPT."
puts ""

# Brief pause to allow async Braintrust login to complete
# (Not necessary in production, just for this short lived example)
sleep 0.5

# Configure RubyLLM
RubyLLM.configure do |config|
  config.openai_api_key = ENV["OPENAI_API_KEY"]
end

# Create clients for each library
openai_client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])
anthropic_client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"])

# Create a tracer and root span to capture all operations
tracer = OpenTelemetry.tracer_provider.tracer("setup-exec-demo")
root_span = nil

puts "Making API calls with each library..."
puts "-" * 50

tracer.in_span("examples/setup/exec.rb") do |span|
  root_span = span

  # 1. OpenAI (official gem)
  puts ""
  puts "[1/3] OpenAI (official gem)..."
  openai_response = openai_client.chat.completions.create(
    model: "gpt-4o-mini",
    max_tokens: 50,
    messages: [
      {role: "user", content: "Say 'Hello from OpenAI!' in exactly 5 words."}
    ]
  )
  puts "  Response: #{openai_response.choices[0].message.content}"

  # 2. Anthropic
  puts ""
  puts "[2/3] Anthropic..."
  anthropic_response = anthropic_client.messages.create(
    model: "claude-3-haiku-20240307",
    max_tokens: 50,
    messages: [
      {role: "user", content: "Say 'Hello from Anthropic!' in exactly 5 words."}
    ]
  )
  puts "  Response: #{anthropic_response.content[0].text}"

  # 3. RubyLLM (using OpenAI provider)
  puts ""
  puts "[3/3] RubyLLM (via OpenAI)..."
  chat = RubyLLM.chat(model: "gpt-4o-mini")
  ruby_llm_response = chat.ask("Say 'Hello from RubyLLM!' in exactly 5 words.")
  puts "  Response: #{ruby_llm_response.content}"
end

puts ""
puts "-" * 50
puts ""
puts "All API calls complete!"
puts ""
puts "View trace in Braintrust:"
puts "  #{Braintrust::Trace.permalink(root_span)}"
puts ""

# Shutdown to flush spans (not necessary in production, just for this short lived example)
OpenTelemetry.tracer_provider.shutdown

puts "Trace sent to Braintrust!"
#!/usr/bin/env ruby
# frozen_string_literal: true

# Minimal demo: zero Braintrust code in this process — `braintrust exec`
# injects the auto-setup via RUBYOPT before this script runs.
require "openai"
require "anthropic"
require "ruby_llm"
require "opentelemetry-sdk"

# Give the async Braintrust login a moment to finish.
# (Not necessary in production, just for this short lived example)
sleep 0.5

RubyLLM.configure { |c| c.openai_api_key = ENV["OPENAI_API_KEY"] }

openai = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])
anthropic = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"])
demo_tracer = OpenTelemetry.tracer_provider.tracer("setup-exec-demo")

demo_tracer.in_span("examples/setup/exec_minimal.rb") do
  openai.chat.completions.create(
    model: "gpt-4o-mini",
    messages: [{role: "user", content: "Hello from OpenAI"}]
  )

  anthropic.messages.create(
    model: "claude-3-haiku-20240307",
    max_tokens: 50,
    messages: [{role: "user", content: "Hello from Anthropic"}]
  )

  RubyLLM.chat(model: "gpt-4o-mini").ask("Hello from RubyLLM")
end

# Shutdown to flush spans
# (Not necessary in production, just for this short lived example)
OpenTelemetry.tracer_provider.shutdown
# Parse CLI arguments into @options / @remaining_args and return the
# command name as a Symbol (e.g. :exec), :help, or nil when no command
# was given.
#
# Everything after a literal "--" is left untouched in @remaining_args;
# only the portion before it is option-parsed.
#
# @param args [Array<String>] raw ARGV
# @return [Symbol, nil] command symbol, :help, or nil
def parse_args(args)
  @options = {}
  @remaining_args = []

  # Find -- separator: options/command before it, target command after it
  separator_index = args.index("--")
  if separator_index
    to_parse = args[0...separator_index]
    @remaining_args = args[(separator_index + 1)..]
  else
    to_parse = args.dup
  end

  parser = OptionParser.new do |opts|
    opts.banner = "Usage: braintrust [options] -- COMMAND"

    opts.on("--only INTEGRATIONS", "Only instrument these (comma-separated)") do |v|
      @options[:only] = v
    end

    opts.on("--except INTEGRATIONS", "Skip these integrations (comma-separated)") do |v|
      @options[:except] = v
    end

    opts.on("-h", "--help", "Show this help") do
      # `return` here exits parse_args itself (the handler is a proc
      # created in this method's scope), reporting :help to the caller.
      return :help
    end

    opts.on("-v", "--version", "Show version") do
      require_relative "../lib/braintrust/version"
      puts "braintrust #{Braintrust::VERSION}"
      exit 0
    end
  end

  begin
    parser.parse!(to_parse)
  rescue OptionParser::ParseError => e
    # Previously an unknown/malformed option raised and dumped a Ruby
    # backtrace at the user; fail cleanly with the usage text instead.
    warn "braintrust: #{e.message}"
    warn parser.help
    exit 1
  end

  return nil if to_parse.empty?
  to_parse.first.to_sym
end
# Replace the current process with the target command, translating exec
# failures into shell-style exit codes (127 = command not found,
# 126 = found but not executable).
def exec_with_error_handling(args)
  Kernel.exec(*args)
rescue Errno::ENOENT, Errno::EACCES, Errno::ENOEXEC => e
  Kernel.warn "braintrust exec failed: #{e.class.name} #{e.message}"
  status = e.is_a?(Errno::ENOENT) ? 127 : 126
  Kernel.exit status
end
+ +*NOTE: This SDK is currently in BETA status and APIs may change between minor versions.* + +- [Quick Start](#quick-start) +- [Installation](#installation) + - [Setup script](#setup-script) + - [CLI Command](#cli-command) + - [Braintrust.init](#braintrustinit) + - [Environment variables](#environment-variables) +- [Tracing](#tracing) + - [Supported providers](#supported-providers) + - [Manually applying instrumentation](#manually-applying-instrumentation) + - [Creating custom spans](#creating-custom-spans) + - [Attachments](#attachments) + - [Viewing traces](#viewing-traces) +- [Evals](#evals) + - [Datasets](#datasets) + - [Remote scorers](#remote-scorers) +- [Documentation](#documentation) +- [Troubleshooting](#troubleshooting) +- [Contributing](#contributing) +- [License](#license) -This library provides tools for **evaluating** and **tracing** AI applications in [Braintrust](https://www.braintrust.dev). Use it to: - -- **Evaluate** your AI models with custom test cases and scoring functions -- **Trace** LLM calls and monitor AI application performance with OpenTelemetry -- **Integrate** seamlessly with OpenAI and other LLM providers - -This SDK is currently in BETA status and APIs may change. - -## Installation +## Quick Start -Add this line to your application's Gemfile: +Add to your Gemfile: ```ruby -gem 'braintrust' - -# OR if you'd like to activate auto-instrumentation -gem 'braintrust', require 'braintrust/setup' +gem "braintrust", require: "braintrust/setup" ``` -And then execute: +Set your API key and install: ```bash +export BRAINTRUST_API_KEY="your-api-key" bundle install ``` -Or install it yourself as: +Your LLM calls are now automatically traced. View them at [braintrust.dev](https://www.braintrust.dev). + +## Installation + +The SDK also offers additional setup options for a variety of applications. 
+ +| | What it looks like | When to Use | +| -------------------------------------- | ----------------------------------- | --------------------------------------------------------------------------- | +| [**Setup script**](#setup-script) | `require 'braintrust/setup'` | You'd like to automatically setup the SDK at load time. | +| [**CLI command**](#cli-command) | `braintrust exec -- ruby app.rb` | You prefer not to modify the application source code. | +| [**Braintrust.init**](#braintrustinit) | Call `Braintrust.init` in your code | You need to control when setup occurs, or require customized configuration. | + +See [our examples](./examples/setup/README.md) for more detail. + +### Setup script + +For most applications, we recommend adding `require: "braintrust/setup"` to your `Gemfile` or an initializer file in your Ruby application to automatically setup the SDK. This will automatically apply instrumentation to all available LLM libraries. + +You can use [environment variables](#environment-variables) to configure behavior. + +### CLI Command + +You can use this CLI command to instrument any Ruby application without modifying the source code. + +First, make sure the gem is installed on the system: ```bash gem install braintrust ``` -## Quick Start +Then wrap the start up command of any Ruby application to apply: -1. **Set up your API key** +```bash +braintrust exec -- ruby app.rb +braintrust exec -- bundle exec rails server +braintrust exec --only openai -- ruby app.rb +``` - ```bash - export BRAINTRUST_API_KEY="your-api-key" - ``` +You can use [environment variables](#environment-variables) to configure behavior. -2. **Add `Braintrust.init` to your application** +--- - For Rails applications, we suggest adding this to an initializer file. 
+*NOTE: Installing a package at the system-level does not guarantee compatibility with all Ruby applications on that system; conflicts with dependencies can arise.* - ```ruby - require "braintrust" +For stronger assurance of compatibility, we recommend either: - Braintrust.init - ``` +- Installing via the application's `Gemfile` and `bundle install` when possible. +- **OR** for OCI/Docker deployments, `gem install braintrust` when building images in your CI/CD pipeline (and verifying their safe function.) - This sets up Braintrust and auto-instruments your application with LLM tracing. +--- -## How to use... +### Braintrust.init -### Evals +For more control over when auto-instrumentation is applied: ```ruby require "braintrust" Braintrust.init - -# Define task to evaluate -task = ->(input) { input.include?("a") ? "fruit" : "vegetable" } - -# Run evaluation -Braintrust::Eval.run( - project: "my-project", - experiment: "food-classifier", - cases: [ - {input: "apple", expected: "fruit"}, - {input: "carrot", expected: "vegetable"} - ], - task: task, - scorers: [ - ->(input, expected, output) { output == expected ? 1.0 : 0.0 } - ] -) ``` -## Tracing +**Options:** -Supported LLM libraries are automatically instrumented with `Braintrust.init`. If you wish to manually instrument your LLM clients (for greater customization) you can use OpenTelemetry and `instrument!` to make your own custom LLM traces. 
+| Option | Default | Description | +| ----------------- | ---------------------------------------- | --------------------------------------------------------------------------- | +| `api_key` | `ENV['BRAINTRUST_API_KEY']` | API key | +| `auto_instrument` | `true` | `true`, `false`, or Hash with `:only`/`:except` keys to filter integrations | +| `blocking_login` | `false` | Block until login completes (async login when `false`) | +| `default_project` | `ENV['BRAINTRUST_DEFAULT_PROJECT']` | Default project for spans | +| `enable_tracing` | `true` | Enable OpenTelemetry tracing | +| `filter_ai_spans` | `ENV['BRAINTRUST_OTEL_FILTER_AI_SPANS']` | Only export AI-related spans | +| `org_name` | `ENV['BRAINTRUST_ORG_NAME']` | Organization name | +| `set_global` | `true` | Set as global state. Set to `false` for isolated instances | -```ruby -require "braintrust" -require "opentelemetry/sdk" +**Example with options:** -# Initialize Braintrust -Braintrust.init +```ruby +Braintrust.init( + default_project: "my-project", + auto_instrument: { only: [:openai] } +) +``` -# Get a tracer -tracer = OpenTelemetry.tracer_provider.tracer("my-app") +### Environment variables + +| Variable | Description | +| --------------------------------- | ------------------------------------------------------------------------- | +| `BRAINTRUST_API_KEY` | Required. 
Your Braintrust API key | +| `BRAINTRUST_API_URL` | Braintrust API URL (default: `https://api.braintrust.dev`) | +| `BRAINTRUST_APP_URL` | Braintrust app URL (default: `https://www.braintrust.dev`) | +| `BRAINTRUST_AUTO_INSTRUMENT` | Set to `false` to disable auto-instrumentation | +| `BRAINTRUST_DEBUG` | Set to `true` to enable debug logging | +| `BRAINTRUST_DEFAULT_PROJECT` | Default project for spans | +| `BRAINTRUST_INSTRUMENT_EXCEPT` | Comma-separated list of integrations to skip | +| `BRAINTRUST_INSTRUMENT_ONLY` | Comma-separated list of integrations to enable (e.g., `openai,anthropic`) | +| `BRAINTRUST_ORG_NAME` | Organization name | +| `BRAINTRUST_OTEL_FILTER_AI_SPANS` | Set to `true` to only export AI-related spans | -# Create spans to track operations -tracer.in_span("process-data") do |span| - span.set_attribute("user.id", "123") - span.set_attribute("operation.type", "data_processing") +## Tracing - # Your code here - puts "Processing data..." - sleep 0.1 +### Supported providers - # Nested spans are automatically linked - tracer.in_span("nested-operation") do |nested_span| - nested_span.set_attribute("step", "1") - puts "Nested operation..." - end -end +The SDK automatically instruments these LLM libraries: -# Shutdown to flush spans -OpenTelemetry.tracer_provider.shutdown +| Provider | Gem | Versions | Integration Name | Examples | +| --------- | ------------- | -------- | ---------------- | ----------------------------------------------- | +| Anthropic | `anthropic` | >= 0.3.0 | `:anthropic` | [Link](./examples/contrib/anthropic/basic.rb) | +| OpenAI | `openai` | >= 0.1.0 | `:openai` | [Link](./examples/contrib/openai/basic.rb) | +| | `ruby-openai` | >= 7.0.0 | `:ruby_openai` | [Link](./examples/contrib/ruby_openai/basic.rb) | +| Multiple | `ruby_llm` | >= 1.8.0 | `:ruby_llm` | [Link](./examples/contrib/ruby_llm/basic.rb) | -puts "View trace in Braintrust!" 
-``` +### Manually applying instrumentation -### OpenAI +For fine-grained control, disable auto-instrumentation and instrument specific clients: ```ruby require "braintrust" require "openai" -Braintrust.init(auto_instrument: false) - -client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) +Braintrust.init(auto_instrument: false) # Or BRAINTRUST_AUTO_INSTRUMENT=false -# Instrument all clients +# Instrument all OpenAI clients Braintrust.instrument!(:openai) # OR instrument a single client +client = OpenAI::Client.new Braintrust.instrument!(:openai, target: client) - -tracer = OpenTelemetry.tracer_provider.tracer("openai-app") -root_span = nil - -response = tracer.in_span("chat-completion") do |span| - root_span = span - - client.chat.completions.create( - messages: [ - {role: "system", content: "You are a helpful assistant."}, - {role: "user", content: "Say hello!"} - ], - model: "gpt-4o-mini", - max_tokens: 100 - ) -end - -puts "Response: #{response.choices[0].message.content}" - -puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" - -OpenTelemetry.tracer_provider.shutdown ``` -### Anthropic +### Creating custom spans + +Wrap business logic in spans to see it in your traces: ```ruby -require "braintrust" -require "anthropic" +tracer = OpenTelemetry.tracer_provider.tracer("my-app") -Braintrust.init(auto_instrument: false) +tracer.in_span("process-request") do |span| + span.set_attribute("user.id", user_id) -# Instrument Anthropic (instance-level) -client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + # LLM calls inside here are automatically nested under this span + response = client.chat.completions.create(...) 
+end +``` -# Instrument all clients -Braintrust.instrument!(:anthropic) +### Attachments -# OR instrument a single client -Braintrust.instrument!(:anthropic, target: client) +Log binary data (images, PDFs, audio) in your traces: -tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app") -root_span = nil +```ruby +require "braintrust/trace/attachment" -message = tracer.in_span("chat-message") do |span| - root_span = span +att = Braintrust::Trace::Attachment.from_file("image/png", "./photo.png") - client.messages.create( - model: "claude-3-haiku-20240307", - max_tokens: 100, - system: "You are a helpful assistant.", - messages: [ - {role: "user", content: "Say hello!"} +# Use in messages (OpenAI/Anthropic format) +messages = [ + { + role: "user", + content: [ + {type: "text", text: "What's in this image?"}, + att.to_h ] - ) -end - -puts "Response: #{message.content[0].text}" + } +] -puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" - -OpenTelemetry.tracer_provider.shutdown +# Log to span +span.set_attribute("braintrust.input_json", JSON.generate(messages)) ``` -### RubyLLM +Create attachments from various sources: ```ruby -require "braintrust" -require "ruby_llm" - -Braintrust.init(auto_instrument: false) +Braintrust::Trace::Attachment.from_bytes("image/jpeg", image_data) +Braintrust::Trace::Attachment.from_file("application/pdf", "./doc.pdf") +Braintrust::Trace::Attachment.from_url("https://example.com/image.png") +``` -chat = RubyLLM.chat(model: "gpt-4o-mini") +See example: [trace_attachments.rb](./examples/trace/trace_attachments.rb) -# Instrument all clients -Braintrust.instrument!(:ruby_llm) +### Viewing traces -# OR instrument a single client -Braintrust.instrument!(:ruby_llm, target: chat) +Get a permalink to any span: -tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-app") -root_span = nil - -response = tracer.in_span("chat") do |span| - root_span = span +```ruby +tracer = OpenTelemetry.tracer_provider.tracer("my-app") - 
chat.ask("Say hello!") +tracer.in_span("my-operation") do |span| + # your code here + puts "View trace at: #{Braintrust::Trace.permalink(span)}" end - -puts "Response: #{response.content}" - -puts "View trace at: #{Braintrust::Trace.permalink(root_span)}" - -OpenTelemetry.tracer_provider.shutdown ``` -### Attachments +## Evals -Attachments allow you to log binary data (images, PDFs, audio, etc.) as part of your traces. This is particularly useful for multimodal AI applications like vision models. +Run evaluations against your AI systems: ```ruby require "braintrust" -require "braintrust/trace/attachment" Braintrust.init -tracer = OpenTelemetry.tracer_provider.tracer("vision-app") - -tracer.in_span("analyze-image") do |span| - # Create attachment from file - att = Braintrust::Trace::Attachment.from_file( - Braintrust::Trace::Attachment::IMAGE_PNG, - "./photo.png" - ) - - # Build message with attachment (OpenAI/Anthropic format) - messages = [ - { - role: "user", - content: [ - {type: "text", text: "What's in this image?"}, - att.to_h # Converts to {"type" => "base64_attachment", "content" => "data:..."} - ] - } +Braintrust::Eval.run( + project: "my-project", + experiment: "classifier-v1", + cases: [ + {input: "apple", expected: "fruit"}, + {input: "carrot", expected: "vegetable"} + ], + task: ->(input) { classify(input) }, + scorers: [ + ->(input, expected, output) { output == expected ? 1.0 : 0.0 } ] +) +``` - # Log to trace - span.set_attribute("braintrust.input_json", JSON.generate(messages)) -end +### Datasets + +Load test cases from a Braintrust dataset: -OpenTelemetry.tracer_provider.shutdown +```ruby +Braintrust::Eval.run( + project: "my-project", + dataset: "my-dataset", + task: ->(input) { classify(input) }, + scorers: [...] 
+) ``` -You can create attachments from bytes, files, or URLs: +### Remote scorers + +Use scoring functions defined in Braintrust: ```ruby -# From bytes -att = Braintrust::Trace::Attachment.from_bytes("image/jpeg", image_data) +Braintrust::Eval.run( + project: "my-project", + cases: [...], + task: ->(input) { ... }, + scorers: [ + Braintrust::Scorer.remote("my-project", "accuracy-scorer") + ] +) +``` -# From file -att = Braintrust::Trace::Attachment.from_file("application/pdf", "./doc.pdf") +See examples: [eval.rb](./examples/eval.rb), [dataset.rb](./examples/eval/dataset.rb), [remote_functions.rb](./examples/eval/remote_functions.rb) -# From URL -att = Braintrust::Trace::Attachment.from_url("https://example.com/image.png") -``` +## Documentation -## Features +- [Braintrust Documentation](https://www.braintrust.dev/docs) +- [API Reference](https://gemdocs.org/gems/braintrust/) -- **Evaluations**: Run systematic evaluations of your AI systems with custom scoring functions -- **Tracing**: Automatic instrumentation for OpenAI and Anthropic API calls with OpenTelemetry -- **Datasets**: Manage and version your evaluation datasets -- **Experiments**: Track different versions and configurations of your AI systems -- **Observability**: Monitor your AI applications in production +## Troubleshooting -## Examples +#### No traces after adding `require 'braintrust/setup'` to the Gemfile -Check out the [`examples/`](./examples/) directory for complete working examples: +First verify there are no errors in your logs after running with `BRAINTRUST_DEBUG=true` set. 
-- [eval.rb](./examples/eval.rb) - Create and run evaluations with custom test cases and scoring functions -- [trace.rb](./examples/trace.rb) - Manual span creation and tracing -- [openai.rb](./examples/openai.rb) - Automatically trace OpenAI API calls -- [alexrudall_openai.rb](./examples/alexrudall_openai.rb) - Automatically trace ruby-openai gem API calls -- [contrib/anthropic/basic.rb](./examples/contrib/anthropic/basic.rb) - Automatically trace Anthropic API calls -- [ruby_llm.rb](./examples/ruby_llm.rb) - Automatically trace RubyLLM API calls -- [trace/trace_attachments.rb](./examples/trace/trace_attachments.rb) - Log attachments (images, PDFs) in traces -- [eval/dataset.rb](./examples/eval/dataset.rb) - Run evaluations using datasets stored in Braintrust -- [eval/remote_functions.rb](./examples/eval/remote_functions.rb) - Use remote scoring functions +Your application needs the following for this to work: -## Documentation +```ruby +require 'bundler/setup' +Bundler.require +``` -- [Braintrust Documentation](https://www.braintrust.dev/docs) -- [API Documentation](https://gemdocs.org/gems/braintrust/) +It is present by default in Rails applications, but may not be in other Sinatra, Rack or other applications. + +Alternatively, you can add `require 'braintrust/setup'` to your application initialization files. ## Contributing -See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup and contribution guidelines. +See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup and guidelines. ## License -This project is licensed under the Apache License 2.0. See the [LICENSE](./LICENSE) file for details. +Apache License 2.0 - see [LICENSE](./LICENSE). 
From 16773446b39d594fd9a910caacf456ced6653d87 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 8 Jan 2026 22:39:12 -0500 Subject: [PATCH 21/27] Added: Debug statements for auto-instrumentation --- lib/braintrust/contrib.rb | 16 +++++++++++-- lib/braintrust/contrib/integration.rb | 21 +++++++++++++++-- lib/braintrust/contrib/setup.rb | 25 ++++++++++++++------- lib/braintrust/setup.rb | 5 +++++ test/braintrust/contrib/integration_test.rb | 2 +- 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index c81b36d..feef9c2 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -116,13 +116,25 @@ def init(tracer_provider: nil) # @example Exclude specific integrations # Braintrust::Contrib.auto_instrument!(except: [:ruby_llm]) def auto_instrument!(only: nil, except: nil) + if only || except + Braintrust::Log.debug("auto_instrument! called (only: #{only.inspect}, except: #{except.inspect})") + else + Braintrust::Log.debug("auto_instrument! called") + end + targets = registry.available + Braintrust::Log.debug("auto_instrument! available: #{targets.map(&:integration_name).inspect}") targets = targets.select { |i| only.include?(i.integration_name) } if only targets = targets.reject { |i| except.include?(i.integration_name) } if except - targets.each_with_object([]) do |integration, instrumented| - instrumented << integration.integration_name if instrument!(integration.integration_name) + results = targets.each_with_object([]) do |integration, instrumented| + result = instrument!(integration.integration_name) + # Note: false means skipped (not applicable) or failed (error logged at lower level) + Braintrust::Log.debug("auto_instrument! :#{integration.integration_name} #{result ? "instrumented" : "skipped"}") + instrumented << integration.integration_name if result end + Braintrust::Log.debug("auto_instrument! 
complete: #{results.inspect}") + results end # Instrument a registered integration by name. diff --git a/lib/braintrust/contrib/integration.rb b/lib/braintrust/contrib/integration.rb index 1791f6f..e338cda 100644 --- a/lib/braintrust/contrib/integration.rb +++ b/lib/braintrust/contrib/integration.rb @@ -100,6 +100,12 @@ def patcher # @example Instance-level instrumentation (specific client) # integration.instrument!(target: client, tracer_provider: my_provider) def instrument!(**options) + if options.empty? + Braintrust::Log.debug("#{integration_name}.instrument! called") + else + Braintrust::Log.debug("#{integration_name}.instrument! called (#{options.keys.join(", ")})") + end + if options[:target] # Configure the target with provided options (exclude :target from context) context_options = options.except(:target) @@ -117,7 +123,18 @@ def instrument!(**options) # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider # @return [Boolean] true if any patching succeeded or was already done def patch!(**options) - return false unless available? && loaded? && compatible? + unless available? + Braintrust::Log.debug("#{integration_name}.patch! skipped: gem not available") + return false + end + unless loaded? + Braintrust::Log.debug("#{integration_name}.patch! skipped: library not loaded") + return false + end + unless compatible? + Braintrust::Log.debug("#{integration_name}.patch! skipped: version not compatible") + return false + end # Try all applicable patchers success = false @@ -129,7 +146,7 @@ def patch!(**options) success = true if patch.patch!(**options) end - Braintrust::Log.debug("No applicable patcher found for #{integration_name}") unless success + Braintrust::Log.debug("#{integration_name}.patch! 
skipped: no applicable patcher") unless success success end diff --git a/lib/braintrust/contrib/setup.rb b/lib/braintrust/contrib/setup.rb index 27ebd40..3fc0ad4 100644 --- a/lib/braintrust/contrib/setup.rb +++ b/lib/braintrust/contrib/setup.rb @@ -10,13 +10,18 @@ def run! return if @setup_complete @setup_complete = true - if Internal::Env.auto_instrument - # Set up deferred patching for libraries loaded later - if rails_environment? - setup_rails_hook! - else - setup_require_hook! - end + unless Internal::Env.auto_instrument + Braintrust::Log.debug("Contrib::Setup.run! auto-instrumentation disabled via environment") + return + end + + # Set up deferred patching for libraries loaded later + if rails_environment? + Braintrust::Log.debug("Contrib::Setup.run! using Rails after_initialize hook") + setup_rails_hook! + else + Braintrust::Log.debug("Contrib::Setup.run! using require hook") + setup_require_hook! end end @@ -47,7 +52,11 @@ def setup_require_hook! Thread.current[:braintrust_in_require_hook] = true # Check if any integration matches this require path - registry.integrations_for_require_path(path).each do |integration| + integrations = registry.integrations_for_require_path(path) + if integrations.any? + Braintrust::Log.debug("require '#{path}' matched integration hook: #{integrations.map(&:integration_name).inspect}") + end + integrations.each do |integration| next unless integration.available? && integration.compatible? next if only && !only.include?(integration.integration_name) next if except&.include?(integration.integration_name) diff --git a/lib/braintrust/setup.rb b/lib/braintrust/setup.rb index 5feb180..29482fb 100644 --- a/lib/braintrust/setup.rb +++ b/lib/braintrust/setup.rb @@ -27,6 +27,8 @@ def run! @setup_complete = true + Braintrust::Log.debug("Braintrust setting up...") + # Initialize Braintrust (silent failure if no API key) begin Braintrust.init @@ -36,10 +38,13 @@ def run! # Setup contrib for 3rd party integrations Contrib::Setup.run! 
+ + Braintrust::Log.debug("Braintrust setup complete. Auto-instrumentation enabled: #{Braintrust::Internal::Env.auto_instrument}") end end end end # Auto-setup when required + Braintrust::Setup.run! diff --git a/test/braintrust/contrib/integration_test.rb b/test/braintrust/contrib/integration_test.rb index 40524d6..c344f9f 100644 --- a/test/braintrust/contrib/integration_test.rb +++ b/test/braintrust/contrib/integration_test.rb @@ -560,7 +560,7 @@ def self.patchers begin integration.patch! # Check that the "no applicable patcher" message was logged - assert captured_logs.any? { |msg| msg.include?("No applicable patcher found") } + assert captured_logs.any? { |msg| msg.include?("no applicable patcher") } ensure Braintrust::Log.logger = original_logger end From 6b5b85d7018d6694abb7c4db6086c85220349107 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 13:50:50 -0500 Subject: [PATCH 22/27] Refactored: Moved most examples to internal, left simplified user examples --- README.md | 10 ++-- examples/contrib/anthropic.rb | 46 ++++++++++++++++++ examples/contrib/openai.rb | 46 ++++++++++++++++++ examples/contrib/ruby-openai.rb | 47 +++++++++++++++++++ examples/contrib/ruby_llm.rb | 40 ++++++++++++++++ .../{ => internal}/contrib/anthropic/basic.rb | 4 +- .../contrib/anthropic/deprecated.rb | 4 +- .../contrib/anthropic/instance.rb | 4 +- .../contrib/anthropic/streaming.rb | 4 +- .../{ => internal}/contrib/openai/basic.rb | 6 +-- .../contrib/openai/deprecated.rb | 6 +-- .../{ => internal}/contrib/openai/instance.rb | 4 +- .../contrib/openai/streaming.rb | 4 +- .../{ => internal}/contrib/ruby_llm/basic.rb | 4 +- .../contrib/ruby_llm/instance.rb | 4 +- .../contrib/ruby_llm/streaming.rb | 4 +- .../contrib/ruby_llm/tool_usage.rb | 8 ++-- .../contrib/ruby_openai/basic.rb | 6 +-- .../contrib/ruby_openai/deprecated.rb | 10 ++-- .../contrib/ruby_openai/instance.rb | 4 +- .../contrib/ruby_openai/streaming.rb | 4 +- 21 files changed, 224 insertions(+), 45 deletions(-) 
create mode 100644 examples/contrib/anthropic.rb create mode 100644 examples/contrib/openai.rb create mode 100644 examples/contrib/ruby-openai.rb create mode 100644 examples/contrib/ruby_llm.rb rename examples/{ => internal}/contrib/anthropic/basic.rb (90%) rename examples/{ => internal}/contrib/anthropic/deprecated.rb (91%) rename examples/{ => internal}/contrib/anthropic/instance.rb (89%) rename examples/{ => internal}/contrib/anthropic/streaming.rb (91%) rename examples/{ => internal}/contrib/openai/basic.rb (87%) rename examples/{ => internal}/contrib/openai/deprecated.rb (87%) rename examples/{ => internal}/contrib/openai/instance.rb (90%) rename examples/{ => internal}/contrib/openai/streaming.rb (92%) rename examples/{ => internal}/contrib/ruby_llm/basic.rb (88%) rename examples/{ => internal}/contrib/ruby_llm/instance.rb (91%) rename examples/{ => internal}/contrib/ruby_llm/streaming.rb (88%) rename examples/{ => internal}/contrib/ruby_llm/tool_usage.rb (98%) rename examples/{ => internal}/contrib/ruby_openai/basic.rb (90%) rename examples/{ => internal}/contrib/ruby_openai/deprecated.rb (83%) rename examples/{ => internal}/contrib/ruby_openai/instance.rb (92%) rename examples/{ => internal}/contrib/ruby_openai/streaming.rb (92%) diff --git a/README.md b/README.md index 8fdb51a..6b641d4 100644 --- a/README.md +++ b/README.md @@ -149,10 +149,10 @@ The SDK automatically instruments these LLM libraries: | Provider | Gem | Versions | Integration Name | Examples | | --------- | ------------- | -------- | ---------------- | ----------------------------------------------- | -| Anthropic | `anthropic` | >= 0.3.0 | `:anthropic` | [Link](./examples/contrib/anthropic/basic.rb) | -| OpenAI | `openai` | >= 0.1.0 | `:openai` | [Link](./examples/contrib/openai/basic.rb) | -| | `ruby-openai` | >= 7.0.0 | `:ruby_openai` | [Link](./examples/contrib/ruby_openai/basic.rb) | -| Multiple | `ruby_llm` | >= 1.8.0 | `:ruby_llm` | [Link](./examples/contrib/ruby_llm/basic.rb) | +| 
Anthropic | `anthropic` | >= 0.3.0 | `:anthropic` | [Link](./examples/contrib/anthropic.rb) | +| OpenAI | `openai` | >= 0.1.0 | `:openai` | [Link](./examples/contrib/openai.rb) | +| | `ruby-openai` | >= 7.0.0 | `:ruby_openai` | [Link](./examples/contrib/ruby-openai.rb) | +| Multiple | `ruby_llm` | >= 1.8.0 | `:ruby_llm` | [Link](./examples/contrib/ruby_llm.rb) | ### Manually applying instrumentation @@ -305,7 +305,7 @@ require 'bundler/setup' Bundler.require ``` -It is present by default in Rails applications, but may not be in other Sinatra, Rack or other applications. +It is present by default in Rails applications, but may not be in Sinatra, Rack, or other applications. Alternatively, you can add `require 'braintrust/setup'` to your application initialization files. diff --git a/examples/contrib/anthropic.rb b/examples/contrib/anthropic.rb new file mode 100644 index 0000000..4f2dd1f --- /dev/null +++ b/examples/contrib/anthropic.rb @@ -0,0 +1,46 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "anthropic" +require "opentelemetry/sdk" + +# Usage: +# ANTHROPIC_API_KEY=your-key bundle exec appraisal anthropic ruby examples/contrib/anthropic.rb + +# Check for API keys +unless ENV["ANTHROPIC_API_KEY"] + puts "Error: ANTHROPIC_API_KEY environment variable is required" + puts "Get your API key from: https://console.anthropic.com/" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. +Braintrust.init(blocking_login: true) + +# Create Anthropic client +client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) + +# Make a message request (automatically traced!) 
+client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 100, + system: "You are a helpful assistant.", + messages: [ + {role: "user", content: "Say hello and tell me a short joke."} + ] +) + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/openai.rb b/examples/contrib/openai.rb new file mode 100644 index 0000000..dbaa27d --- /dev/null +++ b/examples/contrib/openai.rb @@ -0,0 +1,46 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec appraisal openai ruby examples/contrib/openai.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. +Braintrust.init(blocking_login: true) + +# Create OpenAI client +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Make a chat completion request (automatically traced!) 
+client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 +) + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby-openai.rb b/examples/contrib/ruby-openai.rb new file mode 100644 index 0000000..01c7ba7 --- /dev/null +++ b/examples/contrib/ruby-openai.rb @@ -0,0 +1,47 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec appraisal ruby-openai examples/internal/contrib/ruby_openai/basic.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. 
+Braintrust.init(blocking_login: true) + +# Create OpenAI client +client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) + +client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + max_tokens: 100 + } +) + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. +OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/ruby_llm.rb b/examples/contrib/ruby_llm.rb new file mode 100644 index 0000000..02acf0f --- /dev/null +++ b/examples/contrib/ruby_llm.rb @@ -0,0 +1,40 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "ruby_llm" +require "opentelemetry/sdk" + +# Example: Basic RubyLLM chat with Braintrust tracing +# +# Usage: +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm.rb + +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + exit 1 +end + +# Initialize Braintrust (with blocking login) +# +# NOTE: blocking_login is only necessary for this short-lived example. +# In most production apps, you can omit this. +Braintrust.init(blocking_login: true) + +RubyLLM.configure do |config| + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +chat = RubyLLM.chat(model: "gpt-4o-mini") +chat.ask("What is the capital of France?") + +# Print permalink to view this trace in Braintrust +puts "\n View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +# +# NOTE: shutdown is only necessary for this short-lived example. +# In most production apps, you can omit this. 
+OpenTelemetry.tracer_provider.shutdown diff --git a/examples/contrib/anthropic/basic.rb b/examples/internal/contrib/anthropic/basic.rb similarity index 90% rename from examples/contrib/anthropic/basic.rb rename to examples/internal/contrib/anthropic/basic.rb index b453b0b..d51e98e 100644 --- a/examples/contrib/anthropic/basic.rb +++ b/examples/internal/contrib/anthropic/basic.rb @@ -11,7 +11,7 @@ # This example demonstrates how to automatically trace Anthropic API calls with Braintrust. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/basic.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/basic.rb # Check for API keys unless ENV["ANTHROPIC_API_KEY"] @@ -33,7 +33,7 @@ # Make a message request (automatically traced!) puts "Sending message request to Anthropic..." -message = tracer.in_span("examples/contrib/anthropic/basic.rb") do |span| +message = tracer.in_span("examples/internal/contrib/anthropic/basic.rb") do |span| root_span = span client.messages.create( diff --git a/examples/contrib/anthropic/deprecated.rb b/examples/internal/contrib/anthropic/deprecated.rb similarity index 91% rename from examples/contrib/anthropic/deprecated.rb rename to examples/internal/contrib/anthropic/deprecated.rb index d1761e6..341527a 100644 --- a/examples/contrib/anthropic/deprecated.rb +++ b/examples/internal/contrib/anthropic/deprecated.rb @@ -13,7 +13,7 @@ # For new code, use `Braintrust.instrument!(:anthropic)` instead. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/deprecated.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/deprecated.rb # Check for API keys unless ENV["ANTHROPIC_API_KEY"] @@ -37,7 +37,7 @@ # Make a message request (automatically traced!) puts "Sending message request to Anthropic..." 
-message = tracer.in_span("examples/contrib/anthropic/deprecated.rb") do |span| +message = tracer.in_span("examples/internal/contrib/anthropic/deprecated.rb") do |span| root_span = span client.messages.create( diff --git a/examples/contrib/anthropic/instance.rb b/examples/internal/contrib/anthropic/instance.rb similarity index 89% rename from examples/contrib/anthropic/instance.rb rename to examples/internal/contrib/anthropic/instance.rb index 8b49458..6929e60 100644 --- a/examples/contrib/anthropic/instance.rb +++ b/examples/internal/contrib/anthropic/instance.rb @@ -12,7 +12,7 @@ # all Anthropic clients globally. # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/instance.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/instance.rb unless ENV["ANTHROPIC_API_KEY"] puts "Error: ANTHROPIC_API_KEY environment variable is required" @@ -31,7 +31,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") root_span = nil -tracer.in_span("examples/contrib/anthropic/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/anthropic/instance.rb") do |span| root_span = span puts "Calling traced client..." diff --git a/examples/contrib/anthropic/streaming.rb b/examples/internal/contrib/anthropic/streaming.rb similarity index 91% rename from examples/contrib/anthropic/streaming.rb rename to examples/internal/contrib/anthropic/streaming.rb index ea42074..ac70daf 100644 --- a/examples/contrib/anthropic/streaming.rb +++ b/examples/internal/contrib/anthropic/streaming.rb @@ -14,7 +14,7 @@ # 2. 
Text-only streaming with .text (simplified) # # Usage: -# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/contrib/anthropic/streaming.rb +# ANTHROPIC_API_KEY=your-key bundle exec ruby examples/internal/contrib/anthropic/streaming.rb unless ENV["ANTHROPIC_API_KEY"] puts "Error: ANTHROPIC_API_KEY environment variable is required" @@ -29,7 +29,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("anthropic-streaming-example") root_span = nil -tracer.in_span("examples/contrib/anthropic/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/anthropic/streaming.rb") do |span| root_span = span # Pattern 1: Iterator-based streaming with .each diff --git a/examples/contrib/openai/basic.rb b/examples/internal/contrib/openai/basic.rb similarity index 87% rename from examples/contrib/openai/basic.rb rename to examples/internal/contrib/openai/basic.rb index d32371b..5c1ad0e 100644 --- a/examples/contrib/openai/basic.rb +++ b/examples/internal/contrib/openai/basic.rb @@ -12,10 +12,10 @@ # # Note: The openai gem is a development dependency. To run this example: # 1. Install dependencies: bundle install -# 2. Run from the SDK root: bundle exec ruby examples/contrib/openai/basic.rb +# 2. Run from the SDK root: bundle exec ruby examples/internal/contrib/openai/basic.rb # # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/basic.rb +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/internal/contrib/openai/basic.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -37,7 +37,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI..." 
-response = tracer.in_span("examples/contrib/openai/basic.rb") do |span| +response = tracer.in_span("examples/internal/contrib/openai/basic.rb") do |span| root_span = span client.chat.completions.create( diff --git a/examples/contrib/openai/deprecated.rb b/examples/internal/contrib/openai/deprecated.rb similarity index 87% rename from examples/contrib/openai/deprecated.rb rename to examples/internal/contrib/openai/deprecated.rb index 872242f..c2e6fad 100644 --- a/examples/contrib/openai/deprecated.rb +++ b/examples/internal/contrib/openai/deprecated.rb @@ -12,10 +12,10 @@ # # Note: The openai gem is a development dependency. To run this example: # 1. Install dependencies: bundle install -# 2. Run from the SDK root: bundle exec ruby examples/contrib/openai/deprecated.rb +# 2. Run from the SDK root: bundle exec ruby examples/internal/contrib/openai/deprecated.rb # # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/contrib/openai/deprecated.rb +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/internal/contrib/openai/deprecated.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -39,7 +39,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI..." -response = tracer.in_span("examples/contrib/openai/deprecated.rb") do |span| +response = tracer.in_span("examples/internal/contrib/openai/deprecated.rb") do |span| root_span = span client.chat.completions.create( diff --git a/examples/contrib/openai/instance.rb b/examples/internal/contrib/openai/instance.rb similarity index 90% rename from examples/contrib/openai/instance.rb rename to examples/internal/contrib/openai/instance.rb index 6516557..396b765 100644 --- a/examples/contrib/openai/instance.rb +++ b/examples/internal/contrib/openai/instance.rb @@ -12,7 +12,7 @@ # all OpenAI clients globally. 
# # Usage: -# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/instance.rb +# OPENAI_API_KEY=your-key bundle exec ruby examples/internal/contrib/openai/instance.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -31,7 +31,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("openai-example") root_span = nil -tracer.in_span("examples/contrib/openai/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/openai/instance.rb") do |span| root_span = span puts "Calling traced client..." diff --git a/examples/contrib/openai/streaming.rb b/examples/internal/contrib/openai/streaming.rb similarity index 92% rename from examples/contrib/openai/streaming.rb rename to examples/internal/contrib/openai/streaming.rb index 5c4dd89..0dcdc86 100644 --- a/examples/contrib/openai/streaming.rb +++ b/examples/internal/contrib/openai/streaming.rb @@ -14,7 +14,7 @@ # 2. Responses API with stream # # Usage: -# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/openai/streaming.rb +# OPENAI_API_KEY=your-key bundle exec ruby examples/internal/contrib/openai/streaming.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -29,7 +29,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("openai-streaming-example") root_span = nil -tracer.in_span("examples/contrib/openai/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/openai/streaming.rb") do |span| root_span = span # Pattern 1: Chat completions streaming with stream_raw diff --git a/examples/contrib/ruby_llm/basic.rb b/examples/internal/contrib/ruby_llm/basic.rb similarity index 88% rename from examples/contrib/ruby_llm/basic.rb rename to examples/internal/contrib/ruby_llm/basic.rb index cc35fbb..ce18061 100644 --- a/examples/contrib/ruby_llm/basic.rb +++ b/examples/internal/contrib/ruby_llm/basic.rb @@ -9,7 +9,7 @@ # Example: Basic RubyLLM chat with Braintrust tracing # # Usage: -# OPENAI_API_KEY=your-key 
bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/basic.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/internal/contrib/ruby_llm/basic.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -27,7 +27,7 @@ root_span = nil puts "Sending chat request..." -response = tracer.in_span("examples/contrib/ruby_llm/basic.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_llm/basic.rb") do |span| root_span = span chat = RubyLLM.chat(model: "gpt-4o-mini") chat.ask("What is the capital of France?") diff --git a/examples/contrib/ruby_llm/instance.rb b/examples/internal/contrib/ruby_llm/instance.rb similarity index 91% rename from examples/contrib/ruby_llm/instance.rb rename to examples/internal/contrib/ruby_llm/instance.rb index 5feeb52..2dae854 100644 --- a/examples/contrib/ruby_llm/instance.rb +++ b/examples/internal/contrib/ruby_llm/instance.rb @@ -12,7 +12,7 @@ # all RubyLLM chats globally. # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/instance.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/internal/contrib/ruby_llm/instance.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -35,7 +35,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("ruby_llm-example") root_span = nil -tracer.in_span("examples/contrib/ruby_llm/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_llm/instance.rb") do |span| root_span = span puts "Calling traced chat..." 
diff --git a/examples/contrib/ruby_llm/streaming.rb b/examples/internal/contrib/ruby_llm/streaming.rb similarity index 88% rename from examples/contrib/ruby_llm/streaming.rb rename to examples/internal/contrib/ruby_llm/streaming.rb index 179064f..47d504a 100644 --- a/examples/contrib/ruby_llm/streaming.rb +++ b/examples/internal/contrib/ruby_llm/streaming.rb @@ -9,7 +9,7 @@ # Example: Streaming RubyLLM chat with Braintrust tracing # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/streaming.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/internal/contrib/ruby_llm/streaming.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -29,7 +29,7 @@ puts "Streaming response..." print "\nAssistant: " -tracer.in_span("examples/contrib/ruby_llm/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_llm/streaming.rb") do |span| root_span = span chat = RubyLLM.chat(model: "gpt-4o-mini") chat.ask("Write a haiku about programming") do |chunk| diff --git a/examples/contrib/ruby_llm/tool_usage.rb b/examples/internal/contrib/ruby_llm/tool_usage.rb similarity index 98% rename from examples/contrib/ruby_llm/tool_usage.rb rename to examples/internal/contrib/ruby_llm/tool_usage.rb index fa154ea..01e2c00 100644 --- a/examples/contrib/ruby_llm/tool_usage.rb +++ b/examples/internal/contrib/ruby_llm/tool_usage.rb @@ -16,7 +16,7 @@ # - Multi-turn conversation with multiple tool invocations # # Trace hierarchy will look like: -# examples/contrib/ruby_llm/tool_usage.rb +# examples/internal/contrib/ruby_llm/tool_usage.rb # └── ruby_llm.chat # ├── ruby_llm.tool.search_flights # │ └── flight_api.search (internal span) @@ -27,7 +27,7 @@ # └── events_api.search (internal span) # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby examples/contrib/ruby_llm/tool_usage.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_llm ruby 
examples/internal/contrib/ruby_llm/tool_usage.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -319,7 +319,7 @@ def execute(city:, travel_dates:, interests: "food,art") puts "Watch the trace to see how tool calls create hierarchical spans." puts -response = tracer.in_span("examples/contrib/ruby_llm/tool_usage.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_llm/tool_usage.rb") do |span| root_span = span chat = RubyLLM.chat(model: "gpt-4o-mini") @@ -357,7 +357,7 @@ def execute(city:, travel_dates:, interests: "food,art") puts "View the full trace: #{Braintrust::Trace.permalink(root_span)}" puts puts "Expected span hierarchy:" -puts " └── examples/contrib/ruby_llm/tool_usage.rb" +puts " └── examples/internal/contrib/ruby_llm/tool_usage.rb" puts " └── ruby_llm.chat" puts " ├── ruby_llm.tool.search_flights" puts " │ └── flight_api.search" diff --git a/examples/contrib/ruby_openai/basic.rb b/examples/internal/contrib/ruby_openai/basic.rb similarity index 90% rename from examples/contrib/ruby_openai/basic.rb rename to examples/internal/contrib/ruby_openai/basic.rb index 8d58f4f..a6179c7 100644 --- a/examples/contrib/ruby_openai/basic.rb +++ b/examples/internal/contrib/ruby_openai/basic.rb @@ -14,10 +14,10 @@ # # Note: ruby-openai is an optional development dependency. To run this example: # 1. Install ruby-openai: gem install ruby-openai -# 2. Run from the SDK root: ruby examples/contrib/ruby_openai/basic.rb +# 2. Run from the SDK root: ruby examples/internal/contrib/ruby_openai/basic.rb # # Usage: -# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/basic.rb +# OPENAI_API_KEY=your-openai-key ruby examples/internal/contrib/ruby_openai/basic.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -40,7 +40,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." 
-response = tracer.in_span("examples/contrib/ruby_openai/basic.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_openai/basic.rb") do |span| root_span = span # ruby-openai uses: client.chat(parameters: {...}) diff --git a/examples/contrib/ruby_openai/deprecated.rb b/examples/internal/contrib/ruby_openai/deprecated.rb similarity index 83% rename from examples/contrib/ruby_openai/deprecated.rb rename to examples/internal/contrib/ruby_openai/deprecated.rb index 370208d..0e1225d 100755 --- a/examples/contrib/ruby_openai/deprecated.rb +++ b/examples/internal/contrib/ruby_openai/deprecated.rb @@ -10,18 +10,18 @@ # # DEPRECATED: This example uses the old wrap() API which is deprecated. # Please use the new instrument!() API instead: -# - examples/contrib/ruby_openai/basic.rb - Class-level instrumentation (all clients) -# - examples/contrib/ruby_openai/instance.rb - Instance-level instrumentation (specific client) +# - examples/internal/contrib/ruby_openai/basic.rb - Class-level instrumentation (all clients) +# - examples/internal/contrib/ruby_openai/instance.rb - Instance-level instrumentation (specific client) # # This example demonstrates how to automatically trace OpenAI API calls using # the ruby-openai gem (by alexrudall) with Braintrust. # # Note: ruby-openai is an optional development dependency. To run this example: # 1. Install ruby-openai: gem install ruby-openai -# 2. Run from the SDK root: ruby examples/contrib/ruby_openai/deprecated.rb +# 2. Run from the SDK root: ruby examples/internal/contrib/ruby_openai/deprecated.rb # # Usage: -# OPENAI_API_KEY=your-openai-key ruby examples/contrib/ruby_openai/deprecated.rb +# OPENAI_API_KEY=your-openai-key ruby examples/internal/contrib/ruby_openai/deprecated.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -44,7 +44,7 @@ # Make a chat completion request (automatically traced!) puts "Sending chat completion request to OpenAI (using ruby-openai gem)..." 
-response = tracer.in_span("examples/contrib/ruby_openai/deprecated.rb") do |span| +response = tracer.in_span("examples/internal/contrib/ruby_openai/deprecated.rb") do |span| root_span = span # ruby-openai uses: client.chat(parameters: {...}) diff --git a/examples/contrib/ruby_openai/instance.rb b/examples/internal/contrib/ruby_openai/instance.rb similarity index 92% rename from examples/contrib/ruby_openai/instance.rb rename to examples/internal/contrib/ruby_openai/instance.rb index 97ce7ab..c5c78ab 100644 --- a/examples/contrib/ruby_openai/instance.rb +++ b/examples/internal/contrib/ruby_openai/instance.rb @@ -12,7 +12,7 @@ # all OpenAI clients globally. # # Usage: -# OPENAI_API_KEY=your-key bundle exec appraisal ruby_openai ruby examples/contrib/ruby_openai/instance.rb +# OPENAI_API_KEY=your-key bundle exec appraisal ruby_openai ruby examples/internal/contrib/ruby_openai/instance.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -31,7 +31,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") root_span = nil -tracer.in_span("examples/contrib/ruby_openai/instance.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_openai/instance.rb") do |span| root_span = span puts "Calling traced client..." diff --git a/examples/contrib/ruby_openai/streaming.rb b/examples/internal/contrib/ruby_openai/streaming.rb similarity index 92% rename from examples/contrib/ruby_openai/streaming.rb rename to examples/internal/contrib/ruby_openai/streaming.rb index b958744..88b7a7c 100644 --- a/examples/contrib/ruby_openai/streaming.rb +++ b/examples/internal/contrib/ruby_openai/streaming.rb @@ -12,7 +12,7 @@ # using the ruby-openai gem. Shows the callback-based streaming pattern. 
# # Usage: -# OPENAI_API_KEY=your-key bundle exec ruby examples/contrib/ruby_openai/streaming.rb +# OPENAI_API_KEY=your-key bundle exec ruby examples/internal/contrib/ruby_openai/streaming.rb unless ENV["OPENAI_API_KEY"] puts "Error: OPENAI_API_KEY environment variable is required" @@ -28,7 +28,7 @@ tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-streaming-example") root_span = nil -tracer.in_span("examples/contrib/ruby_openai/streaming.rb") do |span| +tracer.in_span("examples/internal/contrib/ruby_openai/streaming.rb") do |span| root_span = span # Pattern 1: Chat streaming with callback proc From db6cc9f4e54f5fd222550c78b0c80d909649a6a3 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 18:14:38 -0500 Subject: [PATCH 23/27] Fixed: Missing darwin arch in Gemfile.lock --- Gemfile.lock | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Gemfile.lock b/Gemfile.lock index d2ff082..1b9cd16 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -24,6 +24,9 @@ GEM bigdecimal rexml docile (1.4.1) + google-protobuf (4.33.2-arm64-darwin) + bigdecimal + rake (>= 13) google-protobuf (4.33.2-x86_64-linux-gnu) bigdecimal rake (>= 13) @@ -122,6 +125,7 @@ GEM yard (0.9.38) PLATFORMS + arm64-darwin x86_64-linux DEPENDENCIES From f97127a8aa3de77c133edce1ad8cd801f191a363 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 18:55:48 -0500 Subject: [PATCH 24/27] Changed: Make Internal::Time tests less flaky --- test/braintrust/internal/time_test.rb | 67 +++------------------------ 1 file changed, 7 insertions(+), 60 deletions(-) diff --git a/test/braintrust/internal/time_test.rb b/test/braintrust/internal/time_test.rb index d049149..9c1257b 100644 --- a/test/braintrust/internal/time_test.rb +++ b/test/braintrust/internal/time_test.rb @@ -8,12 +8,11 @@ class Braintrust::Internal::TimeTest < Minitest::Test # --- measure with block --- - def test_measure_with_block_returns_elapsed_time - elapsed = Time.measure { sleep 0.01 } + def 
test_measure_with_block_returns_float + elapsed = Time.measure { 1 + 1 } assert_kind_of Float, elapsed - assert elapsed >= 0.01, "Expected elapsed >= 0.01, got #{elapsed}" - assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" + assert elapsed >= 0 end def test_measure_with_block_executes_block @@ -32,72 +31,20 @@ def test_measure_with_block_returns_time_not_block_result # --- measure without arguments --- - def test_measure_without_args_returns_monotonic_time - time1 = Time.measure - time2 = Time.measure - - assert_kind_of Float, time1 - assert_kind_of Float, time2 - assert time2 >= time1, "Monotonic time should not go backwards" - end - - def test_measure_without_args_returns_positive_value + def test_measure_without_args_returns_float time = Time.measure + assert_kind_of Float, time assert time > 0, "Monotonic time should be positive" end # --- measure with start_time --- - def test_measure_with_start_time_returns_elapsed + def test_measure_with_start_time_returns_float start = Time.measure - sleep 0.01 elapsed = Time.measure(start) assert_kind_of Float, elapsed - assert elapsed >= 0.01, "Expected elapsed >= 0.01, got #{elapsed}" - assert elapsed < 0.1, "Expected elapsed < 0.1, got #{elapsed}" - end - - def test_measure_with_start_time_returns_non_negative - start = Time.measure - elapsed = Time.measure(start) - - assert elapsed >= 0, "Elapsed time should be non-negative" - end - - # --- monotonic behavior --- - - def test_measure_is_monotonic - times = 10.times.map { Time.measure } - - times.each_cons(2) do |t1, t2| - assert t2 >= t1, "Monotonic time should never decrease" - end - end - - def test_measure_returns_seconds_as_float - start = Time.measure - sleep 0.05 - elapsed = Time.measure(start) - - # Should be roughly 0.05 seconds, not 50 milliseconds or 50_000_000 nanoseconds - assert elapsed >= 0.04, "Expected seconds, got #{elapsed} (too small, might be wrong unit)" - assert elapsed < 0.2, "Expected seconds, got #{elapsed} (too large, might be 
wrong unit)" - end - - # --- equivalence of block and start_time modes --- - - def test_block_and_start_time_modes_equivalent - # Both modes should measure approximately the same duration - block_elapsed = Time.measure { sleep 0.02 } - - start = Time.measure - sleep 0.02 - manual_elapsed = Time.measure(start) - - # Both should be close to 0.02 seconds - assert_in_delta block_elapsed, manual_elapsed, 0.01, - "Block and start_time modes should produce similar results" + assert elapsed >= 0 end end From 32ddc6b19c086d49a94893d6c171a558af3404de Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 19:44:07 -0500 Subject: [PATCH 25/27] Fixed: More flake from polluted global state --- .../ruby_llm/instrumentation/chat_test.rb | 165 +++++++++--------- .../contrib/ruby_openai/openai_test.rb | 46 ++--- test/braintrust/contrib/setup_test.rb | 10 +- test/braintrust/contrib_test.rb | 10 +- test/test_helper.rb | 4 + 5 files changed, 127 insertions(+), 108 deletions(-) diff --git a/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb index c0d1f5a..788f36a 100644 --- a/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb +++ b/test/braintrust/contrib/ruby_llm/instrumentation/chat_test.rb @@ -53,27 +53,34 @@ def test_applied_returns_true_when_included end end -# Define test tool classes for different ruby_llm versions -if defined?(RubyLLM) - RUBY_LLM_VERSION = Gem.loaded_specs["ruby_llm"]&.version || Gem::Version.new("0.0.0") - SUPPORTS_PARAMS_DSL = Gem::Version.new("1.9.0") <= RUBY_LLM_VERSION +# E2E tests for Chat instrumentation +class Braintrust::Contrib::RubyLLM::Instrumentation::ChatE2ETest < Minitest::Test + include Braintrust::Contrib::RubyLLM::IntegrationHelper - # Tool for ruby_llm 1.8+ (using param - singular) - class WeatherTestTool < RubyLLM::Tool - description "Get the current weather for a location" + def self.supports_tool_calling? 
+ return false unless defined?(RubyLLM) + version = Gem.loaded_specs["ruby_llm"]&.version || Gem::Version.new("0.0.0") + Gem::Version.new("1.9.0") <= version + end - param :location, type: :string, desc: "The city and state, e.g. San Francisco, CA" - param :unit, type: :string, desc: "Temperature unit (celsius or fahrenheit)" + # Creates a test tool class and yields it to the block. + # Uses stub_const to avoid polluting global state. + def with_weather_test_tool + tool_class = Class.new(RubyLLM::Tool) do + description "Get the current weather for a location" - def execute(location:, unit: "fahrenheit") - {location: location, temperature: 72, unit: unit, conditions: "sunny"} + param :location, type: :string, desc: "The city and state, e.g. San Francisco, CA" + param :unit, type: :string, desc: "Temperature unit (celsius or fahrenheit)" + + def execute(location:, unit: "fahrenheit") + {location: location, temperature: 72, unit: unit, conditions: "sunny"} + end end - end -end -# E2E tests for Chat instrumentation -class Braintrust::Contrib::RubyLLM::Instrumentation::ChatE2ETest < Minitest::Test - include Braintrust::Contrib::RubyLLM::IntegrationHelper + Object.stub_const(:WeatherTestTool, tool_class) do + yield WeatherTestTool + end + end def setup skip_unless_ruby_llm! 
@@ -188,71 +195,73 @@ def test_streaming_complete_creates_span # --- Tool calling --- def test_tool_calling_creates_nested_spans - skip "Tool calling test requires ruby_llm >= 1.9" unless defined?(SUPPORTS_PARAMS_DSL) && SUPPORTS_PARAMS_DSL - - VCR.use_cassette("contrib/ruby_llm/tool_calling") do - rig = setup_otel_test_rig - - RubyLLM.configure do |config| - config.openai_api_key = get_openai_key - end - - chat = RubyLLM.chat(model: "gpt-4o-mini") - Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) - chat.with_tool(WeatherTestTool) - - response = chat.ask("What's the weather like in San Francisco?") - - refute_nil response - refute_nil response.content - - # Should have multiple spans: chat + tool + possibly another chat for follow-up - spans = rig.drain - assert spans.length >= 2, "Expected at least 2 spans for tool calling (chat + tool)" - - # Find the chat span and tool span - chat_spans = spans.select { |s| s.name == "ruby_llm.chat" } - tool_spans = spans.select { |s| s.name.start_with?("ruby_llm.tool.") } - - assert chat_spans.length >= 1, "Expected at least one ruby_llm.chat span" - assert tool_spans.length >= 1, "Expected at least one ruby_llm.tool.* span" - - # Verify tool span has correct attributes - tool_span = tool_spans.first - assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" - span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) - assert_equal "tool", span_attrs["type"] - assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" - assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" - - # Verify chat span metadata contains exact tool schema (OpenAI format) - chat_span = chat_spans.first - metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) - - expected_tools = [ - { - "type" => "function", - "function" => { - "name" => "weather_test", - "description" => 
"Get the current weather for a location", - "parameters" => { - "type" => "object", - "properties" => { - "location" => { - "type" => "string", - "description" => "The city and state, e.g. San Francisco, CA" + skip "Tool calling test requires ruby_llm >= 1.9" unless self.class.supports_tool_calling? + + with_weather_test_tool do |tool_class| + VCR.use_cassette("contrib/ruby_llm/tool_calling") do + rig = setup_otel_test_rig + + RubyLLM.configure do |config| + config.openai_api_key = get_openai_key + end + + chat = RubyLLM.chat(model: "gpt-4o-mini") + Braintrust.instrument!(:ruby_llm, target: chat, tracer_provider: rig.tracer_provider) + chat.with_tool(tool_class) + + response = chat.ask("What's the weather like in San Francisco?") + + refute_nil response + refute_nil response.content + + # Should have multiple spans: chat + tool + possibly another chat for follow-up + spans = rig.drain + assert spans.length >= 2, "Expected at least 2 spans for tool calling (chat + tool)" + + # Find the chat span and tool span + chat_spans = spans.select { |s| s.name == "ruby_llm.chat" } + tool_spans = spans.select { |s| s.name.start_with?("ruby_llm.tool.") } + + assert chat_spans.length >= 1, "Expected at least one ruby_llm.chat span" + assert tool_spans.length >= 1, "Expected at least one ruby_llm.tool.* span" + + # Verify tool span has correct attributes + tool_span = tool_spans.first + assert tool_span.attributes.key?("braintrust.span_attributes"), "Tool span should have span_attributes" + span_attrs = JSON.parse(tool_span.attributes["braintrust.span_attributes"]) + assert_equal "tool", span_attrs["type"] + assert tool_span.attributes.key?("braintrust.input_json"), "Tool span should have input" + assert tool_span.attributes.key?("braintrust.output_json"), "Tool span should have output" + + # Verify chat span metadata contains exact tool schema (OpenAI format) + chat_span = chat_spans.first + metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + + expected_tools = [ 
+ { + "type" => "function", + "function" => { + "name" => "weather_test", + "description" => "Get the current weather for a location", + "parameters" => { + "type" => "object", + "properties" => { + "location" => { + "type" => "string", + "description" => "The city and state, e.g. San Francisco, CA" + }, + "unit" => { + "type" => "string", + "description" => "Temperature unit (celsius or fahrenheit)" + } }, - "unit" => { - "type" => "string", - "description" => "Temperature unit (celsius or fahrenheit)" - } - }, - "required" => ["location", "unit"] + "required" => ["location", "unit"] + } } } - } - ] - assert_equal expected_tools, metadata["tools"] + ] + assert_equal expected_tools, metadata["tools"] + end end end diff --git a/test/braintrust/contrib/ruby_openai/openai_test.rb b/test/braintrust/contrib/ruby_openai/openai_test.rb index b147db8..4575728 100644 --- a/test/braintrust/contrib/ruby_openai/openai_test.rb +++ b/test/braintrust/contrib/ruby_openai/openai_test.rb @@ -1,14 +1,7 @@ # frozen_string_literal: true -# require "test_helper" - -# # Load the openai gem to define OpenAI::Client (and OpenAI::Internal if official gem) -# # This must happen before tests run so the gem detection logic works -# begin -# require "openai" -# rescue LoadError -# # Gem not available in this appraisal -# end +require "test_helper" +require_relative "integration_helper" # This test verifies that the RubyOpenAI integration correctly identifies when the # `ruby-openai` gem is loaded vs the official `openai` gem. @@ -23,17 +16,13 @@ # 2. When `ruby-openai` gem is loaded, instrument! succeeds and patchers are applied class Braintrust::Contrib::RubyOpenAI::OpenAITest < Minitest::Test - Integration = Braintrust::Contrib::RubyOpenAI::Integration + include Braintrust::Contrib::RubyOpenAI::IntegrationHelper - # def ruby_openai_available? - # # OpenAI::Internal is only defined in the official openai gem - # !Gem.loaded_specs["ruby-openai"].nil? 
- # end + Integration = Braintrust::Contrib::RubyOpenAI::Integration - # def official_openai_available? - # # OpenAI::Internal is only defined in the official openai gem - # !Gem.loaded_specs["openai"].nil? - # end + def setup + load_ruby_openai_if_available + end def ruby_openai_loaded? # Check if ruby-openai gem is loaded (not the official openai gem). @@ -64,12 +53,25 @@ def test_loaded_returns_false_when_ruby_openai_gem_not_loaded def test_instrument_succeeds_for_ruby_openai_gem skip "ruby-openai gem not loaded" unless ruby_openai_loaded? - result = Braintrust.instrument!(:ruby_openai) + # Run in fork to avoid polluting global state (class-level patching) + assert_in_fork do + require "openai" + + result = Braintrust.instrument!(:ruby_openai) + + unless result + puts "FAIL: instrument! should return truthy for ruby-openai gem" + exit 1 + end - assert result, "instrument! should return truthy for ruby-openai gem" + any_patched = Integration.patchers.any?(&:patched?) + unless any_patched + puts "FAIL: at least one patcher should be patched for ruby-openai gem" + exit 1 + end - any_patched = Integration.patchers.any?(&:patched?) - assert any_patched, "at least one patcher should be patched for ruby-openai gem" + puts "instrument_succeeds_test:passed" + end end # --- OpenAI::Internal --- diff --git a/test/braintrust/contrib/setup_test.rb b/test/braintrust/contrib/setup_test.rb index 59cb432..238eb0d 100644 --- a/test/braintrust/contrib/setup_test.rb +++ b/test/braintrust/contrib/setup_test.rb @@ -21,10 +21,12 @@ def test_run_is_idempotent call_count = 0 - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { call_count += 1 }) do - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do + Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { call_count += 1 }) do + Braintrust::Contrib::Setup.run! + Braintrust::Contrib::Setup.run! 
+ Braintrust::Contrib::Setup.run! + end end assert_equal 1, call_count, "run! should only execute once" diff --git a/test/braintrust/contrib_test.rb b/test/braintrust/contrib_test.rb index 4e19da1..af867ec 100644 --- a/test/braintrust/contrib_test.rb +++ b/test/braintrust/contrib_test.rb @@ -8,8 +8,10 @@ class Braintrust::ContribTest < Minitest::Test def test_auto_instrument_enabled_by_default called_with = nil - Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do - Braintrust.auto_instrument! + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do + Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do + Braintrust.auto_instrument! + end end assert_equal({only: nil, except: nil}, called_with) @@ -70,7 +72,7 @@ def test_auto_instrument_passes_except_from_hash def test_auto_instrument_reads_only_from_env called_with = nil - ClimateControl.modify(BRAINTRUST_INSTRUMENT_ONLY: "openai,anthropic") do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil, BRAINTRUST_INSTRUMENT_ONLY: "openai,anthropic") do Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do Braintrust.auto_instrument! end @@ -82,7 +84,7 @@ def test_auto_instrument_reads_only_from_env def test_auto_instrument_reads_except_from_env called_with = nil - ClimateControl.modify(BRAINTRUST_INSTRUMENT_EXCEPT: "ruby_llm") do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil, BRAINTRUST_INSTRUMENT_EXCEPT: "ruby_llm") do Braintrust::Contrib.stub(:auto_instrument!, ->(**kwargs) { called_with = kwargs }) do Braintrust.auto_instrument! end diff --git a/test/test_helper.rb b/test/test_helper.rb index e03950d..b59dd77 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,5 +1,9 @@ # frozen_string_literal: true +# Disable auto-instrumentation by default in tests to prevent test pollution. +# Tests that specifically need auto-instrumentation can opt-in via auto_instrument: true. 
+ENV["BRAINTRUST_AUTO_INSTRUMENT"] ||= "false" + # Start SimpleCov BEFORE loading any code to track # Skip coverage when running under appraisal (different dependency scenarios) unless ENV["BUNDLE_GEMFILE"]&.include?("gemfiles/") From 9ed8f4bc0a544ec7a698926157f8315e9b13fba9 Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 9 Jan 2026 21:27:01 -0500 Subject: [PATCH 26/27] Fixed: `braintrust exec` not working in Rails apps (zeitwerk incompatibility) --- braintrust.gemspec | 1 + lib/braintrust/contrib/setup.rb | 186 ++++++--- lib/braintrust/setup.rb | 4 +- test/braintrust/contrib/setup_test.rb | 570 ++++++++++++++++---------- test/braintrust/setup_test.rb | 35 +- test/support/fixture_helper.rb | 33 +- 6 files changed, 526 insertions(+), 303 deletions(-) diff --git a/braintrust.gemspec b/braintrust.gemspec index 6f1fd68..ba9f5c3 100644 --- a/braintrust.gemspec +++ b/braintrust.gemspec @@ -20,6 +20,7 @@ Gem::Specification.new do |spec| # Specify which files should be added to the gem when it is released. spec.files = Dir.glob(%w[ + exe/* lib/**/*.rb README.md LICENSE diff --git a/lib/braintrust/contrib/setup.rb b/lib/braintrust/contrib/setup.rb index 3fc0ad4..41a18ce 100644 --- a/lib/braintrust/contrib/setup.rb +++ b/lib/braintrust/contrib/setup.rb @@ -4,75 +4,165 @@ module Braintrust module Contrib + # Automatic instrumentation setup for LLM libraries. + # + # Intercepts `require` calls to detect when LLM libraries are loaded, then patches + # them automatically. The main challenge is doing this safely with zeitwerk (Rails' + # autoloader) which also hooks into require. + # + # ## The Zeitwerk Problem + # + # Zeitwerk uses `alias_method :zeitwerk_original_require, :require`. If we prepend + # to Kernel before zeitwerk loads, zeitwerk captures our method as its "original", + # creating an infinite loop. + # + # ## Solution: Two-Phase Hook + # + # ┌─────────────────────────────────────────────────────────────────────────┐ + # │ Setup.run! 
called │ + # └─────────────────────────────────────────────────────────────────────────┘ + # │ + # ┌─────────────────────────┼─────────────────────────┐ + # ▼ ▼ ▼ + # ┌─────────────────┐ ┌─────────────────────┐ ┌─────────────────────┐ + # │ Rails loaded? │ │ Zeitwerk loaded? │ │ Neither loaded yet │ + # │ (Rails::Railtie)│ │ │ │ │ + # └────────┬────────┘ └──────────┬──────────┘ └──────────┬──────────┘ + # │ │ │ + # ▼ ▼ ▼ + # ┌─────────────────┐ ┌─────────────────────┐ ┌─────────────────────┐ + # │ Install Railtie │ │ Install prepend │ │ Install watcher │ + # │ (after_init) │ │ hook directly │ │ hook (alias_method) │ + # └─────────────────┘ └─────────────────────┘ └──────────┬──────────┘ + # │ │ │ + # │ │ ┌──────────┴──────────┐ + # │ │ ▼ ▼ + # │ │ ┌──────────────┐ ┌──────────────────┐ + # │ │ │ Rails loads? │ │ Zeitwerk loads? │ + # │ │ │ → Railtie │ │ → Prepend hook │ + # │ │ └──────────────┘ └──────────────────┘ + # │ │ │ │ + # ▼ ▼ ▼ ▼ + # ┌─────────────────────────────────────────────────────────────────────────┐ + # │ LLM library loads → patch! │ + # └─────────────────────────────────────────────────────────────────────────┘ + # + # The watcher hook uses alias_method which zeitwerk captures harmlessly (alias + # chains work correctly). Once zeitwerk/Rails loads, we upgrade to the better + # approach: prepend (takes precedence, `super` chains through zeitwerk) or + # Railtie (patches after all gems loaded via after_initialize). + # module Setup + REENTRANCY_KEY = :braintrust_in_require_hook + class << self + # Main entry point. Call once per process. def run! - return if @setup_complete - @setup_complete = true - unless Internal::Env.auto_instrument - Braintrust::Log.debug("Contrib::Setup.run! auto-instrumentation disabled via environment") + Braintrust::Log.debug("Contrib::Setup: auto-instrumentation disabled via environment") return end - # Set up deferred patching for libraries loaded later - if rails_environment? - Braintrust::Log.debug("Contrib::Setup.run! 
using Rails after_initialize hook") - setup_rails_hook! + @registry = Contrib::Registry.instance + @only = Internal::Env.instrument_only + @except = Internal::Env.instrument_except + + if defined?(::Rails::Railtie) + Braintrust::Log.debug("Contrib::Setup: using Rails railtie hook") + install_railtie! + elsif defined?(::Zeitwerk) + Braintrust::Log.debug("Contrib::Setup: using require hook (zeitwerk detected)") + install_require_hook! else - Braintrust::Log.debug("Contrib::Setup.run! using require hook") - setup_require_hook! + Braintrust::Log.debug("Contrib::Setup: using watcher hook") + install_watcher_hook! end end - def rails_environment? - # Check for Rails::Railtie which is defined during gem loading, - # before Rails.application exists - defined?(::Rails::Railtie) + # Called after each require to check if we should patch anything. + def on_require(path) + return unless @registry + + @registry.integrations_for_require_path(path).each do |integration| + next unless integration.available? && integration.compatible? + next if @only && !@only.include?(integration.integration_name) + next if @except&.include?(integration.integration_name) + + Braintrust::Log.debug("Contrib::Setup: patching #{integration.integration_name}") + integration.patch! + end + rescue => e + Braintrust::Log.error("Auto-instrument failed: #{e.message}") end - def setup_rails_hook! + # Execute block with reentrancy protection (prevents infinite loops). + def with_reentrancy_guard + return if Thread.current[REENTRANCY_KEY] + Thread.current[REENTRANCY_KEY] = true + yield + rescue => e + Braintrust::Log.error("Auto-instrument failed: #{e.message}") + ensure + Thread.current[REENTRANCY_KEY] = false + end + + def railtie_installed? = @railtie_installed + def require_hook_installed? = @require_hook_installed + + def install_railtie! + return if @railtie_installed + @railtie_installed = true require_relative "rails/railtie" end - def setup_require_hook! 
- registry = Contrib::Registry.instance - only = Internal::Env.instrument_only - except = Internal::Env.instrument_except - - # Store original require - original_require = Kernel.method(:require) - - Kernel.define_method(:require) do |path| - result = original_require.call(path) - - # Reentrancy guard - unless Thread.current[:braintrust_in_require_hook] - begin - Thread.current[:braintrust_in_require_hook] = true - - # Check if any integration matches this require path - integrations = registry.integrations_for_require_path(path) - if integrations.any? - Braintrust::Log.debug("require '#{path}' matched integration hook: #{integrations.map(&:integration_name).inspect}") - end - integrations.each do |integration| - next unless integration.available? && integration.compatible? - next if only && !only.include?(integration.integration_name) - next if except&.include?(integration.integration_name) - integration.patch! - end - rescue => e - Braintrust::Log.error("Failed to auto-instrument on `require`: #{e.message}") - ensure - Thread.current[:braintrust_in_require_hook] = false + def install_require_hook! + return if @require_hook_installed + @require_hook_installed = true + Kernel.prepend(RequireHook) + end + + private + + def install_watcher_hook! + return if Kernel.private_method_defined?(:braintrust_watcher_require) + + Kernel.module_eval do + alias_method :braintrust_watcher_require, :require + + define_method(:require) do |path| + result = braintrust_watcher_require(path) + + # Detect Rails/zeitwerk loading and upgrade hook strategy. + # IMPORTANT: Only check when the gem itself finishes loading (path match), + # not on every require where the constant happens to be defined. + # Installing too early (during gem init) breaks the alias_method chain. + if (path == "rails" || path.include?("railties")) && + defined?(::Rails::Railtie) && !Braintrust::Contrib::Setup.railtie_installed? + Braintrust::Contrib::Setup.install_railtie! 
+ elsif (path == "zeitwerk" || path.end_with?("/zeitwerk.rb")) && + defined?(::Zeitwerk) && !Braintrust::Contrib::Setup.require_hook_installed? + Braintrust::Contrib::Setup.install_require_hook! end - end - result + Braintrust::Contrib::Setup.with_reentrancy_guard do + Braintrust::Contrib::Setup.on_require(path) + end + + result + end end end end end + + # Prepend module for require interception. + # Only installed AFTER zeitwerk loads to avoid alias_method loop. + module RequireHook + def require(path) + result = super + Setup.with_reentrancy_guard { Setup.on_require(path) } + result + end + end end end diff --git a/lib/braintrust/setup.rb b/lib/braintrust/setup.rb index 29482fb..b947ac2 100644 --- a/lib/braintrust/setup.rb +++ b/lib/braintrust/setup.rb @@ -30,13 +30,14 @@ def run! Braintrust::Log.debug("Braintrust setting up...") # Initialize Braintrust (silent failure if no API key) + # Must run in every process - tracer provider doesn't persist across Kernel.exec begin Braintrust.init rescue => e Braintrust::Log.error("Failed to automatically setup Braintrust: #{e.message}") end - # Setup contrib for 3rd party integrations + # Always setup contrib - hooks don't persist across Kernel.exec Contrib::Setup.run! Braintrust::Log.debug("Braintrust setup complete. Auto-instrumentation enabled: #{Braintrust::Internal::Env.auto_instrument}") @@ -46,5 +47,4 @@ def run! end # Auto-setup when required - Braintrust::Setup.run! diff --git a/test/braintrust/contrib/setup_test.rb b/test/braintrust/contrib/setup_test.rb index 238eb0d..e78c8ef 100644 --- a/test/braintrust/contrib/setup_test.rb +++ b/test/braintrust/contrib/setup_test.rb @@ -4,286 +4,462 @@ require "braintrust/contrib/setup" class Braintrust::Contrib::SetupTest < Minitest::Test - def setup - # Reset the setup_complete flag before each test - Braintrust::Contrib::Setup.instance_variable_set(:@setup_complete, nil) + # --- run! 
--- + + def test_run_skips_when_auto_instrument_disabled + assert_in_fork do + hook_installed = false + + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "false") do + with_stubs( + [Braintrust::Contrib::Setup, :install_watcher_hook!, -> { hook_installed = true }], + [Braintrust::Contrib::Setup, :install_require_hook!, -> { hook_installed = true }], + [Braintrust::Contrib::Setup, :install_railtie!, -> { hook_installed = true }] + ) do + Braintrust::Contrib::Setup.run! + end + end + + refute hook_installed, "hooks should not be installed when auto_instrument is disabled" + end end - def teardown - # Reset after each test to not affect other tests - Braintrust::Contrib::Setup.instance_variable_set(:@setup_complete, nil) + def test_run_installs_railtie_when_rails_defined + skip "Test not applicable when Rails is already loaded" if defined?(::Rails::Railtie) + + assert_in_fork do + railtie_installed = false + + with_mock_rails_railtie do + Braintrust::Contrib::Setup.stub(:install_railtie!, -> { railtie_installed = true }) do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do + Braintrust::Contrib::Setup.run! + end + end + end + + assert railtie_installed, "should install railtie when Rails::Railtie is defined" + end end - # --- run! idempotency --- + def test_run_installs_require_hook_when_zeitwerk_defined + skip "Test not applicable when Rails or Zeitwerk is loaded" if defined?(::Rails::Railtie) || defined?(::Zeitwerk) - def test_run_is_idempotent - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + assert_in_fork do + require_hook_installed = false - call_count = 0 + with_mock_zeitwerk do + Braintrust::Contrib::Setup.stub(:install_require_hook!, -> { require_hook_installed = true }) do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do + Braintrust::Contrib::Setup.run! 
+ end + end + end - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { call_count += 1 }) do - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! - Braintrust::Contrib::Setup.run! + assert require_hook_installed, "should install require hook when Zeitwerk is defined" + end + end + + def test_run_installs_watcher_hook_when_neither_rails_nor_zeitwerk + skip "Test not applicable when Rails or Zeitwerk is loaded" if defined?(::Rails::Railtie) || defined?(::Zeitwerk) + + assert_in_fork do + watcher_installed = false + + Braintrust::Contrib::Setup.stub(:install_watcher_hook!, -> { watcher_installed = true }) do + ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do + Braintrust::Contrib::Setup.run! + end end + + assert watcher_installed, "should install watcher hook when neither Rails nor Zeitwerk is defined" end + end + + # --- on_require --- + + def test_on_require_patches_matching_integration + assert_in_fork do + patch_called = false + + mock_integration = mock_integration_object( + name: :test_lib, + on_patch: -> { patch_called = true } + ) - assert_equal 1, call_count, "run! should only execute once" + mock_registry = mock_registry_for("test_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("test_lib") + + assert patch_called, "on_require should call patch! 
on matching integration" + end end - # --- BRAINTRUST_AUTO_INSTRUMENT env var --- + def test_on_require_respects_only_filter + assert_in_fork do + patch_called = false - def test_run_skips_hooks_when_auto_instrument_disabled - hook_called = false + mock_integration = mock_integration_object( + name: :excluded_lib, + on_patch: -> { patch_called = true } + ) - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "false") do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { hook_called = true }) do - Braintrust::Contrib::Setup.run! - end + mock_registry = mock_registry_for("excluded_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, %i[openai anthropic]) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("excluded_lib") + + refute patch_called, "on_require should respect only filter" + end + end + + def test_on_require_respects_except_filter + assert_in_fork do + patch_called = false + + mock_integration = mock_integration_object( + name: :excluded_lib, + on_patch: -> { patch_called = true } + ) + + mock_registry = mock_registry_for("excluded_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, %i[excluded_lib]) + + Braintrust::Contrib::Setup.on_require("excluded_lib") + + refute patch_called, "on_require should respect except filter" end + end + + def test_on_require_skips_unavailable_integration + assert_in_fork do + patch_called = false + + mock_integration = mock_integration_object( + name: :unavailable_lib, + available: false, + on_patch: -> { patch_called = true } + ) + + mock_registry = mock_registry_for("unavailable_lib" => [mock_integration]) - refute hook_called, "hooks should not be set up when 
auto_instrument is disabled" + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("unavailable_lib") + + refute patch_called, "on_require should skip unavailable integrations" + end end - def test_run_sets_up_hooks_when_auto_instrument_enabled - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + def test_on_require_skips_incompatible_integration + assert_in_fork do + patch_called = false - hook_called = false + mock_integration = mock_integration_object( + name: :incompatible_lib, + compatible: false, + on_patch: -> { patch_called = true } + ) - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: "true") do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { hook_called = true }) do - Braintrust::Contrib::Setup.run! - end + mock_registry = mock_registry_for("incompatible_lib" => [mock_integration]) + + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Contrib::Setup.on_require("incompatible_lib") + + refute patch_called, "on_require should skip incompatible integrations" end + end + + def test_on_require_returns_early_without_registry + assert_in_fork do + Braintrust::Contrib::Setup.instance_variable_set(:@registry, nil) - assert hook_called, "hooks should be set up when auto_instrument is enabled" + # Should not raise + Braintrust::Contrib::Setup.on_require("anything") + end end - def test_run_sets_up_hooks_by_default - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + def test_on_require_logs_errors_without_crashing + assert_in_fork do + error_logged = false + + mock_integration = mock_integration_object( + name: :error_lib, + on_patch: 
-> { raise "Patch error!" } + ) - hook_called = false + mock_registry = mock_registry_for("error_lib" => [mock_integration]) - # Ensure env var is not set - ClimateControl.modify(BRAINTRUST_AUTO_INSTRUMENT: nil) do - Braintrust::Contrib::Setup.stub(:setup_require_hook!, -> { hook_called = true }) do - Braintrust::Contrib::Setup.run! + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) + + Braintrust::Log.stub(:error, ->(_msg) { error_logged = true }) do + Braintrust::Contrib::Setup.on_require("error_lib") end - end - assert hook_called, "hooks should be set up by default" + assert error_logged, "on_require should log errors without crashing" + end end - # --- rails_environment? --- + # --- with_reentrancy_guard --- - def test_rails_environment_returns_false_when_rails_not_defined - skip "Test not applicable when Rails is loaded" if defined?(::Rails::Railtie) + def test_with_reentrancy_guard_prevents_nested_calls + assert_in_fork do + outer_ran = false + inner_ran = false - refute Braintrust::Contrib::Setup.rails_environment? 
+ Braintrust::Contrib::Setup.with_reentrancy_guard do + outer_ran = true + Braintrust::Contrib::Setup.with_reentrancy_guard do + inner_ran = true + end + end + + assert outer_ran, "outer block should run" + refute inner_ran, "inner block should be skipped due to reentrancy guard" + end end - def test_rails_environment_returns_false_when_rails_application_nil - mock_rails = Module.new do - def self.respond_to?(method) - method == :application || super + def test_with_reentrancy_guard_resets_after_completion + assert_in_fork do + Braintrust::Contrib::Setup.with_reentrancy_guard do + # first call end - def self.application - nil + second_call_ran = false + Braintrust::Contrib::Setup.with_reentrancy_guard do + second_call_ran = true end - end - Object.stub_const(:Rails, mock_rails) do - refute Braintrust::Contrib::Setup.rails_environment? + assert second_call_ran, "second call should run after first completes" end end - def test_rails_environment_returns_true_when_rails_railtie_defined - with_mock_rails_railtie do - assert Braintrust::Contrib::Setup.rails_environment? + def test_with_reentrancy_guard_logs_errors + assert_in_fork do + error_logged = false + + Braintrust::Log.stub(:error, ->(_msg) { error_logged = true }) do + Braintrust::Contrib::Setup.with_reentrancy_guard do + raise "Test error" + end + end + + assert error_logged, "with_reentrancy_guard should log errors" end end - # --- setup_rails_hook! --- + # --- install_railtie! --- - def test_setup_rails_hook_loads_railtie - railtie_loaded = false + def test_install_railtie_is_idempotent + assert_in_fork do + call_count = 0 - Braintrust::Contrib::Setup.stub(:require_relative, ->(path) { railtie_loaded = true if path == "rails/railtie" }) do - Braintrust::Contrib::Setup.setup_rails_hook! - end + Braintrust::Contrib::Setup.stub(:require_relative, ->(path) { call_count += 1 if path == "rails/railtie" }) do + Braintrust::Contrib::Setup.install_railtie! + Braintrust::Contrib::Setup.install_railtie! 
+ Braintrust::Contrib::Setup.install_railtie! + end - assert railtie_loaded, "setup_rails_hook! should load rails/railtie" + assert_equal 1, call_count, "install_railtie! should only load railtie once" + end end - # --- setup_require_hook! --- - # These tests run in a fork to prevent the require hook from leaking into other tests + # --- install_require_hook! --- - def test_setup_require_hook_patches_kernel_require + def test_install_require_hook_is_idempotent assert_in_fork do - Braintrust::Contrib::Setup.setup_require_hook! + call_count = 0 + original_prepend = Kernel.method(:prepend) + + Kernel.define_singleton_method(:prepend) do |mod| + call_count += 1 if mod == Braintrust::Contrib::RequireHook + original_prepend.call(mod) + end - # Check that require still works normally - require "json" # Should work normally + Braintrust::Contrib::Setup.install_require_hook! + Braintrust::Contrib::Setup.install_require_hook! + Braintrust::Contrib::Setup.install_require_hook! - # If we got here without error, the hook is working - puts "require_hook_test:passed" + assert_equal 1, call_count, "install_require_hook! should only prepend once" end end - def test_setup_require_hook_instruments_matching_library + # --- install_watcher_hook! --- + + def test_install_watcher_hook_is_idempotent assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "test_integration_lib", extension: ".rb") do |test_file| - patch_called = false + Braintrust::Contrib::Setup.send(:install_watcher_hook!) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) 
+ + # require still works + require "json" + end + end + + def test_watcher_hook_triggers_on_require + assert_in_fork do + with_tmp_file(data: "# test file\n", filename: "watcher_test_lib", extension: ".rb") do |test_file| + on_require_called = false test_lib_name = File.basename(test_file.path, ".rb") $LOAD_PATH.unshift(File.dirname(test_file.path)) - mock_integration = mock_integration_object( - name: :test_lib, - on_patch: -> { patch_called = true } - ) + Braintrust::Contrib::Setup.instance_variable_set(:@registry, Braintrust::Contrib::Registry.instance) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) - - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name + original_on_require = Braintrust::Contrib::Setup.method(:on_require) + Braintrust::Contrib::Setup.define_singleton_method(:on_require) do |path| + on_require_called = true if path.include?(test_lib_name) + original_on_require.call(path) end - if patch_called - puts "instrument_test:passed" - else - puts "instrument_test:failed - patch! was not called" - exit 1 - end + Braintrust::Contrib::Setup.send(:install_watcher_hook!) 
+ require test_lib_name + + assert on_require_called, "on_require should be called" end end end - def test_setup_require_hook_respects_only_filter - assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "excluded_lib", extension: ".rb") do |test_file| - patch_called = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) + def test_watcher_hook_upgrades_to_require_hook_when_zeitwerk_required + skip "Test not applicable when Zeitwerk is already loaded" if defined?(::Zeitwerk) - mock_integration = mock_integration_object( - name: :excluded_lib, - on_patch: -> { patch_called = true } - ) + assert_in_fork do + # Create a fake zeitwerk.rb that defines the Zeitwerk constant + with_tmp_file(data: "module Zeitwerk; end\n", filename: "zeitwerk", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) - ENV["BRAINTRUST_INSTRUMENT_ONLY"] = "openai,anthropic" + refute Braintrust::Contrib::Setup.require_hook_installed?, "require hook should not be installed yet" - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + require "zeitwerk" - if patch_called - puts "only_filter_test:failed - patch! 
should not be called for excluded lib" - exit 1 - else - puts "only_filter_test:passed" - end + assert Braintrust::Contrib::Setup.require_hook_installed?, "require hook should be installed after zeitwerk loads" end end end - def test_setup_require_hook_respects_except_filter + def test_watcher_hook_does_not_upgrade_for_non_zeitwerk_require_even_if_constant_exists + skip "Test not applicable when Zeitwerk is already loaded" if defined?(::Zeitwerk) + assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "excluded_lib", extension: ".rb") do |test_file| - patch_called = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) + # Define Zeitwerk constant first + Object.const_set(:Zeitwerk, Module.new) - mock_integration = mock_integration_object( - name: :excluded_lib, - on_patch: -> { patch_called = true } - ) + # Create an unrelated file (doesn't need exact_name since we're testing it DOESN'T match) + with_tmp_file(data: "# unrelated\n", filename: "unrelated_lib", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) - ENV["BRAINTRUST_INSTRUMENT_EXCEPT"] = "excluded_lib" + refute Braintrust::Contrib::Setup.require_hook_installed?, "require hook should not be installed yet" - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + require "unrelated_lib" - if patch_called - puts "except_filter_test:failed - patch! 
should not be called for excluded lib" - exit 1 - else - puts "except_filter_test:passed" - end + refute Braintrust::Contrib::Setup.require_hook_installed?, "require hook should NOT be installed for non-zeitwerk require" end end end - def test_setup_require_hook_skips_unavailable_integration + def test_watcher_hook_upgrades_to_railtie_when_rails_required + skip "Test not applicable when Rails is already loaded" if defined?(::Rails::Railtie) + assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "unavailable_lib", extension: ".rb") do |test_file| - patch_called = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) + # Create a fake rails.rb that defines the Rails::Railtie constant + with_tmp_file(data: "module Rails; class Railtie; end; end\n", filename: "rails", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) + + railtie_installed = false + Braintrust::Contrib::Setup.define_singleton_method(:install_railtie!) do + railtie_installed = true + @railtie_installed = true + end - mock_integration = mock_integration_object( - name: :unavailable_lib, - available: false, - on_patch: -> { patch_called = true } - ) + Braintrust::Contrib::Setup.send(:install_watcher_hook!) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + refute railtie_installed, "railtie should not be installed yet" - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + require "rails" - if patch_called - puts "unavailable_test:failed - patch! 
should not be called for unavailable integration" - exit 1 - else - puts "unavailable_test:passed" + assert railtie_installed, "railtie should be installed after rails loads" + end + end + end + + def test_watcher_hook_does_not_upgrade_for_non_rails_require_even_if_constant_exists + skip "Test not applicable when Rails is already loaded" if defined?(::Rails::Railtie) + + assert_in_fork do + # Define Rails::Railtie constant first + Object.const_set(:Rails, Module.new) + Rails.const_set(:Railtie, Class.new) + + # Create an unrelated file (doesn't need exact_name since we're testing it DOESN'T match) + with_tmp_file(data: "# unrelated\n", filename: "another_lib", extension: ".rb", exact_name: true) do |path| + $LOAD_PATH.unshift(File.dirname(path)) + + railtie_installed = false + Braintrust::Contrib::Setup.define_singleton_method(:install_railtie!) do + railtie_installed = true + @railtie_installed = true end + + Braintrust::Contrib::Setup.send(:install_watcher_hook!) + + refute railtie_installed, "railtie should not be installed yet" + + require "another_lib" + + refute railtie_installed, "railtie should NOT be installed for non-rails require" end end end - def test_setup_require_hook_skips_incompatible_integration + # --- Integration tests with actual require hook --- + + def test_require_hook_instruments_matching_library assert_in_fork do - with_tmp_file(data: "# test file\n", filename: "incompatible_lib", extension: ".rb") do |test_file| + with_tmp_file(data: "# test file\n", filename: "test_integration_lib", extension: ".rb") do |test_file| patch_called = false test_lib_name = File.basename(test_file.path, ".rb") $LOAD_PATH.unshift(File.dirname(test_file.path)) mock_integration = mock_integration_object( - name: :incompatible_lib, - compatible: false, + name: :test_lib, on_patch: -> { patch_called = true } ) mock_registry = mock_registry_for(test_lib_name => [mock_integration]) - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - 
Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) - if patch_called - puts "incompatible_test:failed - patch! should not be called for incompatible integration" - exit 1 - else - puts "incompatible_test:passed" - end + Braintrust::Contrib::Setup.install_require_hook! + require test_lib_name + + assert patch_called, "patch! should be called" end end end - def test_setup_require_hook_has_reentrancy_guard + def test_require_hook_has_reentrancy_guard assert_in_fork do with_tmp_file(data: "# reentrant lib\n", filename: "reentrant_lib", extension: ".rb") do |reentrant_file| with_tmp_file(data: "# nested lib\n", filename: "nested_lib", extension: ".rb") do |nested_file| @@ -314,50 +490,14 @@ def test_setup_require_hook_has_reentrancy_guard nested_lib_name => [nested_integration] ) - Braintrust::Contrib::Registry.stub(:instance, mock_registry) do - Braintrust::Contrib::Setup.setup_require_hook! - require reentrant_lib_name - end - - # The nested integration should NOT have been patched due to reentrancy guard - if require_calls_during_patch == [:patch_completed] - puts "reentrancy_test:passed" - else - puts "reentrancy_test:failed - got #{require_calls_during_patch.inspect}" - exit 1 - end - end - end - end - end - - def test_setup_require_hook_logs_errors_without_crashing - assert_in_fork do - with_tmp_file(data: "# error lib\n", filename: "error_lib", extension: ".rb") do |test_file| - error_logged = false - test_lib_name = File.basename(test_file.path, ".rb") - $LOAD_PATH.unshift(File.dirname(test_file.path)) - - mock_integration = mock_integration_object( - name: :error_lib, - on_patch: -> { raise "Patch error!" 
} - ) + Braintrust::Contrib::Setup.instance_variable_set(:@registry, mock_registry) + Braintrust::Contrib::Setup.instance_variable_set(:@only, nil) + Braintrust::Contrib::Setup.instance_variable_set(:@except, nil) - mock_registry = mock_registry_for(test_lib_name => [mock_integration]) + Braintrust::Contrib::Setup.install_require_hook! + require reentrant_lib_name - with_stubs( - [Braintrust::Contrib::Registry, :instance, mock_registry], - [Braintrust::Log, :error, ->(_msg) { error_logged = true }] - ) do - Braintrust::Contrib::Setup.setup_require_hook! - require test_lib_name - end - - if error_logged - puts "error_handling_test:passed" - else - puts "error_handling_test:failed - error was not logged" - exit 1 + assert_equal [:patch_completed], require_calls_during_patch, "nested integration should not be patched" end end end @@ -376,6 +516,15 @@ def with_mock_rails_railtie end end + # Helper to stub Zeitwerk + def with_mock_zeitwerk + mock_zeitwerk = Module.new + + Object.stub_const(:Zeitwerk, mock_zeitwerk) do + yield + end + end + # Helper to create a mock integration object def mock_integration_object(name:, available: true, compatible: true, on_patch: nil) integration = Object.new @@ -387,7 +536,6 @@ def mock_integration_object(name:, available: true, compatible: true, on_patch: end # Helper to create a mock registry that maps lib names to integrations - # @param mappings [Hash] lib_name => [integrations] mapping def mock_registry_for(mappings) registry = Object.new registry.define_singleton_method(:integrations_for_require_path) do |path| diff --git a/test/braintrust/setup_test.rb b/test/braintrust/setup_test.rb index aca9c5b..a79cd10 100644 --- a/test/braintrust/setup_test.rb +++ b/test/braintrust/setup_test.rb @@ -20,12 +20,7 @@ def test_run_calls_braintrust_init require "braintrust/setup" end - if init_called - puts "test_run_calls_braintrust_init:passed" - else - puts "test_run_calls_braintrust_init:failed - init not called" - exit 1 - end + assert 
init_called, "Braintrust.init should be called" end end @@ -42,12 +37,7 @@ def test_run_calls_contrib_setup_run require "braintrust/setup" end - if contrib_setup_called - puts "test_run_calls_contrib_setup_run:passed" - else - puts "test_run_calls_contrib_setup_run:failed - Contrib::Setup.run! not called" - exit 1 - end + assert contrib_setup_called, "Contrib::Setup.run! should be called" end end @@ -68,12 +58,7 @@ def test_run_is_idempotent Braintrust::Setup.run! end - if call_count == 1 - puts "test_run_is_idempotent:passed" - else - puts "test_run_is_idempotent:failed - call_count=#{call_count}, expected 1" - exit 1 - end + assert_equal 1, call_count, "init should only be called once" end end @@ -96,12 +81,7 @@ def test_run_logs_error_on_init_failure msg.include?("Test init error") end - if error_logged - puts "test_run_logs_error_on_init_failure:passed" - else - puts "test_run_logs_error_on_init_failure:failed - logged: #{logged_messages.inspect}" - exit 1 - end + assert error_logged, "should log error message containing failure details" end end @@ -119,12 +99,7 @@ def test_run_continues_to_contrib_setup_after_init_failure require "braintrust/setup" end - if contrib_setup_called - puts "test_run_continues_to_contrib_setup_after_init_failure:passed" - else - puts "test_run_continues_to_contrib_setup_after_init_failure:failed" - exit 1 - end + assert contrib_setup_called, "Contrib::Setup.run! 
should be called even after init failure"
    end
  end
end
diff --git a/test/support/fixture_helper.rb b/test/support/fixture_helper.rb
index 26e6dcb..5971769 100644
--- a/test/support/fixture_helper.rb
+++ b/test/support/fixture_helper.rb
@@ -25,23 +25,32 @@ def with_png_file(data: PNG_DATA, filename: "test_image", extension: ".png", &bl
   # Create a temporary file and yield to the block
   # File is automatically cleaned up after the block
   # @param data [String] content to write to the file (default: empty)
-  # @param filename [String] prefix for the temp file name
+  # @param filename [String] prefix for the temp file name (or exact name if exact_name: true)
   # @param extension [String] extension for the temp file name
   # @param binary [Boolean] whether to write in binary mode
-  # @yield [Tempfile] the temporary file
-  def with_tmp_file(data: "", filename: "test", extension: ".txt", binary: false)
+  # @param exact_name [Boolean] if true, use exact filename without random suffix
+  # @yield [Tempfile, String] the temporary file (Tempfile) or path (String if exact_name)
+  def with_tmp_file(data: "", filename: "test", extension: ".txt", binary: false, exact_name: false)
    return unless block_given?

-    require "tempfile"
-    tmpfile = Tempfile.new([filename, extension])
-    tmpfile.binmode if binary
-    tmpfile.write(data)
-    tmpfile.close
+    if exact_name
+      Dir.mktmpdir do |dir|
+        path = File.join(dir, "#{filename}#{extension}")
+        File.write(path, data, mode: binary ? 
"wb" : "w") + yield path + end + else + require "tempfile" + tmpfile = Tempfile.new([filename, extension]) + tmpfile.binmode if binary + tmpfile.write(data) + tmpfile.close - begin - yield(tmpfile) - ensure - tmpfile.unlink + begin + yield(tmpfile) + ensure + tmpfile.unlink + end end end end From 9e4fae8c9283c4bf02a9bbbe18d0ff9b97415456 Mon Sep 17 00:00:00 2001 From: David Elner Date: Sat, 10 Jan 2026 01:38:03 -0500 Subject: [PATCH 27/27] Fixed: Broken permalinks in examples --- examples/contrib/anthropic.rb | 28 ++++++++++++++++++---------- examples/contrib/openai.rb | 28 ++++++++++++++++++---------- examples/contrib/ruby-openai.rb | 33 +++++++++++++++++++++------------ examples/contrib/ruby_llm.rb | 15 ++++++++++++--- 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/examples/contrib/anthropic.rb b/examples/contrib/anthropic.rb index 4f2dd1f..43ef642 100644 --- a/examples/contrib/anthropic.rb +++ b/examples/contrib/anthropic.rb @@ -25,18 +25,26 @@ # Create Anthropic client client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"]) -# Make a message request (automatically traced!) -client.messages.create( - model: "claude-3-haiku-20240307", - max_tokens: 100, - system: "You are a helpful assistant.", - messages: [ - {role: "user", content: "Say hello and tell me a short joke."} - ] -) +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("anthropic-example") + +root_span = nil +tracer.in_span("examples/contrib/anthropic.rb") do |span| + root_span = span + + # Make a message request (automatically traced!) 
+ client.messages.create( + model: "claude-3-haiku-20240307", + max_tokens: 100, + system: "You are a helpful assistant.", + messages: [ + {role: "user", content: "Say hello and tell me a short joke."} + ] + ) +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust diff --git a/examples/contrib/openai.rb b/examples/contrib/openai.rb index dbaa27d..eb7d0ee 100644 --- a/examples/contrib/openai.rb +++ b/examples/contrib/openai.rb @@ -25,18 +25,26 @@ # Create OpenAI client client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -# Make a chat completion request (automatically traced!) -client.chat.completions.create( - messages: [ - {role: "system", content: "You are a helpful assistant."}, - {role: "user", content: "Say hello and tell me a short joke."} - ], - model: "gpt-4o-mini", - max_tokens: 100 -) +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") + +root_span = nil +tracer.in_span("examples/contrib/openai.rb") do |span| + root_span = span + + # Make a chat completion request (automatically traced!) 
+ client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 + ) +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust diff --git a/examples/contrib/ruby-openai.rb b/examples/contrib/ruby-openai.rb index 01c7ba7..3072d22 100644 --- a/examples/contrib/ruby-openai.rb +++ b/examples/contrib/ruby-openai.rb @@ -7,7 +7,7 @@ require "opentelemetry/sdk" # Usage: -# OPENAI_API_KEY=your-openai-key bundle exec appraisal ruby-openai examples/internal/contrib/ruby_openai/basic.rb +# OPENAI_API_KEY=your-openai-key bundle exec appraisal ruby-openai ruby examples/contrib/ruby-openai.rb # Check for API keys unless ENV["OPENAI_API_KEY"] @@ -25,19 +25,28 @@ # Create OpenAI client client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"]) -client.chat( - parameters: { - model: "gpt-4o-mini", - messages: [ - {role: "system", content: "You are a helpful assistant."}, - {role: "user", content: "Say hello and tell me a short joke."} - ], - max_tokens: 100 - } -) +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("ruby-openai-example") + +root_span = nil +tracer.in_span("examples/contrib/ruby-openai.rb") do |span| + root_span = span + + # Make a chat request (automatically traced!) 
+ client.chat( + parameters: { + model: "gpt-4o-mini", + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + max_tokens: 100 + } + ) +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust diff --git a/examples/contrib/ruby_llm.rb b/examples/contrib/ruby_llm.rb index 02acf0f..97078f0 100644 --- a/examples/contrib/ruby_llm.rb +++ b/examples/contrib/ruby_llm.rb @@ -26,11 +26,20 @@ config.openai_api_key = ENV["OPENAI_API_KEY"] end -chat = RubyLLM.chat(model: "gpt-4o-mini") -chat.ask("What is the capital of France?") +# Get a tracer and wrap the API call in a span +tracer = OpenTelemetry.tracer_provider.tracer("ruby-llm-example") + +root_span = nil +tracer.in_span("examples/contrib/ruby_llm.rb") do |span| + root_span = span + + # Make a chat request (automatically traced!) + chat = RubyLLM.chat(model: "gpt-4o-mini") + chat.ask("What is the capital of France?") +end # Print permalink to view this trace in Braintrust -puts "\n View this trace in Braintrust:" +puts "\nView this trace in Braintrust:" puts " #{Braintrust::Trace.permalink(root_span)}" # Shutdown to flush spans to Braintrust