From 4b8e4abd86893bdb3fef84d41e495bad9720234e Mon Sep 17 00:00:00 2001 From: David Elner Date: Tue, 6 Jan 2026 17:09:54 -0500 Subject: [PATCH 1/4] Changed: Decomposed test_helper into separate modules --- test/support/assert_helper.rb | 152 ++++++++++++++ test/support/braintrust_helper.rb | 84 ++++++++ test/support/provider_helper.rb | 19 ++ test/support/tracing_helper.rb | 67 +++++++ test/test_helper.rb | 319 +----------------------------- 5 files changed, 330 insertions(+), 311 deletions(-) create mode 100644 test/support/assert_helper.rb create mode 100644 test/support/braintrust_helper.rb create mode 100644 test/support/provider_helper.rb create mode 100644 test/support/tracing_helper.rb diff --git a/test/support/assert_helper.rb b/test/support/assert_helper.rb new file mode 100644 index 0000000..8d1d725 --- /dev/null +++ b/test/support/assert_helper.rb @@ -0,0 +1,152 @@ +module Test + module Support + module AssertHelper + # Runs the test inside a fork, to isolate its side-effects from the main process. + # Similar in purpose to https://docs.ruby-lang.org/en/master/Ruby/Box.html#class-Ruby::Box + # + # Yields to the block for actual test code. + # @yield Block containing the test code + def assert_in_fork(fork_assertions: nil, timeout_seconds: 10, trigger_stacktrace_on_kill: false, debug: false) + fork_assertions ||= proc { |status:, stdout:, stderr:| + assert (status && status.success?), "STDOUT:`#{stdout}` STDERR:`#{stderr}" + } + + if debug + rv = assert_in_fork_debug(fork_assertions: fork_assertions) do + yield + end + return rv + end + + fork_stdout = Tempfile.new("braintrust-minitest-assert-in-fork-stdout") + fork_stderr = Tempfile.new("braintrust-minitest-assert-in-fork-stderr") + begin + # Start in fork + pid = fork do + # Capture forked output + $stdout.reopen(fork_stdout) + $stdout.sync = true + $stderr.reopen(fork_stderr) # STDERR captures failures. We print it in case the fork fails on exit. + $stderr.sync = true + + yield + end + + # Wait for fork to finish, retrieve its status. + # Enforce timeout to ensure test fork doesn't hang the test suite. + _, status = try_wait_until(seconds: timeout_seconds) { Process.wait2(pid, Process::WNOHANG) } + + stdout = File.read(fork_stdout.path) + stderr = File.read(fork_stderr.path) + + # Capture forked execution information + result = {status: status, stdout: stdout, stderr: stderr} + + # Check if fork and assertions have completed successfully + fork_assertions.call(**result) + + result + rescue => e + crash_note = nil + + if trigger_stacktrace_on_kill + crash_note = " (Crashing Ruby to get stacktrace as requested by `trigger_stacktrace_on_kill`)" + begin + Process.kill("SEGV", pid) + warn "Waiting for child process to exit after SEGV signal... #{crash_note}" + Process.wait(pid) + rescue + nil + end + end + + stdout = File.read(fork_stdout.path) + stderr = File.read(fork_stderr.path) + + raise "Failure or timeout in `assert_in_fork`#{crash_note}, STDOUT: `#{stdout}`, STDERR: `#{stderr}`", cause: e + ensure + begin + Process.kill("KILL", pid) + rescue + nil + end # Prevent zombie processes on failure + + fork_stderr.close + fork_stdout.close + fork_stdout.unlink + fork_stderr.unlink + end + end + + # Debug version of assert_in_fork that does not redirect I/O streams and + # has no timeout on execution. The idea is to use it for interactive + # debugging where you would set a break point in the fork. + def assert_in_fork_debug(fork_assertions:, timeout_seconds: 10, trigger_stacktrace_on_kill: false) + pid = fork do + yield + end + _, status = Process.wait2(pid) + fork_assertions.call(status: status, stdout: "", stderr: "") + end + + # Waits for the condition provided by the block argument to return truthy. + # + # Waits for 5 seconds by default. + # + # Can be configured by setting either: + # * `seconds`, or + # * `attempts` and `backoff` + # + # @yieldreturn [Boolean] block executed until it returns truthy + # @param [Numeric] seconds number of seconds to wait + # @param [Integer] attempts number of attempts at checking the condition + # @param [Numeric] backoff wait time between condition checking attempts + def try_wait_until(seconds: nil, attempts: nil, backoff: nil) + raise "Provider either `seconds` or `attempts` & `backoff`, not both" if seconds && (attempts || backoff) + + spec = if seconds + "#{seconds} seconds" + elsif attempts || backoff + "#{attempts} attempts with backoff: #{backoff}" + else + "none" + end + + if seconds + attempts = seconds * 10 + backoff = 0.1 + else + # 5 seconds by default, but respect the provide values if any. + attempts ||= 50 + backoff ||= 0.1 + end + + start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) + + # It's common for tests to want to run simple tasks in a background thread + # but call this method without the thread having even time to start. + # + # We add an extra attempt, interleaved by `Thread.pass`, in order to allow for + # those simple cases to quickly succeed without a timed `sleep` call. This will + # save simple test one `backoff` seconds sleep cycle. + # + # The total configured timeout is not reduced. + (attempts + 1).times do |i| + result = yield(attempts) + return result if result + + if i == 0 + Thread.pass + else + sleep(backoff) + end + end + + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time + actual = "#{"%.2f" % elapsed} seconds, #{attempts} attempts with backoff #{backoff}" + + raise("Wait time exhausted! Requested: #{spec}, waited: #{actual}") + end + end + end +end diff --git a/test/support/braintrust_helper.rb b/test/support/braintrust_helper.rb new file mode 100644 index 0000000..e5e8002 --- /dev/null +++ b/test/support/braintrust_helper.rb @@ -0,0 +1,84 @@ +module Test + module Support + module BraintrustHelper + # ============================================================================= + # IMPORTANT: Magic Test API Key + # ============================================================================= + # + # When calling Braintrust.init in tests, use api_key: "test-api-key" to trigger + # fake authentication that avoids HTTP requests. This magic key is handled in + # lib/braintrust/api/internal/auth.rb and returns fake org info immediately. + # + # Without this magic key, Braintrust.init spawns a background login thread that + # can cause WebMock errors after tests complete (orphan thread race condition). + # + # Example: + # Braintrust.init(api_key: "test-api-key", set_global: false, enable_tracing: true) + # + # TODO: Future work - move this magic key handling out of production code and into + # test helpers instead. Options include: + # 1. A test-only initializer that provides org_id directly (skips login entirely) + # 2. Dependency injection for the Auth module in tests + # 3. Environment-based test mode detection + # + # See: lib/braintrust/api/internal/auth.rb for the magic key implementation + # ============================================================================= + # Get API key for tests + # Uses real key for recording, fake key for playback + # @return [String] API key + def get_braintrust_key + key = ENV["BRAINTRUST_API_KEY"] + # In forked PRs, secrets may be empty strings + key = nil if key&.empty? + key || "test-key-for-vcr" + end + + # Creates a test State for unit tests (no login, no API calls) + # Override any fields by passing options + # Note: Providing org_id skips the login thread automatically + # @return [Braintrust::State] + def get_unit_test_state(**options) + defaults = { + api_key: "test-key", + api_url: "https://api.example.com", + app_url: "https://app.example.com", + org_name: "test-org", + org_id: "test-org-id", + default_project: "test-project", + enable_tracing: false + } + + state = Braintrust::State.new(**defaults.merge(options)) + state.validate + state + end + + # Creates a State for integration tests (performs login via VCR) + # This performs login (via VCR cassettes in tests) without polluting global state + # Use this for tests that need to interact with the API (eval, experiments, datasets, etc.) + # @param options [Hash] Options to pass to Braintrust.init (set_global and blocking_login are fixed) + # @return [Braintrust::State] + def get_integration_test_state(**options) + # Provide fallback API key for VCR playback (empty in forked PRs) + options[:api_key] ||= get_braintrust_key + Braintrust.init(set_global: false, blocking_login: true, **options) + end + + # Helper to run eval internally without API calls for testing + def run_test_eval(experiment_id:, experiment_name:, project_id:, project_name:, + cases:, task:, scorers:, state:, parallelism: 1, tracer_provider: nil) + runner = Braintrust::Eval::Runner.new( + experiment_id: experiment_id, + experiment_name: experiment_name, + project_id: project_id, + project_name: project_name, + task: task, + scorers: scorers, + state: state, + tracer_provider: tracer_provider + ) + runner.run(cases, parallelism: parallelism) + end + end + end +end diff --git a/test/support/provider_helper.rb b/test/support/provider_helper.rb new file mode 100644 index 0000000..6a2780a --- /dev/null +++ b/test/support/provider_helper.rb @@ -0,0 +1,19 @@ +module Test + module Support + module ProviderHelper + # Get Anthropic API key for tests + # Uses real key for recording, fake key for playback + # @return [String] API key + def get_anthropic_key + ENV["ANTHROPIC_API_KEY"] || "sk-ant-test-key-for-vcr" + end + + # Get OpenAI API key for tests + # Uses real key for recording, fake key for playback + # @return [String] API key + def get_openai_key + ENV["OPENAI_API_KEY"] || "sk-test-key-for-vcr" + end + end + end +end diff --git a/test/support/tracing_helper.rb b/test/support/tracing_helper.rb new file mode 100644 index 0000000..3a0ace7 --- /dev/null +++ b/test/support/tracing_helper.rb @@ -0,0 +1,67 @@ +require_relative "braintrust_helper" + +module Test + module Support + module TracingHelper + def self.included(base) + base.include(BraintrustHelper) + end + + # Sets up OpenTelemetry with an in-memory exporter for testing + # Returns an OtelTestRig with tracer_provider, exporter, state, and drain() method + # The exporter can be passed to Braintrust::Trace.enable to replace OTLP exporter + # @param state_options [Hash] Options to pass to get_unit_test_state + # @return [OtelTestRig] + def setup_otel_test_rig(**state_options) + require "opentelemetry/sdk" + + exporter = OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.new + tracer_provider = OpenTelemetry::SDK::Trace::TracerProvider.new + state = get_unit_test_state(**state_options) + + # Add Braintrust span processor (wraps simple processor with memory exporter) + simple_processor = OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(exporter) + braintrust_processor = Braintrust::Trace::SpanProcessor.new(simple_processor, state) + tracer_provider.add_span_processor(braintrust_processor) + + OtelTestRig.new(tracer_provider, exporter, state) + end + + # Wrapper for OpenTelemetry test setup + class OtelTestRig + attr_reader :tracer_provider, :exporter, :state + + def initialize(tracer_provider, exporter, state) + @tracer_provider = tracer_provider + @exporter = exporter + @state = state + end + + # Get a tracer from the provider + # @param name [String] tracer name (default: "test") + # @return [OpenTelemetry::Trace::Tracer] + def tracer(name = "test") + @tracer_provider.tracer(name) + end + + # Flush and drain all spans from the exporter + # @return [Array] + def drain + @tracer_provider.force_flush + spans = @exporter.finished_spans + @exporter.reset + spans + end + + # Flush and drain exactly one span from the exporter + # Asserts that exactly one span was flushed + # @return [OpenTelemetry::SDK::Trace::SpanData] + def drain_one + spans = drain + raise Minitest::Assertion, "Expected exactly 1 span, got #{spans.length}" unless spans.length == 1 + spans.first + end + end + end + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 65a052e..a0276b2 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -62,320 +62,17 @@ WebMock.allow_net_connect! end -# Test helpers for OpenTelemetry tracing -module TracingTestHelper - # ============================================================================= - # IMPORTANT: Magic Test API Key - # ============================================================================= - # - # When calling Braintrust.init in tests, use api_key: "test-api-key" to trigger - # fake authentication that avoids HTTP requests. This magic key is handled in - # lib/braintrust/api/internal/auth.rb and returns fake org info immediately. - # - # Without this magic key, Braintrust.init spawns a background login thread that - # can cause WebMock errors after tests complete (orphan thread race condition). - # - # Example: - # Braintrust.init(api_key: "test-api-key", set_global: false, enable_tracing: true) - # - # TODO: Future work - move this magic key handling out of production code and into - # test helpers instead. Options include: - # 1. A test-only initializer that provides org_id directly (skips login entirely) - # 2. Dependency injection for the Auth module in tests - # 3. Environment-based test mode detection - # - # See: lib/braintrust/api/internal/auth.rb for the magic key implementation - # ============================================================================= - - # Wrapper for OpenTelemetry test setup - class OtelTestRig - attr_reader :tracer_provider, :exporter, :state - - def initialize(tracer_provider, exporter, state) - @tracer_provider = tracer_provider - @exporter = exporter - @state = state - end - - # Get a tracer from the provider - # @param name [String] tracer name (default: "test") - # @return [OpenTelemetry::Trace::Tracer] - def tracer(name = "test") - @tracer_provider.tracer(name) - end - - # Flush and drain all spans from the exporter - # @return [Array] - def drain - @tracer_provider.force_flush - spans = @exporter.finished_spans - @exporter.reset - spans - end - - # Flush and drain exactly one span from the exporter - # Asserts that exactly one span was flushed - # @return [OpenTelemetry::SDK::Trace::SpanData] - def drain_one - spans = drain - raise Minitest::Assertion, "Expected exactly 1 span, got #{spans.length}" unless spans.length == 1 - spans.first - end - end - - # Creates a test State for unit tests (no login, no API calls) - # Override any fields by passing options - # Note: Providing org_id skips the login thread automatically - # @return [Braintrust::State] - def get_unit_test_state(**options) - defaults = { - api_key: "test-key", - api_url: "https://api.example.com", - app_url: "https://app.example.com", - org_name: "test-org", - org_id: "test-org-id", - default_project: "test-project", - enable_tracing: false - } - - state = Braintrust::State.new(**defaults.merge(options)) - state.validate - state - end - - # Creates a State for integration tests (performs login via VCR) - # This performs login (via VCR cassettes in tests) without polluting global state - # Use this for tests that need to interact with the API (eval, experiments, datasets, etc.) - # @param options [Hash] Options to pass to Braintrust.init (set_global and blocking_login are fixed) - # @return [Braintrust::State] - def get_integration_test_state(**options) - # Provide fallback API key for VCR playback (empty in forked PRs) - options[:api_key] ||= get_braintrust_key - Braintrust.init(set_global: false, blocking_login: true, **options) - end - - # Sets up OpenTelemetry with an in-memory exporter for testing - # Returns an OtelTestRig with tracer_provider, exporter, state, and drain() method - # The exporter can be passed to Braintrust::Trace.enable to replace OTLP exporter - # @param state_options [Hash] Options to pass to get_unit_test_state - # @return [OtelTestRig] - def setup_otel_test_rig(**state_options) - require "opentelemetry/sdk" - - exporter = OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.new - tracer_provider = OpenTelemetry::SDK::Trace::TracerProvider.new - state = get_unit_test_state(**state_options) - - # Add Braintrust span processor (wraps simple processor with memory exporter) - simple_processor = OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(exporter) - braintrust_processor = Braintrust::Trace::SpanProcessor.new(simple_processor, state) - tracer_provider.add_span_processor(braintrust_processor) - - OtelTestRig.new(tracer_provider, exporter, state) - end - - # Helper to run eval internally without API calls for testing - def run_test_eval(experiment_id:, experiment_name:, project_id:, project_name:, - cases:, task:, scorers:, state:, parallelism: 1, tracer_provider: nil) - runner = Braintrust::Eval::Runner.new( - experiment_id: experiment_id, - experiment_name: experiment_name, - project_id: project_id, - project_name: project_name, - task: task, - scorers: scorers, - state: state, - tracer_provider: tracer_provider - ) - runner.run(cases, parallelism: parallelism) - end - - # Generate unique name for parallel test runs - # Returns: "ruby-sdk-test--prefix-d3adb33f" (8 hex chars of entropy) - # @param prefix [String] optional prefix for the name - # @return [String] unique name safe for parallel execution - def unique_name(prefix = "") - require "securerandom" - entropy = SecureRandom.hex(4) # 8 hex chars - if prefix.empty? - "ruby-sdk-test--#{entropy}" - else - "ruby-sdk-test--#{prefix}-#{entropy}" - end - end - - # Get API key for tests - # Uses real key for recording, fake key for playback - # @return [String] API key - def get_braintrust_key - key = ENV["BRAINTRUST_API_KEY"] - # In forked PRs, secrets may be empty strings - key = nil if key&.empty? - key || "test-key-for-vcr" - end - - def get_openai_key - ENV["OPENAI_API_KEY"] || "sk-test-key-for-vcr" - end - - # Get Anthropic API key for tests - # Uses real key for recording, fake key for playback - # @return [String] API key - def get_anthropic_key - ENV["ANTHROPIC_API_KEY"] || "sk-ant-test-key-for-vcr" - end -end - -# Runs the test inside a fork, to isolate its side-effects from the main process. -# Similar in purpose to https://docs.ruby-lang.org/en/master/Ruby/Box.html#class-Ruby::Box -# -# Yields to the block for actual test code. -# @yield Block containing the test code -def assert_in_fork(fork_assertions: nil, timeout_seconds: 10, trigger_stacktrace_on_kill: false, debug: false) - fork_assertions ||= proc { |status:, stdout:, stderr:| - assert (status && status.success?), "STDOUT:`#{stdout}` STDERR:`#{stderr}" - } - - if debug - rv = assert_in_fork_debug(fork_assertions: fork_assertions) do - yield - end - return rv - end - - fork_stdout = Tempfile.new("braintrust-minitest-assert-in-fork-stdout") - fork_stderr = Tempfile.new("braintrust-minitest-assert-in-fork-stderr") - begin - # Start in fork - pid = fork do - # Capture forked output - $stdout.reopen(fork_stdout) - $stdout.sync = true - $stderr.reopen(fork_stderr) # STDERR captures failures. We print it in case the fork fails on exit. - $stderr.sync = true - - yield - end - - # Wait for fork to finish, retrieve its status. - # Enforce timeout to ensure test fork doesn't hang the test suite. - _, status = try_wait_until(seconds: timeout_seconds) { Process.wait2(pid, Process::WNOHANG) } - - stdout = File.read(fork_stdout.path) - stderr = File.read(fork_stderr.path) - - # Capture forked execution information - result = {status: status, stdout: stdout, stderr: stderr} - - # Check if fork and assertions have completed successfully - fork_assertions.call(**result) - - result - rescue => e - crash_note = nil - - if trigger_stacktrace_on_kill - crash_note = " (Crashing Ruby to get stacktrace as requested by `trigger_stacktrace_on_kill`)" - begin - Process.kill("SEGV", pid) - warn "Waiting for child process to exit after SEGV signal... #{crash_note}" - Process.wait(pid) - rescue - nil - end - end - - stdout = File.read(fork_stdout.path) - stderr = File.read(fork_stderr.path) - - raise "Failure or timeout in `assert_in_fork`#{crash_note}, STDOUT: `#{stdout}`, STDERR: `#{stderr}`", cause: e - ensure - begin - Process.kill("KILL", pid) - rescue - nil - end # Prevent zombie processes on failure - - fork_stderr.close - fork_stdout.close - fork_stdout.unlink - fork_stderr.unlink - end -end - -# Debug version of assert_in_fork that does not redirect I/O streams and -# has no timeout on execution. The idea is to use it for interactive -# debugging where you would set a break point in the fork. -def assert_in_fork_debug(fork_assertions:, timeout_seconds: 10, trigger_stacktrace_on_kill: false) - pid = fork do - yield - end - _, status = Process.wait2(pid) - fork_assertions.call(status: status, stdout: "", stderr: "") -end - -# Waits for the condition provided by the block argument to return truthy. -# -# Waits for 5 seconds by default. -# -# Can be configured by setting either: -# * `seconds`, or -# * `attempts` and `backoff` -# -# @yieldreturn [Boolean] block executed until it returns truthy -# @param [Numeric] seconds number of seconds to wait -# @param [Integer] attempts number of attempts at checking the condition -# @param [Numeric] backoff wait time between condition checking attempts -def try_wait_until(seconds: nil, attempts: nil, backoff: nil) - raise "Provider either `seconds` or `attempts` & `backoff`, not both" if seconds && (attempts || backoff) - - spec = if seconds - "#{seconds} seconds" - elsif attempts || backoff - "#{attempts} attempts with backoff: #{backoff}" - else - "none" - end - - if seconds - attempts = seconds * 10 - backoff = 0.1 - else - # 5 seconds by default, but respect the provide values if any. - attempts ||= 50 - backoff ||= 0.1 - end - - start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - - # It's common for tests to want to run simple tasks in a background thread - # but call this method without the thread having even time to start. - # - # We add an extra attempt, interleaved by `Thread.pass`, in order to allow for - # those simple cases to quickly succeed without a timed `sleep` call. This will - # save simple test one `backoff` seconds sleep cycle. - # - # The total configured timeout is not reduced. - (attempts + 1).times do |i| - result = yield(attempts) - return result if result - - if i == 0 - Thread.pass - else - sleep(backoff) - end - end - - elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time - actual = "#{"%.2f" % elapsed} seconds, #{attempts} attempts with backoff #{backoff}" - - raise("Wait time exhausted! Requested: #{spec}, waited: #{actual}") -end +require_relative "support/assert_helper" +require_relative "support/braintrust_helper" +require_relative "support/provider_helper" +require_relative "support/tracing_helper" # Include helper in all test cases class Minitest::Test - include TracingTestHelper + include ::Test::Support::AssertHelper + include ::Test::Support::BraintrustHelper + include ::Test::Support::ProviderHelper + include ::Test::Support::TracingHelper # Use Minitest hooks to clear global state after every test # This ensures cleanup happens even if individual tests don't have teardown methods From ac1e54570773c8899ba7f413dc2917360923e577 Mon Sep 17 00:00:00 2001 From: David Elner Date: Tue, 6 Jan 2026 17:11:46 -0500 Subject: [PATCH 2/4] Added: with_tmp_file and with_png_file test helpers --- test/support/fixture_helper.rb | 49 ++++++++++++++++++++++++++++++++++ test/test_helper.rb | 2 ++ 2 files changed, 51 insertions(+) create mode 100644 test/support/fixture_helper.rb diff --git a/test/support/fixture_helper.rb b/test/support/fixture_helper.rb new file mode 100644 index 0000000..26e6dcb --- /dev/null +++ b/test/support/fixture_helper.rb @@ -0,0 +1,49 @@ +module Test + module Support + module FixtureHelper + # Minimal valid PNG image (10x10) + PNG_DATA = [ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, + 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, + 0x08, 0x02, 0x00, 0x00, 0x00, 0x02, 0x50, 0x58, 0xea, 0x00, 0x00, 0x00, + 0x12, 0x49, 0x44, 0x41, 0x54, 0x78, 0xda, 0x63, 0xf8, 0xcf, 0xc0, 0x80, + 0x07, 0x31, 0x8c, 0x4a, 0x63, 0x43, 0x00, 0xb7, 0xca, 0x63, 0x9d, 0xd6, + 0xd5, 0xef, 0x74, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, + 0x42, 0x60, 0x82 + ].pack("C*").freeze + + # Create a temporary PNG file and yield to the block + # Uses a minimal valid 10x10 PNG image by default + # @param data [String] PNG data (default: PNG_DATA constant) + # @param filename [String] prefix for the temp file name + # @param extension [String] extension for the temp file name + # @yield [Tempfile] the temporary PNG file + def with_png_file(data: PNG_DATA, filename: "test_image", extension: ".png", &block) + with_tmp_file(data: data, filename: filename, extension: extension, binary: true, &block) + end + + # Create a temporary file and yield to the block + # File is automatically cleaned up after the block + # @param data [String] content to write to the file (default: empty) + # @param filename [String] prefix for the temp file name + # @param extension [String] extension for the temp file name + # @param binary [Boolean] whether to write in binary mode + # @yield [Tempfile] the temporary file + def with_tmp_file(data: "", filename: "test", extension: ".txt", binary: false) + return unless block_given? + + require "tempfile" + tmpfile = Tempfile.new([filename, extension]) + tmpfile.binmode if binary + tmpfile.write(data) + tmpfile.close + + begin + yield(tmpfile) + ensure + tmpfile.unlink + end + end + end + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index a0276b2..ee2c7f2 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -64,6 +64,7 @@ require_relative "support/assert_helper" require_relative "support/braintrust_helper" +require_relative "support/fixture_helper" require_relative "support/provider_helper" require_relative "support/tracing_helper" @@ -71,6 +72,7 @@ class Minitest::Test include ::Test::Support::AssertHelper include ::Test::Support::BraintrustHelper + include ::Test::Support::FixtureHelper include ::Test::Support::ProviderHelper include ::Test::Support::TracingHelper From a9033e03ab8cae32c81df0be49776637137436bc Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Tue, 6 Jan 2026 12:16:50 -0500 Subject: [PATCH 3/4] Fixed: RubyLLM not handling attachments correctly --- examples/internal/ruby_llm.rb | 47 +++++++ .../contrib/github.com/crmne/ruby_llm.rb | 97 +++++++++++-- test/braintrust/trace/ruby_llm_test.rb | 131 ++++++++++++++++++ 3 files changed, 262 insertions(+), 13 deletions(-) diff --git a/examples/internal/ruby_llm.rb b/examples/internal/ruby_llm.rb index 9c74462..523e983 100644 --- a/examples/internal/ruby_llm.rb +++ b/examples/internal/ruby_llm.rb @@ -236,6 +236,53 @@ def execute(operation:, a:, b:) puts "✓ Gracefully caught error: #{e.class.name}" puts " Message: #{e.message}" end + + # Feature 8: Image Attachments (Issue #71 fix) + # This demonstrates proper handling of RubyLLM Content objects with attachments + puts "\n" + "=" * 80 + puts "Feature 8: Image Attachments" + puts "=" * 80 + + tracer.in_span("feature_image_attachments") do + require "tempfile" + + # Create a minimal valid PNG image (10x10 red square) + png_data = [ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, + 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, + 0x08, 0x02, 0x00, 0x00, 0x00, 0x02, 0x50, 0x58, 0xea, 0x00, 0x00, 0x00, + 0x12, 0x49, 0x44, 0x41, 0x54, 0x78, 0xda, 0x63, 0xf8, 0xcf, 0xc0, 0x80, + 0x07, 0x31, 0x8c, 0x4a, 0x63, 0x43, 0x00, 0xb7, 0xca, 0x63, 0x9d, 0xd6, + 0xd5, 0xef, 0x74, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, + 0x42, 0x60, 0x82 + ].pack("C*") + + # Create a temp PNG file + tmpfile = Tempfile.new(["test_image", ".png"]) + tmpfile.binmode + tmpfile.write(png_data) + tmpfile.close + + begin + puts "\n[OpenAI - gpt-4o-mini with Image Attachment]" + chat_openai = RubyLLM.chat(model: "gpt-4o-mini") + + # Use RubyLLM's Content class with attachment + # This triggers the Content object behavior (issue #71) + content = RubyLLM::Content.new("What color is this image? Reply in one word.") + content.add_attachment(tmpfile.path) + + chat_openai.add_message(role: :user, content: content) + response = chat_openai.complete + + puts "Q: What color is this image? (with PNG attachment)" + puts "A: #{response.content}" + puts "Tokens: #{response.to_h[:input_tokens]} in, #{response.to_h[:output_tokens]} out" + puts "Note: The trace includes the base64-encoded image attachment" + ensure + tmpfile.unlink + end + end end puts "\n" + "=" * 80 diff --git a/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb b/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb index bf86cc4..4d3db6e 100644 --- a/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb +++ b/lib/braintrust/trace/contrib/github.com/crmne/ruby_llm.rb @@ -4,6 +4,7 @@ require "json" require_relative "../../../tokens" require_relative "../../../../logger" +require_relative "../../../../internal/encoding" module Braintrust module Trace @@ -422,18 +423,14 @@ def self.format_message_for_input(msg) # Handle content if msg.respond_to?(:content) && msg.content - # Convert Ruby hash notation to JSON string for tool results - content = msg.content - if msg.role.to_s == "tool" && content.is_a?(String) && content.start_with?("{:") - # Ruby hash string like "{:location=>...}" - try to parse and re-serialize as JSON - begin - # Simple conversion: replace Ruby hash syntax with JSON - content = content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":") - rescue - # Keep original if conversion fails - end + raw_content = msg.content + + # Check if content is a Content object with attachments (issue #71) + formatted["content"] = if raw_content.respond_to?(:text) && raw_content.respond_to?(:attachments) && raw_content.attachments&.any? + format_multipart_content(raw_content) + else + format_simple_content(raw_content, msg.role.to_s) end - formatted["content"] = content end # Handle tool_calls for assistant messages @@ -450,6 +447,74 @@ def self.format_message_for_input(msg) formatted end + # Format multipart content with text and attachments + # @param content_obj [Object] Content object with text and attachments + # @return [Array] array of content parts + def self.format_multipart_content(content_obj) + content_parts = [] + + # Add text part + content_parts << {"type" => "text", "text" => content_obj.text} if content_obj.text + + # Add attachment parts (convert to Braintrust format) + content_obj.attachments.each do |attachment| + content_parts << format_attachment_for_input(attachment) + end + + content_parts + end + + # Format simple text content + # @param raw_content [Object] String or Content object with text + # @param role [String] the message role + # @return [String] formatted text content + def self.format_simple_content(raw_content, role) + content = raw_content + content = content.text if content.respond_to?(:text) + + # Convert Ruby hash string to JSON for tool results + if role == "tool" && content.is_a?(String) && content.start_with?("{:") + begin + content = content.gsub(/(?<=\{|, ):(\w+)=>/, '"\1":').gsub("=>", ":") + rescue + # Keep original if conversion fails + end + end + + content + end + + # Format a RubyLLM attachment to OpenAI-compatible format + # @param attachment [Object] the RubyLLM attachment + # @return [Hash] OpenAI image_url format for consistency with other integrations + def self.format_attachment_for_input(attachment) + # RubyLLM Attachment has: source (Pathname), filename, mime_type + if attachment.respond_to?(:source) && attachment.source + begin + data = File.binread(attachment.source.to_s) + encoded = Internal::Encoding::Base64.strict_encode64(data) + mime_type = attachment.respond_to?(:mime_type) ? attachment.mime_type : "application/octet-stream" + + # Use OpenAI's image_url format for consistency + { + "type" => "image_url", + "image_url" => { + "url" => "data:#{mime_type};base64,#{encoded}" + } + } + rescue => e + Log.debug("Failed to read attachment file: #{e.message}") + # Return a placeholder if we can't read the file + {"type" => "text", "text" => "[attachment: #{attachment.respond_to?(:filename) ? attachment.filename : "unknown"}]"} + end + elsif attachment.respond_to?(:to_h) + # Try to use attachment's own serialization + attachment.to_h + else + {"type" => "text", "text" => "[attachment]"} + end + end + # Capture streaming output and metrics # @param span [OpenTelemetry::Trace::Span] the span # @param aggregated_chunks [Array] the aggregated chunks @@ -458,8 +523,11 @@ def self.capture_streaming_output(span, aggregated_chunks, result) return if aggregated_chunks.empty? # Aggregate content from chunks + # Extract text from Content objects if present (issue #71) aggregated_content = aggregated_chunks.map { |c| - c.respond_to?(:content) ? c.content : c.to_s + content = c.respond_to?(:content) ? c.content : c.to_s + content = content.text if content.respond_to?(:text) + content }.join output = [{ @@ -490,8 +558,11 @@ def self.capture_non_streaming_output(span, chat, response, messages_before_coun } # Add content if it's a simple text response + # Extract text from Content objects if present (issue #71) if response.respond_to?(:content) && response.content && !response.content.empty? - message["content"] = response.content + content = response.content + content = content.text if content.respond_to?(:text) + message["content"] = content end # Check if there are tool calls in the messages history diff --git a/test/braintrust/trace/ruby_llm_test.rb b/test/braintrust/trace/ruby_llm_test.rb index faa2ad2..f5dc38a 100644 --- a/test/braintrust/trace/ruby_llm_test.rb +++ b/test/braintrust/trace/ruby_llm_test.rb @@ -461,4 +461,135 @@ def mock_tool.respond_to?(method) assert_equal({}, params["properties"]) assert_equal [], params["required"] end + + # Test for GitHub issue #71: Content object not properly serialized + # When a message has attachments, RubyLLM returns a Content object instead of a string + # The SDK should include both text and attachments in the trace + def test_format_message_for_input_handles_content_object_with_attachments + with_tmp_file(data: "This is test content") do |tmpfile| + # Create a Content object with an attachment (this triggers the Content object return) + content = ::RubyLLM::Content.new("Hello, this is the actual text content") + content.add_attachment(tmpfile.path) + + # Create a message with the Content object (simulates message with attachment) + msg = ::RubyLLM::Message.new(role: :user, content: content) + + # Verify the precondition: msg.content returns a Content object, not a string + assert msg.content.is_a?(::RubyLLM::Content), + "Precondition failed: Expected Content object when message has attachments" + + # Call the method under test + result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_message_for_input(msg) + + # When attachments are present, content should be a multipart array + assert_equal "user", result["role"] + assert result["content"].is_a?(Array), "Content should be an array when attachments are present" + assert_equal 2, result["content"].length, "Content should have text and attachment parts" + + # Verify text part + text_part = result["content"].find { |p| p["type"] == "text" } + refute_nil text_part, "Should have a text part" + assert_equal "Hello, this is the actual text content", text_part["text"] + + # Verify attachment part (OpenAI image_url format) + attachment_part = result["content"].find { |p| p["type"] == "image_url" } + refute_nil attachment_part, "Should have an attachment part" + assert attachment_part["image_url"]["url"].start_with?("data:text/plain;base64,"), + "Attachment should be a data URL with base64 encoded content" + + # Verify no object references + refute result.to_s.include?("RubyLLM::Content"), + "Result should not contain Content object reference" + end + end + + # Test for GitHub issue #71: Content object with image attachment + # Verifies attachments are properly included in traces (similar to braintrust-go-sdk) + def test_format_message_for_input_handles_image_attachment + with_png_file do |tmpfile| + # Create a Content object with an image attachment + content = ::RubyLLM::Content.new("What's in this image?") + content.add_attachment(tmpfile.path) + + # Create a message with the Content object + msg = ::RubyLLM::Message.new(role: :user, content: content) + + # Verify the precondition: msg.content returns a Content object + assert msg.content.is_a?(::RubyLLM::Content), + "Precondition failed: Expected Content object when message has image attachment" + assert_equal 1, msg.content.attachments.count, + "Expected one attachment" + + # Call the method under test + result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.format_message_for_input(msg) + + # Verify multipart content array is returned + assert_equal "user", result["role"] + assert result["content"].is_a?(Array), "Content should be an array when attachments are present" + assert_equal 2, result["content"].length, "Content should have text and attachment parts" + + # Verify text part + text_part = result["content"].find { |p| p["type"] == "text" } + refute_nil text_part, "Should have a text part" + assert_equal "What's in this image?", text_part["text"] + + # Verify attachment part (OpenAI image_url format) + attachment_part = result["content"].find { |p| p["type"] == "image_url" } + refute_nil attachment_part, "Should have an attachment part" + assert attachment_part["image_url"]["url"].start_with?("data:image/png;base64,"), + "Attachment should be a data URL with base64 encoded PNG" + + # Verify no object references in the result + refute result.to_s.include?("RubyLLM::Content"), + "Result should not contain Content object reference" + refute result.to_s.include?("RubyLLM::Attachment"), + "Result should not contain Attachment object reference" + end + end + + # Test for GitHub issue #71: build_input_messages works with Content objects + # Verifies the full flow works end-to-end with attachments + def test_build_input_messages_handles_content_objects_with_attachments + with_png_file do |tmpfile| + # Configure RubyLLM + ::RubyLLM.configure do |config| + config.openai_api_key = "test-key" + end + + # Create a chat instance with a message containing an attachment + chat = ::RubyLLM.chat(model: "gpt-4o-mini") + + # Add message with image attachment using RubyLLM's API + content = ::RubyLLM::Content.new("Describe this image") + content.add_attachment(tmpfile.path) + chat.add_message(role: :user, content: content) + + # Call the method under test + result = Braintrust::Trace::Contrib::Github::Crmne::RubyLLM.build_input_messages(chat, nil) + + # Verify the result + assert_equal 1, result.length, "Expected one message" + assert_equal "user", result[0]["role"] + + # Content should be an array with text and attachment parts + content_parts = result[0]["content"] + assert content_parts.is_a?(Array), "Content should be an array when attachments are present" + assert_equal 2, content_parts.length, "Content should have text and attachment parts" + + # Verify text part + text_part = content_parts.find { |p| p["type"] == "text" } + refute_nil text_part, "Should have a text part" + assert_equal "Describe this image", text_part["text"] + + # Verify attachment part (OpenAI image_url format) + attachment_part = content_parts.find { |p| p["type"] == "image_url" } + refute_nil attachment_part, "Should have an attachment part" + assert attachment_part["image_url"]["url"].start_with?("data:image/png;base64,"), + "Attachment should be a data URL with base64 encoded PNG" + + # Verify no object references + refute result.to_s.include?("RubyLLM::Content"), + "Result should not contain Content object reference" + end + end end From b13d4caa985627260c98de386844f1b5125be811 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Tue, 6 Jan 2026 12:17:00 -0500 Subject: [PATCH 4/4] Added: Claude skills --- .claude/skills/fix-integration-bug/SKILL.md | 92 +++++++++++++++++++++ CLAUDE.md | 51 ++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 .claude/skills/fix-integration-bug/SKILL.md create mode 100644 CLAUDE.md diff --git a/.claude/skills/fix-integration-bug/SKILL.md b/.claude/skills/fix-integration-bug/SKILL.md new file mode 100644 index 0000000..e75d6a7 --- /dev/null +++ b/.claude/skills/fix-integration-bug/SKILL.md @@ -0,0 +1,92 @@ +--- +name: fix-integration-bug +description: Workflow for fixing bugs in Ruby SDK integrations. Covers reproducing the bug, using appraisals, adding test cases, and TDD-based fixes. +--- + +# Fixing Integration Bugs + +**This skill is for fixing bugs in existing integrations.** Follow this workflow to reproduce, test, and fix integration issues. + +## 1. Reproduce the Bug + +First, understand and reproduce the issue: + +```bash +# Run with console logging to see actual trace output +BRAINTRUST_ENABLE_TRACE_CONSOLE_LOG=true bundle exec appraisal provider ruby examples/provider.rb + +# Or create a minimal reproduction script +BRAINTRUST_ENABLE_TRACE_CONSOLE_LOG=true bundle exec appraisal provider ruby -e ' + require "braintrust" + # minimal reproduction code +' +``` + +## 2. Appraisal Commands + +Test against specific gem versions: + +```bash +# Install dependencies for all appraisals +bundle exec appraisal install + +# Run tests for specific appraisal +bundle exec appraisal provider rake test + +# Run single test file +bundle exec appraisal provider rake test TEST=test/braintrust/trace/provider_test.rb + +# Run with specific seed (useful for reproducing flaky test failures from CI) +bundle exec appraisal provider rake test[12345] + +# Run all appraisals +bundle exec appraisal rake test + +# Re-record VCR cassettes +VCR_MODE=all bundle exec appraisal provider rake test +``` + +## 3. Add Failing Test Case + +Write a test that reproduces the bug: + +```ruby +def test_bug_description + # Arrange: Set up the scenario that triggers the bug + + # Act: Call the method + + # Assert: Verify expected behavior (this should FAIL initially) +end +``` + +## 4. Add Example Case (if applicable) + +Add a case to the internal example that exercises the buggy code path: + +- **Location**: `examples/internal/provider.rb` +- **Purpose**: Demonstrates the fix works end-to-end +- Follow existing example patterns (nest under root span, print output) + +## 5. TDD Fix Cycle + +1. Run failing test: `bundle exec appraisal provider rake test` +2. Implement minimal fix in `lib/braintrust/trace/contrib/provider.rb` +3. Run tests again (should pass) +4. Lint: `bundle exec rake lint:fix` +5. Run all appraisals: `bundle exec appraisal rake test` + +## 6. Verify with MCP + +Query traces to confirm the fix: + +```ruby +mcp__braintrust__btql_query(query: "SELECT input, output, metrics FROM project_logs LIMIT 5") +``` + +## Reference Files + +- Integrations: `lib/braintrust/trace/contrib/{openai,anthropic,ruby_llm}.rb` +- Tests: `test/braintrust/trace/{openai,anthropic,ruby_llm}_test.rb` +- Examples: `examples/internal/{openai,anthropic,ruby_llm}.rb` +- VCR cassettes: `test/fixtures/vcr_cassettes/provider/` diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..703583b --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,51 @@ +# Braintrust Ruby SDK + +Ruby SDK for Braintrust observability (tracing, evals, logging). + +## Code Structure + +- `lib/braintrust/` - Main SDK code + - `trace.rb`, `trace/` - Tracing/spans for LLM calls + - `eval.rb`, `eval/` - Evaluation framework + - `api.rb`, `api/` - API client + - `state.rb` - Global state management +- `test/` - Tests mirror lib/ structure +- `examples/` - Usage examples + +## Commands + +```bash +rake # Run lint + all appraisal tests (CI) +rake test # Run tests +rake lint # StandardRB linter +rake lint:fix # Auto-fix lint issues +rake -T # List all tasks +``` + +## TDD + +Reproduce the issue in a failing test before fixing it. + +## Testing + +**Prefer real code over mocks.** Use VCR to record/replay HTTP interactions. + +```bash +rake test # Run with VCR cassettes +VCR_MODE=all rake test # Re-record all cassettes +VCR_MODE=new_episodes rake test # Record new, keep existing +``` + +### Appraisals (Optional Dependencies) + +The SDK integrates with optional gems (openai, anthropic, ruby_llm). Tests run against multiple versions: + +```bash +bundle exec appraisal list # Show scenarios +bundle exec appraisal openai rake test # Test with openai gem +bundle exec appraisal openai-uninstalled rake test # Test without +``` + +## Linting + +Uses StandardRB. Run `rake lint:fix` before committing.