diff --git a/Rakefile b/Rakefile
index 5aa4d62..dde2310 100644
--- a/Rakefile
+++ b/Rakefile
@@ -2,11 +2,10 @@
 
 require "rake/testtask"
 
-Rake::TestTask.new(:test) do |t|
-  t.libs << "test"
-  t.libs << "lib"
-  t.test_files = FileList["test/**/*_test.rb"]
-  t.warning = false
+desc "Run tests (optionally with seed: rake test[12345])"
+task :test, [:seed] do |_t, args|
+  seed_args = args[:seed] ? ["--", "--seed=#{args[:seed]}"] : [] # argv form: the seed is never re-parsed by a shell
+  sh "ruby", "-Ilib", "-Itest", "-e", "Dir.glob('test/**/*_test.rb').each { |f| require_relative f }", *seed_args
 end
 
 desc "Run Standard linter"
diff --git a/test/braintrust/state_login_test.rb b/test/braintrust/state_login_test.rb
index 30ce6e6..f5a83e0 100644
--- a/test/braintrust/state_login_test.rb
+++ b/test/braintrust/state_login_test.rb
@@ -14,7 +14,8 @@ def test_login_fetches_org_info
     VCR.use_cassette("auth/login_success") do
       state = Braintrust::State.new(
         api_key: @api_key,
-        app_url: "https://www.braintrust.dev"
+        app_url: "https://www.braintrust.dev",
+        blocking_login: true
       )
 
       state.login
@@ -28,13 +29,12 @@ def test_login_fetches_org_info
 
   def test_login_with_invalid_api_key
     VCR.use_cassette("auth/login_invalid_key") do
-      state = Braintrust::State.new(
-        api_key: "invalid-key",
-        app_url: "https://www.braintrust.dev"
-      )
-
       error = assert_raises(Braintrust::Error) do
-        state.login
+        Braintrust::State.new(
+          api_key: "invalid-key",
+          app_url: "https://www.braintrust.dev",
+          blocking_login: true
+        )
       end
 
       assert_match(/invalid api key/i, error.message)
@@ -42,107 +42,77 @@ def test_login_with_invalid_api_key
   end
 
   def test_login_in_thread_retries_on_failure
-    # IMPORTANT: Disable VCR and set up stubs BEFORE creating State, because
-    # State.new immediately spawns a background login thread when no org_id
-    # is provided. If stubs aren't ready, the thread hits WebMock errors.
-    VCR.turn_off!
-    begin
-      # Stub HTTP to fail twice, then succeed
-      # This tests the real Auth.login code path and retry logic
-      stub = stub_request(:post, "https://www.braintrust.dev/api/apikey/login")
-        .to_return(
-          {status: 500, body: "Internal Server Error"},
-          {status: 500, body: "Internal Server Error"},
-          {
-            status: 200,
-            body: JSON.generate({
-              org_info: [{
-                id: "test-org-id",
-                name: "test-org",
-                api_url: "https://api.braintrust.dev",
-                proxy_url: "https://api.braintrust.dev"
-              }]
-            }),
-            headers: {"Content-Type" => "application/json"}
-          }
-        )
-
-      begin
-        # Now create State - this spawns the login thread with stubs already in place
+    assert_in_fork do
+      # The hand-written cassette replays 500, 500, then 200, in that order (see its header comment).
+      # NOTE(review): State.new appears to start the background login itself, so the cassette must wrap it.
+      VCR.use_cassette("auth/login_retry") do
         state = Braintrust::State.new(
           api_key: @api_key,
           app_url: "https://www.braintrust.dev",
           enable_tracing: false
         )
 
-        # Wait for it to complete (should retry and eventually succeed)
         state.wait_for_login(5)
 
-        # Should have retried and succeeded
         assert state.logged_in, "State should be logged in after wait_for_login"
         assert_equal "test-org-id", state.org_id
         assert_equal "test-org", state.org_name
-
-        # Verify we made at least 3 requests (2 failures + 1 success)
-        assert_requested stub, at_least_times: 3
-      ensure
-        # Clean up the stub to prevent interference with other tests
-        remove_request_stub(stub)
       end
-    ensure
-      # Re-enable VCR for other tests
-      VCR.turn_on!
     end
   end
 
   def test_login_in_thread_returns_early_if_already_logged_in
-    VCR.use_cassette("auth/login_idempotent") do
-      # Create state with blocking_login to get logged-in state
-      state = Braintrust::State.new(
-        api_key: @api_key,
-        app_url: "https://www.braintrust.dev",
-        blocking_login: true,
-        enable_tracing: false
-      )
+    assert_in_fork do
+      VCR.use_cassette("auth/login_idempotent") do
+        # Create state with blocking_login to get logged-in state
+        state = Braintrust::State.new(
+          api_key: @api_key,
+          app_url: "https://www.braintrust.dev",
+          blocking_login: true,
+          enable_tracing: false
+        )
 
-      assert state.logged_in
+        assert state.logged_in
 
-      # Track if Auth.login is called again
-      called = false
-      original_login = Braintrust::API::Internal::Auth.method(:login)
-      Braintrust::API::Internal::Auth.define_singleton_method(:login) do |**args|
-        called = true
-        original_login.call(**args)
-      end
+        # Spy on Auth.login: record any further call, then delegate to the real method
+        called = false
+        original_login = Braintrust::API::Internal::Auth.method(:login)
+        Braintrust::API::Internal::Auth.define_singleton_method(:login) do |**args|
+          called = true
+          original_login.call(**args)
+        end
 
-      # Call login_in_thread - should return early without spawning thread
-      state.login_in_thread
-      state.wait_for_login(5)
+        # Call login_in_thread - should return early without spawning thread
+        state.login_in_thread
+        state.wait_for_login(5)
 
-      # Should not have called Auth.login again
-      refute called, "Should not call Auth.login if already logged in"
-    ensure
-      Braintrust::API::Internal::Auth.define_singleton_method(:login, original_login)
+        # Should not have called Auth.login again
+        refute called, "Should not call Auth.login if already logged in"
+      ensure # original_login is still nil if State.new raised; re-installing nil would mask that error
+        Braintrust::API::Internal::Auth.define_singleton_method(:login, original_login) if original_login
+      end
     end
   end
 
   def test_login_in_thread_is_thread_safe
-    VCR.use_cassette("auth/login_thread_safe") do
-      state = Braintrust::State.new(
-        api_key: @api_key,
-        app_url: "https://www.braintrust.dev"
-      )
+    assert_in_fork do
+      VCR.use_cassette("auth/login_thread_safe") do
+        state = Braintrust::State.new(
+          api_key: @api_key,
+          app_url: "https://www.braintrust.dev"
+        )
 
-      # Start multiple concurrent login_in_thread calls
-      # Each call spawns an internal thread, but only one login should succeed
-      5.times { state.login_in_thread }
+        # Start multiple concurrent login_in_thread calls
+        # Each call spawns an internal thread, but only one login should succeed
+        5.times { state.login_in_thread }
 
-      # Wait for login to complete
-      state.wait_for_login(5)
+        # Wait for login to complete
+        state.wait_for_login(5)
 
-      # Should be logged in exactly once (not multiple times)
-      assert state.logged_in
-      refute_nil state.org_id
+        # Login must have completed. NOTE(review): only the end state is checked, not the login count.
+        assert state.logged_in
+        refute_nil state.org_id
+      end
     end
   end
 end
diff --git a/test/fixtures/vcr_cassettes/auth/login_retry.yml b/test/fixtures/vcr_cassettes/auth/login_retry.yml
new file mode 100644
index 0000000..a00372b
--- /dev/null
+++ b/test/fixtures/vcr_cassettes/auth/login_retry.yml
@@ -0,0 +1,167 @@
+---
+# This cassette is manually crafted to test retry logic.
+# It returns 500 twice, then 200 on the third attempt.
+# VCR plays back interactions in order, enabling sequential response testing.
+http_interactions:
+- request:
+    method: post
+    uri: https://www.braintrust.dev/api/apikey/login
+    body:
+      encoding: UTF-8
+      string: ''
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+      Host:
+      - www.braintrust.dev
+      Authorization:
+      - Bearer <BRAINTRUST_API_KEY>
+  response:
+    status:
+      code: 500
+      message: Internal Server Error
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Allow-Headers:
+      - X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5,
+        Content-Type, Date, X-Api-Version
+      Access-Control-Allow-Methods:
+      - GET,OPTIONS,PATCH,DELETE,POST,PUT
+      Access-Control-Allow-Origin:
+      - "*"
+      Cache-Control:
+      - public, max-age=0, must-revalidate
+      Content-Type:
+      - text/plain; charset=utf-8
+      Date:
+      - Fri, 24 Oct 2025 14:56:01 GMT
+      Server:
+      - Vercel
+      Strict-Transport-Security:
+      - max-age=63072000
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-Matched-Path:
+      - "/api/apikey/login"
+      X-Vercel-Id:
+      - iad1::iad1::retry-test-1
+    body:
+      encoding: UTF-8
+      string: 'Internal Server Error'
+  recorded_at: Fri, 24 Oct 2025 14:56:01 GMT
+- request:
+    method: post
+    uri: https://www.braintrust.dev/api/apikey/login
+    body:
+      encoding: UTF-8
+      string: ''
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+      Host:
+      - www.braintrust.dev
+      Authorization:
+      - Bearer <BRAINTRUST_API_KEY>
+  response:
+    status:
+      code: 500
+      message: Internal Server Error
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Allow-Headers:
+      - X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5,
+        Content-Type, Date, X-Api-Version
+      Access-Control-Allow-Methods:
+      - GET,OPTIONS,PATCH,DELETE,POST,PUT
+      Access-Control-Allow-Origin:
+      - "*"
+      Cache-Control:
+      - public, max-age=0, must-revalidate
+      Content-Type:
+      - text/plain; charset=utf-8
+      Date:
+      - Fri, 24 Oct 2025 14:56:02 GMT
+      Server:
+      - Vercel
+      Strict-Transport-Security:
+      - max-age=63072000
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-Matched-Path:
+      - "/api/apikey/login"
+      X-Vercel-Id:
+      - iad1::iad1::retry-test-2
+    body:
+      encoding: UTF-8
+      string: 'Internal Server Error'
+  recorded_at: Fri, 24 Oct 2025 14:56:02 GMT
+- request:
+    method: post
+    uri: https://www.braintrust.dev/api/apikey/login
+    body:
+      encoding: UTF-8
+      string: ''
+    headers:
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Accept:
+      - "*/*"
+      User-Agent:
+      - Ruby
+      Host:
+      - www.braintrust.dev
+      Authorization:
+      - Bearer <BRAINTRUST_API_KEY>
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Allow-Headers:
+      - X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5,
+        Content-Type, Date, X-Api-Version
+      Access-Control-Allow-Methods:
+      - GET,OPTIONS,PATCH,DELETE,POST,PUT
+      Access-Control-Allow-Origin:
+      - "*"
+      Cache-Control:
+      - public, max-age=0, must-revalidate
+      Content-Length:
+      - '133'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Fri, 24 Oct 2025 14:56:03 GMT
+      Server:
+      - Vercel
+      Strict-Transport-Security:
+      - max-age=63072000
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-Matched-Path:
+      - "/api/apikey/login"
+      X-Vercel-Id:
+      - iad1::iad1::retry-test-3
+    body:
+      encoding: UTF-8
+      string: '{"org_info":[{"id":"test-org-id","name":"test-org","api_url":"https://api.braintrust.dev","proxy_url":"https://api.braintrust.dev"}]}'
+  recorded_at: Fri, 24 Oct 2025 14:56:03 GMT
+recorded_with: VCR 6.3.1
diff --git a/test/test_helper.rb b/test/test_helper.rb
index b1a0f8f..65a052e 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -226,6 +226,153 @@ def get_anthropic_key
   end
 end
 
+# Runs the test inside a fork, to isolate its side-effects from the main process.
+# Similar in purpose to https://docs.ruby-lang.org/en/master/Ruby/Box.html#class-Ruby::Box
+#
+# @param fork_assertions [Proc, nil] called with `status:`, `stdout:`, `stderr:`; defaults to asserting child success
+# @param timeout_seconds [Integer] how long to wait for the child before giving up
+# @param trigger_stacktrace_on_kill [Boolean] on failure, send SEGV to the child before cleaning up
+# @param debug [Boolean] delegate to `assert_in_fork_debug` instead (output is not captured)
+# @yield Block containing the test code; it is executed in the forked child
+# @return [Hash] `{status:, stdout:, stderr:}` collected from the child (non-debug mode)
+def assert_in_fork(fork_assertions: nil, timeout_seconds: 10, trigger_stacktrace_on_kill: false, debug: false)
+  fork_assertions ||= proc { |status:, stdout:, stderr:|
+    assert(status&.success?, "STDOUT:`#{stdout}` STDERR:`#{stderr}`")
+  }
+
+  return assert_in_fork_debug(fork_assertions: fork_assertions) { yield } if debug
+
+  fork_stdout = Tempfile.new("braintrust-minitest-assert-in-fork-stdout")
+  fork_stderr = Tempfile.new("braintrust-minitest-assert-in-fork-stderr")
+  begin
+    pid = fork do
+      # Capture forked output
+      $stdout.reopen(fork_stdout)
+      $stdout.sync = true
+      $stderr.reopen(fork_stderr) # STDERR captures failures. We print it in case the fork fails on exit.
+      $stderr.sync = true
+
+      yield
+    end
+
+    # Wait for fork to finish, retrieve its status.
+    # Enforce timeout to ensure test fork doesn't hang the test suite.
+    _, status = try_wait_until(seconds: timeout_seconds) { Process.wait2(pid, Process::WNOHANG) }
+
+    stdout = File.read(fork_stdout.path)
+    stderr = File.read(fork_stderr.path)
+
+    # Capture forked execution information
+    result = {status: status, stdout: stdout, stderr: stderr}
+
+    # Check if fork and assertions have completed successfully
+    fork_assertions.call(**result)
+
+    result
+  rescue => e
+    crash_note = nil
+
+    if trigger_stacktrace_on_kill
+      crash_note = " (Crashing Ruby to get stacktrace as requested by `trigger_stacktrace_on_kill`)"
+      begin
+        Process.kill("SEGV", pid)
+        warn "Waiting for child process to exit after SEGV signal... #{crash_note}"
+        Process.wait(pid)
+      rescue
+        nil
+      end
+    end
+
+    stdout = File.read(fork_stdout.path)
+    stderr = File.read(fork_stderr.path)
+
+    raise "Failure or timeout in `assert_in_fork`#{crash_note}, STDOUT: `#{stdout}`, STDERR: `#{stderr}`", cause: e
+  ensure
+    # Kill AND reap a child that was never waited for: a killed child stays a zombie until reaped.
+    begin
+      Process.kill("KILL", pid) if pid && !status
+      Process.wait(pid) if pid && !status
+    rescue
+      nil # child already gone, e.g. reaped by the SEGV path above
+    end
+
+    fork_stderr.close
+    fork_stdout.close
+    fork_stdout.unlink
+    fork_stderr.unlink
+  end
+end
+
+# Variant of assert_in_fork meant for interactive debugging (e.g. a breakpoint
+# inside the fork): the child's I/O streams are not redirected and the parent
+# waits for it without any timeout.
+# `timeout_seconds` and `trigger_stacktrace_on_kill` are accepted but not used here.
+def assert_in_fork_debug(fork_assertions:, timeout_seconds: 10, trigger_stacktrace_on_kill: false)
+  child_pid = fork { yield }
+  _reaped_pid, exit_status = Process.wait2(child_pid)
+  # No output was captured, so the assertions receive empty strings.
+  fork_assertions.call(status: exit_status, stdout: "", stderr: "")
+end
+
+# Waits for the condition provided by the block argument to return truthy.
+#
+# Waits for 5 seconds by default. Can be configured by setting either
+# `seconds`, or `attempts` and `backoff`; combining both styles raises.
+#
+# @yieldparam [Integer] attempts the configured number of attempts
+# @yieldreturn [Boolean] block executed until it returns truthy
+# @param [Numeric] seconds number of seconds to wait
+# @param [Integer] attempts number of attempts at checking the condition
+# @param [Numeric] backoff wait time between condition checking attempts
+# @return [Object] the first truthy value returned by the block
+# @raise [RuntimeError] if the arguments conflict or the wait time is exhausted
+def try_wait_until(seconds: nil, attempts: nil, backoff: nil)
+  raise "Provide either `seconds` or `attempts` & `backoff`, not both" if seconds && (attempts || backoff)
+
+  spec = if seconds
+    "#{seconds} seconds"
+  elsif attempts || backoff
+    "#{attempts} attempts with backoff: #{backoff}"
+  else
+    "none"
+  end
+
+  if seconds
+    attempts = (seconds * 10).ceil # whole number of 0.1s polls, so a fractional `seconds` works with `times`
+    backoff = 0.1
+  else
+    # 5 seconds by default, but respect the provided values if any.
+    attempts ||= 50
+    backoff ||= 0.1
+  end
+
+  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+  # It's common for tests to want to run simple tasks in a background thread
+  # but call this method without the thread having even time to start.
+  #
+  # We add an extra attempt, interleaved by `Thread.pass`, in order to allow for
+  # those simple cases to quickly succeed without a timed `sleep` call. This
+  # saves simple tests one `backoff`-second sleep cycle.
+  #
+  # The total configured timeout is not reduced.
+  (attempts + 1).times do |i|
+    result = yield(attempts)
+    return result if result
+
+    if i == 0
+      Thread.pass
+    else
+      sleep(backoff)
+    end
+  end
+
+  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
+  actual = "#{"%.2f" % elapsed} seconds, #{attempts} attempts with backoff #{backoff}"
+
+  raise("Wait time exhausted! Requested: #{spec}, waited: #{actual}")
+end
+
 # Include helper in all test cases
 class Minitest::Test
   include TracingTestHelper