diff --git a/.release-please-manifest.json b/.release-please-manifest.json index c6a6955..f391d41 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "3.5.2" + ".": "3.6.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index a0da57a..27548de 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 8 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/browserbase%2Fstagehand-8fbb3fa8f3a37c1c7408de427fe125aadec49f705e8e30d191601a9b69c4cc41.yml -openapi_spec_hash: 8a36f79075102c63234ed06107deb8c9 -config_hash: 7386d24e2f03a3b2a89b3f6881446348 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/browserbase%2Fstagehand-5d0052068f044366d6d31570d9712922c9a80fdd6f9995af815e9afc075507ef.yml +openapi_spec_hash: c0cb787da075d8cd2d938c05b36b5efa +config_hash: 4252fc025e947bc0fd6b2abd91a0cc8e diff --git a/CHANGELOG.md b/CHANGELOG.md index 819a91e..9af0c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 3.6.0 (2026-02-18) + +Full Changelog: [v3.5.2...v3.6.0](https://github.com/browserbase/stagehand-ruby/compare/v3.5.2...v3.6.0) + +### Features + +* randomize region used for evals, split out pnpm and turbo cache, veri… ([a68a9d2](https://github.com/browserbase/stagehand-ruby/commit/a68a9d2705b2c3b3b35dcaab4b5693184194774b)) + ## 3.5.2 (2026-02-07) Full Changelog: [v3.5.1...v3.5.2](https://github.com/browserbase/stagehand-ruby/compare/v3.5.1...v3.5.2) diff --git a/Gemfile.lock b/Gemfile.lock index d78caf4..8641e39 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,7 +11,7 @@ GIT PATH remote: . specs: - stagehand (3.5.2) + stagehand (3.6.0) cgi connection_pool diff --git a/README.md b/README.md index 4089a35..5729f31 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,58 @@ # Stagehand Ruby API library + +
+
+ The AI Browser Automation Framework
+ Read the Docs
+
+If you're looking for other languages, you can find them + here +
+ + + + The Stagehand Ruby library provides convenient access to the Stagehand REST API from any Ruby 3.2.0+ application. It ships with comprehensive types & docstrings in Yard, RBS, and RBI – [see below](https://github.com/browserbase/stagehand-ruby#Sorbet) for usage with Sorbet. The standard library's `net/http` is used as the HTTP transport, with connection pooling via the `connection_pool` gem. It is generated with [Stainless](https://www.stainless.com/). @@ -24,163 +77,119 @@ gem "stagehand", :git => "git://github.com/browserbase/stagehand-ruby.git" ## Usage +This mirrors `examples/remote_browser_playwright_example.rb`. + ```ruby require "bundler/setup" require "stagehand" -# Create a new Stagehand client with your credentials +require_relative "examples/env" +ExampleEnv.load! + +require "playwright" + client = Stagehand::Client.new( - browserbase_api_key: ENV["BROWSERBASE_API_KEY"], # defaults to ENV["BROWSERBASE_API_KEY"] - browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"], # defaults to ENV["BROWSERBASE_PROJECT_ID"] - model_api_key: ENV["MODEL_API_KEY"] # defaults to ENV["MODEL_API_KEY"] + browserbase_api_key: ENV["BROWSERBASE_API_KEY"], + browserbase_project_id: ENV["BROWSERBASE_PROJECT_ID"], + model_api_key: ENV["MODEL_API_KEY"], + server: "remote" ) -# Start a new browser session start_response = client.sessions.start( - model_name: "openai/gpt-5-nano" + model_name: "anthropic/claude-sonnet-4-6", + browser: { type: :browserbase } ) -puts "Session started: #{start_response.data.session_id}" session_id = start_response.data.session_id +cdp_url = start_response.data.cdp_url +raise "No CDP URL returned for this session." if cdp_url.to_s.empty? -# Navigate to a webpage -client.sessions.navigate( - session_id, - url: "https://news.ycombinator.com" -) -puts "Navigated to Hacker News" +Playwright.create(playwright_cli_executable_path: "./node_modules/.bin/playwright") do |playwright| + browser = playwright.chromium.connect_over_cdp(cdp_url) + context = browser.contexts.first || browser.new_context + page = context.pages.first || context.new_page -# Use Observe to find possible actions on the page -observe_response = client.sessions.observe( - session_id, - instruction: "find the link to view comments for the top post" -) - -actions = observe_response.data.result -puts "Found #{actions.length} possible actions" + client.sessions.navigate(session_id, url: "https://news.ycombinator.com") + page.wait_for_load_state(state: "domcontentloaded") -# Take the first action returned by Observe -action = actions.first -puts "Acting on: #{action.description}" + observe_stream = client.sessions.observe_streaming( + session_id, + instruction: "find the link to view comments for the top post" + ) + observe_stream.each { |_event| } -# Pass the structured action to Act -# Convert the observe result to a hash and ensure method is set to "click" -act_response = client.sessions.act( - session_id, - input: action.to_h.merge(method: "click") -) -puts "Act completed: #{act_response.data.result[:message]}" - -# Extract data from the page -# We're now on the comments page, so extract the top comment text -extract_response = client.sessions.extract( - session_id, - instruction: "extract the text of the top comment on this page", - schema: { - type: "object", - properties: { - comment_text: { - type: "string", - description: "The text content of the top comment" + act_stream = client.sessions.act_streaming( + session_id, + input: "Click the comments link for the top post" + ) + act_stream.each { |_event| } + + extract_stream = client.sessions.extract_streaming( + session_id, + instruction: "extract the text of the top comment on this page", + schema: { + type: "object", + properties: { + commentText: {type: "string"}, + author: {type: "string"} }, - author: { - type: "string", - description: "The username of the comment author" - } + required: ["commentText"] + } + ) + extract_stream.each { |_event| } + + execute_stream = client.sessions.execute_streaming( + session_id, + execute_options: { + instruction: "Click the 'Learn more' link if available", + max_steps: 3 }, - required: ["comment_text"] - } -) -puts "Extracted data: #{extract_response.data.result}" - -# Get the author from the extracted data -extracted_data = extract_response.data.result -author = extracted_data[:author] -puts "Looking up profile for author: #{author}" - -# Use the Agent to find the author's profile -# Execute runs an autonomous agent that can navigate and interact with pages -execute_response = client.sessions.execute( - session_id, - execute_options: { - instruction: "Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '#{author}'. " \ - "Click on their username to go to their profile page and look for any links they have shared.", - max_steps: 15 - }, - agent_config: { - model: Stagehand::ModelConfig::ModelConfigObject.new( - model_name: "openai/gpt-5-nano", - api_key: ENV["MODEL_API_KEY"] - ), - cua: false - } -) -puts "Agent completed: #{execute_response.data.result[:message]}" -puts "Agent success: #{execute_response.data.result[:success]}" -puts "Agent actions taken: #{execute_response.data.result[:actions]&.length || 0}" + agent_config: { + model: Stagehand::ModelConfig.new( + model_name: "anthropic/claude-opus-4-6", + api_key: ENV["MODEL_API_KEY"] + ), + cua: false + } + ) + execute_stream.each { |_event| } +end -# End the session to cleanup browser resources client.sessions.end_(session_id) -puts "Session ended" -``` - -### Running the Examples - -Install dependencies, set credentials, and run the scripts below. - -```bash -# Install the gem dependencies -bundle install ``` -Remote browser example: +## Running the Example -```bash -export BROWSERBASE_API_KEY="your-browserbase-api-key" -export BROWSERBASE_PROJECT_ID="your-browserbase-project-id" -export MODEL_API_KEY="your-openai-api-key" -bundle exec ruby examples/remote_browser_example.rb -``` +Set your environment variables (from `examples/.env.example`): -Local mode example (embedded server, local Chrome/Chromium): +- `STAGEHAND_API_URL` +- `MODEL_API_KEY` +- `BROWSERBASE_API_KEY` +- `BROWSERBASE_PROJECT_ID` ```bash -export MODEL_API_KEY="your-openai-api-key" -bundle exec ruby examples/local_browser_example.rb +cp examples/.env.example examples/.env +# Edit examples/.env with your credentials. ``` -Playwright local example (SSE streaming): +The examples load `examples/.env` automatically. -```bash -gem install playwright-ruby-client -npm install playwright -./node_modules/.bin/playwright install chromium -export MODEL_API_KEY="your-openai-api-key" -bundle exec ruby examples/local_playwright_example.rb +Examples and dependencies: -bundle exec ruby examples/local_browser_playwright_example.rb -``` +- `examples/remote_browser_example.rb`: stagehand only +- `examples/local_browser_example.rb`: stagehand only +- `examples/remote_browser_playwright_example.rb`: `playwright-ruby-client` + Playwright browsers +- `examples/local_browser_playwright_example.rb`: `playwright-ruby-client` + Playwright browsers +- `examples/local_playwright_example.rb`: `playwright-ruby-client` + Playwright browsers +- `examples/local_watir_example.rb`: `watir` -Playwright remote example: +Install dependencies for the example you want to run, then execute it: ```bash -gem install playwright-ruby-client -npm install playwright -./node_modules/.bin/playwright install chromium -export BROWSERBASE_API_KEY="your-browserbase-api-key" -export BROWSERBASE_PROJECT_ID="your-browserbase-project-id" -export MODEL_API_KEY="your-openai-api-key" +bundle install bundle exec ruby examples/remote_browser_playwright_example.rb ``` -Watir local example: - -```bash -gem install watir -export MODEL_API_KEY="your-openai-api-key" -bundle exec ruby examples/local_watir_example.rb -``` - ### Streaming We provide support for streaming responses using Server-Sent Events (SSE). @@ -202,7 +211,7 @@ When the library is unable to connect to the API, or if the API returns a non-su ```ruby begin - session = stagehand.sessions.start(model_name: "openai/gpt-5-nano") + session = stagehand.sessions.start(model_name: "anthropic/claude-sonnet-4-6") rescue Stagehand::Errors::APIConnectionError => e puts("The server could not be reached") puts(e.cause) # an underlying Exception, likely raised within `net/http` @@ -245,7 +254,7 @@ stagehand = Stagehand::Client.new( ) # Or, configure per-request: -stagehand.sessions.start(model_name: "openai/gpt-5-nano", request_options: {max_retries: 5}) +stagehand.sessions.start(model_name: "anthropic/claude-sonnet-4-6", request_options: {max_retries: 5}) ``` ### Timeouts @@ -259,7 +268,7 @@ stagehand = Stagehand::Client.new( ) # Or, configure per-request: -stagehand.sessions.start(model_name: "openai/gpt-5-nano", request_options: {timeout: 5}) +stagehand.sessions.start(model_name: "anthropic/claude-sonnet-4-6", request_options: {timeout: 5}) ``` On timeout, `Stagehand::Errors::APITimeoutError` is raised. @@ -291,7 +300,7 @@ Note: the `extra_` parameters of the same name overrides the documented paramete ```ruby response = stagehand.sessions.start( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-sonnet-4-6", request_options: { extra_query: {my_query_parameter: value}, extra_body: {my_body_parameter: value}, diff --git a/examples/.env.example b/examples/.env.example new file mode 100644 index 0000000..6272bb0 --- /dev/null +++ b/examples/.env.example @@ -0,0 +1,4 @@ +STAGEHAND_API_URL=https://api.stagehand.browserbase.com +MODEL_API_KEY=sk-proj-your-llm-api-key-here +BROWSERBASE_API_KEY=bb_live_your_api_key_here +BROWSERBASE_PROJECT_ID=your-bb-project-uuid-here diff --git a/examples/env.rb b/examples/env.rb new file mode 100644 index 0000000..fa3a3e3 --- /dev/null +++ b/examples/env.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module ExampleEnv + REQUIRED_KEYS = %w[ + STAGEHAND_API_URL + MODEL_API_KEY + BROWSERBASE_API_KEY + BROWSERBASE_PROJECT_ID + ].freeze + + def self.load! + env_path = find_env_path + raise "Missing examples/.env (expected in repo examples/ directory)." unless env_path + + File.readlines(env_path, chomp: true).each do |line| + trimmed = line.strip + next if trimmed.empty? || trimmed.start_with?("#") + + key, value = trimmed.split("=", 2) + next if key.nil? || value.nil? + + ENV[key] ||= value + end + + missing = REQUIRED_KEYS.select { |key| ENV[key].to_s.empty? } + unless missing.empty? + raise "Missing required env vars: #{missing.join(', ')} (from examples/.env)" + end + + ENV["STAGEHAND_BASE_URL"] ||= ENV["STAGEHAND_API_URL"] + end + + def self.find_env_path + current = File.expand_path(Dir.pwd) + loop do + candidate = File.join(current, "examples", ".env") + return candidate if File.exist?(candidate) + + parent = File.dirname(current) + return nil if parent == current + + current = parent + end + end +end diff --git a/examples/local_browser_example.rb b/examples/local_browser_example.rb index ddedca0..b7cb802 100755 --- a/examples/local_browser_example.rb +++ b/examples/local_browser_example.rb @@ -4,8 +4,8 @@ require "bundler/setup" require "stagehand" -# Local mode runs the embedded Stagehand server and uses a local Chrome/Chromium. -# Set MODEL_API_KEY before running this script. +require_relative "env" +ExampleEnv.load! model_key = ENV["MODEL_API_KEY"].to_s if model_key.empty? warn "Set MODEL_API_KEY to run the local example." @@ -17,11 +17,45 @@ server: "local" ) +def print_stream_event(label, event) + case event.type + when :log + puts("[#{label}] log: #{event.data.message}") + when :system + status = event.data.status + if event.data.respond_to?(:error) && event.data.error + puts("[#{label}] system #{status}: #{event.data.error}") + elsif event.data.respond_to?(:result) && !event.data.result.nil? + puts("[#{label}] system #{status}: #{event.data.result}") + else + puts("[#{label}] system #{status}") + end + else + puts("[#{label}] event: #{event.inspect}") + end +end + +def stream_with_result(label, stream) + puts("#{label} stream:") + result = nil + stream.each do |event| + print_stream_event(label, event) + if event.type == :system && event.data.respond_to?(:result) && !event.data.result.nil? + result = event.data.result + end + if event.type == :system && event.data.respond_to?(:status) && event.data.status == :error + error_message = event.data.respond_to?(:error) && event.data.error ? event.data.error : "unknown error" + raise("#{label} stream error: #{error_message}") + end + end + result +end + session_id = nil begin start_response = client.sessions.start( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-sonnet-4-6", browser: { type: :local, launch_options: { @@ -35,21 +69,25 @@ client.sessions.navigate(session_id, url: "https://example.com") puts("Navigated to example.com") - observe_response = client.sessions.observe( + observe_stream = client.sessions.observe_streaming( session_id, instruction: "Find all clickable links on this page" ) - puts("Found #{observe_response.data.result.length} possible actions") + observe_result = stream_with_result("Observe", observe_stream) + observe_actions = observe_result || [] + puts("Found #{observe_actions.length} possible actions") - action = observe_response.data.result.first + action = observe_actions.first act_input = action ? action.to_h.merge(method: "click") : "Click the 'Learn more' link" - act_response = client.sessions.act( + act_stream = client.sessions.act_streaming( session_id, input: act_input ) - puts("Act completed: #{act_response.data.result[:message]}") + act_result = stream_with_result("Act", act_stream) + act_message = act_result.is_a?(Hash) ? (act_result[:message] || act_result["message"]) : act_result + puts("Act completed: #{act_message}") - extract_response = client.sessions.extract( + extract_stream = client.sessions.extract_streaming( session_id, instruction: "extract the main heading and any links on this page", schema: { @@ -60,9 +98,10 @@ } } ) - puts("Extracted: #{extract_response.data.result}") + extract_result = stream_with_result("Extract", extract_stream) + puts("Extracted: #{extract_result}") - execute_response = client.sessions.execute( + execute_stream = client.sessions.execute_streaming( session_id, execute_options: { instruction: "Click on the 'Learn more' link if available and report the page title", @@ -70,14 +109,19 @@ }, agent_config: { model: Stagehand::ModelConfig.new( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key ), cua: false } ) - puts("Agent completed: #{execute_response.data.result[:message]}") - puts("Agent success: #{execute_response.data.result[:success]}") + execute_result = stream_with_result("Execute", execute_stream) + execute_message = execute_result.is_a?(Hash) ? (execute_result[:message] || execute_result["message"]) : execute_result + execute_success = execute_result.is_a?(Hash) ? (execute_result[:success] || execute_result["success"]) : nil + execute_actions = execute_result.is_a?(Hash) ? (execute_result[:actions] || execute_result["actions"]) : nil + puts("Agent completed: #{execute_message}") + puts("Agent success: #{execute_success}") + puts("Agent actions taken: #{execute_actions&.length || 0}") ensure client.sessions.end_(session_id) if session_id client.close diff --git a/examples/local_browser_playwright_example.rb b/examples/local_browser_playwright_example.rb index 587a8bf..23acf37 100755 --- a/examples/local_browser_playwright_example.rb +++ b/examples/local_browser_playwright_example.rb @@ -5,19 +5,8 @@ require "bundler/setup" require "stagehand" -# Example: Using Playwright with Stagehand local mode (local browser). -# -# Prerequisites: -# - Set MODEL_API_KEY or OPENAI_API_KEY environment variable -# - Set BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID (can be any value in local mode) -# - Install Playwright (outside this gem): -# gem install playwright-ruby-client -# npm install playwright -# ./node_modules/.bin/playwright install chromium -# -# Run: -# bundle exec ruby examples/local_browser_playwright_example.rb - +require_relative "env" +ExampleEnv.load! begin require("playwright") rescue LoadError @@ -25,7 +14,7 @@ exit(1) end -model_key = ENV["MODEL_API_KEY"] || ENV["OPENAI_API_KEY"] +model_key = ENV["MODEL_API_KEY"] browserbase_api_key = ENV["BROWSERBASE_API_KEY"].to_s browserbase_project_id = ENV["BROWSERBASE_PROJECT_ID"].to_s @@ -85,7 +74,7 @@ def stream_with_result(label, stream) cdp_url = browser_server.ws_endpoint start_response = client.sessions.start( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-sonnet-4-6", browser: { type: :local, launch_options: { @@ -171,7 +160,7 @@ def stream_with_result(label, stream) }, agent_config: { model: { - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key }, cua: false @@ -187,7 +176,7 @@ def stream_with_result(label, stream) }, agent_config: { model: { - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key }, cua: false diff --git a/examples/local_playwright_example.rb b/examples/local_playwright_example.rb index 55346ed..464cbfa 100755 --- a/examples/local_playwright_example.rb +++ b/examples/local_playwright_example.rb @@ -7,22 +7,8 @@ require "net/http" require "stagehand" -# Example: Using Playwright with Stagehand local mode -# -# This example mirrors the Python Playwright flow: launch the browser with Playwright, -# connect Stagehand to the same browser via CDP, and target the existing page by frame_id. -# -# Prerequisites: -# - Set MODEL_API_KEY or OPENAI_API_KEY environment variable -# - Chrome/Chromium installed locally -# - Install Playwright (outside this gem): -# gem install playwright-ruby-client -# npm install playwright -# ./node_modules/.bin/playwright install chromium -# -# Run: -# bundle exec ruby examples/local_playwright_example.rb - +require_relative "env" +ExampleEnv.load! begin require("playwright") rescue LoadError @@ -69,9 +55,43 @@ def fetch_page_target_id(port, page_url) target_id end -model_key = ENV["MODEL_API_KEY"] || ENV["OPENAI_API_KEY"] +def print_stream_event(label, event) + case event.type + when :log + puts("[#{label}] log: #{event.data.message}") + when :system + status = event.data.status + if event.data.respond_to?(:error) && event.data.error + puts("[#{label}] system #{status}: #{event.data.error}") + elsif event.data.respond_to?(:result) && !event.data.result.nil? + puts("[#{label}] system #{status}: #{event.data.result}") + else + puts("[#{label}] system #{status}") + end + else + puts("[#{label}] event: #{event.inspect}") + end +end + +def stream_with_result(label, stream) + puts("#{label} stream:") + result = nil + stream.each do |event| + print_stream_event(label, event) + if event.type == :system && event.data.respond_to?(:result) && !event.data.result.nil? + result = event.data.result + end + if event.type == :system && event.data.respond_to?(:status) && event.data.status == :error + error_message = event.data.respond_to?(:error) && event.data.error ? event.data.error : "unknown error" + raise("#{label} stream error: #{error_message}") + end + end + result +end + +model_key = ENV["MODEL_API_KEY"] if model_key.to_s.empty? - warn "Set MODEL_API_KEY (or OPENAI_API_KEY) to run the local example." + warn "Set MODEL_API_KEY to run the local example." exit 1 end @@ -99,7 +119,7 @@ def fetch_page_target_id(port, page_url) session_id = nil begin start_response = client.sessions.start( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-sonnet-4-6", browser: { type: :local, launch_options: { @@ -110,23 +130,27 @@ def fetch_page_target_id(port, page_url) session_id = start_response.data.session_id puts("Session started: #{session_id}") - observe_response = client.sessions.observe( + observe_stream = client.sessions.observe_streaming( session_id, frame_id: page_target_id, instruction: "Find all clickable links on this page" ) - puts("Found #{observe_response.data.result.length} possible actions") + observe_result = stream_with_result("Observe", observe_stream) + observe_actions = observe_result || [] + puts("Found #{observe_actions.length} possible actions") - action = observe_response.data.result.first + action = observe_actions.first act_input = action ? action.to_h.merge(method: "click") : "Click the 'Learn more' link" - act_response = client.sessions.act( + act_stream = client.sessions.act_streaming( session_id, frame_id: page_target_id, input: act_input ) - puts("Act completed: #{act_response.data.result[:message]}") + act_result = stream_with_result("Act", act_stream) + act_message = act_result.is_a?(Hash) ? (act_result[:message] || act_result["message"]) : act_result + puts("Act completed: #{act_message}") - extract_response = client.sessions.extract( + extract_stream = client.sessions.extract_streaming( session_id, frame_id: page_target_id, instruction: "Extract the main heading and any links on this page", @@ -138,9 +162,10 @@ def fetch_page_target_id(port, page_url) } } ) - puts("Extracted: #{extract_response.data.result}") + extract_result = stream_with_result("Extract", extract_stream) + puts("Extracted: #{extract_result}") - execute_response = client.sessions.execute( + execute_stream = client.sessions.execute_streaming( session_id, frame_id: page_target_id, execute_options: { @@ -149,14 +174,19 @@ def fetch_page_target_id(port, page_url) }, agent_config: { model: Stagehand::ModelConfig.new( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key ), cua: false } ) - puts("Agent completed: #{execute_response.data.result[:message]}") - puts("Agent success: #{execute_response.data.result[:success]}") + execute_result = stream_with_result("Execute", execute_stream) + execute_message = execute_result.is_a?(Hash) ? (execute_result[:message] || execute_result["message"]) : execute_result + execute_success = execute_result.is_a?(Hash) ? (execute_result[:success] || execute_result["success"]) : nil + execute_actions = execute_result.is_a?(Hash) ? (execute_result[:actions] || execute_result["actions"]) : nil + puts("Agent completed: #{execute_message}") + puts("Agent success: #{execute_success}") + puts("Agent actions taken: #{execute_actions&.length || 0}") page.wait_for_load_state(state: "domcontentloaded") page.screenshot(path: "screenshot_local_playwright.png", fullPage: true) diff --git a/examples/local_watir_example.rb b/examples/local_watir_example.rb index 17bf64a..bb94a2d 100755 --- a/examples/local_watir_example.rb +++ b/examples/local_watir_example.rb @@ -7,20 +7,8 @@ require "net/http" require "stagehand" -# Example: Using Watir with Stagehand local mode -# -# This example demonstrates how to combine Watir (for low-level browser control) -# with Stagehand (for AI-powered actions) using a local browser. -# -# Prerequisites: -# - Set MODEL_API_KEY or OPENAI_API_KEY environment variable -# - Chrome/Chromium installed locally -# - Install Watir (outside this gem): -# gem install watir -# -# Run: -# bundle exec ruby examples/local_watir_example.rb - +require_relative "env" +ExampleEnv.load! begin require("watir") rescue LoadError @@ -44,12 +32,46 @@ def fetch_cdp_websocket_url(port) ws_url end -model_key = ENV["MODEL_API_KEY"] || ENV["OPENAI_API_KEY"] +model_key = ENV["MODEL_API_KEY"] if model_key.to_s.empty? - warn "Set MODEL_API_KEY (or OPENAI_API_KEY) to run the local example." + warn "Set MODEL_API_KEY to run the local example." exit 1 end +def print_stream_event(label, event) + case event.type + when :log + puts("[#{label}] log: #{event.data.message}") + when :system + status = event.data.status + if event.data.respond_to?(:error) && event.data.error + puts("[#{label}] system #{status}: #{event.data.error}") + elsif event.data.respond_to?(:result) && !event.data.result.nil? + puts("[#{label}] system #{status}: #{event.data.result}") + else + puts("[#{label}] system #{status}") + end + else + puts("[#{label}] event: #{event.inspect}") + end +end + +def stream_with_result(label, stream) + puts("#{label} stream:") + result = nil + stream.each do |event| + print_stream_event(label, event) + if event.type == :system && event.data.respond_to?(:result) && !event.data.result.nil? + result = event.data.result + end + if event.type == :system && event.data.respond_to?(:status) && event.data.status == :error + error_message = event.data.respond_to?(:error) && event.data.error ? event.data.error : "unknown error" + raise("#{label} stream error: #{error_message}") + end + end + result +end + options = Selenium::WebDriver::Chrome::Options.new options.add_argument("--remote-debugging-port=#{DEBUG_PORT}") options.add_argument("--disable-gpu") @@ -69,7 +91,7 @@ def fetch_cdp_websocket_url(port) puts("Browser WebSocket URL: #{ws_url}") start_response = client.sessions.start( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-sonnet-4-6", browser: { type: :local, launch_options: { @@ -82,21 +104,25 @@ def fetch_cdp_websocket_url(port) client.sessions.navigate(session_id, url: "https://example.com") - observe_response = client.sessions.observe( + observe_stream = client.sessions.observe_streaming( session_id, instruction: "Find all clickable links on this page" ) - puts("Found #{observe_response.data.result.length} possible actions") + observe_result = stream_with_result("Observe", observe_stream) + observe_actions = observe_result || [] + puts("Found #{observe_actions.length} possible actions") - action = observe_response.data.result.first + action = observe_actions.first act_input = action ? action.to_h.merge(method: "click") : "Click the 'Learn more' link" - act_response = client.sessions.act( + act_stream = client.sessions.act_streaming( session_id, input: act_input ) - puts("Act completed: #{act_response.data.result[:message]}") + act_result = stream_with_result("Act", act_stream) + act_message = act_result.is_a?(Hash) ? (act_result[:message] || act_result["message"]) : act_result + puts("Act completed: #{act_message}") - extract_response = client.sessions.extract( + extract_stream = client.sessions.extract_streaming( session_id, instruction: "Extract the main heading and any links on this page", schema: { @@ -107,9 +133,10 @@ def fetch_cdp_websocket_url(port) } } ) - puts("Extracted: #{extract_response.data.result}") + extract_result = stream_with_result("Extract", extract_stream) + puts("Extracted: #{extract_result}") - execute_response = client.sessions.execute( + execute_stream = client.sessions.execute_streaming( session_id, execute_options: { instruction: "Click on the 'Learn more' link if available", @@ -117,14 +144,19 @@ def fetch_cdp_websocket_url(port) }, agent_config: { model: Stagehand::ModelConfig.new( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key ), cua: false } ) - puts("Agent completed: #{execute_response.data.result[:message]}") - puts("Agent success: #{execute_response.data.result[:success]}") + execute_result = stream_with_result("Execute", execute_stream) + execute_message = execute_result.is_a?(Hash) ? (execute_result[:message] || execute_result["message"]) : execute_result + execute_success = execute_result.is_a?(Hash) ? (execute_result[:success] || execute_result["success"]) : nil + execute_actions = execute_result.is_a?(Hash) ? (execute_result[:actions] || execute_result["actions"]) : nil + puts("Agent completed: #{execute_message}") + puts("Agent success: #{execute_success}") + puts("Agent actions taken: #{execute_actions&.length || 0}") browser.screenshot.save("screenshot_local_watir.png") puts("Screenshot saved to: screenshot_local_watir.png") diff --git a/examples/remote_browser_example.rb b/examples/remote_browser_example.rb index 336c7f1..c4bf716 100755 --- a/examples/remote_browser_example.rb +++ b/examples/remote_browser_example.rb @@ -4,9 +4,8 @@ require "bundler/setup" require "stagehand" -# Remote browser example using Browserbase + Stagehand. -# Requires BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, and MODEL_API_KEY. - +require_relative "env" +ExampleEnv.load! browserbase_api_key = ENV["BROWSERBASE_API_KEY"].to_s browserbase_project_id = ENV["BROWSERBASE_PROJECT_ID"].to_s model_key = ENV["MODEL_API_KEY"].to_s @@ -27,11 +26,45 @@ model_api_key: model_key ) +def print_stream_event(label, event) + case event.type + when :log + puts("[#{label}] log: #{event.data.message}") + when :system + status = event.data.status + if event.data.respond_to?(:error) && event.data.error + puts("[#{label}] system #{status}: #{event.data.error}") + elsif event.data.respond_to?(:result) && !event.data.result.nil? + puts("[#{label}] system #{status}: #{event.data.result}") + else + puts("[#{label}] system #{status}") + end + else + puts("[#{label}] event: #{event.inspect}") + end +end + +def stream_with_result(label, stream) + puts("#{label} stream:") + result = nil + stream.each do |event| + print_stream_event(label, event) + if event.type == :system && event.data.respond_to?(:result) && !event.data.result.nil? + result = event.data.result + end + if event.type == :system && event.data.respond_to?(:status) && event.data.status == :error + error_message = event.data.respond_to?(:error) && event.data.error ? event.data.error : "unknown error" + raise("#{label} stream error: #{error_message}") + end + end + result +end + session_id = nil begin start_response = client.sessions.start( - model_name: "openai/gpt-5-nano" + model_name: "anthropic/claude-sonnet-4-6" ) session_id = start_response.data.session_id puts("Session started: #{session_id}") @@ -39,12 +72,13 @@ client.sessions.navigate(session_id, url: "https://news.ycombinator.com") puts("Navigated to Hacker News") - observe_response = client.sessions.observe( + observe_stream = client.sessions.observe_streaming( session_id, instruction: "find the link to view comments for the top post" ) - actions = observe_response.data.result + observe_result = stream_with_result("Observe", observe_stream) + actions = observe_result || [] puts("Found #{actions.length} possible actions") action = actions.first @@ -55,13 +89,15 @@ puts("Acting on: #{action.description}") - act_response = client.sessions.act( + act_stream = client.sessions.act_streaming( session_id, input: action.to_h.merge(method: "click") ) - puts("Act completed: #{act_response.data.result[:message]}") + act_result = stream_with_result("Act", act_stream) + act_message = act_result.is_a?(Hash) ? (act_result[:message] || act_result["message"]) : act_result + puts("Act completed: #{act_message}") - extract_response = client.sessions.extract( + extract_stream = client.sessions.extract_streaming( session_id, instruction: "extract the text of the top comment on this page", schema: { @@ -79,9 +115,10 @@ required: ["comment_text"] } ) - puts("Extracted data: #{extract_response.data.result}") + extract_result = stream_with_result("Extract", extract_stream) + puts("Extracted data: #{extract_result}") - extracted_data = extract_response.data.result + extracted_data = extract_result author = extracted_data.is_a?(Hash) ? extracted_data[:author] : nil author ||= "unknown" puts("Looking up profile for author: #{author}") @@ -91,7 +128,7 @@ "Click their username to open the profile and look for shared links." ].join(" ") - execute_response = client.sessions.execute( + execute_stream = client.sessions.execute_streaming( session_id, execute_options: { instruction: instruction, @@ -99,16 +136,20 @@ }, agent_config: { model: Stagehand::ModelConfig.new( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key ), cua: false } ) - puts("Agent completed: #{execute_response.data.result[:message]}") - puts("Agent success: #{execute_response.data.result[:success]}") - puts("Agent actions taken: #{execute_response.data.result[:actions]&.length || 0}") + execute_result = stream_with_result("Execute", execute_stream) + execute_message = execute_result.is_a?(Hash) ? (execute_result[:message] || execute_result["message"]) : execute_result + execute_success = execute_result.is_a?(Hash) ? (execute_result[:success] || execute_result["success"]) : nil + execute_actions = execute_result.is_a?(Hash) ? (execute_result[:actions] || execute_result["actions"]) : nil + puts("Agent completed: #{execute_message}") + puts("Agent success: #{execute_success}") + puts("Agent actions taken: #{execute_actions&.length || 0}") ensure client.sessions.end_(session_id) if session_id client.close diff --git a/examples/remote_browser_playwright_example.rb b/examples/remote_browser_playwright_example.rb index 89c2fda..0b886d8 100755 --- a/examples/remote_browser_playwright_example.rb +++ b/examples/remote_browser_playwright_example.rb @@ -5,19 +5,8 @@ require "bundler/setup" require "stagehand" -# Example: Using Playwright with Stagehand remote mode (Browserbase browser). -# -# Prerequisites: -# - Set MODEL_API_KEY or OPENAI_API_KEY environment variable -# - Set BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID -# - Install Playwright (outside this gem): -# gem install playwright-ruby-client -# npm install playwright -# ./node_modules/.bin/playwright install chromium -# -# Run: -# bundle exec ruby examples/remote_browser_playwright_example.rb - +require_relative "env" +ExampleEnv.load! begin require("playwright") rescue LoadError @@ -25,7 +14,7 @@ exit(1) end -model_key = ENV["MODEL_API_KEY"] || ENV["OPENAI_API_KEY"] +model_key = ENV["MODEL_API_KEY"] browserbase_api_key = ENV["BROWSERBASE_API_KEY"].to_s browserbase_project_id = ENV["BROWSERBASE_PROJECT_ID"].to_s @@ -103,7 +92,7 @@ def stream_with_result(label, stream) begin start_response = client.sessions.start( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-sonnet-4-6", browser: { type: :browserbase } @@ -207,7 +196,7 @@ def stream_with_result(label, stream) }, agent_config: { model: Stagehand::ModelConfig.new( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key ), cua: false @@ -224,7 +213,7 @@ def stream_with_result(label, stream) }, agent_config: { model: Stagehand::ModelConfig.new( - model_name: "openai/gpt-5-nano", + model_name: "anthropic/claude-opus-4-6", api_key: model_key ), cua: false diff --git a/lib/stagehand/models/session_replay_response.rb b/lib/stagehand/models/session_replay_response.rb index da61e51..e8fd99b 100644 --- a/lib/stagehand/models/session_replay_response.rb +++ b/lib/stagehand/models/session_replay_response.rb @@ -24,28 +24,72 @@ class SessionReplayResponse < Stagehand::Internal::Type::BaseModel class Data < Stagehand::Internal::Type::BaseModel # @!attribute pages # - # @return [Array