PrimeIntellect-ai · filip-michalsky · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025
diff --git a/environments/browser_env/README.md b/environments/browser_env/README.md
@@ -0,0 +1,140 @@
+# Browser Environment
+
+Vision-based browser control environment using CUA (Computer Use Agent) primitives.
+
+## Overview
+
+This environment provides browser automation tools for training and evaluating agents that can interact with web pages through:
+- **Vision feedback**: Each action returns a screenshot of the page state
+- **Tool-based actions**: Click, type, scroll, navigate, etc.
+- **Session management**: One browser session per rollout
+
+## Prerequisites
+
+1. **Start the CUA Server**
+
+```bash
+cd environments/browser_env/cua-server
+pnpm install
+pnpm start
+```
+
+The server runs on `http://localhost:3000` by default.
+
+2. **Install Dependencies**
+
+```bash
+vf-install browser_env
+```
+
+## Usage
+
+### Basic Evaluation
+
+```bash
+vf-eval browser_env -n 5 -m gpt-4o
+```
+
+### Programmatic Usage
+
+```python
+import verifiers as vf
+
+env = vf.load_environment("browser_env", server_url="http://localhost:3000")
+```
+
+### Custom Configuration
+
+```python
+from environments.browser_env.browser_env import load_environment
+
+env = load_environment(
+    server_url="http://localhost:3000",
+    env="LOCAL",  # or "BROWSERBASE" for cloud browsers
+    viewport_width=1280,
+    viewport_height=720,
+    max_turns=20,
+    efficiency_weight=0.1,
+    task_completion_weight=1.0,
+)
+```
+
+## Available Tools
+
+| Tool | Description |
+|------|-------------|
+| `click(x, y, button)` | Click at coordinates |
+| `double_click(x, y)` | Double-click at coordinates |
+| `type_text(text)` | Type text into focused element |
+| `keypress(keys)` | Press keyboard key(s) |
+| `scroll(x, y, scroll_x, scroll_y)` | Scroll at position |
+| `goto(url)` | Navigate to URL |
+| `back()` | Go back in history |
+| `forward()` | Go forward in history |
+| `wait(time_ms)` | Wait for specified time |
+| `screenshot()` | Capture current page |
+
+## Reward Functions (TODO)
+
+### Built-in Rewards
+
+1. **efficiency_reward** (weight: 0.1): Penalizes long rollouts. Fewer actions = higher reward.
+
+2. **task_completion_reward** (weight: 1.0): Placeholder for a task-specific completion reward; it currently always returns 0.0.
+
+### Custom Rewards 
+
+Override `task_completion_reward` or add custom reward functions:
+
+```python
+async def my_custom_reward(state: vf.State, **kwargs) -> float:
+    # Check if browser is on target URL
+    browser_state = state.get("browser_state", {})
+    current_url = browser_state.get("url", "")
+    target_url = state.get("answer", "")
+
+    if target_url in current_url:
+        return 1.0
+    return 0.0
+
+# Add to rubric
+browser_rubric.add_reward_func(my_custom_reward, weight=1.0)
+```
+
+## Environment Variables
+
+- `BROWSERBASE_API_KEY`: API key for Browserbase cloud browsers (optional)
+- `BROWSERBASE_PROJECT_ID`: Project ID for Browserbase (optional)
+
+
+## TODO
+
+
+- Reward function - overall browser trajectory + "how are we getting closer after each step" (hard-ish)
+- Custom LLM client - via vLLM (easy)
+- DOM-based option - I started with CUA (vision-based) since there seems to be more near-term market demand, with the full understanding that verifiers has not been focused much on multimodal training yet (afaik)
+- Dataset structure (get some examples from our evals suite in Stagehand)
+
+
+## Architecture
+
+```
+┌─────────────────┐     HTTP/REST     ┌──────────────────┐
+│   BrowserEnv    │ ◄──────────────►  │  CUA Server      │
+│   (Python/      │                   │   (Fastify/TS)   │
+│     verifiers)  │                   │                  │
+└─────────────────┘                   └──────────────────┘
+        │                                      │
+        │                                      ▼
+        │                             ┌──────────────────┐
+        │                             │   Stagehand V3   │
+        │                             │   (Direct CDP)   │
+        ▼                             └──────────────────┘
+┌─────────────────┐                           │
+│   Model (LLM)   │                           ▼
+│   gpt-4o, etc.  │                   ┌──────────────────┐
+└─────────────────┘                   │   Browser        │
+                                      │   (Chrome)       │
+                                      └──────────────────┘
+```
+
diff --git a/environments/browser_env/browser_env.py b/environments/browser_env/browser_env.py
@@ -0,0 +1,178 @@
+"""
+Browser Environment for vision-based browser control.
+
+This environment uses a CUA (Computer Use Agent) server to provide
+browser primitives (click, type, scroll, etc.) with screenshot feedback.
+
+Usage:
+    1. Start the CUA server:
+       cd environments/browser_env/cua-server && pnpm start
+
+    2. Run evaluation:
+       vf-eval browser_env -n 5 -m gpt-4o
+"""
+
+from typing import Literal
+
+from datasets import Dataset
+
+import verifiers as vf
+
+# Import BrowserEnv - will be available after adding to lazy imports
+try:
+    from verifiers.envs.browser_env import BrowserEnv
+except ImportError as exc:
+    raise ImportError(
+        "BrowserEnv requires aiohttp. Install with: uv pip install aiohttp"
+    ) from exc
+
+
+# ==================== Custom Reward Functions ====================
+
+
+def efficiency_reward(state: vf.State, **kwargs) -> float:
+    """
+    Reward short rollouts: 1.0 for a single action, decaying linearly to 0.0.
+
+    Reads ``max_actions`` from kwargs (default 20) and ``state["trajectory"]``.
+    An empty or missing trajectory scores 0.0; so does reaching ``max_actions``.
+    """
+    max_actions = kwargs.get("max_actions", 20)
+    num_actions = len(state.get("trajectory") or [])
+    if num_actions == 0:
+        return 0.0
+    if max_actions <= 1:
+        # No room to decay (also avoids dividing by zero): only 1 action scores.
+        return 1.0 if num_actions == 1 else 0.0
+    return max(0.0, 1.0 - (num_actions - 1) / (max_actions - 1))
+
+
+async def task_completion_reward(state: vf.State, **kwargs) -> float:
+    """
+    Stub task-completion reward; always scores 0.0 until a real check exists.
+
+    Replace this function, or register extra reward functions, to suit the task:
+    - URL matching: did the browser end up on the target URL?
+    - Element presence: is a specific element visible in the screenshot?
+    - Goal completion: ask a judge model whether the task was accomplished
+    - Text extraction: did the model extract the correct information?
+
+    Returns:
+        float: 0.0 unconditionally; ``state`` and ``kwargs`` are not read yet.
+    """
+    # TODO: Implement based on task type, for example:
+    # - compare state["browser_state"]["url"] against the target
+    # - use a vision model to verify element presence
+    # - compare extracted text to the expected answer
+    not_implemented_score = 0.0
+    return not_implemented_score
+
+
+# ==================== Environment Loader ====================
+
+
+def load_environment(
+    server_url: str = "http://localhost:3000",
+    env: Literal["LOCAL", "BROWSERBASE"] = "LOCAL",
+    browserbase_api_key: str | None = None,
+    browserbase_project_id: str | None = None,
+    viewport_width: int = 1280,
+    viewport_height: int = 720,
+    max_turns: int = 20,
+    system_prompt: str | None = None,
+    efficiency_weight: float = 0.1,
+    task_completion_weight: float = 1.0,
+    **kwargs,
+) -> vf.Environment:
+    """
+    Build a BrowserEnv wired to a CUA server, a placeholder dataset and rubrics.
+
+    Args:
+        server_url: Base URL of the CUA server, forwarded to BrowserEnv
+        env: Browser backend, either "LOCAL" or "BROWSERBASE"
+        browserbase_api_key: Browserbase API key, forwarded to BrowserEnv
+        browserbase_project_id: Browserbase project ID, forwarded to BrowserEnv
+        viewport_width: Viewport width, forwarded to BrowserEnv
+        viewport_height: Viewport height, forwarded to BrowserEnv
+        max_turns: Turn limit, forwarded to BrowserEnv
+        system_prompt: Prompt override; a built-in tool guide is used when None
+        efficiency_weight: Rubric weight applied to efficiency_reward
+        task_completion_weight: Rubric weight applied to task_completion_reward
+        **kwargs: Extra keyword arguments forwarded unchanged to BrowserEnv
+
+    Returns:
+        The constructed BrowserEnv instance
+    """
+    # Fall back to the built-in tool guide when the caller gave no prompt
+    if system_prompt is None:
+        system_prompt = """You are a browser automation agent. You can control a web browser using the provided tools.
+
+Available tools:
+- click(x, y, button): Click at coordinates
+- double_click(x, y): Double-click at coordinates
+- type_text(text): Type text into focused element
+- keypress(keys): Press keyboard keys (e.g., "Enter", "Tab")
+- scroll(x, y, scroll_x, scroll_y): Scroll at position
+- goto(url): Navigate to URL
+- back(): Go back in history
+- forward(): Go forward in history
+- wait(time_ms): Wait for specified milliseconds
+- screenshot(): Capture current page state
+
+After each action, you will receive a screenshot showing the current page state.
+Analyze the screenshot to determine your next action.
+
+Complete the given task efficiently using the minimum number of actions necessary."""
+
+    # Placeholder dataset: two hard-coded example tasks, each pairing one
+    # "prompt" string with one "answer" string
+    # TODO: Replace with actual task dataset
+    placeholder_tasks = {
+        "prompt": [
+            "Navigate to google.com and search for 'weather today'",
+            "Go to wikipedia.org and find the main page",
+        ],
+        "answer": [
+            "weather search results",
+            "wikipedia main page",
+        ],
+    }
+    dataset = Dataset.from_dict(placeholder_tasks)
+
+    # Plain parser: browser tasks need no special output parsing
+    parser = vf.Parser()
+
+    # One RubricGroup holding, in order:
+    # 1. a ToolRubric for basic tool usage metrics
+    #    NOTE(review): the tool list is empty here; presumably BrowserEnv's
+    #    own tools end up being used for the metrics -- confirm
+    # 2. a Rubric weighting efficiency_reward and task_completion_reward
+    rubric = vf.RubricGroup(
+        rubrics=[
+            vf.ToolRubric(tools=[]),
+            vf.Rubric(
+                funcs=[efficiency_reward, task_completion_reward],
+                weights=[efficiency_weight, task_completion_weight],
+                parser=parser,
+            ),
+        ]
+    )
+
+    # Construct the environment and return it directly. The explicit
+    # settings are passed by keyword; any extra **kwargs are forwarded
+    # to BrowserEnv unchanged.
+    return BrowserEnv(
+        server_url=server_url,
+        env=env,
+        browserbase_api_key=browserbase_api_key,
+        browserbase_project_id=browserbase_project_id,
+        viewport_width=viewport_width,
+        viewport_height=viewport_height,
+        max_turns=max_turns,
+        dataset=dataset,
+        system_prompt=system_prompt,
+        parser=parser,
+        rubric=rubric,
+        **kwargs,
+    )
+
diff --git a/environments/browser_env/cua-server/.env.example b/environments/browser_env/cua-server/.env.example
@@ -0,0 +1,20 @@
+# CUA Primitives API Server Configuration
+# Copy this file to .env and customize as needed
+
+# Server Configuration
+CUA_SERVER_PORT=3000
+CUA_SERVER_HOST=0.0.0.0
+
+# Browserbase Configuration (optional, for env: "BROWSERBASE")
+# BROWSERBASE_API_KEY=your_api_key_here
+# BROWSERBASE_PROJECT_ID=your_project_id_here
+
+# OpenAI API Key (if using OpenAI models with Stagehand)
+# OPENAI_API_KEY=your_openai_api_key_here
+
+# Anthropic API Key (if using Anthropic models with Stagehand)
+# ANTHROPIC_API_KEY=your_anthropic_api_key_here
+
+# Google API Key (if using Google models with Stagehand)
+# GOOGLE_GENERATIVE_AI_API_KEY=your_google_api_key_here
+
diff --git a/environments/browser_env/cua-server/.gitignore b/environments/browser_env/cua-server/.gitignore
@@ -0,0 +1,22 @@
+# Dependencies
+node_modules/
+
+# Environment
+.env
+
+# Logs
+*.log
+
+# pnpm
+pnpm-debug.log*
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+