Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ ENV/
htmlcov/
.tox/

# Traces (runtime and test-generated)
traces/

# Jupyter
.ipynb_checkpoints

Expand Down
7 changes: 6 additions & 1 deletion examples/click_rect_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@ def main():
print(" Clicking at center of element's bbox...")
result = click_rect(
browser,
{"x": link.bbox.x, "y": link.bbox.y, "w": link.bbox.width, "h": link.bbox.height},
{
"x": link.bbox.x,
"y": link.bbox.y,
"w": link.bbox.width,
"h": link.bbox.height,
},
)
print(f" Result: success={result.success}, outcome={result.outcome}")
print(f" URL changed: {result.url_changed}\n")
Expand Down
111 changes: 111 additions & 0 deletions examples/cloud_tracing_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""
Example: Agent with Cloud Tracing

Demonstrates how to use cloud tracing with SentienceAgent to upload traces
and screenshots to cloud storage for remote viewing and analysis.

Requirements:
- Pro or Enterprise tier API key (SENTIENCE_API_KEY)
- OpenAI API key (OPENAI_API_KEY) for LLM

Usage:
python examples/cloud_tracing_agent.py
"""

import os

from sentience import SentienceAgent, SentienceBrowser
from sentience.agent_config import AgentConfig
from sentience.llm_provider import OpenAIProvider
from sentience.tracer_factory import create_tracer


def main():
    """
    Run the cloud-tracing demo end to end.

    Reads SENTIENCE_API_KEY and OPENAI_API_KEY from the environment, builds a
    tracer, browser, LLM provider and agent, drives a short Google search
    through the agent, prints token usage, and finally closes the tracer
    (blocking until the upload attempt finishes) and the browser.

    Returns early, after printing an error, when either API key is missing.
    Any exception raised while the agent runs is reported and re-raised.
    """
    import time

    # Credentials come from the environment; bail out early without either.
    sentience_api_key = os.environ.get("SENTIENCE_API_KEY")
    openai_api_key = os.environ.get("OPENAI_API_KEY")

    if not sentience_api_key:
        print("❌ Error: SENTIENCE_API_KEY not set")
        print(" Cloud tracing requires Pro or Enterprise tier")
        print(" Get your API key at: https://sentience.studio")
        return

    if not openai_api_key:
        print("❌ Error: OPENAI_API_KEY not set")
        return

    print("🚀 Starting Agent with Cloud Tracing Demo\n")

    # Step 1: tracer with automatic tier detection.
    #   - Pro/Enterprise api_key  -> CloudTraceSink
    #   - missing/invalid api_key -> local JsonlTraceSink fallback
    run_id = "cloud-tracing-demo"
    tracer = create_tracer(api_key=sentience_api_key, run_id=run_id)
    print(f"🆔 Run ID: {run_id}\n")

    # Step 2: agent configuration with screenshot capture enabled.
    agent_config = AgentConfig(
        snapshot_limit=50,
        capture_screenshots=True,  # record a screenshot with each step
        screenshot_format="jpeg",  # JPEG keeps the files small
        screenshot_quality=80,  # 80% quality is a good size/clarity balance
    )

    # Step 3: browser and LLM provider.
    browser = SentienceBrowser(api_key=sentience_api_key, headless=False)
    provider = OpenAIProvider(api_key=openai_api_key, model="gpt-4o-mini")

    # Step 4: agent wired to the tracer.
    agent = SentienceAgent(browser, provider, tracer=tracer, config=agent_config)

    # Goals executed back to back before pausing for the results page.
    search_goals = (
        "Click the search box",
        "Type 'Sentience AI agent SDK' into the search field",
        "Press Enter key",
    )

    try:
        # Step 5: navigate, then let the agent act.
        print("🌐 Navigating to Google...\n")
        browser.start()
        page = browser.page
        page.goto("https://www.google.com")
        page.wait_for_load_state("networkidle")

        # Every agent action below is traced automatically.
        print("📝 Executing agent actions (all automatically traced)...\n")
        for goal in search_goals:
            agent.act(goal)

        # Give the results page a moment to render.
        time.sleep(2)

        agent.act("Click the first non-ad search result")

        print("\n✅ Agent execution complete!")

        # Step 6: report token usage.
        stats = agent.get_token_stats()
        print("\n📊 Token Usage:")
        print(f" Total tokens: {stats.total_tokens}")
        print(f" Prompt tokens: {stats.total_prompt_tokens}")
        print(f" Completion tokens: {stats.total_completion_tokens}")

    except Exception as e:
        print(f"\n❌ Error during execution: {e}")
        raise

    finally:
        # Step 7: closing the tracer triggers the cloud upload.
        print("\n📤 Uploading trace to cloud...")
        try:
            tracer.close(blocking=True)  # wait for the upload to complete
            print("✅ Trace uploaded successfully!")
            print(f" View at: https://studio.sentienceapi.com (run_id: {run_id})")
        except Exception as e:
            print(f"⚠️ Upload failed: {e}")
            print(f" Trace preserved locally at: ~/.sentience/traces/pending/{run_id}.jsonl")

        browser.close()


if __name__ == "__main__":
    main()
5 changes: 4 additions & 1 deletion examples/test_local_llm_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def test_local_llm_basic():
user_prompt = "What is the next step to achieve the goal?"

response = llm.generate(
system_prompt=system_prompt, user_prompt=user_prompt, max_new_tokens=20, temperature=0.0
system_prompt=system_prompt,
user_prompt=user_prompt,
max_new_tokens=20,
temperature=0.0,
)

print(f"Agent Response: {response.content}")
Expand Down
9 changes: 7 additions & 2 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
# Agent Layer (Phase 1 & 2)
from .base_agent import BaseAgent
from .browser import SentienceBrowser

# Tracing (v0.12.0+)
from .cloud_tracing import CloudTraceSink
from .conversational_agent import ConversationalAgent
from .expect import expect

Expand Down Expand Up @@ -43,8 +46,7 @@
from .recorder import Recorder, Trace, TraceStep, record
from .screenshot import screenshot
from .snapshot import snapshot

# Tracing (v0.12.0+)
from .tracer_factory import SENTIENCE_API_URL, create_tracer
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink

# Utilities (v0.12.0+)
Expand Down Expand Up @@ -107,7 +109,10 @@
"Tracer",
"TraceSink",
"JsonlTraceSink",
"CloudTraceSink",
"TraceEvent",
"create_tracer",
"SENTIENCE_API_URL",
# Utilities (v0.12.0+)
"canonical_snapshot_strict",
"canonical_snapshot_loose",
Expand Down
18 changes: 13 additions & 5 deletions sentience/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@
"""

import time
from typing import Any, Dict, Optional

from .browser import SentienceBrowser
from .models import ActionResult, BBox, Snapshot
from .snapshot import snapshot


def click(
def click( # noqa: C901
browser: SentienceBrowser,
element_id: int,
use_mouse: bool = True,
Expand Down Expand Up @@ -141,7 +140,10 @@ def click(
error=(
None
if success
else {"code": "click_failed", "reason": "Element not found or not clickable"}
else {
"code": "click_failed",
"reason": "Element not found or not clickable",
}
),
)

Expand Down Expand Up @@ -371,7 +373,10 @@ def click_rect(
success=False,
duration_ms=0,
outcome="error",
error={"code": "invalid_rect", "reason": "Rectangle width and height must be positive"},
error={
"code": "invalid_rect",
"reason": "Rectangle width and height must be positive",
},
)

start_time = time.time()
Expand Down Expand Up @@ -426,6 +431,9 @@ def click_rect(
error=(
None
if success
else {"code": "click_failed", "reason": error_msg if not success else "Click failed"}
else {
"code": "click_failed",
"reason": error_msg if not success else "Click failed",
}
),
)
80 changes: 62 additions & 18 deletions sentience/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import re
import time
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Optional

from .actions import click, press, type_text
from .base_agent import BaseAgent
Expand Down Expand Up @@ -93,8 +93,11 @@ def __init__(
# Step counter for tracing
self._step_count = 0

def act(
self, goal: str, max_retries: int = 2, snapshot_options: SnapshotOptions | None = None
def act( # noqa: C901
self,
goal: str,
max_retries: int = 2,
snapshot_options: SnapshotOptions | None = None,
) -> AgentActionResult:
"""
Execute a high-level goal using observe → think → act loop
Expand All @@ -116,9 +119,9 @@ def act(
42
"""
if self.verbose:
print(f"\n{'='*70}")
print(f"\n{'=' * 70}")
print(f"🤖 Agent Goal: {goal}")
print(f"{'='*70}")
print(f"{'=' * 70}")

# Generate step ID for tracing
self._step_count += 1
Expand Down Expand Up @@ -234,7 +237,7 @@ def act(
self._track_tokens(goal, llm_response)

# Parse action from LLM response
action_str = llm_response.content.strip()
action_str = self._extract_action_from_response(llm_response.content)

# 4. EXECUTE: Parse and run action
result_dict = self._execute_action(action_str, filtered_snap)
Expand Down Expand Up @@ -392,6 +395,34 @@ def _build_context(self, snap: Snapshot, goal: str) -> str:

return "\n".join(lines)

def _extract_action_from_response(self, response: str) -> str:
"""
Extract action command from LLM response, handling cases where
the LLM adds extra explanation despite instructions.

Args:
response: Raw LLM response text

Returns:
Cleaned action command string
"""
import re

# Remove markdown code blocks if present
response = re.sub(r"```[\w]*\n?", "", response)
response = response.strip()

# Try to find action patterns in the response
# Pattern matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
action_pattern = r'(CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))'

match = re.search(action_pattern, response, re.IGNORECASE)
if match:
return match.group(1)

# If no pattern match, return the original response (will likely fail parsing)
return response

def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
"""
Query LLM with standardized prompt template
Expand All @@ -415,23 +446,30 @@ def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
- {{CLICKABLE}}: Element is clickable
- {{color:X}}: Background color name

RESPONSE FORMAT:
Return ONLY the function call, no explanation or markdown.

Available actions:
CRITICAL RESPONSE FORMAT:
You MUST respond with ONLY ONE of these exact action formats:
- CLICK(id) - Click element by ID
- TYPE(id, "text") - Type text into element
- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
- FINISH() - Task complete

Examples:
- CLICK(42)
- TYPE(15, "magic mouse")
- PRESS("Enter")
- FINISH()
DO NOT include any explanation, reasoning, or natural language.
DO NOT use markdown formatting or code blocks.
DO NOT say "The next step is..." or anything similar.

CORRECT Examples:
CLICK(42)
TYPE(15, "magic mouse")
PRESS("Enter")
FINISH()

INCORRECT Examples (DO NOT DO THIS):
"The next step is to click..."
"I will type..."
```CLICK(42)```
"""

user_prompt = "What is the next step to achieve the goal?"
user_prompt = "Return the single action command:"

return self.llm.generate(system_prompt, user_prompt, temperature=0.0)

Expand Down Expand Up @@ -460,7 +498,9 @@ def _execute_action(self, action_str: str, snap: Snapshot) -> dict[str, Any]:

# Parse TYPE(42, "hello world")
elif match := re.match(
r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)', action_str, re.IGNORECASE
r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
action_str,
re.IGNORECASE,
):
element_id = int(match.group(1))
text = match.group(2)
Expand All @@ -486,7 +526,11 @@ def _execute_action(self, action_str: str, snap: Snapshot) -> dict[str, Any]:

# Parse FINISH()
elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
return {"success": True, "action": "finish", "message": "Task marked as complete"}
return {
"success": True,
"action": "finish",
"message": "Task marked as complete",
}

else:
raise ValueError(
Expand Down
4 changes: 3 additions & 1 deletion sentience/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ def main():
"--snapshots", action="store_true", help="Capture snapshots at each step"
)
record_parser.add_argument(
"--mask", action="append", help="Pattern to mask in recorded text (e.g., password)"
"--mask",
action="append",
help="Pattern to mask in recorded text (e.g., password)",
)
record_parser.set_defaults(func=cmd_record)

Expand Down
Loading