Skip to content

Commit 28486e3

Browse files
authored
Merge pull request #63 from SentienceAPI/cloud_sync
cloud tracing support
2 parents 10416c2 + 0805ab9 commit 28486e3

27 files changed

+1358
-170
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ ENV/
4040
htmlcov/
4141
.tox/
4242

43+
# Traces (runtime and test-generated)
44+
traces/
45+
4346
# Jupyter
4447
.ipynb_checkpoints
4548

examples/click_rect_demo.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@ def main():
3737
print(" Clicking at center of element's bbox...")
3838
result = click_rect(
3939
browser,
40-
{"x": link.bbox.x, "y": link.bbox.y, "w": link.bbox.width, "h": link.bbox.height},
40+
{
41+
"x": link.bbox.x,
42+
"y": link.bbox.y,
43+
"w": link.bbox.width,
44+
"h": link.bbox.height,
45+
},
4146
)
4247
print(f" Result: success={result.success}, outcome={result.outcome}")
4348
print(f" URL changed: {result.url_changed}\n")

examples/cloud_tracing_agent.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""
2+
Example: Agent with Cloud Tracing
3+
4+
Demonstrates how to use cloud tracing with SentienceAgent to upload traces
5+
and screenshots to cloud storage for remote viewing and analysis.
6+
7+
Requirements:
8+
- Pro or Enterprise tier API key (SENTIENCE_API_KEY)
9+
- OpenAI API key (OPENAI_API_KEY) for LLM
10+
11+
Usage:
12+
python examples/cloud_tracing_agent.py
13+
"""
14+
15+
import os
16+
17+
from sentience import SentienceAgent, SentienceBrowser
18+
from sentience.agent_config import AgentConfig
19+
from sentience.llm_provider import OpenAIProvider
20+
from sentience.tracer_factory import create_tracer
21+
22+
23+
def main():
24+
# Get API keys from environment
25+
sentience_key = os.environ.get("SENTIENCE_API_KEY")
26+
openai_key = os.environ.get("OPENAI_API_KEY")
27+
28+
if not sentience_key:
29+
print("❌ Error: SENTIENCE_API_KEY not set")
30+
print(" Cloud tracing requires Pro or Enterprise tier")
31+
print(" Get your API key at: https://sentience.studio")
32+
return
33+
34+
if not openai_key:
35+
print("❌ Error: OPENAI_API_KEY not set")
36+
return
37+
38+
print("🚀 Starting Agent with Cloud Tracing Demo\n")
39+
40+
# 1. Create tracer with automatic tier detection
41+
# If api_key is Pro/Enterprise, uses CloudTraceSink
42+
# If api_key is missing/invalid, falls back to local JsonlTraceSink
43+
run_id = "cloud-tracing-demo"
44+
tracer = create_tracer(api_key=sentience_key, run_id=run_id)
45+
46+
print(f"🆔 Run ID: {run_id}\n")
47+
48+
# 2. Configure agent with screenshot capture
49+
config = AgentConfig(
50+
snapshot_limit=50,
51+
capture_screenshots=True, # Enable screenshot capture
52+
screenshot_format="jpeg", # JPEG for smaller file size
53+
screenshot_quality=80, # 80% quality (good balance)
54+
)
55+
56+
# 3. Create browser and LLM
57+
browser = SentienceBrowser(api_key=sentience_key, headless=False)
58+
llm = OpenAIProvider(api_key=openai_key, model="gpt-4o-mini")
59+
60+
# 4. Create agent with tracer
61+
agent = SentienceAgent(browser, llm, tracer=tracer, config=config)
62+
63+
try:
64+
# 5. Navigate and execute agent actions
65+
print("🌐 Navigating to Google...\n")
66+
browser.start()
67+
browser.page.goto("https://www.google.com")
68+
browser.page.wait_for_load_state("networkidle")
69+
70+
# All actions are automatically traced!
71+
print("📝 Executing agent actions (all automatically traced)...\n")
72+
agent.act("Click the search box")
73+
agent.act("Type 'Sentience AI agent SDK' into the search field")
74+
agent.act("Press Enter key")
75+
76+
# Wait for results
77+
import time
78+
79+
time.sleep(2)
80+
81+
agent.act("Click the first non-ad search result")
82+
83+
print("\n✅ Agent execution complete!")
84+
85+
# 6. Get token usage stats
86+
stats = agent.get_token_stats()
87+
print("\n📊 Token Usage:")
88+
print(f" Total tokens: {stats.total_tokens}")
89+
print(f" Prompt tokens: {stats.total_prompt_tokens}")
90+
print(f" Completion tokens: {stats.total_completion_tokens}")
91+
92+
except Exception as e:
93+
print(f"\n❌ Error during execution: {e}")
94+
raise
95+
96+
finally:
97+
# 7. Close tracer (uploads to cloud)
98+
print("\n📤 Uploading trace to cloud...")
99+
try:
100+
tracer.close(blocking=True) # Wait for upload to complete
101+
print("✅ Trace uploaded successfully!")
102+
print(f" View at: https://studio.sentienceapi.com (run_id: {run_id})")
103+
except Exception as e:
104+
print(f"⚠️ Upload failed: {e}")
105+
print(f" Trace preserved locally at: ~/.sentience/traces/pending/{run_id}.jsonl")
106+
107+
browser.close()
108+
109+
110+
if __name__ == "__main__":
111+
main()

examples/test_local_llm_agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ def test_local_llm_basic():
6565
user_prompt = "What is the next step to achieve the goal?"
6666

6767
response = llm.generate(
68-
system_prompt=system_prompt, user_prompt=user_prompt, max_new_tokens=20, temperature=0.0
68+
system_prompt=system_prompt,
69+
user_prompt=user_prompt,
70+
max_new_tokens=20,
71+
temperature=0.0,
6972
)
7073

7174
print(f"Agent Response: {response.content}")

sentience/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
# Agent Layer (Phase 1 & 2)
1010
from .base_agent import BaseAgent
1111
from .browser import SentienceBrowser
12+
13+
# Tracing (v0.12.0+)
14+
from .cloud_tracing import CloudTraceSink
1215
from .conversational_agent import ConversationalAgent
1316
from .expect import expect
1417

@@ -43,8 +46,7 @@
4346
from .recorder import Recorder, Trace, TraceStep, record
4447
from .screenshot import screenshot
4548
from .snapshot import snapshot
46-
47-
# Tracing (v0.12.0+)
49+
from .tracer_factory import SENTIENCE_API_URL, create_tracer
4850
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
4951

5052
# Utilities (v0.12.0+)
@@ -107,7 +109,10 @@
107109
"Tracer",
108110
"TraceSink",
109111
"JsonlTraceSink",
112+
"CloudTraceSink",
110113
"TraceEvent",
114+
"create_tracer",
115+
"SENTIENCE_API_URL",
111116
# Utilities (v0.12.0+)
112117
"canonical_snapshot_strict",
113118
"canonical_snapshot_loose",

sentience/actions.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
"""
44

55
import time
6-
from typing import Any, Dict, Optional
76

87
from .browser import SentienceBrowser
98
from .models import ActionResult, BBox, Snapshot
109
from .snapshot import snapshot
1110

1211

13-
def click(
12+
def click( # noqa: C901
1413
browser: SentienceBrowser,
1514
element_id: int,
1615
use_mouse: bool = True,
@@ -141,7 +140,10 @@ def click(
141140
error=(
142141
None
143142
if success
144-
else {"code": "click_failed", "reason": "Element not found or not clickable"}
143+
else {
144+
"code": "click_failed",
145+
"reason": "Element not found or not clickable",
146+
}
145147
),
146148
)
147149

@@ -371,7 +373,10 @@ def click_rect(
371373
success=False,
372374
duration_ms=0,
373375
outcome="error",
374-
error={"code": "invalid_rect", "reason": "Rectangle width and height must be positive"},
376+
error={
377+
"code": "invalid_rect",
378+
"reason": "Rectangle width and height must be positive",
379+
},
375380
)
376381

377382
start_time = time.time()
@@ -426,6 +431,9 @@ def click_rect(
426431
error=(
427432
None
428433
if success
429-
else {"code": "click_failed", "reason": error_msg if not success else "Click failed"}
434+
else {
435+
"code": "click_failed",
436+
"reason": error_msg if not success else "Click failed",
437+
}
430438
),
431439
)

sentience/agent.py

Lines changed: 62 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import re
77
import time
8-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
8+
from typing import TYPE_CHECKING, Any, Optional
99

1010
from .actions import click, press, type_text
1111
from .base_agent import BaseAgent
@@ -93,8 +93,11 @@ def __init__(
9393
# Step counter for tracing
9494
self._step_count = 0
9595

96-
def act(
97-
self, goal: str, max_retries: int = 2, snapshot_options: SnapshotOptions | None = None
96+
def act( # noqa: C901
97+
self,
98+
goal: str,
99+
max_retries: int = 2,
100+
snapshot_options: SnapshotOptions | None = None,
98101
) -> AgentActionResult:
99102
"""
100103
Execute a high-level goal using observe → think → act loop
@@ -116,9 +119,9 @@ def act(
116119
42
117120
"""
118121
if self.verbose:
119-
print(f"\n{'='*70}")
122+
print(f"\n{'=' * 70}")
120123
print(f"🤖 Agent Goal: {goal}")
121-
print(f"{'='*70}")
124+
print(f"{'=' * 70}")
122125

123126
# Generate step ID for tracing
124127
self._step_count += 1
@@ -234,7 +237,7 @@ def act(
234237
self._track_tokens(goal, llm_response)
235238

236239
# Parse action from LLM response
237-
action_str = llm_response.content.strip()
240+
action_str = self._extract_action_from_response(llm_response.content)
238241

239242
# 4. EXECUTE: Parse and run action
240243
result_dict = self._execute_action(action_str, filtered_snap)
@@ -392,6 +395,34 @@ def _build_context(self, snap: Snapshot, goal: str) -> str:
392395

393396
return "\n".join(lines)
394397

398+
def _extract_action_from_response(self, response: str) -> str:
399+
"""
400+
Extract action command from LLM response, handling cases where
401+
the LLM adds extra explanation despite instructions.
402+
403+
Args:
404+
response: Raw LLM response text
405+
406+
Returns:
407+
Cleaned action command string
408+
"""
409+
import re
410+
411+
# Remove markdown code blocks if present
412+
response = re.sub(r"```[\w]*\n?", "", response)
413+
response = response.strip()
414+
415+
# Try to find action patterns in the response
416+
# Pattern matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
417+
action_pattern = r'(CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))'
418+
419+
match = re.search(action_pattern, response, re.IGNORECASE)
420+
if match:
421+
return match.group(1)
422+
423+
# If no pattern match, return the original response (will likely fail parsing)
424+
return response
425+
395426
def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
396427
"""
397428
Query LLM with standardized prompt template
@@ -415,23 +446,30 @@ def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
415446
- {{CLICKABLE}}: Element is clickable
416447
- {{color:X}}: Background color name
417448
418-
RESPONSE FORMAT:
419-
Return ONLY the function call, no explanation or markdown.
420-
421-
Available actions:
449+
CRITICAL RESPONSE FORMAT:
450+
You MUST respond with ONLY ONE of these exact action formats:
422451
- CLICK(id) - Click element by ID
423452
- TYPE(id, "text") - Type text into element
424453
- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
425454
- FINISH() - Task complete
426455
427-
Examples:
428-
- CLICK(42)
429-
- TYPE(15, "magic mouse")
430-
- PRESS("Enter")
431-
- FINISH()
456+
DO NOT include any explanation, reasoning, or natural language.
457+
DO NOT use markdown formatting or code blocks.
458+
DO NOT say "The next step is..." or anything similar.
459+
460+
CORRECT Examples:
461+
CLICK(42)
462+
TYPE(15, "magic mouse")
463+
PRESS("Enter")
464+
FINISH()
465+
466+
INCORRECT Examples (DO NOT DO THIS):
467+
"The next step is to click..."
468+
"I will type..."
469+
```CLICK(42)```
432470
"""
433471

434-
user_prompt = "What is the next step to achieve the goal?"
472+
user_prompt = "Return the single action command:"
435473

436474
return self.llm.generate(system_prompt, user_prompt, temperature=0.0)
437475

@@ -460,7 +498,9 @@ def _execute_action(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
460498

461499
# Parse TYPE(42, "hello world")
462500
elif match := re.match(
463-
r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)', action_str, re.IGNORECASE
501+
r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
502+
action_str,
503+
re.IGNORECASE,
464504
):
465505
element_id = int(match.group(1))
466506
text = match.group(2)
@@ -486,7 +526,11 @@ def _execute_action(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
486526

487527
# Parse FINISH()
488528
elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
489-
return {"success": True, "action": "finish", "message": "Task marked as complete"}
529+
return {
530+
"success": True,
531+
"action": "finish",
532+
"message": "Task marked as complete",
533+
}
490534

491535
else:
492536
raise ValueError(

sentience/cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,9 @@ def main():
104104
"--snapshots", action="store_true", help="Capture snapshots at each step"
105105
)
106106
record_parser.add_argument(
107-
"--mask", action="append", help="Pattern to mask in recorded text (e.g., password)"
107+
"--mask",
108+
action="append",
109+
help="Pattern to mask in recorded text (e.g., password)",
108110
)
109111
record_parser.set_defaults(func=cmd_record)
110112

0 commit comments

Comments
 (0)