diff --git a/sentience/tracer_factory.py b/sentience/tracer_factory.py index 0ec2f3b..142e897 100644 --- a/sentience/tracer_factory.py +++ b/sentience/tracer_factory.py @@ -1,5 +1,3 @@ -from typing import Optional - """ Tracer factory with automatic tier detection. @@ -10,7 +8,8 @@ import os import uuid from pathlib import Path -from typing import Any +from typing import Any, Optional +from collections.abc import Callable import requests @@ -31,6 +30,7 @@ def create_tracer( agent_type: str | None = None, llm_model: str | None = None, start_url: str | None = None, + screenshot_processor: Callable[[str], str] | None = None, ) -> Tracer: """ Create tracer with automatic tier detection. @@ -55,6 +55,9 @@ def create_tracer( agent_type: Type of agent running (e.g., "SentienceAgent", "CustomAgent") llm_model: LLM model used (e.g., "gpt-4-turbo", "claude-3-5-sonnet") start_url: Starting URL of the agent run (e.g., "https://amazon.com") + screenshot_processor: Optional function to process screenshots before upload. + Takes base64 string, returns processed base64 string. + Useful for PII redaction or custom image processing. Returns: Tracer configured with appropriate sink @@ -71,6 +74,17 @@ def create_tracer( ... ) >>> # Returns: Tracer with CloudTraceSink >>> + >>> # With screenshot processor for PII redaction + >>> def redact_pii(screenshot_base64: str) -> str: + ... # Your custom redaction logic + ... return redacted_screenshot + >>> + >>> tracer = create_tracer( + ... api_key="sk_pro_xyz", + ... screenshot_processor=redact_pii + ... ) + >>> # Screenshots will be processed before upload + >>> >>> # Free tier user >>> tracer = create_tracer(run_id="demo") >>> # Returns: Tracer with JsonlTraceSink (local-only) @@ -133,6 +147,7 @@ def create_tracer( api_url=api_url, logger=logger, ), + screenshot_processor=screenshot_processor, ) else: print("⚠️ [Sentience] Cloud init response missing upload_url") @@ -191,7 +206,11 @@ def create_tracer( local_path = traces_dir / f"{run_id}.jsonl" print(f"💾 [Sentience] Local tracing: {local_path}") - return Tracer(run_id=run_id, sink=JsonlTraceSink(str(local_path))) + return Tracer( + run_id=run_id, + sink=JsonlTraceSink(str(local_path)), + screenshot_processor=screenshot_processor, + ) def _recover_orphaned_traces(api_key: str, api_url: str = SENTIENCE_API_URL) -> None: diff --git a/sentience/tracing.py b/sentience/tracing.py index 0a5fe8b..03e2ccc 100644 --- a/sentience/tracing.py +++ b/sentience/tracing.py @@ -10,6 +10,7 @@ from datetime import datetime from pathlib import Path from typing import Any, Optional +from collections.abc import Callable from .models import TraceStats from .trace_file_manager import TraceFileManager @@ -163,10 +164,36 @@ class Tracer: Manages sequence numbers and provides convenient methods for emitting events. Tracks execution statistics and final status for trace completion. + + Args: + run_id: Unique identifier for this trace run + sink: TraceSink implementation for writing events + screenshot_processor: Optional function to process screenshots before emission. + Takes base64 string, returns processed base64 string. + Useful for PII redaction or custom image processing. + + Example: + >>> from sentience import Tracer, JsonlTraceSink + >>> + >>> # Basic usage + >>> sink = JsonlTraceSink("trace.jsonl") + >>> tracer = Tracer(run_id="abc123", sink=sink) + >>> + >>> # With screenshot processor for PII redaction + >>> def redact_pii(screenshot_base64: str) -> str: + ... # Your custom redaction logic + ... return redacted_screenshot + >>> + >>> tracer = Tracer( + ... run_id="abc123", + ... sink=sink, + ... screenshot_processor=redact_pii + ... ) """ run_id: str sink: TraceSink + screenshot_processor: Callable[[str], str] | None = None seq: int = field(default=0, init=False) # Stats tracking total_steps: int = field(default=0, init=False) @@ -196,6 +223,11 @@ def emit( self.seq += 1 self.total_events += 1 + # Apply screenshot processor if configured and screenshot is present + if self.screenshot_processor and "screenshot_base64" in data: + data = data.copy() # Don't modify the original dict + data["screenshot_base64"] = self.screenshot_processor(data["screenshot_base64"]) + # Generate timestamps ts_ms = int(time.time() * 1000) ts = time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime())