diff --git a/sentience/element_filter.py b/sentience/element_filter.py index a6256c7..dcedd6d 100644 --- a/sentience/element_filter.py +++ b/sentience/element_filter.py @@ -57,7 +57,8 @@ def filter_by_importance( Returns: Top N elements sorted by importance score """ - elements = snapshot.elements + # Filter out REMOVED elements - they're not actionable and shouldn't be in LLM context + elements = [el for el in snapshot.elements if el.diff_status != "REMOVED"] # Elements are already sorted by importance in snapshot return elements[:max_elements] @@ -81,7 +82,8 @@ def filter_by_goal( Returns: Filtered list of elements sorted by boosted importance score """ - elements = snapshot.elements + # Filter out REMOVED elements - they're not actionable and shouldn't be in LLM context + elements = [el for el in snapshot.elements if el.diff_status != "REMOVED"] # If no goal provided, return all elements (up to limit) if not goal: diff --git a/sentience/llm_interaction_handler.py b/sentience/llm_interaction_handler.py index 008e155..60c9311 100644 --- a/sentience/llm_interaction_handler.py +++ b/sentience/llm_interaction_handler.py @@ -6,7 +6,6 @@ """ import re -from typing import Optional from .llm_provider import LLMProvider, LLMResponse from .models import Snapshot @@ -35,7 +34,7 @@ def build_context(self, snap: Snapshot, goal: str | None = None) -> str: """ Convert snapshot elements to token-efficient prompt string. - Format: [ID] "text" {cues} @ (x,y) (Imp:score) + Format: [ID] "text" {cues} @ position size:WxH importance:score [status] Args: snap: Snapshot object @@ -46,8 +45,11 @@ def build_context(self, snap: Snapshot, goal: str | None = None) -> str: """ lines = [] for el in snap.elements: + # Skip REMOVED elements - they're not actionable and shouldn't be in LLM context + if el.diff_status == "REMOVED": + continue # Extract visual cues - cues = [] + cues: list[str] = [] if el.visual_cues.is_primary: cues.append("PRIMARY") if el.visual_cues.is_clickable: @@ -55,15 +57,38 @@ def build_context(self, snap: Snapshot, goal: str | None = None) -> str: if el.visual_cues.background_color_name: cues.append(f"color:{el.visual_cues.background_color_name}") - # Format element line + # Format element line with improved readability + # Ensure cues is defined before using it in f-string cues_str = f" {{{','.join(cues)}}}" if cues else "" - text_preview = ( - (el.text[:50] + "...") if el.text and len(el.text) > 50 else (el.text or "") - ) - + + # Better text handling - show truncation indicator + text_preview = "" + if el.text: + if len(el.text) > 50: + text_preview = f'"{el.text[:50]}..."' + else: + text_preview = f'"{el.text}"' + + # Build position and size info + x, y = int(el.bbox.x), int(el.bbox.y) + width, height = int(el.bbox.width), int(el.bbox.height) + position_str = f"@ ({x},{y})" + size_str = f"size:{width}x{height}" + + # Build status indicators (only include if relevant) + status_parts = [] + if not el.in_viewport: + status_parts.append("not_in_viewport") + if el.is_occluded: + status_parts.append("occluded") + if el.diff_status: + status_parts.append(f"diff:{el.diff_status}") + status_str = f" [{','.join(status_parts)}]" if status_parts else "" + + # Format: [ID] "text" {cues} @ (x,y) size:WxH importance:score [status] lines.append( - f'[{el.id}] <{el.role}> "{text_preview}"{cues_str} ' - f"@ ({int(el.bbox.x)},{int(el.bbox.y)}) (Imp:{el.importance})" + f'[{el.id}] <{el.role}> {text_preview}{cues_str} ' + f"{position_str} {size_str} importance:{el.importance}{status_str}" ) return "\n".join(lines) @@ -87,14 +112,34 @@ def query_llm(self, dom_context: str, goal: str) -> LLMResponse: {dom_context} VISUAL CUES EXPLAINED: -- {{PRIMARY}}: Main call-to-action element on the page -- {{CLICKABLE}}: Element is clickable -- {{color:X}}: Background color name +After the text, you may see visual cues in curly braces like {{CLICKABLE}} or {{PRIMARY,CLICKABLE,color:white}}: +- PRIMARY: Main call-to-action element on the page +- CLICKABLE: Element is clickable/interactive +- color:X: Background color name (e.g., color:white, color:blue) +Multiple cues are comma-separated inside the braces: {{CLICKABLE,color:white}} + +ELEMENT FORMAT EXPLAINED: +Each element line follows this format: +[ID] "text" {{cues}} @ (x,y) size:WxH importance:score [status] + +Example: [346]