diff --git a/sentience/query.py b/sentience/query.py index 8ab2c32..b835ac5 100644 --- a/sentience/query.py +++ b/sentience/query.py @@ -16,33 +16,121 @@ def parse_selector(selector: str) -> Dict[str, Any]: "role=textbox name~'email'" "clickable=true role=link" "role!=link" + "importance>500" + "text^='Sign'" + "text$='in'" """ query: Dict[str, Any] = {} - # Match patterns like: key=value, key~'value', key!="value" - # This regex matches: key, operator (=, ~, !=), and value (quoted or unquoted) - pattern = r'(\w+)([=~!]+)((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))' + # Match patterns like: key=value, key~'value', key!="value", key>123, key^='prefix', key$='suffix' + # Updated regex to support: =, !=, ~, ^=, $=, >, >=, <, <= + # Supports dot notation: attr.id, css.color + # Note: Handle ^= and $= first (before single char operators) to avoid regex conflicts + # Pattern matches: key, operator (including ^= and $=), and value (quoted or unquoted) + pattern = r'([\w.]+)(\^=|\$=|>=|<=|!=|[=~<>])((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))' matches = re.findall(pattern, selector) for key, op, value in matches: # Remove quotes from value value = value.strip().strip('"\'') + # Handle numeric comparisons + is_numeric = False + try: + numeric_value = float(value) + is_numeric = True + except ValueError: + pass + if op == '!=': if key == "role": query["role_exclude"] = value elif key == "clickable": query["clickable"] = False + elif key == "visible": + query["visible"] = False elif op == '~': + # Substring match (case-insensitive) if key == "text" or key == "name": query["text_contains"] = value + elif op == '^=': + # Prefix match + if key == "text" or key == "name": + query["text_prefix"] = value + elif op == '$=': + # Suffix match + if key == "text" or key == "name": + query["text_suffix"] = value + elif op == '>': + # Greater than + if is_numeric: + if key == "importance": + query["importance_min"] = numeric_value + 0.0001 # Exclusive + elif key.startswith("bbox."): + query[f"{key}_min"] = numeric_value + 0.0001 + elif key == "z_index": + query["z_index_min"] = numeric_value + 0.0001 + elif key.startswith("attr.") or key.startswith("css."): + query[f"{key}_gt"] = value + elif op == '>=': + # Greater than or equal + if is_numeric: + if key == "importance": + query["importance_min"] = numeric_value + elif key.startswith("bbox."): + query[f"{key}_min"] = numeric_value + elif key == "z_index": + query["z_index_min"] = numeric_value + elif key.startswith("attr.") or key.startswith("css."): + query[f"{key}_gte"] = value + elif op == '<': + # Less than + if is_numeric: + if key == "importance": + query["importance_max"] = numeric_value - 0.0001 # Exclusive + elif key.startswith("bbox."): + query[f"{key}_max"] = numeric_value - 0.0001 + elif key == "z_index": + query["z_index_max"] = numeric_value - 0.0001 + elif key.startswith("attr.") or key.startswith("css."): + query[f"{key}_lt"] = value + elif op == '<=': + # Less than or equal + if is_numeric: + if key == "importance": + query["importance_max"] = numeric_value + elif key.startswith("bbox."): + query[f"{key}_max"] = numeric_value + elif key == "z_index": + query["z_index_max"] = numeric_value + elif key.startswith("attr.") or key.startswith("css."): + query[f"{key}_lte"] = value elif op == '=': + # Exact match if key == "role": query["role"] = value elif key == "clickable": query["clickable"] = value.lower() == "true" + elif key == "visible": + query["visible"] = value.lower() == "true" + elif key == "tag": + query["tag"] = value elif key == "name" or key == "text": query["text"] = value + elif key == "importance" and is_numeric: + query["importance"] = numeric_value + elif key.startswith("attr."): + # Dot notation for attributes: attr.id="submit-btn" + attr_key = key[5:] # Remove "attr." prefix + if "attr" not in query: + query["attr"] = {} + query["attr"][attr_key] = value + elif key.startswith("css."): + # Dot notation for CSS: css.color="red" + css_key = key[4:] # Remove "css." prefix + if "css" not in query: + query["css"] = {} + query["css"][css_key] = value return query @@ -65,6 +153,18 @@ def match_element(element: Element, query: Dict[str, Any]) -> bool: if element.visual_cues.is_clickable != query["clickable"]: return False + # Visible (using in_viewport and !is_occluded) + if "visible" in query: + is_visible = element.in_viewport and not element.is_occluded + if is_visible != query["visible"]: + return False + + # Tag (not yet in Element model, but prepare for future) + if "tag" in query: + # For now, this will always fail since tag is not in Element model + # This is a placeholder for future implementation + pass + # Text exact match if "text" in query: if not element.text or element.text != query["text"]: @@ -77,6 +177,87 @@ def match_element(element: Element, query: Dict[str, Any]) -> bool: if query["text_contains"].lower() not in element.text.lower(): return False + # Text prefix match + if "text_prefix" in query: + if not element.text: + return False + if not element.text.lower().startswith(query["text_prefix"].lower()): + return False + + # Text suffix match + if "text_suffix" in query: + if not element.text: + return False + if not element.text.lower().endswith(query["text_suffix"].lower()): + return False + + # Importance filtering + if "importance" in query: + if element.importance != query["importance"]: + return False + if "importance_min" in query: + if element.importance < query["importance_min"]: + return False + if "importance_max" in query: + if element.importance > query["importance_max"]: + return False + + # BBox filtering (spatial) + if "bbox.x_min" in query: + if element.bbox.x < query["bbox.x_min"]: + return False + if "bbox.x_max" in query: + if element.bbox.x > query["bbox.x_max"]: + return False + if "bbox.y_min" in query: + if element.bbox.y < query["bbox.y_min"]: + return False + if "bbox.y_max" in query: + if element.bbox.y > query["bbox.y_max"]: + return False + if "bbox.width_min" in query: + if element.bbox.width < query["bbox.width_min"]: + return False + if "bbox.width_max" in query: + if element.bbox.width > query["bbox.width_max"]: + return False + if "bbox.height_min" in query: + if element.bbox.height < query["bbox.height_min"]: + return False + if "bbox.height_max" in query: + if element.bbox.height > query["bbox.height_max"]: + return False + + # Z-index filtering + if "z_index_min" in query: + if element.z_index < query["z_index_min"]: + return False + if "z_index_max" in query: + if element.z_index > query["z_index_max"]: + return False + + # In viewport filtering + if "in_viewport" in query: + if element.in_viewport != query["in_viewport"]: + return False + + # Occlusion filtering + if "is_occluded" in query: + if element.is_occluded != query["is_occluded"]: + return False + + # Attribute filtering (dot notation: attr.id="submit-btn") + if "attr" in query: + # This requires DOM access, which is not available in the Element model + # This is a placeholder for future implementation when we add DOM access + pass + + # CSS property filtering (dot notation: css.color="red") + if "css" in query: + # This requires DOM access, which is not available in the Element model + # This is a placeholder for future implementation when we add DOM access + pass + return True diff --git a/tests/__pycache__/__init__.cpython-311.pyc b/tests/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 052259e..0000000 Binary files a/tests/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/tests/__pycache__/test_actions.cpython-311-pytest-7.4.4.pyc b/tests/__pycache__/test_actions.cpython-311-pytest-7.4.4.pyc deleted file mode 100644 index d019e02..0000000 Binary files a/tests/__pycache__/test_actions.cpython-311-pytest-7.4.4.pyc and /dev/null differ diff --git a/tests/__pycache__/test_query.cpython-311-pytest-7.4.4.pyc b/tests/__pycache__/test_query.cpython-311-pytest-7.4.4.pyc deleted file mode 100644 index b10d368..0000000 Binary files a/tests/__pycache__/test_query.cpython-311-pytest-7.4.4.pyc and /dev/null differ diff --git a/tests/__pycache__/test_snapshot.cpython-311-pytest-7.4.4.pyc b/tests/__pycache__/test_snapshot.cpython-311-pytest-7.4.4.pyc deleted file mode 100644 index 670cec1..0000000 Binary files a/tests/__pycache__/test_snapshot.cpython-311-pytest-7.4.4.pyc and /dev/null differ diff --git a/tests/__pycache__/test_spec_validation.cpython-311-pytest-7.4.4.pyc b/tests/__pycache__/test_spec_validation.cpython-311-pytest-7.4.4.pyc deleted file mode 100644 index b74b704..0000000 Binary files a/tests/__pycache__/test_spec_validation.cpython-311-pytest-7.4.4.pyc and /dev/null differ diff --git a/tests/__pycache__/test_wait.cpython-311-pytest-7.4.4.pyc b/tests/__pycache__/test_wait.cpython-311-pytest-7.4.4.pyc deleted file mode 100644 index 94bfc21..0000000 Binary files a/tests/__pycache__/test_wait.cpython-311-pytest-7.4.4.pyc and /dev/null differ diff --git a/tests/test_query.py b/tests/test_query.py index 18da363..b7b9545 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -2,7 +2,6 @@ Tests for query engine """ -import pytest from sentience import SentienceBrowser, snapshot, query, find from sentience.query import parse_selector, match_element from sentience.models import Element, BBox, VisualCues @@ -30,6 +29,39 @@ def test_parse_selector(): # Negation q = parse_selector("role!=link") assert q["role_exclude"] == "link" + + # New operators: prefix and suffix + q = parse_selector("text^='Sign'") + assert q["text_prefix"] == "Sign" + + q = parse_selector("text$='in'") + assert q["text_suffix"] == "in" + + # Numeric comparisons: importance + q = parse_selector("importance>500") + assert "importance_min" in q + assert q["importance_min"] > 500 + + q = parse_selector("importance>=500") + assert q["importance_min"] == 500 + + q = parse_selector("importance<1000") + assert "importance_max" in q + assert q["importance_max"] < 1000 + + q = parse_selector("importance<=1000") + assert q["importance_max"] == 1000 + + # Visible field + q = parse_selector("visible=true") + assert q["visible"] is True + + q = parse_selector("visible=false") + assert q["visible"] is False + + # Tag field (placeholder for future) + q = parse_selector("tag=button") + assert q["tag"] == "button" def test_match_element(): @@ -41,6 +73,9 @@ def test_match_element(): importance=100, bbox=BBox(x=0, y=0, width=100, height=40), visual_cues=VisualCues(is_primary=True, is_clickable=True), + in_viewport=True, + is_occluded=False, + z_index=10, ) # Role match @@ -51,9 +86,71 @@ def test_match_element(): assert match_element(element, {"text_contains": "Sign"}) is True assert match_element(element, {"text_contains": "Logout"}) is False + # Text prefix + assert match_element(element, {"text_prefix": "Sign"}) is True + assert match_element(element, {"text_prefix": "Login"}) is False + + # Text suffix + assert match_element(element, {"text_suffix": "In"}) is True + assert match_element(element, {"text_suffix": "Out"}) is False + # Clickable assert match_element(element, {"clickable": True}) is True assert match_element(element, {"clickable": False}) is False + + # Visible (using in_viewport and !is_occluded) + assert match_element(element, {"visible": True}) is True + element_occluded = Element( + id=2, + role="button", + text="Hidden", + importance=50, + bbox=BBox(x=0, y=0, width=100, height=40), + visual_cues=VisualCues(is_primary=False, is_clickable=True), + in_viewport=True, + is_occluded=True, + z_index=5, + ) + assert match_element(element_occluded, {"visible": True}) is False + assert match_element(element_occluded, {"visible": False}) is True + + # Importance filtering + assert match_element(element, {"importance_min": 50}) is True + assert match_element(element, {"importance_min": 150}) is False + assert match_element(element, {"importance_max": 150}) is True + assert match_element(element, {"importance_max": 50}) is False + + # BBox filtering + assert match_element(element, {"bbox.x_min": -10}) is True + assert match_element(element, {"bbox.x_min": 10}) is False + assert match_element(element, {"bbox.width_min": 50}) is True + assert match_element(element, {"bbox.width_min": 150}) is False + + # Z-index filtering + assert match_element(element, {"z_index_min": 5}) is True + assert match_element(element, {"z_index_min": 15}) is False + assert match_element(element, {"z_index_max": 15}) is True + assert match_element(element, {"z_index_max": 5}) is False + + # In viewport filtering + assert match_element(element, {"in_viewport": True}) is True + element_off_screen = Element( + id=3, + role="button", + text="Off Screen", + importance=50, + bbox=BBox(x=0, y=0, width=100, height=40), + visual_cues=VisualCues(is_primary=False, is_clickable=True), + in_viewport=False, + is_occluded=False, + z_index=5, + ) + assert match_element(element_off_screen, {"in_viewport": False}) is True + assert match_element(element_off_screen, {"in_viewport": True}) is False + + # Occlusion filtering + assert match_element(element, {"is_occluded": False}) is True + assert match_element(element_occluded, {"is_occluded": True}) is True def test_query_integration(): @@ -89,3 +186,88 @@ def test_find_integration(): assert link.role == "link" assert link.id >= 0 + +def test_query_advanced_operators(): + """Test advanced query operators""" + # Create test elements + elements = [ + Element( + id=1, + role="button", + text="Sign In", + importance=1000, + bbox=BBox(x=10, y=20, width=100, height=40), + visual_cues=VisualCues(is_primary=True, is_clickable=True), + in_viewport=True, + is_occluded=False, + z_index=10, + ), + Element( + id=2, + role="button", + text="Sign Out", + importance=500, + bbox=BBox(x=120, y=20, width=100, height=40), + visual_cues=VisualCues(is_primary=False, is_clickable=True), + in_viewport=True, + is_occluded=False, + z_index=5, + ), + Element( + id=3, + role="link", + text="More information", + importance=200, + bbox=BBox(x=10, y=70, width=150, height=20), + visual_cues=VisualCues(is_primary=False, is_clickable=True), + in_viewport=True, + is_occluded=False, + z_index=1, + ), + ] + + from sentience.models import Snapshot + snap = Snapshot( + status="success", + url="https://example.com", + elements=elements, + ) + + # Test importance filtering + high_importance = query(snap, "importance>500") + assert len(high_importance) == 1 + assert high_importance[0].id == 1 + + low_importance = query(snap, "importance<300") + assert len(low_importance) == 1 + assert low_importance[0].id == 3 + + # Test prefix matching + sign_prefix = query(snap, "text^='Sign'") + assert len(sign_prefix) == 2 + assert all("Sign" in el.text for el in sign_prefix) + + # Test suffix matching + in_suffix = query(snap, "text$='In'") + assert len(in_suffix) == 1 + assert in_suffix[0].text == "Sign In" + + # Test BBox filtering + right_side = query(snap, "bbox.x>100") + assert len(right_side) == 1 + assert right_side[0].id == 2 + + # Test combined queries + combined = query(snap, "role=button importance>500") + assert len(combined) == 1 + assert combined[0].id == 1 + + # Test visible filtering + visible = query(snap, "visible=true") + assert len(visible) == 3 # All are visible + + # Test z-index filtering + high_z = query(snap, "z_index>5") + assert len(high_z) == 1 + assert high_z[0].id == 1 +