Skip to content

Commit 1e794e5

Browse files
author
Sentience Dev
committed
Merge pull request #5 from SentienceAPI/phase3_dsl
Phase 3: DSL query completed
2 parents 8fb4c00 + 84f8fbe commit 1e794e5

8 files changed

+367
-4
lines changed

sentience/query.py

Lines changed: 184 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,33 +16,121 @@ def parse_selector(selector: str) -> Dict[str, Any]:
1616
"role=textbox name~'email'"
1717
"clickable=true role=link"
1818
"role!=link"
19+
"importance>500"
20+
"text^='Sign'"
21+
"text$='in'"
1922
"""
2023
query: Dict[str, Any] = {}
2124

22-
# Match patterns like: key=value, key~'value', key!="value"
23-
# This regex matches: key, operator (=, ~, !=), and value (quoted or unquoted)
24-
pattern = r'(\w+)([=~!]+)((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))'
25+
# Match patterns like: key=value, key~'value', key!="value", key>123, key^='prefix', key$='suffix'
26+
# Updated regex to support: =, !=, ~, ^=, $=, >, >=, <, <=
27+
# Supports dot notation: attr.id, css.color
28+
# Note: Multi-character operators (^=, $=, >=, <=, !=) are listed before the single-character class so the regex alternation tries them first
29+
# Pattern matches: key, operator (including ^= and $=), and value (quoted or unquoted)
30+
pattern = r'([\w.]+)(\^=|\$=|>=|<=|!=|[=~<>])((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))'
2531
matches = re.findall(pattern, selector)
2632

2733
for key, op, value in matches:
2834
# Remove quotes from value
2935
value = value.strip().strip('"\'')
3036

37+
# Handle numeric comparisons
38+
is_numeric = False
39+
try:
40+
numeric_value = float(value)
41+
is_numeric = True
42+
except ValueError:
43+
pass
44+
3145
if op == '!=':
3246
if key == "role":
3347
query["role_exclude"] = value
3448
elif key == "clickable":
3549
query["clickable"] = False
50+
elif key == "visible":
51+
query["visible"] = False
3652
elif op == '~':
53+
# Substring match (case-insensitive)
3754
if key == "text" or key == "name":
3855
query["text_contains"] = value
56+
elif op == '^=':
57+
# Prefix match
58+
if key == "text" or key == "name":
59+
query["text_prefix"] = value
60+
elif op == '$=':
61+
# Suffix match
62+
if key == "text" or key == "name":
63+
query["text_suffix"] = value
64+
elif op == '>':
65+
# Greater than
66+
if is_numeric:
67+
if key == "importance":
68+
query["importance_min"] = numeric_value + 0.0001 # Exclusive
69+
elif key.startswith("bbox."):
70+
query[f"{key}_min"] = numeric_value + 0.0001
71+
elif key == "z_index":
72+
query["z_index_min"] = numeric_value + 0.0001
73+
elif key.startswith("attr.") or key.startswith("css."):
74+
query[f"{key}_gt"] = value
75+
elif op == '>=':
76+
# Greater than or equal
77+
if is_numeric:
78+
if key == "importance":
79+
query["importance_min"] = numeric_value
80+
elif key.startswith("bbox."):
81+
query[f"{key}_min"] = numeric_value
82+
elif key == "z_index":
83+
query["z_index_min"] = numeric_value
84+
elif key.startswith("attr.") or key.startswith("css."):
85+
query[f"{key}_gte"] = value
86+
elif op == '<':
87+
# Less than
88+
if is_numeric:
89+
if key == "importance":
90+
query["importance_max"] = numeric_value - 0.0001 # Exclusive
91+
elif key.startswith("bbox."):
92+
query[f"{key}_max"] = numeric_value - 0.0001
93+
elif key == "z_index":
94+
query["z_index_max"] = numeric_value - 0.0001
95+
elif key.startswith("attr.") or key.startswith("css."):
96+
query[f"{key}_lt"] = value
97+
elif op == '<=':
98+
# Less than or equal
99+
if is_numeric:
100+
if key == "importance":
101+
query["importance_max"] = numeric_value
102+
elif key.startswith("bbox."):
103+
query[f"{key}_max"] = numeric_value
104+
elif key == "z_index":
105+
query["z_index_max"] = numeric_value
106+
elif key.startswith("attr.") or key.startswith("css."):
107+
query[f"{key}_lte"] = value
39108
elif op == '=':
109+
# Exact match
40110
if key == "role":
41111
query["role"] = value
42112
elif key == "clickable":
43113
query["clickable"] = value.lower() == "true"
114+
elif key == "visible":
115+
query["visible"] = value.lower() == "true"
116+
elif key == "tag":
117+
query["tag"] = value
44118
elif key == "name" or key == "text":
45119
query["text"] = value
120+
elif key == "importance" and is_numeric:
121+
query["importance"] = numeric_value
122+
elif key.startswith("attr."):
123+
# Dot notation for attributes: attr.id="submit-btn"
124+
attr_key = key[5:] # Remove "attr." prefix
125+
if "attr" not in query:
126+
query["attr"] = {}
127+
query["attr"][attr_key] = value
128+
elif key.startswith("css."):
129+
# Dot notation for CSS: css.color="red"
130+
css_key = key[4:] # Remove "css." prefix
131+
if "css" not in query:
132+
query["css"] = {}
133+
query["css"][css_key] = value
46134

47135
return query
48136

@@ -65,6 +153,18 @@ def match_element(element: Element, query: Dict[str, Any]) -> bool:
65153
if element.visual_cues.is_clickable != query["clickable"]:
66154
return False
67155

156+
# Visible (using in_viewport and !is_occluded)
157+
if "visible" in query:
158+
is_visible = element.in_viewport and not element.is_occluded
159+
if is_visible != query["visible"]:
160+
return False
161+
162+
# Tag (not yet in Element model, but prepare for future)
163+
if "tag" in query:
164+
# For now, this filter is a no-op (it never rejects an element) since tag is not in Element model
165+
# This is a placeholder for future implementation
166+
pass
167+
68168
# Text exact match
69169
if "text" in query:
70170
if not element.text or element.text != query["text"]:
@@ -77,6 +177,87 @@ def match_element(element: Element, query: Dict[str, Any]) -> bool:
77177
if query["text_contains"].lower() not in element.text.lower():
78178
return False
79179

180+
# Text prefix match
181+
if "text_prefix" in query:
182+
if not element.text:
183+
return False
184+
if not element.text.lower().startswith(query["text_prefix"].lower()):
185+
return False
186+
187+
# Text suffix match
188+
if "text_suffix" in query:
189+
if not element.text:
190+
return False
191+
if not element.text.lower().endswith(query["text_suffix"].lower()):
192+
return False
193+
194+
# Importance filtering
195+
if "importance" in query:
196+
if element.importance != query["importance"]:
197+
return False
198+
if "importance_min" in query:
199+
if element.importance < query["importance_min"]:
200+
return False
201+
if "importance_max" in query:
202+
if element.importance > query["importance_max"]:
203+
return False
204+
205+
# BBox filtering (spatial)
206+
if "bbox.x_min" in query:
207+
if element.bbox.x < query["bbox.x_min"]:
208+
return False
209+
if "bbox.x_max" in query:
210+
if element.bbox.x > query["bbox.x_max"]:
211+
return False
212+
if "bbox.y_min" in query:
213+
if element.bbox.y < query["bbox.y_min"]:
214+
return False
215+
if "bbox.y_max" in query:
216+
if element.bbox.y > query["bbox.y_max"]:
217+
return False
218+
if "bbox.width_min" in query:
219+
if element.bbox.width < query["bbox.width_min"]:
220+
return False
221+
if "bbox.width_max" in query:
222+
if element.bbox.width > query["bbox.width_max"]:
223+
return False
224+
if "bbox.height_min" in query:
225+
if element.bbox.height < query["bbox.height_min"]:
226+
return False
227+
if "bbox.height_max" in query:
228+
if element.bbox.height > query["bbox.height_max"]:
229+
return False
230+
231+
# Z-index filtering
232+
if "z_index_min" in query:
233+
if element.z_index < query["z_index_min"]:
234+
return False
235+
if "z_index_max" in query:
236+
if element.z_index > query["z_index_max"]:
237+
return False
238+
239+
# In viewport filtering
240+
if "in_viewport" in query:
241+
if element.in_viewport != query["in_viewport"]:
242+
return False
243+
244+
# Occlusion filtering
245+
if "is_occluded" in query:
246+
if element.is_occluded != query["is_occluded"]:
247+
return False
248+
249+
# Attribute filtering (dot notation: attr.id="submit-btn")
250+
if "attr" in query:
251+
# This requires DOM access, which is not available in the Element model
252+
# This is a placeholder (attr filters are currently ignored and never reject an element) for future implementation when we add DOM access
253+
pass
254+
255+
# CSS property filtering (dot notation: css.color="red")
256+
if "css" in query:
257+
# This requires DOM access, which is not available in the Element model
258+
# This is a placeholder (css filters are currently ignored and never reject an element) for future implementation when we add DOM access
259+
pass
260+
80261
return True
81262

82263

-180 Bytes
Binary file not shown.
-7.62 KB
Binary file not shown.
-16.6 KB
Binary file not shown.
-15.5 KB
Binary file not shown.
Binary file not shown.
-9.98 KB
Binary file not shown.

0 commit comments

Comments
 (0)