@@ -16,33 +16,121 @@ def parse_selector(selector: str) -> Dict[str, Any]:
1616 "role=textbox name~'email'"
1717 "clickable=true role=link"
1818 "role!=link"
19+ "importance>500"
20+ "text^='Sign'"
21+ "text$='in'"
1922 """
2023 query : Dict [str , Any ] = {}
2124
22- # Match patterns like: key=value, key~'value', key!="value"
23- # This regex matches: key, operator (=, ~, !=), and value (quoted or unquoted)
24- pattern = r'(\w+)([=~!]+)((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))'
25+ # Match patterns like: key=value, key~'value', key!="value", key>123, key^='prefix', key$='suffix'
26+ # Updated regex to support: =, !=, ~, ^=, $=, >, >=, <, <=
27+ # Supports dot notation: attr.id, css.color
28+ # Note: Multi-character operators (^=, $=, >=, <=, !=) are listed before the single-character ones so the alternation tries them first
29+ # Pattern matches: key, operator (including ^= and $=), and value (quoted or unquoted)
30+ pattern = r'([\w.]+)(\^=|\$=|>=|<=|!=|[=~<>])((?:\'[^\']+\'|\"[^\"]+\"|[^\s]+))'
2531 matches = re .findall (pattern , selector )
2632
2733 for key , op , value in matches :
2834 # Remove quotes from value
2935 value = value .strip ().strip ('"\' ' )
3036
37+ # Handle numeric comparisons
38+ is_numeric = False
39+ try :
40+ numeric_value = float (value )
41+ is_numeric = True
42+ except ValueError :
43+ pass
44+
3145 if op == '!=' :
3246 if key == "role" :
3347 query ["role_exclude" ] = value
3448 elif key == "clickable" :
3549 query ["clickable" ] = False
50+ elif key == "visible" :
51+ query ["visible" ] = False
3652 elif op == '~' :
53+ # Substring match (case-insensitive)
3754 if key == "text" or key == "name" :
3855 query ["text_contains" ] = value
56+ elif op == '^=' :
57+ # Prefix match
58+ if key == "text" or key == "name" :
59+ query ["text_prefix" ] = value
60+ elif op == '$=' :
61+ # Suffix match
62+ if key == "text" or key == "name" :
63+ query ["text_suffix" ] = value
64+ elif op == '>' :
65+ # Greater than
66+ if is_numeric :
67+ if key == "importance" :
68+ query ["importance_min" ] = numeric_value + 0.0001 # Exclusive
69+ elif key .startswith ("bbox." ):
70+ query [f"{ key } _min" ] = numeric_value + 0.0001
71+ elif key == "z_index" :
72+ query ["z_index_min" ] = numeric_value + 0.0001
73+ elif key .startswith ("attr." ) or key .startswith ("css." ):
74+ query [f"{ key } _gt" ] = value
75+ elif op == '>=' :
76+ # Greater than or equal
77+ if is_numeric :
78+ if key == "importance" :
79+ query ["importance_min" ] = numeric_value
80+ elif key .startswith ("bbox." ):
81+ query [f"{ key } _min" ] = numeric_value
82+ elif key == "z_index" :
83+ query ["z_index_min" ] = numeric_value
84+ elif key .startswith ("attr." ) or key .startswith ("css." ):
85+ query [f"{ key } _gte" ] = value
86+ elif op == '<' :
87+ # Less than
88+ if is_numeric :
89+ if key == "importance" :
90+ query ["importance_max" ] = numeric_value - 0.0001 # Exclusive
91+ elif key .startswith ("bbox." ):
92+ query [f"{ key } _max" ] = numeric_value - 0.0001
93+ elif key == "z_index" :
94+ query ["z_index_max" ] = numeric_value - 0.0001
95+ elif key .startswith ("attr." ) or key .startswith ("css." ):
96+ query [f"{ key } _lt" ] = value
97+ elif op == '<=' :
98+ # Less than or equal
99+ if is_numeric :
100+ if key == "importance" :
101+ query ["importance_max" ] = numeric_value
102+ elif key .startswith ("bbox." ):
103+ query [f"{ key } _max" ] = numeric_value
104+ elif key == "z_index" :
105+ query ["z_index_max" ] = numeric_value
106+ elif key .startswith ("attr." ) or key .startswith ("css." ):
107+ query [f"{ key } _lte" ] = value
39108 elif op == '=' :
109+ # Exact match
40110 if key == "role" :
41111 query ["role" ] = value
42112 elif key == "clickable" :
43113 query ["clickable" ] = value .lower () == "true"
114+ elif key == "visible" :
115+ query ["visible" ] = value .lower () == "true"
116+ elif key == "tag" :
117+ query ["tag" ] = value
44118 elif key == "name" or key == "text" :
45119 query ["text" ] = value
120+ elif key == "importance" and is_numeric :
121+ query ["importance" ] = numeric_value
122+ elif key .startswith ("attr." ):
123+ # Dot notation for attributes: attr.id="submit-btn"
124+ attr_key = key [5 :] # Remove "attr." prefix
125+ if "attr" not in query :
126+ query ["attr" ] = {}
127+ query ["attr" ][attr_key ] = value
128+ elif key .startswith ("css." ):
129+ # Dot notation for CSS: css.color="red"
130+ css_key = key [4 :] # Remove "css." prefix
131+ if "css" not in query :
132+ query ["css" ] = {}
133+ query ["css" ][css_key ] = value
46134
47135 return query
48136
@@ -65,6 +153,18 @@ def match_element(element: Element, query: Dict[str, Any]) -> bool:
65153 if element .visual_cues .is_clickable != query ["clickable" ]:
66154 return False
67155
156+ # Visible (using in_viewport and !is_occluded)
157+ if "visible" in query :
158+ is_visible = element .in_viewport and not element .is_occluded
159+ if is_visible != query ["visible" ]:
160+ return False
161+
162+ # Tag (not yet in Element model, but prepare for future)
163+ if "tag" in query :
164+ # For now, this filter is a no-op (it never rejects an element) since tag is not in Element model
165+ # This is a placeholder for future implementation
166+ pass
167+
68168 # Text exact match
69169 if "text" in query :
70170 if not element .text or element .text != query ["text" ]:
@@ -77,6 +177,87 @@ def match_element(element: Element, query: Dict[str, Any]) -> bool:
77177 if query ["text_contains" ].lower () not in element .text .lower ():
78178 return False
79179
180+ # Text prefix match
181+ if "text_prefix" in query :
182+ if not element .text :
183+ return False
184+ if not element .text .lower ().startswith (query ["text_prefix" ].lower ()):
185+ return False
186+
187+ # Text suffix match
188+ if "text_suffix" in query :
189+ if not element .text :
190+ return False
191+ if not element .text .lower ().endswith (query ["text_suffix" ].lower ()):
192+ return False
193+
194+ # Importance filtering
195+ if "importance" in query :
196+ if element .importance != query ["importance" ]:
197+ return False
198+ if "importance_min" in query :
199+ if element .importance < query ["importance_min" ]:
200+ return False
201+ if "importance_max" in query :
202+ if element .importance > query ["importance_max" ]:
203+ return False
204+
205+ # BBox filtering (spatial)
206+ if "bbox.x_min" in query :
207+ if element .bbox .x < query ["bbox.x_min" ]:
208+ return False
209+ if "bbox.x_max" in query :
210+ if element .bbox .x > query ["bbox.x_max" ]:
211+ return False
212+ if "bbox.y_min" in query :
213+ if element .bbox .y < query ["bbox.y_min" ]:
214+ return False
215+ if "bbox.y_max" in query :
216+ if element .bbox .y > query ["bbox.y_max" ]:
217+ return False
218+ if "bbox.width_min" in query :
219+ if element .bbox .width < query ["bbox.width_min" ]:
220+ return False
221+ if "bbox.width_max" in query :
222+ if element .bbox .width > query ["bbox.width_max" ]:
223+ return False
224+ if "bbox.height_min" in query :
225+ if element .bbox .height < query ["bbox.height_min" ]:
226+ return False
227+ if "bbox.height_max" in query :
228+ if element .bbox .height > query ["bbox.height_max" ]:
229+ return False
230+
231+ # Z-index filtering
232+ if "z_index_min" in query :
233+ if element .z_index < query ["z_index_min" ]:
234+ return False
235+ if "z_index_max" in query :
236+ if element .z_index > query ["z_index_max" ]:
237+ return False
238+
239+ # In viewport filtering
240+ if "in_viewport" in query :
241+ if element .in_viewport != query ["in_viewport" ]:
242+ return False
243+
244+ # Occlusion filtering
245+ if "is_occluded" in query :
246+ if element .is_occluded != query ["is_occluded" ]:
247+ return False
248+
249+ # Attribute filtering (dot notation: attr.id="submit-btn")
250+ if "attr" in query :
251+ # This requires DOM access, which is not available in the Element model
252+ # This is a placeholder for future implementation when we add DOM access
253+ pass
254+
255+ # CSS property filtering (dot notation: css.color="red")
256+ if "css" in query :
257+ # This requires DOM access, which is not available in the Element model
258+ # This is a placeholder for future implementation when we add DOM access
259+ pass
260+
80261 return True
81262
82263
0 commit comments