Skip to content

Commit 2333c53

Browse files
committed
Added the ability to parse nested character classes when the `v` flag is used.
1 parent de6f3b1 commit 2333c53

File tree

2 files changed

+114
-146
lines changed

2 files changed

+114
-146
lines changed

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,7 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) {
545545
}
546546

547547
private RegExpTerm parseCharacterClass() {
548+
if (flags != null && flags.contains("v")) return parseNestedCharacterClass();
548549
SourceLocation loc = new SourceLocation(pos());
549550
List<RegExpTerm> elements = new ArrayList<>();
550551

@@ -560,6 +561,28 @@ private RegExpTerm parseCharacterClass() {
560561
return this.finishTerm(new CharacterClass(loc, elements, inverted));
561562
}
562563

564+
// The ECMAScript 2024 `v` flag (unicodeSets mode) allows nested character classes.
565+
private RegExpTerm parseNestedCharacterClass() {
566+
SourceLocation loc = new SourceLocation(pos());
567+
List<RegExpTerm> elements = new ArrayList<>();
568+
569+
this.match("[");
570+
boolean inverted = this.match("^");
571+
while (!this.match("]")) {
572+
if (this.atEOS()) {
573+
this.error(Error.EXPECTED_RBRACKET);
574+
break;
575+
}
576+
if (lookahead("[")) {
577+
elements.add(parseNestedCharacterClass());
578+
}
579+
else {
580+
elements.add(this.parseCharacterClassElement());
581+
}
582+
}
583+
return this.finishTerm(new CharacterClass(loc, elements, inverted));
584+
}
585+
563586
private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
564587

565588
private RegExpTerm parseCharacterClassElement() {

javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap

Lines changed: 91 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -86,181 +86,126 @@ enclosing_stmt(#20027,#20025)
8686
expr_containers(#20027,#20001)
8787
literals("/[[]]/v","/[[]]/v",#20027)
8888
#20028=*
89-
regexpterm(#20028,1,#20027,0,"[[]]")
89+
regexpterm(#20028,23,#20027,0,"[[]]")
9090
#20029=@"loc,{#10000},1,2,1,5"
9191
locations_default(#20029,#10000,1,2,1,5)
9292
hasLocation(#20028,#20029)
9393
#20030=*
94-
regexpterm(#20030,23,#20028,0,"[[]")
95-
#20031=@"loc,{#10000},1,2,1,4"
96-
locations_default(#20031,#10000,1,2,1,4)
94+
regexpterm(#20030,23,#20028,0,"[]")
95+
#20031=@"loc,{#10000},1,3,1,4"
96+
locations_default(#20031,#10000,1,3,1,4)
9797
hasLocation(#20030,#20031)
9898
#20032=*
99-
regexpterm(#20032,14,#20030,0,"[")
100-
#20033=@"loc,{#10000},1,3,1,3"
101-
locations_default(#20033,#10000,1,3,1,3)
102-
hasLocation(#20032,#20033)
103-
regexp_const_value(#20032,"[")
99+
stmts(#20032,2,#20001,1,"/[[a]]/v;")
100+
hasLocation(#20032,#20007)
101+
stmt_containers(#20032,#20001)
102+
#20033=*
103+
exprs(#20033,5,#20032,0,"/[[a]]/v")
104+
hasLocation(#20033,#20015)
105+
enclosing_stmt(#20033,#20032)
106+
expr_containers(#20033,#20001)
107+
literals("/[[a]]/v","/[[a]]/v",#20033)
104108
#20034=*
105-
regexpterm(#20034,14,#20028,1,"]")
106-
#20035=@"loc,{#10000},1,5,1,5"
107-
locations_default(#20035,#10000,1,5,1,5)
109+
regexpterm(#20034,23,#20033,0,"[[a]]")
110+
#20035=@"loc,{#10000},2,2,2,6"
111+
locations_default(#20035,#10000,2,2,2,6)
108112
hasLocation(#20034,#20035)
109-
regexp_const_value(#20034,"]")
110113
#20036=*
111-
regexp_parse_errors(#20036,#20028,"unexpected character")
112-
hasLocation(#20036,#20035)
113-
#20037=*
114-
stmts(#20037,2,#20001,1,"/[[a]]/v;")
115-
hasLocation(#20037,#20007)
116-
stmt_containers(#20037,#20001)
114+
regexpterm(#20036,23,#20034,0,"[a]")
115+
#20037=@"loc,{#10000},2,3,2,5"
116+
locations_default(#20037,#10000,2,3,2,5)
117+
hasLocation(#20036,#20037)
117118
#20038=*
118-
exprs(#20038,5,#20037,0,"/[[a]]/v")
119-
hasLocation(#20038,#20015)
120-
enclosing_stmt(#20038,#20037)
121-
expr_containers(#20038,#20001)
122-
literals("/[[a]]/v","/[[a]]/v",#20038)
123-
#20039=*
124-
regexpterm(#20039,1,#20038,0,"[[a]]")
125-
#20040=@"loc,{#10000},2,2,2,6"
126-
locations_default(#20040,#10000,2,2,2,6)
127-
hasLocation(#20039,#20040)
119+
regexpterm(#20038,14,#20036,0,"a")
120+
#20039=@"loc,{#10000},2,4,2,4"
121+
locations_default(#20039,#10000,2,4,2,4)
122+
hasLocation(#20038,#20039)
123+
regexp_const_value(#20038,"a")
124+
#20040=*
125+
stmts(#20040,2,#20001,2,"/[ [] [ [] [] ] ]/v;")
126+
hasLocation(#20040,#20009)
127+
stmt_containers(#20040,#20001)
128128
#20041=*
129-
regexpterm(#20041,23,#20039,0,"[[a]")
130-
#20042=@"loc,{#10000},2,2,2,5"
131-
locations_default(#20042,#10000,2,2,2,5)
132-
hasLocation(#20041,#20042)
133-
#20043=*
134-
regexpterm(#20043,14,#20041,0,"[")
135-
#20044=@"loc,{#10000},2,3,2,3"
136-
locations_default(#20044,#10000,2,3,2,3)
137-
hasLocation(#20043,#20044)
138-
regexp_const_value(#20043,"[")
139-
#20045=*
140-
regexpterm(#20045,14,#20041,1,"a")
141-
#20046=@"loc,{#10000},2,4,2,4"
142-
locations_default(#20046,#10000,2,4,2,4)
143-
hasLocation(#20045,#20046)
144-
regexp_const_value(#20045,"a")
145-
#20047=*
146-
regexpterm(#20047,14,#20039,1,"]")
147-
#20048=@"loc,{#10000},2,6,2,6"
148-
locations_default(#20048,#10000,2,6,2,6)
149-
hasLocation(#20047,#20048)
150-
regexp_const_value(#20047,"]")
151-
#20049=*
152-
regexp_parse_errors(#20049,#20039,"unexpected character")
153-
hasLocation(#20049,#20048)
129+
exprs(#20041,5,#20040,0,"/[ [] [ [] [] ] ]/v")
130+
hasLocation(#20041,#20019)
131+
enclosing_stmt(#20041,#20040)
132+
expr_containers(#20041,#20001)
133+
literals("/[ [] [ [] [] ] ]/v","/[ [] [ [] [] ] ]/v",#20041)
134+
#20042=*
135+
regexpterm(#20042,23,#20041,0,"[ [] [ [] [] ] ]")
136+
#20043=@"loc,{#10000},3,2,3,17"
137+
locations_default(#20043,#10000,3,2,3,17)
138+
hasLocation(#20042,#20043)
139+
#20044=*
140+
regexpterm(#20044,14,#20042,0," ")
141+
#20045=@"loc,{#10000},3,3,3,3"
142+
locations_default(#20045,#10000,3,3,3,3)
143+
hasLocation(#20044,#20045)
144+
regexp_const_value(#20044," ")
145+
#20046=*
146+
regexpterm(#20046,23,#20042,1,"[]")
147+
#20047=@"loc,{#10000},3,4,3,5"
148+
locations_default(#20047,#10000,3,4,3,5)
149+
hasLocation(#20046,#20047)
150+
#20048=*
151+
regexpterm(#20048,14,#20042,2," ")
152+
#20049=@"loc,{#10000},3,6,3,6"
153+
locations_default(#20049,#10000,3,6,3,6)
154+
hasLocation(#20048,#20049)
155+
regexp_const_value(#20048," ")
154156
#20050=*
155-
stmts(#20050,2,#20001,2,"/[ [] [ [] [] ] ]/v;")
156-
hasLocation(#20050,#20009)
157-
stmt_containers(#20050,#20001)
158-
#20051=*
159-
exprs(#20051,5,#20050,0,"/[ [] [ [] [] ] ]/v")
160-
hasLocation(#20051,#20019)
161-
enclosing_stmt(#20051,#20050)
162-
expr_containers(#20051,#20001)
163-
literals("/[ [] [ [] [] ] ]/v","/[ [] [ [] [] ] ]/v",#20051)
157+
regexpterm(#20050,23,#20042,3,"[ [] [] ]")
158+
#20051=@"loc,{#10000},3,7,3,15"
159+
locations_default(#20051,#10000,3,7,3,15)
160+
hasLocation(#20050,#20051)
164161
#20052=*
165-
regexpterm(#20052,1,#20051,0,"[ [] [ [] [] ] ]")
166-
#20053=@"loc,{#10000},3,2,3,17"
167-
locations_default(#20053,#10000,3,2,3,17)
162+
regexpterm(#20052,14,#20050,0," ")
163+
#20053=@"loc,{#10000},3,8,3,8"
164+
locations_default(#20053,#10000,3,8,3,8)
168165
hasLocation(#20052,#20053)
166+
regexp_const_value(#20052," ")
169167
#20054=*
170-
regexpterm(#20054,23,#20052,0,"[ []")
171-
#20055=@"loc,{#10000},3,2,3,5"
172-
locations_default(#20055,#10000,3,2,3,5)
168+
regexpterm(#20054,23,#20050,1,"[]")
169+
#20055=@"loc,{#10000},3,9,3,10"
170+
locations_default(#20055,#10000,3,9,3,10)
173171
hasLocation(#20054,#20055)
174172
#20056=*
175-
regexpterm(#20056,14,#20054,0," ")
176-
#20057=@"loc,{#10000},3,3,3,3"
177-
locations_default(#20057,#10000,3,3,3,3)
173+
regexpterm(#20056,14,#20050,2," ")
174+
#20057=@"loc,{#10000},3,11,3,11"
175+
locations_default(#20057,#10000,3,11,3,11)
178176
hasLocation(#20056,#20057)
179177
regexp_const_value(#20056," ")
180178
#20058=*
181-
regexpterm(#20058,14,#20054,1,"[")
182-
#20059=@"loc,{#10000},3,4,3,4"
183-
locations_default(#20059,#10000,3,4,3,4)
179+
regexpterm(#20058,23,#20050,3,"[]")
180+
#20059=@"loc,{#10000},3,12,3,13"
181+
locations_default(#20059,#10000,3,12,3,13)
184182
hasLocation(#20058,#20059)
185-
regexp_const_value(#20058,"[")
186183
#20060=*
187-
regexpterm(#20060,14,#20052,1," ")
188-
#20061=@"loc,{#10000},3,6,3,6"
189-
locations_default(#20061,#10000,3,6,3,6)
184+
regexpterm(#20060,14,#20050,4," ")
185+
#20061=@"loc,{#10000},3,14,3,14"
186+
locations_default(#20061,#10000,3,14,3,14)
190187
hasLocation(#20060,#20061)
191188
regexp_const_value(#20060," ")
192189
#20062=*
193-
regexpterm(#20062,23,#20052,2,"[ []")
194-
#20063=@"loc,{#10000},3,7,3,10"
195-
locations_default(#20063,#10000,3,7,3,10)
190+
regexpterm(#20062,14,#20042,4," ")
191+
#20063=@"loc,{#10000},3,16,3,16"
192+
locations_default(#20063,#10000,3,16,3,16)
196193
hasLocation(#20062,#20063)
194+
regexp_const_value(#20062," ")
197195
#20064=*
198-
regexpterm(#20064,14,#20062,0," ")
199-
#20065=@"loc,{#10000},3,8,3,8"
200-
locations_default(#20065,#10000,3,8,3,8)
196+
entry_cfg_node(#20064,#20001)
197+
#20065=@"loc,{#10000},1,1,1,0"
198+
locations_default(#20065,#10000,1,1,1,0)
201199
hasLocation(#20064,#20065)
202-
regexp_const_value(#20064," ")
203200
#20066=*
204-
regexpterm(#20066,14,#20062,1,"[")
205-
#20067=@"loc,{#10000},3,9,3,9"
206-
locations_default(#20067,#10000,3,9,3,9)
207-
hasLocation(#20066,#20067)
208-
regexp_const_value(#20066,"[")
209-
#20068=*
210-
regexpterm(#20068,14,#20052,3," ")
211-
#20069=@"loc,{#10000},3,11,3,11"
212-
locations_default(#20069,#10000,3,11,3,11)
213-
hasLocation(#20068,#20069)
214-
regexp_const_value(#20068," ")
215-
#20070=*
216-
regexpterm(#20070,23,#20052,4,"[]")
217-
#20071=@"loc,{#10000},3,12,3,13"
218-
locations_default(#20071,#10000,3,12,3,13)
219-
hasLocation(#20070,#20071)
220-
#20072=*
221-
regexpterm(#20072,14,#20052,5," ")
222-
#20073=@"loc,{#10000},3,14,3,14"
223-
locations_default(#20073,#10000,3,14,3,14)
224-
hasLocation(#20072,#20073)
225-
regexp_const_value(#20072," ")
226-
#20074=*
227-
regexpterm(#20074,14,#20052,6,"]")
228-
#20075=@"loc,{#10000},3,15,3,15"
229-
locations_default(#20075,#10000,3,15,3,15)
230-
hasLocation(#20074,#20075)
231-
regexp_const_value(#20074,"]")
232-
#20076=*
233-
regexpterm(#20076,14,#20052,7," ")
234-
#20077=@"loc,{#10000},3,16,3,16"
235-
locations_default(#20077,#10000,3,16,3,16)
236-
hasLocation(#20076,#20077)
237-
regexp_const_value(#20076," ")
238-
#20078=*
239-
regexpterm(#20078,14,#20052,8,"]")
240-
#20079=@"loc,{#10000},3,17,3,17"
241-
locations_default(#20079,#10000,3,17,3,17)
242-
hasLocation(#20078,#20079)
243-
regexp_const_value(#20078,"]")
244-
#20080=*
245-
regexp_parse_errors(#20080,#20052,"unexpected character")
246-
hasLocation(#20080,#20075)
247-
#20081=*
248-
regexp_parse_errors(#20081,#20052,"unexpected character")
249-
hasLocation(#20081,#20079)
250-
#20082=*
251-
entry_cfg_node(#20082,#20001)
252-
#20083=@"loc,{#10000},1,1,1,0"
253-
locations_default(#20083,#10000,1,1,1,0)
254-
hasLocation(#20082,#20083)
255-
#20084=*
256-
exit_cfg_node(#20084,#20001)
257-
hasLocation(#20084,#20023)
258-
successor(#20050,#20051)
259-
successor(#20051,#20084)
260-
successor(#20037,#20038)
261-
successor(#20038,#20050)
201+
exit_cfg_node(#20066,#20001)
202+
hasLocation(#20066,#20023)
203+
successor(#20040,#20041)
204+
successor(#20041,#20066)
205+
successor(#20032,#20033)
206+
successor(#20033,#20040)
262207
successor(#20025,#20027)
263-
successor(#20027,#20037)
264-
successor(#20082,#20025)
208+
successor(#20027,#20032)
209+
successor(#20064,#20025)
265210
numlines(#10000,3,3,1)
266211
filetype(#10000,"javascript")

0 commit comments

Comments
 (0)