Skip to content

Commit 78aa5dc

Browse files
committed
Added fallback for parsing RegExp with unknown flags.
1 parent 430514b commit 78aa5dc

File tree

3 files changed

+137
-137
lines changed

3 files changed

+137
-137
lines changed

javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,27 @@ public Label visit(Identifier nd, Context c) {
580580
return key;
581581
}
582582

583+
public boolean isRegExpCall(Node parent) {
584+
if (parent != null && parent instanceof VariableDeclaration) {
585+
for (VariableDeclarator declarator : ((VariableDeclaration) parent).getDeclarations()) {
586+
if (declarator.getInit() instanceof InvokeExpression) {
587+
InvokeExpression invoke = (InvokeExpression) declarator.getInit();
588+
Expression callee = invoke.getCallee();
589+
if (callee instanceof Identifier && "RegExp".equals(((Identifier)callee).getName())) {
590+
return true;
591+
} else if (callee instanceof MemberExpression) {
592+
MemberExpression memberExpr = (MemberExpression)callee;
593+
if (memberExpr.getProperty() instanceof Identifier &&
594+
"RegExp".equals(((Identifier)memberExpr.getProperty()).getName())) {
595+
return true;
596+
}
597+
}
598+
}
599+
}
600+
}
601+
return false;
602+
}
603+
583604
@Override
584605
public Label visit(Literal nd, Context c) {
585606
Label key = super.visit(nd, c);
@@ -600,7 +621,12 @@ public Label visit(Literal nd, Context c) {
600621
SourceMap sourceMap =
601622
SourceMap.legacyWithStartPos(
602623
SourceMap.fromString(nd.getRaw()).offsetBy(0, offsets), startPos);
603-
regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false, source.substring(source.lastIndexOf('/'), source.length()));
624+
625+
boolean isRegExprCall = isRegExpCall(contextManager.getCurrentStatement());
626+
// If the regular expression was created using RegExp(), the flags might be unknown.
627+
// In this case, we will also attempt to parse it using the "v" (Unicode sets) flag.
628+
String flagsStr = isRegExprCall ? null : source.substring(source.lastIndexOf('/') + 1);
629+
regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false, flagsStr);
604630
} else if (nd.isStringLiteral()
605631
&& !c.isInsideType()
606632
&& nd.getRaw().length() < 1000

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public List<Error> getErrors() {
7575
private String flags;
7676

7777
/** Parse the given string as a regular expression. */
78-
public Result parse(String src) {
78+
public Result tryParse(String src) {
7979
this.src = src;
8080
this.pos = 0;
8181
this.errors = new ArrayList<>();
@@ -88,6 +88,18 @@ public Result parse(String src) {
8888
return new Result(root, errors);
8989
}
9090

91+
public Result parse(String src) {
92+
Result res = tryParse(src);
93+
if(flags == null && !res.getErrors().isEmpty()) {
94+
// Try parsing with the `v` flag enabled
95+
flags = "v";
96+
Result resultWithV = tryParse(src);
97+
// If we got a better result with the `v` flag enabled, return that result
98+
if(resultWithV.getErrors().isEmpty())return resultWithV;
99+
}
100+
return res;
101+
}
102+
91103
public Result parse(String src, String flags) {
92104
this.flags = flags;
93105
return parse(src);

javascript/extractor/tests/es2024/output/trap/regex_with_not_known_flags.js.trap

Lines changed: 97 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -125,159 +125,121 @@ enclosing_stmt(#20039,#20030)
125125
expr_containers(#20039,#20001)
126126
literals("/[[abc]&&[[bcd]--[[c][d]]]]/","/[[abc]&&[[bcd]--[[c][d]]]]/",#20039)
127127
#20040=*
128-
regexpterm(#20040,1,#20039,0,"[[abc]&&[[bcd]--[[c][d]]]]")
128+
regexpterm(#20040,23,#20039,0,"[[abc]&&[[bcd]--[[c][d]]]]")
129129
#20041=@"loc,{#10000},1,27,1,52"
130130
locations_default(#20041,#10000,1,27,1,52)
131131
hasLocation(#20040,#20041)
132132
#20042=*
133-
regexpterm(#20042,23,#20040,0,"[[abc]")
134-
#20043=@"loc,{#10000},1,27,1,32"
135-
locations_default(#20043,#10000,1,27,1,32)
136-
hasLocation(#20042,#20043)
137-
#20044=*
138-
regexpterm(#20044,14,#20042,0,"[")
139-
#20045=@"loc,{#10000},1,28,1,28"
140-
locations_default(#20045,#10000,1,28,1,28)
141-
hasLocation(#20044,#20045)
142-
regexp_const_value(#20044,"[")
143-
#20046=*
144-
regexpterm(#20046,14,#20042,1,"a")
145-
#20047=@"loc,{#10000},1,29,1,29"
146-
locations_default(#20047,#10000,1,29,1,29)
147-
hasLocation(#20046,#20047)
148-
regexp_const_value(#20046,"a")
149-
#20048=*
150-
regexpterm(#20048,14,#20042,2,"b")
151-
#20049=@"loc,{#10000},1,30,1,30"
152-
locations_default(#20049,#10000,1,30,1,30)
153-
hasLocation(#20048,#20049)
154-
regexp_const_value(#20048,"b")
155-
#20050=*
156-
regexpterm(#20050,14,#20042,3,"c")
157-
#20051=@"loc,{#10000},1,31,1,31"
158-
locations_default(#20051,#10000,1,31,1,31)
159-
hasLocation(#20050,#20051)
160-
regexp_const_value(#20050,"c")
161-
#20052=*
162-
regexpterm(#20052,14,#20040,1,"&&")
163-
#20053=@"loc,{#10000},1,33,1,34"
164-
locations_default(#20053,#10000,1,33,1,34)
165-
hasLocation(#20052,#20053)
166-
regexp_const_value(#20052,"&&")
133+
regexpterm(#20042,29,#20040,0,"[[abc]&&[[bcd]--[[c][d]]]]")
134+
hasLocation(#20042,#20041)
135+
#20043=*
136+
regexpterm(#20043,23,#20042,0,"[abc]")
137+
#20044=@"loc,{#10000},1,28,1,32"
138+
locations_default(#20044,#10000,1,28,1,32)
139+
hasLocation(#20043,#20044)
140+
#20045=*
141+
regexpterm(#20045,14,#20043,0,"a")
142+
#20046=@"loc,{#10000},1,29,1,29"
143+
locations_default(#20046,#10000,1,29,1,29)
144+
hasLocation(#20045,#20046)
145+
regexp_const_value(#20045,"a")
146+
#20047=*
147+
regexpterm(#20047,14,#20043,1,"b")
148+
#20048=@"loc,{#10000},1,30,1,30"
149+
locations_default(#20048,#10000,1,30,1,30)
150+
hasLocation(#20047,#20048)
151+
regexp_const_value(#20047,"b")
152+
#20049=*
153+
regexpterm(#20049,14,#20043,2,"c")
154+
#20050=@"loc,{#10000},1,31,1,31"
155+
locations_default(#20050,#10000,1,31,1,31)
156+
hasLocation(#20049,#20050)
157+
regexp_const_value(#20049,"c")
158+
#20051=*
159+
regexpterm(#20051,23,#20042,1,"[[bcd]--[[c][d]]]")
160+
#20052=@"loc,{#10000},1,35,1,51"
161+
locations_default(#20052,#10000,1,35,1,51)
162+
hasLocation(#20051,#20052)
163+
#20053=*
164+
regexpterm(#20053,30,#20051,0,"[[bcd]--[[c][d]]]")
165+
hasLocation(#20053,#20052)
167166
#20054=*
168-
regexpterm(#20054,23,#20040,2,"[[bcd]")
169-
#20055=@"loc,{#10000},1,35,1,40"
170-
locations_default(#20055,#10000,1,35,1,40)
167+
regexpterm(#20054,23,#20053,0,"[bcd]")
168+
#20055=@"loc,{#10000},1,36,1,40"
169+
locations_default(#20055,#10000,1,36,1,40)
171170
hasLocation(#20054,#20055)
172171
#20056=*
173-
regexpterm(#20056,14,#20054,0,"[")
174-
#20057=@"loc,{#10000},1,36,1,36"
175-
locations_default(#20057,#10000,1,36,1,36)
172+
regexpterm(#20056,14,#20054,0,"b")
173+
#20057=@"loc,{#10000},1,37,1,37"
174+
locations_default(#20057,#10000,1,37,1,37)
176175
hasLocation(#20056,#20057)
177-
regexp_const_value(#20056,"[")
176+
regexp_const_value(#20056,"b")
178177
#20058=*
179-
regexpterm(#20058,14,#20054,1,"b")
180-
#20059=@"loc,{#10000},1,37,1,37"
181-
locations_default(#20059,#10000,1,37,1,37)
178+
regexpterm(#20058,14,#20054,1,"c")
179+
#20059=@"loc,{#10000},1,38,1,38"
180+
locations_default(#20059,#10000,1,38,1,38)
182181
hasLocation(#20058,#20059)
183-
regexp_const_value(#20058,"b")
182+
regexp_const_value(#20058,"c")
184183
#20060=*
185-
regexpterm(#20060,14,#20054,2,"c")
186-
#20061=@"loc,{#10000},1,38,1,38"
187-
locations_default(#20061,#10000,1,38,1,38)
184+
regexpterm(#20060,14,#20054,2,"d")
185+
#20061=@"loc,{#10000},1,39,1,39"
186+
locations_default(#20061,#10000,1,39,1,39)
188187
hasLocation(#20060,#20061)
189-
regexp_const_value(#20060,"c")
188+
regexp_const_value(#20060,"d")
190189
#20062=*
191-
regexpterm(#20062,14,#20054,3,"d")
192-
#20063=@"loc,{#10000},1,39,1,39"
193-
locations_default(#20063,#10000,1,39,1,39)
190+
regexpterm(#20062,23,#20053,1,"[[c][d]]")
191+
#20063=@"loc,{#10000},1,43,1,50"
192+
locations_default(#20063,#10000,1,43,1,50)
194193
hasLocation(#20062,#20063)
195-
regexp_const_value(#20062,"d")
196194
#20064=*
197-
regexpterm(#20064,14,#20040,3,"--")
198-
#20065=@"loc,{#10000},1,41,1,42"
199-
locations_default(#20065,#10000,1,41,1,42)
200-
hasLocation(#20064,#20065)
201-
regexp_const_value(#20064,"--")
202-
#20066=*
203-
regexpterm(#20066,23,#20040,4,"[[c]")
204-
#20067=@"loc,{#10000},1,43,1,46"
205-
locations_default(#20067,#10000,1,43,1,46)
206-
hasLocation(#20066,#20067)
207-
#20068=*
208-
regexpterm(#20068,14,#20066,0,"[")
209-
#20069=@"loc,{#10000},1,44,1,44"
210-
locations_default(#20069,#10000,1,44,1,44)
211-
hasLocation(#20068,#20069)
212-
regexp_const_value(#20068,"[")
213-
#20070=*
214-
regexpterm(#20070,14,#20066,1,"c")
215-
#20071=@"loc,{#10000},1,45,1,45"
216-
locations_default(#20071,#10000,1,45,1,45)
217-
hasLocation(#20070,#20071)
218-
regexp_const_value(#20070,"c")
219-
#20072=*
220-
regexpterm(#20072,23,#20040,5,"[d]")
221-
#20073=@"loc,{#10000},1,47,1,49"
222-
locations_default(#20073,#10000,1,47,1,49)
223-
hasLocation(#20072,#20073)
224-
#20074=*
225-
regexpterm(#20074,14,#20072,0,"d")
226-
#20075=@"loc,{#10000},1,48,1,48"
227-
locations_default(#20075,#10000,1,48,1,48)
228-
hasLocation(#20074,#20075)
229-
regexp_const_value(#20074,"d")
230-
#20076=*
231-
regexpterm(#20076,14,#20040,6,"]")
232-
#20077=@"loc,{#10000},1,50,1,50"
233-
locations_default(#20077,#10000,1,50,1,50)
234-
hasLocation(#20076,#20077)
235-
regexp_const_value(#20076,"]")
236-
#20078=*
237-
regexpterm(#20078,14,#20040,7,"]")
238-
#20079=@"loc,{#10000},1,51,1,51"
239-
locations_default(#20079,#10000,1,51,1,51)
240-
hasLocation(#20078,#20079)
241-
regexp_const_value(#20078,"]")
242-
#20080=*
243-
regexpterm(#20080,14,#20040,8,"]")
244-
#20081=@"loc,{#10000},1,52,1,52"
245-
locations_default(#20081,#10000,1,52,1,52)
246-
hasLocation(#20080,#20081)
247-
regexp_const_value(#20080,"]")
248-
#20082=*
249-
regexp_parse_errors(#20082,#20040,"unexpected character")
250-
hasLocation(#20082,#20077)
251-
#20083=*
252-
regexp_parse_errors(#20083,#20040,"unexpected character")
253-
hasLocation(#20083,#20079)
254-
#20084=*
255-
regexp_parse_errors(#20084,#20040,"unexpected character")
256-
hasLocation(#20084,#20081)
257-
#20085=*
258-
exprs(#20085,79,#20035,1,"notKnownFlags")
259-
hasLocation(#20085,#20021)
260-
enclosing_stmt(#20085,#20030)
261-
expr_containers(#20085,#20001)
262-
literals("notKnownFlags","notKnownFlags",#20085)
263-
#20086=@"var;{notKnownFlags};{#20000}"
264-
variables(#20086,"notKnownFlags",#20000)
265-
bind(#20085,#20086)
266-
#20087=*
267-
entry_cfg_node(#20087,#20001)
268-
#20088=@"loc,{#10000},1,1,1,0"
269-
locations_default(#20088,#10000,1,1,1,0)
270-
hasLocation(#20087,#20088)
271-
#20089=*
272-
exit_cfg_node(#20089,#20001)
273-
hasLocation(#20089,#20027)
195+
regexpterm(#20064,31,#20062,0,"[[c][d]]")
196+
hasLocation(#20064,#20063)
197+
#20065=*
198+
regexpterm(#20065,23,#20064,0,"[c]")
199+
#20066=@"loc,{#10000},1,44,1,46"
200+
locations_default(#20066,#10000,1,44,1,46)
201+
hasLocation(#20065,#20066)
202+
#20067=*
203+
regexpterm(#20067,14,#20065,0,"c")
204+
#20068=@"loc,{#10000},1,45,1,45"
205+
locations_default(#20068,#10000,1,45,1,45)
206+
hasLocation(#20067,#20068)
207+
regexp_const_value(#20067,"c")
208+
#20069=*
209+
regexpterm(#20069,23,#20064,1,"[d]")
210+
#20070=@"loc,{#10000},1,47,1,49"
211+
locations_default(#20070,#10000,1,47,1,49)
212+
hasLocation(#20069,#20070)
213+
#20071=*
214+
regexpterm(#20071,14,#20069,0,"d")
215+
#20072=@"loc,{#10000},1,48,1,48"
216+
locations_default(#20072,#10000,1,48,1,48)
217+
hasLocation(#20071,#20072)
218+
regexp_const_value(#20071,"d")
219+
#20073=*
220+
exprs(#20073,79,#20035,1,"notKnownFlags")
221+
hasLocation(#20073,#20021)
222+
enclosing_stmt(#20073,#20030)
223+
expr_containers(#20073,#20001)
224+
literals("notKnownFlags","notKnownFlags",#20073)
225+
#20074=@"var;{notKnownFlags};{#20000}"
226+
variables(#20074,"notKnownFlags",#20000)
227+
bind(#20073,#20074)
228+
#20075=*
229+
entry_cfg_node(#20075,#20001)
230+
#20076=@"loc,{#10000},1,1,1,0"
231+
locations_default(#20076,#10000,1,1,1,0)
232+
hasLocation(#20075,#20076)
233+
#20077=*
234+
exit_cfg_node(#20077,#20001)
235+
hasLocation(#20077,#20027)
274236
successor(#20030,#20034)
275-
successor(#20085,#20035)
276-
successor(#20039,#20085)
237+
successor(#20073,#20035)
238+
successor(#20039,#20073)
277239
successor(#20037,#20039)
278240
successor(#20035,#20032)
279241
successor(#20034,#20037)
280-
successor(#20032,#20089)
281-
successor(#20087,#20030)
242+
successor(#20032,#20077)
243+
successor(#20075,#20030)
282244
numlines(#10000,1,1,0)
283245
filetype(#10000,"javascript")

0 commit comments

Comments
 (0)