Skip to content

Commit c381d3c

Browse files
committed
JS: Added parser intersection support
1 parent 7b0c3c9 commit c381d3c

File tree

6 files changed

+60
-12
lines changed

6 files changed

+60
-12
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package com.semmle.js.ast.regexp;
2+
3+
import com.semmle.js.ast.SourceLocation;
4+
import java.util.List;
5+
6+
public class Intersection extends RegExpTerm {
7+
private final List<RegExpTerm> intersections;
8+
9+
public Intersection(SourceLocation loc, List<RegExpTerm> intersections) {
10+
super(loc, "Intersection");
11+
this.intersections = intersections;
12+
}
13+
14+
@Override
15+
public void accept(Visitor v) {
16+
v.visit(this);
17+
}
18+
19+
public List<RegExpTerm> getIntersections() {
20+
return intersections;
21+
}
22+
}

javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,6 @@ public interface Visitor {
6161
public void visit(ZeroWidthNegativeLookbehind nd);
6262

6363
public void visit(UnicodePropertyEscape nd);
64+
65+
public void visit(Intersection nd);
6466
}

javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.semmle.js.ast.regexp.Group;
2323
import com.semmle.js.ast.regexp.HexEscapeSequence;
2424
import com.semmle.js.ast.regexp.IdentityEscape;
25+
import com.semmle.js.ast.regexp.Intersection;
2526
import com.semmle.js.ast.regexp.Literal;
2627
import com.semmle.js.ast.regexp.NamedBackReference;
2728
import com.semmle.js.ast.regexp.NonWordBoundary;
@@ -92,6 +93,7 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) {
9293
termkinds.put("ZeroWidthPositiveLookbehind", 25);
9394
termkinds.put("ZeroWidthNegativeLookbehind", 26);
9495
termkinds.put("UnicodePropertyEscape", 27);
96+
termkinds.put("Intersection", 28);
9597
}
9698

9799
private static final String[] errmsgs =
@@ -344,6 +346,14 @@ public void visit(CharacterClassRange nd) {
344346
visit(nd.getLeft(), lbl, 0);
345347
visit(nd.getRight(), lbl, 1);
346348
}
349+
350+
@Override
351+
public void visit(Intersection nd) {
352+
Label lbl = extractTerm(nd, parent, idx);
353+
int i = 0;
354+
for (RegExpTerm element : nd.getIntersections())
355+
visit(element, lbl, i++);
356+
}
347357
}
348358

349359
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) {

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.semmle.js.ast.regexp.Group;
1919
import com.semmle.js.ast.regexp.HexEscapeSequence;
2020
import com.semmle.js.ast.regexp.IdentityEscape;
21+
import com.semmle.js.ast.regexp.Intersection;
2122
import com.semmle.js.ast.regexp.NamedBackReference;
2223
import com.semmle.js.ast.regexp.NonWordBoundary;
2324
import com.semmle.js.ast.regexp.OctalEscape;
@@ -225,20 +226,33 @@ private <T extends RegExpTerm> T finishTerm(T term) {
225226
/**
 * Parses a disjunction: one operand, followed by a further operand for every
 * {@code |} that {@code match} accepts.
 *
 * <p>This span is a diff rendering: a line that follows an {@code N-} gutter is
 * the removed form (operands read with {@code parseAlternative}), a line that
 * follows an {@code N+} gutter is the added form (operands read with
 * {@code parseIntersection}).
 *
 * @return the single operand itself when no {@code |} was seen; otherwise the
 *     result of {@code finishTerm} applied to a new {@code Disjunction} whose
 *     location was captured before the first operand was parsed
 */
private RegExpTerm parseDisjunction() {
226227
SourceLocation loc = new SourceLocation(pos());
227228
List<RegExpTerm> disjuncts = new ArrayList<>();
228-
disjuncts.add(this.parseAlternative());
229-
while (this.match("|")) disjuncts.add(this.parseAlternative());
229+
disjuncts.add(this.parseIntersection());
230+
while (this.match("|")) {
231+
disjuncts.add(this.parseIntersection());
232+
}
230233
if (disjuncts.size() == 1) return disjuncts.get(0);
231234
return this.finishTerm(new Disjunction(loc, disjuncts));
232-
}
235+
}
233236

234237
/**
 * Parses an alternative: terms are read with {@code parseTerm} until
 * {@code lookahead} reports one of the listed stop tokens.
 *
 * <p>This span is a diff rendering: the line after the {@code 237-} gutter is
 * the removed loop (stops on {@code null}, {@code |}, {@code )}); the line after
 * the {@code 240+} gutter is the added loop, which also stops on {@code &&}.
 * The {@code null} entry presumably stands for end of input; confirm against
 * {@code lookahead}.
 *
 * <p>NOTE(review): {@code &&} now ends an alternative unconditionally. Outside a
 * {@code v}-flag character class, {@code &&} is ordinary pattern text, so a
 * pattern such as {@code a&&b} would presumably be parsed as an Intersection
 * rather than as literal ampersands. Confirm, and consider gating on the flag
 * and class context.
 *
 * @return the single term itself when exactly one was read; otherwise the
 *     result of {@code finishTerm} applied to a new {@code Sequence} of the
 *     collected terms (possibly empty)
 */
private RegExpTerm parseAlternative() {
235238
SourceLocation loc = new SourceLocation(pos());
236239
List<RegExpTerm> elements = new ArrayList<>();
237-
while (!this.lookahead(null, "|", ")")) elements.add(this.parseTerm());
240+
while (!this.lookahead(null, "|", "&&", ")")) elements.add(this.parseTerm());
238241
if (elements.size() == 1) return elements.get(0);
239242
return this.finishTerm(new Sequence(loc, elements));
240243
}
241244

245+
private RegExpTerm parseIntersection() {
246+
SourceLocation loc = new SourceLocation(pos());
247+
List<RegExpTerm> intersections = new ArrayList<>();
248+
intersections.add(this.parseAlternative());
249+
while (this.match("&&")) {
250+
intersections.add(this.parseAlternative());
251+
}
252+
if (intersections.size() == 1) return intersections.get(0);
253+
return this.finishTerm(new Intersection(loc, intersections));
254+
}
255+
242256
private RegExpTerm parseTerm() {
243257
SourceLocation loc = new SourceLocation(pos());
244258

javascript/extractor/tests/es2024/output/trap/intersection.js.trap

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ enclosing_stmt(#20009,#20008)
3939
expr_containers(#20009,#20001)
4040
literals("/[[abc]&&[bcd]]/v","/[[abc]&&[bcd]]/v",#20009)
4141
#20010=*
42-
regexpterm(#20010,1,#20009,0,"[[abc]&&[bcd]]")
42+
regexpterm(#20010,28,#20009,0,"[[abc]&&[bcd]]")
4343
#20011=@"loc,{#10000},1,2,1,15"
4444
locations_default(#20011,#10000,1,2,1,15)
4545
hasLocation(#20010,#20011)
@@ -73,13 +73,12 @@ locations_default(#20021,#10000,1,6,1,6)
7373
hasLocation(#20020,#20021)
7474
regexp_const_value(#20020,"c")
7575
#20022=*
76-
regexpterm(#20022,14,#20010,1,"&&")
77-
#20023=@"loc,{#10000},1,8,1,9"
78-
locations_default(#20023,#10000,1,8,1,9)
76+
regexpterm(#20022,1,#20010,1,"[bcd]]")
77+
#20023=@"loc,{#10000},1,10,1,15"
78+
locations_default(#20023,#10000,1,10,1,15)
7979
hasLocation(#20022,#20023)
80-
regexp_const_value(#20022,"&&")
8180
#20024=*
82-
regexpterm(#20024,23,#20010,2,"[bcd]")
81+
regexpterm(#20024,23,#20022,0,"[bcd]")
8382
#20025=@"loc,{#10000},1,10,1,14"
8483
locations_default(#20025,#10000,1,10,1,14)
8584
hasLocation(#20024,#20025)
@@ -102,7 +101,7 @@ locations_default(#20031,#10000,1,13,1,13)
102101
hasLocation(#20030,#20031)
103102
regexp_const_value(#20030,"d")
104103
#20032=*
105-
regexpterm(#20032,14,#20010,3,"]")
104+
regexpterm(#20032,14,#20022,1,"]")
106105
#20033=@"loc,{#10000},1,15,1,15"
107106
locations_default(#20033,#10000,1,15,1,15)
108107
hasLocation(#20032,#20033)

javascript/ql/lib/semmlecode.javascript.dbscheme

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,8 @@ case @regexpterm.kind of
859859
| 24 = @regexp_char_range
860860
| 25 = @regexp_positive_lookbehind
861861
| 26 = @regexp_negative_lookbehind
862-
| 27 = @regexp_unicode_property_escape;
862+
| 27 = @regexp_unicode_property_escape
863+
| 28 = @regexp_intersection;
863864

864865
regexp_parse_errors (unique int id: @regexp_parse_error,
865866
int regexp: @regexpterm ref,

0 commit comments

Comments
 (0)