Skip to content

Commit 2c8d44c

Browse files
committed
JS: Added intersection support - more of stash commit
1 parent 71eda9d commit 2c8d44c

File tree

12 files changed

+210
-19
lines changed

12 files changed

+210
-19
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package com.semmle.js.ast.regexp;
2+
3+
import com.semmle.js.ast.SourceLocation;
4+
import java.util.List;
5+
6+
/**
 * A regular expression AST node for an intersection term, i.e. two or more operand terms
 * separated by {@code &&}.
 *
 * <p>The node registers itself with the term kind name {@code "Intersection"} and simply holds
 * the list of operand terms it was constructed with.
 */
public class Intersection extends RegExpTerm {
7+
private final List<RegExpTerm> intersections;
8+
9+
/**
 * Creates an intersection node.
 *
 * @param loc the source location of the whole term
 * @param intersections the operand terms; the list is stored as-is, without copying
 */
public Intersection(SourceLocation loc, List<RegExpTerm> intersections) {
10+
super(loc, "Intersection");
11+
this.intersections = intersections;
12+
}
13+
14+
/** Dispatches to {@link Visitor#visit(Intersection)}. */
@Override
15+
public void accept(Visitor v) {
16+
v.visit(this);
17+
}
18+
19+
/**
 * The individual operand terms of this intersection.
 *
 * @return the same list instance that was passed to the constructor
 */
20+
public List<RegExpTerm> getIntersections() {
21+
return intersections;
22+
}
23+
}

javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,6 @@ public interface Visitor {
6161
public void visit(ZeroWidthNegativeLookbehind nd);
6262

6363
public void visit(UnicodePropertyEscape nd);
64+
65+
public void visit(Intersection nd);
6466
}

javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.semmle.js.ast.regexp.Group;
2323
import com.semmle.js.ast.regexp.HexEscapeSequence;
2424
import com.semmle.js.ast.regexp.IdentityEscape;
25+
import com.semmle.js.ast.regexp.Intersection;
2526
import com.semmle.js.ast.regexp.Literal;
2627
import com.semmle.js.ast.regexp.NamedBackReference;
2728
import com.semmle.js.ast.regexp.NonWordBoundary;
@@ -92,24 +93,25 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) {
9293
termkinds.put("ZeroWidthPositiveLookbehind", 25);
9394
termkinds.put("ZeroWidthNegativeLookbehind", 26);
9495
termkinds.put("UnicodePropertyEscape", 27);
96+
termkinds.put("Intersection", 28);
9597
}
9698

9799
private static final String[] errmsgs =
98100
new String[] {
99-
"unexpected end of regular expression",
100-
"unexpected character",
101-
"expected digit",
102-
"expected hexadecimal digit",
103-
"expected control letter",
104-
"expected ')'",
105-
"expected '}'",
106-
"trailing characters",
107-
"octal escape sequence",
108-
"invalid back reference",
109-
"expected ']'",
110-
"expected identifier",
111-
"expected '>'"
112-
};
101+
"unexpected end of regular expression",
102+
"unexpected character",
103+
"expected digit",
104+
"expected hexadecimal digit",
105+
"expected control letter",
106+
"expected ')'",
107+
"expected '}'",
108+
"trailing characters",
109+
"octal escape sequence",
110+
"invalid back reference",
111+
"expected ']'",
112+
"expected identifier",
113+
"expected '>'"
114+
};
113115

114116
private Label extractTerm(RegExpTerm term, Label parent, int idx) {
115117
Label lbl = trapwriter.localID(term);
@@ -344,6 +346,14 @@ public void visit(CharacterClassRange nd) {
344346
visit(nd.getLeft(), lbl, 0);
345347
visit(nd.getRight(), lbl, 1);
346348
}
349+
350+
/**
 * Extracts an intersection term: emits the term itself via {@code extractTerm} under the
 * current {@code parent}/{@code idx}, then visits every operand as a child of the resulting
 * label, using the operand's position in the list (starting at 0) as its child index.
 */
@Override
351+
public void visit(Intersection nd) {
352+
Label lbl = extractTerm(nd, parent, idx);
353+
// running child index for the operands
int i = 0;
354+
for (RegExpTerm element : nd.getIntersections())
355+
visit(element, lbl, i++);
356+
}
347357
}
348358

349359
public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) {

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.semmle.js.ast.regexp.Group;
1919
import com.semmle.js.ast.regexp.HexEscapeSequence;
2020
import com.semmle.js.ast.regexp.IdentityEscape;
21+
import com.semmle.js.ast.regexp.Intersection;
2122
import com.semmle.js.ast.regexp.NamedBackReference;
2223
import com.semmle.js.ast.regexp.NonWordBoundary;
2324
import com.semmle.js.ast.regexp.OctalEscape;
@@ -225,20 +226,33 @@ private <T extends RegExpTerm> T finishTerm(T term) {
225226
private RegExpTerm parseDisjunction() {
226227
SourceLocation loc = new SourceLocation(pos());
227228
List<RegExpTerm> disjuncts = new ArrayList<>();
228-
disjuncts.add(this.parseAlternative());
229-
while (this.match("|")) disjuncts.add(this.parseAlternative());
229+
disjuncts.add(this.parseIntersection());
230+
while (this.match("|")) {
231+
disjuncts.add(this.parseIntersection());
232+
}
230233
if (disjuncts.size() == 1) return disjuncts.get(0);
231234
return this.finishTerm(new Disjunction(loc, disjuncts));
232-
}
235+
}
233236

234237
private RegExpTerm parseAlternative() {
235238
SourceLocation loc = new SourceLocation(pos());
236239
List<RegExpTerm> elements = new ArrayList<>();
237-
while (!this.lookahead(null, "|", ")")) elements.add(this.parseTerm());
240+
while (!this.lookahead(null, "|", "&&", ")")) elements.add(this.parseTerm());
238241
if (elements.size() == 1) return elements.get(0);
239242
return this.finishTerm(new Sequence(loc, elements));
240243
}
241244

245+
/**
 * Parses one or more alternatives separated by {@code &&}.
 *
 * <p>If only a single alternative is present it is returned unchanged; otherwise the
 * alternatives are wrapped in an {@link Intersection} whose location starts at the position
 * where parsing began.
 *
 * <p>NOTE(review): {@code &&} is consumed here regardless of regex flags or position. In
 * ECMAScript, {@code &&} appears to be defined only as a class-set operator inside a character
 * class under the {@code v} flag; elsewhere {@code &} is an ordinary pattern character. Confirm
 * that treating top-level {@code &&} as an intersection is intended.
 */
private RegExpTerm parseIntersection() {
246+
SourceLocation loc = new SourceLocation(pos());
247+
List<RegExpTerm> intersections = new ArrayList<>();
248+
intersections.add(this.parseAlternative());
249+
while (this.match("&&")) {
250+
intersections.add(this.parseAlternative());
251+
}
252+
// a lone operand is not an intersection; return it without wrapping
if (intersections.size() == 1) return intersections.get(0);
253+
return this.finishTerm(new Intersection(loc, intersections));
254+
}
255+
242256
private RegExpTerm parseTerm() {
243257
SourceLocation loc = new SourceLocation(pos());
244258

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/\p{Script_Extensions=Greek}&&\p{Letter}/v;
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#10000=@"/regexpIntersection.js;sourcefile"
2+
files(#10000,"/regexpIntersection.js")
3+
#10001=@"/;folder"
4+
folders(#10001,"/")
5+
containerparent(#10001,#10000)
6+
#10002=@"loc,{#10000},0,0,0,0"
7+
locations_default(#10002,#10000,0,0,0,0)
8+
hasLocation(#10000,#10002)
9+
#20000=@"global_scope"
10+
scopes(#20000,0)
11+
#20001=@"script;{#10000},1,1"
12+
#20002=*
13+
lines(#20002,#20001,"/\p{Script_Extensions=Greek}&&\p{Letter}/v;","
14+
")
15+
#20003=@"loc,{#10000},1,1,1,43"
16+
locations_default(#20003,#10000,1,1,1,43)
17+
hasLocation(#20002,#20003)
18+
numlines(#20001,1,1,0)
19+
#20004=*
20+
tokeninfo(#20004,5,#20001,0,"/\p{Script_Extensions=Greek}&&\p{Letter}/v")
21+
#20005=@"loc,{#10000},1,1,1,42"
22+
locations_default(#20005,#10000,1,1,1,42)
23+
hasLocation(#20004,#20005)
24+
#20006=*
25+
tokeninfo(#20006,8,#20001,1,";")
26+
#20007=@"loc,{#10000},1,43,1,43"
27+
locations_default(#20007,#10000,1,43,1,43)
28+
hasLocation(#20006,#20007)
29+
#20008=*
30+
tokeninfo(#20008,0,#20001,2,"")
31+
#20009=@"loc,{#10000},2,1,2,0"
32+
locations_default(#20009,#10000,2,1,2,0)
33+
hasLocation(#20008,#20009)
34+
toplevels(#20001,0)
35+
#20010=@"loc,{#10000},1,1,2,0"
36+
locations_default(#20010,#10000,1,1,2,0)
37+
hasLocation(#20001,#20010)
38+
#20011=*
39+
stmts(#20011,2,#20001,0,"/\p{Scr ... ter}/v;")
40+
hasLocation(#20011,#20003)
41+
stmt_containers(#20011,#20001)
42+
#20012=*
43+
exprs(#20012,5,#20011,0,"/\p{Scr ... tter}/v")
44+
hasLocation(#20012,#20005)
45+
enclosing_stmt(#20012,#20011)
46+
expr_containers(#20012,#20001)
47+
literals("/\p{Script_Extensions=Greek}&&\p{Letter}/v","/\p{Script_Extensions=Greek}&&\p{Letter}/v",#20012)
48+
#20013=*
49+
regexpterm(#20013,28,#20012,0,"\p{Script_Extensions=Greek}&&\p{Letter}")
50+
#20014=@"loc,{#10000},1,2,1,40"
51+
locations_default(#20014,#10000,1,2,1,40)
52+
hasLocation(#20013,#20014)
53+
#20015=*
54+
regexpterm(#20015,27,#20013,0,"\p{Script_Extensions=Greek}")
55+
#20016=@"loc,{#10000},1,2,1,28"
56+
locations_default(#20016,#10000,1,2,1,28)
57+
hasLocation(#20015,#20016)
58+
unicode_property_escapename(#20015,"Script_Extensions")
59+
unicode_property_escapevalue(#20015,"Greek")
60+
#20017=*
61+
regexpterm(#20017,27,#20013,1,"\p{Letter}")
62+
#20018=@"loc,{#10000},1,31,1,40"
63+
locations_default(#20018,#10000,1,31,1,40)
64+
hasLocation(#20017,#20018)
65+
unicode_property_escapename(#20017,"Letter")
66+
#20019=*
67+
entry_cfg_node(#20019,#20001)
68+
#20020=@"loc,{#10000},1,1,1,0"
69+
locations_default(#20020,#10000,1,1,1,0)
70+
hasLocation(#20019,#20020)
71+
#20021=*
72+
exit_cfg_node(#20021,#20001)
73+
hasLocation(#20021,#20009)
74+
successor(#20011,#20012)
75+
successor(#20012,#20021)
76+
successor(#20019,#20011)
77+
numlines(#10000,1,1,0)
78+
filetype(#10000,"javascript")

javascript/ql/lib/semmle/javascript/MembershipCandidates.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ module MembershipCandidate {
146146
child instanceof RegExpDollar or
147147
child instanceof RegExpConstant or
148148
child instanceof RegExpAlt or
149+
child instanceof RegExpIntersection or
149150
child instanceof RegExpGroup
150151
) and
151152
// exclude "length matches" that match every string

javascript/ql/lib/semmle/javascript/Regexp.qll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,29 @@ class RegExpAlt extends RegExpTerm, @regexp_alt {
301301
override string getAPrimaryQlClass() { result = "RegExpAlt" }
302302
}
303303

304+
/**
305+
* An intersection term, that is, a term of the form `a&&b`.
306+
*
307+
* Example:
308+
*
309+
* ```
310+
* /\p{Script_Extensions=Greek}&&\p{Letter}/v
311+
* ```
312+
*/
313+
class RegExpIntersection extends RegExpTerm, @regexp_intersection {
314+
/** Gets an intersected term (that is, an operand) of this term. */
315+
RegExpTerm getIntersectedTerm() { result = this.getAChild() }
316+
317+
/** Gets the number of intersected terms of this term. */
318+
int getNumIntersectedTerm() { result = this.getNumChild() }
319+
320+
/**
 * Holds if this term can match the empty string.
 *
 * An intersection only matches what all of its operands match, so it is
 * nullable only if every operand is nullable (unlike an alternation, where
 * a single nullable alternative suffices).
 */
override predicate isNullable() {
  forall(RegExpTerm operand | operand = this.getIntersectedTerm() | operand.isNullable())
}
321+
322+
/**
 * Gets a string matched by this term, that is, a string that is reported
 * as matched by every operand of the intersection.
 */
override string getAMatchedString() {
  result = this.getIntersectedTerm().getAMatchedString() and
  forall(RegExpTerm operand | operand = this.getIntersectedTerm() |
    result = operand.getAMatchedString()
  )
}
323+
324+
override string getAPrimaryQlClass() { result = "RegExpIntersection" }
325+
}
326+
304327
/**
305328
* A sequence term.
306329
*

javascript/ql/lib/semmlecode.javascript.dbscheme

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,8 @@ case @regexpterm.kind of
859859
| 24 = @regexp_char_range
860860
| 25 = @regexp_positive_lookbehind
861861
| 26 = @regexp_negative_lookbehind
862-
| 27 = @regexp_unicode_property_escape;
862+
| 27 = @regexp_unicode_property_escape
863+
| 28 = @regexp_intersection;
863864

864865
regexp_parse_errors (unique int id: @regexp_parse_error,
865866
int regexp: @regexpterm ref,
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
nodes
2+
| intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.label | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v |
3+
| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.label | [ExprStmt] /\\p{Scr ... ter}/v; |
4+
| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.order | 1 |
5+
| intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} |
6+
| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.label | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} |
7+
| intersection.js:1:29:1:30 | [RegExpNormalConstant] && | semmle.label | [RegExpNormalConstant] && |
8+
| intersection.js:1:31:1:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} |
9+
| intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.label | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v |
10+
| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.label | [ExprStmt] /\\p{Scr ... ter}/v; |
11+
| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.order | 2 |
12+
| intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} |
13+
| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.label | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} |
14+
| intersection.js:2:30:2:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} |
15+
edges
16+
| intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.label | 0 |
17+
| intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.order | 0 |
18+
| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.label | 1 |
19+
| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.order | 1 |
20+
| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 |
21+
| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 |
22+
| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:29:1:30 | [RegExpNormalConstant] && | semmle.label | 1 |
23+
| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:29:1:30 | [RegExpNormalConstant] && | semmle.order | 1 |
24+
| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:31:1:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 2 |
25+
| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:31:1:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 2 |
26+
| intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.label | 0 |
27+
| intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.order | 0 |
28+
| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.label | 1 |
29+
| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.order | 1 |
30+
| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 |
31+
| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 |
32+
| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:30:2:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 1 |
33+
| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:30:2:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 1 |
34+
graphProperties
35+
| semmle.graphKind | tree |

0 commit comments

Comments
 (0)