From cb448db3cecd3da168800d41460d706cbfdd9c5f Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 16:53:18 +0100 Subject: [PATCH 01/27] Exposed flags to the regex parser --- .../src/com/semmle/js/extractor/ASTExtractor.java | 2 +- .../src/com/semmle/js/extractor/RegExpExtractor.java | 8 ++++++-- .../extractor/src/com/semmle/js/parser/RegExpParser.java | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java index 0dc00f79ab03..5625a9f1211c 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java @@ -600,7 +600,7 @@ public Label visit(Literal nd, Context c) { SourceMap sourceMap = SourceMap.legacyWithStartPos( SourceMap.fromString(nd.getRaw()).offsetBy(0, offsets), startPos); - regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false); + regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false, source.substring(source.lastIndexOf('/'), source.length())); } else if (nd.isStringLiteral() && !c.isInsideType() && nd.getRaw().length() < 1000 diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 41d7d446cfe3..5c02a4e99351 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -346,8 +346,8 @@ public void visit(CharacterClassRange nd) { } } - public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) { - Result res = parser.parse(src); + public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) { + Result res = parser.parse(src, flags); if 
(isSpeculativeParsing && res.getErrors().size() > 0) { return; } @@ -364,4 +364,8 @@ public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpec this.emitLocation(err, lbl); } } + + public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) { + extract(src, sourceMap, parent, isSpeculativeParsing, ""); + } } diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 5b2177f3fffa..bc39ff31316b 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -67,6 +67,7 @@ public List getErrors() { private List errors; private List backrefs; private int maxbackref; + private String flags; /** Parse the given string as a regular expression. */ public Result parse(String src) { @@ -82,6 +83,11 @@ public Result parse(String src) { return new Result(root, errors); } + public Result parse(String src, String flags) { + this.flags = flags; + return parse(src); + } + private static String fromCodePoint(int codepoint) { if (Character.isValidCodePoint(codepoint)) return new String(Character.toChars(codepoint)); // replacement character From d162acf02c4d1598adb4d0e2af01b3f38b0c39c1 Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 16:54:08 +0100 Subject: [PATCH 02/27] Added quoted string \q parser test cases --- .../tests/es2024/input/regex_quoted_string.js | 5 + .../extractor/tests/es2024/options.json | 3 + .../output/trap/regex_quoted_string.js.trap | 431 ++++++++++++++++++ 3 files changed, 439 insertions(+) create mode 100644 javascript/extractor/tests/es2024/input/regex_quoted_string.js create mode 100644 javascript/extractor/tests/es2024/options.json create mode 100644 javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap diff --git a/javascript/extractor/tests/es2024/input/regex_quoted_string.js 
b/javascript/extractor/tests/es2024/input/regex_quoted_string.js new file mode 100644 index 000000000000..2749af7f9d09 --- /dev/null +++ b/javascript/extractor/tests/es2024/input/regex_quoted_string.js @@ -0,0 +1,5 @@ +/[\q{abc}]/v; +/[\q{abc|cbd|dcb}]/v; +/[\q{\}}]/v; +/[\q{\{}]/v; +/[\q{cc|\}a|cc}]/v; diff --git a/javascript/extractor/tests/es2024/options.json b/javascript/extractor/tests/es2024/options.json new file mode 100644 index 000000000000..075583ca1f63 --- /dev/null +++ b/javascript/extractor/tests/es2024/options.json @@ -0,0 +1,3 @@ +{ + "experimental": true +} diff --git a/javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap b/javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap new file mode 100644 index 000000000000..8b95ac62741e --- /dev/null +++ b/javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap @@ -0,0 +1,431 @@ +#10000=@"/regex_quoted_string.js;sourcefile" +files(#10000,"/regex_quoted_string.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"/[\q{abc}]/v;"," +") +#20003=@"loc,{#10000},1,1,1,13" +locations_default(#20003,#10000,1,1,1,13) +hasLocation(#20002,#20003) +#20004=* +lines(#20004,#20001,"/[\q{abc|cbd|dcb}]/v;"," +") +#20005=@"loc,{#10000},2,1,2,21" +locations_default(#20005,#10000,2,1,2,21) +hasLocation(#20004,#20005) +#20006=* +lines(#20006,#20001,"/[\q{\}}]/v;"," +") +#20007=@"loc,{#10000},3,1,3,12" +locations_default(#20007,#10000,3,1,3,12) +hasLocation(#20006,#20007) +#20008=* +lines(#20008,#20001,"/[\q{\{}]/v;"," +") +#20009=@"loc,{#10000},4,1,4,12" +locations_default(#20009,#10000,4,1,4,12) +hasLocation(#20008,#20009) +#20010=* +lines(#20010,#20001,"/[\q{cc|\}a|cc}]/v;"," +") +#20011=@"loc,{#10000},5,1,5,19" 
+locations_default(#20011,#10000,5,1,5,19) +hasLocation(#20010,#20011) +numlines(#20001,5,5,0) +#20012=* +tokeninfo(#20012,5,#20001,0,"/[\q{abc}]/v") +#20013=@"loc,{#10000},1,1,1,12" +locations_default(#20013,#10000,1,1,1,12) +hasLocation(#20012,#20013) +#20014=* +tokeninfo(#20014,8,#20001,1,";") +#20015=@"loc,{#10000},1,13,1,13" +locations_default(#20015,#10000,1,13,1,13) +hasLocation(#20014,#20015) +#20016=* +tokeninfo(#20016,5,#20001,2,"/[\q{abc|cbd|dcb}]/v") +#20017=@"loc,{#10000},2,1,2,20" +locations_default(#20017,#10000,2,1,2,20) +hasLocation(#20016,#20017) +#20018=* +tokeninfo(#20018,8,#20001,3,";") +#20019=@"loc,{#10000},2,21,2,21" +locations_default(#20019,#10000,2,21,2,21) +hasLocation(#20018,#20019) +#20020=* +tokeninfo(#20020,5,#20001,4,"/[\q{\}}]/v") +#20021=@"loc,{#10000},3,1,3,11" +locations_default(#20021,#10000,3,1,3,11) +hasLocation(#20020,#20021) +#20022=* +tokeninfo(#20022,8,#20001,5,";") +#20023=@"loc,{#10000},3,12,3,12" +locations_default(#20023,#10000,3,12,3,12) +hasLocation(#20022,#20023) +#20024=* +tokeninfo(#20024,5,#20001,6,"/[\q{\{}]/v") +#20025=@"loc,{#10000},4,1,4,11" +locations_default(#20025,#10000,4,1,4,11) +hasLocation(#20024,#20025) +#20026=* +tokeninfo(#20026,8,#20001,7,";") +#20027=@"loc,{#10000},4,12,4,12" +locations_default(#20027,#10000,4,12,4,12) +hasLocation(#20026,#20027) +#20028=* +tokeninfo(#20028,5,#20001,8,"/[\q{cc|\}a|cc}]/v") +#20029=@"loc,{#10000},5,1,5,18" +locations_default(#20029,#10000,5,1,5,18) +hasLocation(#20028,#20029) +#20030=* +tokeninfo(#20030,8,#20001,9,";") +#20031=@"loc,{#10000},5,19,5,19" +locations_default(#20031,#10000,5,19,5,19) +hasLocation(#20030,#20031) +#20032=* +tokeninfo(#20032,0,#20001,10,"") +#20033=@"loc,{#10000},6,1,6,0" +locations_default(#20033,#10000,6,1,6,0) +hasLocation(#20032,#20033) +toplevels(#20001,0) +#20034=@"loc,{#10000},1,1,6,0" +locations_default(#20034,#10000,1,1,6,0) +hasLocation(#20001,#20034) +#20035=* +stmts(#20035,2,#20001,0,"/[\q{abc}]/v;") 
+hasLocation(#20035,#20003) +stmt_containers(#20035,#20001) +#20036=* +exprs(#20036,5,#20035,0,"/[\q{abc}]/v") +hasLocation(#20036,#20013) +enclosing_stmt(#20036,#20035) +expr_containers(#20036,#20001) +literals("/[\q{abc}]/v","/[\q{abc}]/v",#20036) +#20037=* +regexpterm(#20037,23,#20036,0,"[\q{abc}]") +#20038=@"loc,{#10000},1,2,1,10" +locations_default(#20038,#10000,1,2,1,10) +hasLocation(#20037,#20038) +#20039=* +regexpterm(#20039,21,#20037,0,"\q") +#20040=@"loc,{#10000},1,3,1,4" +locations_default(#20040,#10000,1,3,1,4) +hasLocation(#20039,#20040) +regexp_const_value(#20039,"q") +#20041=* +regexpterm(#20041,14,#20037,1,"{") +#20042=@"loc,{#10000},1,5,1,5" +locations_default(#20042,#10000,1,5,1,5) +hasLocation(#20041,#20042) +regexp_const_value(#20041,"{") +#20043=* +regexpterm(#20043,14,#20037,2,"a") +#20044=@"loc,{#10000},1,6,1,6" +locations_default(#20044,#10000,1,6,1,6) +hasLocation(#20043,#20044) +regexp_const_value(#20043,"a") +#20045=* +regexpterm(#20045,14,#20037,3,"b") +#20046=@"loc,{#10000},1,7,1,7" +locations_default(#20046,#10000,1,7,1,7) +hasLocation(#20045,#20046) +regexp_const_value(#20045,"b") +#20047=* +regexpterm(#20047,14,#20037,4,"c") +#20048=@"loc,{#10000},1,8,1,8" +locations_default(#20048,#10000,1,8,1,8) +hasLocation(#20047,#20048) +regexp_const_value(#20047,"c") +#20049=* +regexpterm(#20049,14,#20037,5,"}") +#20050=@"loc,{#10000},1,9,1,9" +locations_default(#20050,#10000,1,9,1,9) +hasLocation(#20049,#20050) +regexp_const_value(#20049,"}") +#20051=* +stmts(#20051,2,#20001,1,"/[\q{ab ... 
cb}]/v;") +hasLocation(#20051,#20005) +stmt_containers(#20051,#20001) +#20052=* +exprs(#20052,5,#20051,0,"/[\q{abc|cbd|dcb}]/v") +hasLocation(#20052,#20017) +enclosing_stmt(#20052,#20051) +expr_containers(#20052,#20001) +literals("/[\q{abc|cbd|dcb}]/v","/[\q{abc|cbd|dcb}]/v",#20052) +#20053=* +regexpterm(#20053,23,#20052,0,"[\q{abc|cbd|dcb}]") +#20054=@"loc,{#10000},2,2,2,18" +locations_default(#20054,#10000,2,2,2,18) +hasLocation(#20053,#20054) +#20055=* +regexpterm(#20055,21,#20053,0,"\q") +#20056=@"loc,{#10000},2,3,2,4" +locations_default(#20056,#10000,2,3,2,4) +hasLocation(#20055,#20056) +regexp_const_value(#20055,"q") +#20057=* +regexpterm(#20057,14,#20053,1,"{") +#20058=@"loc,{#10000},2,5,2,5" +locations_default(#20058,#10000,2,5,2,5) +hasLocation(#20057,#20058) +regexp_const_value(#20057,"{") +#20059=* +regexpterm(#20059,14,#20053,2,"a") +#20060=@"loc,{#10000},2,6,2,6" +locations_default(#20060,#10000,2,6,2,6) +hasLocation(#20059,#20060) +regexp_const_value(#20059,"a") +#20061=* +regexpterm(#20061,14,#20053,3,"b") +#20062=@"loc,{#10000},2,7,2,7" +locations_default(#20062,#10000,2,7,2,7) +hasLocation(#20061,#20062) +regexp_const_value(#20061,"b") +#20063=* +regexpterm(#20063,14,#20053,4,"c") +#20064=@"loc,{#10000},2,8,2,8" +locations_default(#20064,#10000,2,8,2,8) +hasLocation(#20063,#20064) +regexp_const_value(#20063,"c") +#20065=* +regexpterm(#20065,14,#20053,5,"|") +#20066=@"loc,{#10000},2,9,2,9" +locations_default(#20066,#10000,2,9,2,9) +hasLocation(#20065,#20066) +regexp_const_value(#20065,"|") +#20067=* +regexpterm(#20067,14,#20053,6,"c") +#20068=@"loc,{#10000},2,10,2,10" +locations_default(#20068,#10000,2,10,2,10) +hasLocation(#20067,#20068) +regexp_const_value(#20067,"c") +#20069=* +regexpterm(#20069,14,#20053,7,"b") +#20070=@"loc,{#10000},2,11,2,11" +locations_default(#20070,#10000,2,11,2,11) +hasLocation(#20069,#20070) +regexp_const_value(#20069,"b") +#20071=* +regexpterm(#20071,14,#20053,8,"d") +#20072=@"loc,{#10000},2,12,2,12" 
+locations_default(#20072,#10000,2,12,2,12) +hasLocation(#20071,#20072) +regexp_const_value(#20071,"d") +#20073=* +regexpterm(#20073,14,#20053,9,"|") +#20074=@"loc,{#10000},2,13,2,13" +locations_default(#20074,#10000,2,13,2,13) +hasLocation(#20073,#20074) +regexp_const_value(#20073,"|") +#20075=* +regexpterm(#20075,14,#20053,10,"d") +#20076=@"loc,{#10000},2,14,2,14" +locations_default(#20076,#10000,2,14,2,14) +hasLocation(#20075,#20076) +regexp_const_value(#20075,"d") +#20077=* +regexpterm(#20077,14,#20053,11,"c") +#20078=@"loc,{#10000},2,15,2,15" +locations_default(#20078,#10000,2,15,2,15) +hasLocation(#20077,#20078) +regexp_const_value(#20077,"c") +#20079=* +regexpterm(#20079,14,#20053,12,"b") +#20080=@"loc,{#10000},2,16,2,16" +locations_default(#20080,#10000,2,16,2,16) +hasLocation(#20079,#20080) +regexp_const_value(#20079,"b") +#20081=* +regexpterm(#20081,14,#20053,13,"}") +#20082=@"loc,{#10000},2,17,2,17" +locations_default(#20082,#10000,2,17,2,17) +hasLocation(#20081,#20082) +regexp_const_value(#20081,"}") +#20083=* +stmts(#20083,2,#20001,2,"/[\q{\}}]/v;") +hasLocation(#20083,#20007) +stmt_containers(#20083,#20001) +#20084=* +exprs(#20084,5,#20083,0,"/[\q{\}}]/v") +hasLocation(#20084,#20021) +enclosing_stmt(#20084,#20083) +expr_containers(#20084,#20001) +literals("/[\q{\}}]/v","/[\q{\}}]/v",#20084) +#20085=* +regexpterm(#20085,23,#20084,0,"[\q{\}}]") +#20086=@"loc,{#10000},3,2,3,9" +locations_default(#20086,#10000,3,2,3,9) +hasLocation(#20085,#20086) +#20087=* +regexpterm(#20087,21,#20085,0,"\q") +#20088=@"loc,{#10000},3,3,3,4" +locations_default(#20088,#10000,3,3,3,4) +hasLocation(#20087,#20088) +regexp_const_value(#20087,"q") +#20089=* +regexpterm(#20089,14,#20085,1,"{") +#20090=@"loc,{#10000},3,5,3,5" +locations_default(#20090,#10000,3,5,3,5) +hasLocation(#20089,#20090) +regexp_const_value(#20089,"{") +#20091=* +regexpterm(#20091,21,#20085,2,"\}") +#20092=@"loc,{#10000},3,6,3,7" +locations_default(#20092,#10000,3,6,3,7) +hasLocation(#20091,#20092) 
+regexp_const_value(#20091,"}") +#20093=* +regexpterm(#20093,14,#20085,3,"}") +#20094=@"loc,{#10000},3,8,3,8" +locations_default(#20094,#10000,3,8,3,8) +hasLocation(#20093,#20094) +regexp_const_value(#20093,"}") +#20095=* +stmts(#20095,2,#20001,3,"/[\q{\{}]/v;") +hasLocation(#20095,#20009) +stmt_containers(#20095,#20001) +#20096=* +exprs(#20096,5,#20095,0,"/[\q{\{}]/v") +hasLocation(#20096,#20025) +enclosing_stmt(#20096,#20095) +expr_containers(#20096,#20001) +literals("/[\q{\{}]/v","/[\q{\{}]/v",#20096) +#20097=* +regexpterm(#20097,23,#20096,0,"[\q{\{}]") +#20098=@"loc,{#10000},4,2,4,9" +locations_default(#20098,#10000,4,2,4,9) +hasLocation(#20097,#20098) +#20099=* +regexpterm(#20099,21,#20097,0,"\q") +#20100=@"loc,{#10000},4,3,4,4" +locations_default(#20100,#10000,4,3,4,4) +hasLocation(#20099,#20100) +regexp_const_value(#20099,"q") +#20101=* +regexpterm(#20101,14,#20097,1,"{") +#20102=@"loc,{#10000},4,5,4,5" +locations_default(#20102,#10000,4,5,4,5) +hasLocation(#20101,#20102) +regexp_const_value(#20101,"{") +#20103=* +regexpterm(#20103,21,#20097,2,"\{") +#20104=@"loc,{#10000},4,6,4,7" +locations_default(#20104,#10000,4,6,4,7) +hasLocation(#20103,#20104) +regexp_const_value(#20103,"{") +#20105=* +regexpterm(#20105,14,#20097,3,"}") +#20106=@"loc,{#10000},4,8,4,8" +locations_default(#20106,#10000,4,8,4,8) +hasLocation(#20105,#20106) +regexp_const_value(#20105,"}") +#20107=* +stmts(#20107,2,#20001,4,"/[\q{cc|\}a|cc}]/v;") +hasLocation(#20107,#20011) +stmt_containers(#20107,#20001) +#20108=* +exprs(#20108,5,#20107,0,"/[\q{cc|\}a|cc}]/v") +hasLocation(#20108,#20029) +enclosing_stmt(#20108,#20107) +expr_containers(#20108,#20001) +literals("/[\q{cc|\}a|cc}]/v","/[\q{cc|\}a|cc}]/v",#20108) +#20109=* +regexpterm(#20109,23,#20108,0,"[\q{cc|\}a|cc}]") +#20110=@"loc,{#10000},5,2,5,16" +locations_default(#20110,#10000,5,2,5,16) +hasLocation(#20109,#20110) +#20111=* +regexpterm(#20111,21,#20109,0,"\q") +#20112=@"loc,{#10000},5,3,5,4" +locations_default(#20112,#10000,5,3,5,4) 
+hasLocation(#20111,#20112) +regexp_const_value(#20111,"q") +#20113=* +regexpterm(#20113,14,#20109,1,"{") +#20114=@"loc,{#10000},5,5,5,5" +locations_default(#20114,#10000,5,5,5,5) +hasLocation(#20113,#20114) +regexp_const_value(#20113,"{") +#20115=* +regexpterm(#20115,14,#20109,2,"c") +#20116=@"loc,{#10000},5,6,5,6" +locations_default(#20116,#10000,5,6,5,6) +hasLocation(#20115,#20116) +regexp_const_value(#20115,"c") +#20117=* +regexpterm(#20117,14,#20109,3,"c") +#20118=@"loc,{#10000},5,7,5,7" +locations_default(#20118,#10000,5,7,5,7) +hasLocation(#20117,#20118) +regexp_const_value(#20117,"c") +#20119=* +regexpterm(#20119,14,#20109,4,"|") +#20120=@"loc,{#10000},5,8,5,8" +locations_default(#20120,#10000,5,8,5,8) +hasLocation(#20119,#20120) +regexp_const_value(#20119,"|") +#20121=* +regexpterm(#20121,21,#20109,5,"\}") +#20122=@"loc,{#10000},5,9,5,10" +locations_default(#20122,#10000,5,9,5,10) +hasLocation(#20121,#20122) +regexp_const_value(#20121,"}") +#20123=* +regexpterm(#20123,14,#20109,6,"a") +#20124=@"loc,{#10000},5,11,5,11" +locations_default(#20124,#10000,5,11,5,11) +hasLocation(#20123,#20124) +regexp_const_value(#20123,"a") +#20125=* +regexpterm(#20125,14,#20109,7,"|") +#20126=@"loc,{#10000},5,12,5,12" +locations_default(#20126,#10000,5,12,5,12) +hasLocation(#20125,#20126) +regexp_const_value(#20125,"|") +#20127=* +regexpterm(#20127,14,#20109,8,"c") +#20128=@"loc,{#10000},5,13,5,13" +locations_default(#20128,#10000,5,13,5,13) +hasLocation(#20127,#20128) +regexp_const_value(#20127,"c") +#20129=* +regexpterm(#20129,14,#20109,9,"c") +#20130=@"loc,{#10000},5,14,5,14" +locations_default(#20130,#10000,5,14,5,14) +hasLocation(#20129,#20130) +regexp_const_value(#20129,"c") +#20131=* +regexpterm(#20131,14,#20109,10,"}") +#20132=@"loc,{#10000},5,15,5,15" +locations_default(#20132,#10000,5,15,5,15) +hasLocation(#20131,#20132) +regexp_const_value(#20131,"}") +#20133=* +entry_cfg_node(#20133,#20001) +#20134=@"loc,{#10000},1,1,1,0" +locations_default(#20134,#10000,1,1,1,0) 
+hasLocation(#20133,#20134) +#20135=* +exit_cfg_node(#20135,#20001) +hasLocation(#20135,#20033) +successor(#20107,#20108) +successor(#20108,#20135) +successor(#20095,#20096) +successor(#20096,#20107) +successor(#20083,#20084) +successor(#20084,#20095) +successor(#20051,#20052) +successor(#20052,#20083) +successor(#20035,#20036) +successor(#20036,#20051) +successor(#20133,#20035) +numlines(#10000,5,5,0) +filetype(#10000,"javascript") From ed418be97a070a7c9b51fb23ba6f21e5aaf99a21 Mon Sep 17 00:00:00 2001 From: Napalys Date: Fri, 28 Feb 2025 18:51:24 +0100 Subject: [PATCH 03/27] Add support for '\q{}' escape sequence in regular expressions. --- .../regexp/CharacterClassQuotedString.java | 25 ++ .../src/com/semmle/js/ast/regexp/Visitor.java | 2 + .../semmle/js/extractor/RegExpExtractor.java | 8 + .../com/semmle/js/parser/RegExpParser.java | 46 +++ .../output/trap/regex_quoted_string.js.trap | 389 ++++++------------ 5 files changed, 203 insertions(+), 267 deletions(-) create mode 100644 javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java new file mode 100644 index 000000000000..b92e9e67c723 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java @@ -0,0 +1,25 @@ +package com.semmle.js.ast.regexp; + +import com.semmle.js.ast.SourceLocation; + +/** + * A '\q{}' escape sequence in a regular expression, which is a special extension + * to standard regular expressions. 
+ */ +public class CharacterClassQuotedString extends RegExpTerm { + private final RegExpTerm term; + + public CharacterClassQuotedString(SourceLocation loc, RegExpTerm term) { + super(loc, "CharacterClassQuotedString"); + this.term = term; + } + + public RegExpTerm getTerm() { + return term; + } + + @Override + public void accept(Visitor v) { + v.visit(this); + } +} diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java index 3671a55694be..4af27e6aa8a3 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java @@ -61,4 +61,6 @@ public interface Visitor { public void visit(ZeroWidthNegativeLookbehind nd); public void visit(UnicodePropertyEscape nd); + + public void visit(CharacterClassQuotedString nd); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 5c02a4e99351..1fc69458c109 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -10,6 +10,7 @@ import com.semmle.js.ast.regexp.Caret; import com.semmle.js.ast.regexp.CharacterClass; import com.semmle.js.ast.regexp.CharacterClassEscape; +import com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; @@ -92,6 +93,7 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) { termkinds.put("ZeroWidthPositiveLookbehind", 25); termkinds.put("ZeroWidthNegativeLookbehind", 26); termkinds.put("UnicodePropertyEscape", 27); + termkinds.put("CharacterClassQuotedString", 28); } private static final String[] errmsgs = @@ -344,6 +346,12 @@ public void visit(CharacterClassRange nd) { 
visit(nd.getLeft(), lbl, 0); visit(nd.getRight(), lbl, 1); } + + @Override + public void visit(CharacterClassQuotedString nd) { + Label lbl = extractTerm(nd, parent, idx); + visit(nd.getTerm(), lbl, 0); + } } public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) { diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index bc39ff31316b..478c0c25f3be 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -6,6 +6,7 @@ import com.semmle.js.ast.regexp.Caret; import com.semmle.js.ast.regexp.CharacterClass; import com.semmle.js.ast.regexp.CharacterClassEscape; +import com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; @@ -283,6 +284,45 @@ private RegExpTerm parseTerm() { return this.finishTerm(this.parseQuantifierOpt(loc, this.parseAtom())); } + private RegExpTerm parseDisjunctionInsideQuotedString() { + SourceLocation loc = new SourceLocation(pos()); + List disjuncts = new ArrayList<>(); + disjuncts.add(this.parseAlternativeInsideQuotedString()); + while (this.match("|")) { + disjuncts.add(this.parseAlternativeInsideQuotedString()); + } + if (disjuncts.size() == 1) return disjuncts.get(0); + return this.finishTerm(new Disjunction(loc, disjuncts)); + } + + private RegExpTerm parseAlternativeInsideQuotedString() { + SourceLocation loc = new SourceLocation(pos()); + StringBuilder sb = new StringBuilder(); + boolean escaped = false; + while (true) { + // If we're at the end of the string, something went wrong. 
+ if (this.atEOS()) { + this.error(Error.UNEXPECTED_EOS); + break; + } + // We can end parsing if we're not escaped and we see a `|` which would mean Alternation + // or `}` which would mean the end of the Quoted String. + if(!escaped && this.lookahead(null, "|", "}")){ + break; + } + char c = this.nextChar(); + // Track whether the character is an escape character. + escaped = !escaped && (c == '\\'); + sb.append(c); + } + + String literal = sb.toString(); + loc.setEnd(pos()); + loc.setSource(literal); + + return new Constant(loc, literal); + } + private RegExpTerm parseQuantifierOpt(SourceLocation loc, RegExpTerm atom) { if (this.match("*")) return this.finishTerm(new Star(loc, atom, !this.match("?"))); if (this.match("+")) return this.finishTerm(new Plus(loc, atom, !this.match("?"))); @@ -427,6 +467,12 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) { return this.finishTerm(new NamedBackReference(loc, name, "\\k<" + name + ">")); } + if (this.match("q{")) { + RegExpTerm term = parseDisjunctionInsideQuotedString(); + this.expectRBrace(); + return this.finishTerm(new CharacterClassQuotedString(loc, term)); + } + if (this.match("p{", "P{")) { String name = this.readIdentifier(); if (this.match("=")) { diff --git a/javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap b/javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap index 8b95ac62741e..822a1302bcf5 100644 --- a/javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/regex_quoted_string.js.trap @@ -115,317 +115,172 @@ regexpterm(#20037,23,#20036,0,"[\q{abc}]") locations_default(#20038,#10000,1,2,1,10) hasLocation(#20037,#20038) #20039=* -regexpterm(#20039,21,#20037,0,"\q") -#20040=@"loc,{#10000},1,3,1,4" -locations_default(#20040,#10000,1,3,1,4) +regexpterm(#20039,28,#20037,0,"\q{abc}") +#20040=@"loc,{#10000},1,3,1,9" +locations_default(#20040,#10000,1,3,1,9) 
hasLocation(#20039,#20040) -regexp_const_value(#20039,"q") #20041=* -regexpterm(#20041,14,#20037,1,"{") -#20042=@"loc,{#10000},1,5,1,5" -locations_default(#20042,#10000,1,5,1,5) +regexpterm(#20041,14,#20039,0,"abc") +#20042=@"loc,{#10000},1,6,1,8" +locations_default(#20042,#10000,1,6,1,8) hasLocation(#20041,#20042) -regexp_const_value(#20041,"{") +regexp_const_value(#20041,"abc") #20043=* -regexpterm(#20043,14,#20037,2,"a") -#20044=@"loc,{#10000},1,6,1,6" -locations_default(#20044,#10000,1,6,1,6) -hasLocation(#20043,#20044) -regexp_const_value(#20043,"a") +stmts(#20043,2,#20001,1,"/[\q{ab ... cb}]/v;") +hasLocation(#20043,#20005) +stmt_containers(#20043,#20001) +#20044=* +exprs(#20044,5,#20043,0,"/[\q{abc|cbd|dcb}]/v") +hasLocation(#20044,#20017) +enclosing_stmt(#20044,#20043) +expr_containers(#20044,#20001) +literals("/[\q{abc|cbd|dcb}]/v","/[\q{abc|cbd|dcb}]/v",#20044) #20045=* -regexpterm(#20045,14,#20037,3,"b") -#20046=@"loc,{#10000},1,7,1,7" -locations_default(#20046,#10000,1,7,1,7) +regexpterm(#20045,23,#20044,0,"[\q{abc|cbd|dcb}]") +#20046=@"loc,{#10000},2,2,2,18" +locations_default(#20046,#10000,2,2,2,18) hasLocation(#20045,#20046) -regexp_const_value(#20045,"b") #20047=* -regexpterm(#20047,14,#20037,4,"c") -#20048=@"loc,{#10000},1,8,1,8" -locations_default(#20048,#10000,1,8,1,8) +regexpterm(#20047,28,#20045,0,"\q{abc|cbd|dcb}") +#20048=@"loc,{#10000},2,3,2,17" +locations_default(#20048,#10000,2,3,2,17) hasLocation(#20047,#20048) -regexp_const_value(#20047,"c") #20049=* -regexpterm(#20049,14,#20037,5,"}") -#20050=@"loc,{#10000},1,9,1,9" -locations_default(#20050,#10000,1,9,1,9) +regexpterm(#20049,0,#20047,0,"abc|cbd|dcb") +#20050=@"loc,{#10000},2,6,2,16" +locations_default(#20050,#10000,2,6,2,16) hasLocation(#20049,#20050) -regexp_const_value(#20049,"}") #20051=* -stmts(#20051,2,#20001,1,"/[\q{ab ... 
cb}]/v;") -hasLocation(#20051,#20005) -stmt_containers(#20051,#20001) -#20052=* -exprs(#20052,5,#20051,0,"/[\q{abc|cbd|dcb}]/v") -hasLocation(#20052,#20017) -enclosing_stmt(#20052,#20051) -expr_containers(#20052,#20001) -literals("/[\q{abc|cbd|dcb}]/v","/[\q{abc|cbd|dcb}]/v",#20052) +regexpterm(#20051,14,#20049,0,"abc") +#20052=@"loc,{#10000},2,6,2,8" +locations_default(#20052,#10000,2,6,2,8) +hasLocation(#20051,#20052) +regexp_const_value(#20051,"abc") #20053=* -regexpterm(#20053,23,#20052,0,"[\q{abc|cbd|dcb}]") -#20054=@"loc,{#10000},2,2,2,18" -locations_default(#20054,#10000,2,2,2,18) +regexpterm(#20053,14,#20049,1,"cbd") +#20054=@"loc,{#10000},2,10,2,12" +locations_default(#20054,#10000,2,10,2,12) hasLocation(#20053,#20054) +regexp_const_value(#20053,"cbd") #20055=* -regexpterm(#20055,21,#20053,0,"\q") -#20056=@"loc,{#10000},2,3,2,4" -locations_default(#20056,#10000,2,3,2,4) +regexpterm(#20055,14,#20049,2,"dcb") +#20056=@"loc,{#10000},2,14,2,16" +locations_default(#20056,#10000,2,14,2,16) hasLocation(#20055,#20056) -regexp_const_value(#20055,"q") +regexp_const_value(#20055,"dcb") #20057=* -regexpterm(#20057,14,#20053,1,"{") -#20058=@"loc,{#10000},2,5,2,5" -locations_default(#20058,#10000,2,5,2,5) -hasLocation(#20057,#20058) -regexp_const_value(#20057,"{") +stmts(#20057,2,#20001,2,"/[\q{\}}]/v;") +hasLocation(#20057,#20007) +stmt_containers(#20057,#20001) +#20058=* +exprs(#20058,5,#20057,0,"/[\q{\}}]/v") +hasLocation(#20058,#20021) +enclosing_stmt(#20058,#20057) +expr_containers(#20058,#20001) +literals("/[\q{\}}]/v","/[\q{\}}]/v",#20058) #20059=* -regexpterm(#20059,14,#20053,2,"a") -#20060=@"loc,{#10000},2,6,2,6" -locations_default(#20060,#10000,2,6,2,6) +regexpterm(#20059,23,#20058,0,"[\q{\}}]") +#20060=@"loc,{#10000},3,2,3,9" +locations_default(#20060,#10000,3,2,3,9) hasLocation(#20059,#20060) -regexp_const_value(#20059,"a") #20061=* -regexpterm(#20061,14,#20053,3,"b") -#20062=@"loc,{#10000},2,7,2,7" -locations_default(#20062,#10000,2,7,2,7) 
+regexpterm(#20061,28,#20059,0,"\q{\}}") +#20062=@"loc,{#10000},3,3,3,8" +locations_default(#20062,#10000,3,3,3,8) hasLocation(#20061,#20062) -regexp_const_value(#20061,"b") #20063=* -regexpterm(#20063,14,#20053,4,"c") -#20064=@"loc,{#10000},2,8,2,8" -locations_default(#20064,#10000,2,8,2,8) +regexpterm(#20063,14,#20061,0,"\}") +#20064=@"loc,{#10000},3,6,3,7" +locations_default(#20064,#10000,3,6,3,7) hasLocation(#20063,#20064) -regexp_const_value(#20063,"c") +regexp_const_value(#20063,"\}") #20065=* -regexpterm(#20065,14,#20053,5,"|") -#20066=@"loc,{#10000},2,9,2,9" -locations_default(#20066,#10000,2,9,2,9) -hasLocation(#20065,#20066) -regexp_const_value(#20065,"|") +stmts(#20065,2,#20001,3,"/[\q{\{}]/v;") +hasLocation(#20065,#20009) +stmt_containers(#20065,#20001) +#20066=* +exprs(#20066,5,#20065,0,"/[\q{\{}]/v") +hasLocation(#20066,#20025) +enclosing_stmt(#20066,#20065) +expr_containers(#20066,#20001) +literals("/[\q{\{}]/v","/[\q{\{}]/v",#20066) #20067=* -regexpterm(#20067,14,#20053,6,"c") -#20068=@"loc,{#10000},2,10,2,10" -locations_default(#20068,#10000,2,10,2,10) +regexpterm(#20067,23,#20066,0,"[\q{\{}]") +#20068=@"loc,{#10000},4,2,4,9" +locations_default(#20068,#10000,4,2,4,9) hasLocation(#20067,#20068) -regexp_const_value(#20067,"c") #20069=* -regexpterm(#20069,14,#20053,7,"b") -#20070=@"loc,{#10000},2,11,2,11" -locations_default(#20070,#10000,2,11,2,11) +regexpterm(#20069,28,#20067,0,"\q{\{}") +#20070=@"loc,{#10000},4,3,4,8" +locations_default(#20070,#10000,4,3,4,8) hasLocation(#20069,#20070) -regexp_const_value(#20069,"b") #20071=* -regexpterm(#20071,14,#20053,8,"d") -#20072=@"loc,{#10000},2,12,2,12" -locations_default(#20072,#10000,2,12,2,12) +regexpterm(#20071,14,#20069,0,"\{") +#20072=@"loc,{#10000},4,6,4,7" +locations_default(#20072,#10000,4,6,4,7) hasLocation(#20071,#20072) -regexp_const_value(#20071,"d") +regexp_const_value(#20071,"\{") #20073=* -regexpterm(#20073,14,#20053,9,"|") -#20074=@"loc,{#10000},2,13,2,13" 
-locations_default(#20074,#10000,2,13,2,13) -hasLocation(#20073,#20074) -regexp_const_value(#20073,"|") +stmts(#20073,2,#20001,4,"/[\q{cc|\}a|cc}]/v;") +hasLocation(#20073,#20011) +stmt_containers(#20073,#20001) +#20074=* +exprs(#20074,5,#20073,0,"/[\q{cc|\}a|cc}]/v") +hasLocation(#20074,#20029) +enclosing_stmt(#20074,#20073) +expr_containers(#20074,#20001) +literals("/[\q{cc|\}a|cc}]/v","/[\q{cc|\}a|cc}]/v",#20074) #20075=* -regexpterm(#20075,14,#20053,10,"d") -#20076=@"loc,{#10000},2,14,2,14" -locations_default(#20076,#10000,2,14,2,14) +regexpterm(#20075,23,#20074,0,"[\q{cc|\}a|cc}]") +#20076=@"loc,{#10000},5,2,5,16" +locations_default(#20076,#10000,5,2,5,16) hasLocation(#20075,#20076) -regexp_const_value(#20075,"d") #20077=* -regexpterm(#20077,14,#20053,11,"c") -#20078=@"loc,{#10000},2,15,2,15" -locations_default(#20078,#10000,2,15,2,15) +regexpterm(#20077,28,#20075,0,"\q{cc|\}a|cc}") +#20078=@"loc,{#10000},5,3,5,15" +locations_default(#20078,#10000,5,3,5,15) hasLocation(#20077,#20078) -regexp_const_value(#20077,"c") #20079=* -regexpterm(#20079,14,#20053,12,"b") -#20080=@"loc,{#10000},2,16,2,16" -locations_default(#20080,#10000,2,16,2,16) +regexpterm(#20079,0,#20077,0,"cc|\}a|cc") +#20080=@"loc,{#10000},5,6,5,14" +locations_default(#20080,#10000,5,6,5,14) hasLocation(#20079,#20080) -regexp_const_value(#20079,"b") #20081=* -regexpterm(#20081,14,#20053,13,"}") -#20082=@"loc,{#10000},2,17,2,17" -locations_default(#20082,#10000,2,17,2,17) +regexpterm(#20081,14,#20079,0,"cc") +#20082=@"loc,{#10000},5,6,5,7" +locations_default(#20082,#10000,5,6,5,7) hasLocation(#20081,#20082) -regexp_const_value(#20081,"}") +regexp_const_value(#20081,"cc") #20083=* -stmts(#20083,2,#20001,2,"/[\q{\}}]/v;") -hasLocation(#20083,#20007) -stmt_containers(#20083,#20001) -#20084=* -exprs(#20084,5,#20083,0,"/[\q{\}}]/v") -hasLocation(#20084,#20021) -enclosing_stmt(#20084,#20083) -expr_containers(#20084,#20001) -literals("/[\q{\}}]/v","/[\q{\}}]/v",#20084) +regexpterm(#20083,14,#20079,1,"\}a") 
+#20084=@"loc,{#10000},5,9,5,11" +locations_default(#20084,#10000,5,9,5,11) +hasLocation(#20083,#20084) +regexp_const_value(#20083,"\}a") #20085=* -regexpterm(#20085,23,#20084,0,"[\q{\}}]") -#20086=@"loc,{#10000},3,2,3,9" -locations_default(#20086,#10000,3,2,3,9) +regexpterm(#20085,14,#20079,2,"cc") +#20086=@"loc,{#10000},5,13,5,14" +locations_default(#20086,#10000,5,13,5,14) hasLocation(#20085,#20086) +regexp_const_value(#20085,"cc") #20087=* -regexpterm(#20087,21,#20085,0,"\q") -#20088=@"loc,{#10000},3,3,3,4" -locations_default(#20088,#10000,3,3,3,4) +entry_cfg_node(#20087,#20001) +#20088=@"loc,{#10000},1,1,1,0" +locations_default(#20088,#10000,1,1,1,0) hasLocation(#20087,#20088) -regexp_const_value(#20087,"q") #20089=* -regexpterm(#20089,14,#20085,1,"{") -#20090=@"loc,{#10000},3,5,3,5" -locations_default(#20090,#10000,3,5,3,5) -hasLocation(#20089,#20090) -regexp_const_value(#20089,"{") -#20091=* -regexpterm(#20091,21,#20085,2,"\}") -#20092=@"loc,{#10000},3,6,3,7" -locations_default(#20092,#10000,3,6,3,7) -hasLocation(#20091,#20092) -regexp_const_value(#20091,"}") -#20093=* -regexpterm(#20093,14,#20085,3,"}") -#20094=@"loc,{#10000},3,8,3,8" -locations_default(#20094,#10000,3,8,3,8) -hasLocation(#20093,#20094) -regexp_const_value(#20093,"}") -#20095=* -stmts(#20095,2,#20001,3,"/[\q{\{}]/v;") -hasLocation(#20095,#20009) -stmt_containers(#20095,#20001) -#20096=* -exprs(#20096,5,#20095,0,"/[\q{\{}]/v") -hasLocation(#20096,#20025) -enclosing_stmt(#20096,#20095) -expr_containers(#20096,#20001) -literals("/[\q{\{}]/v","/[\q{\{}]/v",#20096) -#20097=* -regexpterm(#20097,23,#20096,0,"[\q{\{}]") -#20098=@"loc,{#10000},4,2,4,9" -locations_default(#20098,#10000,4,2,4,9) -hasLocation(#20097,#20098) -#20099=* -regexpterm(#20099,21,#20097,0,"\q") -#20100=@"loc,{#10000},4,3,4,4" -locations_default(#20100,#10000,4,3,4,4) -hasLocation(#20099,#20100) -regexp_const_value(#20099,"q") -#20101=* -regexpterm(#20101,14,#20097,1,"{") -#20102=@"loc,{#10000},4,5,4,5" 
-locations_default(#20102,#10000,4,5,4,5) -hasLocation(#20101,#20102) -regexp_const_value(#20101,"{") -#20103=* -regexpterm(#20103,21,#20097,2,"\{") -#20104=@"loc,{#10000},4,6,4,7" -locations_default(#20104,#10000,4,6,4,7) -hasLocation(#20103,#20104) -regexp_const_value(#20103,"{") -#20105=* -regexpterm(#20105,14,#20097,3,"}") -#20106=@"loc,{#10000},4,8,4,8" -locations_default(#20106,#10000,4,8,4,8) -hasLocation(#20105,#20106) -regexp_const_value(#20105,"}") -#20107=* -stmts(#20107,2,#20001,4,"/[\q{cc|\}a|cc}]/v;") -hasLocation(#20107,#20011) -stmt_containers(#20107,#20001) -#20108=* -exprs(#20108,5,#20107,0,"/[\q{cc|\}a|cc}]/v") -hasLocation(#20108,#20029) -enclosing_stmt(#20108,#20107) -expr_containers(#20108,#20001) -literals("/[\q{cc|\}a|cc}]/v","/[\q{cc|\}a|cc}]/v",#20108) -#20109=* -regexpterm(#20109,23,#20108,0,"[\q{cc|\}a|cc}]") -#20110=@"loc,{#10000},5,2,5,16" -locations_default(#20110,#10000,5,2,5,16) -hasLocation(#20109,#20110) -#20111=* -regexpterm(#20111,21,#20109,0,"\q") -#20112=@"loc,{#10000},5,3,5,4" -locations_default(#20112,#10000,5,3,5,4) -hasLocation(#20111,#20112) -regexp_const_value(#20111,"q") -#20113=* -regexpterm(#20113,14,#20109,1,"{") -#20114=@"loc,{#10000},5,5,5,5" -locations_default(#20114,#10000,5,5,5,5) -hasLocation(#20113,#20114) -regexp_const_value(#20113,"{") -#20115=* -regexpterm(#20115,14,#20109,2,"c") -#20116=@"loc,{#10000},5,6,5,6" -locations_default(#20116,#10000,5,6,5,6) -hasLocation(#20115,#20116) -regexp_const_value(#20115,"c") -#20117=* -regexpterm(#20117,14,#20109,3,"c") -#20118=@"loc,{#10000},5,7,5,7" -locations_default(#20118,#10000,5,7,5,7) -hasLocation(#20117,#20118) -regexp_const_value(#20117,"c") -#20119=* -regexpterm(#20119,14,#20109,4,"|") -#20120=@"loc,{#10000},5,8,5,8" -locations_default(#20120,#10000,5,8,5,8) -hasLocation(#20119,#20120) -regexp_const_value(#20119,"|") -#20121=* -regexpterm(#20121,21,#20109,5,"\}") -#20122=@"loc,{#10000},5,9,5,10" -locations_default(#20122,#10000,5,9,5,10) 
-hasLocation(#20121,#20122) -regexp_const_value(#20121,"}") -#20123=* -regexpterm(#20123,14,#20109,6,"a") -#20124=@"loc,{#10000},5,11,5,11" -locations_default(#20124,#10000,5,11,5,11) -hasLocation(#20123,#20124) -regexp_const_value(#20123,"a") -#20125=* -regexpterm(#20125,14,#20109,7,"|") -#20126=@"loc,{#10000},5,12,5,12" -locations_default(#20126,#10000,5,12,5,12) -hasLocation(#20125,#20126) -regexp_const_value(#20125,"|") -#20127=* -regexpterm(#20127,14,#20109,8,"c") -#20128=@"loc,{#10000},5,13,5,13" -locations_default(#20128,#10000,5,13,5,13) -hasLocation(#20127,#20128) -regexp_const_value(#20127,"c") -#20129=* -regexpterm(#20129,14,#20109,9,"c") -#20130=@"loc,{#10000},5,14,5,14" -locations_default(#20130,#10000,5,14,5,14) -hasLocation(#20129,#20130) -regexp_const_value(#20129,"c") -#20131=* -regexpterm(#20131,14,#20109,10,"}") -#20132=@"loc,{#10000},5,15,5,15" -locations_default(#20132,#10000,5,15,5,15) -hasLocation(#20131,#20132) -regexp_const_value(#20131,"}") -#20133=* -entry_cfg_node(#20133,#20001) -#20134=@"loc,{#10000},1,1,1,0" -locations_default(#20134,#10000,1,1,1,0) -hasLocation(#20133,#20134) -#20135=* -exit_cfg_node(#20135,#20001) -hasLocation(#20135,#20033) -successor(#20107,#20108) -successor(#20108,#20135) -successor(#20095,#20096) -successor(#20096,#20107) -successor(#20083,#20084) -successor(#20084,#20095) -successor(#20051,#20052) -successor(#20052,#20083) +exit_cfg_node(#20089,#20001) +hasLocation(#20089,#20033) +successor(#20073,#20074) +successor(#20074,#20089) +successor(#20065,#20066) +successor(#20066,#20073) +successor(#20057,#20058) +successor(#20058,#20065) +successor(#20043,#20044) +successor(#20044,#20057) successor(#20035,#20036) -successor(#20036,#20051) -successor(#20133,#20035) +successor(#20036,#20043) +successor(#20087,#20035) numlines(#10000,5,5,0) filetype(#10000,"javascript") From ab7e08f40fc892c1ea16eafa41ccdbc12e8cd722 Mon Sep 17 00:00:00 2001 From: Napalys Date: Fri, 28 Feb 2025 18:59:23 +0100 Subject: [PATCH 04/27] Added 
test cases for nested character class. --- .../input/regex_nested_character_class.js | 3 + .../trap/regex_nested_character_class.js.trap | 266 ++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 javascript/extractor/tests/es2024/input/regex_nested_character_class.js create mode 100644 javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap diff --git a/javascript/extractor/tests/es2024/input/regex_nested_character_class.js b/javascript/extractor/tests/es2024/input/regex_nested_character_class.js new file mode 100644 index 000000000000..8326d338f54f --- /dev/null +++ b/javascript/extractor/tests/es2024/input/regex_nested_character_class.js @@ -0,0 +1,3 @@ +/[[]]/v; //Previously not allowed to nest character classes now completely valid with v flag. +/[[a]]/v; +/[ [] [ [] [] ] ]/v; diff --git a/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap new file mode 100644 index 000000000000..292361c0b3e3 --- /dev/null +++ b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap @@ -0,0 +1,266 @@ +#10000=@"/regex_nested_character_class.js;sourcefile" +files(#10000,"/regex_nested_character_class.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +comments(#20002,0,#20001,"Previously not allowed to nest character classes now completely valid with v flag.","//Previ ... 
v flag.") +#20003=@"loc,{#10000},1,10,1,93" +locations_default(#20003,#10000,1,10,1,93) +hasLocation(#20002,#20003) +#20004=* +lines(#20004,#20001,"/[[]]/v; //Previously not allowed to nest character classes now completely valid with v flag."," +") +#20005=@"loc,{#10000},1,1,1,93" +locations_default(#20005,#10000,1,1,1,93) +hasLocation(#20004,#20005) +#20006=* +lines(#20006,#20001,"/[[a]]/v;"," +") +#20007=@"loc,{#10000},2,1,2,9" +locations_default(#20007,#10000,2,1,2,9) +hasLocation(#20006,#20007) +#20008=* +lines(#20008,#20001,"/[ [] [ [] [] ] ]/v;"," +") +#20009=@"loc,{#10000},3,1,3,20" +locations_default(#20009,#10000,3,1,3,20) +hasLocation(#20008,#20009) +numlines(#20001,3,3,1) +#20010=* +tokeninfo(#20010,5,#20001,0,"/[[]]/v") +#20011=@"loc,{#10000},1,1,1,7" +locations_default(#20011,#10000,1,1,1,7) +hasLocation(#20010,#20011) +#20012=* +tokeninfo(#20012,8,#20001,1,";") +#20013=@"loc,{#10000},1,8,1,8" +locations_default(#20013,#10000,1,8,1,8) +hasLocation(#20012,#20013) +#20014=* +tokeninfo(#20014,5,#20001,2,"/[[a]]/v") +#20015=@"loc,{#10000},2,1,2,8" +locations_default(#20015,#10000,2,1,2,8) +hasLocation(#20014,#20015) +next_token(#20002,#20014) +#20016=* +tokeninfo(#20016,8,#20001,3,";") +#20017=@"loc,{#10000},2,9,2,9" +locations_default(#20017,#10000,2,9,2,9) +hasLocation(#20016,#20017) +#20018=* +tokeninfo(#20018,5,#20001,4,"/[ [] [ [] [] ] ]/v") +#20019=@"loc,{#10000},3,1,3,19" +locations_default(#20019,#10000,3,1,3,19) +hasLocation(#20018,#20019) +#20020=* +tokeninfo(#20020,8,#20001,5,";") +#20021=@"loc,{#10000},3,20,3,20" +locations_default(#20021,#10000,3,20,3,20) +hasLocation(#20020,#20021) +#20022=* +tokeninfo(#20022,0,#20001,6,"") +#20023=@"loc,{#10000},4,1,4,0" +locations_default(#20023,#10000,4,1,4,0) +hasLocation(#20022,#20023) +toplevels(#20001,0) +#20024=@"loc,{#10000},1,1,4,0" +locations_default(#20024,#10000,1,1,4,0) +hasLocation(#20001,#20024) +#20025=* +stmts(#20025,2,#20001,0,"/[[]]/v;") +#20026=@"loc,{#10000},1,1,1,8" 
+locations_default(#20026,#10000,1,1,1,8) +hasLocation(#20025,#20026) +stmt_containers(#20025,#20001) +#20027=* +exprs(#20027,5,#20025,0,"/[[]]/v") +hasLocation(#20027,#20011) +enclosing_stmt(#20027,#20025) +expr_containers(#20027,#20001) +literals("/[[]]/v","/[[]]/v",#20027) +#20028=* +regexpterm(#20028,1,#20027,0,"[[]]") +#20029=@"loc,{#10000},1,2,1,5" +locations_default(#20029,#10000,1,2,1,5) +hasLocation(#20028,#20029) +#20030=* +regexpterm(#20030,23,#20028,0,"[[]") +#20031=@"loc,{#10000},1,2,1,4" +locations_default(#20031,#10000,1,2,1,4) +hasLocation(#20030,#20031) +#20032=* +regexpterm(#20032,14,#20030,0,"[") +#20033=@"loc,{#10000},1,3,1,3" +locations_default(#20033,#10000,1,3,1,3) +hasLocation(#20032,#20033) +regexp_const_value(#20032,"[") +#20034=* +regexpterm(#20034,14,#20028,1,"]") +#20035=@"loc,{#10000},1,5,1,5" +locations_default(#20035,#10000,1,5,1,5) +hasLocation(#20034,#20035) +regexp_const_value(#20034,"]") +#20036=* +regexp_parse_errors(#20036,#20028,"unexpected character") +hasLocation(#20036,#20035) +#20037=* +stmts(#20037,2,#20001,1,"/[[a]]/v;") +hasLocation(#20037,#20007) +stmt_containers(#20037,#20001) +#20038=* +exprs(#20038,5,#20037,0,"/[[a]]/v") +hasLocation(#20038,#20015) +enclosing_stmt(#20038,#20037) +expr_containers(#20038,#20001) +literals("/[[a]]/v","/[[a]]/v",#20038) +#20039=* +regexpterm(#20039,1,#20038,0,"[[a]]") +#20040=@"loc,{#10000},2,2,2,6" +locations_default(#20040,#10000,2,2,2,6) +hasLocation(#20039,#20040) +#20041=* +regexpterm(#20041,23,#20039,0,"[[a]") +#20042=@"loc,{#10000},2,2,2,5" +locations_default(#20042,#10000,2,2,2,5) +hasLocation(#20041,#20042) +#20043=* +regexpterm(#20043,14,#20041,0,"[") +#20044=@"loc,{#10000},2,3,2,3" +locations_default(#20044,#10000,2,3,2,3) +hasLocation(#20043,#20044) +regexp_const_value(#20043,"[") +#20045=* +regexpterm(#20045,14,#20041,1,"a") +#20046=@"loc,{#10000},2,4,2,4" +locations_default(#20046,#10000,2,4,2,4) +hasLocation(#20045,#20046) +regexp_const_value(#20045,"a") +#20047=* 
+regexpterm(#20047,14,#20039,1,"]") +#20048=@"loc,{#10000},2,6,2,6" +locations_default(#20048,#10000,2,6,2,6) +hasLocation(#20047,#20048) +regexp_const_value(#20047,"]") +#20049=* +regexp_parse_errors(#20049,#20039,"unexpected character") +hasLocation(#20049,#20048) +#20050=* +stmts(#20050,2,#20001,2,"/[ [] [ [] [] ] ]/v;") +hasLocation(#20050,#20009) +stmt_containers(#20050,#20001) +#20051=* +exprs(#20051,5,#20050,0,"/[ [] [ [] [] ] ]/v") +hasLocation(#20051,#20019) +enclosing_stmt(#20051,#20050) +expr_containers(#20051,#20001) +literals("/[ [] [ [] [] ] ]/v","/[ [] [ [] [] ] ]/v",#20051) +#20052=* +regexpterm(#20052,1,#20051,0,"[ [] [ [] [] ] ]") +#20053=@"loc,{#10000},3,2,3,17" +locations_default(#20053,#10000,3,2,3,17) +hasLocation(#20052,#20053) +#20054=* +regexpterm(#20054,23,#20052,0,"[ []") +#20055=@"loc,{#10000},3,2,3,5" +locations_default(#20055,#10000,3,2,3,5) +hasLocation(#20054,#20055) +#20056=* +regexpterm(#20056,14,#20054,0," ") +#20057=@"loc,{#10000},3,3,3,3" +locations_default(#20057,#10000,3,3,3,3) +hasLocation(#20056,#20057) +regexp_const_value(#20056," ") +#20058=* +regexpterm(#20058,14,#20054,1,"[") +#20059=@"loc,{#10000},3,4,3,4" +locations_default(#20059,#10000,3,4,3,4) +hasLocation(#20058,#20059) +regexp_const_value(#20058,"[") +#20060=* +regexpterm(#20060,14,#20052,1," ") +#20061=@"loc,{#10000},3,6,3,6" +locations_default(#20061,#10000,3,6,3,6) +hasLocation(#20060,#20061) +regexp_const_value(#20060," ") +#20062=* +regexpterm(#20062,23,#20052,2,"[ []") +#20063=@"loc,{#10000},3,7,3,10" +locations_default(#20063,#10000,3,7,3,10) +hasLocation(#20062,#20063) +#20064=* +regexpterm(#20064,14,#20062,0," ") +#20065=@"loc,{#10000},3,8,3,8" +locations_default(#20065,#10000,3,8,3,8) +hasLocation(#20064,#20065) +regexp_const_value(#20064," ") +#20066=* +regexpterm(#20066,14,#20062,1,"[") +#20067=@"loc,{#10000},3,9,3,9" +locations_default(#20067,#10000,3,9,3,9) +hasLocation(#20066,#20067) +regexp_const_value(#20066,"[") +#20068=* 
+regexpterm(#20068,14,#20052,3," ") +#20069=@"loc,{#10000},3,11,3,11" +locations_default(#20069,#10000,3,11,3,11) +hasLocation(#20068,#20069) +regexp_const_value(#20068," ") +#20070=* +regexpterm(#20070,23,#20052,4,"[]") +#20071=@"loc,{#10000},3,12,3,13" +locations_default(#20071,#10000,3,12,3,13) +hasLocation(#20070,#20071) +#20072=* +regexpterm(#20072,14,#20052,5," ") +#20073=@"loc,{#10000},3,14,3,14" +locations_default(#20073,#10000,3,14,3,14) +hasLocation(#20072,#20073) +regexp_const_value(#20072," ") +#20074=* +regexpterm(#20074,14,#20052,6,"]") +#20075=@"loc,{#10000},3,15,3,15" +locations_default(#20075,#10000,3,15,3,15) +hasLocation(#20074,#20075) +regexp_const_value(#20074,"]") +#20076=* +regexpterm(#20076,14,#20052,7," ") +#20077=@"loc,{#10000},3,16,3,16" +locations_default(#20077,#10000,3,16,3,16) +hasLocation(#20076,#20077) +regexp_const_value(#20076," ") +#20078=* +regexpterm(#20078,14,#20052,8,"]") +#20079=@"loc,{#10000},3,17,3,17" +locations_default(#20079,#10000,3,17,3,17) +hasLocation(#20078,#20079) +regexp_const_value(#20078,"]") +#20080=* +regexp_parse_errors(#20080,#20052,"unexpected character") +hasLocation(#20080,#20075) +#20081=* +regexp_parse_errors(#20081,#20052,"unexpected character") +hasLocation(#20081,#20079) +#20082=* +entry_cfg_node(#20082,#20001) +#20083=@"loc,{#10000},1,1,1,0" +locations_default(#20083,#10000,1,1,1,0) +hasLocation(#20082,#20083) +#20084=* +exit_cfg_node(#20084,#20001) +hasLocation(#20084,#20023) +successor(#20050,#20051) +successor(#20051,#20084) +successor(#20037,#20038) +successor(#20038,#20050) +successor(#20025,#20027) +successor(#20027,#20037) +successor(#20082,#20025) +numlines(#10000,3,3,1) +filetype(#10000,"javascript") From de6f3b1d04b72afa0aa384eb640fcd9a68a13674 Mon Sep 17 00:00:00 2001 From: Napalys Date: Fri, 28 Feb 2025 19:26:22 +0100 Subject: [PATCH 05/27] Add additional test cases. 
--- .../es2024/input/additional_test_cases.js | 2 + .../output/trap/additional_test_cases.js.trap | 546 ++++++++++++++++++ 2 files changed, 548 insertions(+) create mode 100644 javascript/extractor/tests/es2024/input/additional_test_cases.js create mode 100644 javascript/extractor/tests/es2024/output/trap/additional_test_cases.js.trap diff --git a/javascript/extractor/tests/es2024/input/additional_test_cases.js b/javascript/extractor/tests/es2024/input/additional_test_cases.js new file mode 100644 index 000000000000..718805993315 --- /dev/null +++ b/javascript/extractor/tests/es2024/input/additional_test_cases.js @@ -0,0 +1,2 @@ +/^p(ost)?[ |\.]*o(ffice)?[ |\.]*(box)?[ 0-9]*[^[a-z ]]*/g; +/([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)+X/; diff --git a/javascript/extractor/tests/es2024/output/trap/additional_test_cases.js.trap b/javascript/extractor/tests/es2024/output/trap/additional_test_cases.js.trap new file mode 100644 index 000000000000..bd2318dbc5c4 --- /dev/null +++ b/javascript/extractor/tests/es2024/output/trap/additional_test_cases.js.trap @@ -0,0 +1,546 @@ +#10000=@"/additional_test_cases.js;sourcefile" +files(#10000,"/additional_test_cases.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"/^p(ost)?[ |\.]*o(ffice)?[ |\.]*(box)?[ 0-9]*[^[a-z ]]*/g;"," +") +#20003=@"loc,{#10000},1,1,1,58" +locations_default(#20003,#10000,1,1,1,58) +hasLocation(#20002,#20003) +#20004=* +lines(#20004,#20001,"/([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)+X/;"," +") +#20005=@"loc,{#10000},2,1,2,47" +locations_default(#20005,#10000,2,1,2,47) +hasLocation(#20004,#20005) +numlines(#20001,2,2,0) +#20006=* +tokeninfo(#20006,5,#20001,0,"/^p(ost)?[ |\.]*o(ffice)?[ |\.]*(box)?[ 0-9]*[^[a-z ]]*/g") +#20007=@"loc,{#10000},1,1,1,57" 
+locations_default(#20007,#10000,1,1,1,57) +hasLocation(#20006,#20007) +#20008=* +tokeninfo(#20008,8,#20001,1,";") +#20009=@"loc,{#10000},1,58,1,58" +locations_default(#20009,#10000,1,58,1,58) +hasLocation(#20008,#20009) +#20010=* +tokeninfo(#20010,5,#20001,2,"/([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)+X/") +#20011=@"loc,{#10000},2,1,2,46" +locations_default(#20011,#10000,2,1,2,46) +hasLocation(#20010,#20011) +#20012=* +tokeninfo(#20012,8,#20001,3,";") +#20013=@"loc,{#10000},2,47,2,47" +locations_default(#20013,#10000,2,47,2,47) +hasLocation(#20012,#20013) +#20014=* +tokeninfo(#20014,0,#20001,4,"") +#20015=@"loc,{#10000},3,1,3,0" +locations_default(#20015,#10000,3,1,3,0) +hasLocation(#20014,#20015) +toplevels(#20001,0) +#20016=@"loc,{#10000},1,1,3,0" +locations_default(#20016,#10000,1,1,3,0) +hasLocation(#20001,#20016) +#20017=* +stmts(#20017,2,#20001,0,"/^p(ost ... ]]*/g;") +hasLocation(#20017,#20003) +stmt_containers(#20017,#20001) +#20018=* +exprs(#20018,5,#20017,0,"/^p(ost ... z ]]*/g") +hasLocation(#20018,#20007) +enclosing_stmt(#20018,#20017) +expr_containers(#20018,#20001) +literals("/^p(ost)?[ |\.]*o(ffice)?[ |\.]*(box)?[ 0-9]*[^[a-z ]]*/g","/^p(ost)?[ |\.]*o(ffice)?[ |\.]*(box)?[ 0-9]*[^[a-z ]]*/g",#20018) +#20019=* +regexpterm(#20019,1,#20018,0,"^p(ost)?[ |\.]*o(ffice)?[ |\.]*(box)?[ 0-9]*[^[a-z ]]*") +#20020=@"loc,{#10000},1,2,1,55" +locations_default(#20020,#10000,1,2,1,55) +hasLocation(#20019,#20020) +#20021=* +regexpterm(#20021,2,#20019,0,"^") +#20022=@"loc,{#10000},1,2,1,2" +locations_default(#20022,#10000,1,2,1,2) +hasLocation(#20021,#20022) +#20023=* +regexpterm(#20023,14,#20019,1,"p") +#20024=@"loc,{#10000},1,3,1,3" +locations_default(#20024,#10000,1,3,1,3) +hasLocation(#20023,#20024) +regexp_const_value(#20023,"p") +#20025=* +regexpterm(#20025,10,#20019,2,"(ost)?") +#20026=@"loc,{#10000},1,4,1,9" +locations_default(#20026,#10000,1,4,1,9) +hasLocation(#20025,#20026) +is_greedy(#20025) +#20027=* +regexpterm(#20027,13,#20025,0,"(ost)") 
+#20028=@"loc,{#10000},1,4,1,8" +locations_default(#20028,#10000,1,4,1,8) +hasLocation(#20027,#20028) +is_capture(#20027,1) +#20029=* +regexpterm(#20029,14,#20027,0,"ost") +#20030=@"loc,{#10000},1,5,1,7" +locations_default(#20030,#10000,1,5,1,7) +hasLocation(#20029,#20030) +regexp_const_value(#20029,"ost") +#20031=* +regexpterm(#20031,8,#20019,3,"[ |\.]*") +#20032=@"loc,{#10000},1,10,1,16" +locations_default(#20032,#10000,1,10,1,16) +hasLocation(#20031,#20032) +is_greedy(#20031) +#20033=* +regexpterm(#20033,23,#20031,0,"[ |\.]") +#20034=@"loc,{#10000},1,10,1,15" +locations_default(#20034,#10000,1,10,1,15) +hasLocation(#20033,#20034) +#20035=* +regexpterm(#20035,14,#20033,0," ") +#20036=@"loc,{#10000},1,11,1,11" +locations_default(#20036,#10000,1,11,1,11) +hasLocation(#20035,#20036) +regexp_const_value(#20035," ") +#20037=* +regexpterm(#20037,14,#20033,1,"|") +#20038=@"loc,{#10000},1,12,1,12" +locations_default(#20038,#10000,1,12,1,12) +hasLocation(#20037,#20038) +regexp_const_value(#20037,"|") +#20039=* +regexpterm(#20039,21,#20033,2,"\.") +#20040=@"loc,{#10000},1,13,1,14" +locations_default(#20040,#10000,1,13,1,14) +hasLocation(#20039,#20040) +regexp_const_value(#20039,".") +#20041=* +regexpterm(#20041,14,#20019,4,"o") +#20042=@"loc,{#10000},1,17,1,17" +locations_default(#20042,#10000,1,17,1,17) +hasLocation(#20041,#20042) +regexp_const_value(#20041,"o") +#20043=* +regexpterm(#20043,10,#20019,5,"(ffice)?") +#20044=@"loc,{#10000},1,18,1,25" +locations_default(#20044,#10000,1,18,1,25) +hasLocation(#20043,#20044) +is_greedy(#20043) +#20045=* +regexpterm(#20045,13,#20043,0,"(ffice)") +#20046=@"loc,{#10000},1,18,1,24" +locations_default(#20046,#10000,1,18,1,24) +hasLocation(#20045,#20046) +is_capture(#20045,2) +#20047=* +regexpterm(#20047,14,#20045,0,"ffice") +#20048=@"loc,{#10000},1,19,1,23" +locations_default(#20048,#10000,1,19,1,23) +hasLocation(#20047,#20048) +regexp_const_value(#20047,"ffice") +#20049=* +regexpterm(#20049,8,#20019,6,"[ |\.]*") 
+#20050=@"loc,{#10000},1,26,1,32" +locations_default(#20050,#10000,1,26,1,32) +hasLocation(#20049,#20050) +is_greedy(#20049) +#20051=* +regexpterm(#20051,23,#20049,0,"[ |\.]") +#20052=@"loc,{#10000},1,26,1,31" +locations_default(#20052,#10000,1,26,1,31) +hasLocation(#20051,#20052) +#20053=* +regexpterm(#20053,14,#20051,0," ") +#20054=@"loc,{#10000},1,27,1,27" +locations_default(#20054,#10000,1,27,1,27) +hasLocation(#20053,#20054) +regexp_const_value(#20053," ") +#20055=* +regexpterm(#20055,14,#20051,1,"|") +#20056=@"loc,{#10000},1,28,1,28" +locations_default(#20056,#10000,1,28,1,28) +hasLocation(#20055,#20056) +regexp_const_value(#20055,"|") +#20057=* +regexpterm(#20057,21,#20051,2,"\.") +#20058=@"loc,{#10000},1,29,1,30" +locations_default(#20058,#10000,1,29,1,30) +hasLocation(#20057,#20058) +regexp_const_value(#20057,".") +#20059=* +regexpterm(#20059,10,#20019,7,"(box)?") +#20060=@"loc,{#10000},1,33,1,38" +locations_default(#20060,#10000,1,33,1,38) +hasLocation(#20059,#20060) +is_greedy(#20059) +#20061=* +regexpterm(#20061,13,#20059,0,"(box)") +#20062=@"loc,{#10000},1,33,1,37" +locations_default(#20062,#10000,1,33,1,37) +hasLocation(#20061,#20062) +is_capture(#20061,3) +#20063=* +regexpterm(#20063,14,#20061,0,"box") +#20064=@"loc,{#10000},1,34,1,36" +locations_default(#20064,#10000,1,34,1,36) +hasLocation(#20063,#20064) +regexp_const_value(#20063,"box") +#20065=* +regexpterm(#20065,8,#20019,8,"[ 0-9]*") +#20066=@"loc,{#10000},1,39,1,45" +locations_default(#20066,#10000,1,39,1,45) +hasLocation(#20065,#20066) +is_greedy(#20065) +#20067=* +regexpterm(#20067,23,#20065,0,"[ 0-9]") +#20068=@"loc,{#10000},1,39,1,44" +locations_default(#20068,#10000,1,39,1,44) +hasLocation(#20067,#20068) +#20069=* +regexpterm(#20069,14,#20067,0," ") +#20070=@"loc,{#10000},1,40,1,40" +locations_default(#20070,#10000,1,40,1,40) +hasLocation(#20069,#20070) +regexp_const_value(#20069," ") +#20071=* +regexpterm(#20071,24,#20067,1,"0-9") +#20072=@"loc,{#10000},1,41,1,43" 
+locations_default(#20072,#10000,1,41,1,43) +hasLocation(#20071,#20072) +#20073=* +regexpterm(#20073,14,#20071,0,"0") +#20074=@"loc,{#10000},1,41,1,41" +locations_default(#20074,#10000,1,41,1,41) +hasLocation(#20073,#20074) +regexp_const_value(#20073,"0") +#20075=* +regexpterm(#20075,14,#20071,1,"9") +#20076=@"loc,{#10000},1,43,1,43" +locations_default(#20076,#10000,1,43,1,43) +hasLocation(#20075,#20076) +regexp_const_value(#20075,"9") +#20077=* +regexpterm(#20077,23,#20019,9,"[^[a-z ]") +#20078=@"loc,{#10000},1,46,1,53" +locations_default(#20078,#10000,1,46,1,53) +hasLocation(#20077,#20078) +is_inverted(#20077) +#20079=* +regexpterm(#20079,14,#20077,0,"[") +#20080=@"loc,{#10000},1,48,1,48" +locations_default(#20080,#10000,1,48,1,48) +hasLocation(#20079,#20080) +regexp_const_value(#20079,"[") +#20081=* +regexpterm(#20081,24,#20077,1,"a-z") +#20082=@"loc,{#10000},1,49,1,51" +locations_default(#20082,#10000,1,49,1,51) +hasLocation(#20081,#20082) +#20083=* +regexpterm(#20083,14,#20081,0,"a") +#20084=@"loc,{#10000},1,49,1,49" +locations_default(#20084,#10000,1,49,1,49) +hasLocation(#20083,#20084) +regexp_const_value(#20083,"a") +#20085=* +regexpterm(#20085,14,#20081,1,"z") +#20086=@"loc,{#10000},1,51,1,51" +locations_default(#20086,#10000,1,51,1,51) +hasLocation(#20085,#20086) +regexp_const_value(#20085,"z") +#20087=* +regexpterm(#20087,14,#20077,2," ") +#20088=@"loc,{#10000},1,52,1,52" +locations_default(#20088,#10000,1,52,1,52) +hasLocation(#20087,#20088) +regexp_const_value(#20087," ") +#20089=* +regexpterm(#20089,8,#20019,10,"]*") +#20090=@"loc,{#10000},1,54,1,55" +locations_default(#20090,#10000,1,54,1,55) +hasLocation(#20089,#20090) +is_greedy(#20089) +#20091=* +regexpterm(#20091,14,#20089,0,"]") +#20092=@"loc,{#10000},1,54,1,54" +locations_default(#20092,#10000,1,54,1,54) +hasLocation(#20091,#20092) +regexp_const_value(#20091,"]") +#20093=* +regexp_parse_errors(#20093,#20019,"unexpected character") +hasLocation(#20093,#20092) +#20094=* 
+stmts(#20094,2,#20001,1,"/([ ]*[ ... ]+)+X/;") +hasLocation(#20094,#20005) +stmt_containers(#20094,#20001) +#20095=* +exprs(#20095,5,#20094,0,"/([ ]*[ ... -]+)+X/") +hasLocation(#20095,#20011) +enclosing_stmt(#20095,#20094) +expr_containers(#20095,#20001) +literals("/([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)+X/","/([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)+X/",#20095) +#20096=* +regexpterm(#20096,1,#20095,0,"([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)+X") +#20097=@"loc,{#10000},2,2,2,45" +locations_default(#20097,#10000,2,2,2,45) +hasLocation(#20096,#20097) +#20098=* +regexpterm(#20098,9,#20096,0,"([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)+") +#20099=@"loc,{#10000},2,2,2,44" +locations_default(#20099,#10000,2,2,2,44) +hasLocation(#20098,#20099) +is_greedy(#20098) +#20100=* +regexpterm(#20100,13,#20098,0,"([ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+)") +#20101=@"loc,{#10000},2,2,2,43" +locations_default(#20101,#10000,2,2,2,43) +hasLocation(#20100,#20101) +is_capture(#20100,1) +#20102=* +regexpterm(#20102,1,#20100,0,"[ ]*[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+") +#20103=@"loc,{#10000},2,3,2,42" +locations_default(#20103,#10000,2,3,2,42) +hasLocation(#20102,#20103) +#20104=* +regexpterm(#20104,8,#20102,0,"[ ]*") +#20105=@"loc,{#10000},2,3,2,6" +locations_default(#20105,#10000,2,3,2,6) +hasLocation(#20104,#20105) +is_greedy(#20104) +#20106=* +regexpterm(#20106,23,#20104,0,"[ ]") +#20107=@"loc,{#10000},2,3,2,5" +locations_default(#20107,#10000,2,3,2,5) +hasLocation(#20106,#20107) +#20108=* +regexpterm(#20108,14,#20106,0," ") +#20109=@"loc,{#10000},2,4,2,4" +locations_default(#20109,#10000,2,4,2,4) +hasLocation(#20108,#20109) +regexp_const_value(#20108," ") +#20110=* +regexpterm(#20110,9,#20102,1,"[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]+") +#20111=@"loc,{#10000},2,7,2,42" +locations_default(#20111,#10000,2,7,2,42) +hasLocation(#20110,#20111) +is_greedy(#20110) +#20112=* +regexpterm(#20112,23,#20110,0,"[a-z0-9&#*=?@\\><:,()$[\]_.{}!+%^-]") +#20113=@"loc,{#10000},2,7,2,41" 
+locations_default(#20113,#10000,2,7,2,41) +hasLocation(#20112,#20113) +#20114=* +regexpterm(#20114,24,#20112,0,"a-z") +#20115=@"loc,{#10000},2,8,2,10" +locations_default(#20115,#10000,2,8,2,10) +hasLocation(#20114,#20115) +#20116=* +regexpterm(#20116,14,#20114,0,"a") +#20117=@"loc,{#10000},2,8,2,8" +locations_default(#20117,#10000,2,8,2,8) +hasLocation(#20116,#20117) +regexp_const_value(#20116,"a") +#20118=* +regexpterm(#20118,14,#20114,1,"z") +#20119=@"loc,{#10000},2,10,2,10" +locations_default(#20119,#10000,2,10,2,10) +hasLocation(#20118,#20119) +regexp_const_value(#20118,"z") +#20120=* +regexpterm(#20120,24,#20112,1,"0-9") +#20121=@"loc,{#10000},2,11,2,13" +locations_default(#20121,#10000,2,11,2,13) +hasLocation(#20120,#20121) +#20122=* +regexpterm(#20122,14,#20120,0,"0") +#20123=@"loc,{#10000},2,11,2,11" +locations_default(#20123,#10000,2,11,2,11) +hasLocation(#20122,#20123) +regexp_const_value(#20122,"0") +#20124=* +regexpterm(#20124,14,#20120,1,"9") +#20125=@"loc,{#10000},2,13,2,13" +locations_default(#20125,#10000,2,13,2,13) +hasLocation(#20124,#20125) +regexp_const_value(#20124,"9") +#20126=* +regexpterm(#20126,14,#20112,2,"&") +#20127=@"loc,{#10000},2,14,2,14" +locations_default(#20127,#10000,2,14,2,14) +hasLocation(#20126,#20127) +regexp_const_value(#20126,"&") +#20128=* +regexpterm(#20128,14,#20112,3,"#") +#20129=@"loc,{#10000},2,15,2,15" +locations_default(#20129,#10000,2,15,2,15) +hasLocation(#20128,#20129) +regexp_const_value(#20128,"#") +#20130=* +regexpterm(#20130,14,#20112,4,"*") +#20131=@"loc,{#10000},2,16,2,16" +locations_default(#20131,#10000,2,16,2,16) +hasLocation(#20130,#20131) +regexp_const_value(#20130,"*") +#20132=* +regexpterm(#20132,14,#20112,5,"=") +#20133=@"loc,{#10000},2,17,2,17" +locations_default(#20133,#10000,2,17,2,17) +hasLocation(#20132,#20133) +regexp_const_value(#20132,"=") +#20134=* +regexpterm(#20134,14,#20112,6,"?") +#20135=@"loc,{#10000},2,18,2,18" +locations_default(#20135,#10000,2,18,2,18) +hasLocation(#20134,#20135) 
+regexp_const_value(#20134,"?") +#20136=* +regexpterm(#20136,14,#20112,7,"@") +#20137=@"loc,{#10000},2,19,2,19" +locations_default(#20137,#10000,2,19,2,19) +hasLocation(#20136,#20137) +regexp_const_value(#20136,"@") +#20138=* +regexpterm(#20138,21,#20112,8,"\\") +#20139=@"loc,{#10000},2,20,2,21" +locations_default(#20139,#10000,2,20,2,21) +hasLocation(#20138,#20139) +regexp_const_value(#20138,"\") +#20140=* +regexpterm(#20140,14,#20112,9,">") +#20141=@"loc,{#10000},2,22,2,22" +locations_default(#20141,#10000,2,22,2,22) +hasLocation(#20140,#20141) +regexp_const_value(#20140,">") +#20142=* +regexpterm(#20142,14,#20112,10,"<") +#20143=@"loc,{#10000},2,23,2,23" +locations_default(#20143,#10000,2,23,2,23) +hasLocation(#20142,#20143) +regexp_const_value(#20142,"<") +#20144=* +regexpterm(#20144,14,#20112,11,":") +#20145=@"loc,{#10000},2,24,2,24" +locations_default(#20145,#10000,2,24,2,24) +hasLocation(#20144,#20145) +regexp_const_value(#20144,":") +#20146=* +regexpterm(#20146,14,#20112,12,",") +#20147=@"loc,{#10000},2,25,2,25" +locations_default(#20147,#10000,2,25,2,25) +hasLocation(#20146,#20147) +regexp_const_value(#20146,",") +#20148=* +regexpterm(#20148,14,#20112,13,"(") +#20149=@"loc,{#10000},2,26,2,26" +locations_default(#20149,#10000,2,26,2,26) +hasLocation(#20148,#20149) +regexp_const_value(#20148,"(") +#20150=* +regexpterm(#20150,14,#20112,14,")") +#20151=@"loc,{#10000},2,27,2,27" +locations_default(#20151,#10000,2,27,2,27) +hasLocation(#20150,#20151) +regexp_const_value(#20150,")") +#20152=* +regexpterm(#20152,14,#20112,15,"$") +#20153=@"loc,{#10000},2,28,2,28" +locations_default(#20153,#10000,2,28,2,28) +hasLocation(#20152,#20153) +regexp_const_value(#20152,"$") +#20154=* +regexpterm(#20154,14,#20112,16,"[") +#20155=@"loc,{#10000},2,29,2,29" +locations_default(#20155,#10000,2,29,2,29) +hasLocation(#20154,#20155) +regexp_const_value(#20154,"[") +#20156=* +regexpterm(#20156,21,#20112,17,"\]") +#20157=@"loc,{#10000},2,30,2,31" 
+locations_default(#20157,#10000,2,30,2,31) +hasLocation(#20156,#20157) +regexp_const_value(#20156,"]") +#20158=* +regexpterm(#20158,14,#20112,18,"_") +#20159=@"loc,{#10000},2,32,2,32" +locations_default(#20159,#10000,2,32,2,32) +hasLocation(#20158,#20159) +regexp_const_value(#20158,"_") +#20160=* +regexpterm(#20160,14,#20112,19,".") +#20161=@"loc,{#10000},2,33,2,33" +locations_default(#20161,#10000,2,33,2,33) +hasLocation(#20160,#20161) +regexp_const_value(#20160,".") +#20162=* +regexpterm(#20162,14,#20112,20,"{") +#20163=@"loc,{#10000},2,34,2,34" +locations_default(#20163,#10000,2,34,2,34) +hasLocation(#20162,#20163) +regexp_const_value(#20162,"{") +#20164=* +regexpterm(#20164,14,#20112,21,"}") +#20165=@"loc,{#10000},2,35,2,35" +locations_default(#20165,#10000,2,35,2,35) +hasLocation(#20164,#20165) +regexp_const_value(#20164,"}") +#20166=* +regexpterm(#20166,14,#20112,22,"!") +#20167=@"loc,{#10000},2,36,2,36" +locations_default(#20167,#10000,2,36,2,36) +hasLocation(#20166,#20167) +regexp_const_value(#20166,"!") +#20168=* +regexpterm(#20168,14,#20112,23,"+") +#20169=@"loc,{#10000},2,37,2,37" +locations_default(#20169,#10000,2,37,2,37) +hasLocation(#20168,#20169) +regexp_const_value(#20168,"+") +#20170=* +regexpterm(#20170,14,#20112,24,"%") +#20171=@"loc,{#10000},2,38,2,38" +locations_default(#20171,#10000,2,38,2,38) +hasLocation(#20170,#20171) +regexp_const_value(#20170,"%") +#20172=* +regexpterm(#20172,14,#20112,25,"^") +#20173=@"loc,{#10000},2,39,2,39" +locations_default(#20173,#10000,2,39,2,39) +hasLocation(#20172,#20173) +regexp_const_value(#20172,"^") +#20174=* +regexpterm(#20174,14,#20112,26,"-") +#20175=@"loc,{#10000},2,40,2,40" +locations_default(#20175,#10000,2,40,2,40) +hasLocation(#20174,#20175) +regexp_const_value(#20174,"-") +#20176=* +regexpterm(#20176,14,#20096,1,"X") +#20177=@"loc,{#10000},2,45,2,45" +locations_default(#20177,#10000,2,45,2,45) +hasLocation(#20176,#20177) +regexp_const_value(#20176,"X") +#20178=* +entry_cfg_node(#20178,#20001) 
+#20179=@"loc,{#10000},1,1,1,0" +locations_default(#20179,#10000,1,1,1,0) +hasLocation(#20178,#20179) +#20180=* +exit_cfg_node(#20180,#20001) +hasLocation(#20180,#20015) +successor(#20094,#20095) +successor(#20095,#20180) +successor(#20017,#20018) +successor(#20018,#20094) +successor(#20178,#20017) +numlines(#10000,2,2,0) +filetype(#10000,"javascript") From 2333c538d91f2c2832bc864c650e5e483a0e6109 Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 17:14:08 +0100 Subject: [PATCH 06/27] Added ability to parse nested character classes while using `v` flag. --- .../com/semmle/js/parser/RegExpParser.java | 23 ++ .../trap/regex_nested_character_class.js.trap | 237 +++++++----------- 2 files changed, 114 insertions(+), 146 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 478c0c25f3be..e8bd0f3b1efb 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -545,6 +545,7 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) { } private RegExpTerm parseCharacterClass() { + if (flags != null && flags.contains("v")) return parseNestedCharacterClass(); SourceLocation loc = new SourceLocation(pos()); List elements = new ArrayList<>(); @@ -560,6 +561,28 @@ private RegExpTerm parseCharacterClass() { return this.finishTerm(new CharacterClass(loc, elements, inverted)); } + // ECMA 2024 `v` flag allows nested character classes. 
+ private RegExpTerm parseNestedCharacterClass() { + SourceLocation loc = new SourceLocation(pos()); + List elements = new ArrayList<>(); + + this.match("["); + boolean inverted = this.match("^"); + while (!this.match("]")) { + if (this.atEOS()) { + this.error(Error.EXPECTED_RBRACKET); + break; + } + if (lookahead("[")) { + elements.add(parseNestedCharacterClass()); + } + else { + elements.add(this.parseCharacterClassElement()); + } + } + return this.finishTerm(new CharacterClass(loc, elements, inverted)); + } + private static final List escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W"); private RegExpTerm parseCharacterClassElement() { diff --git a/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap index 292361c0b3e3..c3bc9112cc56 100644 --- a/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap @@ -86,181 +86,126 @@ enclosing_stmt(#20027,#20025) expr_containers(#20027,#20001) literals("/[[]]/v","/[[]]/v",#20027) #20028=* -regexpterm(#20028,1,#20027,0,"[[]]") +regexpterm(#20028,23,#20027,0,"[[]]") #20029=@"loc,{#10000},1,2,1,5" locations_default(#20029,#10000,1,2,1,5) hasLocation(#20028,#20029) #20030=* -regexpterm(#20030,23,#20028,0,"[[]") -#20031=@"loc,{#10000},1,2,1,4" -locations_default(#20031,#10000,1,2,1,4) +regexpterm(#20030,23,#20028,0,"[]") +#20031=@"loc,{#10000},1,3,1,4" +locations_default(#20031,#10000,1,3,1,4) hasLocation(#20030,#20031) #20032=* -regexpterm(#20032,14,#20030,0,"[") -#20033=@"loc,{#10000},1,3,1,3" -locations_default(#20033,#10000,1,3,1,3) -hasLocation(#20032,#20033) -regexp_const_value(#20032,"[") +stmts(#20032,2,#20001,1,"/[[a]]/v;") +hasLocation(#20032,#20007) +stmt_containers(#20032,#20001) +#20033=* +exprs(#20033,5,#20032,0,"/[[a]]/v") +hasLocation(#20033,#20015) 
+enclosing_stmt(#20033,#20032) +expr_containers(#20033,#20001) +literals("/[[a]]/v","/[[a]]/v",#20033) #20034=* -regexpterm(#20034,14,#20028,1,"]") -#20035=@"loc,{#10000},1,5,1,5" -locations_default(#20035,#10000,1,5,1,5) +regexpterm(#20034,23,#20033,0,"[[a]]") +#20035=@"loc,{#10000},2,2,2,6" +locations_default(#20035,#10000,2,2,2,6) hasLocation(#20034,#20035) -regexp_const_value(#20034,"]") #20036=* -regexp_parse_errors(#20036,#20028,"unexpected character") -hasLocation(#20036,#20035) -#20037=* -stmts(#20037,2,#20001,1,"/[[a]]/v;") -hasLocation(#20037,#20007) -stmt_containers(#20037,#20001) +regexpterm(#20036,23,#20034,0,"[a]") +#20037=@"loc,{#10000},2,3,2,5" +locations_default(#20037,#10000,2,3,2,5) +hasLocation(#20036,#20037) #20038=* -exprs(#20038,5,#20037,0,"/[[a]]/v") -hasLocation(#20038,#20015) -enclosing_stmt(#20038,#20037) -expr_containers(#20038,#20001) -literals("/[[a]]/v","/[[a]]/v",#20038) -#20039=* -regexpterm(#20039,1,#20038,0,"[[a]]") -#20040=@"loc,{#10000},2,2,2,6" -locations_default(#20040,#10000,2,2,2,6) -hasLocation(#20039,#20040) +regexpterm(#20038,14,#20036,0,"a") +#20039=@"loc,{#10000},2,4,2,4" +locations_default(#20039,#10000,2,4,2,4) +hasLocation(#20038,#20039) +regexp_const_value(#20038,"a") +#20040=* +stmts(#20040,2,#20001,2,"/[ [] [ [] [] ] ]/v;") +hasLocation(#20040,#20009) +stmt_containers(#20040,#20001) #20041=* -regexpterm(#20041,23,#20039,0,"[[a]") -#20042=@"loc,{#10000},2,2,2,5" -locations_default(#20042,#10000,2,2,2,5) -hasLocation(#20041,#20042) -#20043=* -regexpterm(#20043,14,#20041,0,"[") -#20044=@"loc,{#10000},2,3,2,3" -locations_default(#20044,#10000,2,3,2,3) -hasLocation(#20043,#20044) -regexp_const_value(#20043,"[") -#20045=* -regexpterm(#20045,14,#20041,1,"a") -#20046=@"loc,{#10000},2,4,2,4" -locations_default(#20046,#10000,2,4,2,4) -hasLocation(#20045,#20046) -regexp_const_value(#20045,"a") -#20047=* -regexpterm(#20047,14,#20039,1,"]") -#20048=@"loc,{#10000},2,6,2,6" -locations_default(#20048,#10000,2,6,2,6) 
-hasLocation(#20047,#20048) -regexp_const_value(#20047,"]") -#20049=* -regexp_parse_errors(#20049,#20039,"unexpected character") -hasLocation(#20049,#20048) +exprs(#20041,5,#20040,0,"/[ [] [ [] [] ] ]/v") +hasLocation(#20041,#20019) +enclosing_stmt(#20041,#20040) +expr_containers(#20041,#20001) +literals("/[ [] [ [] [] ] ]/v","/[ [] [ [] [] ] ]/v",#20041) +#20042=* +regexpterm(#20042,23,#20041,0,"[ [] [ [] [] ] ]") +#20043=@"loc,{#10000},3,2,3,17" +locations_default(#20043,#10000,3,2,3,17) +hasLocation(#20042,#20043) +#20044=* +regexpterm(#20044,14,#20042,0," ") +#20045=@"loc,{#10000},3,3,3,3" +locations_default(#20045,#10000,3,3,3,3) +hasLocation(#20044,#20045) +regexp_const_value(#20044," ") +#20046=* +regexpterm(#20046,23,#20042,1,"[]") +#20047=@"loc,{#10000},3,4,3,5" +locations_default(#20047,#10000,3,4,3,5) +hasLocation(#20046,#20047) +#20048=* +regexpterm(#20048,14,#20042,2," ") +#20049=@"loc,{#10000},3,6,3,6" +locations_default(#20049,#10000,3,6,3,6) +hasLocation(#20048,#20049) +regexp_const_value(#20048," ") #20050=* -stmts(#20050,2,#20001,2,"/[ [] [ [] [] ] ]/v;") -hasLocation(#20050,#20009) -stmt_containers(#20050,#20001) -#20051=* -exprs(#20051,5,#20050,0,"/[ [] [ [] [] ] ]/v") -hasLocation(#20051,#20019) -enclosing_stmt(#20051,#20050) -expr_containers(#20051,#20001) -literals("/[ [] [ [] [] ] ]/v","/[ [] [ [] [] ] ]/v",#20051) +regexpterm(#20050,23,#20042,3,"[ [] [] ]") +#20051=@"loc,{#10000},3,7,3,15" +locations_default(#20051,#10000,3,7,3,15) +hasLocation(#20050,#20051) #20052=* -regexpterm(#20052,1,#20051,0,"[ [] [ [] [] ] ]") -#20053=@"loc,{#10000},3,2,3,17" -locations_default(#20053,#10000,3,2,3,17) +regexpterm(#20052,14,#20050,0," ") +#20053=@"loc,{#10000},3,8,3,8" +locations_default(#20053,#10000,3,8,3,8) hasLocation(#20052,#20053) +regexp_const_value(#20052," ") #20054=* -regexpterm(#20054,23,#20052,0,"[ []") -#20055=@"loc,{#10000},3,2,3,5" -locations_default(#20055,#10000,3,2,3,5) +regexpterm(#20054,23,#20050,1,"[]") 
+#20055=@"loc,{#10000},3,9,3,10" +locations_default(#20055,#10000,3,9,3,10) hasLocation(#20054,#20055) #20056=* -regexpterm(#20056,14,#20054,0," ") -#20057=@"loc,{#10000},3,3,3,3" -locations_default(#20057,#10000,3,3,3,3) +regexpterm(#20056,14,#20050,2," ") +#20057=@"loc,{#10000},3,11,3,11" +locations_default(#20057,#10000,3,11,3,11) hasLocation(#20056,#20057) regexp_const_value(#20056," ") #20058=* -regexpterm(#20058,14,#20054,1,"[") -#20059=@"loc,{#10000},3,4,3,4" -locations_default(#20059,#10000,3,4,3,4) +regexpterm(#20058,23,#20050,3,"[]") +#20059=@"loc,{#10000},3,12,3,13" +locations_default(#20059,#10000,3,12,3,13) hasLocation(#20058,#20059) -regexp_const_value(#20058,"[") #20060=* -regexpterm(#20060,14,#20052,1," ") -#20061=@"loc,{#10000},3,6,3,6" -locations_default(#20061,#10000,3,6,3,6) +regexpterm(#20060,14,#20050,4," ") +#20061=@"loc,{#10000},3,14,3,14" +locations_default(#20061,#10000,3,14,3,14) hasLocation(#20060,#20061) regexp_const_value(#20060," ") #20062=* -regexpterm(#20062,23,#20052,2,"[ []") -#20063=@"loc,{#10000},3,7,3,10" -locations_default(#20063,#10000,3,7,3,10) +regexpterm(#20062,14,#20042,4," ") +#20063=@"loc,{#10000},3,16,3,16" +locations_default(#20063,#10000,3,16,3,16) hasLocation(#20062,#20063) +regexp_const_value(#20062," ") #20064=* -regexpterm(#20064,14,#20062,0," ") -#20065=@"loc,{#10000},3,8,3,8" -locations_default(#20065,#10000,3,8,3,8) +entry_cfg_node(#20064,#20001) +#20065=@"loc,{#10000},1,1,1,0" +locations_default(#20065,#10000,1,1,1,0) hasLocation(#20064,#20065) -regexp_const_value(#20064," ") #20066=* -regexpterm(#20066,14,#20062,1,"[") -#20067=@"loc,{#10000},3,9,3,9" -locations_default(#20067,#10000,3,9,3,9) -hasLocation(#20066,#20067) -regexp_const_value(#20066,"[") -#20068=* -regexpterm(#20068,14,#20052,3," ") -#20069=@"loc,{#10000},3,11,3,11" -locations_default(#20069,#10000,3,11,3,11) -hasLocation(#20068,#20069) -regexp_const_value(#20068," ") -#20070=* -regexpterm(#20070,23,#20052,4,"[]") 
-#20071=@"loc,{#10000},3,12,3,13" -locations_default(#20071,#10000,3,12,3,13) -hasLocation(#20070,#20071) -#20072=* -regexpterm(#20072,14,#20052,5," ") -#20073=@"loc,{#10000},3,14,3,14" -locations_default(#20073,#10000,3,14,3,14) -hasLocation(#20072,#20073) -regexp_const_value(#20072," ") -#20074=* -regexpterm(#20074,14,#20052,6,"]") -#20075=@"loc,{#10000},3,15,3,15" -locations_default(#20075,#10000,3,15,3,15) -hasLocation(#20074,#20075) -regexp_const_value(#20074,"]") -#20076=* -regexpterm(#20076,14,#20052,7," ") -#20077=@"loc,{#10000},3,16,3,16" -locations_default(#20077,#10000,3,16,3,16) -hasLocation(#20076,#20077) -regexp_const_value(#20076," ") -#20078=* -regexpterm(#20078,14,#20052,8,"]") -#20079=@"loc,{#10000},3,17,3,17" -locations_default(#20079,#10000,3,17,3,17) -hasLocation(#20078,#20079) -regexp_const_value(#20078,"]") -#20080=* -regexp_parse_errors(#20080,#20052,"unexpected character") -hasLocation(#20080,#20075) -#20081=* -regexp_parse_errors(#20081,#20052,"unexpected character") -hasLocation(#20081,#20079) -#20082=* -entry_cfg_node(#20082,#20001) -#20083=@"loc,{#10000},1,1,1,0" -locations_default(#20083,#10000,1,1,1,0) -hasLocation(#20082,#20083) -#20084=* -exit_cfg_node(#20084,#20001) -hasLocation(#20084,#20023) -successor(#20050,#20051) -successor(#20051,#20084) -successor(#20037,#20038) -successor(#20038,#20050) +exit_cfg_node(#20066,#20001) +hasLocation(#20066,#20023) +successor(#20040,#20041) +successor(#20041,#20066) +successor(#20032,#20033) +successor(#20033,#20040) successor(#20025,#20027) -successor(#20027,#20037) -successor(#20082,#20025) +successor(#20027,#20032) +successor(#20064,#20025) numlines(#10000,3,3,1) filetype(#10000,"javascript") From fa5093f6ad65fbb9064c910d95feb1e908c5889d Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 19:07:43 +0100 Subject: [PATCH 07/27] Added test cases for intersection --- .../tests/es2024/input/intersection.js | 7 + .../es2024/output/trap/intersection.js.trap | 427 ++++++++++++++++++ 2 files 
changed, 434 insertions(+) create mode 100644 javascript/extractor/tests/es2024/input/intersection.js create mode 100644 javascript/extractor/tests/es2024/output/trap/intersection.js.trap diff --git a/javascript/extractor/tests/es2024/input/intersection.js b/javascript/extractor/tests/es2024/input/intersection.js new file mode 100644 index 000000000000..a1f50cbaef6d --- /dev/null +++ b/javascript/extractor/tests/es2024/input/intersection.js @@ -0,0 +1,7 @@ +/[[abc]&&[bcd]]/v; // Valid use of intersection operator, matches b or c +/abc&&bcd/v; //Valid regex, but no intersection operation: Matches the literal string "abc&&bcd" +/[abc]&&[bcd]/v; // Valid regex, but incorrect intersection operation: + // - Matches a single character from [abc] + // - Then the literal "&&" + // - Then a single character from [bcd] +/[[abc]&&[bcd]&&[c]]/v; // Valid use of intersection operator, matches c diff --git a/javascript/extractor/tests/es2024/output/trap/intersection.js.trap b/javascript/extractor/tests/es2024/output/trap/intersection.js.trap new file mode 100644 index 000000000000..4d65235e7a36 --- /dev/null +++ b/javascript/extractor/tests/es2024/output/trap/intersection.js.trap @@ -0,0 +1,427 @@ +#10000=@"/intersection.js;sourcefile" +files(#10000,"/intersection.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +comments(#20002,0,#20001," Valid use of intersection operator, matches b or c","// Vali ... b or c") +#20003=@"loc,{#10000},1,20,1,72" +locations_default(#20003,#10000,1,20,1,72) +hasLocation(#20002,#20003) +#20004=* +comments(#20004,0,#20001,"Valid regex, but no intersection operation: Matches the literal string ""abc&&bcd""","//Valid ... 
c&&bcd""") +#20005=@"loc,{#10000},2,14,2,96" +locations_default(#20005,#10000,2,14,2,96) +hasLocation(#20004,#20005) +#20006=* +comments(#20006,0,#20001," Valid regex, but incorrect intersection operation: ","// Vali ... ation: ") +#20007=@"loc,{#10000},3,18,3,71" +locations_default(#20007,#10000,3,18,3,71) +hasLocation(#20006,#20007) +#20008=* +comments(#20008,0,#20001," - Matches a single character from [abc]","// - Ma ... m [abc]") +#20009=@"loc,{#10000},4,18,4,59" +locations_default(#20009,#10000,4,18,4,59) +hasLocation(#20008,#20009) +#20010=* +comments(#20010,0,#20001," - Then the literal ""&&""","// - Th ... al ""&&""") +#20011=@"loc,{#10000},5,18,5,43" +locations_default(#20011,#10000,5,18,5,43) +hasLocation(#20010,#20011) +#20012=* +comments(#20012,0,#20001," - Then a single character from [bcd]","// - Th ... m [bcd]") +#20013=@"loc,{#10000},6,18,6,56" +locations_default(#20013,#10000,6,18,6,56) +hasLocation(#20012,#20013) +#20014=* +comments(#20014,0,#20001," Valid use of intersection operator, matches c","// Vali ... 
tches c") +#20015=@"loc,{#10000},7,25,7,72" +locations_default(#20015,#10000,7,25,7,72) +hasLocation(#20014,#20015) +#20016=* +lines(#20016,#20001,"/[[abc]&&[bcd]]/v; // Valid use of intersection operator, matches b or c"," +") +#20017=@"loc,{#10000},1,1,1,72" +locations_default(#20017,#10000,1,1,1,72) +hasLocation(#20016,#20017) +#20018=* +lines(#20018,#20001,"/abc&&bcd/v; //Valid regex, but no intersection operation: Matches the literal string ""abc&&bcd"""," +") +#20019=@"loc,{#10000},2,1,2,96" +locations_default(#20019,#10000,2,1,2,96) +hasLocation(#20018,#20019) +#20020=* +lines(#20020,#20001,"/[abc]&&[bcd]/v; // Valid regex, but incorrect intersection operation: "," +") +#20021=@"loc,{#10000},3,1,3,71" +locations_default(#20021,#10000,3,1,3,71) +hasLocation(#20020,#20021) +#20022=* +lines(#20022,#20001," // - Matches a single character from [abc]"," +") +#20023=@"loc,{#10000},4,1,4,59" +locations_default(#20023,#10000,4,1,4,59) +hasLocation(#20022,#20023) +indentation(#10000,4," ",17) +#20024=* +lines(#20024,#20001," // - Then the literal ""&&"""," +") +#20025=@"loc,{#10000},5,1,5,43" +locations_default(#20025,#10000,5,1,5,43) +hasLocation(#20024,#20025) +indentation(#10000,5," ",17) +#20026=* +lines(#20026,#20001," // - Then a single character from [bcd]"," +") +#20027=@"loc,{#10000},6,1,6,56" +locations_default(#20027,#10000,6,1,6,56) +hasLocation(#20026,#20027) +indentation(#10000,6," ",17) +#20028=* +lines(#20028,#20001,"/[[abc]&&[bcd]&&[c]]/v; // Valid use of intersection operator, matches c"," +") +#20029=@"loc,{#10000},7,1,7,72" +locations_default(#20029,#10000,7,1,7,72) +hasLocation(#20028,#20029) +numlines(#20001,7,4,7) +#20030=* +tokeninfo(#20030,5,#20001,0,"/[[abc]&&[bcd]]/v") +#20031=@"loc,{#10000},1,1,1,17" +locations_default(#20031,#10000,1,1,1,17) +hasLocation(#20030,#20031) +#20032=* +tokeninfo(#20032,8,#20001,1,";") +#20033=@"loc,{#10000},1,18,1,18" +locations_default(#20033,#10000,1,18,1,18) +hasLocation(#20032,#20033) +#20034=* 
+tokeninfo(#20034,5,#20001,2,"/abc&&bcd/v") +#20035=@"loc,{#10000},2,1,2,11" +locations_default(#20035,#10000,2,1,2,11) +hasLocation(#20034,#20035) +next_token(#20002,#20034) +#20036=* +tokeninfo(#20036,8,#20001,3,";") +#20037=@"loc,{#10000},2,12,2,12" +locations_default(#20037,#10000,2,12,2,12) +hasLocation(#20036,#20037) +#20038=* +tokeninfo(#20038,5,#20001,4,"/[abc]&&[bcd]/v") +#20039=@"loc,{#10000},3,1,3,15" +locations_default(#20039,#10000,3,1,3,15) +hasLocation(#20038,#20039) +next_token(#20004,#20038) +#20040=* +tokeninfo(#20040,8,#20001,5,";") +#20041=@"loc,{#10000},3,16,3,16" +locations_default(#20041,#10000,3,16,3,16) +hasLocation(#20040,#20041) +#20042=* +tokeninfo(#20042,5,#20001,6,"/[[abc]&&[bcd]&&[c]]/v") +#20043=@"loc,{#10000},7,1,7,22" +locations_default(#20043,#10000,7,1,7,22) +hasLocation(#20042,#20043) +next_token(#20006,#20042) +next_token(#20008,#20042) +next_token(#20010,#20042) +next_token(#20012,#20042) +#20044=* +tokeninfo(#20044,8,#20001,7,";") +#20045=@"loc,{#10000},7,23,7,23" +locations_default(#20045,#10000,7,23,7,23) +hasLocation(#20044,#20045) +#20046=* +tokeninfo(#20046,0,#20001,8,"") +#20047=@"loc,{#10000},8,1,8,0" +locations_default(#20047,#10000,8,1,8,0) +hasLocation(#20046,#20047) +next_token(#20014,#20046) +toplevels(#20001,0) +#20048=@"loc,{#10000},1,1,8,0" +locations_default(#20048,#10000,1,1,8,0) +hasLocation(#20001,#20048) +#20049=* +stmts(#20049,2,#20001,0,"/[[abc]&&[bcd]]/v;") +#20050=@"loc,{#10000},1,1,1,18" +locations_default(#20050,#10000,1,1,1,18) +hasLocation(#20049,#20050) +stmt_containers(#20049,#20001) +#20051=* +exprs(#20051,5,#20049,0,"/[[abc]&&[bcd]]/v") +hasLocation(#20051,#20031) +enclosing_stmt(#20051,#20049) +expr_containers(#20051,#20001) +literals("/[[abc]&&[bcd]]/v","/[[abc]&&[bcd]]/v",#20051) +#20052=* +regexpterm(#20052,23,#20051,0,"[[abc]&&[bcd]]") +#20053=@"loc,{#10000},1,2,1,15" +locations_default(#20053,#10000,1,2,1,15) +hasLocation(#20052,#20053) +#20054=* +regexpterm(#20054,23,#20052,0,"[abc]") 
+#20055=@"loc,{#10000},1,3,1,7" +locations_default(#20055,#10000,1,3,1,7) +hasLocation(#20054,#20055) +#20056=* +regexpterm(#20056,14,#20054,0,"a") +#20057=@"loc,{#10000},1,4,1,4" +locations_default(#20057,#10000,1,4,1,4) +hasLocation(#20056,#20057) +regexp_const_value(#20056,"a") +#20058=* +regexpterm(#20058,14,#20054,1,"b") +#20059=@"loc,{#10000},1,5,1,5" +locations_default(#20059,#10000,1,5,1,5) +hasLocation(#20058,#20059) +regexp_const_value(#20058,"b") +#20060=* +regexpterm(#20060,14,#20054,2,"c") +#20061=@"loc,{#10000},1,6,1,6" +locations_default(#20061,#10000,1,6,1,6) +hasLocation(#20060,#20061) +regexp_const_value(#20060,"c") +#20062=* +regexpterm(#20062,14,#20052,1,"&") +#20063=@"loc,{#10000},1,8,1,8" +locations_default(#20063,#10000,1,8,1,8) +hasLocation(#20062,#20063) +regexp_const_value(#20062,"&") +#20064=* +regexpterm(#20064,14,#20052,2,"&") +#20065=@"loc,{#10000},1,9,1,9" +locations_default(#20065,#10000,1,9,1,9) +hasLocation(#20064,#20065) +regexp_const_value(#20064,"&") +#20066=* +regexpterm(#20066,23,#20052,3,"[bcd]") +#20067=@"loc,{#10000},1,10,1,14" +locations_default(#20067,#10000,1,10,1,14) +hasLocation(#20066,#20067) +#20068=* +regexpterm(#20068,14,#20066,0,"b") +#20069=@"loc,{#10000},1,11,1,11" +locations_default(#20069,#10000,1,11,1,11) +hasLocation(#20068,#20069) +regexp_const_value(#20068,"b") +#20070=* +regexpterm(#20070,14,#20066,1,"c") +#20071=@"loc,{#10000},1,12,1,12" +locations_default(#20071,#10000,1,12,1,12) +hasLocation(#20070,#20071) +regexp_const_value(#20070,"c") +#20072=* +regexpterm(#20072,14,#20066,2,"d") +#20073=@"loc,{#10000},1,13,1,13" +locations_default(#20073,#10000,1,13,1,13) +hasLocation(#20072,#20073) +regexp_const_value(#20072,"d") +#20074=* +stmts(#20074,2,#20001,1,"/abc&&bcd/v;") +#20075=@"loc,{#10000},2,1,2,12" +locations_default(#20075,#10000,2,1,2,12) +hasLocation(#20074,#20075) +stmt_containers(#20074,#20001) +#20076=* +exprs(#20076,5,#20074,0,"/abc&&bcd/v") +hasLocation(#20076,#20035) 
+enclosing_stmt(#20076,#20074) +expr_containers(#20076,#20001) +literals("/abc&&bcd/v","/abc&&bcd/v",#20076) +#20077=* +regexpterm(#20077,14,#20076,0,"abc&&bcd") +#20078=@"loc,{#10000},2,2,2,9" +locations_default(#20078,#10000,2,2,2,9) +hasLocation(#20077,#20078) +regexp_const_value(#20077,"abc&&bcd") +#20079=* +stmts(#20079,2,#20001,2,"/[abc]&&[bcd]/v;") +#20080=@"loc,{#10000},3,1,3,16" +locations_default(#20080,#10000,3,1,3,16) +hasLocation(#20079,#20080) +stmt_containers(#20079,#20001) +#20081=* +exprs(#20081,5,#20079,0,"/[abc]&&[bcd]/v") +hasLocation(#20081,#20039) +enclosing_stmt(#20081,#20079) +expr_containers(#20081,#20001) +literals("/[abc]&&[bcd]/v","/[abc]&&[bcd]/v",#20081) +#20082=* +regexpterm(#20082,1,#20081,0,"[abc]&&[bcd]") +#20083=@"loc,{#10000},3,2,3,13" +locations_default(#20083,#10000,3,2,3,13) +hasLocation(#20082,#20083) +#20084=* +regexpterm(#20084,23,#20082,0,"[abc]") +#20085=@"loc,{#10000},3,2,3,6" +locations_default(#20085,#10000,3,2,3,6) +hasLocation(#20084,#20085) +#20086=* +regexpterm(#20086,14,#20084,0,"a") +#20087=@"loc,{#10000},3,3,3,3" +locations_default(#20087,#10000,3,3,3,3) +hasLocation(#20086,#20087) +regexp_const_value(#20086,"a") +#20088=* +regexpterm(#20088,14,#20084,1,"b") +#20089=@"loc,{#10000},3,4,3,4" +locations_default(#20089,#10000,3,4,3,4) +hasLocation(#20088,#20089) +regexp_const_value(#20088,"b") +#20090=* +regexpterm(#20090,14,#20084,2,"c") +#20091=@"loc,{#10000},3,5,3,5" +locations_default(#20091,#10000,3,5,3,5) +hasLocation(#20090,#20091) +regexp_const_value(#20090,"c") +#20092=* +regexpterm(#20092,14,#20082,1,"&&") +#20093=@"loc,{#10000},3,7,3,8" +locations_default(#20093,#10000,3,7,3,8) +hasLocation(#20092,#20093) +regexp_const_value(#20092,"&&") +#20094=* +regexpterm(#20094,23,#20082,2,"[bcd]") +#20095=@"loc,{#10000},3,9,3,13" +locations_default(#20095,#10000,3,9,3,13) +hasLocation(#20094,#20095) +#20096=* +regexpterm(#20096,14,#20094,0,"b") +#20097=@"loc,{#10000},3,10,3,10" 
+locations_default(#20097,#10000,3,10,3,10) +hasLocation(#20096,#20097) +regexp_const_value(#20096,"b") +#20098=* +regexpterm(#20098,14,#20094,1,"c") +#20099=@"loc,{#10000},3,11,3,11" +locations_default(#20099,#10000,3,11,3,11) +hasLocation(#20098,#20099) +regexp_const_value(#20098,"c") +#20100=* +regexpterm(#20100,14,#20094,2,"d") +#20101=@"loc,{#10000},3,12,3,12" +locations_default(#20101,#10000,3,12,3,12) +hasLocation(#20100,#20101) +regexp_const_value(#20100,"d") +#20102=* +stmts(#20102,2,#20001,3,"/[[abc] ... [c]]/v;") +#20103=@"loc,{#10000},7,1,7,23" +locations_default(#20103,#10000,7,1,7,23) +hasLocation(#20102,#20103) +stmt_containers(#20102,#20001) +#20104=* +exprs(#20104,5,#20102,0,"/[[abc] ... &[c]]/v") +hasLocation(#20104,#20043) +enclosing_stmt(#20104,#20102) +expr_containers(#20104,#20001) +literals("/[[abc]&&[bcd]&&[c]]/v","/[[abc]&&[bcd]&&[c]]/v",#20104) +#20105=* +regexpterm(#20105,23,#20104,0,"[[abc]&&[bcd]&&[c]]") +#20106=@"loc,{#10000},7,2,7,20" +locations_default(#20106,#10000,7,2,7,20) +hasLocation(#20105,#20106) +#20107=* +regexpterm(#20107,23,#20105,0,"[abc]") +#20108=@"loc,{#10000},7,3,7,7" +locations_default(#20108,#10000,7,3,7,7) +hasLocation(#20107,#20108) +#20109=* +regexpterm(#20109,14,#20107,0,"a") +#20110=@"loc,{#10000},7,4,7,4" +locations_default(#20110,#10000,7,4,7,4) +hasLocation(#20109,#20110) +regexp_const_value(#20109,"a") +#20111=* +regexpterm(#20111,14,#20107,1,"b") +#20112=@"loc,{#10000},7,5,7,5" +locations_default(#20112,#10000,7,5,7,5) +hasLocation(#20111,#20112) +regexp_const_value(#20111,"b") +#20113=* +regexpterm(#20113,14,#20107,2,"c") +#20114=@"loc,{#10000},7,6,7,6" +locations_default(#20114,#10000,7,6,7,6) +hasLocation(#20113,#20114) +regexp_const_value(#20113,"c") +#20115=* +regexpterm(#20115,14,#20105,1,"&") +#20116=@"loc,{#10000},7,8,7,8" +locations_default(#20116,#10000,7,8,7,8) +hasLocation(#20115,#20116) +regexp_const_value(#20115,"&") +#20117=* +regexpterm(#20117,14,#20105,2,"&") 
+#20118=@"loc,{#10000},7,9,7,9" +locations_default(#20118,#10000,7,9,7,9) +hasLocation(#20117,#20118) +regexp_const_value(#20117,"&") +#20119=* +regexpterm(#20119,23,#20105,3,"[bcd]") +#20120=@"loc,{#10000},7,10,7,14" +locations_default(#20120,#10000,7,10,7,14) +hasLocation(#20119,#20120) +#20121=* +regexpterm(#20121,14,#20119,0,"b") +#20122=@"loc,{#10000},7,11,7,11" +locations_default(#20122,#10000,7,11,7,11) +hasLocation(#20121,#20122) +regexp_const_value(#20121,"b") +#20123=* +regexpterm(#20123,14,#20119,1,"c") +#20124=@"loc,{#10000},7,12,7,12" +locations_default(#20124,#10000,7,12,7,12) +hasLocation(#20123,#20124) +regexp_const_value(#20123,"c") +#20125=* +regexpterm(#20125,14,#20119,2,"d") +#20126=@"loc,{#10000},7,13,7,13" +locations_default(#20126,#10000,7,13,7,13) +hasLocation(#20125,#20126) +regexp_const_value(#20125,"d") +#20127=* +regexpterm(#20127,14,#20105,4,"&") +#20128=@"loc,{#10000},7,15,7,15" +locations_default(#20128,#10000,7,15,7,15) +hasLocation(#20127,#20128) +regexp_const_value(#20127,"&") +#20129=* +regexpterm(#20129,14,#20105,5,"&") +#20130=@"loc,{#10000},7,16,7,16" +locations_default(#20130,#10000,7,16,7,16) +hasLocation(#20129,#20130) +regexp_const_value(#20129,"&") +#20131=* +regexpterm(#20131,23,#20105,6,"[c]") +#20132=@"loc,{#10000},7,17,7,19" +locations_default(#20132,#10000,7,17,7,19) +hasLocation(#20131,#20132) +#20133=* +regexpterm(#20133,14,#20131,0,"c") +#20134=@"loc,{#10000},7,18,7,18" +locations_default(#20134,#10000,7,18,7,18) +hasLocation(#20133,#20134) +regexp_const_value(#20133,"c") +#20135=* +entry_cfg_node(#20135,#20001) +#20136=@"loc,{#10000},1,1,1,0" +locations_default(#20136,#10000,1,1,1,0) +hasLocation(#20135,#20136) +#20137=* +exit_cfg_node(#20137,#20001) +hasLocation(#20137,#20047) +successor(#20102,#20104) +successor(#20104,#20137) +successor(#20079,#20081) +successor(#20081,#20102) +successor(#20074,#20076) +successor(#20076,#20079) +successor(#20049,#20051) +successor(#20051,#20074) +successor(#20135,#20049) 
+numlines(#10000,7,4,7) +filetype(#10000,"javascript") From 381b5ebe8adae245cf791729aa4facc8457b070e Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 19:24:32 +0100 Subject: [PATCH 08/27] Added intersection support --- .../regexp/CharacterClassIntersection.java | 22 + .../src/com/semmle/js/ast/regexp/Visitor.java | 2 + .../semmle/js/extractor/RegExpExtractor.java | 10 + .../com/semmle/js/parser/RegExpParser.java | 22 +- .../es2024/output/trap/intersection.js.trap | 420 ++++++++---------- 5 files changed, 250 insertions(+), 226 deletions(-) create mode 100644 javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java new file mode 100644 index 000000000000..6e1a424976d6 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java @@ -0,0 +1,22 @@ +package com.semmle.js.ast.regexp; + +import com.semmle.js.ast.SourceLocation; +import java.util.List; + +public class CharacterClassIntersection extends RegExpTerm { + private final List intersections; + + public CharacterClassIntersection(SourceLocation loc, List intersections) { + super(loc, "CharacterClassIntersection"); + this.intersections = intersections; + } + + @Override + public void accept(Visitor v) { + v.visit(this); + } + + public List getIntersections() { + return intersections; + } +} diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java index 4af27e6aa8a3..31126b63005a 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java @@ -63,4 +63,6 @@ public interface Visitor { public void visit(UnicodePropertyEscape nd); public void visit(CharacterClassQuotedString nd); + + public void 
visit(CharacterClassIntersection nd); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 1fc69458c109..a226ccc9cc61 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -23,6 +23,7 @@ import com.semmle.js.ast.regexp.Group; import com.semmle.js.ast.regexp.HexEscapeSequence; import com.semmle.js.ast.regexp.IdentityEscape; +import com.semmle.js.ast.regexp.CharacterClassIntersection; import com.semmle.js.ast.regexp.Literal; import com.semmle.js.ast.regexp.NamedBackReference; import com.semmle.js.ast.regexp.NonWordBoundary; @@ -94,6 +95,7 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) { termkinds.put("ZeroWidthNegativeLookbehind", 26); termkinds.put("UnicodePropertyEscape", 27); termkinds.put("CharacterClassQuotedString", 28); + termkinds.put("CharacterClassIntersection", 29); } private static final String[] errmsgs = @@ -352,6 +354,14 @@ public void visit(CharacterClassQuotedString nd) { Label lbl = extractTerm(nd, parent, idx); visit(nd.getTerm(), lbl, 0); } + + @Override + public void visit(CharacterClassIntersection nd) { + Label lbl = extractTerm(nd, parent, idx); + int i = 0; + for (RegExpTerm element : nd.getIntersections()) + visit(element, lbl, i++); + } } public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) { diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index e8bd0f3b1efb..ee5da2b6ba3f 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -19,6 +19,7 @@ import com.semmle.js.ast.regexp.Group; import com.semmle.js.ast.regexp.HexEscapeSequence; import 
com.semmle.js.ast.regexp.IdentityEscape; +import com.semmle.js.ast.regexp.CharacterClassIntersection; import com.semmle.js.ast.regexp.NamedBackReference; import com.semmle.js.ast.regexp.NonWordBoundary; import com.semmle.js.ast.regexp.OctalEscape; @@ -37,6 +38,7 @@ import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** A parser for ECMAScript 2018 regular expressions. */ @@ -561,10 +563,16 @@ private RegExpTerm parseCharacterClass() { return this.finishTerm(new CharacterClass(loc, elements, inverted)); } + private enum CharacterClassType { + STANDARD, + INTERSECTION, + } + // ECMA 2024 `v` flag allows nested character classes. private RegExpTerm parseNestedCharacterClass() { SourceLocation loc = new SourceLocation(pos()); List elements = new ArrayList<>(); + CharacterClassType classType = CharacterClassType.STANDARD; this.match("["); boolean inverted = this.match("^"); @@ -576,11 +584,23 @@ private RegExpTerm parseNestedCharacterClass() { if (lookahead("[")) { elements.add(parseNestedCharacterClass()); } + else if (lookahead("&&")) { + this.match("&&"); + classType = CharacterClassType.INTERSECTION; + } else { elements.add(this.parseCharacterClassElement()); } } - return this.finishTerm(new CharacterClass(loc, elements, inverted)); + + // Create appropriate RegExpTerm based on the detected class type + switch (classType) { + case INTERSECTION: + return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted)); + case STANDARD: + default: + return this.finishTerm(new CharacterClass(loc, elements, inverted)); + } } private static final List escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W"); diff --git a/javascript/extractor/tests/es2024/output/trap/intersection.js.trap b/javascript/extractor/tests/es2024/output/trap/intersection.js.trap index 4d65235e7a36..a48eccb5a741 100644 
--- a/javascript/extractor/tests/es2024/output/trap/intersection.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/intersection.js.trap @@ -164,264 +164,234 @@ regexpterm(#20052,23,#20051,0,"[[abc]&&[bcd]]") locations_default(#20053,#10000,1,2,1,15) hasLocation(#20052,#20053) #20054=* -regexpterm(#20054,23,#20052,0,"[abc]") -#20055=@"loc,{#10000},1,3,1,7" -locations_default(#20055,#10000,1,3,1,7) -hasLocation(#20054,#20055) -#20056=* -regexpterm(#20056,14,#20054,0,"a") -#20057=@"loc,{#10000},1,4,1,4" -locations_default(#20057,#10000,1,4,1,4) -hasLocation(#20056,#20057) -regexp_const_value(#20056,"a") -#20058=* -regexpterm(#20058,14,#20054,1,"b") -#20059=@"loc,{#10000},1,5,1,5" -locations_default(#20059,#10000,1,5,1,5) -hasLocation(#20058,#20059) -regexp_const_value(#20058,"b") -#20060=* -regexpterm(#20060,14,#20054,2,"c") -#20061=@"loc,{#10000},1,6,1,6" -locations_default(#20061,#10000,1,6,1,6) -hasLocation(#20060,#20061) -regexp_const_value(#20060,"c") -#20062=* -regexpterm(#20062,14,#20052,1,"&") -#20063=@"loc,{#10000},1,8,1,8" -locations_default(#20063,#10000,1,8,1,8) -hasLocation(#20062,#20063) -regexp_const_value(#20062,"&") -#20064=* -regexpterm(#20064,14,#20052,2,"&") -#20065=@"loc,{#10000},1,9,1,9" -locations_default(#20065,#10000,1,9,1,9) -hasLocation(#20064,#20065) -regexp_const_value(#20064,"&") -#20066=* -regexpterm(#20066,23,#20052,3,"[bcd]") -#20067=@"loc,{#10000},1,10,1,14" -locations_default(#20067,#10000,1,10,1,14) -hasLocation(#20066,#20067) -#20068=* -regexpterm(#20068,14,#20066,0,"b") -#20069=@"loc,{#10000},1,11,1,11" -locations_default(#20069,#10000,1,11,1,11) -hasLocation(#20068,#20069) -regexp_const_value(#20068,"b") -#20070=* -regexpterm(#20070,14,#20066,1,"c") -#20071=@"loc,{#10000},1,12,1,12" -locations_default(#20071,#10000,1,12,1,12) -hasLocation(#20070,#20071) -regexp_const_value(#20070,"c") -#20072=* -regexpterm(#20072,14,#20066,2,"d") -#20073=@"loc,{#10000},1,13,1,13" -locations_default(#20073,#10000,1,13,1,13) 
-hasLocation(#20072,#20073) -regexp_const_value(#20072,"d") +regexpterm(#20054,29,#20052,0,"[[abc]&&[bcd]]") +hasLocation(#20054,#20053) +#20055=* +regexpterm(#20055,23,#20054,0,"[abc]") +#20056=@"loc,{#10000},1,3,1,7" +locations_default(#20056,#10000,1,3,1,7) +hasLocation(#20055,#20056) +#20057=* +regexpterm(#20057,14,#20055,0,"a") +#20058=@"loc,{#10000},1,4,1,4" +locations_default(#20058,#10000,1,4,1,4) +hasLocation(#20057,#20058) +regexp_const_value(#20057,"a") +#20059=* +regexpterm(#20059,14,#20055,1,"b") +#20060=@"loc,{#10000},1,5,1,5" +locations_default(#20060,#10000,1,5,1,5) +hasLocation(#20059,#20060) +regexp_const_value(#20059,"b") +#20061=* +regexpterm(#20061,14,#20055,2,"c") +#20062=@"loc,{#10000},1,6,1,6" +locations_default(#20062,#10000,1,6,1,6) +hasLocation(#20061,#20062) +regexp_const_value(#20061,"c") +#20063=* +regexpterm(#20063,23,#20054,1,"[bcd]") +#20064=@"loc,{#10000},1,10,1,14" +locations_default(#20064,#10000,1,10,1,14) +hasLocation(#20063,#20064) +#20065=* +regexpterm(#20065,14,#20063,0,"b") +#20066=@"loc,{#10000},1,11,1,11" +locations_default(#20066,#10000,1,11,1,11) +hasLocation(#20065,#20066) +regexp_const_value(#20065,"b") +#20067=* +regexpterm(#20067,14,#20063,1,"c") +#20068=@"loc,{#10000},1,12,1,12" +locations_default(#20068,#10000,1,12,1,12) +hasLocation(#20067,#20068) +regexp_const_value(#20067,"c") +#20069=* +regexpterm(#20069,14,#20063,2,"d") +#20070=@"loc,{#10000},1,13,1,13" +locations_default(#20070,#10000,1,13,1,13) +hasLocation(#20069,#20070) +regexp_const_value(#20069,"d") +#20071=* +stmts(#20071,2,#20001,1,"/abc&&bcd/v;") +#20072=@"loc,{#10000},2,1,2,12" +locations_default(#20072,#10000,2,1,2,12) +hasLocation(#20071,#20072) +stmt_containers(#20071,#20001) +#20073=* +exprs(#20073,5,#20071,0,"/abc&&bcd/v") +hasLocation(#20073,#20035) +enclosing_stmt(#20073,#20071) +expr_containers(#20073,#20001) +literals("/abc&&bcd/v","/abc&&bcd/v",#20073) #20074=* -stmts(#20074,2,#20001,1,"/abc&&bcd/v;") -#20075=@"loc,{#10000},2,1,2,12" 
-locations_default(#20075,#10000,2,1,2,12) +regexpterm(#20074,14,#20073,0,"abc&&bcd") +#20075=@"loc,{#10000},2,2,2,9" +locations_default(#20075,#10000,2,2,2,9) hasLocation(#20074,#20075) -stmt_containers(#20074,#20001) +regexp_const_value(#20074,"abc&&bcd") #20076=* -exprs(#20076,5,#20074,0,"/abc&&bcd/v") -hasLocation(#20076,#20035) -enclosing_stmt(#20076,#20074) -expr_containers(#20076,#20001) -literals("/abc&&bcd/v","/abc&&bcd/v",#20076) -#20077=* -regexpterm(#20077,14,#20076,0,"abc&&bcd") -#20078=@"loc,{#10000},2,2,2,9" -locations_default(#20078,#10000,2,2,2,9) -hasLocation(#20077,#20078) -regexp_const_value(#20077,"abc&&bcd") +stmts(#20076,2,#20001,2,"/[abc]&&[bcd]/v;") +#20077=@"loc,{#10000},3,1,3,16" +locations_default(#20077,#10000,3,1,3,16) +hasLocation(#20076,#20077) +stmt_containers(#20076,#20001) +#20078=* +exprs(#20078,5,#20076,0,"/[abc]&&[bcd]/v") +hasLocation(#20078,#20039) +enclosing_stmt(#20078,#20076) +expr_containers(#20078,#20001) +literals("/[abc]&&[bcd]/v","/[abc]&&[bcd]/v",#20078) #20079=* -stmts(#20079,2,#20001,2,"/[abc]&&[bcd]/v;") -#20080=@"loc,{#10000},3,1,3,16" -locations_default(#20080,#10000,3,1,3,16) +regexpterm(#20079,1,#20078,0,"[abc]&&[bcd]") +#20080=@"loc,{#10000},3,2,3,13" +locations_default(#20080,#10000,3,2,3,13) hasLocation(#20079,#20080) -stmt_containers(#20079,#20001) #20081=* -exprs(#20081,5,#20079,0,"/[abc]&&[bcd]/v") -hasLocation(#20081,#20039) -enclosing_stmt(#20081,#20079) -expr_containers(#20081,#20001) -literals("/[abc]&&[bcd]/v","/[abc]&&[bcd]/v",#20081) -#20082=* -regexpterm(#20082,1,#20081,0,"[abc]&&[bcd]") -#20083=@"loc,{#10000},3,2,3,13" -locations_default(#20083,#10000,3,2,3,13) -hasLocation(#20082,#20083) -#20084=* -regexpterm(#20084,23,#20082,0,"[abc]") -#20085=@"loc,{#10000},3,2,3,6" -locations_default(#20085,#10000,3,2,3,6) -hasLocation(#20084,#20085) -#20086=* -regexpterm(#20086,14,#20084,0,"a") -#20087=@"loc,{#10000},3,3,3,3" -locations_default(#20087,#10000,3,3,3,3) -hasLocation(#20086,#20087) 
-regexp_const_value(#20086,"a") -#20088=* -regexpterm(#20088,14,#20084,1,"b") -#20089=@"loc,{#10000},3,4,3,4" -locations_default(#20089,#10000,3,4,3,4) -hasLocation(#20088,#20089) -regexp_const_value(#20088,"b") -#20090=* -regexpterm(#20090,14,#20084,2,"c") -#20091=@"loc,{#10000},3,5,3,5" -locations_default(#20091,#10000,3,5,3,5) -hasLocation(#20090,#20091) -regexp_const_value(#20090,"c") -#20092=* -regexpterm(#20092,14,#20082,1,"&&") -#20093=@"loc,{#10000},3,7,3,8" -locations_default(#20093,#10000,3,7,3,8) -hasLocation(#20092,#20093) -regexp_const_value(#20092,"&&") -#20094=* -regexpterm(#20094,23,#20082,2,"[bcd]") -#20095=@"loc,{#10000},3,9,3,13" -locations_default(#20095,#10000,3,9,3,13) -hasLocation(#20094,#20095) -#20096=* -regexpterm(#20096,14,#20094,0,"b") -#20097=@"loc,{#10000},3,10,3,10" -locations_default(#20097,#10000,3,10,3,10) -hasLocation(#20096,#20097) -regexp_const_value(#20096,"b") -#20098=* -regexpterm(#20098,14,#20094,1,"c") -#20099=@"loc,{#10000},3,11,3,11" -locations_default(#20099,#10000,3,11,3,11) -hasLocation(#20098,#20099) -regexp_const_value(#20098,"c") -#20100=* -regexpterm(#20100,14,#20094,2,"d") -#20101=@"loc,{#10000},3,12,3,12" -locations_default(#20101,#10000,3,12,3,12) -hasLocation(#20100,#20101) -regexp_const_value(#20100,"d") +regexpterm(#20081,23,#20079,0,"[abc]") +#20082=@"loc,{#10000},3,2,3,6" +locations_default(#20082,#10000,3,2,3,6) +hasLocation(#20081,#20082) +#20083=* +regexpterm(#20083,14,#20081,0,"a") +#20084=@"loc,{#10000},3,3,3,3" +locations_default(#20084,#10000,3,3,3,3) +hasLocation(#20083,#20084) +regexp_const_value(#20083,"a") +#20085=* +regexpterm(#20085,14,#20081,1,"b") +#20086=@"loc,{#10000},3,4,3,4" +locations_default(#20086,#10000,3,4,3,4) +hasLocation(#20085,#20086) +regexp_const_value(#20085,"b") +#20087=* +regexpterm(#20087,14,#20081,2,"c") +#20088=@"loc,{#10000},3,5,3,5" +locations_default(#20088,#10000,3,5,3,5) +hasLocation(#20087,#20088) +regexp_const_value(#20087,"c") +#20089=* 
+regexpterm(#20089,14,#20079,1,"&&") +#20090=@"loc,{#10000},3,7,3,8" +locations_default(#20090,#10000,3,7,3,8) +hasLocation(#20089,#20090) +regexp_const_value(#20089,"&&") +#20091=* +regexpterm(#20091,23,#20079,2,"[bcd]") +#20092=@"loc,{#10000},3,9,3,13" +locations_default(#20092,#10000,3,9,3,13) +hasLocation(#20091,#20092) +#20093=* +regexpterm(#20093,14,#20091,0,"b") +#20094=@"loc,{#10000},3,10,3,10" +locations_default(#20094,#10000,3,10,3,10) +hasLocation(#20093,#20094) +regexp_const_value(#20093,"b") +#20095=* +regexpterm(#20095,14,#20091,1,"c") +#20096=@"loc,{#10000},3,11,3,11" +locations_default(#20096,#10000,3,11,3,11) +hasLocation(#20095,#20096) +regexp_const_value(#20095,"c") +#20097=* +regexpterm(#20097,14,#20091,2,"d") +#20098=@"loc,{#10000},3,12,3,12" +locations_default(#20098,#10000,3,12,3,12) +hasLocation(#20097,#20098) +regexp_const_value(#20097,"d") +#20099=* +stmts(#20099,2,#20001,3,"/[[abc] ... [c]]/v;") +#20100=@"loc,{#10000},7,1,7,23" +locations_default(#20100,#10000,7,1,7,23) +hasLocation(#20099,#20100) +stmt_containers(#20099,#20001) +#20101=* +exprs(#20101,5,#20099,0,"/[[abc] ... &[c]]/v") +hasLocation(#20101,#20043) +enclosing_stmt(#20101,#20099) +expr_containers(#20101,#20001) +literals("/[[abc]&&[bcd]&&[c]]/v","/[[abc]&&[bcd]&&[c]]/v",#20101) #20102=* -stmts(#20102,2,#20001,3,"/[[abc] ... [c]]/v;") -#20103=@"loc,{#10000},7,1,7,23" -locations_default(#20103,#10000,7,1,7,23) +regexpterm(#20102,23,#20101,0,"[[abc]&&[bcd]&&[c]]") +#20103=@"loc,{#10000},7,2,7,20" +locations_default(#20103,#10000,7,2,7,20) hasLocation(#20102,#20103) -stmt_containers(#20102,#20001) #20104=* -exprs(#20104,5,#20102,0,"/[[abc] ... 
&[c]]/v") -hasLocation(#20104,#20043) -enclosing_stmt(#20104,#20102) -expr_containers(#20104,#20001) -literals("/[[abc]&&[bcd]&&[c]]/v","/[[abc]&&[bcd]&&[c]]/v",#20104) +regexpterm(#20104,29,#20102,0,"[[abc]&&[bcd]&&[c]]") +hasLocation(#20104,#20103) #20105=* -regexpterm(#20105,23,#20104,0,"[[abc]&&[bcd]&&[c]]") -#20106=@"loc,{#10000},7,2,7,20" -locations_default(#20106,#10000,7,2,7,20) +regexpterm(#20105,23,#20104,0,"[abc]") +#20106=@"loc,{#10000},7,3,7,7" +locations_default(#20106,#10000,7,3,7,7) hasLocation(#20105,#20106) #20107=* -regexpterm(#20107,23,#20105,0,"[abc]") -#20108=@"loc,{#10000},7,3,7,7" -locations_default(#20108,#10000,7,3,7,7) +regexpterm(#20107,14,#20105,0,"a") +#20108=@"loc,{#10000},7,4,7,4" +locations_default(#20108,#10000,7,4,7,4) hasLocation(#20107,#20108) +regexp_const_value(#20107,"a") #20109=* -regexpterm(#20109,14,#20107,0,"a") -#20110=@"loc,{#10000},7,4,7,4" -locations_default(#20110,#10000,7,4,7,4) +regexpterm(#20109,14,#20105,1,"b") +#20110=@"loc,{#10000},7,5,7,5" +locations_default(#20110,#10000,7,5,7,5) hasLocation(#20109,#20110) -regexp_const_value(#20109,"a") +regexp_const_value(#20109,"b") #20111=* -regexpterm(#20111,14,#20107,1,"b") -#20112=@"loc,{#10000},7,5,7,5" -locations_default(#20112,#10000,7,5,7,5) +regexpterm(#20111,14,#20105,2,"c") +#20112=@"loc,{#10000},7,6,7,6" +locations_default(#20112,#10000,7,6,7,6) hasLocation(#20111,#20112) -regexp_const_value(#20111,"b") +regexp_const_value(#20111,"c") #20113=* -regexpterm(#20113,14,#20107,2,"c") -#20114=@"loc,{#10000},7,6,7,6" -locations_default(#20114,#10000,7,6,7,6) +regexpterm(#20113,23,#20104,1,"[bcd]") +#20114=@"loc,{#10000},7,10,7,14" +locations_default(#20114,#10000,7,10,7,14) hasLocation(#20113,#20114) -regexp_const_value(#20113,"c") #20115=* -regexpterm(#20115,14,#20105,1,"&") -#20116=@"loc,{#10000},7,8,7,8" -locations_default(#20116,#10000,7,8,7,8) +regexpterm(#20115,14,#20113,0,"b") +#20116=@"loc,{#10000},7,11,7,11" +locations_default(#20116,#10000,7,11,7,11) 
hasLocation(#20115,#20116) -regexp_const_value(#20115,"&") +regexp_const_value(#20115,"b") #20117=* -regexpterm(#20117,14,#20105,2,"&") -#20118=@"loc,{#10000},7,9,7,9" -locations_default(#20118,#10000,7,9,7,9) +regexpterm(#20117,14,#20113,1,"c") +#20118=@"loc,{#10000},7,12,7,12" +locations_default(#20118,#10000,7,12,7,12) hasLocation(#20117,#20118) -regexp_const_value(#20117,"&") +regexp_const_value(#20117,"c") #20119=* -regexpterm(#20119,23,#20105,3,"[bcd]") -#20120=@"loc,{#10000},7,10,7,14" -locations_default(#20120,#10000,7,10,7,14) +regexpterm(#20119,14,#20113,2,"d") +#20120=@"loc,{#10000},7,13,7,13" +locations_default(#20120,#10000,7,13,7,13) hasLocation(#20119,#20120) +regexp_const_value(#20119,"d") #20121=* -regexpterm(#20121,14,#20119,0,"b") -#20122=@"loc,{#10000},7,11,7,11" -locations_default(#20122,#10000,7,11,7,11) +regexpterm(#20121,23,#20104,2,"[c]") +#20122=@"loc,{#10000},7,17,7,19" +locations_default(#20122,#10000,7,17,7,19) hasLocation(#20121,#20122) -regexp_const_value(#20121,"b") #20123=* -regexpterm(#20123,14,#20119,1,"c") -#20124=@"loc,{#10000},7,12,7,12" -locations_default(#20124,#10000,7,12,7,12) +regexpterm(#20123,14,#20121,0,"c") +#20124=@"loc,{#10000},7,18,7,18" +locations_default(#20124,#10000,7,18,7,18) hasLocation(#20123,#20124) regexp_const_value(#20123,"c") #20125=* -regexpterm(#20125,14,#20119,2,"d") -#20126=@"loc,{#10000},7,13,7,13" -locations_default(#20126,#10000,7,13,7,13) +entry_cfg_node(#20125,#20001) +#20126=@"loc,{#10000},1,1,1,0" +locations_default(#20126,#10000,1,1,1,0) hasLocation(#20125,#20126) -regexp_const_value(#20125,"d") #20127=* -regexpterm(#20127,14,#20105,4,"&") -#20128=@"loc,{#10000},7,15,7,15" -locations_default(#20128,#10000,7,15,7,15) -hasLocation(#20127,#20128) -regexp_const_value(#20127,"&") -#20129=* -regexpterm(#20129,14,#20105,5,"&") -#20130=@"loc,{#10000},7,16,7,16" -locations_default(#20130,#10000,7,16,7,16) -hasLocation(#20129,#20130) -regexp_const_value(#20129,"&") -#20131=* 
-regexpterm(#20131,23,#20105,6,"[c]") -#20132=@"loc,{#10000},7,17,7,19" -locations_default(#20132,#10000,7,17,7,19) -hasLocation(#20131,#20132) -#20133=* -regexpterm(#20133,14,#20131,0,"c") -#20134=@"loc,{#10000},7,18,7,18" -locations_default(#20134,#10000,7,18,7,18) -hasLocation(#20133,#20134) -regexp_const_value(#20133,"c") -#20135=* -entry_cfg_node(#20135,#20001) -#20136=@"loc,{#10000},1,1,1,0" -locations_default(#20136,#10000,1,1,1,0) -hasLocation(#20135,#20136) -#20137=* -exit_cfg_node(#20137,#20001) -hasLocation(#20137,#20047) -successor(#20102,#20104) -successor(#20104,#20137) -successor(#20079,#20081) -successor(#20081,#20102) -successor(#20074,#20076) -successor(#20076,#20079) +exit_cfg_node(#20127,#20001) +hasLocation(#20127,#20047) +successor(#20099,#20101) +successor(#20101,#20127) +successor(#20076,#20078) +successor(#20078,#20099) +successor(#20071,#20073) +successor(#20073,#20076) successor(#20049,#20051) -successor(#20051,#20074) -successor(#20135,#20049) +successor(#20051,#20071) +successor(#20125,#20049) numlines(#10000,7,4,7) filetype(#10000,"javascript") From ee83c42b7197b7b7fafcf88d541ff1ecdef3795d Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 19:27:10 +0100 Subject: [PATCH 09/27] Added test cases for subtraction `--`. 
--- .../tests/es2024/input/subtraction.js | 3 + .../es2024/output/trap/subtraction.js.trap | 328 ++++++++++++++++++ 2 files changed, 331 insertions(+) create mode 100644 javascript/extractor/tests/es2024/input/subtraction.js create mode 100644 javascript/extractor/tests/es2024/output/trap/subtraction.js.trap diff --git a/javascript/extractor/tests/es2024/input/subtraction.js b/javascript/extractor/tests/es2024/input/subtraction.js new file mode 100644 index 000000000000..918375fb911c --- /dev/null +++ b/javascript/extractor/tests/es2024/input/subtraction.js @@ -0,0 +1,3 @@ +/[\p{Script_Extensions=Greek}--\p{Letter}]/v; +/[[abc]--[cbd]]/v; +/[[abc]--[cbd]--[bde]]/v; diff --git a/javascript/extractor/tests/es2024/output/trap/subtraction.js.trap b/javascript/extractor/tests/es2024/output/trap/subtraction.js.trap new file mode 100644 index 000000000000..7b0fdb9be178 --- /dev/null +++ b/javascript/extractor/tests/es2024/output/trap/subtraction.js.trap @@ -0,0 +1,328 @@ +#10000=@"/subtraction.js;sourcefile" +files(#10000,"/subtraction.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"/[\p{Script_Extensions=Greek}--\p{Letter}]/v;"," +") +#20003=@"loc,{#10000},1,1,1,45" +locations_default(#20003,#10000,1,1,1,45) +hasLocation(#20002,#20003) +#20004=* +lines(#20004,#20001,"/[[abc]--[cbd]]/v;"," +") +#20005=@"loc,{#10000},2,1,2,18" +locations_default(#20005,#10000,2,1,2,18) +hasLocation(#20004,#20005) +#20006=* +lines(#20006,#20001,"/[[abc]--[cbd]--[bde]]/v;"," +") +#20007=@"loc,{#10000},3,1,3,25" +locations_default(#20007,#10000,3,1,3,25) +hasLocation(#20006,#20007) +numlines(#20001,3,3,0) +#20008=* +tokeninfo(#20008,5,#20001,0,"/[\p{Script_Extensions=Greek}--\p{Letter}]/v") +#20009=@"loc,{#10000},1,1,1,44" 
+locations_default(#20009,#10000,1,1,1,44) +hasLocation(#20008,#20009) +#20010=* +tokeninfo(#20010,8,#20001,1,";") +#20011=@"loc,{#10000},1,45,1,45" +locations_default(#20011,#10000,1,45,1,45) +hasLocation(#20010,#20011) +#20012=* +tokeninfo(#20012,5,#20001,2,"/[[abc]--[cbd]]/v") +#20013=@"loc,{#10000},2,1,2,17" +locations_default(#20013,#10000,2,1,2,17) +hasLocation(#20012,#20013) +#20014=* +tokeninfo(#20014,8,#20001,3,";") +#20015=@"loc,{#10000},2,18,2,18" +locations_default(#20015,#10000,2,18,2,18) +hasLocation(#20014,#20015) +#20016=* +tokeninfo(#20016,5,#20001,4,"/[[abc]--[cbd]--[bde]]/v") +#20017=@"loc,{#10000},3,1,3,24" +locations_default(#20017,#10000,3,1,3,24) +hasLocation(#20016,#20017) +#20018=* +tokeninfo(#20018,8,#20001,5,";") +#20019=@"loc,{#10000},3,25,3,25" +locations_default(#20019,#10000,3,25,3,25) +hasLocation(#20018,#20019) +#20020=* +tokeninfo(#20020,0,#20001,6,"") +#20021=@"loc,{#10000},4,1,4,0" +locations_default(#20021,#10000,4,1,4,0) +hasLocation(#20020,#20021) +toplevels(#20001,0) +#20022=@"loc,{#10000},1,1,4,0" +locations_default(#20022,#10000,1,1,4,0) +hasLocation(#20001,#20022) +#20023=* +stmts(#20023,2,#20001,0,"/[\p{Sc ... er}]/v;") +hasLocation(#20023,#20003) +stmt_containers(#20023,#20001) +#20024=* +exprs(#20024,5,#20023,0,"/[\p{Sc ... 
ter}]/v") +hasLocation(#20024,#20009) +enclosing_stmt(#20024,#20023) +expr_containers(#20024,#20001) +literals("/[\p{Script_Extensions=Greek}--\p{Letter}]/v","/[\p{Script_Extensions=Greek}--\p{Letter}]/v",#20024) +#20025=* +regexpterm(#20025,23,#20024,0,"[\p{Script_Extensions=Greek}--\p{Letter}]") +#20026=@"loc,{#10000},1,2,1,42" +locations_default(#20026,#10000,1,2,1,42) +hasLocation(#20025,#20026) +#20027=* +regexpterm(#20027,24,#20025,0,"\p{Script_Extensions=Greek}--") +#20028=@"loc,{#10000},1,3,1,31" +locations_default(#20028,#10000,1,3,1,31) +hasLocation(#20027,#20028) +#20029=* +regexpterm(#20029,27,#20027,0,"\p{Script_Extensions=Greek}") +#20030=@"loc,{#10000},1,3,1,29" +locations_default(#20030,#10000,1,3,1,29) +hasLocation(#20029,#20030) +unicode_property_escapename(#20029,"Script_Extensions") +unicode_property_escapevalue(#20029,"Greek") +#20031=* +regexpterm(#20031,14,#20027,1,"-") +#20032=@"loc,{#10000},1,31,1,31" +locations_default(#20032,#10000,1,31,1,31) +hasLocation(#20031,#20032) +regexp_const_value(#20031,"-") +#20033=* +regexpterm(#20033,27,#20025,1,"\p{Letter}") +#20034=@"loc,{#10000},1,32,1,41" +locations_default(#20034,#10000,1,32,1,41) +hasLocation(#20033,#20034) +unicode_property_escapename(#20033,"Letter") +#20035=* +stmts(#20035,2,#20001,1,"/[[abc]--[cbd]]/v;") +hasLocation(#20035,#20005) +stmt_containers(#20035,#20001) +#20036=* +exprs(#20036,5,#20035,0,"/[[abc]--[cbd]]/v") +hasLocation(#20036,#20013) +enclosing_stmt(#20036,#20035) +expr_containers(#20036,#20001) +literals("/[[abc]--[cbd]]/v","/[[abc]--[cbd]]/v",#20036) +#20037=* +regexpterm(#20037,1,#20036,0,"[[abc]--[cbd]]") +#20038=@"loc,{#10000},2,2,2,15" +locations_default(#20038,#10000,2,2,2,15) +hasLocation(#20037,#20038) +#20039=* +regexpterm(#20039,23,#20037,0,"[[abc]--[cbd]") +#20040=@"loc,{#10000},2,2,2,14" +locations_default(#20040,#10000,2,2,2,14) +hasLocation(#20039,#20040) +#20041=* +regexpterm(#20041,23,#20039,0,"[abc]") +#20042=@"loc,{#10000},2,3,2,7" 
+locations_default(#20042,#10000,2,3,2,7) +hasLocation(#20041,#20042) +#20043=* +regexpterm(#20043,14,#20041,0,"a") +#20044=@"loc,{#10000},2,4,2,4" +locations_default(#20044,#10000,2,4,2,4) +hasLocation(#20043,#20044) +regexp_const_value(#20043,"a") +#20045=* +regexpterm(#20045,14,#20041,1,"b") +#20046=@"loc,{#10000},2,5,2,5" +locations_default(#20046,#10000,2,5,2,5) +hasLocation(#20045,#20046) +regexp_const_value(#20045,"b") +#20047=* +regexpterm(#20047,14,#20041,2,"c") +#20048=@"loc,{#10000},2,6,2,6" +locations_default(#20048,#10000,2,6,2,6) +hasLocation(#20047,#20048) +regexp_const_value(#20047,"c") +#20049=* +regexpterm(#20049,24,#20039,1,"--[") +#20050=@"loc,{#10000},2,8,2,10" +locations_default(#20050,#10000,2,8,2,10) +hasLocation(#20049,#20050) +#20051=* +regexpterm(#20051,14,#20049,0,"-") +#20052=@"loc,{#10000},2,8,2,8" +locations_default(#20052,#10000,2,8,2,8) +hasLocation(#20051,#20052) +regexp_const_value(#20051,"-") +#20053=* +regexpterm(#20053,14,#20049,1,"[") +#20054=@"loc,{#10000},2,10,2,10" +locations_default(#20054,#10000,2,10,2,10) +hasLocation(#20053,#20054) +regexp_const_value(#20053,"[") +#20055=* +regexpterm(#20055,14,#20039,2,"c") +#20056=@"loc,{#10000},2,11,2,11" +locations_default(#20056,#10000,2,11,2,11) +hasLocation(#20055,#20056) +regexp_const_value(#20055,"c") +#20057=* +regexpterm(#20057,14,#20039,3,"b") +#20058=@"loc,{#10000},2,12,2,12" +locations_default(#20058,#10000,2,12,2,12) +hasLocation(#20057,#20058) +regexp_const_value(#20057,"b") +#20059=* +regexpterm(#20059,14,#20039,4,"d") +#20060=@"loc,{#10000},2,13,2,13" +locations_default(#20060,#10000,2,13,2,13) +hasLocation(#20059,#20060) +regexp_const_value(#20059,"d") +#20061=* +regexpterm(#20061,14,#20037,1,"]") +#20062=@"loc,{#10000},2,15,2,15" +locations_default(#20062,#10000,2,15,2,15) +hasLocation(#20061,#20062) +regexp_const_value(#20061,"]") +#20063=* +regexp_parse_errors(#20063,#20037,"unexpected character") +hasLocation(#20063,#20062) +#20064=* 
+stmts(#20064,2,#20001,2,"/[[abc] ... de]]/v;") +hasLocation(#20064,#20007) +stmt_containers(#20064,#20001) +#20065=* +exprs(#20065,5,#20064,0,"/[[abc] ... bde]]/v") +hasLocation(#20065,#20017) +enclosing_stmt(#20065,#20064) +expr_containers(#20065,#20001) +literals("/[[abc]--[cbd]--[bde]]/v","/[[abc]--[cbd]--[bde]]/v",#20065) +#20066=* +regexpterm(#20066,1,#20065,0,"[[abc]--[cbd]--[bde]]") +#20067=@"loc,{#10000},3,2,3,22" +locations_default(#20067,#10000,3,2,3,22) +hasLocation(#20066,#20067) +#20068=* +regexpterm(#20068,23,#20066,0,"[[abc]--[cbd]") +#20069=@"loc,{#10000},3,2,3,14" +locations_default(#20069,#10000,3,2,3,14) +hasLocation(#20068,#20069) +#20070=* +regexpterm(#20070,23,#20068,0,"[abc]") +#20071=@"loc,{#10000},3,3,3,7" +locations_default(#20071,#10000,3,3,3,7) +hasLocation(#20070,#20071) +#20072=* +regexpterm(#20072,14,#20070,0,"a") +#20073=@"loc,{#10000},3,4,3,4" +locations_default(#20073,#10000,3,4,3,4) +hasLocation(#20072,#20073) +regexp_const_value(#20072,"a") +#20074=* +regexpterm(#20074,14,#20070,1,"b") +#20075=@"loc,{#10000},3,5,3,5" +locations_default(#20075,#10000,3,5,3,5) +hasLocation(#20074,#20075) +regexp_const_value(#20074,"b") +#20076=* +regexpterm(#20076,14,#20070,2,"c") +#20077=@"loc,{#10000},3,6,3,6" +locations_default(#20077,#10000,3,6,3,6) +hasLocation(#20076,#20077) +regexp_const_value(#20076,"c") +#20078=* +regexpterm(#20078,24,#20068,1,"--[") +#20079=@"loc,{#10000},3,8,3,10" +locations_default(#20079,#10000,3,8,3,10) +hasLocation(#20078,#20079) +#20080=* +regexpterm(#20080,14,#20078,0,"-") +#20081=@"loc,{#10000},3,8,3,8" +locations_default(#20081,#10000,3,8,3,8) +hasLocation(#20080,#20081) +regexp_const_value(#20080,"-") +#20082=* +regexpterm(#20082,14,#20078,1,"[") +#20083=@"loc,{#10000},3,10,3,10" +locations_default(#20083,#10000,3,10,3,10) +hasLocation(#20082,#20083) +regexp_const_value(#20082,"[") +#20084=* +regexpterm(#20084,14,#20068,2,"c") +#20085=@"loc,{#10000},3,11,3,11" +locations_default(#20085,#10000,3,11,3,11) 
+hasLocation(#20084,#20085) +regexp_const_value(#20084,"c") +#20086=* +regexpterm(#20086,14,#20068,3,"b") +#20087=@"loc,{#10000},3,12,3,12" +locations_default(#20087,#10000,3,12,3,12) +hasLocation(#20086,#20087) +regexp_const_value(#20086,"b") +#20088=* +regexpterm(#20088,14,#20068,4,"d") +#20089=@"loc,{#10000},3,13,3,13" +locations_default(#20089,#10000,3,13,3,13) +hasLocation(#20088,#20089) +regexp_const_value(#20088,"d") +#20090=* +regexpterm(#20090,14,#20066,1,"--") +#20091=@"loc,{#10000},3,15,3,16" +locations_default(#20091,#10000,3,15,3,16) +hasLocation(#20090,#20091) +regexp_const_value(#20090,"--") +#20092=* +regexpterm(#20092,23,#20066,2,"[bde]") +#20093=@"loc,{#10000},3,17,3,21" +locations_default(#20093,#10000,3,17,3,21) +hasLocation(#20092,#20093) +#20094=* +regexpterm(#20094,14,#20092,0,"b") +#20095=@"loc,{#10000},3,18,3,18" +locations_default(#20095,#10000,3,18,3,18) +hasLocation(#20094,#20095) +regexp_const_value(#20094,"b") +#20096=* +regexpterm(#20096,14,#20092,1,"d") +#20097=@"loc,{#10000},3,19,3,19" +locations_default(#20097,#10000,3,19,3,19) +hasLocation(#20096,#20097) +regexp_const_value(#20096,"d") +#20098=* +regexpterm(#20098,14,#20092,2,"e") +#20099=@"loc,{#10000},3,20,3,20" +locations_default(#20099,#10000,3,20,3,20) +hasLocation(#20098,#20099) +regexp_const_value(#20098,"e") +#20100=* +regexpterm(#20100,14,#20066,3,"]") +#20101=@"loc,{#10000},3,22,3,22" +locations_default(#20101,#10000,3,22,3,22) +hasLocation(#20100,#20101) +regexp_const_value(#20100,"]") +#20102=* +regexp_parse_errors(#20102,#20066,"unexpected character") +hasLocation(#20102,#20101) +#20103=* +entry_cfg_node(#20103,#20001) +#20104=@"loc,{#10000},1,1,1,0" +locations_default(#20104,#10000,1,1,1,0) +hasLocation(#20103,#20104) +#20105=* +exit_cfg_node(#20105,#20001) +hasLocation(#20105,#20021) +successor(#20064,#20065) +successor(#20065,#20105) +successor(#20035,#20036) +successor(#20036,#20064) +successor(#20023,#20024) +successor(#20024,#20035) +successor(#20103,#20023) 
+numlines(#10000,3,3,0) +filetype(#10000,"javascript") From 3664d507727ef46e2a8ed076c39dc057975b4aac Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 19:46:56 +0100 Subject: [PATCH 10/27] Added support for `--` subtraction operator. --- .../ast/regexp/CharacterClassSubtraction.java | 22 ++ .../src/com/semmle/js/ast/regexp/Visitor.java | 2 + .../semmle/js/extractor/RegExpExtractor.java | 10 + .../com/semmle/js/parser/RegExpParser.java | 10 +- .../es2024/output/trap/subtraction.js.trap | 334 +++++++----------- 5 files changed, 180 insertions(+), 198 deletions(-) create mode 100644 javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java new file mode 100644 index 000000000000..70ddbfa1b00a --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java @@ -0,0 +1,22 @@ +package com.semmle.js.ast.regexp; + +import com.semmle.js.ast.SourceLocation; +import java.util.List; + +public class CharacterClassSubtraction extends RegExpTerm { + private final List subtraction; + + public CharacterClassSubtraction(SourceLocation loc, List subtraction) { + super(loc, "CharacterClassSubtraction"); + this.subtraction = subtraction; + } + + @Override + public void accept(Visitor v) { + v.visit(this); + } + + public List getSubtraction() { + return subtraction; + } +} diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java index 31126b63005a..373cb727c5a6 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java @@ -65,4 +65,6 @@ public interface Visitor { public void visit(CharacterClassQuotedString nd); public void visit(CharacterClassIntersection nd); + + public void
visit(CharacterClassSubtraction nd); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index a226ccc9cc61..8d382466c411 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -12,6 +12,7 @@ import com.semmle.js.ast.regexp.CharacterClassEscape; import com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; +import com.semmle.js.ast.regexp.CharacterClassSubtraction; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; import com.semmle.js.ast.regexp.ControlLetter; @@ -96,6 +97,7 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) { termkinds.put("UnicodePropertyEscape", 27); termkinds.put("CharacterClassQuotedString", 28); termkinds.put("CharacterClassIntersection", 29); + termkinds.put("CharacterClassSubtraction", 30); } private static final String[] errmsgs = @@ -362,6 +364,14 @@ public void visit(CharacterClassIntersection nd) { for (RegExpTerm element : nd.getIntersections()) visit(element, lbl, i++); } + + @Override + public void visit(CharacterClassSubtraction nd) { + Label lbl = extractTerm(nd, parent, idx); + int i = 0; + for (RegExpTerm element : nd.getSubtraction()) + visit(element, lbl, i++); + } } public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) { diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index ee5da2b6ba3f..18cfddf0171f 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -8,6 +8,7 @@ import com.semmle.js.ast.regexp.CharacterClassEscape; import 
com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; +import com.semmle.js.ast.regexp.CharacterClassSubtraction; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; import com.semmle.js.ast.regexp.ControlLetter; @@ -566,6 +567,7 @@ private RegExpTerm parseCharacterClass() { private enum CharacterClassType { STANDARD, INTERSECTION, + SUBTRACTION, } // ECMA 2024 `v` flag allows nested character classes. @@ -588,6 +590,10 @@ else if (lookahead("&&")) { this.match("&&"); classType = CharacterClassType.INTERSECTION; } + else if (lookahead("--")) { + this.match("--"); + classType = CharacterClassType.SUBTRACTION; + } else { elements.add(this.parseCharacterClassElement()); } @@ -597,6 +603,8 @@ else if (lookahead("&&")) { switch (classType) { case INTERSECTION: return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted)); + case SUBTRACTION: + return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted)); case STANDARD: default: return this.finishTerm(new CharacterClass(loc, elements, inverted)); @@ -614,7 +622,7 @@ private RegExpTerm parseCharacterClassElement() { return atom; } } - if (!this.lookahead("-]") && this.match("-") && !(atom instanceof CharacterClassEscape)) + if (!this.lookahead("-]") && !this.lookahead("--") && this.match("-") && !(atom instanceof CharacterClassEscape)) return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom())); return atom; } diff --git a/javascript/extractor/tests/es2024/output/trap/subtraction.js.trap b/javascript/extractor/tests/es2024/output/trap/subtraction.js.trap index 7b0fdb9be178..ee3dc27da9d5 100644 --- a/javascript/extractor/tests/es2024/output/trap/subtraction.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/subtraction.js.trap @@ -83,246 +83,186 @@ 
regexpterm(#20025,23,#20024,0,"[\p{Script_Extensions=Greek}--\p{Letter}]") locations_default(#20026,#10000,1,2,1,42) hasLocation(#20025,#20026) #20027=* -regexpterm(#20027,24,#20025,0,"\p{Script_Extensions=Greek}--") -#20028=@"loc,{#10000},1,3,1,31" -locations_default(#20028,#10000,1,3,1,31) -hasLocation(#20027,#20028) -#20029=* -regexpterm(#20029,27,#20027,0,"\p{Script_Extensions=Greek}") -#20030=@"loc,{#10000},1,3,1,29" -locations_default(#20030,#10000,1,3,1,29) -hasLocation(#20029,#20030) -unicode_property_escapename(#20029,"Script_Extensions") -unicode_property_escapevalue(#20029,"Greek") -#20031=* -regexpterm(#20031,14,#20027,1,"-") -#20032=@"loc,{#10000},1,31,1,31" -locations_default(#20032,#10000,1,31,1,31) -hasLocation(#20031,#20032) -regexp_const_value(#20031,"-") +regexpterm(#20027,30,#20025,0,"[\p{Script_Extensions=Greek}--\p{Letter}]") +hasLocation(#20027,#20026) +#20028=* +regexpterm(#20028,27,#20027,0,"\p{Script_Extensions=Greek}") +#20029=@"loc,{#10000},1,3,1,29" +locations_default(#20029,#10000,1,3,1,29) +hasLocation(#20028,#20029) +unicode_property_escapename(#20028,"Script_Extensions") +unicode_property_escapevalue(#20028,"Greek") +#20030=* +regexpterm(#20030,27,#20027,1,"\p{Letter}") +#20031=@"loc,{#10000},1,32,1,41" +locations_default(#20031,#10000,1,32,1,41) +hasLocation(#20030,#20031) +unicode_property_escapename(#20030,"Letter") +#20032=* +stmts(#20032,2,#20001,1,"/[[abc]--[cbd]]/v;") +hasLocation(#20032,#20005) +stmt_containers(#20032,#20001) #20033=* -regexpterm(#20033,27,#20025,1,"\p{Letter}") -#20034=@"loc,{#10000},1,32,1,41" -locations_default(#20034,#10000,1,32,1,41) -hasLocation(#20033,#20034) -unicode_property_escapename(#20033,"Letter") -#20035=* -stmts(#20035,2,#20001,1,"/[[abc]--[cbd]]/v;") -hasLocation(#20035,#20005) -stmt_containers(#20035,#20001) +exprs(#20033,5,#20032,0,"/[[abc]--[cbd]]/v") +hasLocation(#20033,#20013) +enclosing_stmt(#20033,#20032) +expr_containers(#20033,#20001) 
+literals("/[[abc]--[cbd]]/v","/[[abc]--[cbd]]/v",#20033) +#20034=* +regexpterm(#20034,23,#20033,0,"[[abc]--[cbd]]") +#20035=@"loc,{#10000},2,2,2,15" +locations_default(#20035,#10000,2,2,2,15) +hasLocation(#20034,#20035) #20036=* -exprs(#20036,5,#20035,0,"/[[abc]--[cbd]]/v") -hasLocation(#20036,#20013) -enclosing_stmt(#20036,#20035) -expr_containers(#20036,#20001) -literals("/[[abc]--[cbd]]/v","/[[abc]--[cbd]]/v",#20036) +regexpterm(#20036,30,#20034,0,"[[abc]--[cbd]]") +hasLocation(#20036,#20035) #20037=* -regexpterm(#20037,1,#20036,0,"[[abc]--[cbd]]") -#20038=@"loc,{#10000},2,2,2,15" -locations_default(#20038,#10000,2,2,2,15) +regexpterm(#20037,23,#20036,0,"[abc]") +#20038=@"loc,{#10000},2,3,2,7" +locations_default(#20038,#10000,2,3,2,7) hasLocation(#20037,#20038) #20039=* -regexpterm(#20039,23,#20037,0,"[[abc]--[cbd]") -#20040=@"loc,{#10000},2,2,2,14" -locations_default(#20040,#10000,2,2,2,14) +regexpterm(#20039,14,#20037,0,"a") +#20040=@"loc,{#10000},2,4,2,4" +locations_default(#20040,#10000,2,4,2,4) hasLocation(#20039,#20040) +regexp_const_value(#20039,"a") #20041=* -regexpterm(#20041,23,#20039,0,"[abc]") -#20042=@"loc,{#10000},2,3,2,7" -locations_default(#20042,#10000,2,3,2,7) +regexpterm(#20041,14,#20037,1,"b") +#20042=@"loc,{#10000},2,5,2,5" +locations_default(#20042,#10000,2,5,2,5) hasLocation(#20041,#20042) +regexp_const_value(#20041,"b") #20043=* -regexpterm(#20043,14,#20041,0,"a") -#20044=@"loc,{#10000},2,4,2,4" -locations_default(#20044,#10000,2,4,2,4) +regexpterm(#20043,14,#20037,2,"c") +#20044=@"loc,{#10000},2,6,2,6" +locations_default(#20044,#10000,2,6,2,6) hasLocation(#20043,#20044) -regexp_const_value(#20043,"a") +regexp_const_value(#20043,"c") #20045=* -regexpterm(#20045,14,#20041,1,"b") -#20046=@"loc,{#10000},2,5,2,5" -locations_default(#20046,#10000,2,5,2,5) +regexpterm(#20045,23,#20036,1,"[cbd]") +#20046=@"loc,{#10000},2,10,2,14" +locations_default(#20046,#10000,2,10,2,14) hasLocation(#20045,#20046) -regexp_const_value(#20045,"b") #20047=* 
-regexpterm(#20047,14,#20041,2,"c") -#20048=@"loc,{#10000},2,6,2,6" -locations_default(#20048,#10000,2,6,2,6) +regexpterm(#20047,14,#20045,0,"c") +#20048=@"loc,{#10000},2,11,2,11" +locations_default(#20048,#10000,2,11,2,11) hasLocation(#20047,#20048) regexp_const_value(#20047,"c") #20049=* -regexpterm(#20049,24,#20039,1,"--[") -#20050=@"loc,{#10000},2,8,2,10" -locations_default(#20050,#10000,2,8,2,10) +regexpterm(#20049,14,#20045,1,"b") +#20050=@"loc,{#10000},2,12,2,12" +locations_default(#20050,#10000,2,12,2,12) hasLocation(#20049,#20050) +regexp_const_value(#20049,"b") #20051=* -regexpterm(#20051,14,#20049,0,"-") -#20052=@"loc,{#10000},2,8,2,8" -locations_default(#20052,#10000,2,8,2,8) +regexpterm(#20051,14,#20045,2,"d") +#20052=@"loc,{#10000},2,13,2,13" +locations_default(#20052,#10000,2,13,2,13) hasLocation(#20051,#20052) -regexp_const_value(#20051,"-") +regexp_const_value(#20051,"d") #20053=* -regexpterm(#20053,14,#20049,1,"[") -#20054=@"loc,{#10000},2,10,2,10" -locations_default(#20054,#10000,2,10,2,10) -hasLocation(#20053,#20054) -regexp_const_value(#20053,"[") +stmts(#20053,2,#20001,2,"/[[abc] ... de]]/v;") +hasLocation(#20053,#20007) +stmt_containers(#20053,#20001) +#20054=* +exprs(#20054,5,#20053,0,"/[[abc] ... 
bde]]/v") +hasLocation(#20054,#20017) +enclosing_stmt(#20054,#20053) +expr_containers(#20054,#20001) +literals("/[[abc]--[cbd]--[bde]]/v","/[[abc]--[cbd]--[bde]]/v",#20054) #20055=* -regexpterm(#20055,14,#20039,2,"c") -#20056=@"loc,{#10000},2,11,2,11" -locations_default(#20056,#10000,2,11,2,11) +regexpterm(#20055,23,#20054,0,"[[abc]--[cbd]--[bde]]") +#20056=@"loc,{#10000},3,2,3,22" +locations_default(#20056,#10000,3,2,3,22) hasLocation(#20055,#20056) -regexp_const_value(#20055,"c") #20057=* -regexpterm(#20057,14,#20039,3,"b") -#20058=@"loc,{#10000},2,12,2,12" -locations_default(#20058,#10000,2,12,2,12) -hasLocation(#20057,#20058) -regexp_const_value(#20057,"b") -#20059=* -regexpterm(#20059,14,#20039,4,"d") -#20060=@"loc,{#10000},2,13,2,13" -locations_default(#20060,#10000,2,13,2,13) -hasLocation(#20059,#20060) -regexp_const_value(#20059,"d") -#20061=* -regexpterm(#20061,14,#20037,1,"]") -#20062=@"loc,{#10000},2,15,2,15" -locations_default(#20062,#10000,2,15,2,15) -hasLocation(#20061,#20062) -regexp_const_value(#20061,"]") -#20063=* -regexp_parse_errors(#20063,#20037,"unexpected character") -hasLocation(#20063,#20062) +regexpterm(#20057,30,#20055,0,"[[abc]--[cbd]--[bde]]") +hasLocation(#20057,#20056) +#20058=* +regexpterm(#20058,23,#20057,0,"[abc]") +#20059=@"loc,{#10000},3,3,3,7" +locations_default(#20059,#10000,3,3,3,7) +hasLocation(#20058,#20059) +#20060=* +regexpterm(#20060,14,#20058,0,"a") +#20061=@"loc,{#10000},3,4,3,4" +locations_default(#20061,#10000,3,4,3,4) +hasLocation(#20060,#20061) +regexp_const_value(#20060,"a") +#20062=* +regexpterm(#20062,14,#20058,1,"b") +#20063=@"loc,{#10000},3,5,3,5" +locations_default(#20063,#10000,3,5,3,5) +hasLocation(#20062,#20063) +regexp_const_value(#20062,"b") #20064=* -stmts(#20064,2,#20001,2,"/[[abc] ... de]]/v;") -hasLocation(#20064,#20007) -stmt_containers(#20064,#20001) -#20065=* -exprs(#20065,5,#20064,0,"/[[abc] ... 
bde]]/v") -hasLocation(#20065,#20017) -enclosing_stmt(#20065,#20064) -expr_containers(#20065,#20001) -literals("/[[abc]--[cbd]--[bde]]/v","/[[abc]--[cbd]--[bde]]/v",#20065) +regexpterm(#20064,14,#20058,2,"c") +#20065=@"loc,{#10000},3,6,3,6" +locations_default(#20065,#10000,3,6,3,6) +hasLocation(#20064,#20065) +regexp_const_value(#20064,"c") #20066=* -regexpterm(#20066,1,#20065,0,"[[abc]--[cbd]--[bde]]") -#20067=@"loc,{#10000},3,2,3,22" -locations_default(#20067,#10000,3,2,3,22) +regexpterm(#20066,23,#20057,1,"[cbd]") +#20067=@"loc,{#10000},3,10,3,14" +locations_default(#20067,#10000,3,10,3,14) hasLocation(#20066,#20067) #20068=* -regexpterm(#20068,23,#20066,0,"[[abc]--[cbd]") -#20069=@"loc,{#10000},3,2,3,14" -locations_default(#20069,#10000,3,2,3,14) +regexpterm(#20068,14,#20066,0,"c") +#20069=@"loc,{#10000},3,11,3,11" +locations_default(#20069,#10000,3,11,3,11) hasLocation(#20068,#20069) +regexp_const_value(#20068,"c") #20070=* -regexpterm(#20070,23,#20068,0,"[abc]") -#20071=@"loc,{#10000},3,3,3,7" -locations_default(#20071,#10000,3,3,3,7) +regexpterm(#20070,14,#20066,1,"b") +#20071=@"loc,{#10000},3,12,3,12" +locations_default(#20071,#10000,3,12,3,12) hasLocation(#20070,#20071) +regexp_const_value(#20070,"b") #20072=* -regexpterm(#20072,14,#20070,0,"a") -#20073=@"loc,{#10000},3,4,3,4" -locations_default(#20073,#10000,3,4,3,4) +regexpterm(#20072,14,#20066,2,"d") +#20073=@"loc,{#10000},3,13,3,13" +locations_default(#20073,#10000,3,13,3,13) hasLocation(#20072,#20073) -regexp_const_value(#20072,"a") +regexp_const_value(#20072,"d") #20074=* -regexpterm(#20074,14,#20070,1,"b") -#20075=@"loc,{#10000},3,5,3,5" -locations_default(#20075,#10000,3,5,3,5) +regexpterm(#20074,23,#20057,2,"[bde]") +#20075=@"loc,{#10000},3,17,3,21" +locations_default(#20075,#10000,3,17,3,21) hasLocation(#20074,#20075) -regexp_const_value(#20074,"b") #20076=* -regexpterm(#20076,14,#20070,2,"c") -#20077=@"loc,{#10000},3,6,3,6" -locations_default(#20077,#10000,3,6,3,6) 
+regexpterm(#20076,14,#20074,0,"b") +#20077=@"loc,{#10000},3,18,3,18" +locations_default(#20077,#10000,3,18,3,18) hasLocation(#20076,#20077) -regexp_const_value(#20076,"c") +regexp_const_value(#20076,"b") #20078=* -regexpterm(#20078,24,#20068,1,"--[") -#20079=@"loc,{#10000},3,8,3,10" -locations_default(#20079,#10000,3,8,3,10) +regexpterm(#20078,14,#20074,1,"d") +#20079=@"loc,{#10000},3,19,3,19" +locations_default(#20079,#10000,3,19,3,19) hasLocation(#20078,#20079) +regexp_const_value(#20078,"d") #20080=* -regexpterm(#20080,14,#20078,0,"-") -#20081=@"loc,{#10000},3,8,3,8" -locations_default(#20081,#10000,3,8,3,8) +regexpterm(#20080,14,#20074,2,"e") +#20081=@"loc,{#10000},3,20,3,20" +locations_default(#20081,#10000,3,20,3,20) hasLocation(#20080,#20081) -regexp_const_value(#20080,"-") +regexp_const_value(#20080,"e") #20082=* -regexpterm(#20082,14,#20078,1,"[") -#20083=@"loc,{#10000},3,10,3,10" -locations_default(#20083,#10000,3,10,3,10) +entry_cfg_node(#20082,#20001) +#20083=@"loc,{#10000},1,1,1,0" +locations_default(#20083,#10000,1,1,1,0) hasLocation(#20082,#20083) -regexp_const_value(#20082,"[") #20084=* -regexpterm(#20084,14,#20068,2,"c") -#20085=@"loc,{#10000},3,11,3,11" -locations_default(#20085,#10000,3,11,3,11) -hasLocation(#20084,#20085) -regexp_const_value(#20084,"c") -#20086=* -regexpterm(#20086,14,#20068,3,"b") -#20087=@"loc,{#10000},3,12,3,12" -locations_default(#20087,#10000,3,12,3,12) -hasLocation(#20086,#20087) -regexp_const_value(#20086,"b") -#20088=* -regexpterm(#20088,14,#20068,4,"d") -#20089=@"loc,{#10000},3,13,3,13" -locations_default(#20089,#10000,3,13,3,13) -hasLocation(#20088,#20089) -regexp_const_value(#20088,"d") -#20090=* -regexpterm(#20090,14,#20066,1,"--") -#20091=@"loc,{#10000},3,15,3,16" -locations_default(#20091,#10000,3,15,3,16) -hasLocation(#20090,#20091) -regexp_const_value(#20090,"--") -#20092=* -regexpterm(#20092,23,#20066,2,"[bde]") -#20093=@"loc,{#10000},3,17,3,21" -locations_default(#20093,#10000,3,17,3,21) 
-hasLocation(#20092,#20093) -#20094=* -regexpterm(#20094,14,#20092,0,"b") -#20095=@"loc,{#10000},3,18,3,18" -locations_default(#20095,#10000,3,18,3,18) -hasLocation(#20094,#20095) -regexp_const_value(#20094,"b") -#20096=* -regexpterm(#20096,14,#20092,1,"d") -#20097=@"loc,{#10000},3,19,3,19" -locations_default(#20097,#10000,3,19,3,19) -hasLocation(#20096,#20097) -regexp_const_value(#20096,"d") -#20098=* -regexpterm(#20098,14,#20092,2,"e") -#20099=@"loc,{#10000},3,20,3,20" -locations_default(#20099,#10000,3,20,3,20) -hasLocation(#20098,#20099) -regexp_const_value(#20098,"e") -#20100=* -regexpterm(#20100,14,#20066,3,"]") -#20101=@"loc,{#10000},3,22,3,22" -locations_default(#20101,#10000,3,22,3,22) -hasLocation(#20100,#20101) -regexp_const_value(#20100,"]") -#20102=* -regexp_parse_errors(#20102,#20066,"unexpected character") -hasLocation(#20102,#20101) -#20103=* -entry_cfg_node(#20103,#20001) -#20104=@"loc,{#10000},1,1,1,0" -locations_default(#20104,#10000,1,1,1,0) -hasLocation(#20103,#20104) -#20105=* -exit_cfg_node(#20105,#20001) -hasLocation(#20105,#20021) -successor(#20064,#20065) -successor(#20065,#20105) -successor(#20035,#20036) -successor(#20036,#20064) +exit_cfg_node(#20084,#20001) +hasLocation(#20084,#20021) +successor(#20053,#20054) +successor(#20054,#20084) +successor(#20032,#20033) +successor(#20033,#20053) successor(#20023,#20024) -successor(#20024,#20035) -successor(#20103,#20023) +successor(#20024,#20032) +successor(#20082,#20023) numlines(#10000,3,3,0) filetype(#10000,"javascript") From 1e05f327d635b92adbf143fbd67cc4c9f8bcb44b Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 2 Mar 2025 19:50:00 +0100 Subject: [PATCH 11/27] Added test cases for union. 
--- .../extractor/tests/es2024/input/union.js | 6 + .../tests/es2024/output/trap/union.js.trap | 392 ++++++++++++++++++ 2 files changed, 398 insertions(+) create mode 100644 javascript/extractor/tests/es2024/input/union.js create mode 100644 javascript/extractor/tests/es2024/output/trap/union.js.trap diff --git a/javascript/extractor/tests/es2024/input/union.js b/javascript/extractor/tests/es2024/input/union.js new file mode 100644 index 000000000000..9eeab43f43aa --- /dev/null +++ b/javascript/extractor/tests/es2024/input/union.js @@ -0,0 +1,6 @@ +/[\p{Script_Extensions=Greek}\p{RGI_Emoji}]/v; +/[[abc][cbd]]/v; +/[\p{Emoji}\q{a&}byz]/v; +/[\q{\\\}a&}byz]/v; +/[\q{\\}]/v; +/[\q{abc|cbd|\}}]/v; diff --git a/javascript/extractor/tests/es2024/output/trap/union.js.trap b/javascript/extractor/tests/es2024/output/trap/union.js.trap new file mode 100644 index 000000000000..31d98f755e85 --- /dev/null +++ b/javascript/extractor/tests/es2024/output/trap/union.js.trap @@ -0,0 +1,392 @@ +#10000=@"/union.js;sourcefile" +files(#10000,"/union.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"/[\p{Script_Extensions=Greek}\p{RGI_Emoji}]/v;"," +") +#20003=@"loc,{#10000},1,1,1,46" +locations_default(#20003,#10000,1,1,1,46) +hasLocation(#20002,#20003) +#20004=* +lines(#20004,#20001,"/[[abc][cbd]]/v;"," +") +#20005=@"loc,{#10000},2,1,2,16" +locations_default(#20005,#10000,2,1,2,16) +hasLocation(#20004,#20005) +#20006=* +lines(#20006,#20001,"/[\p{Emoji}\q{a&}byz]/v;"," +") +#20007=@"loc,{#10000},3,1,3,24" +locations_default(#20007,#10000,3,1,3,24) +hasLocation(#20006,#20007) +#20008=* +lines(#20008,#20001,"/[\q{\\\}a&}byz]/v;"," +") +#20009=@"loc,{#10000},4,1,4,19" +locations_default(#20009,#10000,4,1,4,19) +hasLocation(#20008,#20009) +#20010=* 
+lines(#20010,#20001,"/[\q{\\}]/v;"," +") +#20011=@"loc,{#10000},5,1,5,12" +locations_default(#20011,#10000,5,1,5,12) +hasLocation(#20010,#20011) +#20012=* +lines(#20012,#20001,"/[\q{abc|cbd|\}}]/v;"," +") +#20013=@"loc,{#10000},6,1,6,20" +locations_default(#20013,#10000,6,1,6,20) +hasLocation(#20012,#20013) +numlines(#20001,6,6,0) +#20014=* +tokeninfo(#20014,5,#20001,0,"/[\p{Script_Extensions=Greek}\p{RGI_Emoji}]/v") +#20015=@"loc,{#10000},1,1,1,45" +locations_default(#20015,#10000,1,1,1,45) +hasLocation(#20014,#20015) +#20016=* +tokeninfo(#20016,8,#20001,1,";") +#20017=@"loc,{#10000},1,46,1,46" +locations_default(#20017,#10000,1,46,1,46) +hasLocation(#20016,#20017) +#20018=* +tokeninfo(#20018,5,#20001,2,"/[[abc][cbd]]/v") +#20019=@"loc,{#10000},2,1,2,15" +locations_default(#20019,#10000,2,1,2,15) +hasLocation(#20018,#20019) +#20020=* +tokeninfo(#20020,8,#20001,3,";") +#20021=@"loc,{#10000},2,16,2,16" +locations_default(#20021,#10000,2,16,2,16) +hasLocation(#20020,#20021) +#20022=* +tokeninfo(#20022,5,#20001,4,"/[\p{Emoji}\q{a&}byz]/v") +#20023=@"loc,{#10000},3,1,3,23" +locations_default(#20023,#10000,3,1,3,23) +hasLocation(#20022,#20023) +#20024=* +tokeninfo(#20024,8,#20001,5,";") +#20025=@"loc,{#10000},3,24,3,24" +locations_default(#20025,#10000,3,24,3,24) +hasLocation(#20024,#20025) +#20026=* +tokeninfo(#20026,5,#20001,6,"/[\q{\\\}a&}byz]/v") +#20027=@"loc,{#10000},4,1,4,18" +locations_default(#20027,#10000,4,1,4,18) +hasLocation(#20026,#20027) +#20028=* +tokeninfo(#20028,8,#20001,7,";") +#20029=@"loc,{#10000},4,19,4,19" +locations_default(#20029,#10000,4,19,4,19) +hasLocation(#20028,#20029) +#20030=* +tokeninfo(#20030,5,#20001,8,"/[\q{\\}]/v") +#20031=@"loc,{#10000},5,1,5,11" +locations_default(#20031,#10000,5,1,5,11) +hasLocation(#20030,#20031) +#20032=* +tokeninfo(#20032,8,#20001,9,";") +#20033=@"loc,{#10000},5,12,5,12" +locations_default(#20033,#10000,5,12,5,12) +hasLocation(#20032,#20033) +#20034=* +tokeninfo(#20034,5,#20001,10,"/[\q{abc|cbd|\}}]/v") 
+#20035=@"loc,{#10000},6,1,6,19" +locations_default(#20035,#10000,6,1,6,19) +hasLocation(#20034,#20035) +#20036=* +tokeninfo(#20036,8,#20001,11,";") +#20037=@"loc,{#10000},6,20,6,20" +locations_default(#20037,#10000,6,20,6,20) +hasLocation(#20036,#20037) +#20038=* +tokeninfo(#20038,0,#20001,12,"") +#20039=@"loc,{#10000},7,1,7,0" +locations_default(#20039,#10000,7,1,7,0) +hasLocation(#20038,#20039) +toplevels(#20001,0) +#20040=@"loc,{#10000},1,1,7,0" +locations_default(#20040,#10000,1,1,7,0) +hasLocation(#20001,#20040) +#20041=* +stmts(#20041,2,#20001,0,"/[\p{Sc ... ji}]/v;") +hasLocation(#20041,#20003) +stmt_containers(#20041,#20001) +#20042=* +exprs(#20042,5,#20041,0,"/[\p{Sc ... oji}]/v") +hasLocation(#20042,#20015) +enclosing_stmt(#20042,#20041) +expr_containers(#20042,#20001) +literals("/[\p{Script_Extensions=Greek}\p{RGI_Emoji}]/v","/[\p{Script_Extensions=Greek}\p{RGI_Emoji}]/v",#20042) +#20043=* +regexpterm(#20043,23,#20042,0,"[\p{Script_Extensions=Greek}\p{RGI_Emoji}]") +#20044=@"loc,{#10000},1,2,1,43" +locations_default(#20044,#10000,1,2,1,43) +hasLocation(#20043,#20044) +#20045=* +regexpterm(#20045,27,#20043,0,"\p{Script_Extensions=Greek}") +#20046=@"loc,{#10000},1,3,1,29" +locations_default(#20046,#10000,1,3,1,29) +hasLocation(#20045,#20046) +unicode_property_escapename(#20045,"Script_Extensions") +unicode_property_escapevalue(#20045,"Greek") +#20047=* +regexpterm(#20047,27,#20043,1,"\p{RGI_Emoji}") +#20048=@"loc,{#10000},1,30,1,42" +locations_default(#20048,#10000,1,30,1,42) +hasLocation(#20047,#20048) +unicode_property_escapename(#20047,"RGI_Emoji") +#20049=* +stmts(#20049,2,#20001,1,"/[[abc][cbd]]/v;") +hasLocation(#20049,#20005) +stmt_containers(#20049,#20001) +#20050=* +exprs(#20050,5,#20049,0,"/[[abc][cbd]]/v") +hasLocation(#20050,#20019) +enclosing_stmt(#20050,#20049) +expr_containers(#20050,#20001) +literals("/[[abc][cbd]]/v","/[[abc][cbd]]/v",#20050) +#20051=* +regexpterm(#20051,23,#20050,0,"[[abc][cbd]]") +#20052=@"loc,{#10000},2,2,2,13" 
+locations_default(#20052,#10000,2,2,2,13) +hasLocation(#20051,#20052) +#20053=* +regexpterm(#20053,23,#20051,0,"[abc]") +#20054=@"loc,{#10000},2,3,2,7" +locations_default(#20054,#10000,2,3,2,7) +hasLocation(#20053,#20054) +#20055=* +regexpterm(#20055,14,#20053,0,"a") +#20056=@"loc,{#10000},2,4,2,4" +locations_default(#20056,#10000,2,4,2,4) +hasLocation(#20055,#20056) +regexp_const_value(#20055,"a") +#20057=* +regexpterm(#20057,14,#20053,1,"b") +#20058=@"loc,{#10000},2,5,2,5" +locations_default(#20058,#10000,2,5,2,5) +hasLocation(#20057,#20058) +regexp_const_value(#20057,"b") +#20059=* +regexpterm(#20059,14,#20053,2,"c") +#20060=@"loc,{#10000},2,6,2,6" +locations_default(#20060,#10000,2,6,2,6) +hasLocation(#20059,#20060) +regexp_const_value(#20059,"c") +#20061=* +regexpterm(#20061,23,#20051,1,"[cbd]") +#20062=@"loc,{#10000},2,8,2,12" +locations_default(#20062,#10000,2,8,2,12) +hasLocation(#20061,#20062) +#20063=* +regexpterm(#20063,14,#20061,0,"c") +#20064=@"loc,{#10000},2,9,2,9" +locations_default(#20064,#10000,2,9,2,9) +hasLocation(#20063,#20064) +regexp_const_value(#20063,"c") +#20065=* +regexpterm(#20065,14,#20061,1,"b") +#20066=@"loc,{#10000},2,10,2,10" +locations_default(#20066,#10000,2,10,2,10) +hasLocation(#20065,#20066) +regexp_const_value(#20065,"b") +#20067=* +regexpterm(#20067,14,#20061,2,"d") +#20068=@"loc,{#10000},2,11,2,11" +locations_default(#20068,#10000,2,11,2,11) +hasLocation(#20067,#20068) +regexp_const_value(#20067,"d") +#20069=* +stmts(#20069,2,#20001,2,"/[\p{Em ... byz]/v;") +hasLocation(#20069,#20007) +stmt_containers(#20069,#20001) +#20070=* +exprs(#20070,5,#20069,0,"/[\p{Em ... 
}byz]/v") +hasLocation(#20070,#20023) +enclosing_stmt(#20070,#20069) +expr_containers(#20070,#20001) +literals("/[\p{Emoji}\q{a&}byz]/v","/[\p{Emoji}\q{a&}byz]/v",#20070) +#20071=* +regexpterm(#20071,23,#20070,0,"[\p{Emoji}\q{a&}byz]") +#20072=@"loc,{#10000},3,2,3,21" +locations_default(#20072,#10000,3,2,3,21) +hasLocation(#20071,#20072) +#20073=* +regexpterm(#20073,27,#20071,0,"\p{Emoji}") +#20074=@"loc,{#10000},3,3,3,11" +locations_default(#20074,#10000,3,3,3,11) +hasLocation(#20073,#20074) +unicode_property_escapename(#20073,"Emoji") +#20075=* +regexpterm(#20075,28,#20071,1,"\q{a&}") +#20076=@"loc,{#10000},3,12,3,17" +locations_default(#20076,#10000,3,12,3,17) +hasLocation(#20075,#20076) +#20077=* +regexpterm(#20077,14,#20075,0,"a&") +#20078=@"loc,{#10000},3,15,3,16" +locations_default(#20078,#10000,3,15,3,16) +hasLocation(#20077,#20078) +regexp_const_value(#20077,"a&") +#20079=* +regexpterm(#20079,14,#20071,2,"b") +#20080=@"loc,{#10000},3,18,3,18" +locations_default(#20080,#10000,3,18,3,18) +hasLocation(#20079,#20080) +regexp_const_value(#20079,"b") +#20081=* +regexpterm(#20081,14,#20071,3,"y") +#20082=@"loc,{#10000},3,19,3,19" +locations_default(#20082,#10000,3,19,3,19) +hasLocation(#20081,#20082) +regexp_const_value(#20081,"y") +#20083=* +regexpterm(#20083,14,#20071,4,"z") +#20084=@"loc,{#10000},3,20,3,20" +locations_default(#20084,#10000,3,20,3,20) +hasLocation(#20083,#20084) +regexp_const_value(#20083,"z") +#20085=* +stmts(#20085,2,#20001,3,"/[\q{\\\}a&}byz]/v;") +hasLocation(#20085,#20009) +stmt_containers(#20085,#20001) +#20086=* +exprs(#20086,5,#20085,0,"/[\q{\\\}a&}byz]/v") +hasLocation(#20086,#20027) +enclosing_stmt(#20086,#20085) +expr_containers(#20086,#20001) +literals("/[\q{\\\}a&}byz]/v","/[\q{\\\}a&}byz]/v",#20086) +#20087=* +regexpterm(#20087,23,#20086,0,"[\q{\\\}a&}byz]") +#20088=@"loc,{#10000},4,2,4,16" +locations_default(#20088,#10000,4,2,4,16) +hasLocation(#20087,#20088) +#20089=* +regexpterm(#20089,28,#20087,0,"\q{\\\}a&}") 
+#20090=@"loc,{#10000},4,3,4,12" +locations_default(#20090,#10000,4,3,4,12) +hasLocation(#20089,#20090) +#20091=* +regexpterm(#20091,14,#20089,0,"\\\}a&") +#20092=@"loc,{#10000},4,6,4,11" +locations_default(#20092,#10000,4,6,4,11) +hasLocation(#20091,#20092) +regexp_const_value(#20091,"\\\}a&") +#20093=* +regexpterm(#20093,14,#20087,1,"b") +#20094=@"loc,{#10000},4,13,4,13" +locations_default(#20094,#10000,4,13,4,13) +hasLocation(#20093,#20094) +regexp_const_value(#20093,"b") +#20095=* +regexpterm(#20095,14,#20087,2,"y") +#20096=@"loc,{#10000},4,14,4,14" +locations_default(#20096,#10000,4,14,4,14) +hasLocation(#20095,#20096) +regexp_const_value(#20095,"y") +#20097=* +regexpterm(#20097,14,#20087,3,"z") +#20098=@"loc,{#10000},4,15,4,15" +locations_default(#20098,#10000,4,15,4,15) +hasLocation(#20097,#20098) +regexp_const_value(#20097,"z") +#20099=* +stmts(#20099,2,#20001,4,"/[\q{\\}]/v;") +hasLocation(#20099,#20011) +stmt_containers(#20099,#20001) +#20100=* +exprs(#20100,5,#20099,0,"/[\q{\\}]/v") +hasLocation(#20100,#20031) +enclosing_stmt(#20100,#20099) +expr_containers(#20100,#20001) +literals("/[\q{\\}]/v","/[\q{\\}]/v",#20100) +#20101=* +regexpterm(#20101,23,#20100,0,"[\q{\\}]") +#20102=@"loc,{#10000},5,2,5,9" +locations_default(#20102,#10000,5,2,5,9) +hasLocation(#20101,#20102) +#20103=* +regexpterm(#20103,28,#20101,0,"\q{\\}") +#20104=@"loc,{#10000},5,3,5,8" +locations_default(#20104,#10000,5,3,5,8) +hasLocation(#20103,#20104) +#20105=* +regexpterm(#20105,14,#20103,0,"\\") +#20106=@"loc,{#10000},5,6,5,7" +locations_default(#20106,#10000,5,6,5,7) +hasLocation(#20105,#20106) +regexp_const_value(#20105,"\\") +#20107=* +stmts(#20107,2,#20001,5,"/[\q{abc|cbd|\}}]/v;") +hasLocation(#20107,#20013) +stmt_containers(#20107,#20001) +#20108=* +exprs(#20108,5,#20107,0,"/[\q{abc|cbd|\}}]/v") +hasLocation(#20108,#20035) +enclosing_stmt(#20108,#20107) +expr_containers(#20108,#20001) +literals("/[\q{abc|cbd|\}}]/v","/[\q{abc|cbd|\}}]/v",#20108) +#20109=* 
+regexpterm(#20109,23,#20108,0,"[\q{abc|cbd|\}}]") +#20110=@"loc,{#10000},6,2,6,17" +locations_default(#20110,#10000,6,2,6,17) +hasLocation(#20109,#20110) +#20111=* +regexpterm(#20111,28,#20109,0,"\q{abc|cbd|\}}") +#20112=@"loc,{#10000},6,3,6,16" +locations_default(#20112,#10000,6,3,6,16) +hasLocation(#20111,#20112) +#20113=* +regexpterm(#20113,0,#20111,0,"abc|cbd|\}") +#20114=@"loc,{#10000},6,6,6,15" +locations_default(#20114,#10000,6,6,6,15) +hasLocation(#20113,#20114) +#20115=* +regexpterm(#20115,14,#20113,0,"abc") +#20116=@"loc,{#10000},6,6,6,8" +locations_default(#20116,#10000,6,6,6,8) +hasLocation(#20115,#20116) +regexp_const_value(#20115,"abc") +#20117=* +regexpterm(#20117,14,#20113,1,"cbd") +#20118=@"loc,{#10000},6,10,6,12" +locations_default(#20118,#10000,6,10,6,12) +hasLocation(#20117,#20118) +regexp_const_value(#20117,"cbd") +#20119=* +regexpterm(#20119,14,#20113,2,"\}") +#20120=@"loc,{#10000},6,14,6,15" +locations_default(#20120,#10000,6,14,6,15) +hasLocation(#20119,#20120) +regexp_const_value(#20119,"\}") +#20121=* +entry_cfg_node(#20121,#20001) +#20122=@"loc,{#10000},1,1,1,0" +locations_default(#20122,#10000,1,1,1,0) +hasLocation(#20121,#20122) +#20123=* +exit_cfg_node(#20123,#20001) +hasLocation(#20123,#20039) +successor(#20107,#20108) +successor(#20108,#20123) +successor(#20099,#20100) +successor(#20100,#20107) +successor(#20085,#20086) +successor(#20086,#20099) +successor(#20069,#20070) +successor(#20070,#20085) +successor(#20049,#20050) +successor(#20050,#20069) +successor(#20041,#20042) +successor(#20042,#20049) +successor(#20121,#20041) +numlines(#10000,6,6,0) +filetype(#10000,"javascript") From fe6de2f672dfd9408ecd585f5aa6cc76a7e25dcc Mon Sep 17 00:00:00 2001 From: Napalys Date: Mon, 3 Mar 2025 08:26:48 +0100 Subject: [PATCH 12/27] Added support for character class union in regex processing --- .../js/ast/regexp/CharacterClassUnion.java | 22 + .../src/com/semmle/js/ast/regexp/Visitor.java | 2 + .../semmle/js/extractor/RegExpExtractor.java | 10 
+ .../com/semmle/js/parser/RegExpParser.java | 16 + .../trap/regex_nested_character_class.js.trap | 108 ++--- .../tests/es2024/output/trap/union.js.trap | 424 +++++++++--------- 6 files changed, 325 insertions(+), 257 deletions(-) create mode 100644 javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java new file mode 100644 index 000000000000..7d3ec4510d52 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java @@ -0,0 +1,22 @@ +package com.semmle.js.ast.regexp; + +import com.semmle.js.ast.SourceLocation; +import java.util.List; + +public class CharacterClassUnion extends RegExpTerm { + private final List union; + + public CharacterClassUnion(SourceLocation loc, List union) { + super(loc, "CharacterClassUnion"); + this.union = union; + } + + @Override + public void accept(Visitor v) { + v.visit(this); + } + + public List getUnion() { + return union; + } +} diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java index 373cb727c5a6..45f737794884 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java @@ -67,4 +67,6 @@ public interface Visitor { public void visit(CharacterClassIntersection nd); public void visit(CharacterClassSubtraction nd); + + public void visit(CharacterClassUnion nd); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 8d382466c411..4fbf1b4f6cf6 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -13,6 +13,7 @@ import 
com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; import com.semmle.js.ast.regexp.CharacterClassSubtraction; +import com.semmle.js.ast.regexp.CharacterClassUnion; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; import com.semmle.js.ast.regexp.ControlLetter; @@ -98,6 +99,7 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) { termkinds.put("CharacterClassQuotedString", 28); termkinds.put("CharacterClassIntersection", 29); termkinds.put("CharacterClassSubtraction", 30); + termkinds.put("CharacterClassUnion", 31); } private static final String[] errmsgs = @@ -372,6 +374,14 @@ public void visit(CharacterClassSubtraction nd) { for (RegExpTerm element : nd.getSubtraction()) visit(element, lbl, i++); } + + @Override + public void visit(CharacterClassUnion nd) { + Label lbl = extractTerm(nd, parent, idx); + int i = 0; + for (RegExpTerm element : nd.getUnion()) + visit(element, lbl, i++); + } } public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) { diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 18cfddf0171f..70b3487d88b2 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -9,6 +9,7 @@ import com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; import com.semmle.js.ast.regexp.CharacterClassSubtraction; +import com.semmle.js.ast.regexp.CharacterClassUnion; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; import com.semmle.js.ast.regexp.ControlLetter; @@ -568,6 +569,7 @@ private enum CharacterClassType { STANDARD, INTERSECTION, SUBTRACTION, + UNION } // ECMA 2024 `v` flag allows nested character classes. 
@@ -599,12 +601,26 @@ else if (lookahead("--")) { } } + boolean containsComplex = elements.stream().anyMatch(term -> term instanceof UnicodePropertyEscape || + term instanceof CharacterClassQuotedString || + term instanceof CharacterClass); + + // Set type to UNION only if: + // 1. We haven't already determined a specific type (intersection/subtraction) + // 2. We have more than one element + // 3. We have at least one complex element (i.e. a nested character class or a UnicodePropertyEscape) + if (containsComplex && classType == CharacterClassType.STANDARD && elements.size() > 1) { + classType = CharacterClassType.UNION; + } + // Create appropriate RegExpTerm based on the detected class type switch (classType) { case INTERSECTION: return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted)); case SUBTRACTION: return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted)); + case UNION: + return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassUnion(loc, elements)), inverted)); case STANDARD: default: return this.finishTerm(new CharacterClass(loc, elements, inverted)); diff --git a/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap index c3bc9112cc56..25ba1d8bb6db 100644 --- a/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap @@ -137,75 +137,81 @@ regexpterm(#20042,23,#20041,0,"[ [] [ [] [] ] ]") locations_default(#20043,#10000,3,2,3,17) hasLocation(#20042,#20043) #20044=* -regexpterm(#20044,14,#20042,0," ") -#20045=@"loc,{#10000},3,3,3,3" -locations_default(#20045,#10000,3,3,3,3) -hasLocation(#20044,#20045) -regexp_const_value(#20044," ") -#20046=* 
-regexpterm(#20046,23,#20042,1,"[]") -#20047=@"loc,{#10000},3,4,3,5" -locations_default(#20047,#10000,3,4,3,5) -hasLocation(#20046,#20047) -#20048=* -regexpterm(#20048,14,#20042,2," ") -#20049=@"loc,{#10000},3,6,3,6" -locations_default(#20049,#10000,3,6,3,6) -hasLocation(#20048,#20049) -regexp_const_value(#20048," ") -#20050=* -regexpterm(#20050,23,#20042,3,"[ [] [] ]") -#20051=@"loc,{#10000},3,7,3,15" -locations_default(#20051,#10000,3,7,3,15) -hasLocation(#20050,#20051) -#20052=* -regexpterm(#20052,14,#20050,0," ") -#20053=@"loc,{#10000},3,8,3,8" -locations_default(#20053,#10000,3,8,3,8) -hasLocation(#20052,#20053) -regexp_const_value(#20052," ") +regexpterm(#20044,31,#20042,0,"[ [] [ [] [] ] ]") +hasLocation(#20044,#20043) +#20045=* +regexpterm(#20045,14,#20044,0," ") +#20046=@"loc,{#10000},3,3,3,3" +locations_default(#20046,#10000,3,3,3,3) +hasLocation(#20045,#20046) +regexp_const_value(#20045," ") +#20047=* +regexpterm(#20047,23,#20044,1,"[]") +#20048=@"loc,{#10000},3,4,3,5" +locations_default(#20048,#10000,3,4,3,5) +hasLocation(#20047,#20048) +#20049=* +regexpterm(#20049,14,#20044,2," ") +#20050=@"loc,{#10000},3,6,3,6" +locations_default(#20050,#10000,3,6,3,6) +hasLocation(#20049,#20050) +regexp_const_value(#20049," ") +#20051=* +regexpterm(#20051,23,#20044,3,"[ [] [] ]") +#20052=@"loc,{#10000},3,7,3,15" +locations_default(#20052,#10000,3,7,3,15) +hasLocation(#20051,#20052) +#20053=* +regexpterm(#20053,31,#20051,0,"[ [] [] ]") +hasLocation(#20053,#20052) #20054=* -regexpterm(#20054,23,#20050,1,"[]") -#20055=@"loc,{#10000},3,9,3,10" -locations_default(#20055,#10000,3,9,3,10) +regexpterm(#20054,14,#20053,0," ") +#20055=@"loc,{#10000},3,8,3,8" +locations_default(#20055,#10000,3,8,3,8) hasLocation(#20054,#20055) +regexp_const_value(#20054," ") #20056=* -regexpterm(#20056,14,#20050,2," ") -#20057=@"loc,{#10000},3,11,3,11" -locations_default(#20057,#10000,3,11,3,11) +regexpterm(#20056,23,#20053,1,"[]") +#20057=@"loc,{#10000},3,9,3,10" 
+locations_default(#20057,#10000,3,9,3,10) hasLocation(#20056,#20057) -regexp_const_value(#20056," ") #20058=* -regexpterm(#20058,23,#20050,3,"[]") -#20059=@"loc,{#10000},3,12,3,13" -locations_default(#20059,#10000,3,12,3,13) +regexpterm(#20058,14,#20053,2," ") +#20059=@"loc,{#10000},3,11,3,11" +locations_default(#20059,#10000,3,11,3,11) hasLocation(#20058,#20059) +regexp_const_value(#20058," ") #20060=* -regexpterm(#20060,14,#20050,4," ") -#20061=@"loc,{#10000},3,14,3,14" -locations_default(#20061,#10000,3,14,3,14) +regexpterm(#20060,23,#20053,3,"[]") +#20061=@"loc,{#10000},3,12,3,13" +locations_default(#20061,#10000,3,12,3,13) hasLocation(#20060,#20061) -regexp_const_value(#20060," ") #20062=* -regexpterm(#20062,14,#20042,4," ") -#20063=@"loc,{#10000},3,16,3,16" -locations_default(#20063,#10000,3,16,3,16) +regexpterm(#20062,14,#20053,4," ") +#20063=@"loc,{#10000},3,14,3,14" +locations_default(#20063,#10000,3,14,3,14) hasLocation(#20062,#20063) regexp_const_value(#20062," ") #20064=* -entry_cfg_node(#20064,#20001) -#20065=@"loc,{#10000},1,1,1,0" -locations_default(#20065,#10000,1,1,1,0) +regexpterm(#20064,14,#20044,4," ") +#20065=@"loc,{#10000},3,16,3,16" +locations_default(#20065,#10000,3,16,3,16) hasLocation(#20064,#20065) +regexp_const_value(#20064," ") #20066=* -exit_cfg_node(#20066,#20001) -hasLocation(#20066,#20023) +entry_cfg_node(#20066,#20001) +#20067=@"loc,{#10000},1,1,1,0" +locations_default(#20067,#10000,1,1,1,0) +hasLocation(#20066,#20067) +#20068=* +exit_cfg_node(#20068,#20001) +hasLocation(#20068,#20023) successor(#20040,#20041) -successor(#20041,#20066) +successor(#20041,#20068) successor(#20032,#20033) successor(#20033,#20040) successor(#20025,#20027) successor(#20027,#20032) -successor(#20064,#20025) +successor(#20066,#20025) numlines(#10000,3,3,1) filetype(#10000,"javascript") diff --git a/javascript/extractor/tests/es2024/output/trap/union.js.trap b/javascript/extractor/tests/es2024/output/trap/union.js.trap index 31d98f755e85..32674c50e53b 
100644 --- a/javascript/extractor/tests/es2024/output/trap/union.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/union.js.trap @@ -131,262 +131,274 @@ regexpterm(#20043,23,#20042,0,"[\p{Script_Extensions=Greek}\p{RGI_Emoji}]") locations_default(#20044,#10000,1,2,1,43) hasLocation(#20043,#20044) #20045=* -regexpterm(#20045,27,#20043,0,"\p{Script_Extensions=Greek}") -#20046=@"loc,{#10000},1,3,1,29" -locations_default(#20046,#10000,1,3,1,29) -hasLocation(#20045,#20046) -unicode_property_escapename(#20045,"Script_Extensions") -unicode_property_escapevalue(#20045,"Greek") -#20047=* -regexpterm(#20047,27,#20043,1,"\p{RGI_Emoji}") -#20048=@"loc,{#10000},1,30,1,42" -locations_default(#20048,#10000,1,30,1,42) -hasLocation(#20047,#20048) -unicode_property_escapename(#20047,"RGI_Emoji") -#20049=* -stmts(#20049,2,#20001,1,"/[[abc][cbd]]/v;") -hasLocation(#20049,#20005) -stmt_containers(#20049,#20001) +regexpterm(#20045,31,#20043,0,"[\p{Script_Extensions=Greek}\p{RGI_Emoji}]") +hasLocation(#20045,#20044) +#20046=* +regexpterm(#20046,27,#20045,0,"\p{Script_Extensions=Greek}") +#20047=@"loc,{#10000},1,3,1,29" +locations_default(#20047,#10000,1,3,1,29) +hasLocation(#20046,#20047) +unicode_property_escapename(#20046,"Script_Extensions") +unicode_property_escapevalue(#20046,"Greek") +#20048=* +regexpterm(#20048,27,#20045,1,"\p{RGI_Emoji}") +#20049=@"loc,{#10000},1,30,1,42" +locations_default(#20049,#10000,1,30,1,42) +hasLocation(#20048,#20049) +unicode_property_escapename(#20048,"RGI_Emoji") #20050=* -exprs(#20050,5,#20049,0,"/[[abc][cbd]]/v") -hasLocation(#20050,#20019) -enclosing_stmt(#20050,#20049) -expr_containers(#20050,#20001) -literals("/[[abc][cbd]]/v","/[[abc][cbd]]/v",#20050) +stmts(#20050,2,#20001,1,"/[[abc][cbd]]/v;") +hasLocation(#20050,#20005) +stmt_containers(#20050,#20001) #20051=* -regexpterm(#20051,23,#20050,0,"[[abc][cbd]]") -#20052=@"loc,{#10000},2,2,2,13" -locations_default(#20052,#10000,2,2,2,13) -hasLocation(#20051,#20052) -#20053=* 
-regexpterm(#20053,23,#20051,0,"[abc]") -#20054=@"loc,{#10000},2,3,2,7" -locations_default(#20054,#10000,2,3,2,7) -hasLocation(#20053,#20054) +exprs(#20051,5,#20050,0,"/[[abc][cbd]]/v") +hasLocation(#20051,#20019) +enclosing_stmt(#20051,#20050) +expr_containers(#20051,#20001) +literals("/[[abc][cbd]]/v","/[[abc][cbd]]/v",#20051) +#20052=* +regexpterm(#20052,23,#20051,0,"[[abc][cbd]]") +#20053=@"loc,{#10000},2,2,2,13" +locations_default(#20053,#10000,2,2,2,13) +hasLocation(#20052,#20053) +#20054=* +regexpterm(#20054,31,#20052,0,"[[abc][cbd]]") +hasLocation(#20054,#20053) #20055=* -regexpterm(#20055,14,#20053,0,"a") -#20056=@"loc,{#10000},2,4,2,4" -locations_default(#20056,#10000,2,4,2,4) +regexpterm(#20055,23,#20054,0,"[abc]") +#20056=@"loc,{#10000},2,3,2,7" +locations_default(#20056,#10000,2,3,2,7) hasLocation(#20055,#20056) -regexp_const_value(#20055,"a") #20057=* -regexpterm(#20057,14,#20053,1,"b") -#20058=@"loc,{#10000},2,5,2,5" -locations_default(#20058,#10000,2,5,2,5) +regexpterm(#20057,14,#20055,0,"a") +#20058=@"loc,{#10000},2,4,2,4" +locations_default(#20058,#10000,2,4,2,4) hasLocation(#20057,#20058) -regexp_const_value(#20057,"b") +regexp_const_value(#20057,"a") #20059=* -regexpterm(#20059,14,#20053,2,"c") -#20060=@"loc,{#10000},2,6,2,6" -locations_default(#20060,#10000,2,6,2,6) +regexpterm(#20059,14,#20055,1,"b") +#20060=@"loc,{#10000},2,5,2,5" +locations_default(#20060,#10000,2,5,2,5) hasLocation(#20059,#20060) -regexp_const_value(#20059,"c") +regexp_const_value(#20059,"b") #20061=* -regexpterm(#20061,23,#20051,1,"[cbd]") -#20062=@"loc,{#10000},2,8,2,12" -locations_default(#20062,#10000,2,8,2,12) +regexpterm(#20061,14,#20055,2,"c") +#20062=@"loc,{#10000},2,6,2,6" +locations_default(#20062,#10000,2,6,2,6) hasLocation(#20061,#20062) +regexp_const_value(#20061,"c") #20063=* -regexpterm(#20063,14,#20061,0,"c") -#20064=@"loc,{#10000},2,9,2,9" -locations_default(#20064,#10000,2,9,2,9) +regexpterm(#20063,23,#20054,1,"[cbd]") +#20064=@"loc,{#10000},2,8,2,12" 
+locations_default(#20064,#10000,2,8,2,12) hasLocation(#20063,#20064) -regexp_const_value(#20063,"c") #20065=* -regexpterm(#20065,14,#20061,1,"b") -#20066=@"loc,{#10000},2,10,2,10" -locations_default(#20066,#10000,2,10,2,10) +regexpterm(#20065,14,#20063,0,"c") +#20066=@"loc,{#10000},2,9,2,9" +locations_default(#20066,#10000,2,9,2,9) hasLocation(#20065,#20066) -regexp_const_value(#20065,"b") +regexp_const_value(#20065,"c") #20067=* -regexpterm(#20067,14,#20061,2,"d") -#20068=@"loc,{#10000},2,11,2,11" -locations_default(#20068,#10000,2,11,2,11) +regexpterm(#20067,14,#20063,1,"b") +#20068=@"loc,{#10000},2,10,2,10" +locations_default(#20068,#10000,2,10,2,10) hasLocation(#20067,#20068) -regexp_const_value(#20067,"d") +regexp_const_value(#20067,"b") #20069=* -stmts(#20069,2,#20001,2,"/[\p{Em ... byz]/v;") -hasLocation(#20069,#20007) -stmt_containers(#20069,#20001) -#20070=* -exprs(#20070,5,#20069,0,"/[\p{Em ... }byz]/v") -hasLocation(#20070,#20023) -enclosing_stmt(#20070,#20069) -expr_containers(#20070,#20001) -literals("/[\p{Emoji}\q{a&}byz]/v","/[\p{Emoji}\q{a&}byz]/v",#20070) +regexpterm(#20069,14,#20063,2,"d") +#20070=@"loc,{#10000},2,11,2,11" +locations_default(#20070,#10000,2,11,2,11) +hasLocation(#20069,#20070) +regexp_const_value(#20069,"d") #20071=* -regexpterm(#20071,23,#20070,0,"[\p{Emoji}\q{a&}byz]") -#20072=@"loc,{#10000},3,2,3,21" -locations_default(#20072,#10000,3,2,3,21) -hasLocation(#20071,#20072) +stmts(#20071,2,#20001,2,"/[\p{Em ... byz]/v;") +hasLocation(#20071,#20007) +stmt_containers(#20071,#20001) +#20072=* +exprs(#20072,5,#20071,0,"/[\p{Em ... 
}byz]/v") +hasLocation(#20072,#20023) +enclosing_stmt(#20072,#20071) +expr_containers(#20072,#20001) +literals("/[\p{Emoji}\q{a&}byz]/v","/[\p{Emoji}\q{a&}byz]/v",#20072) #20073=* -regexpterm(#20073,27,#20071,0,"\p{Emoji}") -#20074=@"loc,{#10000},3,3,3,11" -locations_default(#20074,#10000,3,3,3,11) +regexpterm(#20073,23,#20072,0,"[\p{Emoji}\q{a&}byz]") +#20074=@"loc,{#10000},3,2,3,21" +locations_default(#20074,#10000,3,2,3,21) hasLocation(#20073,#20074) -unicode_property_escapename(#20073,"Emoji") #20075=* -regexpterm(#20075,28,#20071,1,"\q{a&}") -#20076=@"loc,{#10000},3,12,3,17" -locations_default(#20076,#10000,3,12,3,17) -hasLocation(#20075,#20076) -#20077=* -regexpterm(#20077,14,#20075,0,"a&") -#20078=@"loc,{#10000},3,15,3,16" -locations_default(#20078,#10000,3,15,3,16) -hasLocation(#20077,#20078) -regexp_const_value(#20077,"a&") -#20079=* -regexpterm(#20079,14,#20071,2,"b") -#20080=@"loc,{#10000},3,18,3,18" -locations_default(#20080,#10000,3,18,3,18) -hasLocation(#20079,#20080) -regexp_const_value(#20079,"b") -#20081=* -regexpterm(#20081,14,#20071,3,"y") -#20082=@"loc,{#10000},3,19,3,19" -locations_default(#20082,#10000,3,19,3,19) -hasLocation(#20081,#20082) -regexp_const_value(#20081,"y") -#20083=* -regexpterm(#20083,14,#20071,4,"z") -#20084=@"loc,{#10000},3,20,3,20" -locations_default(#20084,#10000,3,20,3,20) -hasLocation(#20083,#20084) -regexp_const_value(#20083,"z") -#20085=* -stmts(#20085,2,#20001,3,"/[\q{\\\}a&}byz]/v;") -hasLocation(#20085,#20009) -stmt_containers(#20085,#20001) +regexpterm(#20075,31,#20073,0,"[\p{Emoji}\q{a&}byz]") +hasLocation(#20075,#20074) +#20076=* +regexpterm(#20076,27,#20075,0,"\p{Emoji}") +#20077=@"loc,{#10000},3,3,3,11" +locations_default(#20077,#10000,3,3,3,11) +hasLocation(#20076,#20077) +unicode_property_escapename(#20076,"Emoji") +#20078=* +regexpterm(#20078,28,#20075,1,"\q{a&}") +#20079=@"loc,{#10000},3,12,3,17" +locations_default(#20079,#10000,3,12,3,17) +hasLocation(#20078,#20079) +#20080=* 
+regexpterm(#20080,14,#20078,0,"a&") +#20081=@"loc,{#10000},3,15,3,16" +locations_default(#20081,#10000,3,15,3,16) +hasLocation(#20080,#20081) +regexp_const_value(#20080,"a&") +#20082=* +regexpterm(#20082,14,#20075,2,"b") +#20083=@"loc,{#10000},3,18,3,18" +locations_default(#20083,#10000,3,18,3,18) +hasLocation(#20082,#20083) +regexp_const_value(#20082,"b") +#20084=* +regexpterm(#20084,14,#20075,3,"y") +#20085=@"loc,{#10000},3,19,3,19" +locations_default(#20085,#10000,3,19,3,19) +hasLocation(#20084,#20085) +regexp_const_value(#20084,"y") #20086=* -exprs(#20086,5,#20085,0,"/[\q{\\\}a&}byz]/v") -hasLocation(#20086,#20027) -enclosing_stmt(#20086,#20085) -expr_containers(#20086,#20001) -literals("/[\q{\\\}a&}byz]/v","/[\q{\\\}a&}byz]/v",#20086) -#20087=* -regexpterm(#20087,23,#20086,0,"[\q{\\\}a&}byz]") -#20088=@"loc,{#10000},4,2,4,16" -locations_default(#20088,#10000,4,2,4,16) -hasLocation(#20087,#20088) +regexpterm(#20086,14,#20075,4,"z") +#20087=@"loc,{#10000},3,20,3,20" +locations_default(#20087,#10000,3,20,3,20) +hasLocation(#20086,#20087) +regexp_const_value(#20086,"z") +#20088=* +stmts(#20088,2,#20001,3,"/[\q{\\\}a&}byz]/v;") +hasLocation(#20088,#20009) +stmt_containers(#20088,#20001) #20089=* -regexpterm(#20089,28,#20087,0,"\q{\\\}a&}") -#20090=@"loc,{#10000},4,3,4,12" -locations_default(#20090,#10000,4,3,4,12) -hasLocation(#20089,#20090) -#20091=* -regexpterm(#20091,14,#20089,0,"\\\}a&") -#20092=@"loc,{#10000},4,6,4,11" -locations_default(#20092,#10000,4,6,4,11) -hasLocation(#20091,#20092) -regexp_const_value(#20091,"\\\}a&") +exprs(#20089,5,#20088,0,"/[\q{\\\}a&}byz]/v") +hasLocation(#20089,#20027) +enclosing_stmt(#20089,#20088) +expr_containers(#20089,#20001) +literals("/[\q{\\\}a&}byz]/v","/[\q{\\\}a&}byz]/v",#20089) +#20090=* +regexpterm(#20090,23,#20089,0,"[\q{\\\}a&}byz]") +#20091=@"loc,{#10000},4,2,4,16" +locations_default(#20091,#10000,4,2,4,16) +hasLocation(#20090,#20091) +#20092=* +regexpterm(#20092,31,#20090,0,"[\q{\\\}a&}byz]") 
+hasLocation(#20092,#20091) #20093=* -regexpterm(#20093,14,#20087,1,"b") -#20094=@"loc,{#10000},4,13,4,13" -locations_default(#20094,#10000,4,13,4,13) +regexpterm(#20093,28,#20092,0,"\q{\\\}a&}") +#20094=@"loc,{#10000},4,3,4,12" +locations_default(#20094,#10000,4,3,4,12) hasLocation(#20093,#20094) -regexp_const_value(#20093,"b") #20095=* -regexpterm(#20095,14,#20087,2,"y") -#20096=@"loc,{#10000},4,14,4,14" -locations_default(#20096,#10000,4,14,4,14) +regexpterm(#20095,14,#20093,0,"\\\}a&") +#20096=@"loc,{#10000},4,6,4,11" +locations_default(#20096,#10000,4,6,4,11) hasLocation(#20095,#20096) -regexp_const_value(#20095,"y") +regexp_const_value(#20095,"\\\}a&") #20097=* -regexpterm(#20097,14,#20087,3,"z") -#20098=@"loc,{#10000},4,15,4,15" -locations_default(#20098,#10000,4,15,4,15) +regexpterm(#20097,14,#20092,1,"b") +#20098=@"loc,{#10000},4,13,4,13" +locations_default(#20098,#10000,4,13,4,13) hasLocation(#20097,#20098) -regexp_const_value(#20097,"z") +regexp_const_value(#20097,"b") #20099=* -stmts(#20099,2,#20001,4,"/[\q{\\}]/v;") -hasLocation(#20099,#20011) -stmt_containers(#20099,#20001) -#20100=* -exprs(#20100,5,#20099,0,"/[\q{\\}]/v") -hasLocation(#20100,#20031) -enclosing_stmt(#20100,#20099) -expr_containers(#20100,#20001) -literals("/[\q{\\}]/v","/[\q{\\}]/v",#20100) +regexpterm(#20099,14,#20092,2,"y") +#20100=@"loc,{#10000},4,14,4,14" +locations_default(#20100,#10000,4,14,4,14) +hasLocation(#20099,#20100) +regexp_const_value(#20099,"y") #20101=* -regexpterm(#20101,23,#20100,0,"[\q{\\}]") -#20102=@"loc,{#10000},5,2,5,9" -locations_default(#20102,#10000,5,2,5,9) +regexpterm(#20101,14,#20092,3,"z") +#20102=@"loc,{#10000},4,15,4,15" +locations_default(#20102,#10000,4,15,4,15) hasLocation(#20101,#20102) +regexp_const_value(#20101,"z") #20103=* -regexpterm(#20103,28,#20101,0,"\q{\\}") -#20104=@"loc,{#10000},5,3,5,8" -locations_default(#20104,#10000,5,3,5,8) -hasLocation(#20103,#20104) +stmts(#20103,2,#20001,4,"/[\q{\\}]/v;") +hasLocation(#20103,#20011) 
+stmt_containers(#20103,#20001) +#20104=* +exprs(#20104,5,#20103,0,"/[\q{\\}]/v") +hasLocation(#20104,#20031) +enclosing_stmt(#20104,#20103) +expr_containers(#20104,#20001) +literals("/[\q{\\}]/v","/[\q{\\}]/v",#20104) #20105=* -regexpterm(#20105,14,#20103,0,"\\") -#20106=@"loc,{#10000},5,6,5,7" -locations_default(#20106,#10000,5,6,5,7) +regexpterm(#20105,23,#20104,0,"[\q{\\}]") +#20106=@"loc,{#10000},5,2,5,9" +locations_default(#20106,#10000,5,2,5,9) hasLocation(#20105,#20106) -regexp_const_value(#20105,"\\") #20107=* -stmts(#20107,2,#20001,5,"/[\q{abc|cbd|\}}]/v;") -hasLocation(#20107,#20013) -stmt_containers(#20107,#20001) -#20108=* -exprs(#20108,5,#20107,0,"/[\q{abc|cbd|\}}]/v") -hasLocation(#20108,#20035) -enclosing_stmt(#20108,#20107) -expr_containers(#20108,#20001) -literals("/[\q{abc|cbd|\}}]/v","/[\q{abc|cbd|\}}]/v",#20108) +regexpterm(#20107,28,#20105,0,"\q{\\}") +#20108=@"loc,{#10000},5,3,5,8" +locations_default(#20108,#10000,5,3,5,8) +hasLocation(#20107,#20108) #20109=* -regexpterm(#20109,23,#20108,0,"[\q{abc|cbd|\}}]") -#20110=@"loc,{#10000},6,2,6,17" -locations_default(#20110,#10000,6,2,6,17) +regexpterm(#20109,14,#20107,0,"\\") +#20110=@"loc,{#10000},5,6,5,7" +locations_default(#20110,#10000,5,6,5,7) hasLocation(#20109,#20110) +regexp_const_value(#20109,"\\") #20111=* -regexpterm(#20111,28,#20109,0,"\q{abc|cbd|\}}") -#20112=@"loc,{#10000},6,3,6,16" -locations_default(#20112,#10000,6,3,6,16) -hasLocation(#20111,#20112) +stmts(#20111,2,#20001,5,"/[\q{abc|cbd|\}}]/v;") +hasLocation(#20111,#20013) +stmt_containers(#20111,#20001) +#20112=* +exprs(#20112,5,#20111,0,"/[\q{abc|cbd|\}}]/v") +hasLocation(#20112,#20035) +enclosing_stmt(#20112,#20111) +expr_containers(#20112,#20001) +literals("/[\q{abc|cbd|\}}]/v","/[\q{abc|cbd|\}}]/v",#20112) #20113=* -regexpterm(#20113,0,#20111,0,"abc|cbd|\}") -#20114=@"loc,{#10000},6,6,6,15" -locations_default(#20114,#10000,6,6,6,15) +regexpterm(#20113,23,#20112,0,"[\q{abc|cbd|\}}]") +#20114=@"loc,{#10000},6,2,6,17" 
+locations_default(#20114,#10000,6,2,6,17) hasLocation(#20113,#20114) #20115=* -regexpterm(#20115,14,#20113,0,"abc") -#20116=@"loc,{#10000},6,6,6,8" -locations_default(#20116,#10000,6,6,6,8) +regexpterm(#20115,28,#20113,0,"\q{abc|cbd|\}}") +#20116=@"loc,{#10000},6,3,6,16" +locations_default(#20116,#10000,6,3,6,16) hasLocation(#20115,#20116) -regexp_const_value(#20115,"abc") #20117=* -regexpterm(#20117,14,#20113,1,"cbd") -#20118=@"loc,{#10000},6,10,6,12" -locations_default(#20118,#10000,6,10,6,12) +regexpterm(#20117,0,#20115,0,"abc|cbd|\}") +#20118=@"loc,{#10000},6,6,6,15" +locations_default(#20118,#10000,6,6,6,15) hasLocation(#20117,#20118) -regexp_const_value(#20117,"cbd") #20119=* -regexpterm(#20119,14,#20113,2,"\}") -#20120=@"loc,{#10000},6,14,6,15" -locations_default(#20120,#10000,6,14,6,15) +regexpterm(#20119,14,#20117,0,"abc") +#20120=@"loc,{#10000},6,6,6,8" +locations_default(#20120,#10000,6,6,6,8) hasLocation(#20119,#20120) -regexp_const_value(#20119,"\}") +regexp_const_value(#20119,"abc") #20121=* -entry_cfg_node(#20121,#20001) -#20122=@"loc,{#10000},1,1,1,0" -locations_default(#20122,#10000,1,1,1,0) +regexpterm(#20121,14,#20117,1,"cbd") +#20122=@"loc,{#10000},6,10,6,12" +locations_default(#20122,#10000,6,10,6,12) hasLocation(#20121,#20122) +regexp_const_value(#20121,"cbd") #20123=* -exit_cfg_node(#20123,#20001) -hasLocation(#20123,#20039) -successor(#20107,#20108) -successor(#20108,#20123) -successor(#20099,#20100) -successor(#20100,#20107) -successor(#20085,#20086) -successor(#20086,#20099) -successor(#20069,#20070) -successor(#20070,#20085) -successor(#20049,#20050) -successor(#20050,#20069) +regexpterm(#20123,14,#20117,2,"\}") +#20124=@"loc,{#10000},6,14,6,15" +locations_default(#20124,#10000,6,14,6,15) +hasLocation(#20123,#20124) +regexp_const_value(#20123,"\}") +#20125=* +entry_cfg_node(#20125,#20001) +#20126=@"loc,{#10000},1,1,1,0" +locations_default(#20126,#10000,1,1,1,0) +hasLocation(#20125,#20126) +#20127=* +exit_cfg_node(#20127,#20001) 
+hasLocation(#20127,#20039) +successor(#20111,#20112) +successor(#20112,#20127) +successor(#20103,#20104) +successor(#20104,#20111) +successor(#20088,#20089) +successor(#20089,#20103) +successor(#20071,#20072) +successor(#20072,#20088) +successor(#20050,#20051) +successor(#20051,#20071) successor(#20041,#20042) -successor(#20042,#20049) -successor(#20121,#20041) +successor(#20042,#20050) +successor(#20125,#20041) numlines(#10000,6,6,0) filetype(#10000,"javascript") From c0202f6085137d7dfda47e25093235e8c50282ce Mon Sep 17 00:00:00 2001 From: Napalys Date: Mon, 3 Mar 2025 08:34:58 +0100 Subject: [PATCH 13/27] Updated dbscheme --- javascript/ql/lib/semmlecode.javascript.dbscheme | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/javascript/ql/lib/semmlecode.javascript.dbscheme b/javascript/ql/lib/semmlecode.javascript.dbscheme index c88c69174bd0..25c5d086618e 100644 --- a/javascript/ql/lib/semmlecode.javascript.dbscheme +++ b/javascript/ql/lib/semmlecode.javascript.dbscheme @@ -859,7 +859,11 @@ case @regexpterm.kind of | 24 = @regexp_char_range | 25 = @regexp_positive_lookbehind | 26 = @regexp_negative_lookbehind -| 27 = @regexp_unicode_property_escape; +| 27 = @regexp_unicode_property_escape +| 28 = @regexp_quoted_string +| 29 = @regexp_intersection +| 30 = @regexp_subtraction +| 31 = @regexp_union; regexp_parse_errors (unique int id: @regexp_parse_error, int regexp: @regexpterm ref, From c7f03df1ebc7c07d3cac3c4c84177fc52c908f22 Mon Sep 17 00:00:00 2001 From: Napalys Date: Mon, 3 Mar 2025 08:50:18 +0100 Subject: [PATCH 14/27] Added change note --- javascript/ql/lib/change-notes/2025-03-03-regex-v.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 javascript/ql/lib/change-notes/2025-03-03-regex-v.md diff --git a/javascript/ql/lib/change-notes/2025-03-03-regex-v.md b/javascript/ql/lib/change-notes/2025-03-03-regex-v.md new file mode 100644 index 000000000000..d93c887acac9 --- /dev/null +++ 
b/javascript/ql/lib/change-notes/2025-03-03-regex-v.md @@ -0,0 +1,8 @@ +--- +category: feature +--- +* Added ability to parse new ECMA 2024 `v` flag operations: + - Intersection `&&` + - Subtraction `--` + - Union + - `\q` quoted string From 9ea89cd63f2e6df4f0e8769275b57b1da97c3db9 Mon Sep 17 00:00:00 2001 From: Napalys Date: Tue, 4 Mar 2025 11:02:11 +0100 Subject: [PATCH 15/27] Added a test case from #18854 --- .../extractor/tests/es2024/input/test.js | 1 + .../tests/es2024/output/trap/test.js.trap | 525 ++++++++++++++++++ .../DuplicateCharacterInCharacterClass/tst.js | 1 + 3 files changed, 527 insertions(+) create mode 100644 javascript/extractor/tests/es2024/input/test.js create mode 100644 javascript/extractor/tests/es2024/output/trap/test.js.trap diff --git a/javascript/extractor/tests/es2024/input/test.js b/javascript/extractor/tests/es2024/input/test.js new file mode 100644 index 000000000000..1f96a35e4c21 --- /dev/null +++ b/javascript/extractor/tests/es2024/input/test.js @@ -0,0 +1 @@ +const regex = /\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv; diff --git a/javascript/extractor/tests/es2024/output/trap/test.js.trap b/javascript/extractor/tests/es2024/output/trap/test.js.trap new file mode 100644 index 000000000000..3baa7137d9ce --- /dev/null +++ b/javascript/extractor/tests/es2024/output/trap/test.js.trap @@ -0,0 +1,525 @@ +#10000=@"/test.js;sourcefile" +files(#10000,"/test.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"const regex = /\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;"," +") 
+#20003=@"loc,{#10000},1,1,1,172" +locations_default(#20003,#10000,1,1,1,172) +hasLocation(#20002,#20003) +numlines(#20001,1,1,0) +#20004=* +tokeninfo(#20004,7,#20001,0,"const") +#20005=@"loc,{#10000},1,1,1,5" +locations_default(#20005,#10000,1,1,1,5) +hasLocation(#20004,#20005) +#20006=* +tokeninfo(#20006,6,#20001,1,"regex") +#20007=@"loc,{#10000},1,7,1,11" +locations_default(#20007,#10000,1,7,1,11) +hasLocation(#20006,#20007) +#20008=* +tokeninfo(#20008,8,#20001,2,"=") +#20009=@"loc,{#10000},1,13,1,13" +locations_default(#20009,#10000,1,13,1,13) +hasLocation(#20008,#20009) +#20010=* +tokeninfo(#20010,5,#20001,3,"/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv") +#20011=@"loc,{#10000},1,15,1,171" +locations_default(#20011,#10000,1,15,1,171) +hasLocation(#20010,#20011) +#20012=* +tokeninfo(#20012,8,#20001,4,";") +#20013=@"loc,{#10000},1,172,1,172" +locations_default(#20013,#10000,1,172,1,172) +hasLocation(#20012,#20013) +#20014=* +tokeninfo(#20014,0,#20001,5,"") +#20015=@"loc,{#10000},2,1,2,0" +locations_default(#20015,#10000,2,1,2,0) +hasLocation(#20014,#20015) +toplevels(#20001,0) +#20016=@"loc,{#10000},1,1,2,0" +locations_default(#20016,#10000,1,1,2,0) +hasLocation(#20001,#20016) +#20017=@"var;{regex};{#20000}" +variables(#20017,"regex",#20000) +#20018=* +stmts(#20018,22,#20001,0,"const r ... +)/gmv;") +hasLocation(#20018,#20003) +stmt_containers(#20018,#20001) +#20019=* +exprs(#20019,64,#20018,0,"regex = ... )+)/gmv") +#20020=@"loc,{#10000},1,7,1,171" +locations_default(#20020,#10000,1,7,1,171) +hasLocation(#20019,#20020) +enclosing_stmt(#20019,#20018) +expr_containers(#20019,#20001) +#20021=* +exprs(#20021,78,#20019,0,"regex") +hasLocation(#20021,#20007) +enclosing_stmt(#20021,#20018) +expr_containers(#20021,#20001) +literals("regex","regex",#20021) +decl(#20021,#20017) +#20022=* +exprs(#20022,5,#20019,1,"/\b(?:h ... 
)+)/gmv") +hasLocation(#20022,#20011) +enclosing_stmt(#20022,#20018) +expr_containers(#20022,#20001) +literals("/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv","/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv",#20022) +#20023=* +regexpterm(#20023,0,#20022,0,"\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)") +#20024=@"loc,{#10000},1,16,1,167" +locations_default(#20024,#10000,1,16,1,167) +hasLocation(#20023,#20024) +#20025=* +regexpterm(#20025,1,#20023,0,"\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+") +#20026=@"loc,{#10000},1,16,1,98" +locations_default(#20026,#10000,1,16,1,98) +hasLocation(#20025,#20026) +#20027=* +regexpterm(#20027,4,#20025,0,"\b") +#20028=@"loc,{#10000},1,16,1,17" +locations_default(#20028,#10000,1,16,1,17) +hasLocation(#20027,#20028) +#20029=* +regexpterm(#20029,13,#20025,1,"(?:https?:\/\/|mailto:|www\.)") +#20030=@"loc,{#10000},1,18,1,46" +locations_default(#20030,#10000,1,18,1,46) +hasLocation(#20029,#20030) +#20031=* +regexpterm(#20031,0,#20029,0,"https?:\/\/|mailto:|www\.") +#20032=@"loc,{#10000},1,21,1,45" +locations_default(#20032,#10000,1,21,1,45) +hasLocation(#20031,#20032) +#20033=* +regexpterm(#20033,1,#20031,0,"https?:\/\/") +#20034=@"loc,{#10000},1,21,1,31" +locations_default(#20034,#10000,1,21,1,31) +hasLocation(#20033,#20034) +#20035=* +regexpterm(#20035,14,#20033,0,"http") +#20036=@"loc,{#10000},1,21,1,24" +locations_default(#20036,#10000,1,21,1,24) +hasLocation(#20035,#20036) +regexp_const_value(#20035,"http") +#20037=* +regexpterm(#20037,10,#20033,1,"s?") +#20038=@"loc,{#10000},1,25,1,26" +locations_default(#20038,#10000,1,25,1,26) +hasLocation(#20037,#20038) 
+is_greedy(#20037) +#20039=* +regexpterm(#20039,14,#20037,0,"s") +#20040=@"loc,{#10000},1,25,1,25" +locations_default(#20040,#10000,1,25,1,25) +hasLocation(#20039,#20040) +regexp_const_value(#20039,"s") +#20041=* +regexpterm(#20041,14,#20033,2,":") +#20042=@"loc,{#10000},1,27,1,27" +locations_default(#20042,#10000,1,27,1,27) +hasLocation(#20041,#20042) +regexp_const_value(#20041,":") +#20043=* +regexpterm(#20043,21,#20033,3,"\/") +#20044=@"loc,{#10000},1,28,1,29" +locations_default(#20044,#10000,1,28,1,29) +hasLocation(#20043,#20044) +regexp_const_value(#20043,"/") +#20045=* +regexpterm(#20045,21,#20033,4,"\/") +#20046=@"loc,{#10000},1,30,1,31" +locations_default(#20046,#10000,1,30,1,31) +hasLocation(#20045,#20046) +regexp_const_value(#20045,"/") +#20047=* +regexpterm(#20047,14,#20031,1,"mailto:") +#20048=@"loc,{#10000},1,33,1,39" +locations_default(#20048,#10000,1,33,1,39) +hasLocation(#20047,#20048) +regexp_const_value(#20047,"mailto:") +#20049=* +regexpterm(#20049,1,#20031,2,"www\.") +#20050=@"loc,{#10000},1,41,1,45" +locations_default(#20050,#10000,1,41,1,45) +hasLocation(#20049,#20050) +#20051=* +regexpterm(#20051,14,#20049,0,"www") +#20052=@"loc,{#10000},1,41,1,43" +locations_default(#20052,#10000,1,41,1,43) +hasLocation(#20051,#20052) +regexp_const_value(#20051,"www") +#20053=* +regexpterm(#20053,21,#20049,1,"\.") +#20054=@"loc,{#10000},1,44,1,45" +locations_default(#20054,#10000,1,44,1,45) +hasLocation(#20053,#20054) +regexp_const_value(#20053,".") +#20055=* +regexpterm(#20055,9,#20025,2,"(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+") +#20056=@"loc,{#10000},1,47,1,98" +locations_default(#20056,#10000,1,47,1,98) +hasLocation(#20055,#20056) +is_greedy(#20055) +#20057=* +regexpterm(#20057,13,#20055,0,"(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])") +#20058=@"loc,{#10000},1,47,1,97" +locations_default(#20058,#10000,1,47,1,97) +hasLocation(#20057,#20058) +#20059=* +regexpterm(#20059,0,#20057,0,"[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]]") 
+#20060=@"loc,{#10000},1,50,1,96" +locations_default(#20060,#10000,1,50,1,96) +hasLocation(#20059,#20060) +#20061=* +regexpterm(#20061,23,#20059,0,"[\S--[\p{P}<>]]") +#20062=@"loc,{#10000},1,50,1,64" +locations_default(#20062,#10000,1,50,1,64) +hasLocation(#20061,#20062) +#20063=* +regexpterm(#20063,30,#20061,0,"[\S--[\p{P}<>]]") +hasLocation(#20063,#20062) +#20064=* +regexpterm(#20064,20,#20063,0,"\S") +#20065=@"loc,{#10000},1,51,1,52" +locations_default(#20065,#10000,1,51,1,52) +hasLocation(#20064,#20065) +char_class_escape(#20064,"S") +#20066=* +regexpterm(#20066,23,#20063,1,"[\p{P}<>]") +#20067=@"loc,{#10000},1,55,1,63" +locations_default(#20067,#10000,1,55,1,63) +hasLocation(#20066,#20067) +#20068=* +regexpterm(#20068,31,#20066,0,"[\p{P}<>]") +hasLocation(#20068,#20067) +#20069=* +regexpterm(#20069,27,#20068,0,"\p{P}") +#20070=@"loc,{#10000},1,56,1,60" +locations_default(#20070,#10000,1,56,1,60) +hasLocation(#20069,#20070) +unicode_property_escapename(#20069,"P") +#20071=* +regexpterm(#20071,14,#20068,1,"<") +#20072=@"loc,{#10000},1,61,1,61" +locations_default(#20072,#10000,1,61,1,61) +hasLocation(#20071,#20072) +regexp_const_value(#20071,"<") +#20073=* +regexpterm(#20073,14,#20068,2,">") +#20074=@"loc,{#10000},1,62,1,62" +locations_default(#20074,#10000,1,62,1,62) +hasLocation(#20073,#20074) +regexp_const_value(#20073,">") +#20075=* +regexpterm(#20075,21,#20059,1,"\/") +#20076=@"loc,{#10000},1,66,1,67" +locations_default(#20076,#10000,1,66,1,67) +hasLocation(#20075,#20076) +regexp_const_value(#20075,"/") +#20077=* +regexpterm(#20077,1,#20059,2,"[\S--[\[\]]]+[\S--[\p{P}<>]]") +#20078=@"loc,{#10000},1,69,1,96" +locations_default(#20078,#10000,1,69,1,96) +hasLocation(#20077,#20078) +#20079=* +regexpterm(#20079,9,#20077,0,"[\S--[\[\]]]+") +#20080=@"loc,{#10000},1,69,1,81" +locations_default(#20080,#10000,1,69,1,81) +hasLocation(#20079,#20080) +is_greedy(#20079) +#20081=* +regexpterm(#20081,23,#20079,0,"[\S--[\[\]]]") +#20082=@"loc,{#10000},1,69,1,80" 
+locations_default(#20082,#10000,1,69,1,80) +hasLocation(#20081,#20082) +#20083=* +regexpterm(#20083,30,#20081,0,"[\S--[\[\]]]") +hasLocation(#20083,#20082) +#20084=* +regexpterm(#20084,20,#20083,0,"\S") +#20085=@"loc,{#10000},1,70,1,71" +locations_default(#20085,#10000,1,70,1,71) +hasLocation(#20084,#20085) +char_class_escape(#20084,"S") +#20086=* +regexpterm(#20086,23,#20083,1,"[\[\]]") +#20087=@"loc,{#10000},1,74,1,79" +locations_default(#20087,#10000,1,74,1,79) +hasLocation(#20086,#20087) +#20088=* +regexpterm(#20088,21,#20086,0,"\[") +#20089=@"loc,{#10000},1,75,1,76" +locations_default(#20089,#10000,1,75,1,76) +hasLocation(#20088,#20089) +regexp_const_value(#20088,"[") +#20090=* +regexpterm(#20090,21,#20086,1,"\]") +#20091=@"loc,{#10000},1,77,1,78" +locations_default(#20091,#10000,1,77,1,78) +hasLocation(#20090,#20091) +regexp_const_value(#20090,"]") +#20092=* +regexpterm(#20092,23,#20077,1,"[\S--[\p{P}<>]]") +#20093=@"loc,{#10000},1,82,1,96" +locations_default(#20093,#10000,1,82,1,96) +hasLocation(#20092,#20093) +#20094=* +regexpterm(#20094,30,#20092,0,"[\S--[\p{P}<>]]") +hasLocation(#20094,#20093) +#20095=* +regexpterm(#20095,20,#20094,0,"\S") +#20096=@"loc,{#10000},1,83,1,84" +locations_default(#20096,#10000,1,83,1,84) +hasLocation(#20095,#20096) +char_class_escape(#20095,"S") +#20097=* +regexpterm(#20097,23,#20094,1,"[\p{P}<>]") +#20098=@"loc,{#10000},1,87,1,95" +locations_default(#20098,#10000,1,87,1,95) +hasLocation(#20097,#20098) +#20099=* +regexpterm(#20099,31,#20097,0,"[\p{P}<>]") +hasLocation(#20099,#20098) +#20100=* +regexpterm(#20100,27,#20099,0,"\p{P}") +#20101=@"loc,{#10000},1,88,1,92" +locations_default(#20101,#10000,1,88,1,92) +hasLocation(#20100,#20101) +unicode_property_escapename(#20100,"P") +#20102=* +regexpterm(#20102,14,#20099,1,"<") +#20103=@"loc,{#10000},1,93,1,93" +locations_default(#20103,#10000,1,93,1,93) +hasLocation(#20102,#20103) +regexp_const_value(#20102,"<") +#20104=* +regexpterm(#20104,14,#20099,2,">") 
+#20105=@"loc,{#10000},1,94,1,94" +locations_default(#20105,#10000,1,94,1,94) +hasLocation(#20104,#20105) +regexp_const_value(#20104,">") +#20106=* +regexpterm(#20106,1,#20023,1,"\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)") +#20107=@"loc,{#10000},1,100,1,167" +locations_default(#20107,#10000,1,100,1,167) +hasLocation(#20106,#20107) +#20108=* +regexpterm(#20108,4,#20106,0,"\b") +#20109=@"loc,{#10000},1,100,1,101" +locations_default(#20109,#10000,1,100,1,101) +hasLocation(#20108,#20109) +#20110=* +regexpterm(#20110,9,#20106,1,"[\S--[@\p{Ps}\p{Pe}<>]]+") +#20111=@"loc,{#10000},1,102,1,125" +locations_default(#20111,#10000,1,102,1,125) +hasLocation(#20110,#20111) +is_greedy(#20110) +#20112=* +regexpterm(#20112,23,#20110,0,"[\S--[@\p{Ps}\p{Pe}<>]]") +#20113=@"loc,{#10000},1,102,1,124" +locations_default(#20113,#10000,1,102,1,124) +hasLocation(#20112,#20113) +#20114=* +regexpterm(#20114,30,#20112,0,"[\S--[@\p{Ps}\p{Pe}<>]]") +hasLocation(#20114,#20113) +#20115=* +regexpterm(#20115,20,#20114,0,"\S") +#20116=@"loc,{#10000},1,103,1,104" +locations_default(#20116,#10000,1,103,1,104) +hasLocation(#20115,#20116) +char_class_escape(#20115,"S") +#20117=* +regexpterm(#20117,23,#20114,1,"[@\p{Ps}\p{Pe}<>]") +#20118=@"loc,{#10000},1,107,1,123" +locations_default(#20118,#10000,1,107,1,123) +hasLocation(#20117,#20118) +#20119=* +regexpterm(#20119,31,#20117,0,"[@\p{Ps}\p{Pe}<>]") +hasLocation(#20119,#20118) +#20120=* +regexpterm(#20120,14,#20119,0,"@") +#20121=@"loc,{#10000},1,108,1,108" +locations_default(#20121,#10000,1,108,1,108) +hasLocation(#20120,#20121) +regexp_const_value(#20120,"@") +#20122=* +regexpterm(#20122,27,#20119,1,"\p{Ps}") +#20123=@"loc,{#10000},1,109,1,114" +locations_default(#20123,#10000,1,109,1,114) +hasLocation(#20122,#20123) +unicode_property_escapename(#20122,"Ps") +#20124=* +regexpterm(#20124,27,#20119,2,"\p{Pe}") +#20125=@"loc,{#10000},1,115,1,120" +locations_default(#20125,#10000,1,115,1,120) +hasLocation(#20124,#20125) 
+unicode_property_escapename(#20124,"Pe") +#20126=* +regexpterm(#20126,14,#20119,3,"<") +#20127=@"loc,{#10000},1,121,1,121" +locations_default(#20127,#10000,1,121,1,121) +hasLocation(#20126,#20127) +regexp_const_value(#20126,"<") +#20128=* +regexpterm(#20128,14,#20119,4,">") +#20129=@"loc,{#10000},1,122,1,122" +locations_default(#20129,#10000,1,122,1,122) +hasLocation(#20128,#20129) +regexp_const_value(#20128,">") +#20130=* +regexpterm(#20130,14,#20106,2,"@") +#20131=@"loc,{#10000},1,126,1,126" +locations_default(#20131,#10000,1,126,1,126) +hasLocation(#20130,#20131) +regexp_const_value(#20130,"@") +#20132=* +regexpterm(#20132,13,#20106,3,"([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)") +#20133=@"loc,{#10000},1,127,1,167" +locations_default(#20133,#10000,1,127,1,167) +hasLocation(#20132,#20133) +is_capture(#20132,1) +#20134=* +regexpterm(#20134,1,#20132,0,"[\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+") +#20135=@"loc,{#10000},1,128,1,166" +locations_default(#20135,#10000,1,128,1,166) +hasLocation(#20134,#20135) +#20136=* +regexpterm(#20136,9,#20134,0,"[\S--[\p{P}<>]]+") +#20137=@"loc,{#10000},1,128,1,143" +locations_default(#20137,#10000,1,128,1,143) +hasLocation(#20136,#20137) +is_greedy(#20136) +#20138=* +regexpterm(#20138,23,#20136,0,"[\S--[\p{P}<>]]") +#20139=@"loc,{#10000},1,128,1,142" +locations_default(#20139,#10000,1,128,1,142) +hasLocation(#20138,#20139) +#20140=* +regexpterm(#20140,30,#20138,0,"[\S--[\p{P}<>]]") +hasLocation(#20140,#20139) +#20141=* +regexpterm(#20141,20,#20140,0,"\S") +#20142=@"loc,{#10000},1,129,1,130" +locations_default(#20142,#10000,1,129,1,130) +hasLocation(#20141,#20142) +char_class_escape(#20141,"S") +#20143=* +regexpterm(#20143,23,#20140,1,"[\p{P}<>]") +#20144=@"loc,{#10000},1,133,1,141" +locations_default(#20144,#10000,1,133,1,141) +hasLocation(#20143,#20144) +#20145=* +regexpterm(#20145,31,#20143,0,"[\p{P}<>]") +hasLocation(#20145,#20144) +#20146=* +regexpterm(#20146,27,#20145,0,"\p{P}") +#20147=@"loc,{#10000},1,134,1,138" 
+locations_default(#20147,#10000,1,134,1,138) +hasLocation(#20146,#20147) +unicode_property_escapename(#20146,"P") +#20148=* +regexpterm(#20148,14,#20145,1,"<") +#20149=@"loc,{#10000},1,139,1,139" +locations_default(#20149,#10000,1,139,1,139) +hasLocation(#20148,#20149) +regexp_const_value(#20148,"<") +#20150=* +regexpterm(#20150,14,#20145,2,">") +#20151=@"loc,{#10000},1,140,1,140" +locations_default(#20151,#10000,1,140,1,140) +hasLocation(#20150,#20151) +regexp_const_value(#20150,">") +#20152=* +regexpterm(#20152,9,#20134,1,"(?:\.[\S--[\p{P}<>]]+)+") +#20153=@"loc,{#10000},1,144,1,166" +locations_default(#20153,#10000,1,144,1,166) +hasLocation(#20152,#20153) +is_greedy(#20152) +#20154=* +regexpterm(#20154,13,#20152,0,"(?:\.[\S--[\p{P}<>]]+)") +#20155=@"loc,{#10000},1,144,1,165" +locations_default(#20155,#10000,1,144,1,165) +hasLocation(#20154,#20155) +#20156=* +regexpterm(#20156,1,#20154,0,"\.[\S--[\p{P}<>]]+") +#20157=@"loc,{#10000},1,147,1,164" +locations_default(#20157,#10000,1,147,1,164) +hasLocation(#20156,#20157) +#20158=* +regexpterm(#20158,21,#20156,0,"\.") +#20159=@"loc,{#10000},1,147,1,148" +locations_default(#20159,#10000,1,147,1,148) +hasLocation(#20158,#20159) +regexp_const_value(#20158,".") +#20160=* +regexpterm(#20160,9,#20156,1,"[\S--[\p{P}<>]]+") +#20161=@"loc,{#10000},1,149,1,164" +locations_default(#20161,#10000,1,149,1,164) +hasLocation(#20160,#20161) +is_greedy(#20160) +#20162=* +regexpterm(#20162,23,#20160,0,"[\S--[\p{P}<>]]") +#20163=@"loc,{#10000},1,149,1,163" +locations_default(#20163,#10000,1,149,1,163) +hasLocation(#20162,#20163) +#20164=* +regexpterm(#20164,30,#20162,0,"[\S--[\p{P}<>]]") +hasLocation(#20164,#20163) +#20165=* +regexpterm(#20165,20,#20164,0,"\S") +#20166=@"loc,{#10000},1,150,1,151" +locations_default(#20166,#10000,1,150,1,151) +hasLocation(#20165,#20166) +char_class_escape(#20165,"S") +#20167=* +regexpterm(#20167,23,#20164,1,"[\p{P}<>]") +#20168=@"loc,{#10000},1,154,1,162" +locations_default(#20168,#10000,1,154,1,162) 
+hasLocation(#20167,#20168) +#20169=* +regexpterm(#20169,31,#20167,0,"[\p{P}<>]") +hasLocation(#20169,#20168) +#20170=* +regexpterm(#20170,27,#20169,0,"\p{P}") +#20171=@"loc,{#10000},1,155,1,159" +locations_default(#20171,#10000,1,155,1,159) +hasLocation(#20170,#20171) +unicode_property_escapename(#20170,"P") +#20172=* +regexpterm(#20172,14,#20169,1,"<") +#20173=@"loc,{#10000},1,160,1,160" +locations_default(#20173,#10000,1,160,1,160) +hasLocation(#20172,#20173) +regexp_const_value(#20172,"<") +#20174=* +regexpterm(#20174,14,#20169,2,">") +#20175=@"loc,{#10000},1,161,1,161" +locations_default(#20175,#10000,1,161,1,161) +hasLocation(#20174,#20175) +regexp_const_value(#20174,">") +#20176=* +entry_cfg_node(#20176,#20001) +#20177=@"loc,{#10000},1,1,1,0" +locations_default(#20177,#10000,1,1,1,0) +hasLocation(#20176,#20177) +#20178=* +exit_cfg_node(#20178,#20001) +hasLocation(#20178,#20015) +successor(#20018,#20021) +successor(#20022,#20019) +successor(#20021,#20022) +successor(#20019,#20178) +successor(#20176,#20018) +numlines(#10000,1,1,0) +filetype(#10000,"javascript") diff --git a/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js index 59114b90c54a..82c0ebcd566d 100644 --- a/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js +++ b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js @@ -11,3 +11,4 @@ /[\u{ff}]/; /[\u{12340}-\u{12345}]/u; // OK new RegExp("[\u{12340}-\u{12345}]", "u"); // OK +const regex = /\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv; From 8099423b6d4a9d08fbbe58dd41c204d16b87479b Mon Sep 17 00:00:00 2001 From: Napalys Date: Wed, 5 Mar 2025 09:14:13 +0100 Subject: [PATCH 16/27] Renamed character class operators lists to `elements`. 
--- .../js/ast/regexp/CharacterClassIntersection.java | 10 +++++----- .../js/ast/regexp/CharacterClassSubtraction.java | 10 +++++----- .../com/semmle/js/ast/regexp/CharacterClassUnion.java | 10 +++++----- .../src/com/semmle/js/extractor/RegExpExtractor.java | 6 +++--- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java index 6e1a424976d6..663400403ea5 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java @@ -4,11 +4,11 @@ import java.util.List; public class CharacterClassIntersection extends RegExpTerm { - private final List intersections; + private final List elements; - public CharacterClassIntersection(SourceLocation loc, List intersections) { + public CharacterClassIntersection(SourceLocation loc, List elements) { super(loc, "CharacterClassIntersection"); - this.intersections = intersections; + this.elements = elements; } @Override @@ -16,7 +16,7 @@ public void accept(Visitor v) { v.visit(this); } - public List getIntersections() { - return intersections; + public List getElements() { + return elements; } } diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java index 70ddbfa1b00a..b1cf5cb5de51 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java @@ -4,11 +4,11 @@ import java.util.List; public class CharacterClassSubtraction extends RegExpTerm { - private final List subtraction; + private final List elements; - public CharacterClassSubtraction(SourceLocation loc, List subtraction) { + public 
CharacterClassSubtraction(SourceLocation loc, List elements) { super(loc, "CharacterClassSubtraction"); - this.subtraction = subtraction; + this.elements = elements; } @Override @@ -16,7 +16,7 @@ public void accept(Visitor v) { v.visit(this); } - public List getSubtraction() { - return subtraction; + public List getElements() { + return elements; } } diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java index 7d3ec4510d52..09bf748383fa 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java @@ -4,11 +4,11 @@ import java.util.List; public class CharacterClassUnion extends RegExpTerm { - private final List union; + private final List elements; - public CharacterClassUnion(SourceLocation loc, List union) { + public CharacterClassUnion(SourceLocation loc, List elements) { super(loc, "CharacterClassUnion"); - this.union = union; + this.elements = elements; } @Override @@ -16,7 +16,7 @@ public void accept(Visitor v) { v.visit(this); } - public List getUnion() { - return union; + public List getElements() { + return elements; } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 4fbf1b4f6cf6..9b9cbb5aec63 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -363,7 +363,7 @@ public void visit(CharacterClassQuotedString nd) { public void visit(CharacterClassIntersection nd) { Label lbl = extractTerm(nd, parent, idx); int i = 0; - for (RegExpTerm element : nd.getIntersections()) + for (RegExpTerm element : nd.getElements()) visit(element, lbl, i++); } @@ -371,7 +371,7 @@ public void visit(CharacterClassIntersection nd) { public void 
visit(CharacterClassSubtraction nd) { Label lbl = extractTerm(nd, parent, idx); int i = 0; - for (RegExpTerm element : nd.getSubtraction()) + for (RegExpTerm element : nd.getElements()) visit(element, lbl, i++); } @@ -379,7 +379,7 @@ public void visit(CharacterClassSubtraction nd) { public void visit(CharacterClassUnion nd) { Label lbl = extractTerm(nd, parent, idx); int i = 0; - for (RegExpTerm element : nd.getUnion()) + for (RegExpTerm element : nd.getElements()) visit(element, lbl, i++); } } From 8086c25abe09ba311ec4a8c9d170952d1e2005e2 Mon Sep 17 00:00:00 2001 From: Napalys Date: Wed, 5 Mar 2025 10:07:20 +0100 Subject: [PATCH 17/27] Removed `Union` as standard character class is already an union. --- .../js/ast/regexp/CharacterClassUnion.java | 22 - .../src/com/semmle/js/ast/regexp/Visitor.java | 2 - .../semmle/js/extractor/RegExpExtractor.java | 10 - .../com/semmle/js/parser/RegExpParser.java | 18 +- .../trap/regex_nested_character_class.js.trap | 108 ++-- .../tests/es2024/output/trap/test.js.trap | 533 +++++++++--------- .../tests/es2024/output/trap/union.js.trap | 424 +++++++------- .../ql/lib/semmlecode.javascript.dbscheme | 3 +- 8 files changed, 518 insertions(+), 602 deletions(-) delete mode 100644 javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java deleted file mode 100644 index 09bf748383fa..000000000000 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassUnion.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.semmle.js.ast.regexp; - -import com.semmle.js.ast.SourceLocation; -import java.util.List; - -public class CharacterClassUnion extends RegExpTerm { - private final List elements; - - public CharacterClassUnion(SourceLocation loc, List elements) { - super(loc, "CharacterClassUnion"); - this.elements = elements; - } - - @Override - public void 
accept(Visitor v) { - v.visit(this); - } - - public List getElements() { - return elements; - } -} diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java index 45f737794884..373cb727c5a6 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java @@ -67,6 +67,4 @@ public interface Visitor { public void visit(CharacterClassIntersection nd); public void visit(CharacterClassSubtraction nd); - - public void visit(CharacterClassUnion nd); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 9b9cbb5aec63..0731f8a7c0f5 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -13,7 +13,6 @@ import com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; import com.semmle.js.ast.regexp.CharacterClassSubtraction; -import com.semmle.js.ast.regexp.CharacterClassUnion; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; import com.semmle.js.ast.regexp.ControlLetter; @@ -99,7 +98,6 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) { termkinds.put("CharacterClassQuotedString", 28); termkinds.put("CharacterClassIntersection", 29); termkinds.put("CharacterClassSubtraction", 30); - termkinds.put("CharacterClassUnion", 31); } private static final String[] errmsgs = @@ -374,14 +372,6 @@ public void visit(CharacterClassSubtraction nd) { for (RegExpTerm element : nd.getElements()) visit(element, lbl, i++); } - - @Override - public void visit(CharacterClassUnion nd) { - Label lbl = extractTerm(nd, parent, idx); - int i = 0; - for (RegExpTerm element : nd.getElements()) - visit(element, lbl, 
i++); - } } public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) { diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 70b3487d88b2..8d358d30be69 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -9,7 +9,6 @@ import com.semmle.js.ast.regexp.CharacterClassQuotedString; import com.semmle.js.ast.regexp.CharacterClassRange; import com.semmle.js.ast.regexp.CharacterClassSubtraction; -import com.semmle.js.ast.regexp.CharacterClassUnion; import com.semmle.js.ast.regexp.Constant; import com.semmle.js.ast.regexp.ControlEscape; import com.semmle.js.ast.regexp.ControlLetter; @@ -568,8 +567,7 @@ private RegExpTerm parseCharacterClass() { private enum CharacterClassType { STANDARD, INTERSECTION, - SUBTRACTION, - UNION + SUBTRACTION } // ECMA 2024 `v` flag allows nested character classes. @@ -601,26 +599,12 @@ else if (lookahead("--")) { } } - boolean containsComplex = elements.stream().anyMatch(term -> term instanceof UnicodePropertyEscape || - term instanceof CharacterClassQuotedString || - term instanceof CharacterClass); - - // Set type to UNION only if: - // 1. We haven't already determined a specific type (intersection/subtraction) - // 2. We have more than one element - // 3. We have at least one complex element (i.e. 
a nested character class or a UnicodePropertyEscape) - if (containsComplex && classType == CharacterClassType.STANDARD && elements.size() > 1) { - classType = CharacterClassType.UNION; - } - // Create appropriate RegExpTerm based on the detected class type switch (classType) { case INTERSECTION: return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassIntersection(loc, elements)), inverted)); case SUBTRACTION: return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassSubtraction(loc, elements)), inverted)); - case UNION: - return this.finishTerm(new CharacterClass(loc, Collections.singletonList(new CharacterClassUnion(loc, elements)), inverted)); case STANDARD: default: return this.finishTerm(new CharacterClass(loc, elements, inverted)); diff --git a/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap index 25ba1d8bb6db..c3bc9112cc56 100644 --- a/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/regex_nested_character_class.js.trap @@ -137,81 +137,75 @@ regexpterm(#20042,23,#20041,0,"[ [] [ [] [] ] ]") locations_default(#20043,#10000,3,2,3,17) hasLocation(#20042,#20043) #20044=* -regexpterm(#20044,31,#20042,0,"[ [] [ [] [] ] ]") -hasLocation(#20044,#20043) -#20045=* -regexpterm(#20045,14,#20044,0," ") -#20046=@"loc,{#10000},3,3,3,3" -locations_default(#20046,#10000,3,3,3,3) -hasLocation(#20045,#20046) -regexp_const_value(#20045," ") -#20047=* -regexpterm(#20047,23,#20044,1,"[]") -#20048=@"loc,{#10000},3,4,3,5" -locations_default(#20048,#10000,3,4,3,5) -hasLocation(#20047,#20048) -#20049=* -regexpterm(#20049,14,#20044,2," ") -#20050=@"loc,{#10000},3,6,3,6" -locations_default(#20050,#10000,3,6,3,6) -hasLocation(#20049,#20050) -regexp_const_value(#20049," ") -#20051=* -regexpterm(#20051,23,#20044,3,"[ [] 
[] ]") -#20052=@"loc,{#10000},3,7,3,15" -locations_default(#20052,#10000,3,7,3,15) -hasLocation(#20051,#20052) -#20053=* -regexpterm(#20053,31,#20051,0,"[ [] [] ]") -hasLocation(#20053,#20052) +regexpterm(#20044,14,#20042,0," ") +#20045=@"loc,{#10000},3,3,3,3" +locations_default(#20045,#10000,3,3,3,3) +hasLocation(#20044,#20045) +regexp_const_value(#20044," ") +#20046=* +regexpterm(#20046,23,#20042,1,"[]") +#20047=@"loc,{#10000},3,4,3,5" +locations_default(#20047,#10000,3,4,3,5) +hasLocation(#20046,#20047) +#20048=* +regexpterm(#20048,14,#20042,2," ") +#20049=@"loc,{#10000},3,6,3,6" +locations_default(#20049,#10000,3,6,3,6) +hasLocation(#20048,#20049) +regexp_const_value(#20048," ") +#20050=* +regexpterm(#20050,23,#20042,3,"[ [] [] ]") +#20051=@"loc,{#10000},3,7,3,15" +locations_default(#20051,#10000,3,7,3,15) +hasLocation(#20050,#20051) +#20052=* +regexpterm(#20052,14,#20050,0," ") +#20053=@"loc,{#10000},3,8,3,8" +locations_default(#20053,#10000,3,8,3,8) +hasLocation(#20052,#20053) +regexp_const_value(#20052," ") #20054=* -regexpterm(#20054,14,#20053,0," ") -#20055=@"loc,{#10000},3,8,3,8" -locations_default(#20055,#10000,3,8,3,8) +regexpterm(#20054,23,#20050,1,"[]") +#20055=@"loc,{#10000},3,9,3,10" +locations_default(#20055,#10000,3,9,3,10) hasLocation(#20054,#20055) -regexp_const_value(#20054," ") #20056=* -regexpterm(#20056,23,#20053,1,"[]") -#20057=@"loc,{#10000},3,9,3,10" -locations_default(#20057,#10000,3,9,3,10) +regexpterm(#20056,14,#20050,2," ") +#20057=@"loc,{#10000},3,11,3,11" +locations_default(#20057,#10000,3,11,3,11) hasLocation(#20056,#20057) +regexp_const_value(#20056," ") #20058=* -regexpterm(#20058,14,#20053,2," ") -#20059=@"loc,{#10000},3,11,3,11" -locations_default(#20059,#10000,3,11,3,11) +regexpterm(#20058,23,#20050,3,"[]") +#20059=@"loc,{#10000},3,12,3,13" +locations_default(#20059,#10000,3,12,3,13) hasLocation(#20058,#20059) -regexp_const_value(#20058," ") #20060=* -regexpterm(#20060,23,#20053,3,"[]") -#20061=@"loc,{#10000},3,12,3,13" 
-locations_default(#20061,#10000,3,12,3,13) +regexpterm(#20060,14,#20050,4," ") +#20061=@"loc,{#10000},3,14,3,14" +locations_default(#20061,#10000,3,14,3,14) hasLocation(#20060,#20061) +regexp_const_value(#20060," ") #20062=* -regexpterm(#20062,14,#20053,4," ") -#20063=@"loc,{#10000},3,14,3,14" -locations_default(#20063,#10000,3,14,3,14) +regexpterm(#20062,14,#20042,4," ") +#20063=@"loc,{#10000},3,16,3,16" +locations_default(#20063,#10000,3,16,3,16) hasLocation(#20062,#20063) regexp_const_value(#20062," ") #20064=* -regexpterm(#20064,14,#20044,4," ") -#20065=@"loc,{#10000},3,16,3,16" -locations_default(#20065,#10000,3,16,3,16) +entry_cfg_node(#20064,#20001) +#20065=@"loc,{#10000},1,1,1,0" +locations_default(#20065,#10000,1,1,1,0) hasLocation(#20064,#20065) -regexp_const_value(#20064," ") #20066=* -entry_cfg_node(#20066,#20001) -#20067=@"loc,{#10000},1,1,1,0" -locations_default(#20067,#10000,1,1,1,0) -hasLocation(#20066,#20067) -#20068=* -exit_cfg_node(#20068,#20001) -hasLocation(#20068,#20023) +exit_cfg_node(#20066,#20001) +hasLocation(#20066,#20023) successor(#20040,#20041) -successor(#20041,#20068) +successor(#20041,#20066) successor(#20032,#20033) successor(#20033,#20040) successor(#20025,#20027) successor(#20027,#20032) -successor(#20066,#20025) +successor(#20064,#20025) numlines(#10000,3,3,1) filetype(#10000,"javascript") diff --git a/javascript/extractor/tests/es2024/output/trap/test.js.trap b/javascript/extractor/tests/es2024/output/trap/test.js.trap index 3baa7137d9ce..2cacd2029baa 100644 --- a/javascript/extractor/tests/es2024/output/trap/test.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/test.js.trap @@ -201,325 +201,310 @@ regexpterm(#20066,23,#20063,1,"[\p{P}<>]") locations_default(#20067,#10000,1,55,1,63) hasLocation(#20066,#20067) #20068=* -regexpterm(#20068,31,#20066,0,"[\p{P}<>]") -hasLocation(#20068,#20067) -#20069=* -regexpterm(#20069,27,#20068,0,"\p{P}") -#20070=@"loc,{#10000},1,56,1,60" -locations_default(#20070,#10000,1,56,1,60) 
-hasLocation(#20069,#20070) -unicode_property_escapename(#20069,"P") -#20071=* -regexpterm(#20071,14,#20068,1,"<") -#20072=@"loc,{#10000},1,61,1,61" -locations_default(#20072,#10000,1,61,1,61) -hasLocation(#20071,#20072) -regexp_const_value(#20071,"<") -#20073=* -regexpterm(#20073,14,#20068,2,">") -#20074=@"loc,{#10000},1,62,1,62" -locations_default(#20074,#10000,1,62,1,62) -hasLocation(#20073,#20074) -regexp_const_value(#20073,">") -#20075=* -regexpterm(#20075,21,#20059,1,"\/") -#20076=@"loc,{#10000},1,66,1,67" -locations_default(#20076,#10000,1,66,1,67) -hasLocation(#20075,#20076) -regexp_const_value(#20075,"/") -#20077=* -regexpterm(#20077,1,#20059,2,"[\S--[\[\]]]+[\S--[\p{P}<>]]") -#20078=@"loc,{#10000},1,69,1,96" -locations_default(#20078,#10000,1,69,1,96) -hasLocation(#20077,#20078) -#20079=* -regexpterm(#20079,9,#20077,0,"[\S--[\[\]]]+") -#20080=@"loc,{#10000},1,69,1,81" -locations_default(#20080,#10000,1,69,1,81) -hasLocation(#20079,#20080) -is_greedy(#20079) -#20081=* -regexpterm(#20081,23,#20079,0,"[\S--[\[\]]]") -#20082=@"loc,{#10000},1,69,1,80" -locations_default(#20082,#10000,1,69,1,80) -hasLocation(#20081,#20082) +regexpterm(#20068,27,#20066,0,"\p{P}") +#20069=@"loc,{#10000},1,56,1,60" +locations_default(#20069,#10000,1,56,1,60) +hasLocation(#20068,#20069) +unicode_property_escapename(#20068,"P") +#20070=* +regexpterm(#20070,14,#20066,1,"<") +#20071=@"loc,{#10000},1,61,1,61" +locations_default(#20071,#10000,1,61,1,61) +hasLocation(#20070,#20071) +regexp_const_value(#20070,"<") +#20072=* +regexpterm(#20072,14,#20066,2,">") +#20073=@"loc,{#10000},1,62,1,62" +locations_default(#20073,#10000,1,62,1,62) +hasLocation(#20072,#20073) +regexp_const_value(#20072,">") +#20074=* +regexpterm(#20074,21,#20059,1,"\/") +#20075=@"loc,{#10000},1,66,1,67" +locations_default(#20075,#10000,1,66,1,67) +hasLocation(#20074,#20075) +regexp_const_value(#20074,"/") +#20076=* +regexpterm(#20076,1,#20059,2,"[\S--[\[\]]]+[\S--[\p{P}<>]]") +#20077=@"loc,{#10000},1,69,1,96" 
+locations_default(#20077,#10000,1,69,1,96) +hasLocation(#20076,#20077) +#20078=* +regexpterm(#20078,9,#20076,0,"[\S--[\[\]]]+") +#20079=@"loc,{#10000},1,69,1,81" +locations_default(#20079,#10000,1,69,1,81) +hasLocation(#20078,#20079) +is_greedy(#20078) +#20080=* +regexpterm(#20080,23,#20078,0,"[\S--[\[\]]]") +#20081=@"loc,{#10000},1,69,1,80" +locations_default(#20081,#10000,1,69,1,80) +hasLocation(#20080,#20081) +#20082=* +regexpterm(#20082,30,#20080,0,"[\S--[\[\]]]") +hasLocation(#20082,#20081) #20083=* -regexpterm(#20083,30,#20081,0,"[\S--[\[\]]]") -hasLocation(#20083,#20082) -#20084=* -regexpterm(#20084,20,#20083,0,"\S") -#20085=@"loc,{#10000},1,70,1,71" -locations_default(#20085,#10000,1,70,1,71) -hasLocation(#20084,#20085) -char_class_escape(#20084,"S") -#20086=* -regexpterm(#20086,23,#20083,1,"[\[\]]") -#20087=@"loc,{#10000},1,74,1,79" -locations_default(#20087,#10000,1,74,1,79) -hasLocation(#20086,#20087) -#20088=* -regexpterm(#20088,21,#20086,0,"\[") -#20089=@"loc,{#10000},1,75,1,76" -locations_default(#20089,#10000,1,75,1,76) -hasLocation(#20088,#20089) -regexp_const_value(#20088,"[") -#20090=* -regexpterm(#20090,21,#20086,1,"\]") -#20091=@"loc,{#10000},1,77,1,78" -locations_default(#20091,#10000,1,77,1,78) -hasLocation(#20090,#20091) -regexp_const_value(#20090,"]") -#20092=* -regexpterm(#20092,23,#20077,1,"[\S--[\p{P}<>]]") -#20093=@"loc,{#10000},1,82,1,96" -locations_default(#20093,#10000,1,82,1,96) -hasLocation(#20092,#20093) +regexpterm(#20083,20,#20082,0,"\S") +#20084=@"loc,{#10000},1,70,1,71" +locations_default(#20084,#10000,1,70,1,71) +hasLocation(#20083,#20084) +char_class_escape(#20083,"S") +#20085=* +regexpterm(#20085,23,#20082,1,"[\[\]]") +#20086=@"loc,{#10000},1,74,1,79" +locations_default(#20086,#10000,1,74,1,79) +hasLocation(#20085,#20086) +#20087=* +regexpterm(#20087,21,#20085,0,"\[") +#20088=@"loc,{#10000},1,75,1,76" +locations_default(#20088,#10000,1,75,1,76) +hasLocation(#20087,#20088) +regexp_const_value(#20087,"[") +#20089=* 
+regexpterm(#20089,21,#20085,1,"\]") +#20090=@"loc,{#10000},1,77,1,78" +locations_default(#20090,#10000,1,77,1,78) +hasLocation(#20089,#20090) +regexp_const_value(#20089,"]") +#20091=* +regexpterm(#20091,23,#20076,1,"[\S--[\p{P}<>]]") +#20092=@"loc,{#10000},1,82,1,96" +locations_default(#20092,#10000,1,82,1,96) +hasLocation(#20091,#20092) +#20093=* +regexpterm(#20093,30,#20091,0,"[\S--[\p{P}<>]]") +hasLocation(#20093,#20092) #20094=* -regexpterm(#20094,30,#20092,0,"[\S--[\p{P}<>]]") -hasLocation(#20094,#20093) -#20095=* -regexpterm(#20095,20,#20094,0,"\S") -#20096=@"loc,{#10000},1,83,1,84" -locations_default(#20096,#10000,1,83,1,84) -hasLocation(#20095,#20096) -char_class_escape(#20095,"S") -#20097=* -regexpterm(#20097,23,#20094,1,"[\p{P}<>]") -#20098=@"loc,{#10000},1,87,1,95" -locations_default(#20098,#10000,1,87,1,95) -hasLocation(#20097,#20098) -#20099=* -regexpterm(#20099,31,#20097,0,"[\p{P}<>]") -hasLocation(#20099,#20098) +regexpterm(#20094,20,#20093,0,"\S") +#20095=@"loc,{#10000},1,83,1,84" +locations_default(#20095,#10000,1,83,1,84) +hasLocation(#20094,#20095) +char_class_escape(#20094,"S") +#20096=* +regexpterm(#20096,23,#20093,1,"[\p{P}<>]") +#20097=@"loc,{#10000},1,87,1,95" +locations_default(#20097,#10000,1,87,1,95) +hasLocation(#20096,#20097) +#20098=* +regexpterm(#20098,27,#20096,0,"\p{P}") +#20099=@"loc,{#10000},1,88,1,92" +locations_default(#20099,#10000,1,88,1,92) +hasLocation(#20098,#20099) +unicode_property_escapename(#20098,"P") #20100=* -regexpterm(#20100,27,#20099,0,"\p{P}") -#20101=@"loc,{#10000},1,88,1,92" -locations_default(#20101,#10000,1,88,1,92) +regexpterm(#20100,14,#20096,1,"<") +#20101=@"loc,{#10000},1,93,1,93" +locations_default(#20101,#10000,1,93,1,93) hasLocation(#20100,#20101) -unicode_property_escapename(#20100,"P") +regexp_const_value(#20100,"<") #20102=* -regexpterm(#20102,14,#20099,1,"<") -#20103=@"loc,{#10000},1,93,1,93" -locations_default(#20103,#10000,1,93,1,93) +regexpterm(#20102,14,#20096,2,">") 
+#20103=@"loc,{#10000},1,94,1,94" +locations_default(#20103,#10000,1,94,1,94) hasLocation(#20102,#20103) -regexp_const_value(#20102,"<") +regexp_const_value(#20102,">") #20104=* -regexpterm(#20104,14,#20099,2,">") -#20105=@"loc,{#10000},1,94,1,94" -locations_default(#20105,#10000,1,94,1,94) +regexpterm(#20104,1,#20023,1,"\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)") +#20105=@"loc,{#10000},1,100,1,167" +locations_default(#20105,#10000,1,100,1,167) hasLocation(#20104,#20105) -regexp_const_value(#20104,">") #20106=* -regexpterm(#20106,1,#20023,1,"\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)") -#20107=@"loc,{#10000},1,100,1,167" -locations_default(#20107,#10000,1,100,1,167) +regexpterm(#20106,4,#20104,0,"\b") +#20107=@"loc,{#10000},1,100,1,101" +locations_default(#20107,#10000,1,100,1,101) hasLocation(#20106,#20107) #20108=* -regexpterm(#20108,4,#20106,0,"\b") -#20109=@"loc,{#10000},1,100,1,101" -locations_default(#20109,#10000,1,100,1,101) +regexpterm(#20108,9,#20104,1,"[\S--[@\p{Ps}\p{Pe}<>]]+") +#20109=@"loc,{#10000},1,102,1,125" +locations_default(#20109,#10000,1,102,1,125) hasLocation(#20108,#20109) +is_greedy(#20108) #20110=* -regexpterm(#20110,9,#20106,1,"[\S--[@\p{Ps}\p{Pe}<>]]+") -#20111=@"loc,{#10000},1,102,1,125" -locations_default(#20111,#10000,1,102,1,125) +regexpterm(#20110,23,#20108,0,"[\S--[@\p{Ps}\p{Pe}<>]]") +#20111=@"loc,{#10000},1,102,1,124" +locations_default(#20111,#10000,1,102,1,124) hasLocation(#20110,#20111) -is_greedy(#20110) #20112=* -regexpterm(#20112,23,#20110,0,"[\S--[@\p{Ps}\p{Pe}<>]]") -#20113=@"loc,{#10000},1,102,1,124" -locations_default(#20113,#10000,1,102,1,124) -hasLocation(#20112,#20113) -#20114=* -regexpterm(#20114,30,#20112,0,"[\S--[@\p{Ps}\p{Pe}<>]]") -hasLocation(#20114,#20113) +regexpterm(#20112,30,#20110,0,"[\S--[@\p{Ps}\p{Pe}<>]]") +hasLocation(#20112,#20111) +#20113=* +regexpterm(#20113,20,#20112,0,"\S") +#20114=@"loc,{#10000},1,103,1,104" 
+locations_default(#20114,#10000,1,103,1,104) +hasLocation(#20113,#20114) +char_class_escape(#20113,"S") #20115=* -regexpterm(#20115,20,#20114,0,"\S") -#20116=@"loc,{#10000},1,103,1,104" -locations_default(#20116,#10000,1,103,1,104) +regexpterm(#20115,23,#20112,1,"[@\p{Ps}\p{Pe}<>]") +#20116=@"loc,{#10000},1,107,1,123" +locations_default(#20116,#10000,1,107,1,123) hasLocation(#20115,#20116) -char_class_escape(#20115,"S") #20117=* -regexpterm(#20117,23,#20114,1,"[@\p{Ps}\p{Pe}<>]") -#20118=@"loc,{#10000},1,107,1,123" -locations_default(#20118,#10000,1,107,1,123) +regexpterm(#20117,14,#20115,0,"@") +#20118=@"loc,{#10000},1,108,1,108" +locations_default(#20118,#10000,1,108,1,108) hasLocation(#20117,#20118) +regexp_const_value(#20117,"@") #20119=* -regexpterm(#20119,31,#20117,0,"[@\p{Ps}\p{Pe}<>]") -hasLocation(#20119,#20118) -#20120=* -regexpterm(#20120,14,#20119,0,"@") -#20121=@"loc,{#10000},1,108,1,108" -locations_default(#20121,#10000,1,108,1,108) -hasLocation(#20120,#20121) -regexp_const_value(#20120,"@") -#20122=* -regexpterm(#20122,27,#20119,1,"\p{Ps}") -#20123=@"loc,{#10000},1,109,1,114" -locations_default(#20123,#10000,1,109,1,114) -hasLocation(#20122,#20123) -unicode_property_escapename(#20122,"Ps") -#20124=* -regexpterm(#20124,27,#20119,2,"\p{Pe}") -#20125=@"loc,{#10000},1,115,1,120" -locations_default(#20125,#10000,1,115,1,120) -hasLocation(#20124,#20125) -unicode_property_escapename(#20124,"Pe") -#20126=* -regexpterm(#20126,14,#20119,3,"<") -#20127=@"loc,{#10000},1,121,1,121" -locations_default(#20127,#10000,1,121,1,121) -hasLocation(#20126,#20127) -regexp_const_value(#20126,"<") -#20128=* -regexpterm(#20128,14,#20119,4,">") -#20129=@"loc,{#10000},1,122,1,122" -locations_default(#20129,#10000,1,122,1,122) -hasLocation(#20128,#20129) -regexp_const_value(#20128,">") -#20130=* -regexpterm(#20130,14,#20106,2,"@") -#20131=@"loc,{#10000},1,126,1,126" -locations_default(#20131,#10000,1,126,1,126) -hasLocation(#20130,#20131) -regexp_const_value(#20130,"@") 
-#20132=* -regexpterm(#20132,13,#20106,3,"([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)") -#20133=@"loc,{#10000},1,127,1,167" -locations_default(#20133,#10000,1,127,1,167) -hasLocation(#20132,#20133) -is_capture(#20132,1) -#20134=* -regexpterm(#20134,1,#20132,0,"[\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+") -#20135=@"loc,{#10000},1,128,1,166" -locations_default(#20135,#10000,1,128,1,166) -hasLocation(#20134,#20135) -#20136=* -regexpterm(#20136,9,#20134,0,"[\S--[\p{P}<>]]+") -#20137=@"loc,{#10000},1,128,1,143" -locations_default(#20137,#10000,1,128,1,143) -hasLocation(#20136,#20137) -is_greedy(#20136) +regexpterm(#20119,27,#20115,1,"\p{Ps}") +#20120=@"loc,{#10000},1,109,1,114" +locations_default(#20120,#10000,1,109,1,114) +hasLocation(#20119,#20120) +unicode_property_escapename(#20119,"Ps") +#20121=* +regexpterm(#20121,27,#20115,2,"\p{Pe}") +#20122=@"loc,{#10000},1,115,1,120" +locations_default(#20122,#10000,1,115,1,120) +hasLocation(#20121,#20122) +unicode_property_escapename(#20121,"Pe") +#20123=* +regexpterm(#20123,14,#20115,3,"<") +#20124=@"loc,{#10000},1,121,1,121" +locations_default(#20124,#10000,1,121,1,121) +hasLocation(#20123,#20124) +regexp_const_value(#20123,"<") +#20125=* +regexpterm(#20125,14,#20115,4,">") +#20126=@"loc,{#10000},1,122,1,122" +locations_default(#20126,#10000,1,122,1,122) +hasLocation(#20125,#20126) +regexp_const_value(#20125,">") +#20127=* +regexpterm(#20127,14,#20104,2,"@") +#20128=@"loc,{#10000},1,126,1,126" +locations_default(#20128,#10000,1,126,1,126) +hasLocation(#20127,#20128) +regexp_const_value(#20127,"@") +#20129=* +regexpterm(#20129,13,#20104,3,"([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)") +#20130=@"loc,{#10000},1,127,1,167" +locations_default(#20130,#10000,1,127,1,167) +hasLocation(#20129,#20130) +is_capture(#20129,1) +#20131=* +regexpterm(#20131,1,#20129,0,"[\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+") +#20132=@"loc,{#10000},1,128,1,166" +locations_default(#20132,#10000,1,128,1,166) +hasLocation(#20131,#20132) +#20133=* 
+regexpterm(#20133,9,#20131,0,"[\S--[\p{P}<>]]+") +#20134=@"loc,{#10000},1,128,1,143" +locations_default(#20134,#10000,1,128,1,143) +hasLocation(#20133,#20134) +is_greedy(#20133) +#20135=* +regexpterm(#20135,23,#20133,0,"[\S--[\p{P}<>]]") +#20136=@"loc,{#10000},1,128,1,142" +locations_default(#20136,#10000,1,128,1,142) +hasLocation(#20135,#20136) +#20137=* +regexpterm(#20137,30,#20135,0,"[\S--[\p{P}<>]]") +hasLocation(#20137,#20136) #20138=* -regexpterm(#20138,23,#20136,0,"[\S--[\p{P}<>]]") -#20139=@"loc,{#10000},1,128,1,142" -locations_default(#20139,#10000,1,128,1,142) +regexpterm(#20138,20,#20137,0,"\S") +#20139=@"loc,{#10000},1,129,1,130" +locations_default(#20139,#10000,1,129,1,130) hasLocation(#20138,#20139) +char_class_escape(#20138,"S") #20140=* -regexpterm(#20140,30,#20138,0,"[\S--[\p{P}<>]]") -hasLocation(#20140,#20139) -#20141=* -regexpterm(#20141,20,#20140,0,"\S") -#20142=@"loc,{#10000},1,129,1,130" -locations_default(#20142,#10000,1,129,1,130) -hasLocation(#20141,#20142) -char_class_escape(#20141,"S") -#20143=* -regexpterm(#20143,23,#20140,1,"[\p{P}<>]") -#20144=@"loc,{#10000},1,133,1,141" -locations_default(#20144,#10000,1,133,1,141) -hasLocation(#20143,#20144) -#20145=* -regexpterm(#20145,31,#20143,0,"[\p{P}<>]") -hasLocation(#20145,#20144) +regexpterm(#20140,23,#20137,1,"[\p{P}<>]") +#20141=@"loc,{#10000},1,133,1,141" +locations_default(#20141,#10000,1,133,1,141) +hasLocation(#20140,#20141) +#20142=* +regexpterm(#20142,27,#20140,0,"\p{P}") +#20143=@"loc,{#10000},1,134,1,138" +locations_default(#20143,#10000,1,134,1,138) +hasLocation(#20142,#20143) +unicode_property_escapename(#20142,"P") +#20144=* +regexpterm(#20144,14,#20140,1,"<") +#20145=@"loc,{#10000},1,139,1,139" +locations_default(#20145,#10000,1,139,1,139) +hasLocation(#20144,#20145) +regexp_const_value(#20144,"<") #20146=* -regexpterm(#20146,27,#20145,0,"\p{P}") -#20147=@"loc,{#10000},1,134,1,138" -locations_default(#20147,#10000,1,134,1,138) +regexpterm(#20146,14,#20140,2,">") 
+#20147=@"loc,{#10000},1,140,1,140" +locations_default(#20147,#10000,1,140,1,140) hasLocation(#20146,#20147) -unicode_property_escapename(#20146,"P") +regexp_const_value(#20146,">") #20148=* -regexpterm(#20148,14,#20145,1,"<") -#20149=@"loc,{#10000},1,139,1,139" -locations_default(#20149,#10000,1,139,1,139) +regexpterm(#20148,9,#20131,1,"(?:\.[\S--[\p{P}<>]]+)+") +#20149=@"loc,{#10000},1,144,1,166" +locations_default(#20149,#10000,1,144,1,166) hasLocation(#20148,#20149) -regexp_const_value(#20148,"<") +is_greedy(#20148) #20150=* -regexpterm(#20150,14,#20145,2,">") -#20151=@"loc,{#10000},1,140,1,140" -locations_default(#20151,#10000,1,140,1,140) +regexpterm(#20150,13,#20148,0,"(?:\.[\S--[\p{P}<>]]+)") +#20151=@"loc,{#10000},1,144,1,165" +locations_default(#20151,#10000,1,144,1,165) hasLocation(#20150,#20151) -regexp_const_value(#20150,">") #20152=* -regexpterm(#20152,9,#20134,1,"(?:\.[\S--[\p{P}<>]]+)+") -#20153=@"loc,{#10000},1,144,1,166" -locations_default(#20153,#10000,1,144,1,166) +regexpterm(#20152,1,#20150,0,"\.[\S--[\p{P}<>]]+") +#20153=@"loc,{#10000},1,147,1,164" +locations_default(#20153,#10000,1,147,1,164) hasLocation(#20152,#20153) -is_greedy(#20152) #20154=* -regexpterm(#20154,13,#20152,0,"(?:\.[\S--[\p{P}<>]]+)") -#20155=@"loc,{#10000},1,144,1,165" -locations_default(#20155,#10000,1,144,1,165) +regexpterm(#20154,21,#20152,0,"\.") +#20155=@"loc,{#10000},1,147,1,148" +locations_default(#20155,#10000,1,147,1,148) hasLocation(#20154,#20155) +regexp_const_value(#20154,".") #20156=* -regexpterm(#20156,1,#20154,0,"\.[\S--[\p{P}<>]]+") -#20157=@"loc,{#10000},1,147,1,164" -locations_default(#20157,#10000,1,147,1,164) +regexpterm(#20156,9,#20152,1,"[\S--[\p{P}<>]]+") +#20157=@"loc,{#10000},1,149,1,164" +locations_default(#20157,#10000,1,149,1,164) hasLocation(#20156,#20157) +is_greedy(#20156) #20158=* -regexpterm(#20158,21,#20156,0,"\.") -#20159=@"loc,{#10000},1,147,1,148" -locations_default(#20159,#10000,1,147,1,148) 
+regexpterm(#20158,23,#20156,0,"[\S--[\p{P}<>]]") +#20159=@"loc,{#10000},1,149,1,163" +locations_default(#20159,#10000,1,149,1,163) hasLocation(#20158,#20159) -regexp_const_value(#20158,".") #20160=* -regexpterm(#20160,9,#20156,1,"[\S--[\p{P}<>]]+") -#20161=@"loc,{#10000},1,149,1,164" -locations_default(#20161,#10000,1,149,1,164) -hasLocation(#20160,#20161) -is_greedy(#20160) -#20162=* -regexpterm(#20162,23,#20160,0,"[\S--[\p{P}<>]]") -#20163=@"loc,{#10000},1,149,1,163" -locations_default(#20163,#10000,1,149,1,163) -hasLocation(#20162,#20163) -#20164=* -regexpterm(#20164,30,#20162,0,"[\S--[\p{P}<>]]") -hasLocation(#20164,#20163) +regexpterm(#20160,30,#20158,0,"[\S--[\p{P}<>]]") +hasLocation(#20160,#20159) +#20161=* +regexpterm(#20161,20,#20160,0,"\S") +#20162=@"loc,{#10000},1,150,1,151" +locations_default(#20162,#10000,1,150,1,151) +hasLocation(#20161,#20162) +char_class_escape(#20161,"S") +#20163=* +regexpterm(#20163,23,#20160,1,"[\p{P}<>]") +#20164=@"loc,{#10000},1,154,1,162" +locations_default(#20164,#10000,1,154,1,162) +hasLocation(#20163,#20164) #20165=* -regexpterm(#20165,20,#20164,0,"\S") -#20166=@"loc,{#10000},1,150,1,151" -locations_default(#20166,#10000,1,150,1,151) +regexpterm(#20165,27,#20163,0,"\p{P}") +#20166=@"loc,{#10000},1,155,1,159" +locations_default(#20166,#10000,1,155,1,159) hasLocation(#20165,#20166) -char_class_escape(#20165,"S") +unicode_property_escapename(#20165,"P") #20167=* -regexpterm(#20167,23,#20164,1,"[\p{P}<>]") -#20168=@"loc,{#10000},1,154,1,162" -locations_default(#20168,#10000,1,154,1,162) +regexpterm(#20167,14,#20163,1,"<") +#20168=@"loc,{#10000},1,160,1,160" +locations_default(#20168,#10000,1,160,1,160) hasLocation(#20167,#20168) +regexp_const_value(#20167,"<") #20169=* -regexpterm(#20169,31,#20167,0,"[\p{P}<>]") -hasLocation(#20169,#20168) -#20170=* -regexpterm(#20170,27,#20169,0,"\p{P}") -#20171=@"loc,{#10000},1,155,1,159" -locations_default(#20171,#10000,1,155,1,159) -hasLocation(#20170,#20171) 
-unicode_property_escapename(#20170,"P") -#20172=* -regexpterm(#20172,14,#20169,1,"<") -#20173=@"loc,{#10000},1,160,1,160" -locations_default(#20173,#10000,1,160,1,160) -hasLocation(#20172,#20173) -regexp_const_value(#20172,"<") -#20174=* -regexpterm(#20174,14,#20169,2,">") -#20175=@"loc,{#10000},1,161,1,161" -locations_default(#20175,#10000,1,161,1,161) -hasLocation(#20174,#20175) -regexp_const_value(#20174,">") -#20176=* -entry_cfg_node(#20176,#20001) -#20177=@"loc,{#10000},1,1,1,0" -locations_default(#20177,#10000,1,1,1,0) -hasLocation(#20176,#20177) -#20178=* -exit_cfg_node(#20178,#20001) -hasLocation(#20178,#20015) +regexpterm(#20169,14,#20163,2,">") +#20170=@"loc,{#10000},1,161,1,161" +locations_default(#20170,#10000,1,161,1,161) +hasLocation(#20169,#20170) +regexp_const_value(#20169,">") +#20171=* +entry_cfg_node(#20171,#20001) +#20172=@"loc,{#10000},1,1,1,0" +locations_default(#20172,#10000,1,1,1,0) +hasLocation(#20171,#20172) +#20173=* +exit_cfg_node(#20173,#20001) +hasLocation(#20173,#20015) successor(#20018,#20021) successor(#20022,#20019) successor(#20021,#20022) -successor(#20019,#20178) -successor(#20176,#20018) +successor(#20019,#20173) +successor(#20171,#20018) numlines(#10000,1,1,0) filetype(#10000,"javascript") diff --git a/javascript/extractor/tests/es2024/output/trap/union.js.trap b/javascript/extractor/tests/es2024/output/trap/union.js.trap index 32674c50e53b..31d98f755e85 100644 --- a/javascript/extractor/tests/es2024/output/trap/union.js.trap +++ b/javascript/extractor/tests/es2024/output/trap/union.js.trap @@ -131,274 +131,262 @@ regexpterm(#20043,23,#20042,0,"[\p{Script_Extensions=Greek}\p{RGI_Emoji}]") locations_default(#20044,#10000,1,2,1,43) hasLocation(#20043,#20044) #20045=* -regexpterm(#20045,31,#20043,0,"[\p{Script_Extensions=Greek}\p{RGI_Emoji}]") -hasLocation(#20045,#20044) -#20046=* -regexpterm(#20046,27,#20045,0,"\p{Script_Extensions=Greek}") -#20047=@"loc,{#10000},1,3,1,29" -locations_default(#20047,#10000,1,3,1,29) 
-hasLocation(#20046,#20047) -unicode_property_escapename(#20046,"Script_Extensions") -unicode_property_escapevalue(#20046,"Greek") -#20048=* -regexpterm(#20048,27,#20045,1,"\p{RGI_Emoji}") -#20049=@"loc,{#10000},1,30,1,42" -locations_default(#20049,#10000,1,30,1,42) -hasLocation(#20048,#20049) -unicode_property_escapename(#20048,"RGI_Emoji") +regexpterm(#20045,27,#20043,0,"\p{Script_Extensions=Greek}") +#20046=@"loc,{#10000},1,3,1,29" +locations_default(#20046,#10000,1,3,1,29) +hasLocation(#20045,#20046) +unicode_property_escapename(#20045,"Script_Extensions") +unicode_property_escapevalue(#20045,"Greek") +#20047=* +regexpterm(#20047,27,#20043,1,"\p{RGI_Emoji}") +#20048=@"loc,{#10000},1,30,1,42" +locations_default(#20048,#10000,1,30,1,42) +hasLocation(#20047,#20048) +unicode_property_escapename(#20047,"RGI_Emoji") +#20049=* +stmts(#20049,2,#20001,1,"/[[abc][cbd]]/v;") +hasLocation(#20049,#20005) +stmt_containers(#20049,#20001) #20050=* -stmts(#20050,2,#20001,1,"/[[abc][cbd]]/v;") -hasLocation(#20050,#20005) -stmt_containers(#20050,#20001) +exprs(#20050,5,#20049,0,"/[[abc][cbd]]/v") +hasLocation(#20050,#20019) +enclosing_stmt(#20050,#20049) +expr_containers(#20050,#20001) +literals("/[[abc][cbd]]/v","/[[abc][cbd]]/v",#20050) #20051=* -exprs(#20051,5,#20050,0,"/[[abc][cbd]]/v") -hasLocation(#20051,#20019) -enclosing_stmt(#20051,#20050) -expr_containers(#20051,#20001) -literals("/[[abc][cbd]]/v","/[[abc][cbd]]/v",#20051) -#20052=* -regexpterm(#20052,23,#20051,0,"[[abc][cbd]]") -#20053=@"loc,{#10000},2,2,2,13" -locations_default(#20053,#10000,2,2,2,13) -hasLocation(#20052,#20053) -#20054=* -regexpterm(#20054,31,#20052,0,"[[abc][cbd]]") -hasLocation(#20054,#20053) +regexpterm(#20051,23,#20050,0,"[[abc][cbd]]") +#20052=@"loc,{#10000},2,2,2,13" +locations_default(#20052,#10000,2,2,2,13) +hasLocation(#20051,#20052) +#20053=* +regexpterm(#20053,23,#20051,0,"[abc]") +#20054=@"loc,{#10000},2,3,2,7" +locations_default(#20054,#10000,2,3,2,7) +hasLocation(#20053,#20054) #20055=* 
-regexpterm(#20055,23,#20054,0,"[abc]") -#20056=@"loc,{#10000},2,3,2,7" -locations_default(#20056,#10000,2,3,2,7) +regexpterm(#20055,14,#20053,0,"a") +#20056=@"loc,{#10000},2,4,2,4" +locations_default(#20056,#10000,2,4,2,4) hasLocation(#20055,#20056) +regexp_const_value(#20055,"a") #20057=* -regexpterm(#20057,14,#20055,0,"a") -#20058=@"loc,{#10000},2,4,2,4" -locations_default(#20058,#10000,2,4,2,4) +regexpterm(#20057,14,#20053,1,"b") +#20058=@"loc,{#10000},2,5,2,5" +locations_default(#20058,#10000,2,5,2,5) hasLocation(#20057,#20058) -regexp_const_value(#20057,"a") +regexp_const_value(#20057,"b") #20059=* -regexpterm(#20059,14,#20055,1,"b") -#20060=@"loc,{#10000},2,5,2,5" -locations_default(#20060,#10000,2,5,2,5) +regexpterm(#20059,14,#20053,2,"c") +#20060=@"loc,{#10000},2,6,2,6" +locations_default(#20060,#10000,2,6,2,6) hasLocation(#20059,#20060) -regexp_const_value(#20059,"b") +regexp_const_value(#20059,"c") #20061=* -regexpterm(#20061,14,#20055,2,"c") -#20062=@"loc,{#10000},2,6,2,6" -locations_default(#20062,#10000,2,6,2,6) +regexpterm(#20061,23,#20051,1,"[cbd]") +#20062=@"loc,{#10000},2,8,2,12" +locations_default(#20062,#10000,2,8,2,12) hasLocation(#20061,#20062) -regexp_const_value(#20061,"c") #20063=* -regexpterm(#20063,23,#20054,1,"[cbd]") -#20064=@"loc,{#10000},2,8,2,12" -locations_default(#20064,#10000,2,8,2,12) +regexpterm(#20063,14,#20061,0,"c") +#20064=@"loc,{#10000},2,9,2,9" +locations_default(#20064,#10000,2,9,2,9) hasLocation(#20063,#20064) +regexp_const_value(#20063,"c") #20065=* -regexpterm(#20065,14,#20063,0,"c") -#20066=@"loc,{#10000},2,9,2,9" -locations_default(#20066,#10000,2,9,2,9) +regexpterm(#20065,14,#20061,1,"b") +#20066=@"loc,{#10000},2,10,2,10" +locations_default(#20066,#10000,2,10,2,10) hasLocation(#20065,#20066) -regexp_const_value(#20065,"c") +regexp_const_value(#20065,"b") #20067=* -regexpterm(#20067,14,#20063,1,"b") -#20068=@"loc,{#10000},2,10,2,10" -locations_default(#20068,#10000,2,10,2,10) +regexpterm(#20067,14,#20061,2,"d") 
+#20068=@"loc,{#10000},2,11,2,11" +locations_default(#20068,#10000,2,11,2,11) hasLocation(#20067,#20068) -regexp_const_value(#20067,"b") +regexp_const_value(#20067,"d") #20069=* -regexpterm(#20069,14,#20063,2,"d") -#20070=@"loc,{#10000},2,11,2,11" -locations_default(#20070,#10000,2,11,2,11) -hasLocation(#20069,#20070) -regexp_const_value(#20069,"d") +stmts(#20069,2,#20001,2,"/[\p{Em ... byz]/v;") +hasLocation(#20069,#20007) +stmt_containers(#20069,#20001) +#20070=* +exprs(#20070,5,#20069,0,"/[\p{Em ... }byz]/v") +hasLocation(#20070,#20023) +enclosing_stmt(#20070,#20069) +expr_containers(#20070,#20001) +literals("/[\p{Emoji}\q{a&}byz]/v","/[\p{Emoji}\q{a&}byz]/v",#20070) #20071=* -stmts(#20071,2,#20001,2,"/[\p{Em ... byz]/v;") -hasLocation(#20071,#20007) -stmt_containers(#20071,#20001) -#20072=* -exprs(#20072,5,#20071,0,"/[\p{Em ... }byz]/v") -hasLocation(#20072,#20023) -enclosing_stmt(#20072,#20071) -expr_containers(#20072,#20001) -literals("/[\p{Emoji}\q{a&}byz]/v","/[\p{Emoji}\q{a&}byz]/v",#20072) +regexpterm(#20071,23,#20070,0,"[\p{Emoji}\q{a&}byz]") +#20072=@"loc,{#10000},3,2,3,21" +locations_default(#20072,#10000,3,2,3,21) +hasLocation(#20071,#20072) #20073=* -regexpterm(#20073,23,#20072,0,"[\p{Emoji}\q{a&}byz]") -#20074=@"loc,{#10000},3,2,3,21" -locations_default(#20074,#10000,3,2,3,21) +regexpterm(#20073,27,#20071,0,"\p{Emoji}") +#20074=@"loc,{#10000},3,3,3,11" +locations_default(#20074,#10000,3,3,3,11) hasLocation(#20073,#20074) +unicode_property_escapename(#20073,"Emoji") #20075=* -regexpterm(#20075,31,#20073,0,"[\p{Emoji}\q{a&}byz]") -hasLocation(#20075,#20074) -#20076=* -regexpterm(#20076,27,#20075,0,"\p{Emoji}") -#20077=@"loc,{#10000},3,3,3,11" -locations_default(#20077,#10000,3,3,3,11) -hasLocation(#20076,#20077) -unicode_property_escapename(#20076,"Emoji") -#20078=* -regexpterm(#20078,28,#20075,1,"\q{a&}") -#20079=@"loc,{#10000},3,12,3,17" -locations_default(#20079,#10000,3,12,3,17) -hasLocation(#20078,#20079) -#20080=* 
-regexpterm(#20080,14,#20078,0,"a&") -#20081=@"loc,{#10000},3,15,3,16" -locations_default(#20081,#10000,3,15,3,16) -hasLocation(#20080,#20081) -regexp_const_value(#20080,"a&") -#20082=* -regexpterm(#20082,14,#20075,2,"b") -#20083=@"loc,{#10000},3,18,3,18" -locations_default(#20083,#10000,3,18,3,18) -hasLocation(#20082,#20083) -regexp_const_value(#20082,"b") -#20084=* -regexpterm(#20084,14,#20075,3,"y") -#20085=@"loc,{#10000},3,19,3,19" -locations_default(#20085,#10000,3,19,3,19) -hasLocation(#20084,#20085) -regexp_const_value(#20084,"y") +regexpterm(#20075,28,#20071,1,"\q{a&}") +#20076=@"loc,{#10000},3,12,3,17" +locations_default(#20076,#10000,3,12,3,17) +hasLocation(#20075,#20076) +#20077=* +regexpterm(#20077,14,#20075,0,"a&") +#20078=@"loc,{#10000},3,15,3,16" +locations_default(#20078,#10000,3,15,3,16) +hasLocation(#20077,#20078) +regexp_const_value(#20077,"a&") +#20079=* +regexpterm(#20079,14,#20071,2,"b") +#20080=@"loc,{#10000},3,18,3,18" +locations_default(#20080,#10000,3,18,3,18) +hasLocation(#20079,#20080) +regexp_const_value(#20079,"b") +#20081=* +regexpterm(#20081,14,#20071,3,"y") +#20082=@"loc,{#10000},3,19,3,19" +locations_default(#20082,#10000,3,19,3,19) +hasLocation(#20081,#20082) +regexp_const_value(#20081,"y") +#20083=* +regexpterm(#20083,14,#20071,4,"z") +#20084=@"loc,{#10000},3,20,3,20" +locations_default(#20084,#10000,3,20,3,20) +hasLocation(#20083,#20084) +regexp_const_value(#20083,"z") +#20085=* +stmts(#20085,2,#20001,3,"/[\q{\\\}a&}byz]/v;") +hasLocation(#20085,#20009) +stmt_containers(#20085,#20001) #20086=* -regexpterm(#20086,14,#20075,4,"z") -#20087=@"loc,{#10000},3,20,3,20" -locations_default(#20087,#10000,3,20,3,20) -hasLocation(#20086,#20087) -regexp_const_value(#20086,"z") -#20088=* -stmts(#20088,2,#20001,3,"/[\q{\\\}a&}byz]/v;") -hasLocation(#20088,#20009) -stmt_containers(#20088,#20001) +exprs(#20086,5,#20085,0,"/[\q{\\\}a&}byz]/v") +hasLocation(#20086,#20027) +enclosing_stmt(#20086,#20085) +expr_containers(#20086,#20001) 
+literals("/[\q{\\\}a&}byz]/v","/[\q{\\\}a&}byz]/v",#20086) +#20087=* +regexpterm(#20087,23,#20086,0,"[\q{\\\}a&}byz]") +#20088=@"loc,{#10000},4,2,4,16" +locations_default(#20088,#10000,4,2,4,16) +hasLocation(#20087,#20088) #20089=* -exprs(#20089,5,#20088,0,"/[\q{\\\}a&}byz]/v") -hasLocation(#20089,#20027) -enclosing_stmt(#20089,#20088) -expr_containers(#20089,#20001) -literals("/[\q{\\\}a&}byz]/v","/[\q{\\\}a&}byz]/v",#20089) -#20090=* -regexpterm(#20090,23,#20089,0,"[\q{\\\}a&}byz]") -#20091=@"loc,{#10000},4,2,4,16" -locations_default(#20091,#10000,4,2,4,16) -hasLocation(#20090,#20091) -#20092=* -regexpterm(#20092,31,#20090,0,"[\q{\\\}a&}byz]") -hasLocation(#20092,#20091) +regexpterm(#20089,28,#20087,0,"\q{\\\}a&}") +#20090=@"loc,{#10000},4,3,4,12" +locations_default(#20090,#10000,4,3,4,12) +hasLocation(#20089,#20090) +#20091=* +regexpterm(#20091,14,#20089,0,"\\\}a&") +#20092=@"loc,{#10000},4,6,4,11" +locations_default(#20092,#10000,4,6,4,11) +hasLocation(#20091,#20092) +regexp_const_value(#20091,"\\\}a&") #20093=* -regexpterm(#20093,28,#20092,0,"\q{\\\}a&}") -#20094=@"loc,{#10000},4,3,4,12" -locations_default(#20094,#10000,4,3,4,12) +regexpterm(#20093,14,#20087,1,"b") +#20094=@"loc,{#10000},4,13,4,13" +locations_default(#20094,#10000,4,13,4,13) hasLocation(#20093,#20094) +regexp_const_value(#20093,"b") #20095=* -regexpterm(#20095,14,#20093,0,"\\\}a&") -#20096=@"loc,{#10000},4,6,4,11" -locations_default(#20096,#10000,4,6,4,11) +regexpterm(#20095,14,#20087,2,"y") +#20096=@"loc,{#10000},4,14,4,14" +locations_default(#20096,#10000,4,14,4,14) hasLocation(#20095,#20096) -regexp_const_value(#20095,"\\\}a&") +regexp_const_value(#20095,"y") #20097=* -regexpterm(#20097,14,#20092,1,"b") -#20098=@"loc,{#10000},4,13,4,13" -locations_default(#20098,#10000,4,13,4,13) +regexpterm(#20097,14,#20087,3,"z") +#20098=@"loc,{#10000},4,15,4,15" +locations_default(#20098,#10000,4,15,4,15) hasLocation(#20097,#20098) -regexp_const_value(#20097,"b") +regexp_const_value(#20097,"z") #20099=* 
-regexpterm(#20099,14,#20092,2,"y") -#20100=@"loc,{#10000},4,14,4,14" -locations_default(#20100,#10000,4,14,4,14) -hasLocation(#20099,#20100) -regexp_const_value(#20099,"y") +stmts(#20099,2,#20001,4,"/[\q{\\}]/v;") +hasLocation(#20099,#20011) +stmt_containers(#20099,#20001) +#20100=* +exprs(#20100,5,#20099,0,"/[\q{\\}]/v") +hasLocation(#20100,#20031) +enclosing_stmt(#20100,#20099) +expr_containers(#20100,#20001) +literals("/[\q{\\}]/v","/[\q{\\}]/v",#20100) #20101=* -regexpterm(#20101,14,#20092,3,"z") -#20102=@"loc,{#10000},4,15,4,15" -locations_default(#20102,#10000,4,15,4,15) +regexpterm(#20101,23,#20100,0,"[\q{\\}]") +#20102=@"loc,{#10000},5,2,5,9" +locations_default(#20102,#10000,5,2,5,9) hasLocation(#20101,#20102) -regexp_const_value(#20101,"z") #20103=* -stmts(#20103,2,#20001,4,"/[\q{\\}]/v;") -hasLocation(#20103,#20011) -stmt_containers(#20103,#20001) -#20104=* -exprs(#20104,5,#20103,0,"/[\q{\\}]/v") -hasLocation(#20104,#20031) -enclosing_stmt(#20104,#20103) -expr_containers(#20104,#20001) -literals("/[\q{\\}]/v","/[\q{\\}]/v",#20104) +regexpterm(#20103,28,#20101,0,"\q{\\}") +#20104=@"loc,{#10000},5,3,5,8" +locations_default(#20104,#10000,5,3,5,8) +hasLocation(#20103,#20104) #20105=* -regexpterm(#20105,23,#20104,0,"[\q{\\}]") -#20106=@"loc,{#10000},5,2,5,9" -locations_default(#20106,#10000,5,2,5,9) +regexpterm(#20105,14,#20103,0,"\\") +#20106=@"loc,{#10000},5,6,5,7" +locations_default(#20106,#10000,5,6,5,7) hasLocation(#20105,#20106) +regexp_const_value(#20105,"\\") #20107=* -regexpterm(#20107,28,#20105,0,"\q{\\}") -#20108=@"loc,{#10000},5,3,5,8" -locations_default(#20108,#10000,5,3,5,8) -hasLocation(#20107,#20108) +stmts(#20107,2,#20001,5,"/[\q{abc|cbd|\}}]/v;") +hasLocation(#20107,#20013) +stmt_containers(#20107,#20001) +#20108=* +exprs(#20108,5,#20107,0,"/[\q{abc|cbd|\}}]/v") +hasLocation(#20108,#20035) +enclosing_stmt(#20108,#20107) +expr_containers(#20108,#20001) +literals("/[\q{abc|cbd|\}}]/v","/[\q{abc|cbd|\}}]/v",#20108) #20109=* 
-regexpterm(#20109,14,#20107,0,"\\") -#20110=@"loc,{#10000},5,6,5,7" -locations_default(#20110,#10000,5,6,5,7) +regexpterm(#20109,23,#20108,0,"[\q{abc|cbd|\}}]") +#20110=@"loc,{#10000},6,2,6,17" +locations_default(#20110,#10000,6,2,6,17) hasLocation(#20109,#20110) -regexp_const_value(#20109,"\\") #20111=* -stmts(#20111,2,#20001,5,"/[\q{abc|cbd|\}}]/v;") -hasLocation(#20111,#20013) -stmt_containers(#20111,#20001) -#20112=* -exprs(#20112,5,#20111,0,"/[\q{abc|cbd|\}}]/v") -hasLocation(#20112,#20035) -enclosing_stmt(#20112,#20111) -expr_containers(#20112,#20001) -literals("/[\q{abc|cbd|\}}]/v","/[\q{abc|cbd|\}}]/v",#20112) +regexpterm(#20111,28,#20109,0,"\q{abc|cbd|\}}") +#20112=@"loc,{#10000},6,3,6,16" +locations_default(#20112,#10000,6,3,6,16) +hasLocation(#20111,#20112) #20113=* -regexpterm(#20113,23,#20112,0,"[\q{abc|cbd|\}}]") -#20114=@"loc,{#10000},6,2,6,17" -locations_default(#20114,#10000,6,2,6,17) +regexpterm(#20113,0,#20111,0,"abc|cbd|\}") +#20114=@"loc,{#10000},6,6,6,15" +locations_default(#20114,#10000,6,6,6,15) hasLocation(#20113,#20114) #20115=* -regexpterm(#20115,28,#20113,0,"\q{abc|cbd|\}}") -#20116=@"loc,{#10000},6,3,6,16" -locations_default(#20116,#10000,6,3,6,16) +regexpterm(#20115,14,#20113,0,"abc") +#20116=@"loc,{#10000},6,6,6,8" +locations_default(#20116,#10000,6,6,6,8) hasLocation(#20115,#20116) +regexp_const_value(#20115,"abc") #20117=* -regexpterm(#20117,0,#20115,0,"abc|cbd|\}") -#20118=@"loc,{#10000},6,6,6,15" -locations_default(#20118,#10000,6,6,6,15) +regexpterm(#20117,14,#20113,1,"cbd") +#20118=@"loc,{#10000},6,10,6,12" +locations_default(#20118,#10000,6,10,6,12) hasLocation(#20117,#20118) +regexp_const_value(#20117,"cbd") #20119=* -regexpterm(#20119,14,#20117,0,"abc") -#20120=@"loc,{#10000},6,6,6,8" -locations_default(#20120,#10000,6,6,6,8) +regexpterm(#20119,14,#20113,2,"\}") +#20120=@"loc,{#10000},6,14,6,15" +locations_default(#20120,#10000,6,14,6,15) hasLocation(#20119,#20120) -regexp_const_value(#20119,"abc") 
+regexp_const_value(#20119,"\}") #20121=* -regexpterm(#20121,14,#20117,1,"cbd") -#20122=@"loc,{#10000},6,10,6,12" -locations_default(#20122,#10000,6,10,6,12) +entry_cfg_node(#20121,#20001) +#20122=@"loc,{#10000},1,1,1,0" +locations_default(#20122,#10000,1,1,1,0) hasLocation(#20121,#20122) -regexp_const_value(#20121,"cbd") #20123=* -regexpterm(#20123,14,#20117,2,"\}") -#20124=@"loc,{#10000},6,14,6,15" -locations_default(#20124,#10000,6,14,6,15) -hasLocation(#20123,#20124) -regexp_const_value(#20123,"\}") -#20125=* -entry_cfg_node(#20125,#20001) -#20126=@"loc,{#10000},1,1,1,0" -locations_default(#20126,#10000,1,1,1,0) -hasLocation(#20125,#20126) -#20127=* -exit_cfg_node(#20127,#20001) -hasLocation(#20127,#20039) -successor(#20111,#20112) -successor(#20112,#20127) -successor(#20103,#20104) -successor(#20104,#20111) -successor(#20088,#20089) -successor(#20089,#20103) -successor(#20071,#20072) -successor(#20072,#20088) -successor(#20050,#20051) -successor(#20051,#20071) +exit_cfg_node(#20123,#20001) +hasLocation(#20123,#20039) +successor(#20107,#20108) +successor(#20108,#20123) +successor(#20099,#20100) +successor(#20100,#20107) +successor(#20085,#20086) +successor(#20086,#20099) +successor(#20069,#20070) +successor(#20070,#20085) +successor(#20049,#20050) +successor(#20050,#20069) successor(#20041,#20042) -successor(#20042,#20050) -successor(#20125,#20041) +successor(#20042,#20049) +successor(#20121,#20041) numlines(#10000,6,6,0) filetype(#10000,"javascript") diff --git a/javascript/ql/lib/semmlecode.javascript.dbscheme b/javascript/ql/lib/semmlecode.javascript.dbscheme index 25c5d086618e..5b5db607d20c 100644 --- a/javascript/ql/lib/semmlecode.javascript.dbscheme +++ b/javascript/ql/lib/semmlecode.javascript.dbscheme @@ -862,8 +862,7 @@ case @regexpterm.kind of | 27 = @regexp_unicode_property_escape | 28 = @regexp_quoted_string | 29 = @regexp_intersection -| 30 = @regexp_subtraction -| 31 = @regexp_union; +| 30 = @regexp_subtraction; regexp_parse_errors (unique int id: 
@regexp_parse_error, int regexp: @regexpterm ref, From 95d05ceab8d247ab19670fb92ba609a37db9e730 Mon Sep 17 00:00:00 2001 From: Napalys Date: Wed, 5 Mar 2025 10:34:38 +0100 Subject: [PATCH 18/27] Now store `vFlagEnabled` instead of each time searching for it. Added `uFlagEnabled` for checking how should `\p{}` be treated. And small optimization. --- .../com/semmle/js/parser/RegExpParser.java | 38 +++++++------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 8d358d30be69..c10e847dce44 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -71,7 +71,8 @@ public List getErrors() { private List errors; private List backrefs; private int maxbackref; - private String flags; + private Boolean vFlagEnabled = false; + private Boolean uFlagEnabled = false; /** Parse the given string as a regular expression. */ public Result parse(String src) { @@ -88,7 +89,8 @@ public Result parse(String src) { } public Result parse(String src, String flags) { - this.flags = flags; + vFlagEnabled = flags != null && flags.contains("v"); + uFlagEnabled = flags != null && flags.contains("u"); return parse(src); } @@ -300,7 +302,7 @@ private RegExpTerm parseDisjunctionInsideQuotedString() { private RegExpTerm parseAlternativeInsideQuotedString() { SourceLocation loc = new SourceLocation(pos()); - StringBuilder sb = new StringBuilder(); + int startPos = this.pos; boolean escaped = false; while (true) { // If we're at the end of the string, something went wrong. @@ -316,13 +318,11 @@ private RegExpTerm parseAlternativeInsideQuotedString() { char c = this.nextChar(); // Track whether the character is an escape character. 
escaped = !escaped && (c == '\\'); - sb.append(c); } - - String literal = sb.toString(); + String literal = src.substring(startPos, pos); loc.setEnd(pos()); loc.setSource(literal); - + return new Constant(loc, literal); } @@ -470,13 +470,13 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) { return this.finishTerm(new NamedBackReference(loc, name, "\\k<" + name + ">")); } - if (this.match("q{")) { + if (vFlagEnabled && this.match("q{")) { RegExpTerm term = parseDisjunctionInsideQuotedString(); this.expectRBrace(); return this.finishTerm(new CharacterClassQuotedString(loc, term)); } - if (this.match("p{", "P{")) { + if ((vFlagEnabled || uFlagEnabled) && this.match("p{", "P{")) { String name = this.readIdentifier(); if (this.match("=")) { value = this.readIdentifier(); @@ -548,7 +548,7 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) { } private RegExpTerm parseCharacterClass() { - if (flags != null && flags.contains("v")) return parseNestedCharacterClass(); + if (vFlagEnabled) return parseNestedCharacterClass(); SourceLocation loc = new SourceLocation(pos()); List elements = new ArrayList<>(); @@ -583,20 +583,10 @@ private RegExpTerm parseNestedCharacterClass() { this.error(Error.EXPECTED_RBRACKET); break; } - if (lookahead("[")) { - elements.add(parseNestedCharacterClass()); - } - else if (lookahead("&&")) { - this.match("&&"); - classType = CharacterClassType.INTERSECTION; - } - else if (lookahead("--")) { - this.match("--"); - classType = CharacterClassType.SUBTRACTION; - } - else { - elements.add(this.parseCharacterClassElement()); - } + if (lookahead("[")) elements.add(parseNestedCharacterClass()); + else if (this.match("&&")) classType = CharacterClassType.INTERSECTION; + else if (this.match("--")) classType = CharacterClassType.SUBTRACTION; + else elements.add(this.parseCharacterClassElement()); } // Create appropriate RegExpTerm based on the detected class type From 
d884e5fe6b5e41c403c13656269b2954a7d2f133 Mon Sep 17 00:00:00 2001 From: Napalys Date: Wed, 5 Mar 2025 11:06:56 +0100 Subject: [PATCH 19/27] Upgraded `javascrip` database schema --- .../old.dbscheme | 1193 +++++++++++++++++ .../semmlecode.javascript.dbscheme | 1190 ++++++++++++++++ .../upgrade.properties | 2 + .../old.dbscheme | 1190 ++++++++++++++++ .../semmlecode.javascript.dbscheme | 1193 +++++++++++++++++ .../upgrade.properties | 2 + 6 files changed, 4770 insertions(+) create mode 100644 javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/old.dbscheme create mode 100644 javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/semmlecode.javascript.dbscheme create mode 100644 javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/upgrade.properties create mode 100644 javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/old.dbscheme create mode 100644 javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/semmlecode.javascript.dbscheme create mode 100644 javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/upgrade.properties diff --git a/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/old.dbscheme b/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/old.dbscheme new file mode 100644 index 000000000000..5b5db607d20c --- /dev/null +++ b/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/old.dbscheme @@ -0,0 +1,1193 @@ +/*** Standard fragments ***/ + +/*- Files and folders -*/ + +/** + * The location of an element. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `file`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ +locations_default( + unique int id: @location_default, + int file: @file ref, + int beginLine: int ref, + int beginColumn: int ref, + int endLine: int ref, + int endColumn: int ref +); + +files( + unique int id: @file, + string name: string ref +); + +folders( + unique int id: @folder, + string name: string ref +); + +@container = @file | @folder + +containerparent( + int parent: @container ref, + unique int child: @container ref +); + +/*- Lines of code -*/ + +numlines( + int element_id: @sourceline ref, + int num_lines: int ref, + int num_code: int ref, + int num_comment: int ref +); + +/*- External data -*/ + +/** + * External data, loaded from CSV files during snapshot creation. See + * [Tutorial: Incorporating external data](https://help.semmle.com/wiki/display/SD/Tutorial%3A+Incorporating+external+data) + * for more information. + */ +externalData( + int id : @externalDataElement, + string path : string ref, + int column: int ref, + string value : string ref +); + +/*- Source location prefix -*/ + +/** + * The source location of the snapshot. 
+ */ +sourceLocationPrefix(string prefix : string ref); + +/*- JavaScript-specific part -*/ + +@location = @location_default + +@sourceline = @locatable; + +filetype( + int file: @file ref, + string filetype: string ref +) + +// top-level code fragments +toplevels (unique int id: @toplevel, + int kind: int ref); + +is_externs (int toplevel: @toplevel ref); + +case @toplevel.kind of + 0 = @script +| 1 = @inline_script +| 2 = @event_handler +| 3 = @javascript_url +| 4 = @template_toplevel; + +is_module (int tl: @toplevel ref); +is_nodejs (int tl: @toplevel ref); +is_es2015_module (int tl: @toplevel ref); +is_closure_module (int tl: @toplevel ref); + +@xml_node_with_code = @xmlelement | @xmlattribute | @template_placeholder_tag; +toplevel_parent_xml_node( + unique int toplevel: @toplevel ref, + int xmlnode: @xml_node_with_code ref); + +xml_element_parent_expression( + unique int xmlnode: @xmlelement ref, + int expression: @expr ref, + int index: int ref); + +// statements +#keyset[parent, idx] +stmts (unique int id: @stmt, + int kind: int ref, + int parent: @stmt_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +stmt_containers (unique int stmt: @stmt ref, + int container: @stmt_container ref); + +jump_targets (unique int jump: @stmt ref, + int target: @stmt ref); + +@stmt_parent = @stmt | @toplevel | @function_expr | @arrow_function_expr | @static_initializer; +@stmt_container = @toplevel | @function | @namespace_declaration | @external_module_declaration | @global_augmentation_declaration; + +case @stmt.kind of + 0 = @empty_stmt +| 1 = @block_stmt +| 2 = @expr_stmt +| 3 = @if_stmt +| 4 = @labeled_stmt +| 5 = @break_stmt +| 6 = @continue_stmt +| 7 = @with_stmt +| 8 = @switch_stmt +| 9 = @return_stmt +| 10 = @throw_stmt +| 11 = @try_stmt +| 12 = @while_stmt +| 13 = @do_while_stmt +| 14 = @for_stmt +| 15 = @for_in_stmt +| 16 = @debugger_stmt +| 17 = @function_decl_stmt +| 18 = @var_decl_stmt +| 19 = @case +| 20 = @catch_clause +| 21 = 
@for_of_stmt +| 22 = @const_decl_stmt +| 23 = @let_stmt +| 24 = @legacy_let_stmt +| 25 = @for_each_stmt +| 26 = @class_decl_stmt +| 27 = @import_declaration +| 28 = @export_all_declaration +| 29 = @export_default_declaration +| 30 = @export_named_declaration +| 31 = @namespace_declaration +| 32 = @import_equals_declaration +| 33 = @export_assign_declaration +| 34 = @interface_declaration +| 35 = @type_alias_declaration +| 36 = @enum_declaration +| 37 = @external_module_declaration +| 38 = @export_as_namespace_declaration +| 39 = @global_augmentation_declaration +| 40 = @using_decl_stmt +; + +@decl_stmt = @var_decl_stmt | @const_decl_stmt | @let_stmt | @legacy_let_stmt | @using_decl_stmt; + +@export_declaration = @export_all_declaration | @export_default_declaration | @export_named_declaration; + +@namespace_definition = @namespace_declaration | @enum_declaration; +@type_definition = @class_definition | @interface_declaration | @enum_declaration | @type_alias_declaration | @enum_member; + +is_instantiated(unique int decl: @namespace_declaration ref); + +@declarable_node = @decl_stmt | @namespace_declaration | @class_decl_stmt | @function_decl_stmt | @enum_declaration | @external_module_declaration | @global_augmentation_declaration | @field; +has_declare_keyword(unique int stmt: @declarable_node ref); + +is_for_await_of(unique int forof: @for_of_stmt ref); + +// expressions +#keyset[parent, idx] +exprs (unique int id: @expr, + int kind: int ref, + int parent: @expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @expr_or_type ref); + +enclosing_stmt (unique int expr: @expr_or_type ref, + int stmt: @stmt ref); + +expr_containers (unique int expr: @expr_or_type ref, + int container: @stmt_container ref); + +array_size (unique int ae: @arraylike ref, + int sz: int ref); + +is_delegating (int yield: @yield_expr ref); + +@expr_or_stmt = @expr | @stmt; 
+@expr_or_type = @expr | @typeexpr; +@expr_parent = @expr_or_stmt | @property | @function_typeexpr; +@arraylike = @array_expr | @array_pattern; +@type_annotation = @typeexpr | @jsdoc_type_expr; +@node_in_stmt_container = @cfg_node | @type_annotation | @toplevel; + +case @expr.kind of + 0 = @label +| 1 = @null_literal +| 2 = @boolean_literal +| 3 = @number_literal +| 4 = @string_literal +| 5 = @regexp_literal +| 6 = @this_expr +| 7 = @array_expr +| 8 = @obj_expr +| 9 = @function_expr +| 10 = @seq_expr +| 11 = @conditional_expr +| 12 = @new_expr +| 13 = @call_expr +| 14 = @dot_expr +| 15 = @index_expr +| 16 = @neg_expr +| 17 = @plus_expr +| 18 = @log_not_expr +| 19 = @bit_not_expr +| 20 = @typeof_expr +| 21 = @void_expr +| 22 = @delete_expr +| 23 = @eq_expr +| 24 = @neq_expr +| 25 = @eqq_expr +| 26 = @neqq_expr +| 27 = @lt_expr +| 28 = @le_expr +| 29 = @gt_expr +| 30 = @ge_expr +| 31 = @lshift_expr +| 32 = @rshift_expr +| 33 = @urshift_expr +| 34 = @add_expr +| 35 = @sub_expr +| 36 = @mul_expr +| 37 = @div_expr +| 38 = @mod_expr +| 39 = @bitor_expr +| 40 = @xor_expr +| 41 = @bitand_expr +| 42 = @in_expr +| 43 = @instanceof_expr +| 44 = @logand_expr +| 45 = @logor_expr +| 47 = @assign_expr +| 48 = @assign_add_expr +| 49 = @assign_sub_expr +| 50 = @assign_mul_expr +| 51 = @assign_div_expr +| 52 = @assign_mod_expr +| 53 = @assign_lshift_expr +| 54 = @assign_rshift_expr +| 55 = @assign_urshift_expr +| 56 = @assign_or_expr +| 57 = @assign_xor_expr +| 58 = @assign_and_expr +| 59 = @preinc_expr +| 60 = @postinc_expr +| 61 = @predec_expr +| 62 = @postdec_expr +| 63 = @par_expr +| 64 = @var_declarator +| 65 = @arrow_function_expr +| 66 = @spread_element +| 67 = @array_pattern +| 68 = @object_pattern +| 69 = @yield_expr +| 70 = @tagged_template_expr +| 71 = @template_literal +| 72 = @template_element +| 73 = @array_comprehension_expr +| 74 = @generator_expr +| 75 = @for_in_comprehension_block +| 76 = @for_of_comprehension_block +| 77 = @legacy_letexpr +| 78 = @var_decl +| 79 = 
@proper_varaccess +| 80 = @class_expr +| 81 = @super_expr +| 82 = @newtarget_expr +| 83 = @named_import_specifier +| 84 = @import_default_specifier +| 85 = @import_namespace_specifier +| 86 = @named_export_specifier +| 87 = @exp_expr +| 88 = @assign_exp_expr +| 89 = @jsx_element +| 90 = @jsx_qualified_name +| 91 = @jsx_empty_expr +| 92 = @await_expr +| 93 = @function_sent_expr +| 94 = @decorator +| 95 = @export_default_specifier +| 96 = @export_namespace_specifier +| 97 = @bind_expr +| 98 = @external_module_reference +| 99 = @dynamic_import +| 100 = @expression_with_type_arguments +| 101 = @prefix_type_assertion +| 102 = @as_type_assertion +| 103 = @export_varaccess +| 104 = @decorator_list +| 105 = @non_null_assertion +| 106 = @bigint_literal +| 107 = @nullishcoalescing_expr +| 108 = @e4x_xml_anyname +| 109 = @e4x_xml_static_attribute_selector +| 110 = @e4x_xml_dynamic_attribute_selector +| 111 = @e4x_xml_filter_expression +| 112 = @e4x_xml_static_qualident +| 113 = @e4x_xml_dynamic_qualident +| 114 = @e4x_xml_dotdotexpr +| 115 = @import_meta_expr +| 116 = @assignlogandexpr +| 117 = @assignlogorexpr +| 118 = @assignnullishcoalescingexpr +| 119 = @template_pipe_ref +| 120 = @generated_code_expr +| 121 = @satisfies_expr +; + +@varaccess = @proper_varaccess | @export_varaccess; +@varref = @var_decl | @varaccess; + +@identifier = @label | @varref | @type_identifier; + +@literal = @null_literal | @boolean_literal | @number_literal | @string_literal | @regexp_literal | @bigint_literal; + +@propaccess = @dot_expr | @index_expr; + +@invokeexpr = @new_expr | @call_expr; + +@unaryexpr = @neg_expr | @plus_expr | @log_not_expr | @bit_not_expr | @typeof_expr | @void_expr | @delete_expr | @spread_element; + +@equality_test = @eq_expr | @neq_expr | @eqq_expr | @neqq_expr; + +@comparison = @equality_test | @lt_expr | @le_expr | @gt_expr | @ge_expr; + +@binaryexpr = @comparison | @lshift_expr | @rshift_expr | @urshift_expr | @add_expr | @sub_expr | @mul_expr | @div_expr | 
@mod_expr | @exp_expr | @bitor_expr | @xor_expr | @bitand_expr | @in_expr | @instanceof_expr | @logand_expr | @logor_expr | @nullishcoalescing_expr; + +@assignment = @assign_expr | @assign_add_expr | @assign_sub_expr | @assign_mul_expr | @assign_div_expr | @assign_mod_expr | @assign_exp_expr | @assign_lshift_expr | @assign_rshift_expr | @assign_urshift_expr | @assign_or_expr | @assign_xor_expr | @assign_and_expr | @assignlogandexpr | @assignlogorexpr | @assignnullishcoalescingexpr; + +@updateexpr = @preinc_expr | @postinc_expr | @predec_expr | @postdec_expr; + +@pattern = @varref | @array_pattern | @object_pattern; + +@comprehension_expr = @array_comprehension_expr | @generator_expr; + +@comprehension_block = @for_in_comprehension_block | @for_of_comprehension_block; + +@import_specifier = @named_import_specifier | @import_default_specifier | @import_namespace_specifier; + +@exportspecifier = @named_export_specifier | @export_default_specifier | @export_namespace_specifier; + +@type_keyword_operand = @import_declaration | @export_declaration | @import_specifier; + +@type_assertion = @as_type_assertion | @prefix_type_assertion; + +@class_definition = @class_decl_stmt | @class_expr; +@interface_definition = @interface_declaration | @interface_typeexpr; +@class_or_interface = @class_definition | @interface_definition; + +@lexical_decl = @var_decl | @type_decl; +@lexical_access = @varaccess | @local_type_access | @local_var_type_access | @local_namespace_access; +@lexical_ref = @lexical_decl | @lexical_access; + +@e4x_xml_attribute_selector = @e4x_xml_static_attribute_selector | @e4x_xml_dynamic_attribute_selector; +@e4x_xml_qualident = @e4x_xml_static_qualident | @e4x_xml_dynamic_qualident; + +expr_contains_template_tag_location( + int expr: @expr ref, + int location: @location ref +); + +@template_placeholder_tag_parent = @xmlelement | @xmlattribute | @file; + +template_placeholder_tag_info( + unique int node: @template_placeholder_tag, + int parentNode: 
@template_placeholder_tag_parent ref, + varchar(900) raw: string ref +); + +// scopes +scopes (unique int id: @scope, + int kind: int ref); + +case @scope.kind of + 0 = @global_scope +| 1 = @function_scope +| 2 = @catch_scope +| 3 = @module_scope +| 4 = @block_scope +| 5 = @for_scope +| 6 = @for_in_scope // for-of scopes work the same as for-in scopes +| 7 = @comprehension_block_scope +| 8 = @class_expr_scope +| 9 = @namespace_scope +| 10 = @class_decl_scope +| 11 = @interface_scope +| 12 = @type_alias_scope +| 13 = @mapped_type_scope +| 14 = @enum_scope +| 15 = @external_module_scope +| 16 = @conditional_type_scope; + +scopenodes (unique int node: @ast_node ref, + int scope: @scope ref); + +scopenesting (unique int inner: @scope ref, + int outer: @scope ref); + +// functions +@function = @function_decl_stmt | @function_expr | @arrow_function_expr; + +@parameterized = @function | @catch_clause; +@type_parameterized = @function | @class_or_interface | @type_alias_declaration | @mapped_typeexpr | @infer_typeexpr; + +is_generator (int fun: @function ref); +has_rest_parameter (int fun: @function ref); +is_async (int fun: @function ref); + +// variables and lexically scoped type names +#keyset[scope, name] +variables (unique int id: @variable, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_type_names (unique int id: @local_type_name, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_namespace_names (unique int id: @local_namespace_name, + varchar(900) name: string ref, + int scope: @scope ref); + +is_arguments_object (int id: @variable ref); + +@lexical_name = @variable | @local_type_name | @local_namespace_name; + +@bind_id = @varaccess | @local_var_type_access; +bind (unique int id: @bind_id ref, + int decl: @variable ref); + +decl (unique int id: @var_decl ref, + int decl: @variable ref); + +@typebind_id = @local_type_access | @export_varaccess; +typebind (unique int id: @typebind_id 
ref, + int decl: @local_type_name ref); + +@typedecl_id = @type_decl | @var_decl; +typedecl (unique int id: @typedecl_id ref, + int decl: @local_type_name ref); + +namespacedecl (unique int id: @var_decl ref, + int decl: @local_namespace_name ref); + +@namespacebind_id = @local_namespace_access | @export_varaccess; +namespacebind (unique int id: @namespacebind_id ref, + int decl: @local_namespace_name ref); + + +// properties in object literals, property patterns in object patterns, and method declarations in classes +#keyset[parent, index] +properties (unique int id: @property, + int parent: @property_parent ref, + int index: int ref, + int kind: int ref, + varchar(900) tostring: string ref); + +case @property.kind of + 0 = @value_property +| 1 = @property_getter +| 2 = @property_setter +| 3 = @jsx_attribute +| 4 = @function_call_signature +| 5 = @constructor_call_signature +| 6 = @index_signature +| 7 = @enum_member +| 8 = @proper_field +| 9 = @parameter_field +| 10 = @static_initializer +; + +@property_parent = @obj_expr | @object_pattern | @class_definition | @jsx_element | @interface_definition | @enum_declaration; +@property_accessor = @property_getter | @property_setter; +@call_signature = @function_call_signature | @constructor_call_signature; +@field = @proper_field | @parameter_field; +@field_or_vardeclarator = @field | @var_declarator; + +is_computed (int id: @property ref); +is_method (int id: @property ref); +is_static (int id: @property ref); +is_abstract_member (int id: @property ref); +is_const_enum (int id: @enum_declaration ref); +is_abstract_class (int id: @class_decl_stmt ref); + +has_public_keyword (int id: @property ref); +has_private_keyword (int id: @property ref); +has_protected_keyword (int id: @property ref); +has_readonly_keyword (int id: @property ref); +has_type_keyword (int id: @type_keyword_operand ref); +is_optional_member (int id: @property ref); +has_definite_assignment_assertion (int id: @field_or_vardeclarator ref); 
+is_optional_parameter_declaration (unique int parameter: @pattern ref); + +#keyset[constructor, param_index] +parameter_fields( + unique int field: @parameter_field ref, + int constructor: @function_expr ref, + int param_index: int ref +); + +// types +#keyset[parent, idx] +typeexprs ( + unique int id: @typeexpr, + int kind: int ref, + int parent: @typeexpr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref +); + +case @typeexpr.kind of + 0 = @local_type_access +| 1 = @type_decl +| 2 = @keyword_typeexpr +| 3 = @string_literal_typeexpr +| 4 = @number_literal_typeexpr +| 5 = @boolean_literal_typeexpr +| 6 = @array_typeexpr +| 7 = @union_typeexpr +| 8 = @indexed_access_typeexpr +| 9 = @intersection_typeexpr +| 10 = @parenthesized_typeexpr +| 11 = @tuple_typeexpr +| 12 = @keyof_typeexpr +| 13 = @qualified_type_access +| 14 = @generic_typeexpr +| 15 = @type_label +| 16 = @typeof_typeexpr +| 17 = @local_var_type_access +| 18 = @qualified_var_type_access +| 19 = @this_var_type_access +| 20 = @predicate_typeexpr +| 21 = @interface_typeexpr +| 22 = @type_parameter +| 23 = @plain_function_typeexpr +| 24 = @constructor_typeexpr +| 25 = @local_namespace_access +| 26 = @qualified_namespace_access +| 27 = @mapped_typeexpr +| 28 = @conditional_typeexpr +| 29 = @infer_typeexpr +| 30 = @import_type_access +| 31 = @import_namespace_access +| 32 = @import_var_type_access +| 33 = @optional_typeexpr +| 34 = @rest_typeexpr +| 35 = @bigint_literal_typeexpr +| 36 = @readonly_typeexpr +| 37 = @template_literal_typeexpr +; + +@typeref = @typeaccess | @type_decl; +@type_identifier = @type_decl | @local_type_access | @type_label | @local_var_type_access | @local_namespace_access; +@typeexpr_parent = @expr | @stmt | @property | @typeexpr; +@literal_typeexpr = @string_literal_typeexpr | @number_literal_typeexpr | @boolean_literal_typeexpr | @bigint_literal_typeexpr; +@typeaccess = @local_type_access | @qualified_type_access | @import_type_access; +@vartypeaccess = 
@local_var_type_access | @qualified_var_type_access | @this_var_type_access | @import_var_type_access; +@namespace_access = @local_namespace_access | @qualified_namespace_access | @import_namespace_access; +@import_typeexpr = @import_type_access | @import_namespace_access | @import_var_type_access; + +@function_typeexpr = @plain_function_typeexpr | @constructor_typeexpr; + +// types +types ( + unique int id: @type, + int kind: int ref, + varchar(900) tostring: string ref +); + +#keyset[parent, idx] +type_child ( + int child: @type ref, + int parent: @type ref, + int idx: int ref +); + +case @type.kind of + 0 = @any_type +| 1 = @string_type +| 2 = @number_type +| 3 = @union_type +| 4 = @true_type +| 5 = @false_type +| 6 = @type_reference +| 7 = @object_type +| 8 = @canonical_type_variable_type +| 9 = @typeof_type +| 10 = @void_type +| 11 = @undefined_type +| 12 = @null_type +| 13 = @never_type +| 14 = @plain_symbol_type +| 15 = @unique_symbol_type +| 16 = @objectkeyword_type +| 17 = @intersection_type +| 18 = @tuple_type +| 19 = @lexical_type_variable_type +| 20 = @this_type +| 21 = @number_literal_type +| 22 = @string_literal_type +| 23 = @unknown_type +| 24 = @bigint_type +| 25 = @bigint_literal_type +; + +@boolean_literal_type = @true_type | @false_type; +@symbol_type = @plain_symbol_type | @unique_symbol_type; +@union_or_intersection_type = @union_type | @intersection_type; +@typevariable_type = @canonical_type_variable_type | @lexical_type_variable_type; + +has_asserts_keyword(int node: @predicate_typeexpr ref); + +@typed_ast_node = @expr | @typeexpr | @function; +ast_node_type( + unique int node: @typed_ast_node ref, + int typ: @type ref); + +declared_function_signature( + unique int node: @function ref, + int sig: @signature_type ref +); + +invoke_expr_signature( + unique int node: @invokeexpr ref, + int sig: @signature_type ref +); + +invoke_expr_overload_index( + unique int node: @invokeexpr ref, + int index: int ref +); + +symbols ( + unique int id: 
@symbol, + int kind: int ref, + varchar(900) name: string ref +); + +symbol_parent ( + unique int symbol: @symbol ref, + int parent: @symbol ref +); + +symbol_module ( + int symbol: @symbol ref, + varchar(900) moduleName: string ref +); + +symbol_global ( + int symbol: @symbol ref, + varchar(900) globalName: string ref +); + +case @symbol.kind of + 0 = @root_symbol +| 1 = @member_symbol +| 2 = @other_symbol +; + +@type_with_symbol = @type_reference | @typevariable_type | @typeof_type | @unique_symbol_type; +@ast_node_with_symbol = @type_definition | @namespace_definition | @toplevel | @typeaccess | @namespace_access | @var_decl | @function | @invokeexpr | @import_declaration | @external_module_reference | @external_module_declaration; + +ast_node_symbol( + unique int node: @ast_node_with_symbol ref, + int symbol: @symbol ref); + +type_symbol( + unique int typ: @type_with_symbol ref, + int symbol: @symbol ref); + +#keyset[typ, name] +type_property( + int typ: @type ref, + varchar(900) name: string ref, + int propertyType: @type ref); + +type_alias( + unique int aliasType: @type ref, + int underlyingType: @type ref); + +@literal_type = @string_literal_type | @number_literal_type | @boolean_literal_type | @bigint_literal_type; +@type_with_literal_value = @string_literal_type | @number_literal_type | @bigint_literal_type; +type_literal_value( + unique int typ: @type_with_literal_value ref, + varchar(900) value: string ref); + +signature_types ( + unique int id: @signature_type, + int kind: int ref, + varchar(900) tostring: string ref, + int type_parameters: int ref, + int required_params: int ref +); + +is_abstract_signature( + unique int sig: @signature_type ref +); + +signature_rest_parameter( + unique int sig: @signature_type ref, + int rest_param_arra_type: @type ref +); + +case @signature_type.kind of + 0 = @function_signature_type +| 1 = @constructor_signature_type +; + +#keyset[typ, kind, index] +type_contains_signature ( + int typ: @type ref, + int kind: int 
ref, // constructor/call/index + int index: int ref, // ordering of overloaded signatures + int sig: @signature_type ref +); + +#keyset[parent, index] +signature_contains_type ( + int child: @type ref, + int parent: @signature_type ref, + int index: int ref +); + +#keyset[sig, index] +signature_parameter_name ( + int sig: @signature_type ref, + int index: int ref, + varchar(900) name: string ref +); + +number_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +string_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +base_type_names( + int typeName: @symbol ref, + int baseTypeName: @symbol ref +); + +self_types( + int typeName: @symbol ref, + int selfType: @type_reference ref +); + +tuple_type_min_length( + unique int typ: @type ref, + int minLength: int ref +); + +tuple_type_rest_index( + unique int typ: @type ref, + int index: int ref +); + +// comments +comments (unique int id: @comment, + int kind: int ref, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(900) tostring: string ref); + +case @comment.kind of + 0 = @slashslash_comment +| 1 = @slashstar_comment +| 2 = @doc_comment +| 3 = @html_comment_start +| 4 = @htmlcommentend; + +@html_comment = @html_comment_start | @htmlcommentend; +@line_comment = @slashslash_comment | @html_comment; +@block_comment = @slashstar_comment | @doc_comment; + +// source lines +lines (unique int id: @line, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(2) terminator: string ref); +indentation (int file: @file ref, + int lineno: int ref, + varchar(1) indentChar: string ref, + int indentDepth: int ref); + +// JavaScript parse errors +js_parse_errors (unique int id: @js_parse_error, + int toplevel: @toplevel ref, + varchar(900) message: string ref, + varchar(900) line: string ref); + +// regular expressions +#keyset[parent, idx] +regexpterm (unique int id: @regexpterm, + int kind: int ref, + int parent: @regexpparent 
ref, + int idx: int ref, + varchar(900) tostring: string ref); + +@regexpparent = @regexpterm | @regexp_literal | @string_literal | @add_expr; + +case @regexpterm.kind of + 0 = @regexp_alt +| 1 = @regexp_seq +| 2 = @regexp_caret +| 3 = @regexp_dollar +| 4 = @regexp_wordboundary +| 5 = @regexp_nonwordboundary +| 6 = @regexp_positive_lookahead +| 7 = @regexp_negative_lookahead +| 8 = @regexp_star +| 9 = @regexp_plus +| 10 = @regexp_opt +| 11 = @regexp_range +| 12 = @regexp_dot +| 13 = @regexp_group +| 14 = @regexp_normal_constant +| 15 = @regexp_hex_escape +| 16 = @regexp_unicode_escape +| 17 = @regexp_dec_escape +| 18 = @regexp_oct_escape +| 19 = @regexp_ctrl_escape +| 20 = @regexp_char_class_escape +| 21 = @regexp_id_escape +| 22 = @regexp_backref +| 23 = @regexp_char_class +| 24 = @regexp_char_range +| 25 = @regexp_positive_lookbehind +| 26 = @regexp_negative_lookbehind +| 27 = @regexp_unicode_property_escape +| 28 = @regexp_quoted_string +| 29 = @regexp_intersection +| 30 = @regexp_subtraction; + +regexp_parse_errors (unique int id: @regexp_parse_error, + int regexp: @regexpterm ref, + varchar(900) message: string ref); + +@regexp_quantifier = @regexp_star | @regexp_plus | @regexp_opt | @regexp_range; +@regexp_escape = @regexp_char_escape | @regexp_char_class_escape | @regexp_unicode_property_escape; +@regexp_char_escape = @regexp_hex_escape | @regexp_unicode_escape | @regexp_dec_escape | @regexp_oct_escape | @regexp_ctrl_escape | @regexp_id_escape; +@regexp_constant = @regexp_normal_constant | @regexp_char_escape; +@regexp_lookahead = @regexp_positive_lookahead | @regexp_negative_lookahead; +@regexp_lookbehind = @regexp_positive_lookbehind | @regexp_negative_lookbehind; +@regexp_subpattern = @regexp_lookahead | @regexp_lookbehind; +@regexp_anchor = @regexp_dollar | @regexp_caret; + +is_greedy (int id: @regexp_quantifier ref); +range_quantifier_lower_bound (unique int id: @regexp_range ref, int lo: int ref); +range_quantifier_upper_bound (unique int id: 
@regexp_range ref, int hi: int ref); +is_capture (unique int id: @regexp_group ref, int number: int ref); +is_named_capture (unique int id: @regexp_group ref, string name: string ref); +is_inverted (int id: @regexp_char_class ref); +regexp_const_value (unique int id: @regexp_constant ref, varchar(1) value: string ref); +char_class_escape (unique int id: @regexp_char_class_escape ref, varchar(1) value: string ref); +backref (unique int id: @regexp_backref ref, int value: int ref); +named_backref (unique int id: @regexp_backref ref, string name: string ref); +unicode_property_escapename (unique int id: @regexp_unicode_property_escape ref, string name: string ref); +unicode_property_escapevalue (unique int id: @regexp_unicode_property_escape ref, string value: string ref); + +// tokens +#keyset[toplevel, idx] +tokeninfo (unique int id: @token, + int kind: int ref, + int toplevel: @toplevel ref, + int idx: int ref, + varchar(900) value: string ref); + +case @token.kind of + 0 = @token_eof +| 1 = @token_null_literal +| 2 = @token_boolean_literal +| 3 = @token_numeric_literal +| 4 = @token_string_literal +| 5 = @token_regular_expression +| 6 = @token_identifier +| 7 = @token_keyword +| 8 = @token_punctuator; + +// associate comments with the token immediately following them (which may be EOF) +next_token (int comment: @comment ref, int token: @token ref); + +// JSON +#keyset[parent, idx] +json (unique int id: @json_value, + int kind: int ref, + int parent: @json_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +json_literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @json_value ref); + +json_properties (int obj: @json_object ref, + varchar(900) property: string ref, + int value: @json_value ref); + +json_errors (unique int id: @json_parse_error, + varchar(900) message: string ref); + +json_locations(unique int locatable: @json_locatable ref, + int location: @location_default ref); + +case @json_value.kind 
of + 0 = @json_null +| 1 = @json_boolean +| 2 = @json_number +| 3 = @json_string +| 4 = @json_array +| 5 = @json_object; + +@json_parent = @json_object | @json_array | @file; + +@json_locatable = @json_value | @json_parse_error; + +// locations +@ast_node = @toplevel | @stmt | @expr | @property | @typeexpr; + +@locatable = @file + | @ast_node + | @comment + | @line + | @js_parse_error | @regexp_parse_error + | @regexpterm + | @json_locatable + | @token + | @cfg_node + | @jsdoc | @jsdoc_type_expr | @jsdoc_tag + | @yaml_locatable + | @xmllocatable + | @configLocatable + | @template_placeholder_tag; + +hasLocation (unique int locatable: @locatable ref, + int location: @location ref); + +// CFG +entry_cfg_node (unique int id: @entry_node, int container: @stmt_container ref); +exit_cfg_node (unique int id: @exit_node, int container: @stmt_container ref); +guard_node (unique int id: @guard_node, int kind: int ref, int test: @expr ref); +case @guard_node.kind of + 0 = @falsy_guard +| 1 = @truthy_guard; +@condition_guard = @falsy_guard | @truthy_guard; + +@synthetic_cfg_node = @entry_node | @exit_node | @guard_node; +@cfg_node = @synthetic_cfg_node | @expr_parent; + +successor (int pred: @cfg_node ref, int succ: @cfg_node ref); + +// JSDoc comments +jsdoc (unique int id: @jsdoc, varchar(900) description: string ref, int comment: @comment ref); +#keyset[parent, idx] +jsdoc_tags (unique int id: @jsdoc_tag, varchar(900) title: string ref, + int parent: @jsdoc ref, int idx: int ref, varchar(900) tostring: string ref); +jsdoc_tag_descriptions (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); +jsdoc_tag_names (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); + +#keyset[parent, idx] +jsdoc_type_exprs (unique int id: @jsdoc_type_expr, + int kind: int ref, + int parent: @jsdoc_type_expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); +case @jsdoc_type_expr.kind of + 0 = @jsdoc_any_type_expr +| 1 = @jsdoc_null_type_expr +| 2 = 
@jsdoc_undefined_type_expr +| 3 = @jsdoc_unknown_type_expr +| 4 = @jsdoc_void_type_expr +| 5 = @jsdoc_named_type_expr +| 6 = @jsdoc_applied_type_expr +| 7 = @jsdoc_nullable_type_expr +| 8 = @jsdoc_non_nullable_type_expr +| 9 = @jsdoc_record_type_expr +| 10 = @jsdoc_array_type_expr +| 11 = @jsdoc_union_type_expr +| 12 = @jsdoc_function_type_expr +| 13 = @jsdoc_optional_type_expr +| 14 = @jsdoc_rest_type_expr +; + +#keyset[id, idx] +jsdoc_record_field_name (int id: @jsdoc_record_type_expr ref, int idx: int ref, varchar(900) name: string ref); +jsdoc_prefix_qualifier (int id: @jsdoc_type_expr ref); +jsdoc_has_new_parameter (int fn: @jsdoc_function_type_expr ref); + +@jsdoc_type_expr_parent = @jsdoc_type_expr | @jsdoc_tag; + +jsdoc_errors (unique int id: @jsdoc_error, int tag: @jsdoc_tag ref, varchar(900) message: string ref, varchar(900) tostring: string ref); + +@dataflownode = @expr | @function_decl_stmt | @class_decl_stmt | @namespace_declaration | @enum_declaration | @property; + +@optionalchainable = @call_expr | @propaccess; + +isOptionalChaining(int id: @optionalchainable ref); + +/** + * The time taken for the extraction of a file. + * This table contains non-deterministic content. + * + * The sum of the `time` column for each (`file`, `timerKind`) pair + * is the total time taken for extraction of `file`. The `extractionPhase` + * column provides a granular view of the extraction time of the file. + */ +extraction_time( + int file : @file ref, + // see `com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase`. + int extractionPhase: int ref, + // 0 for the elapsed CPU time in nanoseconds, 1 for the elapsed wallclock time in nanoseconds + int timerKind: int ref, + float time: float ref +) + +/** +* Non-timing related data for the extraction of a single file. +* This table contains non-deterministic content. 
+*/ +extraction_data( + int file : @file ref, + // the absolute path to the cache file + varchar(900) cacheFile: string ref, + boolean fromCache: boolean ref, + int length: int ref +) + +/*- YAML -*/ + +#keyset[parent, idx] +yaml (unique int id: @yaml_node, + int kind: int ref, + int parent: @yaml_node_parent ref, + int idx: int ref, + string tag: string ref, + string tostring: string ref); + +case @yaml_node.kind of + 0 = @yaml_scalar_node +| 1 = @yaml_mapping_node +| 2 = @yaml_sequence_node +| 3 = @yaml_alias_node +; + +@yaml_collection_node = @yaml_mapping_node | @yaml_sequence_node; + +@yaml_node_parent = @yaml_collection_node | @file; + +yaml_anchors (unique int node: @yaml_node ref, + string anchor: string ref); + +yaml_aliases (unique int alias: @yaml_alias_node ref, + string target: string ref); + +yaml_scalars (unique int scalar: @yaml_scalar_node ref, + int style: int ref, + string value: string ref); + +yaml_errors (unique int id: @yaml_error, + string message: string ref); + +yaml_locations(unique int locatable: @yaml_locatable ref, + int location: @location_default ref); + +@yaml_locatable = @yaml_node | @yaml_error; + +/*- XML Files -*/ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int 
id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +/*- Configuration files with key value pairs -*/ + +configs( + unique int id: @config +); + +configNames( + unique int id: @configName, + int config: @config ref, + string name: string ref +); + +configValues( + unique int id: @configValue, + int config: @config ref, + string value: string ref +); + +configLocations( + int locatable: @configLocatable ref, + int location: @location_default ref +); + +@configLocatable = @config | @configName | @configValue; diff --git a/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/semmlecode.javascript.dbscheme b/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/semmlecode.javascript.dbscheme new file mode 100644 index 000000000000..c88c69174bd0 --- /dev/null +++ b/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/semmlecode.javascript.dbscheme @@ -0,0 +1,1190 @@ +/*** Standard fragments ***/ + +/*- Files and folders -*/ + +/** + * The location of an element. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `file`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ +locations_default( + unique int id: @location_default, + int file: @file ref, + int beginLine: int ref, + int beginColumn: int ref, + int endLine: int ref, + int endColumn: int ref +); + +files( + unique int id: @file, + string name: string ref +); + +folders( + unique int id: @folder, + string name: string ref +); + +@container = @file | @folder + +containerparent( + int parent: @container ref, + unique int child: @container ref +); + +/*- Lines of code -*/ + +numlines( + int element_id: @sourceline ref, + int num_lines: int ref, + int num_code: int ref, + int num_comment: int ref +); + +/*- External data -*/ + +/** + * External data, loaded from CSV files during snapshot creation. See + * [Tutorial: Incorporating external data](https://help.semmle.com/wiki/display/SD/Tutorial%3A+Incorporating+external+data) + * for more information. + */ +externalData( + int id : @externalDataElement, + string path : string ref, + int column: int ref, + string value : string ref +); + +/*- Source location prefix -*/ + +/** + * The source location of the snapshot. 
+ */ +sourceLocationPrefix(string prefix : string ref); + +/*- JavaScript-specific part -*/ + +@location = @location_default + +@sourceline = @locatable; + +filetype( + int file: @file ref, + string filetype: string ref +) + +// top-level code fragments +toplevels (unique int id: @toplevel, + int kind: int ref); + +is_externs (int toplevel: @toplevel ref); + +case @toplevel.kind of + 0 = @script +| 1 = @inline_script +| 2 = @event_handler +| 3 = @javascript_url +| 4 = @template_toplevel; + +is_module (int tl: @toplevel ref); +is_nodejs (int tl: @toplevel ref); +is_es2015_module (int tl: @toplevel ref); +is_closure_module (int tl: @toplevel ref); + +@xml_node_with_code = @xmlelement | @xmlattribute | @template_placeholder_tag; +toplevel_parent_xml_node( + unique int toplevel: @toplevel ref, + int xmlnode: @xml_node_with_code ref); + +xml_element_parent_expression( + unique int xmlnode: @xmlelement ref, + int expression: @expr ref, + int index: int ref); + +// statements +#keyset[parent, idx] +stmts (unique int id: @stmt, + int kind: int ref, + int parent: @stmt_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +stmt_containers (unique int stmt: @stmt ref, + int container: @stmt_container ref); + +jump_targets (unique int jump: @stmt ref, + int target: @stmt ref); + +@stmt_parent = @stmt | @toplevel | @function_expr | @arrow_function_expr | @static_initializer; +@stmt_container = @toplevel | @function | @namespace_declaration | @external_module_declaration | @global_augmentation_declaration; + +case @stmt.kind of + 0 = @empty_stmt +| 1 = @block_stmt +| 2 = @expr_stmt +| 3 = @if_stmt +| 4 = @labeled_stmt +| 5 = @break_stmt +| 6 = @continue_stmt +| 7 = @with_stmt +| 8 = @switch_stmt +| 9 = @return_stmt +| 10 = @throw_stmt +| 11 = @try_stmt +| 12 = @while_stmt +| 13 = @do_while_stmt +| 14 = @for_stmt +| 15 = @for_in_stmt +| 16 = @debugger_stmt +| 17 = @function_decl_stmt +| 18 = @var_decl_stmt +| 19 = @case +| 20 = @catch_clause +| 21 = 
@for_of_stmt +| 22 = @const_decl_stmt +| 23 = @let_stmt +| 24 = @legacy_let_stmt +| 25 = @for_each_stmt +| 26 = @class_decl_stmt +| 27 = @import_declaration +| 28 = @export_all_declaration +| 29 = @export_default_declaration +| 30 = @export_named_declaration +| 31 = @namespace_declaration +| 32 = @import_equals_declaration +| 33 = @export_assign_declaration +| 34 = @interface_declaration +| 35 = @type_alias_declaration +| 36 = @enum_declaration +| 37 = @external_module_declaration +| 38 = @export_as_namespace_declaration +| 39 = @global_augmentation_declaration +| 40 = @using_decl_stmt +; + +@decl_stmt = @var_decl_stmt | @const_decl_stmt | @let_stmt | @legacy_let_stmt | @using_decl_stmt; + +@export_declaration = @export_all_declaration | @export_default_declaration | @export_named_declaration; + +@namespace_definition = @namespace_declaration | @enum_declaration; +@type_definition = @class_definition | @interface_declaration | @enum_declaration | @type_alias_declaration | @enum_member; + +is_instantiated(unique int decl: @namespace_declaration ref); + +@declarable_node = @decl_stmt | @namespace_declaration | @class_decl_stmt | @function_decl_stmt | @enum_declaration | @external_module_declaration | @global_augmentation_declaration | @field; +has_declare_keyword(unique int stmt: @declarable_node ref); + +is_for_await_of(unique int forof: @for_of_stmt ref); + +// expressions +#keyset[parent, idx] +exprs (unique int id: @expr, + int kind: int ref, + int parent: @expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @expr_or_type ref); + +enclosing_stmt (unique int expr: @expr_or_type ref, + int stmt: @stmt ref); + +expr_containers (unique int expr: @expr_or_type ref, + int container: @stmt_container ref); + +array_size (unique int ae: @arraylike ref, + int sz: int ref); + +is_delegating (int yield: @yield_expr ref); + +@expr_or_stmt = @expr | @stmt; 
+@expr_or_type = @expr | @typeexpr; +@expr_parent = @expr_or_stmt | @property | @function_typeexpr; +@arraylike = @array_expr | @array_pattern; +@type_annotation = @typeexpr | @jsdoc_type_expr; +@node_in_stmt_container = @cfg_node | @type_annotation | @toplevel; + +case @expr.kind of + 0 = @label +| 1 = @null_literal +| 2 = @boolean_literal +| 3 = @number_literal +| 4 = @string_literal +| 5 = @regexp_literal +| 6 = @this_expr +| 7 = @array_expr +| 8 = @obj_expr +| 9 = @function_expr +| 10 = @seq_expr +| 11 = @conditional_expr +| 12 = @new_expr +| 13 = @call_expr +| 14 = @dot_expr +| 15 = @index_expr +| 16 = @neg_expr +| 17 = @plus_expr +| 18 = @log_not_expr +| 19 = @bit_not_expr +| 20 = @typeof_expr +| 21 = @void_expr +| 22 = @delete_expr +| 23 = @eq_expr +| 24 = @neq_expr +| 25 = @eqq_expr +| 26 = @neqq_expr +| 27 = @lt_expr +| 28 = @le_expr +| 29 = @gt_expr +| 30 = @ge_expr +| 31 = @lshift_expr +| 32 = @rshift_expr +| 33 = @urshift_expr +| 34 = @add_expr +| 35 = @sub_expr +| 36 = @mul_expr +| 37 = @div_expr +| 38 = @mod_expr +| 39 = @bitor_expr +| 40 = @xor_expr +| 41 = @bitand_expr +| 42 = @in_expr +| 43 = @instanceof_expr +| 44 = @logand_expr +| 45 = @logor_expr +| 47 = @assign_expr +| 48 = @assign_add_expr +| 49 = @assign_sub_expr +| 50 = @assign_mul_expr +| 51 = @assign_div_expr +| 52 = @assign_mod_expr +| 53 = @assign_lshift_expr +| 54 = @assign_rshift_expr +| 55 = @assign_urshift_expr +| 56 = @assign_or_expr +| 57 = @assign_xor_expr +| 58 = @assign_and_expr +| 59 = @preinc_expr +| 60 = @postinc_expr +| 61 = @predec_expr +| 62 = @postdec_expr +| 63 = @par_expr +| 64 = @var_declarator +| 65 = @arrow_function_expr +| 66 = @spread_element +| 67 = @array_pattern +| 68 = @object_pattern +| 69 = @yield_expr +| 70 = @tagged_template_expr +| 71 = @template_literal +| 72 = @template_element +| 73 = @array_comprehension_expr +| 74 = @generator_expr +| 75 = @for_in_comprehension_block +| 76 = @for_of_comprehension_block +| 77 = @legacy_letexpr +| 78 = @var_decl +| 79 = 
@proper_varaccess +| 80 = @class_expr +| 81 = @super_expr +| 82 = @newtarget_expr +| 83 = @named_import_specifier +| 84 = @import_default_specifier +| 85 = @import_namespace_specifier +| 86 = @named_export_specifier +| 87 = @exp_expr +| 88 = @assign_exp_expr +| 89 = @jsx_element +| 90 = @jsx_qualified_name +| 91 = @jsx_empty_expr +| 92 = @await_expr +| 93 = @function_sent_expr +| 94 = @decorator +| 95 = @export_default_specifier +| 96 = @export_namespace_specifier +| 97 = @bind_expr +| 98 = @external_module_reference +| 99 = @dynamic_import +| 100 = @expression_with_type_arguments +| 101 = @prefix_type_assertion +| 102 = @as_type_assertion +| 103 = @export_varaccess +| 104 = @decorator_list +| 105 = @non_null_assertion +| 106 = @bigint_literal +| 107 = @nullishcoalescing_expr +| 108 = @e4x_xml_anyname +| 109 = @e4x_xml_static_attribute_selector +| 110 = @e4x_xml_dynamic_attribute_selector +| 111 = @e4x_xml_filter_expression +| 112 = @e4x_xml_static_qualident +| 113 = @e4x_xml_dynamic_qualident +| 114 = @e4x_xml_dotdotexpr +| 115 = @import_meta_expr +| 116 = @assignlogandexpr +| 117 = @assignlogorexpr +| 118 = @assignnullishcoalescingexpr +| 119 = @template_pipe_ref +| 120 = @generated_code_expr +| 121 = @satisfies_expr +; + +@varaccess = @proper_varaccess | @export_varaccess; +@varref = @var_decl | @varaccess; + +@identifier = @label | @varref | @type_identifier; + +@literal = @null_literal | @boolean_literal | @number_literal | @string_literal | @regexp_literal | @bigint_literal; + +@propaccess = @dot_expr | @index_expr; + +@invokeexpr = @new_expr | @call_expr; + +@unaryexpr = @neg_expr | @plus_expr | @log_not_expr | @bit_not_expr | @typeof_expr | @void_expr | @delete_expr | @spread_element; + +@equality_test = @eq_expr | @neq_expr | @eqq_expr | @neqq_expr; + +@comparison = @equality_test | @lt_expr | @le_expr | @gt_expr | @ge_expr; + +@binaryexpr = @comparison | @lshift_expr | @rshift_expr | @urshift_expr | @add_expr | @sub_expr | @mul_expr | @div_expr | 
@mod_expr | @exp_expr | @bitor_expr | @xor_expr | @bitand_expr | @in_expr | @instanceof_expr | @logand_expr | @logor_expr | @nullishcoalescing_expr; + +@assignment = @assign_expr | @assign_add_expr | @assign_sub_expr | @assign_mul_expr | @assign_div_expr | @assign_mod_expr | @assign_exp_expr | @assign_lshift_expr | @assign_rshift_expr | @assign_urshift_expr | @assign_or_expr | @assign_xor_expr | @assign_and_expr | @assignlogandexpr | @assignlogorexpr | @assignnullishcoalescingexpr; + +@updateexpr = @preinc_expr | @postinc_expr | @predec_expr | @postdec_expr; + +@pattern = @varref | @array_pattern | @object_pattern; + +@comprehension_expr = @array_comprehension_expr | @generator_expr; + +@comprehension_block = @for_in_comprehension_block | @for_of_comprehension_block; + +@import_specifier = @named_import_specifier | @import_default_specifier | @import_namespace_specifier; + +@exportspecifier = @named_export_specifier | @export_default_specifier | @export_namespace_specifier; + +@type_keyword_operand = @import_declaration | @export_declaration | @import_specifier; + +@type_assertion = @as_type_assertion | @prefix_type_assertion; + +@class_definition = @class_decl_stmt | @class_expr; +@interface_definition = @interface_declaration | @interface_typeexpr; +@class_or_interface = @class_definition | @interface_definition; + +@lexical_decl = @var_decl | @type_decl; +@lexical_access = @varaccess | @local_type_access | @local_var_type_access | @local_namespace_access; +@lexical_ref = @lexical_decl | @lexical_access; + +@e4x_xml_attribute_selector = @e4x_xml_static_attribute_selector | @e4x_xml_dynamic_attribute_selector; +@e4x_xml_qualident = @e4x_xml_static_qualident | @e4x_xml_dynamic_qualident; + +expr_contains_template_tag_location( + int expr: @expr ref, + int location: @location ref +); + +@template_placeholder_tag_parent = @xmlelement | @xmlattribute | @file; + +template_placeholder_tag_info( + unique int node: @template_placeholder_tag, + int parentNode: 
@template_placeholder_tag_parent ref, + varchar(900) raw: string ref +); + +// scopes +scopes (unique int id: @scope, + int kind: int ref); + +case @scope.kind of + 0 = @global_scope +| 1 = @function_scope +| 2 = @catch_scope +| 3 = @module_scope +| 4 = @block_scope +| 5 = @for_scope +| 6 = @for_in_scope // for-of scopes work the same as for-in scopes +| 7 = @comprehension_block_scope +| 8 = @class_expr_scope +| 9 = @namespace_scope +| 10 = @class_decl_scope +| 11 = @interface_scope +| 12 = @type_alias_scope +| 13 = @mapped_type_scope +| 14 = @enum_scope +| 15 = @external_module_scope +| 16 = @conditional_type_scope; + +scopenodes (unique int node: @ast_node ref, + int scope: @scope ref); + +scopenesting (unique int inner: @scope ref, + int outer: @scope ref); + +// functions +@function = @function_decl_stmt | @function_expr | @arrow_function_expr; + +@parameterized = @function | @catch_clause; +@type_parameterized = @function | @class_or_interface | @type_alias_declaration | @mapped_typeexpr | @infer_typeexpr; + +is_generator (int fun: @function ref); +has_rest_parameter (int fun: @function ref); +is_async (int fun: @function ref); + +// variables and lexically scoped type names +#keyset[scope, name] +variables (unique int id: @variable, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_type_names (unique int id: @local_type_name, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_namespace_names (unique int id: @local_namespace_name, + varchar(900) name: string ref, + int scope: @scope ref); + +is_arguments_object (int id: @variable ref); + +@lexical_name = @variable | @local_type_name | @local_namespace_name; + +@bind_id = @varaccess | @local_var_type_access; +bind (unique int id: @bind_id ref, + int decl: @variable ref); + +decl (unique int id: @var_decl ref, + int decl: @variable ref); + +@typebind_id = @local_type_access | @export_varaccess; +typebind (unique int id: @typebind_id 
ref, + int decl: @local_type_name ref); + +@typedecl_id = @type_decl | @var_decl; +typedecl (unique int id: @typedecl_id ref, + int decl: @local_type_name ref); + +namespacedecl (unique int id: @var_decl ref, + int decl: @local_namespace_name ref); + +@namespacebind_id = @local_namespace_access | @export_varaccess; +namespacebind (unique int id: @namespacebind_id ref, + int decl: @local_namespace_name ref); + + +// properties in object literals, property patterns in object patterns, and method declarations in classes +#keyset[parent, index] +properties (unique int id: @property, + int parent: @property_parent ref, + int index: int ref, + int kind: int ref, + varchar(900) tostring: string ref); + +case @property.kind of + 0 = @value_property +| 1 = @property_getter +| 2 = @property_setter +| 3 = @jsx_attribute +| 4 = @function_call_signature +| 5 = @constructor_call_signature +| 6 = @index_signature +| 7 = @enum_member +| 8 = @proper_field +| 9 = @parameter_field +| 10 = @static_initializer +; + +@property_parent = @obj_expr | @object_pattern | @class_definition | @jsx_element | @interface_definition | @enum_declaration; +@property_accessor = @property_getter | @property_setter; +@call_signature = @function_call_signature | @constructor_call_signature; +@field = @proper_field | @parameter_field; +@field_or_vardeclarator = @field | @var_declarator; + +is_computed (int id: @property ref); +is_method (int id: @property ref); +is_static (int id: @property ref); +is_abstract_member (int id: @property ref); +is_const_enum (int id: @enum_declaration ref); +is_abstract_class (int id: @class_decl_stmt ref); + +has_public_keyword (int id: @property ref); +has_private_keyword (int id: @property ref); +has_protected_keyword (int id: @property ref); +has_readonly_keyword (int id: @property ref); +has_type_keyword (int id: @type_keyword_operand ref); +is_optional_member (int id: @property ref); +has_definite_assignment_assertion (int id: @field_or_vardeclarator ref); 
+is_optional_parameter_declaration (unique int parameter: @pattern ref); + +#keyset[constructor, param_index] +parameter_fields( + unique int field: @parameter_field ref, + int constructor: @function_expr ref, + int param_index: int ref +); + +// types +#keyset[parent, idx] +typeexprs ( + unique int id: @typeexpr, + int kind: int ref, + int parent: @typeexpr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref +); + +case @typeexpr.kind of + 0 = @local_type_access +| 1 = @type_decl +| 2 = @keyword_typeexpr +| 3 = @string_literal_typeexpr +| 4 = @number_literal_typeexpr +| 5 = @boolean_literal_typeexpr +| 6 = @array_typeexpr +| 7 = @union_typeexpr +| 8 = @indexed_access_typeexpr +| 9 = @intersection_typeexpr +| 10 = @parenthesized_typeexpr +| 11 = @tuple_typeexpr +| 12 = @keyof_typeexpr +| 13 = @qualified_type_access +| 14 = @generic_typeexpr +| 15 = @type_label +| 16 = @typeof_typeexpr +| 17 = @local_var_type_access +| 18 = @qualified_var_type_access +| 19 = @this_var_type_access +| 20 = @predicate_typeexpr +| 21 = @interface_typeexpr +| 22 = @type_parameter +| 23 = @plain_function_typeexpr +| 24 = @constructor_typeexpr +| 25 = @local_namespace_access +| 26 = @qualified_namespace_access +| 27 = @mapped_typeexpr +| 28 = @conditional_typeexpr +| 29 = @infer_typeexpr +| 30 = @import_type_access +| 31 = @import_namespace_access +| 32 = @import_var_type_access +| 33 = @optional_typeexpr +| 34 = @rest_typeexpr +| 35 = @bigint_literal_typeexpr +| 36 = @readonly_typeexpr +| 37 = @template_literal_typeexpr +; + +@typeref = @typeaccess | @type_decl; +@type_identifier = @type_decl | @local_type_access | @type_label | @local_var_type_access | @local_namespace_access; +@typeexpr_parent = @expr | @stmt | @property | @typeexpr; +@literal_typeexpr = @string_literal_typeexpr | @number_literal_typeexpr | @boolean_literal_typeexpr | @bigint_literal_typeexpr; +@typeaccess = @local_type_access | @qualified_type_access | @import_type_access; +@vartypeaccess = 
@local_var_type_access | @qualified_var_type_access | @this_var_type_access | @import_var_type_access; +@namespace_access = @local_namespace_access | @qualified_namespace_access | @import_namespace_access; +@import_typeexpr = @import_type_access | @import_namespace_access | @import_var_type_access; + +@function_typeexpr = @plain_function_typeexpr | @constructor_typeexpr; + +// types +types ( + unique int id: @type, + int kind: int ref, + varchar(900) tostring: string ref +); + +#keyset[parent, idx] +type_child ( + int child: @type ref, + int parent: @type ref, + int idx: int ref +); + +case @type.kind of + 0 = @any_type +| 1 = @string_type +| 2 = @number_type +| 3 = @union_type +| 4 = @true_type +| 5 = @false_type +| 6 = @type_reference +| 7 = @object_type +| 8 = @canonical_type_variable_type +| 9 = @typeof_type +| 10 = @void_type +| 11 = @undefined_type +| 12 = @null_type +| 13 = @never_type +| 14 = @plain_symbol_type +| 15 = @unique_symbol_type +| 16 = @objectkeyword_type +| 17 = @intersection_type +| 18 = @tuple_type +| 19 = @lexical_type_variable_type +| 20 = @this_type +| 21 = @number_literal_type +| 22 = @string_literal_type +| 23 = @unknown_type +| 24 = @bigint_type +| 25 = @bigint_literal_type +; + +@boolean_literal_type = @true_type | @false_type; +@symbol_type = @plain_symbol_type | @unique_symbol_type; +@union_or_intersection_type = @union_type | @intersection_type; +@typevariable_type = @canonical_type_variable_type | @lexical_type_variable_type; + +has_asserts_keyword(int node: @predicate_typeexpr ref); + +@typed_ast_node = @expr | @typeexpr | @function; +ast_node_type( + unique int node: @typed_ast_node ref, + int typ: @type ref); + +declared_function_signature( + unique int node: @function ref, + int sig: @signature_type ref +); + +invoke_expr_signature( + unique int node: @invokeexpr ref, + int sig: @signature_type ref +); + +invoke_expr_overload_index( + unique int node: @invokeexpr ref, + int index: int ref +); + +symbols ( + unique int id: 
@symbol, + int kind: int ref, + varchar(900) name: string ref +); + +symbol_parent ( + unique int symbol: @symbol ref, + int parent: @symbol ref +); + +symbol_module ( + int symbol: @symbol ref, + varchar(900) moduleName: string ref +); + +symbol_global ( + int symbol: @symbol ref, + varchar(900) globalName: string ref +); + +case @symbol.kind of + 0 = @root_symbol +| 1 = @member_symbol +| 2 = @other_symbol +; + +@type_with_symbol = @type_reference | @typevariable_type | @typeof_type | @unique_symbol_type; +@ast_node_with_symbol = @type_definition | @namespace_definition | @toplevel | @typeaccess | @namespace_access | @var_decl | @function | @invokeexpr | @import_declaration | @external_module_reference | @external_module_declaration; + +ast_node_symbol( + unique int node: @ast_node_with_symbol ref, + int symbol: @symbol ref); + +type_symbol( + unique int typ: @type_with_symbol ref, + int symbol: @symbol ref); + +#keyset[typ, name] +type_property( + int typ: @type ref, + varchar(900) name: string ref, + int propertyType: @type ref); + +type_alias( + unique int aliasType: @type ref, + int underlyingType: @type ref); + +@literal_type = @string_literal_type | @number_literal_type | @boolean_literal_type | @bigint_literal_type; +@type_with_literal_value = @string_literal_type | @number_literal_type | @bigint_literal_type; +type_literal_value( + unique int typ: @type_with_literal_value ref, + varchar(900) value: string ref); + +signature_types ( + unique int id: @signature_type, + int kind: int ref, + varchar(900) tostring: string ref, + int type_parameters: int ref, + int required_params: int ref +); + +is_abstract_signature( + unique int sig: @signature_type ref +); + +signature_rest_parameter( + unique int sig: @signature_type ref, + int rest_param_arra_type: @type ref +); + +case @signature_type.kind of + 0 = @function_signature_type +| 1 = @constructor_signature_type +; + +#keyset[typ, kind, index] +type_contains_signature ( + int typ: @type ref, + int kind: int 
ref, // constructor/call/index + int index: int ref, // ordering of overloaded signatures + int sig: @signature_type ref +); + +#keyset[parent, index] +signature_contains_type ( + int child: @type ref, + int parent: @signature_type ref, + int index: int ref +); + +#keyset[sig, index] +signature_parameter_name ( + int sig: @signature_type ref, + int index: int ref, + varchar(900) name: string ref +); + +number_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +string_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +base_type_names( + int typeName: @symbol ref, + int baseTypeName: @symbol ref +); + +self_types( + int typeName: @symbol ref, + int selfType: @type_reference ref +); + +tuple_type_min_length( + unique int typ: @type ref, + int minLength: int ref +); + +tuple_type_rest_index( + unique int typ: @type ref, + int index: int ref +); + +// comments +comments (unique int id: @comment, + int kind: int ref, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(900) tostring: string ref); + +case @comment.kind of + 0 = @slashslash_comment +| 1 = @slashstar_comment +| 2 = @doc_comment +| 3 = @html_comment_start +| 4 = @htmlcommentend; + +@html_comment = @html_comment_start | @htmlcommentend; +@line_comment = @slashslash_comment | @html_comment; +@block_comment = @slashstar_comment | @doc_comment; + +// source lines +lines (unique int id: @line, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(2) terminator: string ref); +indentation (int file: @file ref, + int lineno: int ref, + varchar(1) indentChar: string ref, + int indentDepth: int ref); + +// JavaScript parse errors +js_parse_errors (unique int id: @js_parse_error, + int toplevel: @toplevel ref, + varchar(900) message: string ref, + varchar(900) line: string ref); + +// regular expressions +#keyset[parent, idx] +regexpterm (unique int id: @regexpterm, + int kind: int ref, + int parent: @regexpparent 
ref, + int idx: int ref, + varchar(900) tostring: string ref); + +@regexpparent = @regexpterm | @regexp_literal | @string_literal | @add_expr; + +case @regexpterm.kind of + 0 = @regexp_alt +| 1 = @regexp_seq +| 2 = @regexp_caret +| 3 = @regexp_dollar +| 4 = @regexp_wordboundary +| 5 = @regexp_nonwordboundary +| 6 = @regexp_positive_lookahead +| 7 = @regexp_negative_lookahead +| 8 = @regexp_star +| 9 = @regexp_plus +| 10 = @regexp_opt +| 11 = @regexp_range +| 12 = @regexp_dot +| 13 = @regexp_group +| 14 = @regexp_normal_constant +| 15 = @regexp_hex_escape +| 16 = @regexp_unicode_escape +| 17 = @regexp_dec_escape +| 18 = @regexp_oct_escape +| 19 = @regexp_ctrl_escape +| 20 = @regexp_char_class_escape +| 21 = @regexp_id_escape +| 22 = @regexp_backref +| 23 = @regexp_char_class +| 24 = @regexp_char_range +| 25 = @regexp_positive_lookbehind +| 26 = @regexp_negative_lookbehind +| 27 = @regexp_unicode_property_escape; + +regexp_parse_errors (unique int id: @regexp_parse_error, + int regexp: @regexpterm ref, + varchar(900) message: string ref); + +@regexp_quantifier = @regexp_star | @regexp_plus | @regexp_opt | @regexp_range; +@regexp_escape = @regexp_char_escape | @regexp_char_class_escape | @regexp_unicode_property_escape; +@regexp_char_escape = @regexp_hex_escape | @regexp_unicode_escape | @regexp_dec_escape | @regexp_oct_escape | @regexp_ctrl_escape | @regexp_id_escape; +@regexp_constant = @regexp_normal_constant | @regexp_char_escape; +@regexp_lookahead = @regexp_positive_lookahead | @regexp_negative_lookahead; +@regexp_lookbehind = @regexp_positive_lookbehind | @regexp_negative_lookbehind; +@regexp_subpattern = @regexp_lookahead | @regexp_lookbehind; +@regexp_anchor = @regexp_dollar | @regexp_caret; + +is_greedy (int id: @regexp_quantifier ref); +range_quantifier_lower_bound (unique int id: @regexp_range ref, int lo: int ref); +range_quantifier_upper_bound (unique int id: @regexp_range ref, int hi: int ref); +is_capture (unique int id: @regexp_group ref, int number: 
int ref); +is_named_capture (unique int id: @regexp_group ref, string name: string ref); +is_inverted (int id: @regexp_char_class ref); +regexp_const_value (unique int id: @regexp_constant ref, varchar(1) value: string ref); +char_class_escape (unique int id: @regexp_char_class_escape ref, varchar(1) value: string ref); +backref (unique int id: @regexp_backref ref, int value: int ref); +named_backref (unique int id: @regexp_backref ref, string name: string ref); +unicode_property_escapename (unique int id: @regexp_unicode_property_escape ref, string name: string ref); +unicode_property_escapevalue (unique int id: @regexp_unicode_property_escape ref, string value: string ref); + +// tokens +#keyset[toplevel, idx] +tokeninfo (unique int id: @token, + int kind: int ref, + int toplevel: @toplevel ref, + int idx: int ref, + varchar(900) value: string ref); + +case @token.kind of + 0 = @token_eof +| 1 = @token_null_literal +| 2 = @token_boolean_literal +| 3 = @token_numeric_literal +| 4 = @token_string_literal +| 5 = @token_regular_expression +| 6 = @token_identifier +| 7 = @token_keyword +| 8 = @token_punctuator; + +// associate comments with the token immediately following them (which may be EOF) +next_token (int comment: @comment ref, int token: @token ref); + +// JSON +#keyset[parent, idx] +json (unique int id: @json_value, + int kind: int ref, + int parent: @json_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +json_literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @json_value ref); + +json_properties (int obj: @json_object ref, + varchar(900) property: string ref, + int value: @json_value ref); + +json_errors (unique int id: @json_parse_error, + varchar(900) message: string ref); + +json_locations(unique int locatable: @json_locatable ref, + int location: @location_default ref); + +case @json_value.kind of + 0 = @json_null +| 1 = @json_boolean +| 2 = @json_number +| 3 = @json_string +| 4 = 
@json_array +| 5 = @json_object; + +@json_parent = @json_object | @json_array | @file; + +@json_locatable = @json_value | @json_parse_error; + +// locations +@ast_node = @toplevel | @stmt | @expr | @property | @typeexpr; + +@locatable = @file + | @ast_node + | @comment + | @line + | @js_parse_error | @regexp_parse_error + | @regexpterm + | @json_locatable + | @token + | @cfg_node + | @jsdoc | @jsdoc_type_expr | @jsdoc_tag + | @yaml_locatable + | @xmllocatable + | @configLocatable + | @template_placeholder_tag; + +hasLocation (unique int locatable: @locatable ref, + int location: @location ref); + +// CFG +entry_cfg_node (unique int id: @entry_node, int container: @stmt_container ref); +exit_cfg_node (unique int id: @exit_node, int container: @stmt_container ref); +guard_node (unique int id: @guard_node, int kind: int ref, int test: @expr ref); +case @guard_node.kind of + 0 = @falsy_guard +| 1 = @truthy_guard; +@condition_guard = @falsy_guard | @truthy_guard; + +@synthetic_cfg_node = @entry_node | @exit_node | @guard_node; +@cfg_node = @synthetic_cfg_node | @expr_parent; + +successor (int pred: @cfg_node ref, int succ: @cfg_node ref); + +// JSDoc comments +jsdoc (unique int id: @jsdoc, varchar(900) description: string ref, int comment: @comment ref); +#keyset[parent, idx] +jsdoc_tags (unique int id: @jsdoc_tag, varchar(900) title: string ref, + int parent: @jsdoc ref, int idx: int ref, varchar(900) tostring: string ref); +jsdoc_tag_descriptions (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); +jsdoc_tag_names (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); + +#keyset[parent, idx] +jsdoc_type_exprs (unique int id: @jsdoc_type_expr, + int kind: int ref, + int parent: @jsdoc_type_expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); +case @jsdoc_type_expr.kind of + 0 = @jsdoc_any_type_expr +| 1 = @jsdoc_null_type_expr +| 2 = @jsdoc_undefined_type_expr +| 3 = @jsdoc_unknown_type_expr +| 4 = @jsdoc_void_type_expr +| 5 
= @jsdoc_named_type_expr +| 6 = @jsdoc_applied_type_expr +| 7 = @jsdoc_nullable_type_expr +| 8 = @jsdoc_non_nullable_type_expr +| 9 = @jsdoc_record_type_expr +| 10 = @jsdoc_array_type_expr +| 11 = @jsdoc_union_type_expr +| 12 = @jsdoc_function_type_expr +| 13 = @jsdoc_optional_type_expr +| 14 = @jsdoc_rest_type_expr +; + +#keyset[id, idx] +jsdoc_record_field_name (int id: @jsdoc_record_type_expr ref, int idx: int ref, varchar(900) name: string ref); +jsdoc_prefix_qualifier (int id: @jsdoc_type_expr ref); +jsdoc_has_new_parameter (int fn: @jsdoc_function_type_expr ref); + +@jsdoc_type_expr_parent = @jsdoc_type_expr | @jsdoc_tag; + +jsdoc_errors (unique int id: @jsdoc_error, int tag: @jsdoc_tag ref, varchar(900) message: string ref, varchar(900) tostring: string ref); + +@dataflownode = @expr | @function_decl_stmt | @class_decl_stmt | @namespace_declaration | @enum_declaration | @property; + +@optionalchainable = @call_expr | @propaccess; + +isOptionalChaining(int id: @optionalchainable ref); + +/** + * The time taken for the extraction of a file. + * This table contains non-deterministic content. + * + * The sum of the `time` column for each (`file`, `timerKind`) pair + * is the total time taken for extraction of `file`. The `extractionPhase` + * column provides a granular view of the extraction time of the file. + */ +extraction_time( + int file : @file ref, + // see `com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase`. + int extractionPhase: int ref, + // 0 for the elapsed CPU time in nanoseconds, 1 for the elapsed wallclock time in nanoseconds + int timerKind: int ref, + float time: float ref +) + +/** +* Non-timing related data for the extraction of a single file. +* This table contains non-deterministic content. 
+*/ +extraction_data( + int file : @file ref, + // the absolute path to the cache file + varchar(900) cacheFile: string ref, + boolean fromCache: boolean ref, + int length: int ref +) + +/*- YAML -*/ + +#keyset[parent, idx] +yaml (unique int id: @yaml_node, + int kind: int ref, + int parent: @yaml_node_parent ref, + int idx: int ref, + string tag: string ref, + string tostring: string ref); + +case @yaml_node.kind of + 0 = @yaml_scalar_node +| 1 = @yaml_mapping_node +| 2 = @yaml_sequence_node +| 3 = @yaml_alias_node +; + +@yaml_collection_node = @yaml_mapping_node | @yaml_sequence_node; + +@yaml_node_parent = @yaml_collection_node | @file; + +yaml_anchors (unique int node: @yaml_node ref, + string anchor: string ref); + +yaml_aliases (unique int alias: @yaml_alias_node ref, + string target: string ref); + +yaml_scalars (unique int scalar: @yaml_scalar_node ref, + int style: int ref, + string value: string ref); + +yaml_errors (unique int id: @yaml_error, + string message: string ref); + +yaml_locations(unique int locatable: @yaml_locatable ref, + int location: @location_default ref); + +@yaml_locatable = @yaml_node | @yaml_error; + +/*- XML Files -*/ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int 
id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +/*- Configuration files with key value pairs -*/ + +configs( + unique int id: @config +); + +configNames( + unique int id: @configName, + int config: @config ref, + string name: string ref +); + +configValues( + unique int id: @configValue, + int config: @config ref, + string value: string ref +); + +configLocations( + int locatable: @configLocatable ref, + int location: @location_default ref +); + +@configLocatable = @config | @configName | @configValue; diff --git a/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/upgrade.properties b/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/upgrade.properties new file mode 100644 index 000000000000..13f42c6a9d1b --- /dev/null +++ b/javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/upgrade.properties @@ -0,0 +1,2 @@ +description: Add support for quoted string, intersection and subtraction +compatibility: backwards diff --git a/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/old.dbscheme b/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/old.dbscheme new file mode 100644 index 000000000000..c88c69174bd0 --- /dev/null +++ b/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/old.dbscheme @@ -0,0 +1,1190 @@ +/*** Standard fragments ***/ + +/*- Files and folders -*/ + +/** + * The location of an element. 
+ * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `file`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ +locations_default( + unique int id: @location_default, + int file: @file ref, + int beginLine: int ref, + int beginColumn: int ref, + int endLine: int ref, + int endColumn: int ref +); + +files( + unique int id: @file, + string name: string ref +); + +folders( + unique int id: @folder, + string name: string ref +); + +@container = @file | @folder + +containerparent( + int parent: @container ref, + unique int child: @container ref +); + +/*- Lines of code -*/ + +numlines( + int element_id: @sourceline ref, + int num_lines: int ref, + int num_code: int ref, + int num_comment: int ref +); + +/*- External data -*/ + +/** + * External data, loaded from CSV files during snapshot creation. See + * [Tutorial: Incorporating external data](https://help.semmle.com/wiki/display/SD/Tutorial%3A+Incorporating+external+data) + * for more information. + */ +externalData( + int id : @externalDataElement, + string path : string ref, + int column: int ref, + string value : string ref +); + +/*- Source location prefix -*/ + +/** + * The source location of the snapshot. 
+ */ +sourceLocationPrefix(string prefix : string ref); + +/*- JavaScript-specific part -*/ + +@location = @location_default + +@sourceline = @locatable; + +filetype( + int file: @file ref, + string filetype: string ref +) + +// top-level code fragments +toplevels (unique int id: @toplevel, + int kind: int ref); + +is_externs (int toplevel: @toplevel ref); + +case @toplevel.kind of + 0 = @script +| 1 = @inline_script +| 2 = @event_handler +| 3 = @javascript_url +| 4 = @template_toplevel; + +is_module (int tl: @toplevel ref); +is_nodejs (int tl: @toplevel ref); +is_es2015_module (int tl: @toplevel ref); +is_closure_module (int tl: @toplevel ref); + +@xml_node_with_code = @xmlelement | @xmlattribute | @template_placeholder_tag; +toplevel_parent_xml_node( + unique int toplevel: @toplevel ref, + int xmlnode: @xml_node_with_code ref); + +xml_element_parent_expression( + unique int xmlnode: @xmlelement ref, + int expression: @expr ref, + int index: int ref); + +// statements +#keyset[parent, idx] +stmts (unique int id: @stmt, + int kind: int ref, + int parent: @stmt_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +stmt_containers (unique int stmt: @stmt ref, + int container: @stmt_container ref); + +jump_targets (unique int jump: @stmt ref, + int target: @stmt ref); + +@stmt_parent = @stmt | @toplevel | @function_expr | @arrow_function_expr | @static_initializer; +@stmt_container = @toplevel | @function | @namespace_declaration | @external_module_declaration | @global_augmentation_declaration; + +case @stmt.kind of + 0 = @empty_stmt +| 1 = @block_stmt +| 2 = @expr_stmt +| 3 = @if_stmt +| 4 = @labeled_stmt +| 5 = @break_stmt +| 6 = @continue_stmt +| 7 = @with_stmt +| 8 = @switch_stmt +| 9 = @return_stmt +| 10 = @throw_stmt +| 11 = @try_stmt +| 12 = @while_stmt +| 13 = @do_while_stmt +| 14 = @for_stmt +| 15 = @for_in_stmt +| 16 = @debugger_stmt +| 17 = @function_decl_stmt +| 18 = @var_decl_stmt +| 19 = @case +| 20 = @catch_clause +| 21 = 
@for_of_stmt +| 22 = @const_decl_stmt +| 23 = @let_stmt +| 24 = @legacy_let_stmt +| 25 = @for_each_stmt +| 26 = @class_decl_stmt +| 27 = @import_declaration +| 28 = @export_all_declaration +| 29 = @export_default_declaration +| 30 = @export_named_declaration +| 31 = @namespace_declaration +| 32 = @import_equals_declaration +| 33 = @export_assign_declaration +| 34 = @interface_declaration +| 35 = @type_alias_declaration +| 36 = @enum_declaration +| 37 = @external_module_declaration +| 38 = @export_as_namespace_declaration +| 39 = @global_augmentation_declaration +| 40 = @using_decl_stmt +; + +@decl_stmt = @var_decl_stmt | @const_decl_stmt | @let_stmt | @legacy_let_stmt | @using_decl_stmt; + +@export_declaration = @export_all_declaration | @export_default_declaration | @export_named_declaration; + +@namespace_definition = @namespace_declaration | @enum_declaration; +@type_definition = @class_definition | @interface_declaration | @enum_declaration | @type_alias_declaration | @enum_member; + +is_instantiated(unique int decl: @namespace_declaration ref); + +@declarable_node = @decl_stmt | @namespace_declaration | @class_decl_stmt | @function_decl_stmt | @enum_declaration | @external_module_declaration | @global_augmentation_declaration | @field; +has_declare_keyword(unique int stmt: @declarable_node ref); + +is_for_await_of(unique int forof: @for_of_stmt ref); + +// expressions +#keyset[parent, idx] +exprs (unique int id: @expr, + int kind: int ref, + int parent: @expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @expr_or_type ref); + +enclosing_stmt (unique int expr: @expr_or_type ref, + int stmt: @stmt ref); + +expr_containers (unique int expr: @expr_or_type ref, + int container: @stmt_container ref); + +array_size (unique int ae: @arraylike ref, + int sz: int ref); + +is_delegating (int yield: @yield_expr ref); + +@expr_or_stmt = @expr | @stmt; 
+@expr_or_type = @expr | @typeexpr; +@expr_parent = @expr_or_stmt | @property | @function_typeexpr; +@arraylike = @array_expr | @array_pattern; +@type_annotation = @typeexpr | @jsdoc_type_expr; +@node_in_stmt_container = @cfg_node | @type_annotation | @toplevel; + +case @expr.kind of + 0 = @label +| 1 = @null_literal +| 2 = @boolean_literal +| 3 = @number_literal +| 4 = @string_literal +| 5 = @regexp_literal +| 6 = @this_expr +| 7 = @array_expr +| 8 = @obj_expr +| 9 = @function_expr +| 10 = @seq_expr +| 11 = @conditional_expr +| 12 = @new_expr +| 13 = @call_expr +| 14 = @dot_expr +| 15 = @index_expr +| 16 = @neg_expr +| 17 = @plus_expr +| 18 = @log_not_expr +| 19 = @bit_not_expr +| 20 = @typeof_expr +| 21 = @void_expr +| 22 = @delete_expr +| 23 = @eq_expr +| 24 = @neq_expr +| 25 = @eqq_expr +| 26 = @neqq_expr +| 27 = @lt_expr +| 28 = @le_expr +| 29 = @gt_expr +| 30 = @ge_expr +| 31 = @lshift_expr +| 32 = @rshift_expr +| 33 = @urshift_expr +| 34 = @add_expr +| 35 = @sub_expr +| 36 = @mul_expr +| 37 = @div_expr +| 38 = @mod_expr +| 39 = @bitor_expr +| 40 = @xor_expr +| 41 = @bitand_expr +| 42 = @in_expr +| 43 = @instanceof_expr +| 44 = @logand_expr +| 45 = @logor_expr +| 47 = @assign_expr +| 48 = @assign_add_expr +| 49 = @assign_sub_expr +| 50 = @assign_mul_expr +| 51 = @assign_div_expr +| 52 = @assign_mod_expr +| 53 = @assign_lshift_expr +| 54 = @assign_rshift_expr +| 55 = @assign_urshift_expr +| 56 = @assign_or_expr +| 57 = @assign_xor_expr +| 58 = @assign_and_expr +| 59 = @preinc_expr +| 60 = @postinc_expr +| 61 = @predec_expr +| 62 = @postdec_expr +| 63 = @par_expr +| 64 = @var_declarator +| 65 = @arrow_function_expr +| 66 = @spread_element +| 67 = @array_pattern +| 68 = @object_pattern +| 69 = @yield_expr +| 70 = @tagged_template_expr +| 71 = @template_literal +| 72 = @template_element +| 73 = @array_comprehension_expr +| 74 = @generator_expr +| 75 = @for_in_comprehension_block +| 76 = @for_of_comprehension_block +| 77 = @legacy_letexpr +| 78 = @var_decl +| 79 = 
@proper_varaccess +| 80 = @class_expr +| 81 = @super_expr +| 82 = @newtarget_expr +| 83 = @named_import_specifier +| 84 = @import_default_specifier +| 85 = @import_namespace_specifier +| 86 = @named_export_specifier +| 87 = @exp_expr +| 88 = @assign_exp_expr +| 89 = @jsx_element +| 90 = @jsx_qualified_name +| 91 = @jsx_empty_expr +| 92 = @await_expr +| 93 = @function_sent_expr +| 94 = @decorator +| 95 = @export_default_specifier +| 96 = @export_namespace_specifier +| 97 = @bind_expr +| 98 = @external_module_reference +| 99 = @dynamic_import +| 100 = @expression_with_type_arguments +| 101 = @prefix_type_assertion +| 102 = @as_type_assertion +| 103 = @export_varaccess +| 104 = @decorator_list +| 105 = @non_null_assertion +| 106 = @bigint_literal +| 107 = @nullishcoalescing_expr +| 108 = @e4x_xml_anyname +| 109 = @e4x_xml_static_attribute_selector +| 110 = @e4x_xml_dynamic_attribute_selector +| 111 = @e4x_xml_filter_expression +| 112 = @e4x_xml_static_qualident +| 113 = @e4x_xml_dynamic_qualident +| 114 = @e4x_xml_dotdotexpr +| 115 = @import_meta_expr +| 116 = @assignlogandexpr +| 117 = @assignlogorexpr +| 118 = @assignnullishcoalescingexpr +| 119 = @template_pipe_ref +| 120 = @generated_code_expr +| 121 = @satisfies_expr +; + +@varaccess = @proper_varaccess | @export_varaccess; +@varref = @var_decl | @varaccess; + +@identifier = @label | @varref | @type_identifier; + +@literal = @null_literal | @boolean_literal | @number_literal | @string_literal | @regexp_literal | @bigint_literal; + +@propaccess = @dot_expr | @index_expr; + +@invokeexpr = @new_expr | @call_expr; + +@unaryexpr = @neg_expr | @plus_expr | @log_not_expr | @bit_not_expr | @typeof_expr | @void_expr | @delete_expr | @spread_element; + +@equality_test = @eq_expr | @neq_expr | @eqq_expr | @neqq_expr; + +@comparison = @equality_test | @lt_expr | @le_expr | @gt_expr | @ge_expr; + +@binaryexpr = @comparison | @lshift_expr | @rshift_expr | @urshift_expr | @add_expr | @sub_expr | @mul_expr | @div_expr | 
@mod_expr | @exp_expr | @bitor_expr | @xor_expr | @bitand_expr | @in_expr | @instanceof_expr | @logand_expr | @logor_expr | @nullishcoalescing_expr; + +@assignment = @assign_expr | @assign_add_expr | @assign_sub_expr | @assign_mul_expr | @assign_div_expr | @assign_mod_expr | @assign_exp_expr | @assign_lshift_expr | @assign_rshift_expr | @assign_urshift_expr | @assign_or_expr | @assign_xor_expr | @assign_and_expr | @assignlogandexpr | @assignlogorexpr | @assignnullishcoalescingexpr; + +@updateexpr = @preinc_expr | @postinc_expr | @predec_expr | @postdec_expr; + +@pattern = @varref | @array_pattern | @object_pattern; + +@comprehension_expr = @array_comprehension_expr | @generator_expr; + +@comprehension_block = @for_in_comprehension_block | @for_of_comprehension_block; + +@import_specifier = @named_import_specifier | @import_default_specifier | @import_namespace_specifier; + +@exportspecifier = @named_export_specifier | @export_default_specifier | @export_namespace_specifier; + +@type_keyword_operand = @import_declaration | @export_declaration | @import_specifier; + +@type_assertion = @as_type_assertion | @prefix_type_assertion; + +@class_definition = @class_decl_stmt | @class_expr; +@interface_definition = @interface_declaration | @interface_typeexpr; +@class_or_interface = @class_definition | @interface_definition; + +@lexical_decl = @var_decl | @type_decl; +@lexical_access = @varaccess | @local_type_access | @local_var_type_access | @local_namespace_access; +@lexical_ref = @lexical_decl | @lexical_access; + +@e4x_xml_attribute_selector = @e4x_xml_static_attribute_selector | @e4x_xml_dynamic_attribute_selector; +@e4x_xml_qualident = @e4x_xml_static_qualident | @e4x_xml_dynamic_qualident; + +expr_contains_template_tag_location( + int expr: @expr ref, + int location: @location ref +); + +@template_placeholder_tag_parent = @xmlelement | @xmlattribute | @file; + +template_placeholder_tag_info( + unique int node: @template_placeholder_tag, + int parentNode: 
@template_placeholder_tag_parent ref, + varchar(900) raw: string ref +); + +// scopes +scopes (unique int id: @scope, + int kind: int ref); + +case @scope.kind of + 0 = @global_scope +| 1 = @function_scope +| 2 = @catch_scope +| 3 = @module_scope +| 4 = @block_scope +| 5 = @for_scope +| 6 = @for_in_scope // for-of scopes work the same as for-in scopes +| 7 = @comprehension_block_scope +| 8 = @class_expr_scope +| 9 = @namespace_scope +| 10 = @class_decl_scope +| 11 = @interface_scope +| 12 = @type_alias_scope +| 13 = @mapped_type_scope +| 14 = @enum_scope +| 15 = @external_module_scope +| 16 = @conditional_type_scope; + +scopenodes (unique int node: @ast_node ref, + int scope: @scope ref); + +scopenesting (unique int inner: @scope ref, + int outer: @scope ref); + +// functions +@function = @function_decl_stmt | @function_expr | @arrow_function_expr; + +@parameterized = @function | @catch_clause; +@type_parameterized = @function | @class_or_interface | @type_alias_declaration | @mapped_typeexpr | @infer_typeexpr; + +is_generator (int fun: @function ref); +has_rest_parameter (int fun: @function ref); +is_async (int fun: @function ref); + +// variables and lexically scoped type names +#keyset[scope, name] +variables (unique int id: @variable, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_type_names (unique int id: @local_type_name, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_namespace_names (unique int id: @local_namespace_name, + varchar(900) name: string ref, + int scope: @scope ref); + +is_arguments_object (int id: @variable ref); + +@lexical_name = @variable | @local_type_name | @local_namespace_name; + +@bind_id = @varaccess | @local_var_type_access; +bind (unique int id: @bind_id ref, + int decl: @variable ref); + +decl (unique int id: @var_decl ref, + int decl: @variable ref); + +@typebind_id = @local_type_access | @export_varaccess; +typebind (unique int id: @typebind_id 
ref, + int decl: @local_type_name ref); + +@typedecl_id = @type_decl | @var_decl; +typedecl (unique int id: @typedecl_id ref, + int decl: @local_type_name ref); + +namespacedecl (unique int id: @var_decl ref, + int decl: @local_namespace_name ref); + +@namespacebind_id = @local_namespace_access | @export_varaccess; +namespacebind (unique int id: @namespacebind_id ref, + int decl: @local_namespace_name ref); + + +// properties in object literals, property patterns in object patterns, and method declarations in classes +#keyset[parent, index] +properties (unique int id: @property, + int parent: @property_parent ref, + int index: int ref, + int kind: int ref, + varchar(900) tostring: string ref); + +case @property.kind of + 0 = @value_property +| 1 = @property_getter +| 2 = @property_setter +| 3 = @jsx_attribute +| 4 = @function_call_signature +| 5 = @constructor_call_signature +| 6 = @index_signature +| 7 = @enum_member +| 8 = @proper_field +| 9 = @parameter_field +| 10 = @static_initializer +; + +@property_parent = @obj_expr | @object_pattern | @class_definition | @jsx_element | @interface_definition | @enum_declaration; +@property_accessor = @property_getter | @property_setter; +@call_signature = @function_call_signature | @constructor_call_signature; +@field = @proper_field | @parameter_field; +@field_or_vardeclarator = @field | @var_declarator; + +is_computed (int id: @property ref); +is_method (int id: @property ref); +is_static (int id: @property ref); +is_abstract_member (int id: @property ref); +is_const_enum (int id: @enum_declaration ref); +is_abstract_class (int id: @class_decl_stmt ref); + +has_public_keyword (int id: @property ref); +has_private_keyword (int id: @property ref); +has_protected_keyword (int id: @property ref); +has_readonly_keyword (int id: @property ref); +has_type_keyword (int id: @type_keyword_operand ref); +is_optional_member (int id: @property ref); +has_definite_assignment_assertion (int id: @field_or_vardeclarator ref); 
+is_optional_parameter_declaration (unique int parameter: @pattern ref); + +#keyset[constructor, param_index] +parameter_fields( + unique int field: @parameter_field ref, + int constructor: @function_expr ref, + int param_index: int ref +); + +// types +#keyset[parent, idx] +typeexprs ( + unique int id: @typeexpr, + int kind: int ref, + int parent: @typeexpr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref +); + +case @typeexpr.kind of + 0 = @local_type_access +| 1 = @type_decl +| 2 = @keyword_typeexpr +| 3 = @string_literal_typeexpr +| 4 = @number_literal_typeexpr +| 5 = @boolean_literal_typeexpr +| 6 = @array_typeexpr +| 7 = @union_typeexpr +| 8 = @indexed_access_typeexpr +| 9 = @intersection_typeexpr +| 10 = @parenthesized_typeexpr +| 11 = @tuple_typeexpr +| 12 = @keyof_typeexpr +| 13 = @qualified_type_access +| 14 = @generic_typeexpr +| 15 = @type_label +| 16 = @typeof_typeexpr +| 17 = @local_var_type_access +| 18 = @qualified_var_type_access +| 19 = @this_var_type_access +| 20 = @predicate_typeexpr +| 21 = @interface_typeexpr +| 22 = @type_parameter +| 23 = @plain_function_typeexpr +| 24 = @constructor_typeexpr +| 25 = @local_namespace_access +| 26 = @qualified_namespace_access +| 27 = @mapped_typeexpr +| 28 = @conditional_typeexpr +| 29 = @infer_typeexpr +| 30 = @import_type_access +| 31 = @import_namespace_access +| 32 = @import_var_type_access +| 33 = @optional_typeexpr +| 34 = @rest_typeexpr +| 35 = @bigint_literal_typeexpr +| 36 = @readonly_typeexpr +| 37 = @template_literal_typeexpr +; + +@typeref = @typeaccess | @type_decl; +@type_identifier = @type_decl | @local_type_access | @type_label | @local_var_type_access | @local_namespace_access; +@typeexpr_parent = @expr | @stmt | @property | @typeexpr; +@literal_typeexpr = @string_literal_typeexpr | @number_literal_typeexpr | @boolean_literal_typeexpr | @bigint_literal_typeexpr; +@typeaccess = @local_type_access | @qualified_type_access | @import_type_access; +@vartypeaccess = 
@local_var_type_access | @qualified_var_type_access | @this_var_type_access | @import_var_type_access; +@namespace_access = @local_namespace_access | @qualified_namespace_access | @import_namespace_access; +@import_typeexpr = @import_type_access | @import_namespace_access | @import_var_type_access; + +@function_typeexpr = @plain_function_typeexpr | @constructor_typeexpr; + +// types +types ( + unique int id: @type, + int kind: int ref, + varchar(900) tostring: string ref +); + +#keyset[parent, idx] +type_child ( + int child: @type ref, + int parent: @type ref, + int idx: int ref +); + +case @type.kind of + 0 = @any_type +| 1 = @string_type +| 2 = @number_type +| 3 = @union_type +| 4 = @true_type +| 5 = @false_type +| 6 = @type_reference +| 7 = @object_type +| 8 = @canonical_type_variable_type +| 9 = @typeof_type +| 10 = @void_type +| 11 = @undefined_type +| 12 = @null_type +| 13 = @never_type +| 14 = @plain_symbol_type +| 15 = @unique_symbol_type +| 16 = @objectkeyword_type +| 17 = @intersection_type +| 18 = @tuple_type +| 19 = @lexical_type_variable_type +| 20 = @this_type +| 21 = @number_literal_type +| 22 = @string_literal_type +| 23 = @unknown_type +| 24 = @bigint_type +| 25 = @bigint_literal_type +; + +@boolean_literal_type = @true_type | @false_type; +@symbol_type = @plain_symbol_type | @unique_symbol_type; +@union_or_intersection_type = @union_type | @intersection_type; +@typevariable_type = @canonical_type_variable_type | @lexical_type_variable_type; + +has_asserts_keyword(int node: @predicate_typeexpr ref); + +@typed_ast_node = @expr | @typeexpr | @function; +ast_node_type( + unique int node: @typed_ast_node ref, + int typ: @type ref); + +declared_function_signature( + unique int node: @function ref, + int sig: @signature_type ref +); + +invoke_expr_signature( + unique int node: @invokeexpr ref, + int sig: @signature_type ref +); + +invoke_expr_overload_index( + unique int node: @invokeexpr ref, + int index: int ref +); + +symbols ( + unique int id: 
@symbol, + int kind: int ref, + varchar(900) name: string ref +); + +symbol_parent ( + unique int symbol: @symbol ref, + int parent: @symbol ref +); + +symbol_module ( + int symbol: @symbol ref, + varchar(900) moduleName: string ref +); + +symbol_global ( + int symbol: @symbol ref, + varchar(900) globalName: string ref +); + +case @symbol.kind of + 0 = @root_symbol +| 1 = @member_symbol +| 2 = @other_symbol +; + +@type_with_symbol = @type_reference | @typevariable_type | @typeof_type | @unique_symbol_type; +@ast_node_with_symbol = @type_definition | @namespace_definition | @toplevel | @typeaccess | @namespace_access | @var_decl | @function | @invokeexpr | @import_declaration | @external_module_reference | @external_module_declaration; + +ast_node_symbol( + unique int node: @ast_node_with_symbol ref, + int symbol: @symbol ref); + +type_symbol( + unique int typ: @type_with_symbol ref, + int symbol: @symbol ref); + +#keyset[typ, name] +type_property( + int typ: @type ref, + varchar(900) name: string ref, + int propertyType: @type ref); + +type_alias( + unique int aliasType: @type ref, + int underlyingType: @type ref); + +@literal_type = @string_literal_type | @number_literal_type | @boolean_literal_type | @bigint_literal_type; +@type_with_literal_value = @string_literal_type | @number_literal_type | @bigint_literal_type; +type_literal_value( + unique int typ: @type_with_literal_value ref, + varchar(900) value: string ref); + +signature_types ( + unique int id: @signature_type, + int kind: int ref, + varchar(900) tostring: string ref, + int type_parameters: int ref, + int required_params: int ref +); + +is_abstract_signature( + unique int sig: @signature_type ref +); + +signature_rest_parameter( + unique int sig: @signature_type ref, + int rest_param_arra_type: @type ref +); + +case @signature_type.kind of + 0 = @function_signature_type +| 1 = @constructor_signature_type +; + +#keyset[typ, kind, index] +type_contains_signature ( + int typ: @type ref, + int kind: int 
ref, // constructor/call/index + int index: int ref, // ordering of overloaded signatures + int sig: @signature_type ref +); + +#keyset[parent, index] +signature_contains_type ( + int child: @type ref, + int parent: @signature_type ref, + int index: int ref +); + +#keyset[sig, index] +signature_parameter_name ( + int sig: @signature_type ref, + int index: int ref, + varchar(900) name: string ref +); + +number_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +string_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +base_type_names( + int typeName: @symbol ref, + int baseTypeName: @symbol ref +); + +self_types( + int typeName: @symbol ref, + int selfType: @type_reference ref +); + +tuple_type_min_length( + unique int typ: @type ref, + int minLength: int ref +); + +tuple_type_rest_index( + unique int typ: @type ref, + int index: int ref +); + +// comments +comments (unique int id: @comment, + int kind: int ref, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(900) tostring: string ref); + +case @comment.kind of + 0 = @slashslash_comment +| 1 = @slashstar_comment +| 2 = @doc_comment +| 3 = @html_comment_start +| 4 = @htmlcommentend; + +@html_comment = @html_comment_start | @htmlcommentend; +@line_comment = @slashslash_comment | @html_comment; +@block_comment = @slashstar_comment | @doc_comment; + +// source lines +lines (unique int id: @line, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(2) terminator: string ref); +indentation (int file: @file ref, + int lineno: int ref, + varchar(1) indentChar: string ref, + int indentDepth: int ref); + +// JavaScript parse errors +js_parse_errors (unique int id: @js_parse_error, + int toplevel: @toplevel ref, + varchar(900) message: string ref, + varchar(900) line: string ref); + +// regular expressions +#keyset[parent, idx] +regexpterm (unique int id: @regexpterm, + int kind: int ref, + int parent: @regexpparent 
ref, + int idx: int ref, + varchar(900) tostring: string ref); + +@regexpparent = @regexpterm | @regexp_literal | @string_literal | @add_expr; + +case @regexpterm.kind of + 0 = @regexp_alt +| 1 = @regexp_seq +| 2 = @regexp_caret +| 3 = @regexp_dollar +| 4 = @regexp_wordboundary +| 5 = @regexp_nonwordboundary +| 6 = @regexp_positive_lookahead +| 7 = @regexp_negative_lookahead +| 8 = @regexp_star +| 9 = @regexp_plus +| 10 = @regexp_opt +| 11 = @regexp_range +| 12 = @regexp_dot +| 13 = @regexp_group +| 14 = @regexp_normal_constant +| 15 = @regexp_hex_escape +| 16 = @regexp_unicode_escape +| 17 = @regexp_dec_escape +| 18 = @regexp_oct_escape +| 19 = @regexp_ctrl_escape +| 20 = @regexp_char_class_escape +| 21 = @regexp_id_escape +| 22 = @regexp_backref +| 23 = @regexp_char_class +| 24 = @regexp_char_range +| 25 = @regexp_positive_lookbehind +| 26 = @regexp_negative_lookbehind +| 27 = @regexp_unicode_property_escape; + +regexp_parse_errors (unique int id: @regexp_parse_error, + int regexp: @regexpterm ref, + varchar(900) message: string ref); + +@regexp_quantifier = @regexp_star | @regexp_plus | @regexp_opt | @regexp_range; +@regexp_escape = @regexp_char_escape | @regexp_char_class_escape | @regexp_unicode_property_escape; +@regexp_char_escape = @regexp_hex_escape | @regexp_unicode_escape | @regexp_dec_escape | @regexp_oct_escape | @regexp_ctrl_escape | @regexp_id_escape; +@regexp_constant = @regexp_normal_constant | @regexp_char_escape; +@regexp_lookahead = @regexp_positive_lookahead | @regexp_negative_lookahead; +@regexp_lookbehind = @regexp_positive_lookbehind | @regexp_negative_lookbehind; +@regexp_subpattern = @regexp_lookahead | @regexp_lookbehind; +@regexp_anchor = @regexp_dollar | @regexp_caret; + +is_greedy (int id: @regexp_quantifier ref); +range_quantifier_lower_bound (unique int id: @regexp_range ref, int lo: int ref); +range_quantifier_upper_bound (unique int id: @regexp_range ref, int hi: int ref); +is_capture (unique int id: @regexp_group ref, int number: 
int ref); +is_named_capture (unique int id: @regexp_group ref, string name: string ref); +is_inverted (int id: @regexp_char_class ref); +regexp_const_value (unique int id: @regexp_constant ref, varchar(1) value: string ref); +char_class_escape (unique int id: @regexp_char_class_escape ref, varchar(1) value: string ref); +backref (unique int id: @regexp_backref ref, int value: int ref); +named_backref (unique int id: @regexp_backref ref, string name: string ref); +unicode_property_escapename (unique int id: @regexp_unicode_property_escape ref, string name: string ref); +unicode_property_escapevalue (unique int id: @regexp_unicode_property_escape ref, string value: string ref); + +// tokens +#keyset[toplevel, idx] +tokeninfo (unique int id: @token, + int kind: int ref, + int toplevel: @toplevel ref, + int idx: int ref, + varchar(900) value: string ref); + +case @token.kind of + 0 = @token_eof +| 1 = @token_null_literal +| 2 = @token_boolean_literal +| 3 = @token_numeric_literal +| 4 = @token_string_literal +| 5 = @token_regular_expression +| 6 = @token_identifier +| 7 = @token_keyword +| 8 = @token_punctuator; + +// associate comments with the token immediately following them (which may be EOF) +next_token (int comment: @comment ref, int token: @token ref); + +// JSON +#keyset[parent, idx] +json (unique int id: @json_value, + int kind: int ref, + int parent: @json_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +json_literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @json_value ref); + +json_properties (int obj: @json_object ref, + varchar(900) property: string ref, + int value: @json_value ref); + +json_errors (unique int id: @json_parse_error, + varchar(900) message: string ref); + +json_locations(unique int locatable: @json_locatable ref, + int location: @location_default ref); + +case @json_value.kind of + 0 = @json_null +| 1 = @json_boolean +| 2 = @json_number +| 3 = @json_string +| 4 = 
@json_array +| 5 = @json_object; + +@json_parent = @json_object | @json_array | @file; + +@json_locatable = @json_value | @json_parse_error; + +// locations +@ast_node = @toplevel | @stmt | @expr | @property | @typeexpr; + +@locatable = @file + | @ast_node + | @comment + | @line + | @js_parse_error | @regexp_parse_error + | @regexpterm + | @json_locatable + | @token + | @cfg_node + | @jsdoc | @jsdoc_type_expr | @jsdoc_tag + | @yaml_locatable + | @xmllocatable + | @configLocatable + | @template_placeholder_tag; + +hasLocation (unique int locatable: @locatable ref, + int location: @location ref); + +// CFG +entry_cfg_node (unique int id: @entry_node, int container: @stmt_container ref); +exit_cfg_node (unique int id: @exit_node, int container: @stmt_container ref); +guard_node (unique int id: @guard_node, int kind: int ref, int test: @expr ref); +case @guard_node.kind of + 0 = @falsy_guard +| 1 = @truthy_guard; +@condition_guard = @falsy_guard | @truthy_guard; + +@synthetic_cfg_node = @entry_node | @exit_node | @guard_node; +@cfg_node = @synthetic_cfg_node | @expr_parent; + +successor (int pred: @cfg_node ref, int succ: @cfg_node ref); + +// JSDoc comments +jsdoc (unique int id: @jsdoc, varchar(900) description: string ref, int comment: @comment ref); +#keyset[parent, idx] +jsdoc_tags (unique int id: @jsdoc_tag, varchar(900) title: string ref, + int parent: @jsdoc ref, int idx: int ref, varchar(900) tostring: string ref); +jsdoc_tag_descriptions (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); +jsdoc_tag_names (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); + +#keyset[parent, idx] +jsdoc_type_exprs (unique int id: @jsdoc_type_expr, + int kind: int ref, + int parent: @jsdoc_type_expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); +case @jsdoc_type_expr.kind of + 0 = @jsdoc_any_type_expr +| 1 = @jsdoc_null_type_expr +| 2 = @jsdoc_undefined_type_expr +| 3 = @jsdoc_unknown_type_expr +| 4 = @jsdoc_void_type_expr +| 5 
= @jsdoc_named_type_expr +| 6 = @jsdoc_applied_type_expr +| 7 = @jsdoc_nullable_type_expr +| 8 = @jsdoc_non_nullable_type_expr +| 9 = @jsdoc_record_type_expr +| 10 = @jsdoc_array_type_expr +| 11 = @jsdoc_union_type_expr +| 12 = @jsdoc_function_type_expr +| 13 = @jsdoc_optional_type_expr +| 14 = @jsdoc_rest_type_expr +; + +#keyset[id, idx] +jsdoc_record_field_name (int id: @jsdoc_record_type_expr ref, int idx: int ref, varchar(900) name: string ref); +jsdoc_prefix_qualifier (int id: @jsdoc_type_expr ref); +jsdoc_has_new_parameter (int fn: @jsdoc_function_type_expr ref); + +@jsdoc_type_expr_parent = @jsdoc_type_expr | @jsdoc_tag; + +jsdoc_errors (unique int id: @jsdoc_error, int tag: @jsdoc_tag ref, varchar(900) message: string ref, varchar(900) tostring: string ref); + +@dataflownode = @expr | @function_decl_stmt | @class_decl_stmt | @namespace_declaration | @enum_declaration | @property; + +@optionalchainable = @call_expr | @propaccess; + +isOptionalChaining(int id: @optionalchainable ref); + +/** + * The time taken for the extraction of a file. + * This table contains non-deterministic content. + * + * The sum of the `time` column for each (`file`, `timerKind`) pair + * is the total time taken for extraction of `file`. The `extractionPhase` + * column provides a granular view of the extraction time of the file. + */ +extraction_time( + int file : @file ref, + // see `com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase`. + int extractionPhase: int ref, + // 0 for the elapsed CPU time in nanoseconds, 1 for the elapsed wallclock time in nanoseconds + int timerKind: int ref, + float time: float ref +) + +/** +* Non-timing related data for the extraction of a single file. +* This table contains non-deterministic content. 
+*/ +extraction_data( + int file : @file ref, + // the absolute path to the cache file + varchar(900) cacheFile: string ref, + boolean fromCache: boolean ref, + int length: int ref +) + +/*- YAML -*/ + +#keyset[parent, idx] +yaml (unique int id: @yaml_node, + int kind: int ref, + int parent: @yaml_node_parent ref, + int idx: int ref, + string tag: string ref, + string tostring: string ref); + +case @yaml_node.kind of + 0 = @yaml_scalar_node +| 1 = @yaml_mapping_node +| 2 = @yaml_sequence_node +| 3 = @yaml_alias_node +; + +@yaml_collection_node = @yaml_mapping_node | @yaml_sequence_node; + +@yaml_node_parent = @yaml_collection_node | @file; + +yaml_anchors (unique int node: @yaml_node ref, + string anchor: string ref); + +yaml_aliases (unique int alias: @yaml_alias_node ref, + string target: string ref); + +yaml_scalars (unique int scalar: @yaml_scalar_node ref, + int style: int ref, + string value: string ref); + +yaml_errors (unique int id: @yaml_error, + string message: string ref); + +yaml_locations(unique int locatable: @yaml_locatable ref, + int location: @location_default ref); + +@yaml_locatable = @yaml_node | @yaml_error; + +/*- XML Files -*/ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int 
id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +/*- Configuration files with key value pairs -*/ + +configs( + unique int id: @config +); + +configNames( + unique int id: @configName, + int config: @config ref, + string name: string ref +); + +configValues( + unique int id: @configValue, + int config: @config ref, + string value: string ref +); + +configLocations( + int locatable: @configLocatable ref, + int location: @location_default ref +); + +@configLocatable = @config | @configName | @configValue; diff --git a/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/semmlecode.javascript.dbscheme b/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/semmlecode.javascript.dbscheme new file mode 100644 index 000000000000..5b5db607d20c --- /dev/null +++ b/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/semmlecode.javascript.dbscheme @@ -0,0 +1,1193 @@ +/*** Standard fragments ***/ + +/*- Files and folders -*/ + +/** + * The location of an element. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `file`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ +locations_default( + unique int id: @location_default, + int file: @file ref, + int beginLine: int ref, + int beginColumn: int ref, + int endLine: int ref, + int endColumn: int ref +); + +files( + unique int id: @file, + string name: string ref +); + +folders( + unique int id: @folder, + string name: string ref +); + +@container = @file | @folder + +containerparent( + int parent: @container ref, + unique int child: @container ref +); + +/*- Lines of code -*/ + +numlines( + int element_id: @sourceline ref, + int num_lines: int ref, + int num_code: int ref, + int num_comment: int ref +); + +/*- External data -*/ + +/** + * External data, loaded from CSV files during snapshot creation. See + * [Tutorial: Incorporating external data](https://help.semmle.com/wiki/display/SD/Tutorial%3A+Incorporating+external+data) + * for more information. + */ +externalData( + int id : @externalDataElement, + string path : string ref, + int column: int ref, + string value : string ref +); + +/*- Source location prefix -*/ + +/** + * The source location of the snapshot. 
+ */ +sourceLocationPrefix(string prefix : string ref); + +/*- JavaScript-specific part -*/ + +@location = @location_default + +@sourceline = @locatable; + +filetype( + int file: @file ref, + string filetype: string ref +) + +// top-level code fragments +toplevels (unique int id: @toplevel, + int kind: int ref); + +is_externs (int toplevel: @toplevel ref); + +case @toplevel.kind of + 0 = @script +| 1 = @inline_script +| 2 = @event_handler +| 3 = @javascript_url +| 4 = @template_toplevel; + +is_module (int tl: @toplevel ref); +is_nodejs (int tl: @toplevel ref); +is_es2015_module (int tl: @toplevel ref); +is_closure_module (int tl: @toplevel ref); + +@xml_node_with_code = @xmlelement | @xmlattribute | @template_placeholder_tag; +toplevel_parent_xml_node( + unique int toplevel: @toplevel ref, + int xmlnode: @xml_node_with_code ref); + +xml_element_parent_expression( + unique int xmlnode: @xmlelement ref, + int expression: @expr ref, + int index: int ref); + +// statements +#keyset[parent, idx] +stmts (unique int id: @stmt, + int kind: int ref, + int parent: @stmt_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +stmt_containers (unique int stmt: @stmt ref, + int container: @stmt_container ref); + +jump_targets (unique int jump: @stmt ref, + int target: @stmt ref); + +@stmt_parent = @stmt | @toplevel | @function_expr | @arrow_function_expr | @static_initializer; +@stmt_container = @toplevel | @function | @namespace_declaration | @external_module_declaration | @global_augmentation_declaration; + +case @stmt.kind of + 0 = @empty_stmt +| 1 = @block_stmt +| 2 = @expr_stmt +| 3 = @if_stmt +| 4 = @labeled_stmt +| 5 = @break_stmt +| 6 = @continue_stmt +| 7 = @with_stmt +| 8 = @switch_stmt +| 9 = @return_stmt +| 10 = @throw_stmt +| 11 = @try_stmt +| 12 = @while_stmt +| 13 = @do_while_stmt +| 14 = @for_stmt +| 15 = @for_in_stmt +| 16 = @debugger_stmt +| 17 = @function_decl_stmt +| 18 = @var_decl_stmt +| 19 = @case +| 20 = @catch_clause +| 21 = 
@for_of_stmt +| 22 = @const_decl_stmt +| 23 = @let_stmt +| 24 = @legacy_let_stmt +| 25 = @for_each_stmt +| 26 = @class_decl_stmt +| 27 = @import_declaration +| 28 = @export_all_declaration +| 29 = @export_default_declaration +| 30 = @export_named_declaration +| 31 = @namespace_declaration +| 32 = @import_equals_declaration +| 33 = @export_assign_declaration +| 34 = @interface_declaration +| 35 = @type_alias_declaration +| 36 = @enum_declaration +| 37 = @external_module_declaration +| 38 = @export_as_namespace_declaration +| 39 = @global_augmentation_declaration +| 40 = @using_decl_stmt +; + +@decl_stmt = @var_decl_stmt | @const_decl_stmt | @let_stmt | @legacy_let_stmt | @using_decl_stmt; + +@export_declaration = @export_all_declaration | @export_default_declaration | @export_named_declaration; + +@namespace_definition = @namespace_declaration | @enum_declaration; +@type_definition = @class_definition | @interface_declaration | @enum_declaration | @type_alias_declaration | @enum_member; + +is_instantiated(unique int decl: @namespace_declaration ref); + +@declarable_node = @decl_stmt | @namespace_declaration | @class_decl_stmt | @function_decl_stmt | @enum_declaration | @external_module_declaration | @global_augmentation_declaration | @field; +has_declare_keyword(unique int stmt: @declarable_node ref); + +is_for_await_of(unique int forof: @for_of_stmt ref); + +// expressions +#keyset[parent, idx] +exprs (unique int id: @expr, + int kind: int ref, + int parent: @expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @expr_or_type ref); + +enclosing_stmt (unique int expr: @expr_or_type ref, + int stmt: @stmt ref); + +expr_containers (unique int expr: @expr_or_type ref, + int container: @stmt_container ref); + +array_size (unique int ae: @arraylike ref, + int sz: int ref); + +is_delegating (int yield: @yield_expr ref); + +@expr_or_stmt = @expr | @stmt; 
+@expr_or_type = @expr | @typeexpr; +@expr_parent = @expr_or_stmt | @property | @function_typeexpr; +@arraylike = @array_expr | @array_pattern; +@type_annotation = @typeexpr | @jsdoc_type_expr; +@node_in_stmt_container = @cfg_node | @type_annotation | @toplevel; + +case @expr.kind of + 0 = @label +| 1 = @null_literal +| 2 = @boolean_literal +| 3 = @number_literal +| 4 = @string_literal +| 5 = @regexp_literal +| 6 = @this_expr +| 7 = @array_expr +| 8 = @obj_expr +| 9 = @function_expr +| 10 = @seq_expr +| 11 = @conditional_expr +| 12 = @new_expr +| 13 = @call_expr +| 14 = @dot_expr +| 15 = @index_expr +| 16 = @neg_expr +| 17 = @plus_expr +| 18 = @log_not_expr +| 19 = @bit_not_expr +| 20 = @typeof_expr +| 21 = @void_expr +| 22 = @delete_expr +| 23 = @eq_expr +| 24 = @neq_expr +| 25 = @eqq_expr +| 26 = @neqq_expr +| 27 = @lt_expr +| 28 = @le_expr +| 29 = @gt_expr +| 30 = @ge_expr +| 31 = @lshift_expr +| 32 = @rshift_expr +| 33 = @urshift_expr +| 34 = @add_expr +| 35 = @sub_expr +| 36 = @mul_expr +| 37 = @div_expr +| 38 = @mod_expr +| 39 = @bitor_expr +| 40 = @xor_expr +| 41 = @bitand_expr +| 42 = @in_expr +| 43 = @instanceof_expr +| 44 = @logand_expr +| 45 = @logor_expr +| 47 = @assign_expr +| 48 = @assign_add_expr +| 49 = @assign_sub_expr +| 50 = @assign_mul_expr +| 51 = @assign_div_expr +| 52 = @assign_mod_expr +| 53 = @assign_lshift_expr +| 54 = @assign_rshift_expr +| 55 = @assign_urshift_expr +| 56 = @assign_or_expr +| 57 = @assign_xor_expr +| 58 = @assign_and_expr +| 59 = @preinc_expr +| 60 = @postinc_expr +| 61 = @predec_expr +| 62 = @postdec_expr +| 63 = @par_expr +| 64 = @var_declarator +| 65 = @arrow_function_expr +| 66 = @spread_element +| 67 = @array_pattern +| 68 = @object_pattern +| 69 = @yield_expr +| 70 = @tagged_template_expr +| 71 = @template_literal +| 72 = @template_element +| 73 = @array_comprehension_expr +| 74 = @generator_expr +| 75 = @for_in_comprehension_block +| 76 = @for_of_comprehension_block +| 77 = @legacy_letexpr +| 78 = @var_decl +| 79 = 
@proper_varaccess +| 80 = @class_expr +| 81 = @super_expr +| 82 = @newtarget_expr +| 83 = @named_import_specifier +| 84 = @import_default_specifier +| 85 = @import_namespace_specifier +| 86 = @named_export_specifier +| 87 = @exp_expr +| 88 = @assign_exp_expr +| 89 = @jsx_element +| 90 = @jsx_qualified_name +| 91 = @jsx_empty_expr +| 92 = @await_expr +| 93 = @function_sent_expr +| 94 = @decorator +| 95 = @export_default_specifier +| 96 = @export_namespace_specifier +| 97 = @bind_expr +| 98 = @external_module_reference +| 99 = @dynamic_import +| 100 = @expression_with_type_arguments +| 101 = @prefix_type_assertion +| 102 = @as_type_assertion +| 103 = @export_varaccess +| 104 = @decorator_list +| 105 = @non_null_assertion +| 106 = @bigint_literal +| 107 = @nullishcoalescing_expr +| 108 = @e4x_xml_anyname +| 109 = @e4x_xml_static_attribute_selector +| 110 = @e4x_xml_dynamic_attribute_selector +| 111 = @e4x_xml_filter_expression +| 112 = @e4x_xml_static_qualident +| 113 = @e4x_xml_dynamic_qualident +| 114 = @e4x_xml_dotdotexpr +| 115 = @import_meta_expr +| 116 = @assignlogandexpr +| 117 = @assignlogorexpr +| 118 = @assignnullishcoalescingexpr +| 119 = @template_pipe_ref +| 120 = @generated_code_expr +| 121 = @satisfies_expr +; + +@varaccess = @proper_varaccess | @export_varaccess; +@varref = @var_decl | @varaccess; + +@identifier = @label | @varref | @type_identifier; + +@literal = @null_literal | @boolean_literal | @number_literal | @string_literal | @regexp_literal | @bigint_literal; + +@propaccess = @dot_expr | @index_expr; + +@invokeexpr = @new_expr | @call_expr; + +@unaryexpr = @neg_expr | @plus_expr | @log_not_expr | @bit_not_expr | @typeof_expr | @void_expr | @delete_expr | @spread_element; + +@equality_test = @eq_expr | @neq_expr | @eqq_expr | @neqq_expr; + +@comparison = @equality_test | @lt_expr | @le_expr | @gt_expr | @ge_expr; + +@binaryexpr = @comparison | @lshift_expr | @rshift_expr | @urshift_expr | @add_expr | @sub_expr | @mul_expr | @div_expr | 
@mod_expr | @exp_expr | @bitor_expr | @xor_expr | @bitand_expr | @in_expr | @instanceof_expr | @logand_expr | @logor_expr | @nullishcoalescing_expr; + +@assignment = @assign_expr | @assign_add_expr | @assign_sub_expr | @assign_mul_expr | @assign_div_expr | @assign_mod_expr | @assign_exp_expr | @assign_lshift_expr | @assign_rshift_expr | @assign_urshift_expr | @assign_or_expr | @assign_xor_expr | @assign_and_expr | @assignlogandexpr | @assignlogorexpr | @assignnullishcoalescingexpr; + +@updateexpr = @preinc_expr | @postinc_expr | @predec_expr | @postdec_expr; + +@pattern = @varref | @array_pattern | @object_pattern; + +@comprehension_expr = @array_comprehension_expr | @generator_expr; + +@comprehension_block = @for_in_comprehension_block | @for_of_comprehension_block; + +@import_specifier = @named_import_specifier | @import_default_specifier | @import_namespace_specifier; + +@exportspecifier = @named_export_specifier | @export_default_specifier | @export_namespace_specifier; + +@type_keyword_operand = @import_declaration | @export_declaration | @import_specifier; + +@type_assertion = @as_type_assertion | @prefix_type_assertion; + +@class_definition = @class_decl_stmt | @class_expr; +@interface_definition = @interface_declaration | @interface_typeexpr; +@class_or_interface = @class_definition | @interface_definition; + +@lexical_decl = @var_decl | @type_decl; +@lexical_access = @varaccess | @local_type_access | @local_var_type_access | @local_namespace_access; +@lexical_ref = @lexical_decl | @lexical_access; + +@e4x_xml_attribute_selector = @e4x_xml_static_attribute_selector | @e4x_xml_dynamic_attribute_selector; +@e4x_xml_qualident = @e4x_xml_static_qualident | @e4x_xml_dynamic_qualident; + +expr_contains_template_tag_location( + int expr: @expr ref, + int location: @location ref +); + +@template_placeholder_tag_parent = @xmlelement | @xmlattribute | @file; + +template_placeholder_tag_info( + unique int node: @template_placeholder_tag, + int parentNode: 
@template_placeholder_tag_parent ref, + varchar(900) raw: string ref +); + +// scopes +scopes (unique int id: @scope, + int kind: int ref); + +case @scope.kind of + 0 = @global_scope +| 1 = @function_scope +| 2 = @catch_scope +| 3 = @module_scope +| 4 = @block_scope +| 5 = @for_scope +| 6 = @for_in_scope // for-of scopes work the same as for-in scopes +| 7 = @comprehension_block_scope +| 8 = @class_expr_scope +| 9 = @namespace_scope +| 10 = @class_decl_scope +| 11 = @interface_scope +| 12 = @type_alias_scope +| 13 = @mapped_type_scope +| 14 = @enum_scope +| 15 = @external_module_scope +| 16 = @conditional_type_scope; + +scopenodes (unique int node: @ast_node ref, + int scope: @scope ref); + +scopenesting (unique int inner: @scope ref, + int outer: @scope ref); + +// functions +@function = @function_decl_stmt | @function_expr | @arrow_function_expr; + +@parameterized = @function | @catch_clause; +@type_parameterized = @function | @class_or_interface | @type_alias_declaration | @mapped_typeexpr | @infer_typeexpr; + +is_generator (int fun: @function ref); +has_rest_parameter (int fun: @function ref); +is_async (int fun: @function ref); + +// variables and lexically scoped type names +#keyset[scope, name] +variables (unique int id: @variable, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_type_names (unique int id: @local_type_name, + varchar(900) name: string ref, + int scope: @scope ref); + +#keyset[scope, name] +local_namespace_names (unique int id: @local_namespace_name, + varchar(900) name: string ref, + int scope: @scope ref); + +is_arguments_object (int id: @variable ref); + +@lexical_name = @variable | @local_type_name | @local_namespace_name; + +@bind_id = @varaccess | @local_var_type_access; +bind (unique int id: @bind_id ref, + int decl: @variable ref); + +decl (unique int id: @var_decl ref, + int decl: @variable ref); + +@typebind_id = @local_type_access | @export_varaccess; +typebind (unique int id: @typebind_id 
ref, + int decl: @local_type_name ref); + +@typedecl_id = @type_decl | @var_decl; +typedecl (unique int id: @typedecl_id ref, + int decl: @local_type_name ref); + +namespacedecl (unique int id: @var_decl ref, + int decl: @local_namespace_name ref); + +@namespacebind_id = @local_namespace_access | @export_varaccess; +namespacebind (unique int id: @namespacebind_id ref, + int decl: @local_namespace_name ref); + + +// properties in object literals, property patterns in object patterns, and method declarations in classes +#keyset[parent, index] +properties (unique int id: @property, + int parent: @property_parent ref, + int index: int ref, + int kind: int ref, + varchar(900) tostring: string ref); + +case @property.kind of + 0 = @value_property +| 1 = @property_getter +| 2 = @property_setter +| 3 = @jsx_attribute +| 4 = @function_call_signature +| 5 = @constructor_call_signature +| 6 = @index_signature +| 7 = @enum_member +| 8 = @proper_field +| 9 = @parameter_field +| 10 = @static_initializer +; + +@property_parent = @obj_expr | @object_pattern | @class_definition | @jsx_element | @interface_definition | @enum_declaration; +@property_accessor = @property_getter | @property_setter; +@call_signature = @function_call_signature | @constructor_call_signature; +@field = @proper_field | @parameter_field; +@field_or_vardeclarator = @field | @var_declarator; + +is_computed (int id: @property ref); +is_method (int id: @property ref); +is_static (int id: @property ref); +is_abstract_member (int id: @property ref); +is_const_enum (int id: @enum_declaration ref); +is_abstract_class (int id: @class_decl_stmt ref); + +has_public_keyword (int id: @property ref); +has_private_keyword (int id: @property ref); +has_protected_keyword (int id: @property ref); +has_readonly_keyword (int id: @property ref); +has_type_keyword (int id: @type_keyword_operand ref); +is_optional_member (int id: @property ref); +has_definite_assignment_assertion (int id: @field_or_vardeclarator ref); 
+is_optional_parameter_declaration (unique int parameter: @pattern ref); + +#keyset[constructor, param_index] +parameter_fields( + unique int field: @parameter_field ref, + int constructor: @function_expr ref, + int param_index: int ref +); + +// types +#keyset[parent, idx] +typeexprs ( + unique int id: @typeexpr, + int kind: int ref, + int parent: @typeexpr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref +); + +case @typeexpr.kind of + 0 = @local_type_access +| 1 = @type_decl +| 2 = @keyword_typeexpr +| 3 = @string_literal_typeexpr +| 4 = @number_literal_typeexpr +| 5 = @boolean_literal_typeexpr +| 6 = @array_typeexpr +| 7 = @union_typeexpr +| 8 = @indexed_access_typeexpr +| 9 = @intersection_typeexpr +| 10 = @parenthesized_typeexpr +| 11 = @tuple_typeexpr +| 12 = @keyof_typeexpr +| 13 = @qualified_type_access +| 14 = @generic_typeexpr +| 15 = @type_label +| 16 = @typeof_typeexpr +| 17 = @local_var_type_access +| 18 = @qualified_var_type_access +| 19 = @this_var_type_access +| 20 = @predicate_typeexpr +| 21 = @interface_typeexpr +| 22 = @type_parameter +| 23 = @plain_function_typeexpr +| 24 = @constructor_typeexpr +| 25 = @local_namespace_access +| 26 = @qualified_namespace_access +| 27 = @mapped_typeexpr +| 28 = @conditional_typeexpr +| 29 = @infer_typeexpr +| 30 = @import_type_access +| 31 = @import_namespace_access +| 32 = @import_var_type_access +| 33 = @optional_typeexpr +| 34 = @rest_typeexpr +| 35 = @bigint_literal_typeexpr +| 36 = @readonly_typeexpr +| 37 = @template_literal_typeexpr +; + +@typeref = @typeaccess | @type_decl; +@type_identifier = @type_decl | @local_type_access | @type_label | @local_var_type_access | @local_namespace_access; +@typeexpr_parent = @expr | @stmt | @property | @typeexpr; +@literal_typeexpr = @string_literal_typeexpr | @number_literal_typeexpr | @boolean_literal_typeexpr | @bigint_literal_typeexpr; +@typeaccess = @local_type_access | @qualified_type_access | @import_type_access; +@vartypeaccess = 
@local_var_type_access | @qualified_var_type_access | @this_var_type_access | @import_var_type_access; +@namespace_access = @local_namespace_access | @qualified_namespace_access | @import_namespace_access; +@import_typeexpr = @import_type_access | @import_namespace_access | @import_var_type_access; + +@function_typeexpr = @plain_function_typeexpr | @constructor_typeexpr; + +// types +types ( + unique int id: @type, + int kind: int ref, + varchar(900) tostring: string ref +); + +#keyset[parent, idx] +type_child ( + int child: @type ref, + int parent: @type ref, + int idx: int ref +); + +case @type.kind of + 0 = @any_type +| 1 = @string_type +| 2 = @number_type +| 3 = @union_type +| 4 = @true_type +| 5 = @false_type +| 6 = @type_reference +| 7 = @object_type +| 8 = @canonical_type_variable_type +| 9 = @typeof_type +| 10 = @void_type +| 11 = @undefined_type +| 12 = @null_type +| 13 = @never_type +| 14 = @plain_symbol_type +| 15 = @unique_symbol_type +| 16 = @objectkeyword_type +| 17 = @intersection_type +| 18 = @tuple_type +| 19 = @lexical_type_variable_type +| 20 = @this_type +| 21 = @number_literal_type +| 22 = @string_literal_type +| 23 = @unknown_type +| 24 = @bigint_type +| 25 = @bigint_literal_type +; + +@boolean_literal_type = @true_type | @false_type; +@symbol_type = @plain_symbol_type | @unique_symbol_type; +@union_or_intersection_type = @union_type | @intersection_type; +@typevariable_type = @canonical_type_variable_type | @lexical_type_variable_type; + +has_asserts_keyword(int node: @predicate_typeexpr ref); + +@typed_ast_node = @expr | @typeexpr | @function; +ast_node_type( + unique int node: @typed_ast_node ref, + int typ: @type ref); + +declared_function_signature( + unique int node: @function ref, + int sig: @signature_type ref +); + +invoke_expr_signature( + unique int node: @invokeexpr ref, + int sig: @signature_type ref +); + +invoke_expr_overload_index( + unique int node: @invokeexpr ref, + int index: int ref +); + +symbols ( + unique int id: 
@symbol, + int kind: int ref, + varchar(900) name: string ref +); + +symbol_parent ( + unique int symbol: @symbol ref, + int parent: @symbol ref +); + +symbol_module ( + int symbol: @symbol ref, + varchar(900) moduleName: string ref +); + +symbol_global ( + int symbol: @symbol ref, + varchar(900) globalName: string ref +); + +case @symbol.kind of + 0 = @root_symbol +| 1 = @member_symbol +| 2 = @other_symbol +; + +@type_with_symbol = @type_reference | @typevariable_type | @typeof_type | @unique_symbol_type; +@ast_node_with_symbol = @type_definition | @namespace_definition | @toplevel | @typeaccess | @namespace_access | @var_decl | @function | @invokeexpr | @import_declaration | @external_module_reference | @external_module_declaration; + +ast_node_symbol( + unique int node: @ast_node_with_symbol ref, + int symbol: @symbol ref); + +type_symbol( + unique int typ: @type_with_symbol ref, + int symbol: @symbol ref); + +#keyset[typ, name] +type_property( + int typ: @type ref, + varchar(900) name: string ref, + int propertyType: @type ref); + +type_alias( + unique int aliasType: @type ref, + int underlyingType: @type ref); + +@literal_type = @string_literal_type | @number_literal_type | @boolean_literal_type | @bigint_literal_type; +@type_with_literal_value = @string_literal_type | @number_literal_type | @bigint_literal_type; +type_literal_value( + unique int typ: @type_with_literal_value ref, + varchar(900) value: string ref); + +signature_types ( + unique int id: @signature_type, + int kind: int ref, + varchar(900) tostring: string ref, + int type_parameters: int ref, + int required_params: int ref +); + +is_abstract_signature( + unique int sig: @signature_type ref +); + +signature_rest_parameter( + unique int sig: @signature_type ref, + int rest_param_arra_type: @type ref +); + +case @signature_type.kind of + 0 = @function_signature_type +| 1 = @constructor_signature_type +; + +#keyset[typ, kind, index] +type_contains_signature ( + int typ: @type ref, + int kind: int 
ref, // constructor/call/index + int index: int ref, // ordering of overloaded signatures + int sig: @signature_type ref +); + +#keyset[parent, index] +signature_contains_type ( + int child: @type ref, + int parent: @signature_type ref, + int index: int ref +); + +#keyset[sig, index] +signature_parameter_name ( + int sig: @signature_type ref, + int index: int ref, + varchar(900) name: string ref +); + +number_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +string_index_type ( + unique int baseType: @type ref, + int propertyType: @type ref +); + +base_type_names( + int typeName: @symbol ref, + int baseTypeName: @symbol ref +); + +self_types( + int typeName: @symbol ref, + int selfType: @type_reference ref +); + +tuple_type_min_length( + unique int typ: @type ref, + int minLength: int ref +); + +tuple_type_rest_index( + unique int typ: @type ref, + int index: int ref +); + +// comments +comments (unique int id: @comment, + int kind: int ref, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(900) tostring: string ref); + +case @comment.kind of + 0 = @slashslash_comment +| 1 = @slashstar_comment +| 2 = @doc_comment +| 3 = @html_comment_start +| 4 = @htmlcommentend; + +@html_comment = @html_comment_start | @htmlcommentend; +@line_comment = @slashslash_comment | @html_comment; +@block_comment = @slashstar_comment | @doc_comment; + +// source lines +lines (unique int id: @line, + int toplevel: @toplevel ref, + varchar(900) text: string ref, + varchar(2) terminator: string ref); +indentation (int file: @file ref, + int lineno: int ref, + varchar(1) indentChar: string ref, + int indentDepth: int ref); + +// JavaScript parse errors +js_parse_errors (unique int id: @js_parse_error, + int toplevel: @toplevel ref, + varchar(900) message: string ref, + varchar(900) line: string ref); + +// regular expressions +#keyset[parent, idx] +regexpterm (unique int id: @regexpterm, + int kind: int ref, + int parent: @regexpparent 
ref, + int idx: int ref, + varchar(900) tostring: string ref); + +@regexpparent = @regexpterm | @regexp_literal | @string_literal | @add_expr; + +case @regexpterm.kind of + 0 = @regexp_alt +| 1 = @regexp_seq +| 2 = @regexp_caret +| 3 = @regexp_dollar +| 4 = @regexp_wordboundary +| 5 = @regexp_nonwordboundary +| 6 = @regexp_positive_lookahead +| 7 = @regexp_negative_lookahead +| 8 = @regexp_star +| 9 = @regexp_plus +| 10 = @regexp_opt +| 11 = @regexp_range +| 12 = @regexp_dot +| 13 = @regexp_group +| 14 = @regexp_normal_constant +| 15 = @regexp_hex_escape +| 16 = @regexp_unicode_escape +| 17 = @regexp_dec_escape +| 18 = @regexp_oct_escape +| 19 = @regexp_ctrl_escape +| 20 = @regexp_char_class_escape +| 21 = @regexp_id_escape +| 22 = @regexp_backref +| 23 = @regexp_char_class +| 24 = @regexp_char_range +| 25 = @regexp_positive_lookbehind +| 26 = @regexp_negative_lookbehind +| 27 = @regexp_unicode_property_escape +| 28 = @regexp_quoted_string +| 29 = @regexp_intersection +| 30 = @regexp_subtraction; + +regexp_parse_errors (unique int id: @regexp_parse_error, + int regexp: @regexpterm ref, + varchar(900) message: string ref); + +@regexp_quantifier = @regexp_star | @regexp_plus | @regexp_opt | @regexp_range; +@regexp_escape = @regexp_char_escape | @regexp_char_class_escape | @regexp_unicode_property_escape; +@regexp_char_escape = @regexp_hex_escape | @regexp_unicode_escape | @regexp_dec_escape | @regexp_oct_escape | @regexp_ctrl_escape | @regexp_id_escape; +@regexp_constant = @regexp_normal_constant | @regexp_char_escape; +@regexp_lookahead = @regexp_positive_lookahead | @regexp_negative_lookahead; +@regexp_lookbehind = @regexp_positive_lookbehind | @regexp_negative_lookbehind; +@regexp_subpattern = @regexp_lookahead | @regexp_lookbehind; +@regexp_anchor = @regexp_dollar | @regexp_caret; + +is_greedy (int id: @regexp_quantifier ref); +range_quantifier_lower_bound (unique int id: @regexp_range ref, int lo: int ref); +range_quantifier_upper_bound (unique int id: 
@regexp_range ref, int hi: int ref); +is_capture (unique int id: @regexp_group ref, int number: int ref); +is_named_capture (unique int id: @regexp_group ref, string name: string ref); +is_inverted (int id: @regexp_char_class ref); +regexp_const_value (unique int id: @regexp_constant ref, varchar(1) value: string ref); +char_class_escape (unique int id: @regexp_char_class_escape ref, varchar(1) value: string ref); +backref (unique int id: @regexp_backref ref, int value: int ref); +named_backref (unique int id: @regexp_backref ref, string name: string ref); +unicode_property_escapename (unique int id: @regexp_unicode_property_escape ref, string name: string ref); +unicode_property_escapevalue (unique int id: @regexp_unicode_property_escape ref, string value: string ref); + +// tokens +#keyset[toplevel, idx] +tokeninfo (unique int id: @token, + int kind: int ref, + int toplevel: @toplevel ref, + int idx: int ref, + varchar(900) value: string ref); + +case @token.kind of + 0 = @token_eof +| 1 = @token_null_literal +| 2 = @token_boolean_literal +| 3 = @token_numeric_literal +| 4 = @token_string_literal +| 5 = @token_regular_expression +| 6 = @token_identifier +| 7 = @token_keyword +| 8 = @token_punctuator; + +// associate comments with the token immediately following them (which may be EOF) +next_token (int comment: @comment ref, int token: @token ref); + +// JSON +#keyset[parent, idx] +json (unique int id: @json_value, + int kind: int ref, + int parent: @json_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); + +json_literals (varchar(900) value: string ref, + varchar(900) raw: string ref, + unique int expr: @json_value ref); + +json_properties (int obj: @json_object ref, + varchar(900) property: string ref, + int value: @json_value ref); + +json_errors (unique int id: @json_parse_error, + varchar(900) message: string ref); + +json_locations(unique int locatable: @json_locatable ref, + int location: @location_default ref); + +case @json_value.kind 
of + 0 = @json_null +| 1 = @json_boolean +| 2 = @json_number +| 3 = @json_string +| 4 = @json_array +| 5 = @json_object; + +@json_parent = @json_object | @json_array | @file; + +@json_locatable = @json_value | @json_parse_error; + +// locations +@ast_node = @toplevel | @stmt | @expr | @property | @typeexpr; + +@locatable = @file + | @ast_node + | @comment + | @line + | @js_parse_error | @regexp_parse_error + | @regexpterm + | @json_locatable + | @token + | @cfg_node + | @jsdoc | @jsdoc_type_expr | @jsdoc_tag + | @yaml_locatable + | @xmllocatable + | @configLocatable + | @template_placeholder_tag; + +hasLocation (unique int locatable: @locatable ref, + int location: @location ref); + +// CFG +entry_cfg_node (unique int id: @entry_node, int container: @stmt_container ref); +exit_cfg_node (unique int id: @exit_node, int container: @stmt_container ref); +guard_node (unique int id: @guard_node, int kind: int ref, int test: @expr ref); +case @guard_node.kind of + 0 = @falsy_guard +| 1 = @truthy_guard; +@condition_guard = @falsy_guard | @truthy_guard; + +@synthetic_cfg_node = @entry_node | @exit_node | @guard_node; +@cfg_node = @synthetic_cfg_node | @expr_parent; + +successor (int pred: @cfg_node ref, int succ: @cfg_node ref); + +// JSDoc comments +jsdoc (unique int id: @jsdoc, varchar(900) description: string ref, int comment: @comment ref); +#keyset[parent, idx] +jsdoc_tags (unique int id: @jsdoc_tag, varchar(900) title: string ref, + int parent: @jsdoc ref, int idx: int ref, varchar(900) tostring: string ref); +jsdoc_tag_descriptions (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); +jsdoc_tag_names (unique int tag: @jsdoc_tag ref, varchar(900) text: string ref); + +#keyset[parent, idx] +jsdoc_type_exprs (unique int id: @jsdoc_type_expr, + int kind: int ref, + int parent: @jsdoc_type_expr_parent ref, + int idx: int ref, + varchar(900) tostring: string ref); +case @jsdoc_type_expr.kind of + 0 = @jsdoc_any_type_expr +| 1 = @jsdoc_null_type_expr +| 2 = 
@jsdoc_undefined_type_expr +| 3 = @jsdoc_unknown_type_expr +| 4 = @jsdoc_void_type_expr +| 5 = @jsdoc_named_type_expr +| 6 = @jsdoc_applied_type_expr +| 7 = @jsdoc_nullable_type_expr +| 8 = @jsdoc_non_nullable_type_expr +| 9 = @jsdoc_record_type_expr +| 10 = @jsdoc_array_type_expr +| 11 = @jsdoc_union_type_expr +| 12 = @jsdoc_function_type_expr +| 13 = @jsdoc_optional_type_expr +| 14 = @jsdoc_rest_type_expr +; + +#keyset[id, idx] +jsdoc_record_field_name (int id: @jsdoc_record_type_expr ref, int idx: int ref, varchar(900) name: string ref); +jsdoc_prefix_qualifier (int id: @jsdoc_type_expr ref); +jsdoc_has_new_parameter (int fn: @jsdoc_function_type_expr ref); + +@jsdoc_type_expr_parent = @jsdoc_type_expr | @jsdoc_tag; + +jsdoc_errors (unique int id: @jsdoc_error, int tag: @jsdoc_tag ref, varchar(900) message: string ref, varchar(900) tostring: string ref); + +@dataflownode = @expr | @function_decl_stmt | @class_decl_stmt | @namespace_declaration | @enum_declaration | @property; + +@optionalchainable = @call_expr | @propaccess; + +isOptionalChaining(int id: @optionalchainable ref); + +/** + * The time taken for the extraction of a file. + * This table contains non-deterministic content. + * + * The sum of the `time` column for each (`file`, `timerKind`) pair + * is the total time taken for extraction of `file`. The `extractionPhase` + * column provides a granular view of the extraction time of the file. + */ +extraction_time( + int file : @file ref, + // see `com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase`. + int extractionPhase: int ref, + // 0 for the elapsed CPU time in nanoseconds, 1 for the elapsed wallclock time in nanoseconds + int timerKind: int ref, + float time: float ref +) + +/** +* Non-timing related data for the extraction of a single file. +* This table contains non-deterministic content. 
+*/ +extraction_data( + int file : @file ref, + // the absolute path to the cache file + varchar(900) cacheFile: string ref, + boolean fromCache: boolean ref, + int length: int ref +) + +/*- YAML -*/ + +#keyset[parent, idx] +yaml (unique int id: @yaml_node, + int kind: int ref, + int parent: @yaml_node_parent ref, + int idx: int ref, + string tag: string ref, + string tostring: string ref); + +case @yaml_node.kind of + 0 = @yaml_scalar_node +| 1 = @yaml_mapping_node +| 2 = @yaml_sequence_node +| 3 = @yaml_alias_node +; + +@yaml_collection_node = @yaml_mapping_node | @yaml_sequence_node; + +@yaml_node_parent = @yaml_collection_node | @file; + +yaml_anchors (unique int node: @yaml_node ref, + string anchor: string ref); + +yaml_aliases (unique int alias: @yaml_alias_node ref, + string target: string ref); + +yaml_scalars (unique int scalar: @yaml_scalar_node ref, + int style: int ref, + string value: string ref); + +yaml_errors (unique int id: @yaml_error, + string message: string ref); + +yaml_locations(unique int locatable: @yaml_locatable ref, + int location: @location_default ref); + +@yaml_locatable = @yaml_node | @yaml_error; + +/*- XML Files -*/ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int 
id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +/*- Configuration files with key value pairs -*/ + +configs( + unique int id: @config +); + +configNames( + unique int id: @configName, + int config: @config ref, + string name: string ref +); + +configValues( + unique int id: @configValue, + int config: @config ref, + string value: string ref +); + +configLocations( + int locatable: @configLocatable ref, + int location: @location_default ref +); + +@configLocatable = @config | @configName | @configValue; diff --git a/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/upgrade.properties b/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/upgrade.properties new file mode 100644 index 000000000000..13f42c6a9d1b --- /dev/null +++ b/javascript/ql/lib/upgrades/c88c69174bd0dd4e95f1bcfbada68a2505e812c3/upgrade.properties @@ -0,0 +1,2 @@ +description: Add support for quoted string, intersection and subtraction +compatibility: backwards From 9cc26208d4cac843fe89a52dbffe996ea4ac18d7 Mon Sep 17 00:00:00 2001 From: Napalys Date: Thu, 6 Mar 2025 18:08:59 +0100 Subject: [PATCH 20/27] Add test cases for `v` flag operators in RegExp library-tests. 
--- .../CombinationOfOperators/printAst.expected | 66 ++++++++++ .../CombinationOfOperators/printAst.ql | 1 + .../CombinationOfOperators/tst.js | 1 + .../Intersection/printAst.expected | 91 +++++++++++++ .../VFlagOperations/Intersection/printAst.ql | 1 + .../VFlagOperations/Intersection/tst.js | 6 + .../QuotedString/printAst.expected | 121 ++++++++++++++++++ .../VFlagOperations/QuotedString/printAst.ql | 1 + .../VFlagOperations/QuotedString/tst.js | 6 + .../Subtraction/printAst.expected | 103 +++++++++++++++ .../VFlagOperations/Subtraction/printAst.ql | 1 + .../RegExp/VFlagOperations/Subtraction/tst.js | 3 + 12 files changed, 401 insertions(+) create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.ql create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/tst.js create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.ql create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/tst.js create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.ql create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/tst.js create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.ql create mode 100644 javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/tst.js diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected 
b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected new file mode 100644 index 000000000000..876ff9316062 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected @@ -0,0 +1,66 @@ +nodes +| tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | semmle.label | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | +| tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | semmle.label | [ExprStmt] /[[[ab1 ... a}]]/v; | +| tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | semmle.order | 1 | +| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | +| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | semmle.label | [???] 
[[ab1]&&[b1]] | +| tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.label | [RegExpCharacterClass] [[ab1]&&[b1]] | +| tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.label | [RegExpCharacterClass] [ab1] | +| tst.js:1:5:1:5 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +| tst.js:1:6:1:6 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:1:7:1:7 | [RegExpNormalConstant] 1 | semmle.label | [RegExpNormalConstant] 1 | +| tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | semmle.label | [RegExpCharacterClass] [b1] | +| tst.js:1:12:1:12 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:1:13:1:13 | [RegExpNormalConstant] 1 | semmle.label | [RegExpNormalConstant] 1 | +| tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | semmle.label | [RegExpCharacterClass] [a] | +| tst.js:1:19:1:19 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.label | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | +| tst.js:1:24:1:33 | [RegExpUnicodePropertyEscape] \\p{Number} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Number} | +| tst.js:1:34:1:40 | [???] \\q{z\|a} | semmle.label | [???] \\q{z\|a} | +| tst.js:1:37:1:37 | [RegExpNormalConstant] z | semmle.label | [RegExpNormalConstant] z | +| tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.label | [RegExpAlt] z\|a | +| tst.js:1:39:1:39 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +edges +| tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | 0 | +| tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.order | 0 | +| tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... 
a}]]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | semmle.label | 1 | +| tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | semmle.order | 1 | +| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.label | 0 | +| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.order | 0 | +| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | semmle.label | 1 | +| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | semmle.order | 1 | +| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.label | 2 | +| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.order | 2 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | 0 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.order | 0 | +| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.label | 0 | +| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.order | 0 | +| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | semmle.label | 1 | +| tst.js:1:3:1:15 | [???] 
[[ab1]&&[b1]] | tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | semmle.order | 1 | +| tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | semmle.label | 0 | +| tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | semmle.order | 0 | +| tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:5:1:5 | [RegExpNormalConstant] a | semmle.label | 0 | +| tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:5:1:5 | [RegExpNormalConstant] a | semmle.order | 0 | +| tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:6:1:6 | [RegExpNormalConstant] b | semmle.label | 1 | +| tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:6:1:6 | [RegExpNormalConstant] b | semmle.order | 1 | +| tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:7:1:7 | [RegExpNormalConstant] 1 | semmle.label | 2 | +| tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:7:1:7 | [RegExpNormalConstant] 1 | semmle.order | 2 | +| tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | tst.js:1:12:1:12 | [RegExpNormalConstant] b | semmle.label | 0 | +| tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | tst.js:1:12:1:12 | [RegExpNormalConstant] b | semmle.order | 0 | +| tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | tst.js:1:13:1:13 | [RegExpNormalConstant] 1 | semmle.label | 1 | +| tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | tst.js:1:13:1:13 | [RegExpNormalConstant] 1 | semmle.order | 1 | +| tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | tst.js:1:19:1:19 | [RegExpNormalConstant] a | semmle.label | 0 | +| tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | tst.js:1:19:1:19 | [RegExpNormalConstant] a | semmle.order | 0 | +| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:24:1:33 | [RegExpUnicodePropertyEscape] \\p{Number} | semmle.label | 0 | +| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:24:1:33 | [RegExpUnicodePropertyEscape] \\p{Number} | 
semmle.order | 0 | +| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:34:1:40 | [???] \\q{z\|a} | semmle.label | 1 | +| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:34:1:40 | [???] \\q{z\|a} | semmle.order | 1 | +| tst.js:1:34:1:40 | [???] \\q{z\|a} | tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.label | 0 | +| tst.js:1:34:1:40 | [???] \\q{z\|a} | tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.order | 0 | +| tst.js:1:37:1:39 | [RegExpAlt] z\|a | tst.js:1:37:1:37 | [RegExpNormalConstant] z | semmle.label | 0 | +| tst.js:1:37:1:39 | [RegExpAlt] z\|a | tst.js:1:37:1:37 | [RegExpNormalConstant] z | semmle.order | 0 | +| tst.js:1:37:1:39 | [RegExpAlt] z\|a | tst.js:1:39:1:39 | [RegExpNormalConstant] a | semmle.label | 1 | +| tst.js:1:37:1:39 | [RegExpAlt] z\|a | tst.js:1:39:1:39 | [RegExpNormalConstant] a | semmle.order | 1 | +graphProperties +| semmle.graphKind | tree | diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.ql b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.ql new file mode 100644 index 000000000000..8ceaf83964a3 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.ql @@ -0,0 +1 @@ +import semmle.javascript.PrintAst diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/tst.js b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/tst.js new file mode 100644 index 000000000000..bc9b0a966a44 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/tst.js @@ -0,0 +1 @@ +/[[[ab1]&&[b1]]--[a]--[\p{Number}\q{z|a}]]/v; diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected new file mode 100644 index 000000000000..dc638ef5cd25 
--- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected @@ -0,0 +1,91 @@ +nodes +| tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | semmle.label | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | +| tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | semmle.label | [ExprStmt] /[[abc] ... cd]]/v; | +| tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | semmle.order | 1 | +| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | semmle.label | [???] [[abc]&&[bcd]&&[cd]] | +| tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | semmle.label | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | +| tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.label | [RegExpCharacterClass] [abc] | +| tst.js:1:4:1:4 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +| tst.js:1:5:1:5 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:1:6:1:6 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | semmle.label | [RegExpCharacterClass] [bcd] | +| tst.js:1:11:1:11 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:1:12:1:12 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:1:13:1:13 | [RegExpNormalConstant] d | semmle.label | [RegExpNormalConstant] d | +| tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | semmle.label | [RegExpCharacterClass] [cd] | +| tst.js:1:18:1:18 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:1:19:1:19 | [RegExpNormalConstant] d | semmle.label | [RegExpNormalConstant] d | +| tst.js:2:1:2:11 | [RegExpLiteral] /abc&&bcd/v | semmle.label | [RegExpLiteral] /abc&&bcd/v | +| tst.js:2:1:2:12 | [ExprStmt] /abc&&bcd/v; | semmle.label | [ExprStmt] /abc&&bcd/v; | +| tst.js:2:1:2:12 | [ExprStmt] /abc&&bcd/v; | semmle.order | 2 | +| tst.js:2:2:2:9 | [RegExpNormalConstant] abc&&bcd | semmle.label | 
[RegExpNormalConstant] abc&&bcd | +| tst.js:3:1:3:15 | [RegExpLiteral] /[abc]&&[bcd]/v | semmle.label | [RegExpLiteral] /[abc]&&[bcd]/v | +| tst.js:3:1:3:16 | [ExprStmt] /[abc]&&[bcd]/v; | semmle.label | [ExprStmt] /[abc]&&[bcd]/v; | +| tst.js:3:1:3:16 | [ExprStmt] /[abc]&&[bcd]/v; | semmle.order | 3 | +| tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | semmle.label | [RegExpCharacterClass] [abc] | +| tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | semmle.label | [RegExpSequence] [abc]&&[bcd] | +| tst.js:3:3:3:3 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +| tst.js:3:4:3:4 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:3:5:3:5 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:3:7:3:8 | [RegExpNormalConstant] && | semmle.label | [RegExpNormalConstant] && | +| tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | semmle.label | [RegExpCharacterClass] [bcd] | +| tst.js:3:10:3:10 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:3:11:3:11 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:3:12:3:12 | [RegExpNormalConstant] d | semmle.label | [RegExpNormalConstant] d | +edges +| tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | semmle.label | 0 | +| tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | semmle.order | 0 | +| tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | semmle.label | 1 | +| tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | semmle.order | 1 | +| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | +| tst.js:1:2:1:21 | [???] 
[[abc]&&[bcd]&&[cd]] | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | +| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | semmle.label | 1 | +| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | semmle.order | 1 | +| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | semmle.label | 2 | +| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | semmle.order | 2 | +| tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | semmle.label | 0 | +| tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | semmle.order | 0 | +| tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:4:1:4 | [RegExpNormalConstant] a | semmle.label | 0 | +| tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:4:1:4 | [RegExpNormalConstant] a | semmle.order | 0 | +| tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:5:1:5 | [RegExpNormalConstant] b | semmle.label | 1 | +| tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:5:1:5 | [RegExpNormalConstant] b | semmle.order | 1 | +| tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:6:1:6 | [RegExpNormalConstant] c | semmle.label | 2 | +| tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:6:1:6 | [RegExpNormalConstant] c | semmle.order | 2 | +| tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | tst.js:1:11:1:11 | [RegExpNormalConstant] b | semmle.label | 0 | +| tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | tst.js:1:11:1:11 | [RegExpNormalConstant] b | semmle.order | 0 | +| tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | tst.js:1:12:1:12 | [RegExpNormalConstant] c | semmle.label | 1 | +| tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | tst.js:1:12:1:12 | [RegExpNormalConstant] c | semmle.order | 1 | +| 
tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | tst.js:1:13:1:13 | [RegExpNormalConstant] d | semmle.label | 2 | +| tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | tst.js:1:13:1:13 | [RegExpNormalConstant] d | semmle.order | 2 | +| tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | tst.js:1:18:1:18 | [RegExpNormalConstant] c | semmle.label | 0 | +| tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | tst.js:1:18:1:18 | [RegExpNormalConstant] c | semmle.order | 0 | +| tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | tst.js:1:19:1:19 | [RegExpNormalConstant] d | semmle.label | 1 | +| tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | tst.js:1:19:1:19 | [RegExpNormalConstant] d | semmle.order | 1 | +| tst.js:2:1:2:11 | [RegExpLiteral] /abc&&bcd/v | tst.js:2:2:2:9 | [RegExpNormalConstant] abc&&bcd | semmle.label | 0 | +| tst.js:2:1:2:11 | [RegExpLiteral] /abc&&bcd/v | tst.js:2:2:2:9 | [RegExpNormalConstant] abc&&bcd | semmle.order | 0 | +| tst.js:2:1:2:12 | [ExprStmt] /abc&&bcd/v; | tst.js:2:1:2:11 | [RegExpLiteral] /abc&&bcd/v | semmle.label | 1 | +| tst.js:2:1:2:12 | [ExprStmt] /abc&&bcd/v; | tst.js:2:1:2:11 | [RegExpLiteral] /abc&&bcd/v | semmle.order | 1 | +| tst.js:3:1:3:15 | [RegExpLiteral] /[abc]&&[bcd]/v | tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | semmle.label | 0 | +| tst.js:3:1:3:15 | [RegExpLiteral] /[abc]&&[bcd]/v | tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | semmle.order | 0 | +| tst.js:3:1:3:16 | [ExprStmt] /[abc]&&[bcd]/v; | tst.js:3:1:3:15 | [RegExpLiteral] /[abc]&&[bcd]/v | semmle.label | 1 | +| tst.js:3:1:3:16 | [ExprStmt] /[abc]&&[bcd]/v; | tst.js:3:1:3:15 | [RegExpLiteral] /[abc]&&[bcd]/v | semmle.order | 1 | +| tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | tst.js:3:3:3:3 | [RegExpNormalConstant] a | semmle.label | 0 | +| tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | tst.js:3:3:3:3 | [RegExpNormalConstant] a | semmle.order | 0 | +| tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | tst.js:3:4:3:4 | [RegExpNormalConstant] b | semmle.label 
| 1 | +| tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | tst.js:3:4:3:4 | [RegExpNormalConstant] b | semmle.order | 1 | +| tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | tst.js:3:5:3:5 | [RegExpNormalConstant] c | semmle.label | 2 | +| tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | tst.js:3:5:3:5 | [RegExpNormalConstant] c | semmle.order | 2 | +| tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | semmle.label | 0 | +| tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | tst.js:3:2:3:6 | [RegExpCharacterClass] [abc] | semmle.order | 0 | +| tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | tst.js:3:7:3:8 | [RegExpNormalConstant] && | semmle.label | 1 | +| tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | tst.js:3:7:3:8 | [RegExpNormalConstant] && | semmle.order | 1 | +| tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | semmle.label | 2 | +| tst.js:3:2:3:13 | [RegExpSequence] [abc]&&[bcd] | tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | semmle.order | 2 | +| tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | tst.js:3:10:3:10 | [RegExpNormalConstant] b | semmle.label | 0 | +| tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | tst.js:3:10:3:10 | [RegExpNormalConstant] b | semmle.order | 0 | +| tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | tst.js:3:11:3:11 | [RegExpNormalConstant] c | semmle.label | 1 | +| tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | tst.js:3:11:3:11 | [RegExpNormalConstant] c | semmle.order | 1 | +| tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | tst.js:3:12:3:12 | [RegExpNormalConstant] d | semmle.label | 2 | +| tst.js:3:9:3:13 | [RegExpCharacterClass] [bcd] | tst.js:3:12:3:12 | [RegExpNormalConstant] d | semmle.order | 2 | +graphProperties +| semmle.graphKind | tree | diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.ql b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.ql new file 
mode 100644 index 000000000000..8ceaf83964a3 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.ql @@ -0,0 +1 @@ +import semmle.javascript.PrintAst diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/tst.js b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/tst.js new file mode 100644 index 000000000000..b2aa6fc6322e --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/tst.js @@ -0,0 +1,6 @@ +/[[abc]&&[bcd]&&[cd]]/v; // Valid use of intersection operator, matches only c (the one character common to all three classes) +/abc&&bcd/v; // Valid regex, but no intersection operation: matches the literal string "abc&&bcd" +/[abc]&&[bcd]/v; // Valid regex, but incorrect intersection operation: + // - Matches a single character from [abc] + // - Then the literal "&&" + // - Then a single character from [bcd] diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected new file mode 100644 index 000000000000..a87b2b935f57 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected @@ -0,0 +1,121 @@ +nodes +| tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | semmle.label | [RegExpLiteral] /[\\q{abc}]/v | +| tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | semmle.label | [ExprStmt] /[\\q{abc}]/v; | +| tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | semmle.order | 1 | +| tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | semmle.label | [RegExpCharacterClass] [\\q{abc}] | +| tst.js:1:3:1:9 | [???] \\q{abc} | semmle.label | [???] \\q{abc} | +| tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.label | [RegExpNormalConstant] abc | +| tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | semmle.label | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | +| tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... 
cb}]/v; | semmle.label | [ExprStmt] /[\\q{ab ... cb}]/v; | +| tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... cb}]/v; | semmle.order | 2 | +| tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | semmle.label | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | +| tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | semmle.label | [???] \\q{abc\|cbd\|dcb} | +| tst.js:2:6:2:8 | [RegExpNormalConstant] abc | semmle.label | [RegExpNormalConstant] abc | +| tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.label | [RegExpAlt] abc\|cbd\|dcb | +| tst.js:2:10:2:12 | [RegExpNormalConstant] cbd | semmle.label | [RegExpNormalConstant] cbd | +| tst.js:2:14:2:16 | [RegExpNormalConstant] dcb | semmle.label | [RegExpNormalConstant] dcb | +| tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | semmle.label | [RegExpLiteral] /[\\q{\\}}]/v | +| tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | semmle.label | [ExprStmt] /[\\q{\\}}]/v; | +| tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | semmle.order | 3 | +| tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | semmle.label | [RegExpCharacterClass] [\\q{\\}}] | +| tst.js:3:3:3:8 | [???] \\q{\\}} | semmle.label | [???] \\q{\\}} | +| tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.label | [RegExpNormalConstant] \\} | +| tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | semmle.label | [RegExpLiteral] /[\\q{\\{}]/v | +| tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | semmle.label | [ExprStmt] /[\\q{\\{}]/v; | +| tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | semmle.order | 4 | +| tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | semmle.label | [RegExpCharacterClass] [\\q{\\{}] | +| tst.js:4:3:4:8 | [???] \\q{\\{} | semmle.label | [???] 
\\q{\\{} | +| tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.label | [RegExpNormalConstant] \\{ | +| tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | semmle.label | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | +| tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | semmle.label | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | +| tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | semmle.order | 5 | +| tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | semmle.label | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | +| tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | semmle.label | [???] \\q{cc\|\\}a\|cc} | +| tst.js:5:6:5:7 | [RegExpNormalConstant] cc | semmle.label | [RegExpNormalConstant] cc | +| tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.label | [RegExpAlt] cc\|\\}a\|cc | +| tst.js:5:9:5:11 | [RegExpNormalConstant] \\}a | semmle.label | [RegExpNormalConstant] \\}a | +| tst.js:5:13:5:14 | [RegExpNormalConstant] cc | semmle.label | [RegExpNormalConstant] cc | +| tst.js:6:1:6:12 | [RegExpLiteral] /[\\qq{a\|b}]/ | semmle.label | [RegExpLiteral] /[\\qq{a\|b}]/ | +| tst.js:6:1:6:13 | [ExprStmt] /[\\qq{a\|b}]/; | semmle.label | [ExprStmt] /[\\qq{a\|b}]/; | +| tst.js:6:1:6:13 | [ExprStmt] /[\\qq{a\|b}]/; | semmle.order | 6 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | semmle.label | [RegExpCharacterClass] [\\qq{a\|b}] | +| tst.js:6:3:6:4 | [RegExpIdentityEscape] \\q | semmle.label | [RegExpIdentityEscape] \\q | +| tst.js:6:5:6:5 | [RegExpNormalConstant] q | semmle.label | [RegExpNormalConstant] q | +| tst.js:6:6:6:6 | [RegExpNormalConstant] { | semmle.label | [RegExpNormalConstant] { | +| tst.js:6:7:6:7 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +| tst.js:6:8:6:8 | [RegExpNormalConstant] \| | semmle.label | [RegExpNormalConstant] \| | +| tst.js:6:9:6:9 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:6:10:6:10 | [RegExpNormalConstant] } | semmle.label | [RegExpNormalConstant] } 
| +edges +| tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | semmle.label | 0 | +| tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | semmle.order | 0 | +| tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | semmle.label | 1 | +| tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | semmle.order | 1 | +| tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | tst.js:1:3:1:9 | [???] \\q{abc} | semmle.label | 0 | +| tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | tst.js:1:3:1:9 | [???] \\q{abc} | semmle.order | 0 | +| tst.js:1:3:1:9 | [???] \\q{abc} | tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.label | 0 | +| tst.js:1:3:1:9 | [???] \\q{abc} | tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.order | 0 | +| tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | semmle.label | 0 | +| tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | semmle.order | 0 | +| tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... cb}]/v; | tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | semmle.label | 1 | +| tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... cb}]/v; | tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | semmle.order | 1 | +| tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | semmle.label | 0 | +| tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | semmle.order | 0 | +| tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.label | 0 | +| tst.js:2:3:2:17 | [???] 
\\q{abc\|cbd\|dcb} | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.order | 0 | +| tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:6:2:8 | [RegExpNormalConstant] abc | semmle.label | 0 | +| tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:6:2:8 | [RegExpNormalConstant] abc | semmle.order | 0 | +| tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:10:2:12 | [RegExpNormalConstant] cbd | semmle.label | 1 | +| tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:10:2:12 | [RegExpNormalConstant] cbd | semmle.order | 1 | +| tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:14:2:16 | [RegExpNormalConstant] dcb | semmle.label | 2 | +| tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:14:2:16 | [RegExpNormalConstant] dcb | semmle.order | 2 | +| tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | semmle.label | 0 | +| tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | semmle.order | 0 | +| tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | semmle.label | 1 | +| tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | semmle.order | 1 | +| tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | tst.js:3:3:3:8 | [???] \\q{\\}} | semmle.label | 0 | +| tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | tst.js:3:3:3:8 | [???] \\q{\\}} | semmle.order | 0 | +| tst.js:3:3:3:8 | [???] \\q{\\}} | tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.label | 0 | +| tst.js:3:3:3:8 | [???] 
\\q{\\}} | tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.order | 0 | +| tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | semmle.label | 0 | +| tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | semmle.order | 0 | +| tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | semmle.label | 1 | +| tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | semmle.order | 1 | +| tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | tst.js:4:3:4:8 | [???] \\q{\\{} | semmle.label | 0 | +| tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | tst.js:4:3:4:8 | [???] \\q{\\{} | semmle.order | 0 | +| tst.js:4:3:4:8 | [???] \\q{\\{} | tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.label | 0 | +| tst.js:4:3:4:8 | [???] \\q{\\{} | tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.order | 0 | +| tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | semmle.label | 0 | +| tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | semmle.order | 0 | +| tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | semmle.label | 1 | +| tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | semmle.order | 1 | +| tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | semmle.label | 0 | +| tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | semmle.order | 0 | +| tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.label | 0 | +| tst.js:5:3:5:15 | [???] 
\\q{cc\|\\}a\|cc} | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.order | 0 | +| tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:6:5:7 | [RegExpNormalConstant] cc | semmle.label | 0 | +| tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:6:5:7 | [RegExpNormalConstant] cc | semmle.order | 0 | +| tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:9:5:11 | [RegExpNormalConstant] \\}a | semmle.label | 1 | +| tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:9:5:11 | [RegExpNormalConstant] \\}a | semmle.order | 1 | +| tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:13:5:14 | [RegExpNormalConstant] cc | semmle.label | 2 | +| tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:13:5:14 | [RegExpNormalConstant] cc | semmle.order | 2 | +| tst.js:6:1:6:12 | [RegExpLiteral] /[\\qq{a\|b}]/ | tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | semmle.label | 0 | +| tst.js:6:1:6:12 | [RegExpLiteral] /[\\qq{a\|b}]/ | tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | semmle.order | 0 | +| tst.js:6:1:6:13 | [ExprStmt] /[\\qq{a\|b}]/; | tst.js:6:1:6:12 | [RegExpLiteral] /[\\qq{a\|b}]/ | semmle.label | 1 | +| tst.js:6:1:6:13 | [ExprStmt] /[\\qq{a\|b}]/; | tst.js:6:1:6:12 | [RegExpLiteral] /[\\qq{a\|b}]/ | semmle.order | 1 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:3:6:4 | [RegExpIdentityEscape] \\q | semmle.label | 0 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:3:6:4 | [RegExpIdentityEscape] \\q | semmle.order | 0 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:5:6:5 | [RegExpNormalConstant] q | semmle.label | 1 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:5:6:5 | [RegExpNormalConstant] q | semmle.order | 1 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:6:6:6 | [RegExpNormalConstant] { | semmle.label | 2 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:6:6:6 | [RegExpNormalConstant] { | semmle.order | 2 | 
+| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:7:6:7 | [RegExpNormalConstant] a | semmle.label | 3 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:7:6:7 | [RegExpNormalConstant] a | semmle.order | 3 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:8:6:8 | [RegExpNormalConstant] \| | semmle.label | 4 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:8:6:8 | [RegExpNormalConstant] \| | semmle.order | 4 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:9:6:9 | [RegExpNormalConstant] b | semmle.label | 5 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:9:6:9 | [RegExpNormalConstant] b | semmle.order | 5 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:10:6:10 | [RegExpNormalConstant] } | semmle.label | 6 | +| tst.js:6:2:6:11 | [RegExpCharacterClass] [\\qq{a\|b}] | tst.js:6:10:6:10 | [RegExpNormalConstant] } | semmle.order | 6 | +graphProperties +| semmle.graphKind | tree | diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.ql b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.ql new file mode 100644 index 000000000000..8ceaf83964a3 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.ql @@ -0,0 +1 @@ +import semmle.javascript.PrintAst diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/tst.js b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/tst.js new file mode 100644 index 000000000000..37f010aacd48 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/tst.js @@ -0,0 +1,6 @@ +/[\q{abc}]/v; +/[\q{abc|cbd|dcb}]/v; +/[\q{\}}]/v; +/[\q{\{}]/v; +/[\q{cc|\}a|cc}]/v; +/[\qq{a|b}]/; // Since v flag is not present matches 'q{a|b}' diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected 
b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected new file mode 100644 index 000000000000..a2038941ac82 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected @@ -0,0 +1,103 @@ +nodes +| tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | semmle.label | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | +| tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | semmle.label | [ExprStmt] /[\\p{Sc ... er}]/v; | +| tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | semmle.order | 1 | +| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | +| tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | +| tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} | +| tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | semmle.label | [RegExpLiteral] /[[abc]--[cbd]]/v | +| tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | semmle.label | [ExprStmt] /[[abc]--[cbd]]/v; | +| tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | semmle.order | 2 | +| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | semmle.label | [???] 
[[abc]--[cbd]] | +| tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | semmle.label | [RegExpCharacterClass] [[abc]--[cbd]] | +| tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.label | [RegExpCharacterClass] [abc] | +| tst.js:2:4:2:4 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +| tst.js:2:5:2:5 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:2:6:2:6 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | semmle.label | [RegExpCharacterClass] [cbd] | +| tst.js:2:11:2:11 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:2:12:2:12 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:2:13:2:13 | [RegExpNormalConstant] d | semmle.label | [RegExpNormalConstant] d | +| tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | semmle.label | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | +| tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... de]]/v; | semmle.label | [ExprStmt] /[[abc] ... de]]/v; | +| tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... de]]/v; | semmle.order | 3 | +| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | semmle.label | [???] 
[[abc]--[cbd]--[bde]] | +| tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | semmle.label | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | +| tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.label | [RegExpCharacterClass] [abc] | +| tst.js:3:4:3:4 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | +| tst.js:3:5:3:5 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:3:6:3:6 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | semmle.label | [RegExpCharacterClass] [cbd] | +| tst.js:3:11:3:11 | [RegExpNormalConstant] c | semmle.label | [RegExpNormalConstant] c | +| tst.js:3:12:3:12 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:3:13:3:13 | [RegExpNormalConstant] d | semmle.label | [RegExpNormalConstant] d | +| tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | semmle.label | [RegExpCharacterClass] [bde] | +| tst.js:3:18:3:18 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | +| tst.js:3:19:3:19 | [RegExpNormalConstant] d | semmle.label | [RegExpNormalConstant] d | +| tst.js:3:20:3:20 | [RegExpNormalConstant] e | semmle.label | [RegExpNormalConstant] e | +edges +| tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | 0 | +| tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.order | 0 | +| tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | semmle.label | 1 | +| tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | semmle.order | 1 | +| tst.js:1:2:1:42 | [???] 
[\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 | +| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 | +| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 1 | +| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 1 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | 0 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.order | 0 | +| tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | semmle.label | 0 | +| tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | semmle.order | 0 | +| tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | semmle.label | 1 | +| tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | semmle.order | 1 | +| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | +| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | +| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | semmle.label | 1 | +| tst.js:2:2:2:15 | [???] 
[[abc]--[cbd]] | tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | semmle.order | 1 | +| tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | semmle.label | 0 | +| tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | semmle.order | 0 | +| tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:4:2:4 | [RegExpNormalConstant] a | semmle.label | 0 | +| tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:4:2:4 | [RegExpNormalConstant] a | semmle.order | 0 | +| tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:5:2:5 | [RegExpNormalConstant] b | semmle.label | 1 | +| tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:5:2:5 | [RegExpNormalConstant] b | semmle.order | 1 | +| tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:6:2:6 | [RegExpNormalConstant] c | semmle.label | 2 | +| tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:6:2:6 | [RegExpNormalConstant] c | semmle.order | 2 | +| tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | tst.js:2:11:2:11 | [RegExpNormalConstant] c | semmle.label | 0 | +| tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | tst.js:2:11:2:11 | [RegExpNormalConstant] c | semmle.order | 0 | +| tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | tst.js:2:12:2:12 | [RegExpNormalConstant] b | semmle.label | 1 | +| tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | tst.js:2:12:2:12 | [RegExpNormalConstant] b | semmle.order | 1 | +| tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | tst.js:2:13:2:13 | [RegExpNormalConstant] d | semmle.label | 2 | +| tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | tst.js:2:13:2:13 | [RegExpNormalConstant] d | semmle.order | 2 | +| tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | semmle.label | 0 | +| tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | 
semmle.order | 0 | +| tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... de]]/v; | tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | semmle.label | 1 | +| tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... de]]/v; | tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | semmle.order | 1 | +| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | +| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | +| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | semmle.label | 1 | +| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | semmle.order | 1 | +| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | semmle.label | 2 | +| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | semmle.order | 2 | +| tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | semmle.label | 0 | +| tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | tst.js:3:2:3:22 | [???] 
[[abc]--[cbd]--[bde]] | semmle.order | 0 | +| tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:4:3:4 | [RegExpNormalConstant] a | semmle.label | 0 | +| tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:4:3:4 | [RegExpNormalConstant] a | semmle.order | 0 | +| tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:5:3:5 | [RegExpNormalConstant] b | semmle.label | 1 | +| tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:5:3:5 | [RegExpNormalConstant] b | semmle.order | 1 | +| tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:6:3:6 | [RegExpNormalConstant] c | semmle.label | 2 | +| tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:6:3:6 | [RegExpNormalConstant] c | semmle.order | 2 | +| tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | tst.js:3:11:3:11 | [RegExpNormalConstant] c | semmle.label | 0 | +| tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | tst.js:3:11:3:11 | [RegExpNormalConstant] c | semmle.order | 0 | +| tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | tst.js:3:12:3:12 | [RegExpNormalConstant] b | semmle.label | 1 | +| tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | tst.js:3:12:3:12 | [RegExpNormalConstant] b | semmle.order | 1 | +| tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | tst.js:3:13:3:13 | [RegExpNormalConstant] d | semmle.label | 2 | +| tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | tst.js:3:13:3:13 | [RegExpNormalConstant] d | semmle.order | 2 | +| tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | tst.js:3:18:3:18 | [RegExpNormalConstant] b | semmle.label | 0 | +| tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | tst.js:3:18:3:18 | [RegExpNormalConstant] b | semmle.order | 0 | +| tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | tst.js:3:19:3:19 | [RegExpNormalConstant] d | semmle.label | 1 | +| tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | tst.js:3:19:3:19 | [RegExpNormalConstant] d | semmle.order | 1 | +| tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | tst.js:3:20:3:20 | 
[RegExpNormalConstant] e | semmle.label | 2 | +| tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | tst.js:3:20:3:20 | [RegExpNormalConstant] e | semmle.order | 2 | +graphProperties +| semmle.graphKind | tree | diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.ql b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.ql new file mode 100644 index 000000000000..8ceaf83964a3 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.ql @@ -0,0 +1 @@ +import semmle.javascript.PrintAst diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/tst.js b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/tst.js new file mode 100644 index 000000000000..918375fb911c --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/tst.js @@ -0,0 +1,3 @@ +/[\p{Script_Extensions=Greek}--\p{Letter}]/v; +/[[abc]--[cbd]]/v; +/[[abc]--[cbd]--[bde]]/v; From e0f20b2bd1a63355ff676e99a87fdb4bd5752d37 Mon Sep 17 00:00:00 2001 From: Napalys Date: Fri, 7 Mar 2025 08:58:19 +0100 Subject: [PATCH 21/27] Add RegExpIntersection class to support intersection terms in regex --- .../ql/lib/semmle/javascript/Regexp.qll | 28 +++++++++++++++++++ .../CombinationOfOperators/printAst.expected | 14 +++++----- .../Intersection/printAst.expected | 18 ++++++------ 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll b/javascript/ql/lib/semmle/javascript/Regexp.qll index acfc888756e8..37f80e9bfd94 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -301,6 +301,34 @@ class RegExpAlt extends RegExpTerm, @regexp_alt { override string getAPrimaryQlClass() { result = "RegExpAlt" } } +/** + * An intersection term, that is, a term of the form `[[a]&&[ab]]`. 
+ * + * Example: + * + * ``` + * /[[abc]&&[bcd]]/v - which matches 'b' and 'c' only. + * ``` + */ +class RegExpIntersection extends RegExpTerm, @regexp_intersection { + /** Gets an intersected term of this term. */ + RegExpTerm getIntersectedTerm() { result = this.getAChild() } + + /** Gets the number of intersected terms of this term. */ + int getNumIntersectedTerm() { result = this.getNumChild() } + + override predicate isNullable() { this.getIntersectedTerm().isNullable() } + + override string getAMatchedString() { + exists(string s | s = this.getChild(0).getAMatchedString() | + forall(int i | i in [1 .. this.getNumChild() - 1] | s = this.getChild(i).getAMatchedString()) and + result = s + ) + } + + override string getAPrimaryQlClass() { result = "RegExpIntersection" } +} + /** * A sequence term. * diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected index 876ff9316062..d7fc98f28ffb 100644 --- a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected @@ -4,8 +4,8 @@ nodes | tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | semmle.order | 1 | | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | | tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | -| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | semmle.label | [???] 
[[ab1]&&[b1]] | | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.label | [RegExpCharacterClass] [[ab1]&&[b1]] | +| tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | semmle.label | [RegExpIntersection] [[ab1]&&[b1]] | | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.label | [RegExpCharacterClass] [ab1] | | tst.js:1:5:1:5 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | | tst.js:1:6:1:6 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | @@ -34,12 +34,12 @@ edges | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.order | 2 | | tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | 0 | | tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.order | 0 | -| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.label | 0 | -| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.order | 0 | -| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | semmle.label | 1 | -| tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | semmle.order | 1 | -| tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [???] [[ab1]&&[b1]] | semmle.label | 0 | -| tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [???] 
[[ab1]&&[b1]] | semmle.order | 0 | +| tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | semmle.label | 0 | +| tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | semmle.order | 0 | +| tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.label | 0 | +| tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.order | 0 | +| tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | semmle.label | 1 | +| tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | tst.js:1:11:1:14 | [RegExpCharacterClass] [b1] | semmle.order | 1 | | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:5:1:5 | [RegExpNormalConstant] a | semmle.label | 0 | | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:5:1:5 | [RegExpNormalConstant] a | semmle.order | 0 | | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | tst.js:1:6:1:6 | [RegExpNormalConstant] b | semmle.label | 1 | diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected index dc638ef5cd25..39aec1194e2a 100644 --- a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Intersection/printAst.expected @@ -2,8 +2,8 @@ nodes | tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | semmle.label | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | | tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | semmle.label | [ExprStmt] /[[abc] ... cd]]/v; | | tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | semmle.order | 1 | -| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | semmle.label | [???] 
[[abc]&&[bcd]&&[cd]] | | tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | semmle.label | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | +| tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | semmle.label | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.label | [RegExpCharacterClass] [abc] | | tst.js:1:4:1:4 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | | tst.js:1:5:1:5 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | @@ -37,14 +37,14 @@ edges | tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | semmle.order | 0 | | tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | semmle.label | 1 | | tst.js:1:1:1:24 | [ExprStmt] /[[abc] ... cd]]/v; | tst.js:1:1:1:23 | [RegExpLiteral] /[[abc]&&[bcd]&&[cd]]/v | semmle.order | 1 | -| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | -| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | -| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | semmle.label | 1 | -| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | semmle.order | 1 | -| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | semmle.label | 2 | -| tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | semmle.order | 2 | -| tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | tst.js:1:2:1:21 | [???] [[abc]&&[bcd]&&[cd]] | semmle.label | 0 | -| tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | tst.js:1:2:1:21 | [???] 
[[abc]&&[bcd]&&[cd]] | semmle.order | 0 | +| tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | semmle.label | 0 | +| tst.js:1:2:1:21 | [RegExpCharacterClass] [[abc]&&[bcd]&&[cd]] | tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | semmle.order | 0 | +| tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | +| tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | +| tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | semmle.label | 1 | +| tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | tst.js:1:10:1:14 | [RegExpCharacterClass] [bcd] | semmle.order | 1 | +| tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | semmle.label | 2 | +| tst.js:1:2:1:21 | [RegExpIntersection] [[abc]&&[bcd]&&[cd]] | tst.js:1:17:1:20 | [RegExpCharacterClass] [cd] | semmle.order | 2 | | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:4:1:4 | [RegExpNormalConstant] a | semmle.label | 0 | | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:4:1:4 | [RegExpNormalConstant] a | semmle.order | 0 | | tst.js:1:3:1:7 | [RegExpCharacterClass] [abc] | tst.js:1:5:1:5 | [RegExpNormalConstant] b | semmle.label | 1 | From 8cbc0aea056c5a5d0ded3df01f23c7331bdef838 Mon Sep 17 00:00:00 2001 From: Napalys Date: Fri, 7 Mar 2025 10:42:10 +0100 Subject: [PATCH 22/27] Add `RegExpQuotedString` class to support quoted string escapes in regex --- .../ql/lib/semmle/javascript/Regexp.qll | 22 ++++++++ .../CombinationOfOperators/printAst.expected | 10 ++-- .../QuotedString/printAst.expected | 50 +++++++++---------- 3 files changed, 52 insertions(+), 30 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll 
b/javascript/ql/lib/semmle/javascript/Regexp.qll index 37f80e9bfd94..5f934307ff01 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -1170,6 +1170,28 @@ private class StringConcatRegExpPatternSource extends RegExpPatternSource { override RegExpTerm getRegExpTerm() { result = this.asExpr().(AddExpr).asRegExp() } } +/** + * A quoted string escape in a regular expression, using the `\q` syntax. + * The only operation supported inside a quoted string is alternation, using `|`. + * + * Example: + * + * ``` + * \q{foo} + * \q{a|b|c} + * ``` + */ +class RegExpQuotedString extends RegExpTerm, @regexp_quoted_string { + /** Gets the term representing the contents of this quoted string. */ + RegExpTerm getQuotedString() { result = this.getAChild() } + + override predicate isNullable() { none() } + + override string getAMatchedString() { result = this.getQuotedString().getAMatchedString() } + + override string getAPrimaryQlClass() { result = "RegExpQuotedString" } +} + module RegExp { /** Gets the string `"?"` used to represent a regular expression whose flags are unknown. */ string unknownFlag() { result = "?" 
} diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected index d7fc98f28ffb..24688152bef7 100644 --- a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected @@ -17,7 +17,7 @@ nodes | tst.js:1:19:1:19 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.label | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | | tst.js:1:24:1:33 | [RegExpUnicodePropertyEscape] \\p{Number} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Number} | -| tst.js:1:34:1:40 | [???] \\q{z\|a} | semmle.label | [???] \\q{z\|a} | +| tst.js:1:34:1:40 | [RegExpQuotedString] \\q{z\|a} | semmle.label | [RegExpQuotedString] \\q{z\|a} | | tst.js:1:37:1:37 | [RegExpNormalConstant] z | semmle.label | [RegExpNormalConstant] z | | tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.label | [RegExpAlt] z\|a | | tst.js:1:39:1:39 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | @@ -54,10 +54,10 @@ edges | tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | tst.js:1:19:1:19 | [RegExpNormalConstant] a | semmle.order | 0 | | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:24:1:33 | [RegExpUnicodePropertyEscape] \\p{Number} | semmle.label | 0 | | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:24:1:33 | [RegExpUnicodePropertyEscape] \\p{Number} | semmle.order | 0 | -| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:34:1:40 | [???] \\q{z\|a} | semmle.label | 1 | -| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:34:1:40 | [???] \\q{z\|a} | semmle.order | 1 | -| tst.js:1:34:1:40 | [???] 
\\q{z\|a} | tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.label | 0 | -| tst.js:1:34:1:40 | [???] \\q{z\|a} | tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.order | 0 | +| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:34:1:40 | [RegExpQuotedString] \\q{z\|a} | semmle.label | 1 | +| tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | tst.js:1:34:1:40 | [RegExpQuotedString] \\q{z\|a} | semmle.order | 1 | +| tst.js:1:34:1:40 | [RegExpQuotedString] \\q{z\|a} | tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.label | 0 | +| tst.js:1:34:1:40 | [RegExpQuotedString] \\q{z\|a} | tst.js:1:37:1:39 | [RegExpAlt] z\|a | semmle.order | 0 | | tst.js:1:37:1:39 | [RegExpAlt] z\|a | tst.js:1:37:1:37 | [RegExpNormalConstant] z | semmle.label | 0 | | tst.js:1:37:1:39 | [RegExpAlt] z\|a | tst.js:1:37:1:37 | [RegExpNormalConstant] z | semmle.order | 0 | | tst.js:1:37:1:39 | [RegExpAlt] z\|a | tst.js:1:39:1:39 | [RegExpNormalConstant] a | semmle.label | 1 | diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected index a87b2b935f57..fc47fce03cd4 100644 --- a/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/QuotedString/printAst.expected @@ -3,13 +3,13 @@ nodes | tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | semmle.label | [ExprStmt] /[\\q{abc}]/v; | | tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | semmle.order | 1 | | tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | semmle.label | [RegExpCharacterClass] [\\q{abc}] | -| tst.js:1:3:1:9 | [???] \\q{abc} | semmle.label | [???] 
\\q{abc} | +| tst.js:1:3:1:9 | [RegExpQuotedString] \\q{abc} | semmle.label | [RegExpQuotedString] \\q{abc} | | tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.label | [RegExpNormalConstant] abc | | tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | semmle.label | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | | tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... cb}]/v; | semmle.label | [ExprStmt] /[\\q{ab ... cb}]/v; | | tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... cb}]/v; | semmle.order | 2 | | tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | semmle.label | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | -| tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | semmle.label | [???] \\q{abc\|cbd\|dcb} | +| tst.js:2:3:2:17 | [RegExpQuotedString] \\q{abc\|cbd\|dcb} | semmle.label | [RegExpQuotedString] \\q{abc\|cbd\|dcb} | | tst.js:2:6:2:8 | [RegExpNormalConstant] abc | semmle.label | [RegExpNormalConstant] abc | | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.label | [RegExpAlt] abc\|cbd\|dcb | | tst.js:2:10:2:12 | [RegExpNormalConstant] cbd | semmle.label | [RegExpNormalConstant] cbd | @@ -18,19 +18,19 @@ nodes | tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | semmle.label | [ExprStmt] /[\\q{\\}}]/v; | | tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | semmle.order | 3 | | tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | semmle.label | [RegExpCharacterClass] [\\q{\\}}] | -| tst.js:3:3:3:8 | [???] \\q{\\}} | semmle.label | [???] 
\\q{\\}} | +| tst.js:3:3:3:8 | [RegExpQuotedString] \\q{\\}} | semmle.label | [RegExpQuotedString] \\q{\\}} | | tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.label | [RegExpNormalConstant] \\} | | tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | semmle.label | [RegExpLiteral] /[\\q{\\{}]/v | | tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | semmle.label | [ExprStmt] /[\\q{\\{}]/v; | | tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | semmle.order | 4 | | tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | semmle.label | [RegExpCharacterClass] [\\q{\\{}] | -| tst.js:4:3:4:8 | [???] \\q{\\{} | semmle.label | [???] \\q{\\{} | +| tst.js:4:3:4:8 | [RegExpQuotedString] \\q{\\{} | semmle.label | [RegExpQuotedString] \\q{\\{} | | tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.label | [RegExpNormalConstant] \\{ | | tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | semmle.label | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | | tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | semmle.label | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | | tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | semmle.order | 5 | | tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | semmle.label | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | -| tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | semmle.label | [???] 
\\q{cc\|\\}a\|cc} | +| tst.js:5:3:5:15 | [RegExpQuotedString] \\q{cc\|\\}a\|cc} | semmle.label | [RegExpQuotedString] \\q{cc\|\\}a\|cc} | | tst.js:5:6:5:7 | [RegExpNormalConstant] cc | semmle.label | [RegExpNormalConstant] cc | | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.label | [RegExpAlt] cc\|\\}a\|cc | | tst.js:5:9:5:11 | [RegExpNormalConstant] \\}a | semmle.label | [RegExpNormalConstant] \\}a | @@ -51,18 +51,18 @@ edges | tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | semmle.order | 0 | | tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | semmle.label | 1 | | tst.js:1:1:1:13 | [ExprStmt] /[\\q{abc}]/v; | tst.js:1:1:1:12 | [RegExpLiteral] /[\\q{abc}]/v | semmle.order | 1 | -| tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | tst.js:1:3:1:9 | [???] \\q{abc} | semmle.label | 0 | -| tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | tst.js:1:3:1:9 | [???] \\q{abc} | semmle.order | 0 | -| tst.js:1:3:1:9 | [???] \\q{abc} | tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.label | 0 | -| tst.js:1:3:1:9 | [???] 
\\q{abc} | tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.order | 0 | +| tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | tst.js:1:3:1:9 | [RegExpQuotedString] \\q{abc} | semmle.label | 0 | +| tst.js:1:2:1:10 | [RegExpCharacterClass] [\\q{abc}] | tst.js:1:3:1:9 | [RegExpQuotedString] \\q{abc} | semmle.order | 0 | +| tst.js:1:3:1:9 | [RegExpQuotedString] \\q{abc} | tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.label | 0 | +| tst.js:1:3:1:9 | [RegExpQuotedString] \\q{abc} | tst.js:1:6:1:8 | [RegExpNormalConstant] abc | semmle.order | 0 | | tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | semmle.label | 0 | | tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | semmle.order | 0 | | tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... cb}]/v; | tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | semmle.label | 1 | | tst.js:2:1:2:21 | [ExprStmt] /[\\q{ab ... cb}]/v; | tst.js:2:1:2:20 | [RegExpLiteral] /[\\q{abc\|cbd\|dcb}]/v | semmle.order | 1 | -| tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | semmle.label | 0 | -| tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | semmle.order | 0 | -| tst.js:2:3:2:17 | [???] \\q{abc\|cbd\|dcb} | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.label | 0 | -| tst.js:2:3:2:17 | [???] 
\\q{abc\|cbd\|dcb} | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.order | 0 | +| tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | tst.js:2:3:2:17 | [RegExpQuotedString] \\q{abc\|cbd\|dcb} | semmle.label | 0 | +| tst.js:2:2:2:18 | [RegExpCharacterClass] [\\q{abc\|cbd\|dcb}] | tst.js:2:3:2:17 | [RegExpQuotedString] \\q{abc\|cbd\|dcb} | semmle.order | 0 | +| tst.js:2:3:2:17 | [RegExpQuotedString] \\q{abc\|cbd\|dcb} | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.label | 0 | +| tst.js:2:3:2:17 | [RegExpQuotedString] \\q{abc\|cbd\|dcb} | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | semmle.order | 0 | | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:6:2:8 | [RegExpNormalConstant] abc | semmle.label | 0 | | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:6:2:8 | [RegExpNormalConstant] abc | semmle.order | 0 | | tst.js:2:6:2:16 | [RegExpAlt] abc\|cbd\|dcb | tst.js:2:10:2:12 | [RegExpNormalConstant] cbd | semmle.label | 1 | @@ -73,26 +73,26 @@ edges | tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | semmle.order | 0 | | tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | semmle.label | 1 | | tst.js:3:1:3:12 | [ExprStmt] /[\\q{\\}}]/v; | tst.js:3:1:3:11 | [RegExpLiteral] /[\\q{\\}}]/v | semmle.order | 1 | -| tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | tst.js:3:3:3:8 | [???] \\q{\\}} | semmle.label | 0 | -| tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | tst.js:3:3:3:8 | [???] \\q{\\}} | semmle.order | 0 | -| tst.js:3:3:3:8 | [???] \\q{\\}} | tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.label | 0 | -| tst.js:3:3:3:8 | [???] 
\\q{\\}} | tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.order | 0 | +| tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | tst.js:3:3:3:8 | [RegExpQuotedString] \\q{\\}} | semmle.label | 0 | +| tst.js:3:2:3:9 | [RegExpCharacterClass] [\\q{\\}}] | tst.js:3:3:3:8 | [RegExpQuotedString] \\q{\\}} | semmle.order | 0 | +| tst.js:3:3:3:8 | [RegExpQuotedString] \\q{\\}} | tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.label | 0 | +| tst.js:3:3:3:8 | [RegExpQuotedString] \\q{\\}} | tst.js:3:6:3:7 | [RegExpNormalConstant] \\} | semmle.order | 0 | | tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | semmle.label | 0 | | tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | semmle.order | 0 | | tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | semmle.label | 1 | | tst.js:4:1:4:12 | [ExprStmt] /[\\q{\\{}]/v; | tst.js:4:1:4:11 | [RegExpLiteral] /[\\q{\\{}]/v | semmle.order | 1 | -| tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | tst.js:4:3:4:8 | [???] \\q{\\{} | semmle.label | 0 | -| tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | tst.js:4:3:4:8 | [???] \\q{\\{} | semmle.order | 0 | -| tst.js:4:3:4:8 | [???] \\q{\\{} | tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.label | 0 | -| tst.js:4:3:4:8 | [???] 
\\q{\\{} | tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.order | 0 | +| tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | tst.js:4:3:4:8 | [RegExpQuotedString] \\q{\\{} | semmle.label | 0 | +| tst.js:4:2:4:9 | [RegExpCharacterClass] [\\q{\\{}] | tst.js:4:3:4:8 | [RegExpQuotedString] \\q{\\{} | semmle.order | 0 | +| tst.js:4:3:4:8 | [RegExpQuotedString] \\q{\\{} | tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.label | 0 | +| tst.js:4:3:4:8 | [RegExpQuotedString] \\q{\\{} | tst.js:4:6:4:7 | [RegExpNormalConstant] \\{ | semmle.order | 0 | | tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | semmle.label | 0 | | tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | semmle.order | 0 | | tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | semmle.label | 1 | | tst.js:5:1:5:19 | [ExprStmt] /[\\q{cc\|\\}a\|cc}]/v; | tst.js:5:1:5:18 | [RegExpLiteral] /[\\q{cc\|\\}a\|cc}]/v | semmle.order | 1 | -| tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | semmle.label | 0 | -| tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | semmle.order | 0 | -| tst.js:5:3:5:15 | [???] \\q{cc\|\\}a\|cc} | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.label | 0 | -| tst.js:5:3:5:15 | [???] 
\\q{cc\|\\}a\|cc} | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.order | 0 | +| tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | tst.js:5:3:5:15 | [RegExpQuotedString] \\q{cc\|\\}a\|cc} | semmle.label | 0 | +| tst.js:5:2:5:16 | [RegExpCharacterClass] [\\q{cc\|\\}a\|cc}] | tst.js:5:3:5:15 | [RegExpQuotedString] \\q{cc\|\\}a\|cc} | semmle.order | 0 | +| tst.js:5:3:5:15 | [RegExpQuotedString] \\q{cc\|\\}a\|cc} | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.label | 0 | +| tst.js:5:3:5:15 | [RegExpQuotedString] \\q{cc\|\\}a\|cc} | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | semmle.order | 0 | | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:6:5:7 | [RegExpNormalConstant] cc | semmle.label | 0 | | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:6:5:7 | [RegExpNormalConstant] cc | semmle.order | 0 | | tst.js:5:6:5:14 | [RegExpAlt] cc\|\\}a\|cc | tst.js:5:9:5:11 | [RegExpNormalConstant] \\}a | semmle.label | 1 | From f48eab903fc58890c2e6748c0cc4d9a964db67cc Mon Sep 17 00:00:00 2001 From: Napalys Date: Sun, 9 Mar 2025 15:43:59 +0100 Subject: [PATCH 23/27] Add `RegExpSubtraction` class to support subtraction terms in regex --- .../ql/lib/semmle/javascript/Regexp.qll | 24 ++++++++++ .../CombinationOfOperators/printAst.expected | 18 ++++---- .../Subtraction/printAst.expected | 46 +++++++++---------- 3 files changed, 56 insertions(+), 32 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll b/javascript/ql/lib/semmle/javascript/Regexp.qll index 5f934307ff01..d2f4b782486d 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -329,6 +329,30 @@ class RegExpIntersection extends RegExpTerm, @regexp_intersection { override string getAPrimaryQlClass() { result = "RegExpIntersection" } } +/** + * A subtraction term, that is, a term of the form `[[a]--[ab]]`. + * + * Example: + * + * ``` + * /[[abc]--[bc]]/v - which matches 'a' only. 
+ * ``` + */ +class RegExpSubtraction extends RegExpTerm, @regexp_subtraction { + /** Gets the minuend (the left operand) of this subtraction. */ + RegExpTerm getFirstTerm() { result = this.getChild(0) } + + /** Gets the number of subtracted terms of this subtraction. */ + int getNumSubtractedTerm() { result = this.getNumChild() - 1 } + + /** Gets a subtrahend (a right operand) of this subtraction. */ + RegExpTerm getASubtractedTerm() { exists(int i | i > 0 and result = this.getChild(i)) } + + override predicate isNullable() { none() } + + override string getAPrimaryQlClass() { result = "RegExpSubtraction" } +} + /** * A sequence term. * diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected index 24688152bef7..37d81a9a76b9 100644 --- a/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/CombinationOfOperators/printAst.expected @@ -2,8 +2,8 @@ nodes | tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | semmle.label | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | | tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | semmle.label | [ExprStmt] /[[[ab1 ... a}]]/v; | | tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | semmle.order | 1 | -| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | [???] 
[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | | tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | +| tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.label | [RegExpCharacterClass] [[ab1]&&[b1]] | | tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | semmle.label | [RegExpIntersection] [[ab1]&&[b1]] | | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.label | [RegExpCharacterClass] [ab1] | @@ -26,14 +26,14 @@ edges | tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.order | 0 | | tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | semmle.label | 1 | | tst.js:1:1:1:45 | [ExprStmt] /[[[ab1 ... a}]]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]]/v | semmle.order | 1 | -| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.label | 0 | -| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.order | 0 | -| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | semmle.label | 1 | -| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | semmle.order | 1 | -| tst.js:1:2:1:42 | [???] 
[[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.label | 2 | -| tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.order | 2 | -| tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | 0 | -| tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [???] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.order | 0 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.label | 0 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | semmle.order | 0 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.label | 0 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | semmle.order | 0 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | semmle.label | 1 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:18:1:20 | [RegExpCharacterClass] [a] | semmle.order | 1 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:23:1:41 | [RegExpCharacterClass] [\\p{Number}\\q{z\|a}] | semmle.label | 2 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [[[ab1]&&[b1]]--[a]--[\\p{Number}\\q{z\|a}]] | tst.js:1:23:1:41 | [RegExpCharacterClass] 
[\\p{Number}\\q{z\|a}] | semmle.order | 2 | | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | semmle.label | 0 | | tst.js:1:3:1:15 | [RegExpCharacterClass] [[ab1]&&[b1]] | tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | semmle.order | 0 | | tst.js:1:3:1:15 | [RegExpIntersection] [[ab1]&&[b1]] | tst.js:1:4:1:8 | [RegExpCharacterClass] [ab1] | semmle.label | 0 | diff --git a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected index a2038941ac82..d2e57f4c1c8e 100644 --- a/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected +++ b/javascript/ql/test/library-tests/RegExp/VFlagOperations/Subtraction/printAst.expected @@ -2,15 +2,15 @@ nodes | tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | semmle.label | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | | tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | semmle.label | [ExprStmt] /[\\p{Sc ... er}]/v; | | tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | semmle.order | 1 | -| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | [???] 
[\\p{Script_Extensions=Greek}--\\p{Letter}] | | tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | +| tst.js:1:2:1:42 | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | | tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | | tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} | | tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | semmle.label | [RegExpLiteral] /[[abc]--[cbd]]/v | | tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | semmle.label | [ExprStmt] /[[abc]--[cbd]]/v; | | tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | semmle.order | 2 | -| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | semmle.label | [???] [[abc]--[cbd]] | | tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | semmle.label | [RegExpCharacterClass] [[abc]--[cbd]] | +| tst.js:2:2:2:15 | [RegExpSubtraction] [[abc]--[cbd]] | semmle.label | [RegExpSubtraction] [[abc]--[cbd]] | | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.label | [RegExpCharacterClass] [abc] | | tst.js:2:4:2:4 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | | tst.js:2:5:2:5 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | @@ -22,8 +22,8 @@ nodes | tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | semmle.label | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | | tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... de]]/v; | semmle.label | [ExprStmt] /[[abc] ... de]]/v; | | tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... de]]/v; | semmle.order | 3 | -| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | semmle.label | [???] 
[[abc]--[cbd]--[bde]] | | tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | semmle.label | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | +| tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | semmle.label | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.label | [RegExpCharacterClass] [abc] | | tst.js:3:4:3:4 | [RegExpNormalConstant] a | semmle.label | [RegExpNormalConstant] a | | tst.js:3:5:3:5 | [RegExpNormalConstant] b | semmle.label | [RegExpNormalConstant] b | @@ -41,22 +41,22 @@ edges | tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.order | 0 | | tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | semmle.label | 1 | | tst.js:1:1:1:45 | [ExprStmt] /[\\p{Sc ... er}]/v; | tst.js:1:1:1:44 | [RegExpLiteral] /[\\p{Script_Extensions=Greek}--\\p{Letter}]/v | semmle.order | 1 | -| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 | -| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 | -| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 1 | -| tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 1 | -| tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:2:1:42 | [???] 
[\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | 0 | -| tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:2:1:42 | [???] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.order | 0 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:2:1:42 | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.label | 0 | +| tst.js:1:2:1:42 | [RegExpCharacterClass] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:2:1:42 | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | semmle.order | 0 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:3:1:29 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 1 | +| tst.js:1:2:1:42 | [RegExpSubtraction] [\\p{Script_Extensions=Greek}--\\p{Letter}] | tst.js:1:32:1:41 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 1 | | tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | semmle.label | 0 | | tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | semmle.order | 0 | | tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | semmle.label | 1 | | tst.js:2:1:2:18 | [ExprStmt] /[[abc]--[cbd]]/v; | tst.js:2:1:2:17 | [RegExpLiteral] /[[abc]--[cbd]]/v | semmle.order | 1 | -| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | -| tst.js:2:2:2:15 | [???] 
[[abc]--[cbd]] | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | -| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | semmle.label | 1 | -| tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | semmle.order | 1 | -| tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | semmle.label | 0 | -| tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | tst.js:2:2:2:15 | [???] [[abc]--[cbd]] | semmle.order | 0 | +| tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | tst.js:2:2:2:15 | [RegExpSubtraction] [[abc]--[cbd]] | semmle.label | 0 | +| tst.js:2:2:2:15 | [RegExpCharacterClass] [[abc]--[cbd]] | tst.js:2:2:2:15 | [RegExpSubtraction] [[abc]--[cbd]] | semmle.order | 0 | +| tst.js:2:2:2:15 | [RegExpSubtraction] [[abc]--[cbd]] | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | +| tst.js:2:2:2:15 | [RegExpSubtraction] [[abc]--[cbd]] | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | +| tst.js:2:2:2:15 | [RegExpSubtraction] [[abc]--[cbd]] | tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | semmle.label | 1 | +| tst.js:2:2:2:15 | [RegExpSubtraction] [[abc]--[cbd]] | tst.js:2:10:2:14 | [RegExpCharacterClass] [cbd] | semmle.order | 1 | | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:4:2:4 | [RegExpNormalConstant] a | semmle.label | 0 | | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:4:2:4 | [RegExpNormalConstant] a | semmle.order | 0 | | tst.js:2:3:2:7 | [RegExpCharacterClass] [abc] | tst.js:2:5:2:5 | [RegExpNormalConstant] b | semmle.label | 1 | @@ -73,14 +73,14 @@ edges | tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | semmle.order | 0 | | tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... 
de]]/v; | tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | semmle.label | 1 | | tst.js:3:1:3:25 | [ExprStmt] /[[abc] ... de]]/v; | tst.js:3:1:3:24 | [RegExpLiteral] /[[abc]--[cbd]--[bde]]/v | semmle.order | 1 | -| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | -| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | -| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | semmle.label | 1 | -| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | semmle.order | 1 | -| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | semmle.label | 2 | -| tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | semmle.order | 2 | -| tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | tst.js:3:2:3:22 | [???] [[abc]--[cbd]--[bde]] | semmle.label | 0 | -| tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | tst.js:3:2:3:22 | [???] 
[[abc]--[cbd]--[bde]] | semmle.order | 0 | +| tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | semmle.label | 0 | +| tst.js:3:2:3:22 | [RegExpCharacterClass] [[abc]--[cbd]--[bde]] | tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | semmle.order | 0 | +| tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.label | 0 | +| tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | semmle.order | 0 | +| tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | semmle.label | 1 | +| tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | tst.js:3:10:3:14 | [RegExpCharacterClass] [cbd] | semmle.order | 1 | +| tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | semmle.label | 2 | +| tst.js:3:2:3:22 | [RegExpSubtraction] [[abc]--[cbd]--[bde]] | tst.js:3:17:3:21 | [RegExpCharacterClass] [bde] | semmle.order | 2 | | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:4:3:4 | [RegExpNormalConstant] a | semmle.label | 0 | | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:4:3:4 | [RegExpNormalConstant] a | semmle.order | 0 | | tst.js:3:3:3:7 | [RegExpCharacterClass] [abc] | tst.js:3:5:3:5 | [RegExpNormalConstant] b | semmle.label | 1 | From 9c8e0a5537023c14f40f85fd978e42c9f7003c15 Mon Sep 17 00:00:00 2001 From: Napalys Date: Mon, 10 Mar 2025 13:26:15 +0100 Subject: [PATCH 24/27] Applied changes from comments. 
Co-authored-by: Asgerf --- .../com/semmle/js/parser/RegExpParser.java | 4 ++-- .../ql/lib/semmle/javascript/Regexp.qll | 19 ++++++------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index c10e847dce44..3bf29ff4c4de 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -71,8 +71,8 @@ public List getErrors() { private List errors; private List backrefs; private int maxbackref; - private Boolean vFlagEnabled = false; - private Boolean uFlagEnabled = false; + private boolean vFlagEnabled = false; + private boolean uFlagEnabled = false; /** Parse the given string as a regular expression. */ public Result parse(String src) { diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll b/javascript/ql/lib/semmle/javascript/Regexp.qll index d2f4b782486d..642a3d196fb7 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -312,19 +312,12 @@ class RegExpAlt extends RegExpTerm, @regexp_alt { */ class RegExpIntersection extends RegExpTerm, @regexp_intersection { /** Gets an intersected term of this term. */ - RegExpTerm getIntersectedTerm() { result = this.getAChild() } + RegExpTerm getAnElement() { result = this.getAChild() } /** Gets the number of intersected terms of this term. */ int getNumIntersectedTerm() { result = this.getNumChild() } - override predicate isNullable() { this.getIntersectedTerm().isNullable() } - - override string getAMatchedString() { - exists(string s | s = this.getChild(0).getAMatchedString() | - forall(int i | i in [1 .. 
this.getNumChild() - 1] | s = this.getChild(i).getAMatchedString()) and - result = s - ) - } + override predicate isNullable() { this.getAnElement().isNullable() } override string getAPrimaryQlClass() { result = "RegExpIntersection" } } @@ -339,13 +332,13 @@ class RegExpIntersection extends RegExpTerm, @regexp_intersection { * ``` */ class RegExpSubtraction extends RegExpTerm, @regexp_subtraction { - /** Gets the minuend (the left operand) of this subtraction. */ + /** Gets the minuend (left operand) of this subtraction. */ RegExpTerm getFirstTerm() { result = this.getChild(0) } /** Gets the number of subtractions terms of this term. */ int getNumSubtractedTerm() { result = this.getNumChild() - 1 } - /** Gets the subtrahend (the right operand) of this subtraction. */ + /** Gets a subtrahend (right operand) of this subtraction. */ RegExpTerm getASubtractedTerm() { exists(int i | i > 0 and result = this.getChild(i)) } override predicate isNullable() { none() } @@ -1207,11 +1200,11 @@ private class StringConcatRegExpPatternSource extends RegExpPatternSource { */ class RegExpQuotedString extends RegExpTerm, @regexp_quoted_string { /** Gets the term representing the contents of this quoted string. */ - RegExpTerm getQuotedString() { result = this.getAChild() } + RegExpTerm getTerm() { result = this.getAChild() } override predicate isNullable() { none() } - override string getAMatchedString() { result = this.getQuotedString().getAMatchedString() } + override string getAMatchedString() { result = this.getTerm().getAMatchedString() } override string getAPrimaryQlClass() { result = "RegExpQuotedString" } } From 08c07f815f752f0893a81e3b1a503cfc56ba109e Mon Sep 17 00:00:00 2001 From: Napalys Date: Tue, 11 Mar 2025 08:27:38 +0100 Subject: [PATCH 25/27] Improved documentation, removed union from change note.
--- .../semmle/js/ast/regexp/CharacterClassIntersection.java | 4 ++++ .../semmle/js/ast/regexp/CharacterClassQuotedString.java | 9 ++++++--- .../semmle/js/ast/regexp/CharacterClassSubtraction.java | 4 ++++ .../extractor/src/com/semmle/js/parser/RegExpParser.java | 4 ++-- javascript/ql/lib/change-notes/2025-03-03-regex-v.md | 1 - 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java index 663400403ea5..5f4917363f56 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java @@ -3,6 +3,10 @@ import com.semmle.js.ast.SourceLocation; import java.util.List; +/** + * A character class intersection in a regular expression available only with the `v` flag. + * Example: [[abc]&&[ab]&&[b]] matches character `b` only. + */ public class CharacterClassIntersection extends RegExpTerm { private final List elements; diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java index b92e9e67c723..f4a937b70eb3 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java @@ -3,8 +3,11 @@ import com.semmle.js.ast.SourceLocation; /** - * A '\q{}' escape sequence in a regular expression, which is a special extension - * to standard regular expressions. + * A quoted string escape sequence '\q{}' in a regular expression. + * This feature is part of ECMAScript 2024 and requires the 'v' flag. + * + * Example: [\q{abc|def}] creates a character class that matches either the string + * "abc" or "def".
Within the quoted string, only the alternation operator '|' is supported. */ public class CharacterClassQuotedString extends RegExpTerm { private final RegExpTerm term; @@ -17,7 +20,7 @@ public CharacterClassQuotedString(SourceLocation loc, RegExpTerm term) { public RegExpTerm getTerm() { return term; } - + @Override public void accept(Visitor v) { v.visit(this); diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java index b1cf5cb5de51..07f37f6bb1da 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java @@ -3,6 +3,10 @@ import com.semmle.js.ast.SourceLocation; import java.util.List; +/** + * A character class subtraction in a regular expression available only with the `v` flag. + * Example: [[abc]--[a]--[b]] matches character `c` only. + */ public class CharacterClassSubtraction extends RegExpTerm { private final List elements; diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 3bf29ff4c4de..9a993407a70a 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -297,8 +297,8 @@ private RegExpTerm parseDisjunctionInsideQuotedString() { disjuncts.add(this.parseAlternativeInsideQuotedString()); } if (disjuncts.size() == 1) return disjuncts.get(0); - return this.finishTerm(new Disjunction(loc, disjuncts)); - } + return this.finishTerm(new Disjunction(loc, disjuncts)); + } private RegExpTerm parseAlternativeInsideQuotedString() { SourceLocation loc = new SourceLocation(pos()); diff --git a/javascript/ql/lib/change-notes/2025-03-03-regex-v.md b/javascript/ql/lib/change-notes/2025-03-03-regex-v.md index d93c887acac9..383f2b4edd38 100644 --- 
a/javascript/ql/lib/change-notes/2025-03-03-regex-v.md +++ b/javascript/ql/lib/change-notes/2025-03-03-regex-v.md @@ -4,5 +4,4 @@ category: feature * Added ability to parse new ECMA 2024 `v` flag operations: - Intersection `&&` - Subtraction `--` - - Union - `\q` quoted string From 3191b2c6fc133ac45ccd6b90a4b3296440f1711a Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Tue, 11 Mar 2025 09:40:24 +0100 Subject: [PATCH 26/27] Update javascript/extractor/src/com/semmle/js/parser/RegExpParser.java Co-authored-by: Erik Krogh Kristensen --- javascript/extractor/src/com/semmle/js/parser/RegExpParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 9a993407a70a..9a489268b18c 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -298,7 +298,7 @@ private RegExpTerm parseDisjunctionInsideQuotedString() { } if (disjuncts.size() == 1) return disjuncts.get(0); return this.finishTerm(new Disjunction(loc, disjuncts)); - } + } private RegExpTerm parseAlternativeInsideQuotedString() { SourceLocation loc = new SourceLocation(pos()); From a900f2cea474e9c862a469912c7c2d0c1259d77f Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Tue, 11 Mar 2025 11:57:28 +0100 Subject: [PATCH 27/27] Update javascript/ql/lib/change-notes/2025-03-03-regex-v.md Co-authored-by: Asger F --- javascript/ql/lib/change-notes/2025-03-03-regex-v.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ql/lib/change-notes/2025-03-03-regex-v.md b/javascript/ql/lib/change-notes/2025-03-03-regex-v.md index 383f2b4edd38..4c6948049ebf 100644 --- a/javascript/ql/lib/change-notes/2025-03-03-regex-v.md +++ b/javascript/ql/lib/change-notes/2025-03-03-regex-v.md @@ -1,7 +1,7 @@ --- category: feature --- -* Added ability to parse new ECMA 2024 
`v` flag operations: +* Extraction now supports regular expressions with the `v` flag, using the new operators: - Intersection `&&` - Subtraction `--` - `\q` quoted string