From 2d36a5d47820c667a0e6e6f44bd0a127b0d6a496 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 3 Feb 2025 13:16:02 +0100 Subject: [PATCH 1/2] JS: Use JSX syntax in first attempt when extension is .jsx --- .../com/semmle/js/extractor/JSExtractor.java | 6 +- .../src/com/semmle/js/parser/JSParser.java | 4 +- .../com/semmle/js/parser/JcornWrapper.java | 13 +- .../extractor/test/ES2015DetectorTests.java | 2 +- .../extractor/test/NodeJSDetectorTests.java | 2 +- .../extractor/tests/jsx/input/repro1.jsx | 4 + .../tests/jsx/output/trap/repro1.jsx.trap | 385 ++++++++++++++++++ 7 files changed, 408 insertions(+), 8 deletions(-) create mode 100644 javascript/extractor/tests/jsx/input/repro1.jsx create mode 100644 javascript/extractor/tests/jsx/output/trap/repro1.jsx.trap diff --git a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java index a8e20fdc0d90..01144166b3eb 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java @@ -56,8 +56,10 @@ public Pair extract( SourceType sourceType = establishSourceType(source, true); + String extension = textualExtractor.getLocationManager().getSourceFileExtension(); + JSParser.Result parserRes = - JSParser.parse(config, sourceType, source, textualExtractor.getMetrics()); + JSParser.parse(config, sourceType, extension, source, textualExtractor.getMetrics()); // Check if we guessed wrong with the regex in `establishSourceType`, (which could // happen due to a block-comment line starting with ' import'). 
@@ -74,7 +76,7 @@ public Pair extract( if (wrongGuess) { sourceType = SourceType.SCRIPT; parserRes = - JSParser.parse(config, sourceType, source, textualExtractor.getMetrics()); + JSParser.parse(config, sourceType, extension, source, textualExtractor.getMetrics()); } } diff --git a/javascript/extractor/src/com/semmle/js/parser/JSParser.java b/javascript/extractor/src/com/semmle/js/parser/JSParser.java index d912886303f8..67b5e565e99c 100644 --- a/javascript/extractor/src/com/semmle/js/parser/JSParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/JSParser.java @@ -68,9 +68,9 @@ public List getErrors() { } public static Result parse( - ExtractorConfig config, SourceType sourceType, String source, ExtractionMetrics metrics) { + ExtractorConfig config, SourceType sourceType, String extension, String source, ExtractionMetrics metrics) { metrics.startPhase(ExtractionPhase.JSParser_parse); - Result result = JcornWrapper.parse(config, sourceType, source); + Result result = JcornWrapper.parse(config, sourceType, extension, source); metrics.stopPhase(ExtractionPhase.JSParser_parse); return result; } diff --git a/javascript/extractor/src/com/semmle/js/parser/JcornWrapper.java b/javascript/extractor/src/com/semmle/js/parser/JcornWrapper.java index 2ccfc2c39ebf..dc3e68607b94 100644 --- a/javascript/extractor/src/com/semmle/js/parser/JcornWrapper.java +++ b/javascript/extractor/src/com/semmle/js/parser/JcornWrapper.java @@ -14,9 +14,14 @@ import com.semmle.js.extractor.ExtractorConfig.SourceType; public class JcornWrapper { + public static boolean alwaysParseWithJsx(String extension) { + // Note that .tsx is not relevant here since this is specifically for the JS parser. + return extension.equals(".jsx"); + } + /** Parse source code as a program. 
*/ public static JSParser.Result parse( - ExtractorConfig config, SourceType sourceType, String source) { + ExtractorConfig config, SourceType sourceType, String extension, String source) { ECMAVersion ecmaVersion = config.getEcmaVersion(); List comments = new ArrayList<>(); List tokens = new ArrayList<>(); @@ -32,7 +37,11 @@ public static JSParser.Result parse( Program program = null; List errors = new ArrayList<>(); - + + // If the file extension implies JSX syntax, use that in the first parsing attempt. + // This enables us to parse JSX files that the Flow parser cannot handle due to ambiguous syntax. + if (alwaysParseWithJsx(extension)) options = new JSXOptions(options); + try { try { // First try to parse as a regular JavaScript program. diff --git a/javascript/extractor/test/com/semmle/js/extractor/test/ES2015DetectorTests.java b/javascript/extractor/test/com/semmle/js/extractor/test/ES2015DetectorTests.java index 669824c076f4..4e3dff9ac2ae 100644 --- a/javascript/extractor/test/com/semmle/js/extractor/test/ES2015DetectorTests.java +++ b/javascript/extractor/test/com/semmle/js/extractor/test/ES2015DetectorTests.java @@ -16,7 +16,7 @@ public class ES2015DetectorTests { private static final ExtractorConfig CONFIG = new ExtractorConfig(true); private void isES2015(String src, boolean expected) { - Result res = JSParser.parse(CONFIG, SourceType.MODULE, src, new ExtractionMetrics()); + Result res = JSParser.parse(CONFIG, SourceType.MODULE, ".jsx", src, new ExtractionMetrics()); Node ast = res.getAST(); Assert.assertNotNull(ast); Assert.assertTrue(ES2015Detector.looksLikeES2015(ast) == expected); diff --git a/javascript/extractor/test/com/semmle/js/extractor/test/NodeJSDetectorTests.java b/javascript/extractor/test/com/semmle/js/extractor/test/NodeJSDetectorTests.java index ead1d1265612..17aa4d9ec4ee 100644 --- a/javascript/extractor/test/com/semmle/js/extractor/test/NodeJSDetectorTests.java +++ 
b/javascript/extractor/test/com/semmle/js/extractor/test/NodeJSDetectorTests.java @@ -16,7 +16,7 @@ public class NodeJSDetectorTests { private static final ExtractorConfig CONFIG = new ExtractorConfig(true); private void isNodeJS(String src, boolean expected) { - Result res = JSParser.parse(CONFIG, SourceType.SCRIPT, src, new ExtractionMetrics()); + Result res = JSParser.parse(CONFIG, SourceType.SCRIPT, ".jsx", src, new ExtractionMetrics()); Node ast = res.getAST(); Assert.assertNotNull(ast); Assert.assertTrue(NodeJSDetector.looksLikeNodeJS(ast) == expected); diff --git a/javascript/extractor/tests/jsx/input/repro1.jsx b/javascript/extractor/tests/jsx/input/repro1.jsx new file mode 100644 index 000000000000..9ec8b056e2ff --- /dev/null +++ b/javascript/extractor/tests/jsx/input/repro1.jsx @@ -0,0 +1,4 @@ +function foo() { + let x = <div></div>; + return true ? (null, null) : e => { }; +} diff --git a/javascript/extractor/tests/jsx/output/trap/repro1.jsx.trap b/javascript/extractor/tests/jsx/output/trap/repro1.jsx.trap new file mode 100644 index 000000000000..c1cb84747511 --- /dev/null +++ b/javascript/extractor/tests/jsx/output/trap/repro1.jsx.trap @@ -0,0 +1,385 @@ +#10000=@"/repro1.jsx;sourcefile" +files(#10000,"/repro1.jsx") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"function foo() {"," +") +#20003=@"loc,{#10000},1,1,1,16" +locations_default(#20003,#10000,1,1,1,16) +hasLocation(#20002,#20003) +#20004=* +lines(#20004,#20001," let x = <div></div>;"," +") +#20005=@"loc,{#10000},2,1,2,24" +locations_default(#20005,#10000,2,1,2,24) +hasLocation(#20004,#20005) +indentation(#10000,2," ",4) +#20006=* +lines(#20006,#20001," return true ? (null, null) : e => { };"," +") +#20007=@"loc,{#10000},3,1,3,42" +locations_default(#20007,#10000,3,1,3,42) +hasLocation(#20006,#20007) +indentation(#10000,3," ",4) +#20008=* +lines(#20008,#20001,"}"," +") +#20009=@"loc,{#10000},4,1,4,1" +locations_default(#20009,#10000,4,1,4,1) +hasLocation(#20008,#20009) +numlines(#20001,4,4,0) +#20010=* +tokeninfo(#20010,7,#20001,0,"function") +#20011=@"loc,{#10000},1,1,1,8" +locations_default(#20011,#10000,1,1,1,8) +hasLocation(#20010,#20011) +#20012=* +tokeninfo(#20012,6,#20001,1,"foo") +#20013=@"loc,{#10000},1,10,1,12" +locations_default(#20013,#10000,1,10,1,12) +hasLocation(#20012,#20013) +#20014=* +tokeninfo(#20014,8,#20001,2,"(") +#20015=@"loc,{#10000},1,13,1,13" +locations_default(#20015,#10000,1,13,1,13) +hasLocation(#20014,#20015) +#20016=* +tokeninfo(#20016,8,#20001,3,")") +#20017=@"loc,{#10000},1,14,1,14" +locations_default(#20017,#10000,1,14,1,14) +hasLocation(#20016,#20017) +#20018=* +tokeninfo(#20018,8,#20001,4,"{") +#20019=@"loc,{#10000},1,16,1,16" +locations_default(#20019,#10000,1,16,1,16) +hasLocation(#20018,#20019) +#20020=* +tokeninfo(#20020,7,#20001,5,"let") +#20021=@"loc,{#10000},2,5,2,7" +locations_default(#20021,#10000,2,5,2,7) +hasLocation(#20020,#20021) +#20022=* +tokeninfo(#20022,6,#20001,6,"x") +#20023=@"loc,{#10000},2,9,2,9" +locations_default(#20023,#10000,2,9,2,9) +hasLocation(#20022,#20023) +#20024=* +tokeninfo(#20024,8,#20001,7,"=") +#20025=@"loc,{#10000},2,11,2,11" +locations_default(#20025,#10000,2,11,2,11) +hasLocation(#20024,#20025) +#20026=* +tokeninfo(#20026,8,#20001,8,"<") +#20027=@"loc,{#10000},2,13,2,13" +locations_default(#20027,#10000,2,13,2,13) +hasLocation(#20026,#20027) +#20028=* +tokeninfo(#20028,6,#20001,9,"div") +#20029=@"loc,{#10000},2,14,2,16" +locations_default(#20029,#10000,2,14,2,16) 
+hasLocation(#20028,#20029) +#20030=* +tokeninfo(#20030,8,#20001,10,">") +#20031=@"loc,{#10000},2,17,2,17" +locations_default(#20031,#10000,2,17,2,17) +hasLocation(#20030,#20031) +#20032=* +tokeninfo(#20032,8,#20001,11,"<") +#20033=@"loc,{#10000},2,18,2,18" +locations_default(#20033,#10000,2,18,2,18) +hasLocation(#20032,#20033) +#20034=* +tokeninfo(#20034,8,#20001,12,"/") +#20035=@"loc,{#10000},2,19,2,19" +locations_default(#20035,#10000,2,19,2,19) +hasLocation(#20034,#20035) +#20036=* +tokeninfo(#20036,6,#20001,13,"div") +#20037=@"loc,{#10000},2,20,2,22" +locations_default(#20037,#10000,2,20,2,22) +hasLocation(#20036,#20037) +#20038=* +tokeninfo(#20038,8,#20001,14,">") +#20039=@"loc,{#10000},2,23,2,23" +locations_default(#20039,#10000,2,23,2,23) +hasLocation(#20038,#20039) +#20040=* +tokeninfo(#20040,8,#20001,15,";") +#20041=@"loc,{#10000},2,24,2,24" +locations_default(#20041,#10000,2,24,2,24) +hasLocation(#20040,#20041) +#20042=* +tokeninfo(#20042,7,#20001,16,"return") +#20043=@"loc,{#10000},3,5,3,10" +locations_default(#20043,#10000,3,5,3,10) +hasLocation(#20042,#20043) +#20044=* +tokeninfo(#20044,2,#20001,17,"true") +#20045=@"loc,{#10000},3,12,3,15" +locations_default(#20045,#10000,3,12,3,15) +hasLocation(#20044,#20045) +#20046=* +tokeninfo(#20046,8,#20001,18,"?") +#20047=@"loc,{#10000},3,17,3,17" +locations_default(#20047,#10000,3,17,3,17) +hasLocation(#20046,#20047) +#20048=* +tokeninfo(#20048,8,#20001,19,"(") +#20049=@"loc,{#10000},3,19,3,19" +locations_default(#20049,#10000,3,19,3,19) +hasLocation(#20048,#20049) +#20050=* +tokeninfo(#20050,1,#20001,20,"null") +#20051=@"loc,{#10000},3,20,3,23" +locations_default(#20051,#10000,3,20,3,23) +hasLocation(#20050,#20051) +#20052=* +tokeninfo(#20052,8,#20001,21,",") +#20053=@"loc,{#10000},3,24,3,24" +locations_default(#20053,#10000,3,24,3,24) +hasLocation(#20052,#20053) +#20054=* +tokeninfo(#20054,1,#20001,22,"null") +#20055=@"loc,{#10000},3,26,3,29" +locations_default(#20055,#10000,3,26,3,29) 
+hasLocation(#20054,#20055) +#20056=* +tokeninfo(#20056,8,#20001,23,")") +#20057=@"loc,{#10000},3,30,3,30" +locations_default(#20057,#10000,3,30,3,30) +hasLocation(#20056,#20057) +#20058=* +tokeninfo(#20058,8,#20001,24,":") +#20059=@"loc,{#10000},3,32,3,32" +locations_default(#20059,#10000,3,32,3,32) +hasLocation(#20058,#20059) +#20060=* +tokeninfo(#20060,6,#20001,25,"e") +#20061=@"loc,{#10000},3,34,3,34" +locations_default(#20061,#10000,3,34,3,34) +hasLocation(#20060,#20061) +#20062=* +tokeninfo(#20062,8,#20001,26,"=>") +#20063=@"loc,{#10000},3,36,3,37" +locations_default(#20063,#10000,3,36,3,37) +hasLocation(#20062,#20063) +#20064=* +tokeninfo(#20064,8,#20001,27,"{") +#20065=@"loc,{#10000},3,39,3,39" +locations_default(#20065,#10000,3,39,3,39) +hasLocation(#20064,#20065) +#20066=* +tokeninfo(#20066,8,#20001,28,"}") +#20067=@"loc,{#10000},3,41,3,41" +locations_default(#20067,#10000,3,41,3,41) +hasLocation(#20066,#20067) +#20068=* +tokeninfo(#20068,8,#20001,29,";") +#20069=@"loc,{#10000},3,42,3,42" +locations_default(#20069,#10000,3,42,3,42) +hasLocation(#20068,#20069) +#20070=* +tokeninfo(#20070,8,#20001,30,"}") +hasLocation(#20070,#20009) +#20071=* +tokeninfo(#20071,0,#20001,31,"") +#20072=@"loc,{#10000},5,1,5,0" +locations_default(#20072,#10000,5,1,5,0) +hasLocation(#20071,#20072) +toplevels(#20001,0) +#20073=@"loc,{#10000},1,1,5,0" +locations_default(#20073,#10000,1,1,5,0) +hasLocation(#20001,#20073) +#20074=@"var;{foo};{#20000}" +variables(#20074,"foo",#20000) +#20075=* +stmts(#20075,17,#20001,0,"functio ... 
{ };\n}") +#20076=@"loc,{#10000},1,1,4,1" +locations_default(#20076,#10000,1,1,4,1) +hasLocation(#20075,#20076) +stmt_containers(#20075,#20001) +#20077=* +exprs(#20077,78,#20075,-1,"foo") +hasLocation(#20077,#20013) +expr_containers(#20077,#20075) +literals("foo","foo",#20077) +decl(#20077,#20074) +#20078=* +scopes(#20078,1) +scopenodes(#20075,#20078) +scopenesting(#20078,#20000) +#20079=@"var;{arguments};{#20078}" +variables(#20079,"arguments",#20078) +is_arguments_object(#20079) +#20080=* +stmts(#20080,1,#20075,-2,"{\n l ... { };\n}") +#20081=@"loc,{#10000},1,16,4,1" +locations_default(#20081,#10000,1,16,4,1) +hasLocation(#20080,#20081) +stmt_containers(#20080,#20075) +#20082=* +scopes(#20082,4) +scopenodes(#20080,#20082) +scopenesting(#20082,#20078) +#20083=@"var;{x};{#20082}" +variables(#20083,"x",#20082) +#20084=* +stmts(#20084,23,#20080,0,"let x = <div></div>;") +#20085=@"loc,{#10000},2,5,2,24" +locations_default(#20085,#10000,2,5,2,24) +hasLocation(#20084,#20085) +stmt_containers(#20084,#20075) +#20086=* +exprs(#20086,64,#20084,0,"x = <div></div>") +#20087=@"loc,{#10000},2,9,2,23" +locations_default(#20087,#10000,2,9,2,23) +hasLocation(#20086,#20087) +enclosing_stmt(#20086,#20084) +expr_containers(#20086,#20075) +#20088=* +exprs(#20088,78,#20086,0,"x") +hasLocation(#20088,#20023) +enclosing_stmt(#20088,#20084) +expr_containers(#20088,#20075) +literals("x","x",#20088) +decl(#20088,#20083) +#20089=* +exprs(#20089,89,#20086,1,"<div></div>") +#20090=@"loc,{#10000},2,13,2,23" +locations_default(#20090,#10000,2,13,2,23) +hasLocation(#20089,#20090) +enclosing_stmt(#20089,#20084) +expr_containers(#20089,#20075) +#20091=* +exprs(#20091,0,#20089,-1,"div") +hasLocation(#20091,#20029) +enclosing_stmt(#20091,#20084) +expr_containers(#20091,#20075) +literals("div","div",#20091) +#20092=* +stmts(#20092,9,#20080,1,"return ... => { };") +#20093=@"loc,{#10000},3,5,3,42" +locations_default(#20093,#10000,3,5,3,42) +hasLocation(#20092,#20093) +stmt_containers(#20092,#20075) +#20094=* +exprs(#20094,11,#20092,0,"true ? ... => { }") +#20095=@"loc,{#10000},3,12,3,41" +locations_default(#20095,#10000,3,12,3,41) +hasLocation(#20094,#20095) +enclosing_stmt(#20094,#20092) +expr_containers(#20094,#20075) +#20096=* +exprs(#20096,2,#20094,0,"true") +hasLocation(#20096,#20045) +enclosing_stmt(#20096,#20092) +expr_containers(#20096,#20075) +literals("true","true",#20096) +#20097=* +exprs(#20097,63,#20094,1,"(null, null)") +#20098=@"loc,{#10000},3,19,3,30" +locations_default(#20098,#10000,3,19,3,30) +hasLocation(#20097,#20098) +enclosing_stmt(#20097,#20092) +expr_containers(#20097,#20075) +#20099=* +exprs(#20099,10,#20097,0,"null, null") +#20100=@"loc,{#10000},3,20,3,29" +locations_default(#20100,#10000,3,20,3,29) +hasLocation(#20099,#20100) +enclosing_stmt(#20099,#20092) +expr_containers(#20099,#20075) +#20101=* +exprs(#20101,1,#20099,0,"null") +hasLocation(#20101,#20051) +enclosing_stmt(#20101,#20092) +expr_containers(#20101,#20075) +literals("null","null",#20101) +#20102=* +exprs(#20102,1,#20099,1,"null") +hasLocation(#20102,#20055) +enclosing_stmt(#20102,#20092) +expr_containers(#20102,#20075) +literals("null","null",#20102) +#20103=* +exprs(#20103,65,#20094,2,"e => { }") +#20104=@"loc,{#10000},3,34,3,41" +locations_default(#20104,#10000,3,34,3,41) +hasLocation(#20103,#20104) +enclosing_stmt(#20103,#20092) +expr_containers(#20103,#20075) +#20105=* +scopes(#20105,1) +scopenodes(#20103,#20105) +scopenesting(#20105,#20082) 
+#20106=@"var;{e};{#20105}" +variables(#20106,"e",#20105) +#20107=* +exprs(#20107,78,#20103,0,"e") +hasLocation(#20107,#20061) +expr_containers(#20107,#20103) +literals("e","e",#20107) +decl(#20107,#20106) +#20108=* +stmts(#20108,1,#20103,-2,"{ }") +#20109=@"loc,{#10000},3,39,3,41" +locations_default(#20109,#10000,3,39,3,41) +hasLocation(#20108,#20109) +stmt_containers(#20108,#20103) +#20110=* +entry_cfg_node(#20110,#20001) +#20111=@"loc,{#10000},1,1,1,0" +locations_default(#20111,#10000,1,1,1,0) +hasLocation(#20110,#20111) +#20112=* +exit_cfg_node(#20112,#20001) +hasLocation(#20112,#20072) +successor(#20075,#20112) +#20113=* +entry_cfg_node(#20113,#20075) +hasLocation(#20113,#20111) +#20114=* +exit_cfg_node(#20114,#20075) +#20115=@"loc,{#10000},4,2,4,1" +locations_default(#20115,#10000,4,2,4,1) +hasLocation(#20114,#20115) +successor(#20080,#20084) +successor(#20094,#20096) +successor(#20096,#20097) +successor(#20097,#20099) +successor(#20099,#20101) +successor(#20102,#20092) +successor(#20101,#20102) +successor(#20103,#20092) +#20116=* +entry_cfg_node(#20116,#20103) +#20117=@"loc,{#10000},3,34,3,33" +locations_default(#20117,#10000,3,34,3,33) +hasLocation(#20116,#20117) +#20118=* +exit_cfg_node(#20118,#20103) +#20119=@"loc,{#10000},3,42,3,41" +locations_default(#20119,#10000,3,42,3,41) +hasLocation(#20118,#20119) +successor(#20108,#20118) +successor(#20107,#20108) +successor(#20116,#20107) +successor(#20092,#20114) +successor(#20084,#20088) +successor(#20091,#20089) +successor(#20089,#20086) +successor(#20088,#20091) +successor(#20086,#20094) +successor(#20113,#20080) +successor(#20077,#20075) +successor(#20110,#20077) +numlines(#10000,4,4,0) +filetype(#10000,"javascript") From 09270f4e20e75498a23dc32b781e30f80aedfe95 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 4 Feb 2025 09:36:46 +0100 Subject: [PATCH 2/2] JS: Change note --- .../src/change-notes/2025-02-04-jsx-parser-first-attempt.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 
javascript/ql/src/change-notes/2025-02-04-jsx-parser-first-attempt.md diff --git a/javascript/ql/src/change-notes/2025-02-04-jsx-parser-first-attempt.md b/javascript/ql/src/change-notes/2025-02-04-jsx-parser-first-attempt.md new file mode 100644 index 000000000000..5a5f7acc0baf --- /dev/null +++ b/javascript/ql/src/change-notes/2025-02-04-jsx-parser-first-attempt.md @@ -0,0 +1,5 @@ +--- +category: fix +--- +* Fixed a bug that would cause parse errors in `.jsx` files in rare cases where the file + contained syntax that was misinterpreted as Flow syntax.