From 4097aa9f78fe8d3ba8a3c6f357ad8c3a7fd997da Mon Sep 17 00:00:00 2001 From: Napalys Date: Thu, 21 Nov 2024 13:44:55 +0100 Subject: [PATCH 1/5] JS: Added ecma2021, thus extractor now can deal with RegExp v flag --- javascript/extractor/src/com/semmle/jcorn/Parser.java | 1 + .../src/com/semmle/js/extractor/ExtractorConfig.java | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/javascript/extractor/src/com/semmle/jcorn/Parser.java b/javascript/extractor/src/com/semmle/jcorn/Parser.java index 79a63e9a1217..e55567123f94 100644 --- a/javascript/extractor/src/com/semmle/jcorn/Parser.java +++ b/javascript/extractor/src/com/semmle/jcorn/Parser.java @@ -788,6 +788,7 @@ private Token readRegexp() { String validFlags = "gim"; if (this.options.ecmaVersion() >= 6) validFlags = "gimuy"; if (this.options.ecmaVersion() >= 9) validFlags = "gimsuy"; + if (this.options.ecmaVersion() >= 12) validFlags = "gimsuyv"; if (!mods.matches("^[" + validFlags + "]*$")) this.raise(start, "Invalid regular expression flag"); if (mods.indexOf('u') >= 0) { diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java index cb04f3074bfe..95057467a791 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java @@ -41,7 +41,8 @@ public static enum ECMAVersion { ECMA2017(2017, 8), ECMA2018(2018, 9), ECMA2019(2019, 10), - ECMA2020(2020, 11); + ECMA2020(2020, 11), + ECMA2021(2021, 12); private final int version; public final int legacyVersion; @@ -232,7 +233,7 @@ public Set getPredefinedGlobals() { private VirtualSourceRoot virtualSourceRoot; public ExtractorConfig(boolean experimental) { - this.ecmaVersion = experimental ? ECMAVersion.ECMA2020 : ECMAVersion.ECMA2019; + this.ecmaVersion = experimental ? 
ECMAVersion.ECMA2021 : ECMAVersion.ECMA2019; this.platform = Platform.AUTO; this.jsx = true; this.sourceType = SourceType.AUTO; From 3ec038e7b6b88a25d9ab20548b0bbfb8d5ae3587 Mon Sep 17 00:00:00 2001 From: Napalys Date: Thu, 21 Nov 2024 13:48:56 +0100 Subject: [PATCH 2/5] JS: Added predicate to check if v flag is used on regular expression --- javascript/ql/lib/semmle/javascript/Expr.qll | 3 +++ javascript/ql/lib/semmle/javascript/Regexp.qll | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/javascript/ql/lib/semmle/javascript/Expr.qll b/javascript/ql/lib/semmle/javascript/Expr.qll index 0049c5f5aca7..e8ec55f0174d 100644 --- a/javascript/ql/lib/semmle/javascript/Expr.qll +++ b/javascript/ql/lib/semmle/javascript/Expr.qll @@ -481,6 +481,9 @@ class RegExpLiteral extends @regexp_literal, Literal, RegExpParent { /** Holds if this regular expression has an `s` flag. */ predicate isDotAll() { RegExp::isDotAll(this.getFlags()) } + /** Holds if this regular expression has a `v` flag. */ + predicate isUnicodeSets() { RegExp::isUnicodeSets(this.getFlags()) } + override string getAPrimaryQlClass() { result = "RegExpLiteral" } } diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll b/javascript/ql/lib/semmle/javascript/Regexp.qll index dc7b0190c916..acfc888756e8 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -1162,6 +1162,10 @@ module RegExp { bindingset[flags] predicate isDotAll(string flags) { flags.matches("%s%") } + /** Holds if `flags` includes the `v` flag. */ + bindingset[flags] + predicate isUnicodeSets(string flags) { flags.matches("%v%") } + /** Holds if `flags` includes the `m` flag or is the unknown flag `?`. 
*/ bindingset[flags] predicate maybeMultiline(string flags) { flags = unknownFlag() or isMultiline(flags) } From 01d70a6d73adee884f984c3f0459aacea613e3c9 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Sun, 16 Feb 2025 19:01:02 +0100 Subject: [PATCH 3/5] add test of the new `v` flag --- .../Security/CWE-400/ReDoS/PolynomialBackTracking.expected | 1 + .../ql/test/query-tests/Security/CWE-400/ReDoS/ReDoS.expected | 1 + javascript/ql/test/query-tests/Security/CWE-400/ReDoS/tst.js | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/PolynomialBackTracking.expected b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/PolynomialBackTracking.expected index 106b143111f8..5740204b2d37 100644 --- a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/PolynomialBackTracking.expected +++ b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/PolynomialBackTracking.expected @@ -538,3 +538,4 @@ | tst.js:407:128:407:129 | * | Strings starting with '0/*' and with many repetitions of ' ' can start matching anywhere after the start of the preceeding \\s* | | tst.js:409:23:409:29 | [\\w.-]* | Strings starting with '//' and with many repetitions of '//' can start matching anywhere after the start of the preceeding (\\/(?:\\/[\\w.-]*)*){0,1}:([\\w.-]+) | | tst.js:411:15:411:19 | a{1,} | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a{1,})* | +| tst.js:417:20:417:25 | (aa?)* | Strings with many repetitions of 'aa' can start matching anywhere after the start of the preceeding (aa?)*b | diff --git a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/ReDoS.expected b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/ReDoS.expected index 662dadaaa5be..470c8422659e 100644 --- a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/ReDoS.expected +++ b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/ReDoS.expected @@ -201,3 +201,4 @@ | 
tst.js:411:15:411:19 | a{1,} | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. | | tst.js:413:25:413:35 | (\\u0000\|.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\n\\u0000' and containing many repetitions of '\\u0000'. | | tst.js:415:44:415:57 | (\ud83d\ude80\|.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\n\\u{1f680}' and containing many repetitions of '\\u{1f680}'. | +| tst.js:417:22:417:23 | a? | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'aa'. | diff --git a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/tst.js b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/tst.js index ef82076e7028..598ff7027184 100644 --- a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/tst.js +++ b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/tst.js @@ -412,4 +412,6 @@ var bad99 = /(a{1,})*b/; var unicode = /^\n\u0000(\u0000|.)+$/; -var largeUnicode = new RegExp("^\n\u{1F680}(\u{1F680}|.)+X$"); \ No newline at end of file +var largeUnicode = new RegExp("^\n\u{1F680}(\u{1F680}|.)+X$"); + +var unicodeSets = /(aa?)*b/v; \ No newline at end of file From 55b8e8b7483c98aed8f7b721964cc8a23dc8cba4 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Sun, 16 Feb 2025 19:06:14 +0100 Subject: [PATCH 4/5] fix the ECMAScript version to be ES2024 --- javascript/extractor/src/com/semmle/jcorn/Parser.java | 2 +- .../src/com/semmle/js/extractor/ExtractorConfig.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/javascript/extractor/src/com/semmle/jcorn/Parser.java b/javascript/extractor/src/com/semmle/jcorn/Parser.java index e55567123f94..9c2eeb9e2411 100644 --- a/javascript/extractor/src/com/semmle/jcorn/Parser.java +++ b/javascript/extractor/src/com/semmle/jcorn/Parser.java @@ -788,7 
+788,7 @@ private Token readRegexp() { String validFlags = "gim"; if (this.options.ecmaVersion() >= 6) validFlags = "gimuy"; if (this.options.ecmaVersion() >= 9) validFlags = "gimsuy"; - if (this.options.ecmaVersion() >= 12) validFlags = "gimsuyv"; + if (this.options.ecmaVersion() >= 15) validFlags = "gimsuyv"; if (!mods.matches("^[" + validFlags + "]*$")) this.raise(start, "Invalid regular expression flag"); if (mods.indexOf('u') >= 0) { diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java index 95057467a791..0104e566e35f 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java @@ -42,7 +42,7 @@ public static enum ECMAVersion { ECMA2018(2018, 9), ECMA2019(2019, 10), ECMA2020(2020, 11), - ECMA2021(2021, 12); + ECMA2024(2024, 15); private final int version; public final int legacyVersion; @@ -233,7 +233,7 @@ public Set getPredefinedGlobals() { private VirtualSourceRoot virtualSourceRoot; public ExtractorConfig(boolean experimental) { - this.ecmaVersion = experimental ? ECMAVersion.ECMA2021 : ECMAVersion.ECMA2019; + this.ecmaVersion = experimental ? 
ECMAVersion.ECMA2024 : ECMAVersion.ECMA2019; this.platform = Platform.AUTO; this.jsx = true; this.sourceType = SourceType.AUTO; From 6ebffd59f64688486b2c2daf552a8f6904b22c3b Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Sun, 16 Feb 2025 19:23:44 +0100 Subject: [PATCH 5/5] add change-note --- javascript/ql/lib/change-notes/2025-02-16-v-flag.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 javascript/ql/lib/change-notes/2025-02-16-v-flag.md diff --git a/javascript/ql/lib/change-notes/2025-02-16-v-flag.md b/javascript/ql/lib/change-notes/2025-02-16-v-flag.md new file mode 100644 index 000000000000..9fe3c681028b --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-02-16-v-flag.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Added support for regular expressions using the `v` flag.