From d97d67359bf93a4bb180b1fe0ab21bfc9deefe25 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 28 Feb 2025 13:56:13 +0100 Subject: [PATCH 1/4] JS: Add test case showing lack of flow through non-sanitising regexp --- .../library-tests/TaintTracking/DataFlowTracking.expected | 1 + .../ql/test/library-tests/TaintTracking/regexp-sanitiser.js | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js diff --git a/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected b/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected index 42595adc131b..6335d6dbd49a 100644 --- a/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected +++ b/javascript/ql/test/library-tests/TaintTracking/DataFlowTracking.expected @@ -161,6 +161,7 @@ flow | partialCalls.js:4:17:4:24 | source() | partialCalls.js:30:14:30:20 | x.value | | partialCalls.js:4:17:4:24 | source() | partialCalls.js:41:10:41:18 | id(taint) | | partialCalls.js:4:17:4:24 | source() | partialCalls.js:51:14:51:14 | x | +| regexp-sanitiser.js:2:19:2:26 | source() | regexp-sanitiser.js:4:14:4:18 | taint | | sanitizer-function.js:12:17:12:24 | source() | sanitizer-function.js:14:10:14:14 | taint | | sanitizer-function.js:12:17:12:24 | source() | sanitizer-function.js:17:14:17:18 | taint | | sanitizer-function.js:12:17:12:24 | source() | sanitizer-function.js:21:14:21:18 | taint | diff --git a/javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js b/javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js new file mode 100644 index 000000000000..605b73c5be76 --- /dev/null +++ b/javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js @@ -0,0 +1,6 @@ +function foo() { + const taint = source(); + if (/^asd[\s\S]*$/.test(taint)) { + sink(taint); // NOT OK [INCONSISTENCY] + } +} From c3ad805fe8599f0f0d23b808a3ab9d3cbdc704ec Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 28 Feb 2025 09:38:15 +0100 Subject: [PATCH 2/4] JS: Sharpen up EnumerationRegExp --- .../semmle/javascript/MembershipCandidates.qll | 17 ++++++----------- .../TaintTracking/BasicTaintTracking.expected | 1 + .../TaintTracking/regexp-sanitiser.js | 2 +- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll b/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll index da9e90744ef0..b294cb7ca3ab 100644 --- a/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll +++ b/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll @@ -140,22 +140,17 @@ module MembershipCandidate { EnumerationRegExp() { this.isRootTerm() and RegExp::isFullyAnchoredTerm(this) and - exists(RegExpTerm child | this.getAChild*() = child | - child instanceof RegExpSequence or - child instanceof RegExpCaret or - child instanceof RegExpDollar or - child instanceof RegExpConstant or - child instanceof RegExpAlt or - child instanceof RegExpGroup - ) and - // exclude "length matches" that match every string - not this.getAChild*() instanceof RegExpDot + not exists(RegExpTerm child | child.getRootTerm() = this | + child instanceof RegExpDot or + child instanceof RegExpCharacterClass or + child instanceof RegExpUnicodePropertyEscape + ) } /** * Gets a string matched by this regular expression. */ - string getAMember() { result = this.getAChild*().getAMatchedString() } + string getAMember() { result = any(RegExpTerm t | t.getRootTerm() = this).getAMatchedString() } } /** diff --git a/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected b/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected index d8ba7545b0d2..4a0575eb73e1 100644 --- a/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected +++ b/javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected @@ -238,6 +238,7 @@ flow | promise.js:18:22:18:29 | source() | promise.js:24:10:24:10 | e | | promise.js:33:21:33:28 | source() | promise.js:38:10:38:10 | e | | promise.js:43:20:43:27 | source() | promise.js:43:8:43:28 | Promise ... urce()) | +| regexp-sanitiser.js:2:19:2:26 | source() | regexp-sanitiser.js:4:14:4:18 | taint | | rxjs.js:3:1:3:8 | source() | rxjs.js:10:14:10:17 | data | | rxjs.js:13:1:13:8 | source() | rxjs.js:17:23:17:23 | x | | rxjs.js:13:1:13:8 | source() | rxjs.js:18:23:18:23 | x | diff --git a/javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js b/javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js index 605b73c5be76..67481589bb62 100644 --- a/javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js +++ b/javascript/ql/test/library-tests/TaintTracking/regexp-sanitiser.js @@ -1,6 +1,6 @@ function foo() { const taint = source(); if (/^asd[\s\S]*$/.test(taint)) { - sink(taint); // NOT OK [INCONSISTENCY] + sink(taint); // NOT OK } } From c8a89c42035e8c0b538c48e55152668cce21231d Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 28 Feb 2025 14:04:40 +0100 Subject: [PATCH 3/4] JS: Change note --- .../src/change-notes/2025-02-28-membership-regexp-test.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md diff --git a/javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md b/javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md new file mode 100644 index 000000000000..4cbfca9d404c --- /dev/null +++ b/javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md @@ -0,0 +1,7 @@ +--- +category: fix +--- +* Fixed a bug that would in rare cases cause some regexp-based checks + to seen as generic taint sanitisers, even though the underlying regexp + is not restrictive enough. The regexps are now analysed more precisely, + and unrestrictive regexp checks will no longer block taint flow. From 2e32e441b8f1fc1dcb08b311db2ba9e8e9faec24 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 28 Feb 2025 14:25:56 +0100 Subject: [PATCH 4/4] Update javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../ql/src/change-notes/2025-02-28-membership-regexp-test.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md b/javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md index 4cbfca9d404c..a1c46f0d795f 100644 --- a/javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md +++ b/javascript/ql/src/change-notes/2025-02-28-membership-regexp-test.md @@ -2,6 +2,6 @@ category: fix --- * Fixed a bug that would in rare cases cause some regexp-based checks - to seen as generic taint sanitisers, even though the underlying regexp + to be seen as generic taint sanitisers, even though the underlying regexp is not restrictive enough. The regexps are now analysed more precisely, and unrestrictive regexp checks will no longer block taint flow.