Skip to content

Commit a6e7525

Browse files
committed
JS: More fine-grained regexp-based sanitizer guards
1 parent ad6e949 commit a6e7525

File tree

3 files changed

+100
-8
lines changed

3 files changed

+100
-8
lines changed

javascript/ql/src/semmle/javascript/Regexp.qll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,4 +968,92 @@ module RegExp {
968968
/** Holds `flags` includes the `s` flag or is the unknown flag `?`. */
969969
bindingset[flags]
970970
predicate maybeDotAll(string flags) { flags = unknownFlag() or isDotAll(flags) }
971+
972+
/** Holds if `term` and all of its disjuncts are anchored on both ends. */
973+
predicate isFullyAnchoredTerm(RegExpTerm term) {
974+
exists(RegExpSequence seq | term = seq |
975+
seq.getChild(0) instanceof RegExpCaret and
976+
seq.getLastChild() instanceof RegExpDollar
977+
)
978+
or
979+
isFullyAnchoredTerm(term.(RegExpGroup).getAChild())
980+
or
981+
isFullyAnchoredAlt(term, term.getNumChild())
982+
}
983+
984+
/** Holds if the first `i` disjuncts of `term` are fully anchored. */
985+
private predicate isFullyAnchoredAlt(RegExpAlt term, int i) {
986+
isFullyAnchoredTerm(term.getChild(0)) and i = 1
987+
or
988+
isFullyAnchoredAlt(term, i - 1) and
989+
isFullyAnchoredTerm(term.getChild(i - 1))
990+
}
991+
992+
/**
993+
* Holds if `term` is matches any character except for explicitly listed exceptions.
994+
*
995+
* For example, holds for `.`, `[^<>]`, or `\W`, but not for `[a-z]`, `\w`, or `[^\W\S]`.
996+
*/
997+
predicate isWildcardLike(RegExpTerm term) {
998+
term instanceof RegExpDot
999+
or
1000+
term.(RegExpCharacterClassEscape).getValue().isUppercase()
1001+
or
1002+
exists(RegExpCharacterClass cls | term = cls |
1003+
cls.isInverted() and
1004+
not cls.getAChild().(RegExpCharacterClassEscape).getValue().isUppercase()
1005+
)
1006+
or
1007+
exists(RegExpCharacterClass cls | term = cls |
1008+
not cls.isInverted() and
1009+
cls.getAChild().(RegExpCharacterClassEscape).getValue().isUppercase()
1010+
)
1011+
}
1012+
1013+
/**
1014+
* Holds if `term` is a generic sanitizer for strings that match (if `outcome` is true)
1015+
* or strings that don't match (if `outcome` is false).
1016+
*
1017+
* Specifically, whitelisting regexps such as `^(foo|bar)$` sanitize matches in the true case.
1018+
* Inverted character classes such as `[^a-z]` or `\W` sanitize matches in the false case.
1019+
*/
1020+
predicate isGenericRegExpSanitizer(RegExpTerm term, boolean outcome) {
1021+
term.isRootTerm() and
1022+
(
1023+
outcome = true and
1024+
isFullyAnchoredTerm(term) and
1025+
not isWildcardLike(term.getAChild*())
1026+
or
1027+
outcome = false and
1028+
exists(RegExpTerm root |
1029+
root = term
1030+
or
1031+
root = term.(RegExpGroup).getAChild()
1032+
|
1033+
isWildcardLike(root)
1034+
or
1035+
isWildcardLike(root.(RegExpAlt).getAChild())
1036+
)
1037+
)
1038+
}
1039+
1040+
/**
1041+
* Gets the AST of a regular expression object that can flow to `node`.
1042+
*/
1043+
RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) {
1044+
exists(DataFlow::RegExpCreationNode regexp |
1045+
regexp.getAReference().flowsTo(node) and
1046+
result = regexp.getRegExpTerm()
1047+
)
1048+
}
1049+
1050+
/**
1051+
* Gets the AST of a regular expression that can flow to `node`,
1052+
* including `RegExp` objects as well as strings interpreted as regular expressions.
1053+
*/
1054+
RegExpTerm getRegExpFromNode(DataFlow::Node node) {
1055+
result = getRegExpObjectFromNode(node)
1056+
or
1057+
result = node.asExpr().(StringLiteral).asRegExp()
1058+
}
9711059
}

javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -696,33 +696,37 @@ module TaintTracking {
696696
*/
697697
class SanitizingRegExpTest extends AdditionalSanitizerGuardNode, DataFlow::ValueNode {
698698
Expr expr;
699+
boolean sanitizedOutcome;
699700

700701
SanitizingRegExpTest() {
701702
exists(MethodCallExpr mce, Expr base, string m, Expr firstArg |
702703
mce = astNode and mce.calls(base, m) and firstArg = mce.getArgument(0)
703704
|
704705
// /re/.test(u) or /re/.exec(u)
705-
base.analyze().getAType() = TTRegExp() and
706+
RegExp::isGenericRegExpSanitizer(RegExp::getRegExpObjectFromNode(base.flow()), sanitizedOutcome) and
706707
(m = "test" or m = "exec") and
707708
firstArg = expr
708709
or
709710
// u.match(/re/) or u.match("re")
710711
base = expr and
711712
m = "match" and
712-
exists(InferredType firstArgType | firstArgType = firstArg.analyze().getAType() |
713-
firstArgType = TTRegExp() or firstArgType = TTString()
714-
)
713+
RegExp::isGenericRegExpSanitizer(RegExp::getRegExpFromNode(firstArg.flow()), sanitizedOutcome)
715714
)
716715
or
717716
// m = /re/.exec(u) and similar
718-
DataFlow::valueNode(astNode.(AssignExpr).getRhs()).(SanitizingRegExpTest).getSanitizedExpr() =
719-
expr
717+
exists(SanitizingRegExpTest other |
718+
other = DataFlow::valueNode(astNode.(AssignExpr).getRhs()) and
719+
expr = other.getSanitizedExpr() and
720+
sanitizedOutcome = other.getSanitizedOutcome()
721+
)
720722
}
721723

722724
private Expr getSanitizedExpr() { result = expr }
723725

726+
private boolean getSanitizedOutcome() { result = sanitizedOutcome }
727+
724728
override predicate sanitizes(boolean outcome, Expr e) {
725-
(outcome = true or outcome = false) and
729+
outcome = sanitizedOutcome and
726730
e = expr
727731
}
728732

javascript/ql/test/query-tests/Security/CWE-079/tst.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ function tst() {
132132
document.write(v);
133133
}
134134

135-
if (!(/\d+/.test(v)))
135+
if (!(/^\d+$/.test(v)))
136136
return;
137137

138138
// OK

0 commit comments

Comments
 (0)