Apply review suggestions

joefarebrother · joefarebrother · commit 9078e13f1c27 · 2022-05-04T15:41:41.000+01:00
- make java imports private
- qdoc fixes
- reorder predicates
- simplifications
diff --git a/java/ql/lib/semmle/code/java/regex/RegexTreeView.qll b/java/ql/lib/semmle/code/java/regex/RegexTreeView.qll
@@ -1,6 +1,6 @@
 /** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
 
-import java
+private import java
 private import semmle.code.java.regex.regex
 
 /**
diff --git a/java/ql/lib/semmle/code/java/regex/regex.qll b/java/ql/lib/semmle/code/java/regex/regex.qll
@@ -62,7 +62,7 @@ abstract class RegexString extends StringLiteral {
 
   /**
    * Helper predicate for `quote`.
-   * Holds if the char at `pos` is the one-based `index`th occourence of a quote delimiter (`\Q` or `\E`)
+   * Holds if the char at `pos` is the one-based `index`th occurrence of a quote delimiter (`\Q` or `\E`)
    * Result is `true` for `\Q` and `false` for `\E`.
    */
   private boolean quoteDelimiter(int index, int pos) {
@@ -73,7 +73,7 @@ abstract class RegexString extends StringLiteral {
   /** Holds if a quoted sequence is found between `start` and `end` */
   predicate quote(int start, int end) { this.quote(start, end, _, _) }
 
-  /** Holds if a quoted sequence is found between `start` and `end`, with ontent found between `inner_start` and `inner_end`. */
+  /** Holds if a quoted sequence is found between `start` and `end`, with content found between `inner_start` and `inner_end`. */
   predicate quote(int start, int end, int inner_start, int inner_end) {
     exists(int index |
       this.quoteDelimiter(index, start) = true and
@@ -98,7 +98,7 @@ abstract class RegexString extends StringLiteral {
   }
 
   /**
-   * A control sequence, `\cx`
+   * Holds if there is a control sequence, `\cx`, between `start` and `end`.
    * `x` may be any ascii character including special characters.
    */
   predicate controlEscape(int start, int end) {
@@ -107,6 +107,65 @@ abstract class RegexString extends StringLiteral {
     end = start + 3
   }
 
+  pragma[inline]
+  private predicate isOctal(int index) { this.getChar(index) = [0 .. 7].toString() }
+
+  /** Holds if an escape sequence that includes braces, such as named characters (\N{degree sign}), named classes (\p{Lower}), or hex values (\x{h..h}), is found between `start` and `end`. */
+  private predicate escapedBraces(int start, int end) {
+    this.escapingChar(start) and
+    this.getChar(start + 1) = ["N", "p", "P", "x"] and
+    this.getChar(start + 2) = "{" and
+    end = min(int i | start + 2 < i and this.getChar(i - 1) = "}")
+  }
+
+  /**
+   * Holds if an escaped character is found between `start` and `end`.
+   * Escaped characters include hex values, octal values and named escapes,
+   * but excludes backreferences.
+   */
+  predicate escapedCharacter(int start, int end) {
+    this.escapingChar(start) and
+    not this.backreference(start, _) and
+    (
+      // hex value \xhh
+      this.getChar(start + 1) = "x" and
+      this.getChar(start + 2) != "{" and
+      end = start + 4
+      or
+      // octal value \0o, \0oo, or \0ooo. Max of 0377.
+      this.getChar(start + 1) = "0" and
+      this.isOctal(start + 2) and
+      (
+        if this.isOctal(start + 3)
+        then
+          if this.isOctal(start + 4) and this.getChar(start + 2) in ["0", "1", "2", "3"]
+          then end = start + 5
+          else end = start + 4
+        else end = start + 3
+      )
+      or
+      // 16-bit hex value \uhhhh
+      this.getChar(start + 1) = "u" and end = start + 6
+      or
+      this.escapedBraces(start, end)
+      or
+      // Boundary matchers \b, \b{g}
+      this.getChar(start + 1) = "b" and
+      (
+        if this.getText().substring(start + 2, start + 5) = "{g}"
+        then end = start + 5
+        else end = start + 2
+      )
+      or
+      this.controlEscape(start, end)
+      or
+      // escape not handled above, update when adding a new case
+      not this.getChar(start + 1) in ["x", "0", "u", "p", "P", "N", "b", "c"] and
+      not exists(this.getChar(start + 1).toInt()) and
+      end = start + 2
+    )
+  }
+
   private string nonEscapedCharAt(int i) {
     result = this.getChar(i) and
     not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) and
@@ -128,7 +187,7 @@ abstract class RegexString extends StringLiteral {
 
   /**
    * Holds if the character at `pos` starts a character set delimiter.
-   * Result is 1 for `[` and 0 for `]`.
+   * Result is 1 for `[` and -1 for `]`.
    */
   private int charSetDelimiter(int pos) {
     result = 1 and this.charSetStart0(pos, _)
@@ -145,17 +204,14 @@ abstract class RegexString extends StringLiteral {
     pos = rank[index](int p | exists(this.charSetDelimiter(p)))
   }
 
-  bindingset[x]
-  private int max_zero(int x) { result = max([x, 0]) }
-
   /**
    * Gets the nesting depth of character classes after position `pos`,
    * where `pos` is the position of a character set delimiter.
    */
   private int charSetDepth(int index, int pos) {
-    index = 1 and result = max_zero(charSetDelimiter(index, pos))
+    index = 1 and result = 0.maximum(this.charSetDelimiter(index, pos))
     or
-    result = max_zero(charSetDelimiter(index, pos) + charSetDepth(index - 1, _))
+    result = 0.maximum(this.charSetDelimiter(index, pos) + this.charSetDepth(index - 1, _))
   }
 
   /** Hold if a top-level character set starts between `start` and `end`. */
@@ -209,26 +265,10 @@ abstract class RegexString extends StringLiteral {
 
   /** An indexed version of `charSetToken/3` */
   private predicate charSetToken(int charset_start, int index, int token_start, int token_end) {
-    token_start =
-      rank[index](int start, int end | this.charSetToken(charset_start, start, end) | start) and
+    token_start = rank[index](int start | this.charSetToken(charset_start, start, _) | start) and
     this.charSetToken(charset_start, token_start, token_end)
   }
 
-  /**
-   * Holds if the character set starting at `charset_start` contains either
-   * a character or a range found between `start` and `end`.
-   */
-  predicate charSetChild(int charset_start, int start, int end) {
-    this.charSetToken(charset_start, start, end) and
-    not exists(int range_start, int range_end |
-      this.charRange(charset_start, range_start, _, _, range_end) and
-      range_start <= start and
-      range_end >= end
-    )
-    or
-    this.charRange(charset_start, start, _, _, end)
-  }
-
   /**
    * Helper predicate for `charRange`.
    * We can determine where character ranges end by a left to right sweep.
@@ -272,63 +312,19 @@ abstract class RegexString extends StringLiteral {
     )
   }
 
-  pragma[inline]
-  private predicate isOctal(int index) { this.getChar(index) = [0 .. 7].toString() }
-
-  /** An escape sequence that includes braces, such as named characters (\N{degree sign}), named classes (\p{Lower}), or hex values (\x{h..h}) */
-  private predicate escapedBraces(int start, int end) {
-    this.escapingChar(start) and
-    this.getChar(start + 1) = ["N", "p", "P", "x"] and
-    this.getChar(start + 2) = "{" and
-    end = min(int i | start + 2 < i and this.getChar(i - 1) = "}")
-  }
-
   /**
-   * Holds if an escaped character is found between `start` and `end`.
-   * Escaped characters include hex values, octal values and named escapes,
-   * but excludes backreferences.
+   * Holds if the character set starting at `charset_start` contains either
+   * a character or a range found between `start` and `end`.
    */
-  predicate escapedCharacter(int start, int end) {
-    this.escapingChar(start) and
-    not this.backreference(start, _) and
-    (
-      // hex value \xhh
-      this.getChar(start + 1) = "x" and
-      this.getChar(start + 2) != "{" and
-      end = start + 4
-      or
-      // octal value \0o, \0oo, or \0ooo. Max of 0377.
-      this.getChar(start + 1) = "0" and
-      this.isOctal(start + 2) and
-      (
-        if this.isOctal(start + 3)
-        then
-          if this.isOctal(start + 4) and this.getChar(start + 2) in ["0", "1", "2", "3"]
-          then end = start + 5
-          else end = start + 4
-        else end = start + 3
-      )
-      or
-      // 16-bit hex value \uhhhh
-      this.getChar(start + 1) = "u" and end = start + 6
-      or
-      this.escapedBraces(start, end)
-      or
-      // Boundary matchers \b, \b{g}
-      this.getChar(start + 1) = "b" and
-      (
-        if this.getText().substring(start + 2, start + 5) = "{g}"
-        then end = start + 5
-        else end = start + 2
-      )
-      or
-      this.controlEscape(start, end)
-      or
-      // escape not handled above, update when adding a new case
-      not this.getChar(start + 1) in ["x", "0", "u", "p", "P", "N", "b", "c"] and
-      not exists(this.getChar(start + 1).toInt()) and
-      end = start + 2
+  predicate charSetChild(int charset_start, int start, int end) {
+    this.charSetToken(charset_start, start, end) and
+    not exists(int range_start, int range_end |
+      this.charRange(charset_start, range_start, _, _, range_end) and
+      range_start <= start and
+      range_end >= end
     )
+    or
+    this.charRange(charset_start, start, _, _, end)
   }
 
   /** Holds if `index` is inside a character set. */
@@ -871,9 +867,9 @@ abstract class RegexString extends StringLiteral {
    * Holds if a character is represented between `start` and `end` in the source literal.
    */
   private predicate sourceCharacter(int start, int end) {
-    sourceEscapedCharacter(start, end)
+    this.sourceEscapedCharacter(start, end)
     or
-    sourceNonEscapedCharacter(start) and
+    this.sourceNonEscapedCharacter(start) and
     end = start + 1
   }
 
@@ -885,8 +881,8 @@ abstract class RegexString extends StringLiteral {
    */
   predicate sourceCharacter(int pos, int start, int end) {
     exists(this.getChar(pos)) and
-    sourceCharacter(start, end) and
-    start = rank[pos + 2](int s | sourceCharacter(s, _))
+    this.sourceCharacter(start, end) and
+    start = rank[pos + 2](int s | this.sourceCharacter(s, _))
   }
 }
 
diff --git a/java/ql/lib/semmle/code/java/security/performance/ReDoSUtilSpecific.qll b/java/ql/lib/semmle/code/java/security/performance/ReDoSUtilSpecific.qll
@@ -3,7 +3,7 @@
  * This is the interface to the shared ReDoS library.
  */
 
-import java
+private import java
 import semmle.code.java.regex.RegexTreeView
 
 /**