11/**
22 * Library for parsing Swift regular expressions.
33 *
4+ * See https://developer.apple.com/documentation/foundation/nsregularexpression
5+ * for the regular expression syntax we aim to support.
6+ *
47 * N.B. does not yet handle stripping whitespace and comments in regexes with
58 * the `x` (free-spacing) flag.
69 */
@@ -9,6 +12,17 @@ import swift
912private import RegexTracking
1013private import codeql.swift.regex.Regex
1114
/**
 * Holds if `char` is a single-character mode flag that may appear in a
 * regular expression flag group.
 *
 * The accepted set is the union of the flags taken by the two regex flavours:
 * ```
 * NSRegularExpression accepts: dim suwxDPSUW
 * Regex accepts:               imns x
 * ```
 */
private predicate availableRegexModeCharacter(string char) {
  // Each character of the literal below is one accepted flag.
  char = "dimnsuwxDPSUW".charAt(_)
}
25+
1226/**
1327 * An `Expr` containing a regular expression term, that is, either
1428 * a regular expression literal, or a string literal used in a context where
@@ -283,7 +297,7 @@ abstract class RegExp extends Expr {
private predicate flagGroupStartNoModes(int start, int end) {
  // A flag group opens like an ordinary group, immediately followed by `?`.
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  // The first flag is validated by the shared mode-character predicate rather
  // than a local hard-coded list, so the set of flags accepted here cannot
  // drift from the set accepted for subsequent flags in the same group.
  availableRegexModeCharacter(this.getChar(start + 2)) and
  // `end` is the offset of that first mode character.
  end = start + 2
}
289303
@@ -295,7 +309,7 @@ abstract class RegExp extends Expr {
295309 this .flagGroupStartNoModes ( start , pos )
296310 or
297311 this .modeCharacter ( start , pos - 1 ) and
298- this .getChar ( pos ) in [ "i" , "x" , "s" , "m" , "w" ]
312+ availableRegexModeCharacter ( this .getChar ( pos ) )
299313 }
300314
301315 /**
@@ -333,7 +347,10 @@ abstract class RegExp extends Expr {
333347 or
334348 c = "m" and result = "MULTILINE" // `^` and `$` also match beginning and end of lines
335349 or
336- c = "w" and result = "UNICODE" // Unicode UAX 29 word boundary mode
350+ c = "w" and result = "UNICODEBOUNDARY" // Unicode UAX 29 word boundary mode
351+ or
352+ c = "u" and result = "UNICODE" // Unicode matching
353+ // (other flags exist that are not translated here)
337354 )
338355 }
339356
@@ -344,6 +361,7 @@ abstract class RegExp extends Expr {
344361 * VERBOSE
345362 * DOTALL
346363 * MULTILINE
364+ * UNICODEBOUNDARY
347365 * UNICODE
348366 */
349367 string getAMode ( ) {
0 commit comments