@@ -3069,6 +3069,212 @@ private module StdlibPrivate {
30693069 override string getName ( ) { result = "re." + method }
30703070 }
30713071
/**
 * A flow summary for `re.compile`, which produces a compiled regex object.
 *
 * The pattern handed to `re.compile` (positionally or via the `pattern` keyword)
 * is recorded in the `pattern` attribute of the returned object.
 *
 * See https://docs.python.org/3.11/library/re.html#re-objects
 */
class RePatternSummary extends SummarizedCallable {
  RePatternSummary() { this = "re.Pattern" }

  /** Gets a call to `re.compile`. */
  override DataFlow::CallCfgNode getACall() {
    exists(API::Node compile | compile = API::moduleImport("re").getMember("compile") |
      result = compile.getACall()
    )
  }

  /** Gets an argument node that a reference to `re.compile` itself reaches. */
  override DataFlow::ArgumentNode getACallback() {
    exists(API::Node compile | compile = API::moduleImport("re").getMember("compile") |
      result = compile.getAValueReachableFromSource()
    )
  }

  /**
   * Holds if the pattern argument flows, value-preserving, into the `pattern`
   * attribute of the return value.
   */
  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    output = "ReturnValue.Attribute[pattern]" and
    input = ["Argument[0]", "Argument[pattern:]"]
  }
}
3094+
/**
 * A flow summary for the calls that return a `re.Match` object: `match`,
 * `search` and `fullmatch`, either as functions on the `re` module
 * (`"re.Match"`) or as methods on the result of `re.compile`
 * (`"compiled re.Match"`).
 *
 * See https://docs.python.org/3/library/re.html#re.Match
 */
class ReMatchSummary extends SummarizedCallable {
  ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }

  /** Gets a call to `match`, `search` or `fullmatch` of the flavor this summary stands for. */
  override DataFlow::CallCfgNode getACall() {
    exists(API::Node receiver |
      // module-level functions, e.g. `re.match(pattern, string)`
      this = "re.Match" and
      receiver = API::moduleImport("re")
      or
      // methods on a compiled pattern, e.g. `re.compile(pattern).match(string)`
      this = "compiled re.Match" and
      receiver = any(RePatternSummary c).getACall().(API::CallNode).getReturn()
    |
      result = receiver.getMember(["match", "search", "fullmatch"]).getACall()
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  /**
   * Holds for the flow of the subject string and of the regex pattern into the
   * returned match object.
   */
  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // the string being matched against
    exists(string positional |
      // the module-level functions take the pattern first, so the string is one further along
      this = "re.Match" and positional = "Argument[1]"
      or
      this = "compiled re.Match" and positional = "Argument[0]"
    |
      input = [positional, "Argument[string:]"] and
      (
        preservesValue = true and
        output = "ReturnValue.Attribute[string]"
        or
        // `match[g]` behaves like `match.group(g)`. Because the index may be
        // either an integer or a string, the result is modeled both as a list
        // element and as a dictionary element. This is a workaround, since
        // flow summaries cannot describe the subscript operator directly.
        preservesValue = false and
        output = ["ReturnValue.ListElement", "ReturnValue.DictionaryElementAny"]
      )
    )
    or
    // the regex pattern, exposed as `match.re.pattern`
    preservesValue = true and
    output = "ReturnValue.Attribute[re].Attribute[pattern]" and
    (
      this = "re.Match" and
      input = ["Argument[0]", "Argument[pattern:]"]
      or
      // for a compiled regex the pattern is already stored in its `pattern` attribute
      this = "compiled re.Match" and
      input = "Argument[self].Attribute[pattern]"
    )
  }
}
3150+
/**
 * A flow summary for the `expand`, `group`, `groups` and `groupdict` methods
 * called on a `re.Match` object.
 *
 * See https://docs.python.org/3/library/re.html#re.Match
 */
class ReMatchMethodsSummary extends SummarizedCallable {
  string methodName;

  ReMatchMethodsSummary() {
    methodName in ["expand", "group", "groups", "groupdict"] and
    this = "re.Match." + methodName
  }

  /** Gets a call to `methodName` on the result of a call modeled by `ReMatchSummary`. */
  override DataFlow::CallCfgNode getACall() {
    exists(ReMatchSummary matchProducer, API::CallNode matchCall |
      matchCall = matchProducer.getACall() and
      result = matchCall.getReturn().getMember(methodName).getACall()
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  /**
   * Holds for the (non value-preserving) flow from the match object's `string`
   * attribute, and for `expand` also from its first argument, to the result.
   */
  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // none of these steps preserve the value
    preservesValue = false and
    (
      // `expand` combines its template argument with the matched string
      methodName = "expand" and
      input = ["Argument[0]", "Argument[self].Attribute[string]"] and
      output = "ReturnValue"
      or
      // the remaining methods only read from the matched string
      input = "Argument[self].Attribute[string]" and
      (
        methodName = "group" and
        output = ["ReturnValue", "ReturnValue.ListElement"]
        or
        methodName = "groups" and
        output = "ReturnValue.ListElement"
        or
        methodName = "groupdict" and
        output = "ReturnValue.DictionaryElementAny"
      )
    )
  }
}
3202+
/**
 * A flow summary for the `re` functions that do not return a `re.Match`
 * object: `split`, `findall`, `finditer`, `sub` and `subn`, either as functions
 * on the `re` module (`"re.<name>"`) or as methods on the result of
 * `re.compile` (`"compiled re.<name>"`).
 *
 * See https://docs.python.org/3/library/re.html#functions
 */
class ReFunctionsSummary extends SummarizedCallable {
  string methodName;

  ReFunctionsSummary() {
    this = ["re.", "compiled re."] + methodName and
    methodName in ["split", "findall", "finditer", "sub", "subn"]
  }

  /** Gets a call to `methodName` of the flavor this summary stands for. */
  override DataFlow::CallCfgNode getACall() {
    exists(API::Node receiver |
      // module-level function, e.g. `re.split(pattern, string)`
      this = "re." + methodName and
      receiver = API::moduleImport("re")
      or
      // method on a compiled pattern, e.g. `re.compile(pattern).split(string)`
      this = "compiled re." + methodName and
      receiver = any(RePatternSummary c).getACall().(API::CallNode).getReturn()
    |
      result = receiver.getMember(methodName).getACall()
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  /**
   * Holds for the (non value-preserving) flow from the subject string, and for
   * substitutions also from the replacement, to the result.
   */
  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // none of these steps preserve the value
    preservesValue = false and
    exists(int offset |
      // The module-level functions take the pattern as their first argument,
      // whereas the methods on a compiled regex do not, so positional indices
      // are shifted by one for the latter.
      this = "re." + methodName and offset = 0
      or
      this = "compiled re." + methodName and offset = 1
    |
      // flow from the input string to the results
      exists(int stringIndex |
        stringIndex = methodName.(RegexExecutionMethod).getStringArgIndex() - offset and
        input = ["Argument[" + stringIndex + "]", "Argument[string:]"]
      ) and
      (
        // TODO: Since we currently model iterables as tainted when their elements
        // are, the result of findall, finditer, split needs to be tainted as a
        // whole (`ReturnValue`) in addition to its elements.
        methodName = ["split", "findall", "finditer"] and
        output = ["ReturnValue.ListElement", "ReturnValue"]
        or
        methodName = "sub" and
        output = "ReturnValue"
        or
        methodName = "subn" and
        output = "ReturnValue.TupleElement[0]"
      )
      or
      // flow from the replacement value used for substitution
      exists(string replSpec |
        replSpec = ["Argument[" + (1 - offset) + "]", "Argument[repl:]"] and
        // `repl` can also be a function, in which case its return value is used
        input = [replSpec, replSpec + ".ReturnValue"]
      ) and
      (
        methodName = "sub" and
        output = "ReturnValue"
        or
        methodName = "subn" and
        output = "ReturnValue.TupleElement[0]"
      )
    )
  }
}
3277+
30723278 /**
30733279 * A call to 're.escape'.
30743280 * See https://docs.python.org/3/library/re.html#re.escape
0 commit comments