@@ -3026,6 +3026,137 @@ private module StdlibPrivate {
30263026 override string getName ( ) { result = "re." + method }
30273027 }
30283028
/**
 * A flow summary for `re.compile`, describing the compiled pattern object it returns.
 *
 * The regex passed to `re.compile` is made available in the `pattern` attribute
 * of the returned object.
 *
 * See https://docs.python.org/3.11/library/re.html#re-objects
 */
class RePatternSummary extends SummarizedCallable {
  RePatternSummary() { this = "re.Pattern" }

  /** Gets a direct call to `re.compile`. */
  override DataFlow::CallCfgNode getACall() {
    exists(API::Node compile | compile = API::moduleImport("re").getMember("compile") |
      result = compile.getACall()
    )
  }

  /** Gets a value reachable from `re.compile` that is used as an argument. */
  override DataFlow::ArgumentNode getACallback() {
    exists(API::Node compile | compile = API::moduleImport("re").getMember("compile") |
      result = compile.getAValueReachableFromSource()
    )
  }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // the pattern, given positionally or by keyword, ends up in `<result>.pattern`
    preservesValue = true and
    output = "ReturnValue.Attribute[pattern]" and
    input = ["Argument[0]", "Argument[pattern:]"]
  }
}
3051+
/**
 * A flow summary for the `match`, `search` and `fullmatch` calls that return a
 * `re.Match` object.
 *
 * Two variants are modeled:
 * - `"re.Match"`: the functions on the `re` module itself, and
 * - `"compiled re.Match"`: the same-named methods on the result of `re.compile`.
 *
 * See https://docs.python.org/3/library/re.html#re.Match
 */
class ReMatchSummary extends SummarizedCallable {
  ReMatchSummary() { this in ["re.Match", "compiled re.Match"] }

  override DataFlow::CallCfgNode getACall() {
    exists(string matcher | matcher in ["match", "search", "fullmatch"] |
      // module-level function, e.g. `re.search(pattern, string)`
      this = "re.Match" and
      result = API::moduleImport("re").getMember(matcher).getACall()
      or
      // method on a compiled pattern, e.g. `re.compile(pattern).search(string)`
      this = "compiled re.Match" and
      exists(RePatternSummary compiled |
        result = compiled.getACall().(API::CallNode).getReturn().getMember(matcher).getACall()
      )
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // The regex pattern is exposed as `<match>.re.pattern`.
    preservesValue = true and
    output = "ReturnValue.Attribute[re].Attribute[pattern]" and
    (
      this = "re.Match" and input = ["Argument[0]", "Argument[pattern:]"]
      or
      // for compiled regexes, the pattern is already stored in the `pattern` attribute
      this = "compiled re.Match" and input = "Argument[self].Attribute[pattern]"
    )
    or
    // The string being matched: its positional index depends on whether the
    // pattern is also passed as an argument (module-level function) or not
    // (method on a compiled pattern).
    exists(string positional |
      this = "re.Match" and positional = "Argument[1]"
      or
      this = "compiled re.Match" and positional = "Argument[0]"
    |
      input = [positional, "Argument[string:]"] and
      (
        // stored as-is in `<match>.string`
        preservesValue = true and
        output = "ReturnValue.Attribute[string]"
        or
        // indexing such as `match[g]` is the same as `match.group(g)`.
        // Since you can index with both integers and strings, it is modeled as
        // both list element and dictionary element... a bit of a hack, but there
        // is no way to model subscript operators directly with flow summaries.
        preservesValue = false and
        output = ["ReturnValue.ListElement", "ReturnValue.DictionaryElementAny"]
      )
    )
  }
}
3107+
/**
 * A flow summary for the `expand`, `group`, `groups` and `groupdict` methods
 * called on a `re.Match` object.
 *
 * See https://docs.python.org/3/library/re.html#re.Match
 */
class ReMatchMethodsSummary extends SummarizedCallable {
  /** The name of the modeled method. */
  string methodName;

  ReMatchMethodsSummary() {
    methodName in ["expand", "group", "groups", "groupdict"] and
    this = "re.Match." + methodName
  }

  /** Gets a call to `methodName` on the result of a call modeled by `ReMatchSummary`. */
  override DataFlow::CallCfgNode getACall() {
    exists(ReMatchSummary producer |
      result = producer.getACall().(API::CallNode).getReturn().getMember(methodName).getACall()
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // none of the flow modeled here is value-preserving
    preservesValue = false and
    (
      // `expand` additionally propagates from its first argument to the result
      methodName = "expand" and
      input = "Argument[0]" and
      output = "ReturnValue"
      or
      // every method propagates from the `string` attribute of the match object;
      // only the shape of the output differs per method
      input = "Argument[self].Attribute[string]" and
      (
        methodName = "expand" and
        output = "ReturnValue"
        or
        methodName = "group" and
        output = ["ReturnValue", "ReturnValue.ListElement"]
        or
        methodName = "groups" and
        output = "ReturnValue.ListElement"
        or
        methodName = "groupdict" and
        output = "ReturnValue.DictionaryElementAny"
      )
    )
  }
}
3159+
30293160 /**
30303161 * A call to 're.escape'.
30313162 * See https://docs.python.org/3/library/re.html#re.escape
0 commit comments