@@ -34,9 +34,11 @@ private import semmle.python.dataflow.new.TypeTracker
3434 *
3535 * 1. If `foo` is a module, and `bar` is an attribute of `foo`, then `from foo import bar` imports
3636 * the attribute `bar` into the current module (binding it to the name `bar`).
37- * 2. If `foo` is a package, and `bar` is a submodule of `foo`, then `from foo import bar` first imports
38- * `foo.bar`, and then reads the `bar` attribute on `foo`. In most cases, that attribute
39- * will then point to the `bar` submodule.
37+ * 2. If `foo` is a package, and `bar` is already defined in `foo/__init__.py`,
38+ * that value will be imported. If it is not defined, and `bar` is a submodule of `foo`, then
39+ * `bar` is imported to `foo`, and the `bar` submodule imported.
40+ * Note: We don't currently model if the attribute is already defined in `__init__.py`
41+ * and always assume that the submodule will be used.
4042 *
4143 * Now, when it comes to how these imports are represented in the AST, things get a bit complicated.
4244 * First of all, both of the above forms of imports get mapped to the same kind of AST node:
@@ -58,7 +60,7 @@ private import semmle.python.dataflow.new.TypeTracker
5860 * package, or the `bar` subpackage of the `foo` package. The practical difference here is the name of
5961 * the module that is imported, as the package `foo.bar` will have the "name" `foo.bar.__init__`,
6062 * corresponding to the fact that the code that is executed is in the `__init__.py` file of the
61- * `bar` package .
63+ * `bar` subpackage .
6264 */
6365module ImportResolution {
6466 /**
@@ -168,8 +170,17 @@ module ImportResolution {
168170 isPreferredModuleForName ( result .getFile ( ) , i .getImportedModuleName ( ) )
169171 }
170172
171- /** Gets a data-flow node that may be a reference to a module with the name `module_name`. */
172- DataFlow:: Node getReferenceToModuleName ( string module_name ) {
173+ /**
174+ * Gets a data-flow node that may be a reference to a module with the name `module_name`.
175+ *
176+ * This is a helper predicate for `getImmediateModuleReference`. It captures the fact that in an
177+ * import such as `import foo`,
178+ * - `foo` may simply be the name of a module, or
179+ * - `foo` may be the name of a package (in which case its name is actually `foo.__init__`), or
180+ * - `foo` may be a module name that has been added to `sys.modules` (in which case its actual name can
181+ * be anything, for instance `os.path` is either `posixpath` or `ntpath`).
182+ */
183+ private DataFlow:: Node getReferenceToModuleName ( string module_name ) {
173184 // Regular import statements, e.g.
174185 // import foo # implicitly `import foo as foo`
175186 // import foo as foo_alias
@@ -188,7 +199,6 @@ module ImportResolution {
188199 // from foo.bar import baz # imports foo.bar.baz as baz
189200 // from foo.bar import baz as baz_alias # imports foo.bar.baz as baz_alias
190201 exists ( Import i , Alias a , ImportMember im | a = i .getAName ( ) and im = a .getValue ( ) |
191- i .isFromImport ( ) and
192202 result .asExpr ( ) = a .getAsname ( ) and
193203 module_name = im .getModule ( ) .( ImportExpr ) .getImportedModuleName ( ) + "." + im .getName ( )
194204 )
@@ -201,12 +211,15 @@ module ImportResolution {
201211 any ( ImportMember i |
202212 i .getModule ( ) .( ImportExpr ) .getImportedModuleName ( ) = module_name
203213 or
204- i .getModule ( ) .( ImportExpr ) .getImportedModuleName ( ) + "." + i .getName ( ) = module_name and
205- none ( )
214+ i .getModule ( ) .( ImportExpr ) .getImportedModuleName ( ) + "." + i .getName ( ) = module_name
206215 )
207216 }
208217
209- /** Gets a dataflow node that is an immediate reference to the module `m`. */
218+ /**
219+ * Gets a dataflow node that is an immediate reference to the module `m`.
220+ *
221+ * Because of attribute lookups, this is mutually recursive with `getModuleReference`.
222+ */
210223 DataFlow:: Node getImmediateModuleReference ( Module m ) {
211224 exists ( string module_name | result = getReferenceToModuleName ( module_name ) |
212225 // Depending on whether the referenced module is a package or not, we may need to add a
@@ -219,9 +232,8 @@ module ImportResolution {
219232 or
220233 // Reading an attribute on a module may return a submodule (or subpackage).
221234 exists ( DataFlow:: AttrRead ar , Module p , string attr_name |
222- ar .getObject ( ) = getModuleReference ( p ) and
235+ ar .accesses ( getModuleReference ( p ) , attr_name ) and
223236 attr_name = any ( Module m0 ) .getFile ( ) .getStem ( ) and
224- ar .getAttributeName ( ) = attr_name and
225237 result = ar
226238 |
227239 isPreferredModuleForName ( m .getFile ( ) , p .getPackageName ( ) + "." + attr_name + [ "" , ".__init__" ] )
@@ -242,15 +254,15 @@ module ImportResolution {
242254
243255 /** Join-order helper for `getModuleReference`. */
244256 pragma [ nomagic]
245- private predicate module_name_in_scope ( DataFlow:: Node node , Scope s , string name , Module m ) {
257+ private predicate module_reference_in_scope ( DataFlow:: Node node , Scope s , string name , Module m ) {
246258 node .getScope ( ) = s and
247259 node .asExpr ( ) .( Name ) .getId ( ) = name and
248260 pragma [ only_bind_into ] ( node ) = getImmediateModuleReference ( pragma [ only_bind_into ] ( m ) )
249261 }
250262
251263 /** Join-order helper for `getModuleReference`. */
252264 pragma [ nomagic]
253- private predicate module_reference_in_scope ( DataFlow:: Node node , Scope s , string name ) {
265+ private predicate module_name_in_scope ( DataFlow:: Node node , Scope s , string name ) {
254266 node .getScope ( ) = s and
255267 exists ( Name n | n = node .asExpr ( ) |
256268 n .getId ( ) = name and
@@ -277,9 +289,9 @@ module ImportResolution {
277289 or
278290 // A reference to a name that is bound to a module in an enclosing scope.
279291 exists ( DataFlow:: Node def , Scope def_scope , Scope use_scope , string name |
280- module_name_in_scope ( pragma [ only_bind_into ] ( def ) , pragma [ only_bind_into ] ( def_scope ) ,
292+ module_reference_in_scope ( pragma [ only_bind_into ] ( def ) , pragma [ only_bind_into ] ( def_scope ) ,
281293 pragma [ only_bind_into ] ( name ) , pragma [ only_bind_into ] ( m ) ) and
282- module_reference_in_scope ( result , use_scope , name ) and
294+ module_name_in_scope ( result , use_scope , name ) and
283295 use_scope .getEnclosingScope * ( ) = def_scope
284296 )
285297 }
0 commit comments