Skip to content

Commit c412227

Browse files
committed
Python: Bring back support for flow-summaries
Also needed to fix up `TestUtil/UnresolvedCalls.qll` after a bad merge conflict resolution. Since all calls are now DataFlowCall, and not JUST the ones that can be resolved, we need to put in the restriction that the callable can also be resolved.
1 parent e5fdeae commit c412227

File tree

16 files changed

+218
-93
lines changed

16 files changed

+218
-93
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll

Lines changed: 57 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,25 @@ private import python
3636
private import DataFlowPublic
3737
private import DataFlowPrivate
3838
private import FlowSummaryImpl as FlowSummaryImpl
39+
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
3940

4041
newtype TParameterPosition =
4142
/** Used for `self` in methods, and `cls` in classmethods. */
4243
TSelfParameterPosition() or
43-
TPositionalParameterPosition(int pos) { pos = any(Parameter p).getPosition() } or
44-
TKeywordParameterPosition(string name) { name = any(Parameter p).getName() } or
44+
TPositionalParameterPosition(int pos) {
45+
pos = any(Parameter p).getPosition()
46+
or
47+
// since synthetic parameters are made for a synthetic summary callable, based on
48+
// what Argument positions they have flow for, we need to make sure we have such
49+
// parameter positions available.
50+
FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, pos)
51+
} or
52+
TKeywordParameterPosition(string name) {
53+
name = any(Parameter p).getName()
54+
or
55+
// see comment for TPositionalParameterPosition
56+
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name)
57+
} or
4558
TStarArgsParameterPosition(int pos) {
4659
// since `.getPosition` does not work for `*args`, we need *args parameter positions
4760
// at index 1 larger than the largest positional parameter position (and 0 must be
@@ -114,8 +127,20 @@ class ParameterPosition extends TParameterPosition {
114127
newtype TArgumentPosition =
115128
/** Used for `self` in methods, and `cls` in classmethods. */
116129
TSelfArgumentPosition() or
117-
TPositionalArgumentPosition(int pos) { exists(any(CallNode c).getArg(pos)) } or
118-
TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) } or
130+
TPositionalArgumentPosition(int pos) {
131+
exists(any(CallNode c).getArg(pos))
132+
or
133+
// since synthetic calls within a summarized callable could use a unique argument
134+
// position, we need to ensure we make these available (these are specified as
135+
// parameters in the flow-summary spec)
136+
FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, pos)
137+
} or
138+
TKeywordArgumentPosition(string name) {
139+
exists(any(CallNode c).getArgByName(name))
140+
or
141+
// see comment for TPositionalArgumentPosition
142+
FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name)
143+
} or
119144
TStarArgsArgumentPosition(int pos) { exists(Call c | c.getPositionalArg(pos) instanceof Starred) } or
120145
TDictSplatArgumentPosition()
121146

@@ -376,7 +401,7 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
376401

377402
LibraryCallableValue() { this = TLibraryCallable(callable) }
378403

379-
override string toString() { result = callable.toString() }
404+
override string toString() { result = "LibraryCallableValue: " + callable.toString() }
380405

381406
override string getQualifiedName() { result = callable.toString() }
382407

@@ -1038,7 +1063,8 @@ predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
10381063
* Holds if the argument of `call` at position `apos` is `arg`. This is just a helper
10391064
* predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`.
10401065
*/
1041-
private predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
1066+
cached
1067+
predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
10421068
exists(int index |
10431069
apos.isPositional(index) and
10441070
arg.asCfgNode() = call.getArg(index)
@@ -1170,8 +1196,8 @@ predicate getCallArg(
11701196
// DataFlowCall
11711197
// =============================================================================
11721198
newtype TDataFlowCall =
1173-
TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) }
1174-
or
1199+
TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
1200+
TPotentialLibraryCall(CallNode call) or
11751201
/** A synthesized call inside a summarized callable */
11761202
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
11771203
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
@@ -1253,49 +1279,44 @@ class NormalCall extends ExtractedDataFlowCall, TNormalCall {
12531279
}
12541280

12551281
/**
1256-
* A call to a summarized callable, a `LibraryCallable`.
1282+
* A potential call to a summarized callable, a `LibraryCallable`.
12571283
*
12581284
* We currently exclude all resolved calls. This means that a call to, say, `map`, which
12591285
* is a `ClassCall`, cannot currently be given a summary.
12601286
* We hope to lift this restriction in the future and include all potential calls to summaries
12611287
* in this class.
12621288
*/
1263-
class LibraryCall extends DataFlowCall {
1264-
LibraryCall() {
1265-
// TODO(call-graph): implement this!
1266-
none()
1267-
}
1289+
class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall {
1290+
CallNode call;
1291+
1292+
PotentialLibraryCall() { this = TPotentialLibraryCall(call) }
12681293

12691294
override string toString() {
1270-
// TODO(call-graph): implement this!
1271-
none()
1295+
// note: if we used toString directly on the CallNode we would get
1296+
// `ControlFlowNode for func()`
1297+
// but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
1298+
// instead.
1299+
result = call.getNode().toString()
12721300
}
12731301

1274-
// We cannot refer to a `LibraryCallable` here,
1302+
// We cannot refer to a `PotentialLibraryCall` here,
12751303
// as that could in turn refer to type tracking.
1276-
// This call will be tied to a `LibraryCallable` via
1277-
// `getViableCallabe` when the global data flow is assembled.
1304+
// This call will be tied to a `PotentialLibraryCall` via
1305+
// `viableCallable` when the global data flow is assembled.
12781306
override DataFlowCallable getCallable() { none() }
12791307

12801308
override ArgumentNode getArgument(ArgumentPosition apos) {
1281-
// TODO(call-graph): implement this!
1282-
none()
1283-
}
1284-
1285-
override ControlFlowNode getNode() {
1286-
// TODO(call-graph): implement this!
1287-
none()
1309+
normalCallArg(call, result, apos)
1310+
or
1311+
// potential self argument, from `foo.bar()` -- note that this could also just be a
1312+
// module reference, but we really don't have a good way of knowing :|
1313+
apos.isSelf() and
1314+
result = any(MethodCallNode mc | mc.getFunction().asCfgNode() = call.getFunction()).getObject()
12881315
}
12891316

1290-
override DataFlowCallable getEnclosingCallable() {
1291-
// TODO(call-graph): implement this!
1292-
none()
1293-
}
1317+
override ControlFlowNode getNode() { result = call }
12941318

1295-
override Location getLocation() {
1296-
// TODO(call-graph): implement this!
1297-
none()
1298-
}
1319+
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
12991320
}
13001321

13011322
/**
@@ -1433,7 +1454,8 @@ DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
14331454
// Instead we resolve the call from the summary.
14341455
exists(LibraryCallable callable |
14351456
result = TLibraryCallable(callable) and
1436-
call.getNode() = callable.getACall().getNode()
1457+
call.getNode() = callable.getACall().getNode() and
1458+
call instanceof PotentialLibraryCall
14371459
)
14381460
}
14391461

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -981,18 +981,10 @@ class LambdaCallKind = Unit;
981981

982982
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
983983
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
984-
// TODO(call-graph): implement this!
985-
//
986-
// // lambda
987-
// kind = kind and
988-
// creation.asExpr() = c.(DataFlowLambda).getDefinition()
989-
// or
990-
// // normal function
991-
// exists(FunctionDef def |
992-
// def.defines(creation.asVar().getSourceVariable()) and
993-
// def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
994-
// )
995-
// or
984+
// lambda and plain functions
985+
kind = kind and
986+
creation.asExpr() = c.(DataFlowPlainFunction).getScope().getDefinition()
987+
or
996988
// summarized function
997989
exists(kind) and // avoid warning on unused 'kind'
998990
exists(Call call |

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,11 +337,19 @@ abstract class ArgumentNode extends Node {
337337
}
338338

339339
/**
340-
* A data flow node that represents a call argument found in the source code,
341-
* where the call can be resolved.
340+
* A data flow node that represents a call argument found in the source code.
342341
*/
343342
class ExtractedArgumentNode extends ArgumentNode {
344-
ExtractedArgumentNode() { getCallArg(_, _, _, this, _) }
343+
ExtractedArgumentNode() {
344+
// for resolved calls, we need to allow all argument nodes
345+
getCallArg(_, _, _, this, _)
346+
or
347+
// for potential summaries we allow all normal call arguments
348+
normalCallArg(_, this, _)
349+
or
350+
// and self arguments
351+
this = any(MethodCallNode mc).getObject()
352+
}
345353

346354
final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
347355
this = call.getArgument(pos) and

python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,34 @@ string getComponentSpecificCsv(SummaryComponent sc) {
115115
}
116116

117117
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
118-
string getParameterPositionCsv(ParameterPosition pos) { result = pos.toString() }
118+
string getParameterPositionCsv(ParameterPosition pos) {
119+
pos.isSelf() and result = "self"
120+
or
121+
exists(int i |
122+
pos.isPositional(i) and
123+
result = i.toString()
124+
)
125+
or
126+
exists(string name |
127+
pos.isKeyword(name) and
128+
result = name + ":"
129+
)
130+
}
119131

120132
/** Gets the textual representation of an argument position in the format used for flow summaries. */
121-
string getArgumentPositionCsv(ArgumentPosition pos) { result = pos.toString() }
133+
string getArgumentPositionCsv(ArgumentPosition pos) {
134+
pos.isSelf() and result = "self"
135+
or
136+
exists(int i |
137+
pos.isPositional(i) and
138+
result = i.toString()
139+
)
140+
or
141+
exists(string name |
142+
pos.isKeyword(name) and
143+
result = name + ":"
144+
)
145+
}
122146

123147
/** Holds if input specification component `c` needs a reference. */
124148
predicate inputNeedsReferenceSpecific(string c) { none() }
@@ -200,33 +224,55 @@ module ParsePositions {
200224
)
201225
}
202226

203-
predicate isParsedParameterPosition(string c, int i) {
227+
predicate isParsedPositionalParameterPosition(string c, int i) {
204228
isParamBody(c) and
205229
i = AccessPath::parseInt(c)
206230
}
207231

208-
predicate isParsedArgumentPosition(string c, int i) {
232+
predicate isParsedKeywordParameterPosition(string c, string paramName) {
233+
isParamBody(c) and
234+
c = paramName + ":"
235+
}
236+
237+
predicate isParsedPositionalArgumentPosition(string c, int i) {
209238
isArgBody(c) and
210239
i = AccessPath::parseInt(c)
211240
}
241+
242+
predicate isParsedKeywordArgumentPosition(string c, string argName) {
243+
isArgBody(c) and
244+
c = argName + ":"
245+
}
212246
}
213247

214248
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
215249
ArgumentPosition parseParamBody(string s) {
216-
none()
217-
// TODO(call-graph): implement this!
218-
// exists(int i |
219-
// ParsePositions::isParsedParameterPosition(s, i) and
220-
// result.isPositional(i)
221-
// )
250+
exists(int i |
251+
ParsePositions::isParsedPositionalParameterPosition(s, i) and
252+
result.isPositional(i)
253+
)
254+
or
255+
exists(string name |
256+
ParsePositions::isParsedKeywordParameterPosition(s, name) and
257+
result.isKeyword(name)
258+
)
259+
or
260+
s = "self" and
261+
result.isSelf()
222262
}
223263

224264
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
225265
ParameterPosition parseArgBody(string s) {
226-
none()
227-
// TODO(call-graph): implement this!
228-
// exists(int i |
229-
// ParsePositions::isParsedArgumentPosition(s, i) and
230-
// result.isPositional(i)
231-
// )
266+
exists(int i |
267+
ParsePositions::isParsedPositionalArgumentPosition(s, i) and
268+
result.isPositional(i)
269+
)
270+
or
271+
exists(string name |
272+
ParsePositions::isParsedKeywordArgumentPosition(s, name) and
273+
result.isKeyword(name)
274+
)
275+
or
276+
s = "self" and
277+
result.isSelf()
232278
}

python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
1212
override predicate hasActualResult(Location location, string element, string tag, string value) {
1313
exists(location.getFile().getRelativePath()) and
1414
exists(CallNode call |
15-
not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call) and
15+
not exists(DataFlowPrivate::DataFlowCall dfc |
16+
exists(dfc.getCallable()) and dfc.getNode() = call
17+
) and
1618
not DataFlowPrivate::resolveClassCall(call, _) and
1719
not call = API::builtin(_).getACall().asCfgNode() and
1820
location = call.getLocation() and
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
12
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
23
| test.py:1:19:1:19 | ControlFlowNode for x |
34
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
12
| test.py:4:10:4:10 | ControlFlowNode for z |
23
| test.py:7:19:7:19 | ControlFlowNode for a |

python/ql/test/experimental/dataflow/basic/global.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
12
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
23
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
34
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

python/ql/test/experimental/dataflow/basic/globalStep.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
12
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
23
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
34
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

python/ql/test/experimental/dataflow/basic/local.expected

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
2+
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
3+
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
4+
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
5+
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
16
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
27
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
38
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |

0 commit comments

Comments
 (0)