Skip to content

Commit d2af4ab

Browse files
authored
Merge pull request #227 from xiemaisi/js/taint-kinds
JavaScript: Add support for state-based taint tracking.
2 parents 2904ebb + 335adee commit d2af4ab

File tree

15 files changed

+290
-144
lines changed

15 files changed

+290
-144
lines changed

change-notes/1.19/analysis-javascript.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,5 @@
3434
| Whitespace contradicts operator precedence | Fewer false-positive results | This rule no longer flags operators with asymmetric whitespace. |
3535

3636
## Changes to QL libraries
37+
38+
* The flow configuration framework now supports distinguishing and tracking different kinds of taint, specified by an extensible class `FlowLabel` (which can also be referred to by its alias `TaintKind`).

javascript/ql/src/Security/CWE-078/CommandInjection.ql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,8 @@ import semmle.javascript.security.dataflow.CommandInjection::CommandInjection
1717

1818
from Configuration cfg, DataFlow::Node source, DataFlow::Node sink, DataFlow::Node highlight
1919
where cfg.hasFlow(source, sink) and
20-
if cfg.isSink(sink, _) then cfg.isSink(sink, highlight) else highlight = sink
20+
if cfg.isSinkWithHighlight(sink, _) then
21+
cfg.isSinkWithHighlight(sink, highlight)
22+
else
23+
highlight = sink
2124
select highlight, "This command depends on $@.", source, "a user-provided value"

javascript/ql/src/semmle/javascript/dataflow/Configuration.qll

Lines changed: 120 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,35 @@ abstract class Configuration extends string {
8888
/**
8989
* Holds if `source` is a relevant data flow source for this configuration.
9090
*/
91-
abstract predicate isSource(DataFlow::Node source);
91+
predicate isSource(DataFlow::Node source) {
92+
none()
93+
}
94+
95+
/**
96+
* Holds if `source` is a source of flow labelled with `lbl` that is relevant
97+
* for this configuration.
98+
*/
99+
predicate isSource(DataFlow::Node source, FlowLabel lbl) {
100+
none()
101+
}
92102

93103
/**
94104
* Holds if `sink` is a relevant data flow sink for this configuration.
95105
*/
96-
abstract predicate isSink(DataFlow::Node sink);
106+
predicate isSink(DataFlow::Node sink) {
107+
none()
108+
}
97109

98110
/**
99-
* Holds if `source -> sink` should be considered as a flow edge
111+
* Holds if `sink` is a sink of flow labelled with `lbl` that is relevant
112+
* for this configuration.
113+
*/
114+
predicate isSink(DataFlow::Node sink, FlowLabel lbl) {
115+
none()
116+
}
117+
118+
/**
119+
* Holds if `src -> trg` should be considered as a flow edge
100120
* in addition to standard data flow edges.
101121
*/
102122
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg) { none() }
@@ -105,14 +125,22 @@ abstract class Configuration extends string {
105125
* INTERNAL: This predicate should not normally be used outside the data flow
106126
* library.
107127
*
108-
* Holds if `source -> sink` should be considered as a flow edge
128+
* Holds if `src -> trg` should be considered as a flow edge
109129
* in addition to standard data flow edges, with `valuePreserving`
110130
* indicating whether the step preserves values or just taintedness.
111131
*/
112132
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg, boolean valuePreserving) {
113133
isAdditionalFlowStep(src, trg) and valuePreserving = true
114134
}
115135

136+
/**
137+
* Holds if `src -> trg` is a flow edge converting flow with label `inlbl` to
138+
* flow with label `outlbl`.
139+
*/
140+
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg, FlowLabel inlbl, FlowLabel outlbl) {
141+
none()
142+
}
143+
116144
/**
117145
* Holds if the intermediate flow node `node` is prohibited.
118146
*/
@@ -128,6 +156,11 @@ abstract class Configuration extends string {
128156
*/
129157
predicate isBarrier(DataFlow::Node src, DataFlow::Node trg) { none() }
130158

159+
/**
160+
* Holds if flow with label `lbl` cannot flow from `src` to `trg`.
161+
*/
162+
predicate isBarrier(DataFlow::Node src, DataFlow::Node trg, FlowLabel lbl) { none() }
163+
131164
/**
132165
* Holds if data flow node `guard` can act as a barrier when appearing
133166
* in a condition.
@@ -142,7 +175,7 @@ abstract class Configuration extends string {
142175
* Holds if data may flow from `source` to `sink` for this configuration.
143176
*/
144177
predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
145-
isSource(_, this) and isSink(_, this) and
178+
isSource(_, this, _) and isSink(_, this, _) and
146179
exists (SourcePathNode flowsource, SinkPathNode flowsink |
147180
hasPathFlow(flowsource, flowsink) and
148181
source = flowsource.getNode() and
@@ -176,6 +209,44 @@ abstract class Configuration extends string {
176209
}
177210
}
178211

212+
/**
213+
* A label describing the kind of information tracked by a flow configuration.
214+
*
215+
* There are two standard labels "data" and "taint", the former describing values
216+
* that directly originate from a flow source, the latter values that are derived
217+
* from a flow source via one or more transformations (such as string operations).
218+
*/
219+
abstract class FlowLabel extends string {
220+
bindingset[this] FlowLabel() { any() }
221+
}
222+
223+
/**
224+
* A kind of taint tracked by a taint-tracking configuration.
225+
*
226+
* This is an alias of `FlowLabel`, so the two types can be used interchangeably.
227+
*/
228+
class TaintKind = FlowLabel;
229+
230+
/**
231+
* A standard flow label, that is, either `FlowLabel::data()` or `FlowLabel::taint()`.
232+
*/
233+
private class StandardFlowLabel extends FlowLabel {
234+
StandardFlowLabel() { this = "data" or this = "taint" }
235+
}
236+
237+
module FlowLabel {
238+
/**
239+
* Gets the standard flow label for describing values that directly originate from a flow source.
240+
*/
241+
FlowLabel data() { result = "data" }
242+
243+
/**
244+
* Gets the standard flow label for describing values that are influenced ("tainted") by a flow
245+
* source, but not necessarily directly derived from it.
246+
*/
247+
FlowLabel taint() { result = "taint" }
248+
}
249+
179250
/**
180251
* A node that can act as a barrier when appearing in a condition.
181252
*
@@ -353,26 +424,27 @@ private predicate basicFlowStep(DataFlow::Node pred, DataFlow::Node succ, PathSu
353424
isRelevantForward(pred, cfg) and
354425
(
355426
// Local flow
356-
exists (boolean valuePreserving |
357-
localFlowStep(pred, succ, cfg, valuePreserving) and
358-
summary = PathSummary::level(valuePreserving)
427+
exists (FlowLabel predlbl, FlowLabel succlbl |
428+
localFlowStep(pred, succ, cfg, predlbl, succlbl) and
429+
not cfg.isBarrier(pred, succ, predlbl) and
430+
summary = MkPathSummary(false, false, predlbl, succlbl)
359431
)
360432
or
361433
// Flow through properties of objects
362434
propertyFlowStep(pred, succ) and
363-
summary = PathSummary::level(true)
435+
summary = PathSummary::level()
364436
or
365437
// Flow through global variables
366438
globalFlowStep(pred, succ) and
367-
summary = PathSummary::level(true)
439+
summary = PathSummary::level()
368440
or
369441
// Flow into function
370442
callStep(pred, succ) and
371-
summary = PathSummary::call(true)
443+
summary = PathSummary::call()
372444
or
373445
// Flow out of function
374446
returnStep(pred, succ) and
375-
summary = PathSummary::return(true)
447+
summary = PathSummary::return()
376448
)
377449
}
378450

@@ -393,15 +465,21 @@ private predicate exploratoryFlowStep(DataFlow::Node pred, DataFlow::Node succ,
393465
/**
394466
* Holds if `nd` is a source node for configuration `cfg`.
395467
*/
396-
private predicate isSource(DataFlow::Node nd, DataFlow::Configuration cfg) {
397-
cfg.isSource(nd) or nd.(AdditionalSource).isSourceFor(cfg)
468+
private predicate isSource(DataFlow::Node nd, DataFlow::Configuration cfg, FlowLabel lbl) {
469+
(cfg.isSource(nd) or nd.(AdditionalSource).isSourceFor(cfg)) and
470+
lbl = FlowLabel::data()
471+
or
472+
cfg.isSource(nd, lbl)
398473
}
399474

400475
/**
401476
* Holds if `nd` is a sink node for configuration `cfg`.
402477
*/
403-
private predicate isSink(DataFlow::Node nd, DataFlow::Configuration cfg) {
404-
cfg.isSink(nd) or nd.(AdditionalSink).isSinkFor(cfg)
478+
private predicate isSink(DataFlow::Node nd, DataFlow::Configuration cfg, FlowLabel lbl) {
479+
(cfg.isSink(nd) or nd.(AdditionalSink).isSinkFor(cfg)) and
480+
lbl = any(StandardFlowLabel f)
481+
or
482+
cfg.isSink(nd, lbl)
405483
}
406484

407485
/**
@@ -410,7 +488,7 @@ private predicate isSink(DataFlow::Node nd, DataFlow::Configuration cfg) {
410488
* No call/return matching is done, so this is a relatively coarse over-approximation.
411489
*/
412490
private predicate isRelevantForward(DataFlow::Node nd, DataFlow::Configuration cfg) {
413-
isSource(nd, cfg)
491+
isSource(nd, cfg, _)
414492
or
415493
exists (DataFlow::Node mid |
416494
isRelevantForward(mid, cfg) and exploratoryFlowStep(mid, nd, cfg)
@@ -424,7 +502,7 @@ private predicate isRelevantForward(DataFlow::Node nd, DataFlow::Configuration c
424502
*/
425503
private predicate isRelevant(DataFlow::Node nd, DataFlow::Configuration cfg) {
426504
isRelevantForward(nd, cfg) and
427-
isSink(nd, cfg)
505+
isSink(nd, cfg, _)
428506
or
429507
exists (DataFlow::Node mid |
430508
isRelevant(mid, cfg) and
@@ -472,7 +550,7 @@ private predicate reachableFromInput(Function f, DataFlow::Node invk,
472550
DataFlow::Node input, DataFlow::Node nd,
473551
DataFlow::Configuration cfg, PathSummary summary) {
474552
callInputStep(f, invk, input, nd, cfg) and
475-
summary = PathSummary::empty()
553+
summary = PathSummary::level()
476554
or
477555
exists (DataFlow::Node mid, PathSummary oldSummary, PathSummary newSummary |
478556
reachableFromInput(f, invk, input, mid, cfg, oldSummary) and
@@ -487,11 +565,12 @@ private predicate reachableFromInput(Function f, DataFlow::Node invk,
487565
* configuration `cfg`, possibly through callees.
488566
*/
489567
private predicate flowThroughCall(DataFlow::Node input, DataFlow::Node invk,
490-
DataFlow::Configuration cfg, boolean valuePreserving) {
568+
DataFlow::Configuration cfg, PathSummary summary) {
491569
exists (Function f, DataFlow::ValueNode ret |
492570
ret.asExpr() = f.getAReturnedExpr() and
493571
calls(invk, f) and // Do not consider partial calls
494-
reachableFromInput(f, invk, input, ret, cfg, PathSummary::level(valuePreserving))
572+
reachableFromInput(f, invk, input, ret, cfg, summary) and
573+
not cfg.isBarrier(ret, invk)
495574
)
496575
}
497576

@@ -502,7 +581,7 @@ private predicate flowThroughCall(DataFlow::Node input, DataFlow::Node invk,
502581
private predicate storeStep(DataFlow::Node pred, DataFlow::SourceNode succ, string prop,
503582
DataFlow::Configuration cfg, PathSummary summary) {
504583
basicStoreStep(pred, succ, prop) and
505-
summary = PathSummary::level(true)
584+
summary = PathSummary::level()
506585
or
507586
exists (Function f, DataFlow::Node mid, DataFlow::SourceNode base |
508587
// `f` stores its parameter `pred` in property `prop` of a value that it returns,
@@ -526,8 +605,7 @@ private predicate reachableFromStoreBase(string prop, DataFlow::Node rhs, DataFl
526605
exists (DataFlow::Node mid, PathSummary oldSummary, PathSummary newSummary |
527606
reachableFromStoreBase(prop, rhs, mid, cfg, oldSummary) and
528607
flowStep(mid, cfg, nd, newSummary) and
529-
newSummary.valuePreserving() = true and
530-
summary = oldSummary.append(newSummary)
608+
summary = oldSummary.appendValuePreserving(newSummary)
531609
)
532610
}
533611

@@ -557,10 +635,7 @@ private predicate flowStep(DataFlow::Node pred, DataFlow::Configuration cfg,
557635
or
558636
// Flow through a function that returns a value that depends on one of its arguments
559637
// or a captured variable
560-
exists (boolean valuePreserving |
561-
flowThroughCall(pred, succ, cfg, valuePreserving) and
562-
summary = PathSummary::level(valuePreserving)
563-
)
638+
flowThroughCall(pred, succ, cfg, summary)
564639
or
565640
// Flow through a property write/read pair
566641
flowThroughProperty(pred, succ, cfg, summary)
@@ -588,9 +663,11 @@ private predicate flowsTo(PathNode flowsource, DataFlow::Node source,
588663
*/
589664
private predicate reachableFromSource(DataFlow::Node nd, DataFlow::Configuration cfg,
590665
PathSummary summary) {
591-
isSource(nd, cfg) and
592-
not cfg.isBarrier(nd) and
593-
summary = PathSummary::empty()
666+
exists (FlowLabel lbl |
667+
isSource(nd, cfg, lbl) and
668+
not cfg.isBarrier(nd) and
669+
summary = MkPathSummary(false, false, lbl, lbl)
670+
)
594671
or
595672
exists (DataFlow::Node pred, PathSummary oldSummary, PathSummary newSummary |
596673
reachableFromSource(pred, cfg, oldSummary) and
@@ -601,21 +678,18 @@ private predicate reachableFromSource(DataFlow::Node nd, DataFlow::Configuration
601678

602679
/**
603680
* Holds if `nd` can be reached from a source under `cfg`, and in turn a sink is
604-
* reachable from `nd`. The path from the source to `nd` is summarized by `summary1`,
605-
* the path from `nd` to the sink is summarized by `summary2`.
681+
* reachable from `nd`, where the path from the source to `nd` is summarized by `summary`.
606682
*/
607683
private predicate onPath(DataFlow::Node nd, DataFlow::Configuration cfg,
608-
PathSummary summary1, PathSummary summary2) {
609-
reachableFromSource(nd, cfg, summary1) and
610-
isSink(nd, cfg) and
611-
not cfg.isBarrier(nd) and
612-
summary2 = PathSummary::empty()
684+
PathSummary summary) {
685+
reachableFromSource(nd, cfg, summary) and
686+
isSink(nd, cfg, summary.getEndLabel()) and
687+
not cfg.isBarrier(nd)
613688
or
614-
exists (DataFlow::Node mid, PathSummary newSummary, PathSummary oldSummary |
615-
onPath(mid, cfg, _, oldSummary) and
616-
flowStep(nd, cfg, mid, newSummary) and
617-
reachableFromSource(nd, cfg, summary1) and
618-
summary2 = oldSummary.prepend(newSummary)
689+
exists (DataFlow::Node mid, PathSummary stepSummary |
690+
reachableFromSource(nd, cfg, summary) and
691+
flowStep(nd, cfg, mid, stepSummary) and
692+
onPath(mid, cfg, summary.append(stepSummary))
619693
)
620694
}
621695

@@ -624,7 +698,7 @@ private predicate onPath(DataFlow::Node nd, DataFlow::Configuration cfg,
624698
*/
625699
private newtype TPathNode =
626700
MkPathNode(DataFlow::Node nd, DataFlow::Configuration cfg, PathSummary summary) {
627-
onPath(nd, cfg, summary, _)
701+
onPath(nd, cfg, summary)
628702
}
629703

630704
/**
@@ -693,7 +767,7 @@ class PathNode extends TPathNode {
693767
*/
694768
class SourcePathNode extends PathNode {
695769
SourcePathNode() {
696-
isSource(nd, cfg)
770+
isSource(nd, cfg, _)
697771
}
698772
}
699773

@@ -702,7 +776,7 @@ class SourcePathNode extends PathNode {
702776
*/
703777
class SinkPathNode extends PathNode {
704778
SinkPathNode() {
705-
isSink(nd, cfg)
779+
isSink(nd, cfg, _)
706780
}
707781
}
708782

javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,19 @@ module TaintTracking {
3838
* The smaller this predicate is, the faster `hasFlow()` will converge.
3939
*/
4040
// overridden to provide taint-tracking specific qldoc
41-
abstract override predicate isSource(DataFlow::Node source);
41+
override predicate isSource(DataFlow::Node source) {
42+
super.isSource(source)
43+
}
4244

4345
/**
4446
* Holds if `sink` is a relevant taint sink.
4547
*
4648
* The smaller this predicate is, the faster `hasFlow()` will converge.
4749
*/
4850
// overridden to provide taint-tracking specific qldoc
49-
abstract override predicate isSink(DataFlow::Node sink);
51+
override predicate isSink(DataFlow::Node sink) {
52+
super.isSink(sink)
53+
}
5054

5155
/** Holds if the intermediate node `node` is a taint sanitizer. */
5256
predicate isSanitizer(DataFlow::Node node) {
@@ -58,6 +62,11 @@ module TaintTracking {
5862
none()
5963
}
6064

65+
/** Holds if the edge from `source` to `sink` is a taint sanitizer for data labelled with `lbl`. */
66+
predicate isSanitizer(DataFlow::Node source, DataFlow::Node sink, DataFlow::FlowLabel lbl) {
67+
none()
68+
}
69+
6170
/**
6271
* Holds if data flow node `guard` can act as a sanitizer when appearing
6372
* in a condition.
@@ -82,6 +91,12 @@ module TaintTracking {
8291
isSanitizer(source, sink)
8392
}
8493

94+
final
95+
override predicate isBarrier(DataFlow::Node source, DataFlow::Node sink, DataFlow::FlowLabel lbl) {
96+
super.isBarrier(source, sink, lbl) or
97+
isSanitizer(source, sink, lbl)
98+
}
99+
85100
final
86101
override predicate isBarrierGuard(DataFlow::BarrierGuardNode guard) {
87102
super.isBarrierGuard(guard) or

0 commit comments

Comments
 (0)