Skip to content

Commit 8ac1367

Browse files
committed
Shared: Generate value-preserving summaries when possible.
1 parent f5a295c commit 8ac1367

File tree

2 files changed

+131
-45
lines changed

2 files changed

+131
-45
lines changed

shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll

Lines changed: 126 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ private import codeql.dataflow.internal.ContentDataFlowImpl
1111
private import codeql.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
1212
private import codeql.util.Location
1313
private import ModelPrinting
14+
private import codeql.util.Unit
1415

1516
/**
1617
* Provides language-specific model generator parameters.
@@ -464,14 +465,22 @@ module MakeModelGenerator<
464465
override string toString() { result = "TaintStore(" + step + ")" }
465466
}
466467

467-
/**
468-
* A data flow configuration for tracking flow through APIs.
469-
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
470-
*
471-
* This can be used to generate Flow summaries for APIs from parameter to return.
472-
*/
473-
private module PropagateFlowConfig implements DataFlow::StateConfigSig {
474-
class FlowState = TaintState;
468+
private signature module PropagateFlowConfigInputSig {
469+
class FlowState;
470+
471+
FlowState initialState();
472+
473+
default predicate isAdditionalFlowStep(
474+
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
475+
) {
476+
none()
477+
}
478+
}
479+
480+
private module PropagateFlowConfig<PropagateFlowConfigInputSig PropagateFlowConfigInput>
481+
implements DataFlow::StateConfigSig
482+
{
483+
import PropagateFlowConfigInput
475484

476485
predicate isSource(DataFlow::Node source, FlowState state) {
477486
source instanceof DataFlow::ParameterNode and
@@ -480,7 +489,7 @@ module MakeModelGenerator<
480489
c instanceof DataFlowSummaryTargetApi and
481490
not isUninterestingForHeuristicDataFlowModels(c)
482491
) and
483-
state.(TaintRead).getStep() = 0
492+
state = initialState()
484493
}
485494

486495
predicate isSink(DataFlow::Node sink, FlowState state) {
@@ -494,6 +503,31 @@ module MakeModelGenerator<
494503
not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink)))
495504
}
496505

506+
predicate isAdditionalFlowStep = PropagateFlowConfigInput::isAdditionalFlowStep/4;
507+
508+
predicate isBarrier(DataFlow::Node n) {
509+
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
510+
}
511+
512+
DataFlow::FlowFeature getAFeature() {
513+
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
514+
}
515+
}
516+
517+
/**
518+
* A module used to construct a data flow configuration for tracking taint-
519+
* flow through APIs.
520+
* The sources are the parameters of an API and the sinks are the return
521+
* values (excluding `this`) and parameters.
522+
*
523+
* This can be used to generate flow summaries for APIs from parameter to
524+
* return.
525+
*/
526+
module PropagateFlowConfigInputTaintInput implements PropagateFlowConfigInputSig {
527+
class FlowState = TaintState;
528+
529+
FlowState initialState() { result.(TaintRead).getStep() = 0 }
530+
497531
predicate isAdditionalFlowStep(
498532
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
499533
) {
@@ -515,51 +549,98 @@ module MakeModelGenerator<
515549
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
516550
)
517551
}
552+
}
518553

519-
predicate isBarrier(DataFlow::Node n) {
520-
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
521-
}
554+
/**
555+
* A data flow configuration for tracking taint-flow through APIs.
556+
* The sources are the parameters of an API and the sinks are the return
557+
* values (excluding `this`) and parameters.
558+
*
559+
* This can be used to generate flow summaries for APIs from parameter to
560+
* return.
561+
*/
562+
private module PropagateTaintFlowConfig =
563+
PropagateFlowConfig<PropagateFlowConfigInputTaintInput>;
522564

523-
DataFlow::FlowFeature getAFeature() {
524-
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
525-
}
565+
module PropagateTaintFlow = TaintTracking::GlobalWithState<PropagateTaintFlowConfig>;
566+
567+
/**
568+
* A module used to construct a data flow configuration for tracking
569+
* data flow through APIs.
570+
* The sources are the parameters of an API and the sinks are the return
571+
* values (excluding `this`) and parameters.
572+
*
573+
* This can be used to generate value-preserving flow summaries for APIs
574+
* from parameter to return.
575+
*/
576+
module PropagateFlowConfigInputDataFlowInput implements PropagateFlowConfigInputSig {
577+
class FlowState = Unit;
578+
579+
FlowState initialState() { any() }
526580
}
527581

528-
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
582+
/**
583+
* A data flow configuration for tracking data flow through APIs.
584+
* The sources are the parameters of an API and the sinks are the return
585+
* values (excluding `this`) and parameters.
586+
*
587+
* This can be used to generate flow summaries for APIs from parameter to
588+
* return.
589+
*/
590+
private module PropagateDataFlowConfig =
591+
PropagateFlowConfig<PropagateFlowConfigInputDataFlowInput>;
592+
593+
module PropagateDataFlow = DataFlow::GlobalWithState<PropagateDataFlowConfig>;
529594

530595
/**
531-
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
596+
* Holds if there should be a summary of `api` specifying flow from `input` to `output`.
597+
*
598+
* `preservesValue` is true if the summary is value-preserving, or `false` otherwise.
532599
*/
533-
string captureThroughFlow0(
534-
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
600+
private predicate captureThroughFlow0(
601+
DataFlowSummaryTargetApi api, string input, string output, boolean preservesValue
535602
) {
536-
exists(string input, string output |
603+
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
604+
PropagateDataFlow::flow(p, returnNodeExt) and preservesValue = true
605+
or
606+
not PropagateDataFlow::flow(p, returnNodeExt) and
607+
PropagateTaintFlow::flow(p, returnNodeExt) and
608+
preservesValue = false
609+
|
537610
getEnclosingCallable(p) = api and
538611
getEnclosingCallable(returnNodeExt) = api and
539612
input = parameterNodeAsInput(p) and
540613
output = getOutput(returnNodeExt) and
541-
input != output and
542-
result = ModelPrinting::asLiftedTaintModel(api, input, output)
614+
input != output
543615
)
544616
}
545617

546618
/**
547619
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
620+
*
621+
* `preservesValue` is `true` if the summary is value-preserving, and `false` otherwise.
548622
*/
549-
private string captureThroughFlow(DataFlowSummaryTargetApi api) {
550-
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
551-
PropagateFlow::flow(p, returnNodeExt) and
552-
result = captureThroughFlow0(api, p, returnNodeExt)
623+
private string captureThroughFlow(DataFlowSummaryTargetApi api, boolean preservesValue) {
624+
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output |
625+
getEnclosingCallable(p) = api and
626+
getEnclosingCallable(returnNodeExt) = api and
627+
input = parameterNodeAsInput(p) and
628+
output = getOutput(returnNodeExt) and
629+
preservesValue = max(boolean b | captureThroughFlow0(api, input, output, b)) and
630+
result = ModelPrinting::asLiftedTaintModel(api, input, output, preservesValue)
553631
)
554632
}
555633

556634
/**
557635
* Gets the summary model(s) of `api`, if there is flow from parameters to the
558636
* return value or parameter or if `api` is a fluent API.
637+
*
638+
* `preservesValue` is `true` if the summary is value-preserving, and `false` otherwise.
559639
*/
560-
string captureFlow(DataFlowSummaryTargetApi api) {
561-
result = captureQualifierFlow(api) or
562-
result = captureThroughFlow(api)
640+
string captureHeuristicFlow(DataFlowSummaryTargetApi api, boolean preservesValue) {
641+
result = captureQualifierFlow(api) and preservesValue = true
642+
or
643+
result = captureThroughFlow(api, preservesValue)
563644
}
564645

565646
/**
@@ -569,7 +650,7 @@ module MakeModelGenerator<
569650
*/
570651
string captureNoFlow(DataFlowSummaryTargetApi api) {
571652
not exists(DataFlowSummaryTargetApi api0 |
572-
exists(captureFlow(api0)) and api0.lift() = api.lift()
653+
exists(captureFlow(api0, _)) and api0.lift() = api.lift()
573654
) and
574655
api.isRelevant() and
575656
result = ModelPrinting::asNeutralSummaryModel(api)
@@ -1024,12 +1105,13 @@ module MakeModelGenerator<
10241105
/**
10251106
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
10261107
* the return value or a parameter). `lift` is true, if the model should be lifted, otherwise false.
1108+
* `preservesValue` is `true` if the summary is value-preserving, and `false` otherwise.
10271109
*
10281110
* Models are lifted to the best type in case the read and store access paths do not
10291111
* contain a field or synthetic field access.
10301112
*/
1031-
string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift) {
1032-
exists(string input, string output, boolean preservesValue |
1113+
string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift, boolean preservesValue) {
1114+
exists(string input, string output |
10331115
captureFlow0(api, input, output, _, lift) and
10341116
preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and
10351117
result = ContentModelPrinting::asModel(api, input, output, preservesValue, lift)
@@ -1046,17 +1128,19 @@ module MakeModelGenerator<
10461128
* generate flow summaries using the heuristic based summary generator.
10471129
*/
10481130
string captureFlow(DataFlowSummaryTargetApi api, boolean lift) {
1049-
result = ContentSensitive::captureFlow(api, lift)
1050-
or
1051-
not exists(DataFlowSummaryTargetApi api0 |
1052-
(api0 = api or api.lift() = api0) and
1053-
exists(ContentSensitive::captureFlow(api0, false))
1131+
exists(boolean preservesValue |
1132+
result = ContentSensitive::captureFlow(api, lift, preservesValue)
10541133
or
1055-
api0.lift() = api.lift() and
1056-
exists(ContentSensitive::captureFlow(api0, true))
1057-
) and
1058-
result = Heuristic::captureFlow(api) and
1059-
lift = true
1134+
not exists(DataFlowSummaryTargetApi api0 |
1135+
(api0 = api or api.lift() = api0) and
1136+
exists(ContentSensitive::captureFlow(api0, false, preservesValue))
1137+
or
1138+
api0.lift() = api.lift() and
1139+
exists(ContentSensitive::captureFlow(api0, true, preservesValue))
1140+
) and
1141+
result = Heuristic::captureHeuristicFlow(api, preservesValue) and
1142+
lift = true
1143+
)
10601144
}
10611145

10621146
/**

shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ module ModelPrintingImpl<ModelPrintingLangSig Lang> {
8686
/**
8787
* Gets the lifted taint summary model for `api` with `input` and `output`.
8888
*/
89-
bindingset[input, output]
90-
string asLiftedTaintModel(Printing::SummaryApi api, string input, string output) {
91-
result = asModel(api, input, output, false, true)
89+
bindingset[input, output, preservesValue]
90+
string asLiftedTaintModel(
91+
Printing::SummaryApi api, string input, string output, boolean preservesValue
92+
) {
93+
result = asModel(api, input, output, preservesValue, true)
9294
}
9395

9496
/**

0 commit comments

Comments
 (0)