Skip to content

Commit 1a9a40c

Browse files
committed
C++: Apply widening based on number of bounds measure
1 parent 70a8c4f commit 1a9a40c

File tree

5 files changed

+1438
-392
lines changed

5 files changed

+1438
-392
lines changed

cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll

Lines changed: 290 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,292 @@ private predicate isRecursiveExpr(Expr e) {
517517
)
518518
}
519519

520+
/**
521+
* Provides predicates that estimate the number of bounds that the range
522+
* analysis might produce.
523+
*/
524+
private module BoundsEstimate {
525+
/**
526+
* Gets the limit beyond which we enable widening. I.e., if the estimated
527+
* number of bounds exceeds this limit, we enable widening such that the limit
528+
* will not be reached.
529+
*/
530+
float getBoundsLimit() { result = 4.0.pow(20) }
531+
532+
/** Gets the maximum number of bounds possible when widening is used. */
533+
private int getNrOfWideningBounds() {
534+
result =
535+
max(ArithmeticType t | | count(wideningLowerBounds(t)).maximum(count(wideningUpperBounds(t))))
536+
}
537+
538+
/**
539+
* Holds if `boundFromGuard(guard, v, _, branch)` holds, but without
540+
* relying on range analysis (which would cause non-monotonic recursion
541+
* elsewhere).
542+
*/
543+
private predicate hasBoundFromGuard(Expr guard, VariableAccess v, boolean branch) {
544+
exists(float p, float q, Expr lhs | linearAccess(lhs, v, p, q) |
545+
relOpWithSwapAndNegate(guard, lhs, _, _, _, branch)
546+
or
547+
eqOpWithSwapAndNegate(guard, lhs, _, true, branch)
548+
or
549+
eqZeroWithNegate(guard, lhs, true, branch)
550+
)
551+
}
552+
553+
/** Holds if `def` and `v` is a guard phi node with a bound from a guard. */
554+
predicate isGuardPhiWithBound(RangeSsaDefinition def, StackVariable v, VariableAccess access) {
555+
exists(Expr guard, boolean branch |
556+
def.isGuardPhi(v, access, guard, branch) and
557+
hasBoundFromGuard(guard, access, branch)
558+
)
559+
}
560+
561+
/** Gets the number of bounds for `def` and `v` as guard phi node. */
562+
language[monotonicAggregates]
563+
private float nrOfBoundsPhiGuard(RangeSsaDefinition def, StackVariable v) {
564+
// If there's different `access`es, then they refer to the same variable
565+
// with the same lower bounds. Hence adding these guards make no sense (the
566+
// implementation will take the union but they'll be removed by
567+
// deduplication). Hence we use `max` as an approximation.
568+
result =
569+
max(VariableAccess access | isGuardPhiWithBound(def, v, access) | nrOfBoundsExpr(access))
570+
or
571+
def.isPhiNode(v) and
572+
not isGuardPhiWithBound(def, v, _) and
573+
result = 0
574+
}
575+
576+
/** Gets the number of bounds for `def` and `v` as normal phi node. */
577+
language[monotonicAggregates]
578+
private float nrOfBoundsPhiNormal(RangeSsaDefinition def, StackVariable v) {
579+
// The implementation
580+
result =
581+
strictsum(RangeSsaDefinition inputDef |
582+
inputDef = def.getAPhiInput(v)
583+
|
584+
nrOfBoundsDef(inputDef, v)
585+
)
586+
or
587+
def.isPhiNode(v) and
588+
not exists(def.getAPhiInput(v)) and
589+
result = 0
590+
}
591+
592+
/** Gets the number of bounds for `def` and `v` as an NE phi node. */
593+
private float nrOfBoundsNEPhi(RangeSsaDefinition def, StackVariable v) {
594+
exists(VariableAccess access | isNEPhi(v, def, access, _) and result = nrOfBoundsExpr(access))
595+
or
596+
def.isPhiNode(v) and
597+
not isNEPhi(v, def, _, _) and
598+
result = 0
599+
}
600+
601+
/** Gets the number of bounds for `def` and `v` as an unsupported guard phi node. */
602+
private float nrOfBoundsUnsupportedGuardPhi(RangeSsaDefinition def, StackVariable v) {
603+
exists(VariableAccess access |
604+
isUnsupportedGuardPhi(v, def, access) and
605+
result = nrOfBoundsExpr(access)
606+
)
607+
or
608+
def.isPhiNode(v) and
609+
not isUnsupportedGuardPhi(v, def, _) and
610+
result = 0
611+
}
612+
613+
private float nrOfBoundsPhi(RangeSsaDefinition def, StackVariable v) {
614+
// The cases for phi nodes are not mutually exclusive. For instance a phi
615+
// node can be both a guard phi node and a normal phi node. To handle this
616+
// we sum the contributions from the different cases.
617+
result =
618+
nrOfBoundsPhiGuard(def, v) + nrOfBoundsPhiNormal(def, v) + nrOfBoundsNEPhi(def, v) +
619+
nrOfBoundsUnsupportedGuardPhi(def, v) and
620+
result != 0
621+
}
622+
623+
/** Gets the estimated number of bounds for `def` and `v`. */
624+
float nrOfBoundsDef(RangeSsaDefinition def, StackVariable v) {
625+
// Recursive definitions are already widened, so we simply estimate them as
626+
// having the number of widening bounds available. This is crucial as it
627+
// ensures that we don't follow recursive cycles when calculating the
628+
// estimate. Had that not been the case the estimate itself would be at risk
629+
// of causing performance issues and being non-functional.
630+
if isRecursiveDef(def, v)
631+
then result = getNrOfWideningBounds()
632+
else (
633+
// Definitions with a defining value
634+
exists(Expr defExpr | assignmentDef(def, v, defExpr) and result = nrOfBoundsExpr(defExpr))
635+
or
636+
// Assignment operations with a defining value
637+
exists(AssignOperation assignOp |
638+
def = assignOp and
639+
assignOp.getLValue() = v.getAnAccess() and
640+
result = nrOfBoundsExpr(assignOp)
641+
)
642+
or
643+
// Phi nodes
644+
result = nrOfBoundsPhi(def, v)
645+
or
646+
unanalyzableDefBounds(def, v, _, _) and result = 1
647+
)
648+
}
649+
650+
/**
651+
* Gets a naive estimate of the number of bounds for `e`.
652+
*
653+
* The estimate is like an abstract interpretation of the range analysis,
654+
* where the abstract value is the number of bounds. For instance,
655+
* `nrOfBoundsExpr(12) = 1` and `nrOfBoundsExpr(x + y) = nrOfBoundsExpr(x) *
656+
* nrOfBoundsExpr(y)`.
657+
*
658+
* The estimated number of bounds will usually be greater than the actual
659+
* number of bounds, as the estimate can not detect cases where bounds are cut
660+
* down when tracked precisely. For instance, in
661+
* ```c
662+
* int x = 1;
663+
* if (cond) { x = 1; }
664+
* int y = x + x;
665+
* ```
666+
* the actual number of bounds for `y` is 1. However, the estimate will be 4
667+
* as the conditional assignment to `x` gives two bounds for `x` on the last
668+
* line and the addition gives 2 * 2 bounds. There are two sources of anncuracies:
669+
*
670+
* 1. Without tracking the lower bounds we can't see that `x` is assigned a
671+
* value that is equal to its lower bound.
672+
* 2. Had the conditional assignment been `x = 2` then the estimate of two
673+
* bounds for `x` would have been correct. However, the estimate of 4 for `y`
674+
* would still be incorrect. Summing the actual bounds `{1,2}` with itself
675+
* gives `{2,3,4}` which is only three bounds. Again, we can't realise this
676+
* without tracking the bounds.
677+
*
678+
* Since these inaccuracies compound the estimated number of bounds can often
679+
* be _much_ greater than the actual number of bounds. Do note though that the
680+
* estimate is not _guaranteed_ to be an upper bound. In some cases the
681+
* approximations might underestimate the number of bounds.
682+
*
683+
* This predicate is functional. This is crucial as:
684+
*
685+
* - It ensures that the computing the estimate itself is fast.
686+
* - Our use of monotonic aggregates assumes functionality.
687+
*
688+
* Any non-functional case should be considered a bug.
689+
*/
690+
float nrOfBoundsExpr(Expr e) {
691+
// Similarly to what we do for definitions, we do not attempt to measure the
692+
// number of bounds for recursive expressions.
693+
if isRecursiveExpr(e)
694+
then result = getNrOfWideningBounds()
695+
else
696+
if analyzableExpr(e)
697+
then
698+
// The cases here are an abstraction of and mirrors the cases inside
699+
// `getLowerBoundsImpl`/`getUpperBoundsImpl`.
700+
result = 1 and exists(getValue(e).toFloat())
701+
or
702+
exists(Expr operand | result = nrOfBoundsExpr(operand) |
703+
effectivelyMultipliesByPositive(e, operand, _)
704+
or
705+
effectivelyMultipliesByNegative(e, operand, _)
706+
)
707+
or
708+
exists(ConditionalExpr condExpr |
709+
e = condExpr and
710+
result = nrOfBoundsExpr(condExpr.getThen()) * nrOfBoundsExpr(condExpr.getElse())
711+
)
712+
or
713+
exists(BinaryArithmeticOperation binop |
714+
e = binop and
715+
result = nrOfBoundsExpr(binop.getLeftOperand()) * nrOfBoundsExpr(binop.getRightOperand())
716+
|
717+
e instanceof MaxExpr or
718+
e instanceof MinExpr or
719+
e instanceof AddExpr or
720+
e instanceof SubExpr or
721+
e instanceof UnsignedMulExpr
722+
)
723+
or
724+
exists(AssignExpr assign | e = assign and result = nrOfBoundsExpr(assign.getRValue()))
725+
or
726+
exists(AssignArithmeticOperation assignOp |
727+
e = assignOp and
728+
result = nrOfBoundsExpr(assignOp.getLValue()) * nrOfBoundsExpr(assignOp.getRValue())
729+
|
730+
e instanceof AssignAddExpr or
731+
e instanceof AssignSubExpr or
732+
e instanceof UnsignedAssignMulExpr
733+
)
734+
or
735+
// Handles `AssignMulByPositiveConstantExpr` and `AssignMulByNegativeConstantExpr`
736+
exists(AssignMulByConstantExpr mulExpr |
737+
e = mulExpr and
738+
result = nrOfBoundsExpr(mulExpr.getLValue())
739+
)
740+
or
741+
// Handles the prefix and postfix increment and decrement operators.
742+
exists(CrementOperation crementOp |
743+
e = crementOp and result = nrOfBoundsExpr(crementOp.getOperand())
744+
)
745+
or
746+
exists(RemExpr remExpr | e = remExpr | result = nrOfBoundsExpr(remExpr.getLeftOperand()))
747+
or
748+
exists(Conversion convExpr |
749+
e = convExpr and
750+
if convExpr.getUnspecifiedType() instanceof BoolType
751+
then result = 1
752+
else result = nrOfBoundsExpr(convExpr.getExpr())
753+
)
754+
or
755+
exists(RangeSsaDefinition def, StackVariable v |
756+
e = def.getAUse(v) and
757+
result = nrOfBoundsDef(def, v) and
758+
not exists(getValue(e).toFloat()) // Why?
759+
)
760+
or
761+
e instanceof UnsignedBitwiseAndExpr and
762+
result = 1
763+
or
764+
exists(RShiftExpr rsExpr |
765+
e = rsExpr and
766+
exists(getValue(rsExpr.getRightOperand().getFullyConverted()).toInt()) and
767+
result = nrOfBoundsExpr(rsExpr.getLeftOperand())
768+
)
769+
else (
770+
exists(exprMinVal(e)) and result = 1
771+
)
772+
}
773+
}
774+
775+
/**
776+
* Holds if `v` is a variable for which widening should be used, as otherwise a
777+
* very large number of bounds might be generated during the range analysis for
778+
* `v`.
779+
*/
780+
private predicate varHasTooManyBounds(StackVariable v) {
781+
exists(RangeSsaDefinition def |
782+
def.getAVariable() = v and
783+
BoundsEstimate::nrOfBoundsDef(def, v) > BoundsEstimate::getBoundsLimit()
784+
)
785+
}
786+
787+
/**
788+
* Holds if `e` is an expression for which widening should be used, as otherwise
789+
* a very large number of bounds might be generated during the range analysis
790+
* for `e`.
791+
*/
792+
private predicate exprHasTooManyBounds(Expr e) {
793+
BoundsEstimate::nrOfBoundsExpr(e) > BoundsEstimate::getBoundsLimit()
794+
or
795+
// A subexpressions of an expression with too many bounds may itself not have
796+
// to many bounds. For instance, `x + y` can have too many bounds without `x`
797+
// having as well. But in these cases, still want to consider `e` as having
798+
// too many bounds since:
799+
// - The overall result is widened anyway, so widening `e` as well is unlikely
800+
// to cause further precision loss.
801+
// - The number of bounds could be very large but still below the arbitrary
802+
// limit. Hence widening `e` can improve performance.
803+
exists(Expr pe | exprHasTooManyBounds(pe) and e.getParent() = pe)
804+
}
805+
520806
/**
521807
* Holds if `binop` is a binary operation that's likely to be assigned a
522808
* quadratic (or more) number of candidate bounds during the analysis. This can
@@ -667,7 +953,7 @@ private float getTruncatedLowerBounds(Expr expr) {
667953
if exprMinVal(expr) <= newLB and newLB <= exprMaxVal(expr)
668954
then
669955
// Apply widening where we might get a combinatorial explosion.
670-
if isRecursiveBinary(expr)
956+
if isRecursiveBinary(expr) or exprHasTooManyBounds(expr)
671957
then result = widenLowerBound(expr.getUnspecifiedType(), newLB)
672958
else result = newLB
673959
else result = exprMinVal(expr)
@@ -721,7 +1007,7 @@ private float getTruncatedUpperBounds(Expr expr) {
7211007
if exprMinVal(expr) <= newUB and newUB <= exprMaxVal(expr)
7221008
then
7231009
// Apply widening where we might get a combinatorial explosion.
724-
if isRecursiveBinary(expr)
1010+
if isRecursiveBinary(expr) or exprHasTooManyBounds(expr)
7251011
then result = widenUpperBound(expr.getUnspecifiedType(), newUB)
7261012
else result = newUB
7271013
else result = exprMaxVal(expr)
@@ -1812,7 +2098,7 @@ module SimpleRangeAnalysisInternal {
18122098
|
18132099
// Widening: check whether the new lower bound is from a source which
18142100
// depends recursively on the current definition.
1815-
if isRecursiveDef(def, v)
2101+
if isRecursiveDef(def, v) or varHasTooManyBounds(v)
18162102
then
18172103
// The new lower bound is from a recursive source, so we round
18182104
// down to one of a limited set of values to prevent the
@@ -1836,7 +2122,7 @@ module SimpleRangeAnalysisInternal {
18362122
|
18372123
// Widening: check whether the new upper bound is from a source which
18382124
// depends recursively on the current definition.
1839-
if isRecursiveDef(def, v)
2125+
if isRecursiveDef(def, v) or varHasTooManyBounds(v)
18402126
then
18412127
// The new upper bound is from a recursive source, so we round
18422128
// up to one of a fixed set of values to prevent the recursion

0 commit comments

Comments
 (0)