Skip to content

Commit 033a4c3

Browse files
committed
C++: Fix perf of IR value numbering
On some snapshots, notably ffmpeg, the IR `ValueNumbering` recursion would generate billions of tuples and eventually run out of space. It turns out it was fairly common for an `Instruction` to get more than one `ValueNumber` in the base cases for `VariableAddressInstruction` and `InitializeParameterInstruction`, and it could also happen in an instruction with more than one operand of the same `OperandTag`. When a binary operation was applied to an instruction with `m` value numbers and another instruction with `n` value numbers, the result would get `m * n` value numbers. This led to doubly-exponential growth in the number of value numbers in rare cases. The underlying reason why a `VariableAddressInstruction` could get multiple value numbers is that it was keyed on the associated `IRVariable`, and the `IRVariable` is defined in part by the type of its underlying `Variable` (or other AST element). If the extractor defines a variable to have multiple types because of linker ambiguity, this leads to the creation of multiple `IRVariable`s. That should ideally be solved in `TIRVariable.qll`, but for now I've put a workaround in `ValueNumberingInternal.qll` instead. To remove the problem with instructions having multiple operands, the construction in `Operand.qll` will now filter out any such operand. It wasn't enough to apply that filter to the `raw` stage, so I've applied it to all three stages.
1 parent de45b1a commit 033a4c3

File tree

8 files changed

+91
-43
lines changed

8 files changed

+91
-43
lines changed

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,19 @@ cached
1111
private newtype TOperand =
1212
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
1313
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
14-
not Construction::isInCycle(useInstr)
14+
not Construction::isInCycle(useInstr) and
15+
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
1516
} or
1617
TNonPhiMemoryOperand(
1718
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
1819
) {
1920
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
20-
not Construction::isInCycle(useInstr)
21+
not Construction::isInCycle(useInstr) and
22+
(
23+
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
24+
or
25+
tag instanceof UnmodeledUseOperandTag
26+
)
2127
} or
2228
TPhiOperand(
2329
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingInternal.qll

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ private import ValueNumberingImports
22
private import cpp
33

44
newtype TValueNumber =
5-
TVariableAddressValueNumber(IRFunction irFunc, IRVariable var) {
6-
variableAddressValueNumber(_, irFunc, var)
5+
TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
6+
variableAddressValueNumber(_, irFunc, ast)
77
} or
8-
TInitializeParameterValueNumber(IRFunction irFunc, IRVariable var) {
8+
TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
99
initializeParameterValueNumber(_, irFunc, var)
1010
} or
1111
TInitializeThisValueNumber(IRFunction irFunc) { initializeThisValueNumber(_, irFunc) } or
@@ -100,17 +100,23 @@ private predicate numberableInstruction(Instruction instr) {
100100
}
101101

102102
private predicate variableAddressValueNumber(
103-
VariableAddressInstruction instr, IRFunction irFunc, IRVariable var
103+
VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
104104
) {
105105
instr.getEnclosingIRFunction() = irFunc and
106-
instr.getIRVariable() = var
106+
// The underlying AST element is used as value-numbering key instead of the
107+
// `IRVariable` to work around a problem where a variable or expression with
108+
// multiple types gives rise to multiple `IRVariable`s.
109+
instr.getIRVariable().getAST() = ast
107110
}
108111

109112
private predicate initializeParameterValueNumber(
110-
InitializeParameterInstruction instr, IRFunction irFunc, IRVariable var
113+
InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
111114
) {
112115
instr.getEnclosingIRFunction() = irFunc and
113-
instr.getIRVariable() = var
116+
// The underlying AST element is used as value-numbering key instead of the
117+
// `IRVariable` to work around a problem where a variable or expression with
118+
// multiple types gives rise to multiple `IRVariable`s.
119+
instr.getIRVariable().getAST() = var
114120
}
115121

116122
private predicate initializeThisValueNumber(InitializeThisInstruction instr, IRFunction irFunc) {
@@ -236,12 +242,12 @@ private TValueNumber nonUniqueValueNumber(Instruction instr) {
236242
exists(IRFunction irFunc |
237243
irFunc = instr.getEnclosingIRFunction() and
238244
(
239-
exists(IRVariable var |
240-
variableAddressValueNumber(instr, irFunc, var) and
241-
result = TVariableAddressValueNumber(irFunc, var)
245+
exists(Language::AST ast |
246+
variableAddressValueNumber(instr, irFunc, ast) and
247+
result = TVariableAddressValueNumber(irFunc, ast)
242248
)
243249
or
244-
exists(IRVariable var |
250+
exists(Language::AST var |
245251
initializeParameterValueNumber(instr, irFunc, var) and
246252
result = TInitializeParameterValueNumber(irFunc, var)
247253
)

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,19 @@ cached
1111
private newtype TOperand =
1212
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
1313
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
14-
not Construction::isInCycle(useInstr)
14+
not Construction::isInCycle(useInstr) and
15+
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
1516
} or
1617
TNonPhiMemoryOperand(
1718
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
1819
) {
1920
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
20-
not Construction::isInCycle(useInstr)
21+
not Construction::isInCycle(useInstr) and
22+
(
23+
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
24+
or
25+
tag instanceof UnmodeledUseOperandTag
26+
)
2127
} or
2228
TPhiOperand(
2329
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingInternal.qll

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ private import ValueNumberingImports
22
private import cpp
33

44
newtype TValueNumber =
5-
TVariableAddressValueNumber(IRFunction irFunc, IRVariable var) {
6-
variableAddressValueNumber(_, irFunc, var)
5+
TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
6+
variableAddressValueNumber(_, irFunc, ast)
77
} or
8-
TInitializeParameterValueNumber(IRFunction irFunc, IRVariable var) {
8+
TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
99
initializeParameterValueNumber(_, irFunc, var)
1010
} or
1111
TInitializeThisValueNumber(IRFunction irFunc) { initializeThisValueNumber(_, irFunc) } or
@@ -100,17 +100,23 @@ private predicate numberableInstruction(Instruction instr) {
100100
}
101101

102102
private predicate variableAddressValueNumber(
103-
VariableAddressInstruction instr, IRFunction irFunc, IRVariable var
103+
VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
104104
) {
105105
instr.getEnclosingIRFunction() = irFunc and
106-
instr.getIRVariable() = var
106+
// The underlying AST element is used as value-numbering key instead of the
107+
// `IRVariable` to work around a problem where a variable or expression with
108+
// multiple types gives rise to multiple `IRVariable`s.
109+
instr.getIRVariable().getAST() = ast
107110
}
108111

109112
private predicate initializeParameterValueNumber(
110-
InitializeParameterInstruction instr, IRFunction irFunc, IRVariable var
113+
InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
111114
) {
112115
instr.getEnclosingIRFunction() = irFunc and
113-
instr.getIRVariable() = var
116+
// The underlying AST element is used as value-numbering key instead of the
117+
// `IRVariable` to work around a problem where a variable or expression with
118+
// multiple types gives rise to multiple `IRVariable`s.
119+
instr.getIRVariable().getAST() = var
114120
}
115121

116122
private predicate initializeThisValueNumber(InitializeThisInstruction instr, IRFunction irFunc) {
@@ -236,12 +242,12 @@ private TValueNumber nonUniqueValueNumber(Instruction instr) {
236242
exists(IRFunction irFunc |
237243
irFunc = instr.getEnclosingIRFunction() and
238244
(
239-
exists(IRVariable var |
240-
variableAddressValueNumber(instr, irFunc, var) and
241-
result = TVariableAddressValueNumber(irFunc, var)
245+
exists(Language::AST ast |
246+
variableAddressValueNumber(instr, irFunc, ast) and
247+
result = TVariableAddressValueNumber(irFunc, ast)
242248
)
243249
or
244-
exists(IRVariable var |
250+
exists(Language::AST var |
245251
initializeParameterValueNumber(instr, irFunc, var) and
246252
result = TInitializeParameterValueNumber(irFunc, var)
247253
)

cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,19 @@ cached
1111
private newtype TOperand =
1212
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
1313
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
14-
not Construction::isInCycle(useInstr)
14+
not Construction::isInCycle(useInstr) and
15+
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
1516
} or
1617
TNonPhiMemoryOperand(
1718
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
1819
) {
1920
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
20-
not Construction::isInCycle(useInstr)
21+
not Construction::isInCycle(useInstr) and
22+
(
23+
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
24+
or
25+
tag instanceof UnmodeledUseOperandTag
26+
)
2127
} or
2228
TPhiOperand(
2329
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingInternal.qll

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ private import ValueNumberingImports
22
private import cpp
33

44
newtype TValueNumber =
5-
TVariableAddressValueNumber(IRFunction irFunc, IRVariable var) {
6-
variableAddressValueNumber(_, irFunc, var)
5+
TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
6+
variableAddressValueNumber(_, irFunc, ast)
77
} or
8-
TInitializeParameterValueNumber(IRFunction irFunc, IRVariable var) {
8+
TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
99
initializeParameterValueNumber(_, irFunc, var)
1010
} or
1111
TInitializeThisValueNumber(IRFunction irFunc) { initializeThisValueNumber(_, irFunc) } or
@@ -100,17 +100,23 @@ private predicate numberableInstruction(Instruction instr) {
100100
}
101101

102102
private predicate variableAddressValueNumber(
103-
VariableAddressInstruction instr, IRFunction irFunc, IRVariable var
103+
VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
104104
) {
105105
instr.getEnclosingIRFunction() = irFunc and
106-
instr.getIRVariable() = var
106+
// The underlying AST element is used as value-numbering key instead of the
107+
// `IRVariable` to work around a problem where a variable or expression with
108+
// multiple types gives rise to multiple `IRVariable`s.
109+
instr.getIRVariable().getAST() = ast
107110
}
108111

109112
private predicate initializeParameterValueNumber(
110-
InitializeParameterInstruction instr, IRFunction irFunc, IRVariable var
113+
InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
111114
) {
112115
instr.getEnclosingIRFunction() = irFunc and
113-
instr.getIRVariable() = var
116+
// The underlying AST element is used as value-numbering key instead of the
117+
// `IRVariable` to work around a problem where a variable or expression with
118+
// multiple types gives rise to multiple `IRVariable`s.
119+
instr.getIRVariable().getAST() = var
114120
}
115121

116122
private predicate initializeThisValueNumber(InitializeThisInstruction instr, IRFunction irFunc) {
@@ -236,12 +242,12 @@ private TValueNumber nonUniqueValueNumber(Instruction instr) {
236242
exists(IRFunction irFunc |
237243
irFunc = instr.getEnclosingIRFunction() and
238244
(
239-
exists(IRVariable var |
240-
variableAddressValueNumber(instr, irFunc, var) and
241-
result = TVariableAddressValueNumber(irFunc, var)
245+
exists(Language::AST ast |
246+
variableAddressValueNumber(instr, irFunc, ast) and
247+
result = TVariableAddressValueNumber(irFunc, ast)
242248
)
243249
or
244-
exists(IRVariable var |
250+
exists(Language::AST var |
245251
initializeParameterValueNumber(instr, irFunc, var) and
246252
result = TInitializeParameterValueNumber(irFunc, var)
247253
)

csharp/ql/src/semmle/code/csharp/ir/implementation/raw/Operand.qll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,19 @@ cached
1111
private newtype TOperand =
1212
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
1313
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
14-
not Construction::isInCycle(useInstr)
14+
not Construction::isInCycle(useInstr) and
15+
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
1516
} or
1617
TNonPhiMemoryOperand(
1718
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
1819
) {
1920
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
20-
not Construction::isInCycle(useInstr)
21+
not Construction::isInCycle(useInstr) and
22+
(
23+
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
24+
or
25+
tag instanceof UnmodeledUseOperandTag
26+
)
2127
} or
2228
TPhiOperand(
2329
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/Operand.qll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,19 @@ cached
1111
private newtype TOperand =
1212
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
1313
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
14-
not Construction::isInCycle(useInstr)
14+
not Construction::isInCycle(useInstr) and
15+
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
1516
} or
1617
TNonPhiMemoryOperand(
1718
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
1819
) {
1920
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
20-
not Construction::isInCycle(useInstr)
21+
not Construction::isInCycle(useInstr) and
22+
(
23+
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
24+
or
25+
tag instanceof UnmodeledUseOperandTag
26+
)
2127
} or
2228
TPhiOperand(
2329
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

0 commit comments

Comments
 (0)