Skip to content

Commit a9e3bfb

Browse files
author
Dave Bartolomeo
committed
C++/C#: Treat string literals like read-only global variables for alias purposes.
Previously, we didn't track string literals as known memory locations at all, so they all just got marked as `UnknownMemoryLocation`, just like an aribtrary read from a random pointer. This led to some confusing def-use chains, where it would look like the contents of a string literal were being written to by the side effect of an earlier function call, which of course is impossible. To fix this, I've made two changes. First, each string literal is now given a corresponding `IRVariable` (specifically `IRStringLiteral`), since a string literal behaves more or less as a read-only global variable. Second, the `IRVariable` for each string literal is now marked `isReadOnly()`, which the alias analysis uses to determine that an arbitrary write to aliased memory will not overwrite the contents of a string literal. I originally planned to treat all string literals with the same value as being the same memory location, since this is the usual behavior of modern compilers. However, this made implementing `IRVariable.getAST()` tricky for string literals, so I left them unpooled.
1 parent 47a292b commit a9e3bfb

File tree

23 files changed

+359
-130
lines changed

23 files changed

+359
-130
lines changed

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ abstract class IRVariable extends TIRVariable {
2222

2323
abstract string toString();
2424

25+
/**
26+
* Holds if this variable's value cannot be changed within a function. Currently used for string
27+
* literals, but could also apply to `const` global and static variables.
28+
*/
29+
predicate isReadOnly() { none() }
30+
2531
/**
2632
* Gets the type of the variable.
2733
*/
@@ -113,34 +119,43 @@ class IRStaticUserVariable extends IRUserVariable {
113119
final override Language::StaticVariable getVariable() { result = var }
114120
}
115121

122+
abstract class IRGeneratedVariable extends IRVariable {
123+
Language::AST ast;
124+
Language::LanguageType type;
125+
126+
final override Language::LanguageType getLanguageType() { result = type }
127+
128+
final override Language::AST getAST() { result = ast }
129+
130+
override string toString() { result = getBaseString() + getLocationString() }
131+
132+
override string getUniqueId() { none() }
133+
134+
final string getLocationString() {
135+
result = ast.getLocation().getStartLine().toString() + ":" +
136+
ast.getLocation().getStartColumn().toString()
137+
}
138+
139+
string getBaseString() { none() }
140+
}
141+
116142
IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) {
117143
result.getAST() = ast and
118144
result.getTag() = tag
119145
}
120146

121-
class IRTempVariable extends IRVariable, IRAutomaticVariable, TIRTempVariable {
122-
Language::AST ast;
147+
class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVariable {
123148
TempVariableTag tag;
124-
Language::LanguageType type;
125149

126150
IRTempVariable() { this = TIRTempVariable(func, ast, tag, type) }
127151

128-
final override Language::LanguageType getLanguageType() { result = type }
129-
130-
final override Language::AST getAST() { result = ast }
131-
132152
final override string getUniqueId() {
133153
result = "Temp: " + Construction::getTempVariableUniqueId(this)
134154
}
135155

136156
final TempVariableTag getTag() { result = tag }
137157

138-
override string toString() {
139-
result = getBaseString() + ast.getLocation().getStartLine().toString() + ":" +
140-
ast.getLocation().getStartColumn().toString()
141-
}
142-
143-
string getBaseString() { result = "#temp" }
158+
override string getBaseString() { result = "#temp" }
144159
}
145160

146161
class IRReturnVariable extends IRTempVariable {
@@ -154,3 +169,19 @@ class IRThrowVariable extends IRTempVariable {
154169

155170
override string getBaseString() { result = "#throw" }
156171
}
172+
173+
class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral {
174+
Language::StringLiteral literal;
175+
176+
IRStringLiteral() { this = TIRStringLiteral(func, ast, type, literal) }
177+
178+
final override predicate isReadOnly() { any() }
179+
180+
final override string getUniqueId() {
181+
result = "String: " + getLocationString() + "=" + Language::getStringLiteralText(literal)
182+
}
183+
184+
override string getBaseString() { result = "#string" }
185+
186+
final Language::StringLiteral getLiteral() { result = literal }
187+
}

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -808,14 +808,12 @@ class FloatConstantInstruction extends ConstantInstruction {
808808
FloatConstantInstruction() { getResultType() instanceof Language::FloatingPointType }
809809
}
810810

811-
class StringConstantInstruction extends Instruction {
812-
Language::StringLiteral value;
811+
class StringConstantInstruction extends VariableInstruction {
812+
override IRStringLiteral var;
813813

814-
StringConstantInstruction() { value = Construction::getInstructionStringLiteral(this) }
814+
final override string getImmediateString() { result = Language::getStringLiteralText(getValue()) }
815815

816-
final override string getImmediateString() { result = Language::getStringLiteralText(value) }
817-
818-
final Language::StringLiteral getValue() { result = value }
816+
final Language::StringLiteral getValue() { result = var.getLiteral() }
819817
}
820818

821819
class BinaryInstruction extends Instruction {

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,10 @@ predicate resultPointsTo(Instruction instr, IRVariable var, IntValue bitOffset)
308308
instr.(VariableAddressInstruction).getIRVariable() = var and
309309
bitOffset = 0
310310
or
311+
// A string literal is just a special read-only global variable.
312+
instr.(StringConstantInstruction).getIRVariable() = var and
313+
bitOffset = 0
314+
or
311315
exists(Operand operand, IntValue originalBitOffset, IntValue propagatedBitOffset |
312316
operand = instr.getAnOperand() and
313317
// If an operand is propagated, then the result points to the same variable,

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,17 +210,21 @@ Overlap getOverlap(MemoryLocation def, MemoryLocation use) {
210210
def instanceof UnknownVirtualVariable and
211211
result instanceof MustTotallyOverlap
212212
or
213-
// An UnknownMemoryLocation may partially overlap any Location within the same virtual variable.
213+
// An UnknownMemoryLocation may partially overlap any Location within the same virtual variable,
214+
// unless the location is read-only.
214215
def.getVirtualVariable() = use.getVirtualVariable() and
215216
def instanceof UnknownMemoryLocation and
216-
result instanceof MayPartiallyOverlap
217+
result instanceof MayPartiallyOverlap and
218+
not use.(VariableMemoryLocation).getVariable().isReadOnly()
217219
or
218220
// An UnknownNonLocalMemoryLocation may partially overlap any location within the same virtual
219-
// variable, except a local variable.
221+
// variable, except a local variable or read-only variable.
220222
def.getVirtualVariable() = use.getVirtualVariable() and
221223
def instanceof UnknownNonLocalMemoryLocation and
222224
result instanceof MayPartiallyOverlap and
223-
not use.(VariableMemoryLocation).getVariable() instanceof IRAutomaticVariable
225+
not exists(IRVariable var | var = use.(VariableMemoryLocation).getVariable() |
226+
var instanceof IRAutomaticVariable or var.isReadOnly()
227+
)
224228
or
225229
exists(VariableMemoryLocation defVariableLocation |
226230
defVariableLocation = def and

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -341,11 +341,6 @@ private module Cached {
341341
result = getOldInstruction(instruction).(OldIR::ConstantValueInstruction).getValue()
342342
}
343343

344-
cached
345-
Language::StringLiteral getInstructionStringLiteral(Instruction instruction) {
346-
result = getOldInstruction(instruction).(OldIR::StringConstantInstruction).getValue()
347-
}
348-
349344
cached
350345
Language::BuiltInOperation getInstructionBuiltInOperation(Instruction instruction) {
351346
result = getOldInstruction(instruction)

cpp/ql/src/semmle/code/cpp/ir/implementation/internal/TIRVariable.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,10 @@ newtype TIRVariable =
99
Language::Function func, Language::AST ast, TempVariableTag tag, Language::LanguageType type
1010
) {
1111
Construction::hasTempVariable(func, ast, tag, type)
12+
} or
13+
TIRStringLiteral(
14+
Language::Function func, Language::AST ast, Language::LanguageType type,
15+
Language::StringLiteral literal
16+
) {
17+
Construction::hasStringLiteral(func, ast, type, literal)
1218
}

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ abstract class IRVariable extends TIRVariable {
2222

2323
abstract string toString();
2424

25+
/**
26+
* Holds if this variable's value cannot be changed within a function. Currently used for string
27+
* literals, but could also apply to `const` global and static variables.
28+
*/
29+
predicate isReadOnly() { none() }
30+
2531
/**
2632
* Gets the type of the variable.
2733
*/
@@ -113,34 +119,43 @@ class IRStaticUserVariable extends IRUserVariable {
113119
final override Language::StaticVariable getVariable() { result = var }
114120
}
115121

122+
abstract class IRGeneratedVariable extends IRVariable {
123+
Language::AST ast;
124+
Language::LanguageType type;
125+
126+
final override Language::LanguageType getLanguageType() { result = type }
127+
128+
final override Language::AST getAST() { result = ast }
129+
130+
override string toString() { result = getBaseString() + getLocationString() }
131+
132+
override string getUniqueId() { none() }
133+
134+
final string getLocationString() {
135+
result = ast.getLocation().getStartLine().toString() + ":" +
136+
ast.getLocation().getStartColumn().toString()
137+
}
138+
139+
string getBaseString() { none() }
140+
}
141+
116142
IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) {
117143
result.getAST() = ast and
118144
result.getTag() = tag
119145
}
120146

121-
class IRTempVariable extends IRVariable, IRAutomaticVariable, TIRTempVariable {
122-
Language::AST ast;
147+
class IRTempVariable extends IRGeneratedVariable, IRAutomaticVariable, TIRTempVariable {
123148
TempVariableTag tag;
124-
Language::LanguageType type;
125149

126150
IRTempVariable() { this = TIRTempVariable(func, ast, tag, type) }
127151

128-
final override Language::LanguageType getLanguageType() { result = type }
129-
130-
final override Language::AST getAST() { result = ast }
131-
132152
final override string getUniqueId() {
133153
result = "Temp: " + Construction::getTempVariableUniqueId(this)
134154
}
135155

136156
final TempVariableTag getTag() { result = tag }
137157

138-
override string toString() {
139-
result = getBaseString() + ast.getLocation().getStartLine().toString() + ":" +
140-
ast.getLocation().getStartColumn().toString()
141-
}
142-
143-
string getBaseString() { result = "#temp" }
158+
override string getBaseString() { result = "#temp" }
144159
}
145160

146161
class IRReturnVariable extends IRTempVariable {
@@ -154,3 +169,19 @@ class IRThrowVariable extends IRTempVariable {
154169

155170
override string getBaseString() { result = "#throw" }
156171
}
172+
173+
class IRStringLiteral extends IRGeneratedVariable, TIRStringLiteral {
174+
Language::StringLiteral literal;
175+
176+
IRStringLiteral() { this = TIRStringLiteral(func, ast, type, literal) }
177+
178+
final override predicate isReadOnly() { any() }
179+
180+
final override string getUniqueId() {
181+
result = "String: " + getLocationString() + "=" + Language::getStringLiteralText(literal)
182+
}
183+
184+
override string getBaseString() { result = "#string" }
185+
186+
final Language::StringLiteral getLiteral() { result = literal }
187+
}

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -808,14 +808,12 @@ class FloatConstantInstruction extends ConstantInstruction {
808808
FloatConstantInstruction() { getResultType() instanceof Language::FloatingPointType }
809809
}
810810

811-
class StringConstantInstruction extends Instruction {
812-
Language::StringLiteral value;
811+
class StringConstantInstruction extends VariableInstruction {
812+
override IRStringLiteral var;
813813

814-
StringConstantInstruction() { value = Construction::getInstructionStringLiteral(this) }
814+
final override string getImmediateString() { result = Language::getStringLiteralText(getValue()) }
815815

816-
final override string getImmediateString() { result = Language::getStringLiteralText(value) }
817-
818-
final Language::StringLiteral getValue() { result = value }
816+
final Language::StringLiteral getValue() { result = var.getLiteral() }
819817
}
820818

821819
class BinaryInstruction extends Instruction {

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/IRConstruction.qll

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,13 @@ private module Cached {
4444
)
4545
}
4646

47+
cached
48+
predicate hasStringLiteral(Function func, Locatable ast, CppType type, StringLiteral literal) {
49+
literal = ast and
50+
literal.getEnclosingFunction() = func and
51+
getTypeForPRValue(literal.getType()) = type
52+
}
53+
4754
cached
4855
predicate hasModeledMemoryResult(Instruction instruction) { none() }
4956

@@ -231,8 +238,14 @@ private module Cached {
231238

232239
cached
233240
IRVariable getInstructionVariable(Instruction instruction) {
234-
result = getInstructionTranslatedElement(instruction)
235-
.getInstructionVariable(getInstructionTag(instruction))
241+
exists(TranslatedElement element, InstructionTag tag |
242+
element = getInstructionTranslatedElement(instruction) and
243+
tag = getInstructionTag(instruction) and
244+
(
245+
result = element.getInstructionVariable(tag) or
246+
result.(IRStringLiteral).getAST() = element.getInstructionStringLiteral(tag)
247+
)
248+
)
236249
}
237250

238251
cached
@@ -263,12 +276,6 @@ private module Cached {
263276
)
264277
}
265278

266-
cached
267-
StringLiteral getInstructionStringLiteral(Instruction instruction) {
268-
result = getInstructionTranslatedElement(instruction)
269-
.getInstructionStringLiteral(getInstructionTag(instruction))
270-
}
271-
272279
cached
273280
BuiltInOperation getInstructionBuiltInOperation(Instruction instruction) {
274281
result = getInstructionTranslatedElement(instruction)

0 commit comments

Comments
 (0)