Skip to content

Commit 22de0ef

Browse files
authored
Merge pull request #2008 from dave-bartolomeo/dave/IRType2
C++: Implement language-neutral IR type system
2 parents 80fd5b2 + 80e29dc commit 22de0ef

File tree

130 files changed

+3536
-2177
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+3536
-2177
lines changed

config/identical-files.json

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,11 @@
7676
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll",
7777
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll",
7878
"csharp/ql/src/semmle/code/csharp/ir/implementation/raw/Operand.qll",
79-
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/Operand.qll"
79+
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/Operand.qll"
80+
],
81+
"IR IRType": [
82+
"cpp/ql/src/semmle/code/cpp/ir/implementation/IRType.qll",
83+
"csharp/ql/src/semmle/code/csharp/ir/implementation/IRType.qll"
8084
],
8185
"IR Operand Tag": [
8286
"cpp/ql/src/semmle/code/cpp/ir/implementation/internal/OperandTag.qll",
@@ -169,22 +173,39 @@
169173
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintIRImports.qll",
170174
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintIRImports.qll"
171175
],
176+
"C++ SSA SSAConstructionImports": [
177+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstructionImports.qll",
178+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstructionImports.qll"
179+
],
172180
"C++ SSA AliasAnalysis": [
173181
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll",
174182
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll"
175183
],
176-
"C++ SSA SSAConstruction": [
184+
"C++ IR ValueNumberingImports": [
185+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/internal/ValueNumberingImports.qll",
186+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingImports.qll",
187+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/internal/ValueNumberingImports.qll"
188+
],
189+
"IR SSA SimpleSSA": [
190+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SimpleSSA.qll",
191+
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/internal/SimpleSSA.qll"
192+
],
193+
"IR SSA SSAConstruction": [
177194
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll",
178-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll"
195+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll",
196+
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll"
179197
],
180-
"C++ SSA PrintSSA": [
198+
"IR SSA PrintSSA": [
181199
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/PrintSSA.qll",
182-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintSSA.qll"
200+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintSSA.qll",
201+
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/internal/PrintSSA.qll"
183202
],
184-
"C++ IR ValueNumber": [
203+
"IR ValueNumber": [
185204
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll",
186205
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll",
187-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll"
206+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll",
207+
"csharp/ql/src/semmle/code/csharp/ir/implementation/raw/gvn/ValueNumbering.qll",
208+
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll"
188209
],
189210
"C++ IR ConstantAnalysis": [
190211
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/constant/ConstantAnalysis.qll",
@@ -235,5 +256,9 @@
235256
"C# IR PrintIRImports": [
236257
"csharp/ql/src/semmle/code/csharp/ir/implementation/raw/internal/PrintIRImports.qll",
237258
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/internal/PrintIRImports.qll"
259+
],
260+
"C# IR ValueNumberingImports": [
261+
"csharp/ql/src/semmle/code/csharp/ir/implementation/raw/gvn/internal/ValueNumberingImports.qll",
262+
"csharp/ql/src/semmle/code/csharp/ir/implementation/unaliased_ssa/gvn/internal/ValueNumberingImports.qll"
238263
]
239264
}

cpp/ql/src/semmle/code/cpp/Type.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,9 @@ class FloatingPointType extends ArithmeticType {
610610
(
611611
kind >= 24 and kind <= 32
612612
or
613-
kind = 38
613+
kind >= 38 and kind <= 42
614+
or
615+
kind >= 45 and kind <= 50
614616
)
615617
)
616618
}

cpp/ql/src/semmle/code/cpp/exprs/Expr.qll

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,13 @@ class ErrorExpr extends Expr, @errorexpr {
540540
*/
541541
class AssumeExpr extends Expr, @assume {
542542
override string toString() { result = "__assume(...)" }
543+
544+
override string getCanonicalQLClass() { result = "AssumeExpr" }
545+
546+
/**
547+
* Gets the operand of the `__assume` expression.
548+
*/
549+
Expr getOperand() { this.hasChild(result, 0) }
543550
}
544551

545552
/**
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
/**
2+
* Minimal, language-neutral type system for the IR.
3+
*/
4+
5+
private import internal.IRTypeInternal
6+
7+
private newtype TIRType =
8+
TIRVoidType() or
9+
TIRUnknownType() or
10+
TIRErrorType() { Language::hasErrorType() } or
11+
TIRBooleanType(int byteSize) { Language::hasBooleanType(byteSize) } or
12+
TIRSignedIntegerType(int byteSize) { Language::hasSignedIntegerType(byteSize) } or
13+
TIRUnsignedIntegerType(int byteSize) { Language::hasUnsignedIntegerType(byteSize) } or
14+
TIRFloatingPointType(int byteSize) { Language::hasFloatingPointType(byteSize) } or
15+
TIRAddressType(int byteSize) { Language::hasAddressType(byteSize) } or
16+
TIRFunctionAddressType(int byteSize) { Language::hasFunctionAddressType(byteSize) } or
17+
TIROpaqueType(Language::OpaqueTypeTag tag, int byteSize) {
18+
Language::hasOpaqueType(tag, byteSize)
19+
}
20+
21+
/**
22+
* The language-neutral type of an IR `Instruction`, `Operand`, or `IRVariable`.
23+
* The interface to `IRType` and its subclasses is the same across all languages for which the IR
24+
* is supported, so analyses that expect to be used for multiple languages should generally use
25+
* `IRType` rather than a language-specific type.
26+
*
27+
* Many types from the language-specific type system will map to a single canonical `IRType`. Two
28+
* types that map to the same `IRType` are considered equivalent by the IR. As an example, in C++,
29+
* all pointer types map to the same instance of `IRAddressType`.
30+
*/
31+
class IRType extends TIRType {
32+
string toString() { none() }
33+
34+
/**
35+
* Gets a string that uniquely identifies this `IRType`. This string is often the same as the
36+
* result of `IRType.toString()`, but for some types it may be more verbose to ensure uniqueness.
37+
*/
38+
string getIdentityString() { result = toString() }
39+
40+
/**
41+
* Gets the size of the type, in bytes, if known.
42+
*
43+
* This will hold for all `IRType` objects except `IRUnknownType`.
44+
*/
45+
int getByteSize() { none() }
46+
47+
/**
48+
* Gets a single instance of `LanguageType` that maps to this `IRType`.
49+
*/
50+
Language::LanguageType getCanonicalLanguageType() { none() }
51+
}
52+
53+
/**
54+
* An unknown type. Generally used to represent results and operands that access an unknown set of
55+
* memory locations, such as the side effects of a function call.
56+
*/
57+
class IRUnknownType extends IRType, TIRUnknownType {
58+
final override string toString() { result = "unknown" }
59+
60+
final override int getByteSize() { none() }
61+
62+
final override Language::LanguageType getCanonicalLanguageType() {
63+
result = Language::getCanonicalUnknownType()
64+
}
65+
}
66+
67+
/**
68+
* A void type, which has no values. Used to represent the result type of an instruction that does
69+
* not produce a result.
70+
*/
71+
class IRVoidType extends IRType, TIRVoidType {
72+
final override string toString() { result = "void" }
73+
74+
final override int getByteSize() { result = 0 }
75+
76+
final override Language::LanguageType getCanonicalLanguageType() {
77+
result = Language::getCanonicalVoidType()
78+
}
79+
}
80+
81+
/**
82+
* An error type. Used when an error in the source code prevents the extractor from determining the
83+
* proper type.
84+
*/
85+
class IRErrorType extends IRType, TIRErrorType {
86+
final override string toString() { result = "error" }
87+
88+
final override int getByteSize() { result = 0 }
89+
90+
final override Language::LanguageType getCanonicalLanguageType() {
91+
result = Language::getCanonicalErrorType()
92+
}
93+
}
94+
95+
private class IRSizedType extends IRType {
96+
int byteSize;
97+
98+
IRSizedType() {
99+
this = TIRBooleanType(byteSize) or
100+
this = TIRSignedIntegerType(byteSize) or
101+
this = TIRUnsignedIntegerType(byteSize) or
102+
this = TIRFloatingPointType(byteSize) or
103+
this = TIRAddressType(byteSize) or
104+
this = TIRFunctionAddressType(byteSize) or
105+
this = TIROpaqueType(_, byteSize)
106+
}
107+
108+
final override int getByteSize() { result = byteSize }
109+
}
110+
111+
/**
112+
* A Boolean type, which can hold the values `true` (non-zero) or `false` (zero).
113+
*/
114+
class IRBooleanType extends IRSizedType, TIRBooleanType {
115+
final override string toString() { result = "bool" + byteSize.toString() }
116+
117+
final override Language::LanguageType getCanonicalLanguageType() {
118+
result = Language::getCanonicalBooleanType(byteSize)
119+
}
120+
}
121+
122+
/**
123+
* A numeric type. This includes `IRSignedIntegerType`, `IRUnsignedIntegerType`, and
124+
* `IRFloatingPointType`.
125+
*/
126+
class IRNumericType extends IRSizedType {
127+
IRNumericType() {
128+
this = TIRSignedIntegerType(byteSize) or
129+
this = TIRUnsignedIntegerType(byteSize) or
130+
this = TIRFloatingPointType(byteSize)
131+
}
132+
}
133+
134+
/**
135+
* A signed two's-complement integer. Also used to represent enums whose underlying type is a signed
136+
* integer, as well as character types whose representation is signed.
137+
*/
138+
class IRSignedIntegerType extends IRNumericType, TIRSignedIntegerType {
139+
final override string toString() { result = "int" + byteSize.toString() }
140+
141+
final override Language::LanguageType getCanonicalLanguageType() {
142+
result = Language::getCanonicalSignedIntegerType(byteSize)
143+
}
144+
}
145+
146+
/**
147+
* An unsigned two's-complement integer. Also used to represent enums whose underlying type is an
148+
* unsigned integer, as well as character types whose representation is unsigned.
149+
*/
150+
class IRUnsignedIntegerType extends IRNumericType, TIRUnsignedIntegerType {
151+
final override string toString() { result = "uint" + byteSize.toString() }
152+
153+
final override Language::LanguageType getCanonicalLanguageType() {
154+
result = Language::getCanonicalUnsignedIntegerType(byteSize)
155+
}
156+
}
157+
158+
/**
159+
* A floating-point type.
160+
*/
161+
class IRFloatingPointType extends IRNumericType, TIRFloatingPointType {
162+
final override string toString() { result = "float" + byteSize.toString() }
163+
164+
final override Language::LanguageType getCanonicalLanguageType() {
165+
result = Language::getCanonicalFloatingPointType(byteSize)
166+
}
167+
}
168+
169+
/**
170+
* An address type, representing the memory address of data. Used to represent pointers, references,
171+
* and lvalues, including those that are garbage collected.
172+
*
173+
* The address of a function is represented by the separate `IRFunctionAddressType`.
174+
*/
175+
class IRAddressType extends IRSizedType, TIRAddressType {
176+
final override string toString() { result = "addr" + byteSize.toString() }
177+
178+
final override Language::LanguageType getCanonicalLanguageType() {
179+
result = Language::getCanonicalAddressType(byteSize)
180+
}
181+
}
182+
183+
/**
184+
* An address type, representing the memory address of code. Used to represent function pointers,
185+
* function references, and the target of a direct function call.
186+
*/
187+
class IRFunctionAddressType extends IRSizedType, TIRFunctionAddressType {
188+
final override string toString() { result = "func" + byteSize.toString() }
189+
190+
final override Language::LanguageType getCanonicalLanguageType() {
191+
result = Language::getCanonicalFunctionAddressType(byteSize)
192+
}
193+
}
194+
195+
/**
196+
* A type with known size that does not fit any of the other kinds of type. Used to represent
197+
* classes, structs, unions, fixed-size arrays, pointers-to-member, and more.
198+
*/
199+
class IROpaqueType extends IRSizedType, TIROpaqueType {
200+
Language::OpaqueTypeTag tag;
201+
202+
IROpaqueType() { this = TIROpaqueType(tag, byteSize) }
203+
204+
final override string toString() {
205+
result = "opaque" + byteSize.toString() + "{" + tag.toString() + "}"
206+
}
207+
208+
final override string getIdentityString() {
209+
result = "opaque" + byteSize.toString() + "{" + Language::getOpaqueTagIdentityString(tag) + "}"
210+
}
211+
212+
final override Language::LanguageType getCanonicalLanguageType() {
213+
result = Language::getCanonicalOpaqueType(tag, byteSize)
214+
}
215+
216+
/**
217+
* Gets the "tag" that differentiates this type from other incompatible opaque types that have the
218+
* same size.
219+
*/
220+
final Language::OpaqueTypeTag getTag() { result = tag }
221+
}
222+
223+
module IRTypeSanity {
224+
query predicate missingCanonicalLanguageType(IRType type, string message) {
225+
not exists(type.getCanonicalLanguageType()) and
226+
message = "Type does not have a canonical `LanguageType`"
227+
}
228+
229+
query predicate multipleCanonicalLanguageTypes(IRType type, string message) {
230+
strictcount(type.getCanonicalLanguageType()) > 1 and
231+
message = "Type has multiple canonical `LanguageType`s: " +
232+
concat(type.getCanonicalLanguageType().toString(), ", ")
233+
}
234+
235+
query predicate missingIRType(Language::LanguageType type, string message) {
236+
not exists(type.getIRType()) and
237+
message = "`LanguageType` does not have a corresponding `IRType`."
238+
}
239+
240+
query predicate multipleIRTypes(Language::LanguageType type, string message) {
241+
strictcount(type.getIRType()) > 1 and
242+
message = "`LanguageType` " + type.getAQlClass() + " has multiple `IRType`s: " +
243+
concat(type.getIRType().toString(), ", ")
244+
}
245+
246+
import Language::LanguageTypeSanity
247+
}

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import IRVariable
55
import Operand
66
private import internal.IRImports as Imports
77
import Imports::EdgeKind
8+
import Imports::IRType
89
import Imports::MemoryAccessKind
910

1011
private newtype TIRPropertyProvider = MkIRPropertyProvider()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
private import IR
22
import InstructionSanity
3+
import IRTypeSanity

0 commit comments

Comments
 (0)