|
| 1 | +/** |
| 2 | + * Minimal, language-neutral type system for the IR. |
| 3 | + */ |
| 4 | + |
| 5 | +private import internal.IRTypeInternal |
| 6 | + |
/**
 * The algebraic datatype underlying `IRType`.
 *
 * The void and unknown branches always exist. Every other branch exists only for the
 * arguments that the language-specific `Language` module reports as supported.
 */
private newtype TIRType =
  // Unconditional branches.
  TIRVoidType() or
  TIRUnknownType() or
  // Present only when the language has an error type.
  TIRErrorType() { Language::hasErrorType() } or
  // One value per byte size supported by the language.
  TIRBooleanType(int byteSize) { Language::hasBooleanType(byteSize) } or
  TIRSignedIntegerType(int byteSize) { Language::hasSignedIntegerType(byteSize) } or
  TIRUnsignedIntegerType(int byteSize) { Language::hasUnsignedIntegerType(byteSize) } or
  TIRFloatingPointType(int byteSize) { Language::hasFloatingPointType(byteSize) } or
  TIRAddressType(int byteSize) { Language::hasAddressType(byteSize) } or
  TIRFunctionAddressType(int byteSize) { Language::hasFunctionAddressType(byteSize) } or
  // One value per (tag, byte size) pair supported by the language.
  TIROpaqueType(Language::OpaqueTypeTag tag, int byteSize) {
    Language::hasOpaqueType(tag, byteSize)
  }
| 20 | + |
/**
 * The language-neutral type of an IR `Instruction`, `Operand`, or `IRVariable`.
 *
 * `IRType` and its subclasses expose the same interface for every language that supports the
 * IR, so an analysis intended to work across languages should generally be written against
 * `IRType` rather than a language-specific type.
 *
 * Several language-specific types may map to one canonical `IRType`, and the IR treats any two
 * types that map to the same `IRType` as equivalent. For example, in C++, all pointer types
 * map to the same instance of `IRAddressType`.
 */
class IRType extends TIRType {
  /** Gets a textual representation of this type. Has no result unless overridden. */
  string toString() { none() }

  /**
   * Gets a string that uniquely identifies this `IRType`.
   *
   * By default this is the result of `toString()`. A subclass may override it with a more
   * verbose string where `toString()` alone would not be unique.
   */
  string getIdentityString() { result = this.toString() }

  /**
   * Gets the size of this type, in bytes, if known.
   *
   * Has a result for every `IRType` except `IRUnknownType`.
   */
  int getByteSize() { none() }

  /**
   * Gets a single instance of `LanguageType` that maps to this `IRType`.
   */
  Language::LanguageType getCanonicalLanguageType() { none() }
}
| 52 | + |
/**
 * An unknown type.
 *
 * Generally the type of a result or operand that accesses an unknown set of memory locations,
 * such as the side effects of a function call.
 */
class IRUnknownType extends IRType, TIRUnknownType {
  final override string toString() { result = "unknown" }

  /** Has no result: an unknown type has no known size. */
  final override int getByteSize() { none() }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalUnknownType() = result
  }
}
| 66 | + |
/**
 * A void type, which has no values.
 *
 * This is the result type of an instruction that does not produce a result.
 */
class IRVoidType extends IRType, TIRVoidType {
  final override string toString() { result = "void" }

  /** Gets the size of this type, which is always zero bytes. */
  final override int getByteSize() { result = 0 }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalVoidType() = result
  }
}
| 80 | + |
/**
 * An error type.
 *
 * Used when an error in the source code prevents the extractor from determining the proper
 * type.
 */
class IRErrorType extends IRType, TIRErrorType {
  final override string toString() { result = "error" }

  /** Gets the size of this type, which is reported as zero bytes. */
  final override int getByteSize() { result = 0 }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalErrorType() = result
  }
}
| 94 | + |
/**
 * An `IRType` that carries an explicit byte size: a Boolean, integer, floating-point, address,
 * function address, or opaque type.
 */
private class IRSizedType extends IRType {
  /** The size of this type, in bytes, taken from the underlying `TIRType` branch. */
  int byteSize;

  IRSizedType() {
    // Exactly one disjunct can hold for a given value; it binds `byteSize`.
    this = TIROpaqueType(_, byteSize)
    or
    this = TIRFunctionAddressType(byteSize)
    or
    this = TIRAddressType(byteSize)
    or
    this = TIRFloatingPointType(byteSize)
    or
    this = TIRUnsignedIntegerType(byteSize)
    or
    this = TIRSignedIntegerType(byteSize)
    or
    this = TIRBooleanType(byteSize)
  }

  final override int getByteSize() { byteSize = result }
}
| 110 | + |
/**
 * A Boolean type, whose values are `true` (non-zero) and `false` (zero).
 */
class IRBooleanType extends IRSizedType, TIRBooleanType {
  /** Gets `bool` followed by the byte size of this type. */
  final override string toString() { result = "bool" + this.getByteSize().toString() }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalBooleanType(byteSize) = result
  }
}
| 121 | + |
/**
 * A numeric type. This includes `IRSignedIntegerType`, `IRUnsignedIntegerType`, and
 * `IRFloatingPointType`.
 */
class IRNumericType extends IRSizedType {
  IRNumericType() {
    // `byteSize` is already bound by `IRSizedType`, so a branch-membership test suffices.
    this instanceof TIRSignedIntegerType
    or
    this instanceof TIRUnsignedIntegerType
    or
    this instanceof TIRFloatingPointType
  }
}
| 133 | + |
/**
 * A signed two's-complement integer type.
 *
 * Also represents enums whose underlying type is a signed integer, and character types whose
 * representation is signed.
 */
class IRSignedIntegerType extends IRNumericType, TIRSignedIntegerType {
  /** Gets `int` followed by the byte size of this type. */
  final override string toString() { result = "int" + this.getByteSize().toString() }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalSignedIntegerType(byteSize) = result
  }
}
| 145 | + |
/**
 * An unsigned integer type.
 *
 * Also represents enums whose underlying type is an unsigned integer, and character types
 * whose representation is unsigned.
 */
class IRUnsignedIntegerType extends IRNumericType, TIRUnsignedIntegerType {
  /** Gets `uint` followed by the byte size of this type. */
  final override string toString() { result = "uint" + this.getByteSize().toString() }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalUnsignedIntegerType(byteSize) = result
  }
}
| 157 | + |
/**
 * A floating-point type.
 */
class IRFloatingPointType extends IRNumericType, TIRFloatingPointType {
  /** Gets `float` followed by the byte size of this type. */
  final override string toString() { result = "float" + this.getByteSize().toString() }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalFloatingPointType(byteSize) = result
  }
}
| 168 | + |
/**
 * An address type, representing the memory address of data.
 *
 * Used to represent pointers, references, and lvalues, including those that are garbage
 * collected. The address of a function is represented by the separate
 * `IRFunctionAddressType`.
 */
class IRAddressType extends IRSizedType, TIRAddressType {
  /** Gets `addr` followed by the byte size of this type. */
  final override string toString() { result = "addr" + this.getByteSize().toString() }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalAddressType(byteSize) = result
  }
}
| 182 | + |
/**
 * An address type, representing the memory address of code.
 *
 * Used to represent function pointers, function references, and the target of a direct
 * function call.
 */
class IRFunctionAddressType extends IRSizedType, TIRFunctionAddressType {
  /** Gets `func` followed by the byte size of this type. */
  final override string toString() { result = "func" + this.getByteSize().toString() }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalFunctionAddressType(byteSize) = result
  }
}
| 194 | + |
/**
 * A type of known size that does not fit any of the other kinds of type.
 *
 * Used to represent classes, structs, unions, fixed-size arrays, pointers-to-member, and more.
 */
class IROpaqueType extends IRSizedType, TIROpaqueType {
  /** The tag that, together with `byteSize`, identifies this opaque type. */
  Language::OpaqueTypeTag tag;

  IROpaqueType() { this = TIROpaqueType(tag, byteSize) }

  /** Gets `opaque`, the byte size, and the tag's `toString()` in braces. */
  final override string toString() {
    exists(string size, string tagText |
      size = byteSize.toString() and
      tagText = tag.toString()
    |
      result = "opaque" + size + "{" + tagText + "}"
    )
  }

  /**
   * Gets the identity string of this type. It has the same shape as `toString()`, but embeds
   * `Language::getOpaqueTagIdentityString(tag)` instead of the tag's `toString()`.
   */
  final override string getIdentityString() {
    exists(string size, string tagIdentity |
      size = byteSize.toString() and
      tagIdentity = Language::getOpaqueTagIdentityString(tag)
    |
      result = "opaque" + size + "{" + tagIdentity + "}"
    )
  }

  final override Language::LanguageType getCanonicalLanguageType() {
    Language::getCanonicalOpaqueType(tag, byteSize) = result
  }

  /**
   * Gets the "tag" that differentiates this type from other incompatible opaque types that
   * have the same size.
   */
  final Language::OpaqueTypeTag getTag() { result = tag }
}
| 222 | + |
/**
 * Consistency checks for the mapping between `IRType` and `Language::LanguageType`.
 *
 * Each query predicate reports one kind of violation, so every predicate is expected to be
 * empty when the mapping is one-to-one in both directions. The checks declared in
 * `Language::LanguageTypeSanity` are imported as well.
 */
module IRTypeSanity {
  /** Holds if `type` has no canonical `LanguageType`. `message` describes the problem. */
  query predicate missingCanonicalLanguageType(IRType type, string message) {
    not exists(type.getCanonicalLanguageType()) and
    message = "Type does not have a canonical `LanguageType`"
  }

  /**
   * Holds if `type` has more than one canonical `LanguageType`. `message` lists the string
   * form of each candidate.
   */
  query predicate multipleCanonicalLanguageTypes(IRType type, string message) {
    strictcount(type.getCanonicalLanguageType()) > 1 and
    message =
      "Type has multiple canonical `LanguageType`s: " +
        concat(type.getCanonicalLanguageType().toString(), ", ")
  }

  /** Holds if the language type `type` has no `IRType`. `message` describes the problem. */
  query predicate missingIRType(Language::LanguageType type, string message) {
    not exists(type.getIRType()) and
    message = "`LanguageType` does not have a corresponding `IRType`."
  }

  /**
   * Holds if the language type `type` maps to more than one `IRType`. `message` names the
   * offending type and lists the string form of each `IRType` it maps to.
   */
  query predicate multipleIRTypes(Language::LanguageType type, string message) {
    strictcount(type.getIRType()) > 1 and
    // Use the type's own string form rather than `getAQlClass()`: the latter names the QL
    // class instead of the type and can have several results, which would duplicate rows.
    message =
      "`LanguageType` " + type.toString() + " has multiple `IRType`s: " +
        concat(type.getIRType().toString(), ", ")
  }

  import Language::LanguageTypeSanity
}
0 commit comments