diff --git a/rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll b/rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll
new file mode 100644
index 000000000000..e86d45c71120
--- /dev/null
+++ b/rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll
@@ -0,0 +1,15 @@
+/** Provides subclasses of literal expressions. */
+
+private import internal.LiteralExprImpl
+
+final class CharLiteralExpr = Impl::CharLiteralExpr;
+
+final class StringLiteralExpr = Impl::StringLiteralExpr;
+
+final class NumberLiteralExpr = Impl::NumberLiteralExpr;
+
+final class IntegerLiteralExpr = Impl::IntegerLiteralExpr;
+
+final class FloatLiteralExpr = Impl::FloatLiteralExpr;
+
+final class BooleanLiteralExpr = Impl::BooleanLiteralExpr;
diff --git a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll
index 6a8d02d9380e..f848663a99bb 100644
--- a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll
+++ b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll
@@ -42,4 +42,185 @@ module Impl {
       )
     }
   }
+
+  /**
+   * A [character literal][1]. For example:
+   *
+   * ```rust
+   * 'x';
+   * ```
+   *
+   * [1]: https://doc.rust-lang.org/reference/tokens.html#character-literals
+   */
+  class CharLiteralExpr extends LiteralExpr {
+    CharLiteralExpr() { this.getTextValue().regexpMatch("'.*'") }
+
+    override string getAPrimaryQlClass() { result = "CharLiteralExpr" }
+  }
+
+  /**
+   * A [string literal][1]. For example:
+   *
+   * ```rust
+   * "Hello, world!";
+   * ```
+   *
+   * [1]: https://doc.rust-lang.org/reference/tokens.html#string-literals
+   */
+  class StringLiteralExpr extends LiteralExpr {
+    StringLiteralExpr() {
+      // `(?s)` lets `.` match line terminators, since string literals may span several lines.
+      this.getTextValue().regexpMatch("(?s)r?#*\".*\"#*")
+    }
+
+    override string getAPrimaryQlClass() { result = "StringLiteralExpr" }
+  }
+
+  /**
+   * A number literal, that is, an integer or floating-point literal.
+   */
+  abstract class NumberLiteralExpr extends LiteralExpr { }
+
+  // https://doc.rust-lang.org/reference/tokens.html#integer-literals
+  private module IntegerLiteralRegexs {
+    bindingset[s]
+    string paren(string s) { result = "(?:" + s + ")" }
+
+    string integerLiteral() {
+      result =
+        paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" +
+            paren(hexLiteral())) + "(" + suffix() + ")?"
+    }
+
+    private string suffix() { result = "u8|i8|u16|i16|u32|i32|u64|i64|u128|i128|usize|isize" }
+
+    string decLiteral() { result = decDigit() + "(?:" + decDigit() + "|_)*" }
+
+    string binLiteral() {
+      result = "0b(?:" + binDigit() + "|_)*" + binDigit() + "(?:" + binDigit() + "|_)*"
+    }
+
+    string octLiteral() {
+      result = "0o(?:" + octDigit() + "|_)*" + octDigit() + "(?:" + octDigit() + "|_)*"
+    }
+
+    string hexLiteral() {
+      result = "0x(?:" + hexDigit() + "|_)*" + hexDigit() + "(?:" + hexDigit() + "|_)*"
+    }
+
+    string decDigit() { result = "[0-9]" }
+
+    string binDigit() { result = "[01]" }
+
+    string octDigit() { result = "[0-7]" }
+
+    string hexDigit() { result = "[0-9a-fA-F]" }
+  }
+
+  /**
+   * An [integer literal][1]. For example:
+   *
+   * ```rust
+   * 42;
+   * ```
+   *
+   * [1]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
+   */
+  class IntegerLiteralExpr extends NumberLiteralExpr {
+    IntegerLiteralExpr() { this.getTextValue().regexpMatch(IntegerLiteralRegexs::integerLiteral()) }
+
+    /**
+     * Gets the suffix of this integer literal, if any.
+     *
+     * For example, `42u8` has the suffix `u8`.
+     */
+    string getSuffix() {
+      exists(string s, string reg |
+        s = this.getTextValue() and
+        reg = IntegerLiteralRegexs::integerLiteral() and
+        result = s.regexpCapture(reg, 1)
+      )
+    }
+
+    override string getAPrimaryQlClass() { result = "IntegerLiteralExpr" }
+  }
+
+  // https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
+  private module FloatLiteralRegexs {
+    private import IntegerLiteralRegexs
+
+    string floatLiteral() {
+      result =
+        paren(decLiteral() + "\\.") + "|" + paren(floatLiteralSuffix1()) + "|" +
+          paren(floatLiteralSuffix2())
+    }
+
+    string floatLiteralSuffix1() {
+      result = decLiteral() + "\\." + decLiteral() + "(" + suffix() + ")?"
+    }
+
+    string floatLiteralSuffix2() {
+      result =
+        decLiteral() + paren("\\." + decLiteral()) + "?" + paren(exponent()) + "(" + suffix() + ")?"
+    }
+
+    string integerSuffixLiteral() {
+      result =
+        paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" +
+            paren(hexLiteral())) + "(" + suffix() + ")"
+    }
+
+    private string suffix() { result = "f32|f64" }
+
+    string exponent() {
+      result =
+        "(?:e|E)(?:\\+|-)?(?:" + decDigit() + "|_)*" + decDigit() + "(?:" + decDigit() + "|_)*"
+    }
+  }
+
+  /**
+   * A [floating-point literal][1]. For example:
+   *
+   * ```rust
+   * 42.0;
+   * ```
+   *
+   * [1]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
+   */
+  class FloatLiteralExpr extends NumberLiteralExpr {
+    FloatLiteralExpr() {
+      this.getTextValue()
+          .regexpMatch(IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteral()) + "|" +
+              IntegerLiteralRegexs::paren(FloatLiteralRegexs::integerSuffixLiteral())) and
+      // E.g. `0x01_f32` is an integer, not a float
+      not this instanceof IntegerLiteralExpr
+    }
+
+    /**
+     * Gets the suffix of this floating-point literal, if any.
+     *
+     * For example, `42.0f32` has the suffix `f32`.
+     */
+    string getSuffix() {
+      exists(string s, string reg |
+        reg =
+          IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteralSuffix1()) + "|" +
+            IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteralSuffix2()) + "|" +
+            IntegerLiteralRegexs::paren(FloatLiteralRegexs::integerSuffixLiteral()) and
+        s = this.getTextValue() and
+        result = s.regexpCapture(reg, [1, 2, 3])
+      )
+    }
+
+    override string getAPrimaryQlClass() { result = "FloatLiteralExpr" }
+  }
+
+  /**
+   * A Boolean literal. Either `true` or `false`.
+   */
+  class BooleanLiteralExpr extends LiteralExpr {
+    BooleanLiteralExpr() { this.getTextValue() = ["false", "true"] }
+
+    override string getAPrimaryQlClass() { result = "BooleanLiteralExpr" }
+  }
 }
diff --git a/rust/ql/lib/rust.qll b/rust/ql/lib/rust.qll
index ff97a37b5d1f..b64f37533aa1 100644
--- a/rust/ql/lib/rust.qll
+++ b/rust/ql/lib/rust.qll
@@ -4,6 +4,7 @@ import codeql.rust.elements
 import codeql.Locations
 import codeql.files.FileSystem
 import codeql.rust.elements.AssignmentOperation
+import codeql.rust.elements.LiteralExprExt
 import codeql.rust.elements.LogicalOperation
 import codeql.rust.elements.AsyncBlockExpr
 import codeql.rust.elements.Variable
diff --git a/rust/ql/test/extractor-tests/literal/literal.expected b/rust/ql/test/extractor-tests/literal/literal.expected
new file mode 100644
index 000000000000..d229d9684671
--- /dev/null
+++ b/rust/ql/test/extractor-tests/literal/literal.expected
@@ -0,0 +1,45 @@
+charLiteral
+| literal.rs:2:5:2:7 | 'a' |
+| literal.rs:3:5:3:7 | 'b' |
+| literal.rs:4:5:4:8 | '\\'' |
+| literal.rs:5:5:5:8 | '\\n' |
+| literal.rs:6:5:6:15 | '\\u{1F600}' |
+stringLiteral
+| literal.rs:11:5:11:9 | "foo" |
+| literal.rs:12:5:12:10 | r"foo" |
+| literal.rs:13:5:13:13 | "\\"foo\\"" |
+| literal.rs:14:5:14:14 | r#""foo""# |
+| literal.rs:16:5:16:18 | "foo #\\"# bar" |
+| literal.rs:17:5:17:22 | r##"foo #"# bar"## |
+| literal.rs:19:5:19:10 | "\\x52" |
+| literal.rs:20:5:20:7 | "R" |
+| literal.rs:21:5:21:8 | r"R" |
+| literal.rs:22:5:22:11 | "\\\\x52" |
+| literal.rs:23:5:23:11 | r"\\x52" |
+integerLiteral
+| literal.rs:28:5:28:7 | 123 |  |
+| literal.rs:29:5:29:10 | 123i32 | i32 |
+| literal.rs:30:5:30:10 | 123u32 | u32 |
+| literal.rs:31:5:31:11 | 123_u32 | u32 |
+| literal.rs:33:5:33:8 | 0xff |  |
+| literal.rs:34:5:34:11 | 0xff_u8 | u8 |
+| literal.rs:35:5:35:12 | 0x01_f32 |  |
+| literal.rs:36:5:36:11 | 0x01_e3 |  |
+| literal.rs:38:5:38:8 | 0o70 |  |
+| literal.rs:39:5:39:12 | 0o70_i16 | i16 |
+| literal.rs:41:5:41:25 | 0b1111_1111_1001_0000 |  |
+| literal.rs:42:5:42:28 | 0b1111_1111_1001_0000i64 | i64 |
+| literal.rs:43:5:43:15 | 0b________1 |  |
+| literal.rs:45:5:45:10 | 0usize | usize |
+| literal.rs:48:5:49:10 | 128_i8 | i8 |
+| literal.rs:50:5:51:10 | 256_u8 | u8 |
+floatLiteral
+| literal.rs:56:5:56:8 | 5f32 | f32 |
+| literal.rs:58:5:58:12 | 123.0f64 | f64 |
+| literal.rs:59:5:59:10 | 0.1f64 | f64 |
+| literal.rs:60:5:60:10 | 0.1f32 | f32 |
+| literal.rs:61:5:61:14 | 12E+99_f64 | f64 |
+| literal.rs:62:18:62:19 | 2. |  |
+booleanLiteral
+| literal.rs:66:5:66:8 | true |
+| literal.rs:67:5:67:9 | false |
diff --git a/rust/ql/test/extractor-tests/literal/literal.ql b/rust/ql/test/extractor-tests/literal/literal.ql
new file mode 100644
index 000000000000..3585ad2f5b91
--- /dev/null
+++ b/rust/ql/test/extractor-tests/literal/literal.ql
@@ -0,0 +1,13 @@
+import rust
+
+query predicate charLiteral(CharLiteralExpr e) { any() }
+
+query predicate stringLiteral(StringLiteralExpr e) { any() }
+
+query predicate integerLiteral(IntegerLiteralExpr e, string suffix) {
+  suffix = concat(e.getSuffix())
+}
+
+query predicate floatLiteral(FloatLiteralExpr e, string suffix) { suffix = concat(e.getSuffix()) }
+
+query predicate booleanLiteral(BooleanLiteralExpr e) { any() }
diff --git a/rust/ql/test/extractor-tests/literal/literal.rs b/rust/ql/test/extractor-tests/literal/literal.rs
new file mode 100644
index 000000000000..ea4ccdece630
--- /dev/null
+++ b/rust/ql/test/extractor-tests/literal/literal.rs
@@ -0,0 +1,68 @@
+fn char_literals() {
+    'a';
+    'b';
+    '\'';
+    '\n';
+    '\u{1F600}';
+}
+
+fn string_literals() {
+    // from https://doc.rust-lang.org/reference/tokens.html#string-literals
+    "foo";
+    r"foo"; // foo
+    "\"foo\"";
+    r#""foo""#; // "foo"
+
+    "foo #\"# bar";
+    r##"foo #"# bar"##; // foo #"# bar
+
+    "\x52";
+    "R";
+    r"R"; // R
+    "\\x52";
+    r"\x52"; // \x52
+}
+
+fn integer_literals() {
+    // from https://doc.rust-lang.org/reference/tokens.html#integer-literals
+    123;
+    123i32;
+    123u32;
+    123_u32;
+
+    0xff;
+    0xff_u8;
+    0x01_f32; // integer 7986, not floating-point 1.0
+    0x01_e3; // integer 483, not floating-point 1000.0
+
+    0o70;
+    0o70_i16;
+
+    0b1111_1111_1001_0000;
+    0b1111_1111_1001_0000i64;
+    0b________1;
+
+    0usize;
+
+    // These are too big for their type, but are accepted as literal expressions.
+    #[allow(overflowing_literals)]
+    128_i8;
+    #[allow(overflowing_literals)]
+    256_u8;
+}
+
+fn float_literals() {
+    // This is an integer literal, accepted as a floating-point literal expression.
+    5f32;
+
+    123.0f64;
+    0.1f64;
+    0.1f32;
+    12E+99_f64;
+    let x: f64 = 2.;
+}
+
+fn boolean_literals() {
+    true;
+    false;
+}