From cd01bd0e0747584f1074f55e94dbdbf2ffe76998 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Mon, 12 May 2025 20:19:47 +0200 Subject: [PATCH 1/3] Rust: Add `LiteralExpr` sub classes --- .../codeql/rust/elements/LiteralExprExt.qll | 15 ++ .../elements/internal/LiteralExprImpl.qll | 187 ++++++++++++++++++ rust/ql/lib/rust.qll | 1 + .../extractor-tests/literal/literal.expected | 42 ++++ .../test/extractor-tests/literal/literal.ql | 13 ++ .../test/extractor-tests/literal/literal.rs | 65 ++++++ 6 files changed, 323 insertions(+) create mode 100644 rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll create mode 100644 rust/ql/test/extractor-tests/literal/literal.expected create mode 100644 rust/ql/test/extractor-tests/literal/literal.ql create mode 100644 rust/ql/test/extractor-tests/literal/literal.rs diff --git a/rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll b/rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll new file mode 100644 index 000000000000..e86d45c71120 --- /dev/null +++ b/rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll @@ -0,0 +1,15 @@ +/** Provides sub classes of literal expressions. */ + +private import internal.LiteralExprImpl + +final class CharLiteralExpr = Impl::CharLiteralExpr; + +final class StringLiteralExpr = Impl::StringLiteralExpr; + +final class NumberLiteralExpr = Impl::NumberLiteralExpr; + +final class IntegerLiteralExpr = Impl::IntegerLiteralExpr; + +final class FloatLiteralExpr = Impl::FloatLiteralExpr; + +final class BooleanLiteralExpr = Impl::BooleanLiteralExpr; diff --git a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll index 6a8d02d9380e..c424ff5471da 100644 --- a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll +++ b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll @@ -42,4 +42,191 @@ module Impl { ) } } + + /** + * A [character literal][1]. For example: + * + * ```rust + * 'x'; + * ``` + * + * [1]: https://doc.rust-lang.org/reference/tokens.html#character-literals + */ + class CharLiteralExpr extends LiteralExpr { + CharLiteralExpr() { + // todo: proper implementation + this.getTextValue().regexpMatch("'.*'") + } + + override string getAPrimaryQlClass() { result = "CharLiteralExpr" } + } + + /** + * A [string literal][1]. For example: + * + * ```rust + * "Hello, world!"; + * ``` + * + * [1]: https://doc.rust-lang.org/reference/tokens.html#string-literals + */ + class StringLiteralExpr extends LiteralExpr { + StringLiteralExpr() { + // todo: proper implementation + this.getTextValue().regexpMatch("r?#*\".*\"#*") + } + + override string getAPrimaryQlClass() { result = "StringLiteralExpr" } + } + + /** + * A number literal. + */ + abstract class NumberLiteralExpr extends LiteralExpr { } + + // https://doc.rust-lang.org/reference/tokens.html#integer-literals + private module IntegerLiteralRegexs { + bindingset[s] + string paren(string s) { result = "(" + s + ")" } + + string integerLiteral() { + result = + paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" + + paren(hexLiteral())) + paren(suffix()) + "?" + } + + private string suffix() { result = "u8|i8|u16|i16|u32|i32|u64|i64|u128|i128|usize|isize" } + + string decLiteral() { result = decDigit() + "(" + decDigit() + "|_)*" } + + string binLiteral() { + result = "0b(" + binDigit() + "|_)*" + binDigit() + "(" + binDigit() + "|_)*" + } + + string octLiteral() { + result = "0o(" + octDigit() + "|_)*" + octDigit() + "(" + octDigit() + "|_)*" + } + + string hexLiteral() { + result = "0x(" + hexDigit() + "|_)*" + hexDigit() + "(" + hexDigit() + "|_)*" + } + + string decDigit() { result = "[0-9]" } + + string binDigit() { result = "[01]" } + + string octDigit() { result = "[0-7]" } + + string hexDigit() { result = "[0-9a-fA-F]" } + } + + /** + * An [integer literal][1]. For example: + * + * ```rust + * 42; + * ``` + * + * [1]: https://doc.rust-lang.org/reference/tokens.html#integer-literals + */ + class IntegerLiteralExpr extends NumberLiteralExpr { + IntegerLiteralExpr() { this.getTextValue().regexpMatch(IntegerLiteralRegexs::integerLiteral()) } + + /** + * Get the suffix of this integer literal, if any. + * + * For example, `42u8` has the suffix `u8`. + */ + string getSuffix() { + exists(string s, string reg, int last | + s = this.getTextValue() and + reg = IntegerLiteralRegexs::integerLiteral() and + last = strictcount(reg.indexOf("(")) and + result = s.regexpCapture(reg, last) + ) + } + + override string getAPrimaryQlClass() { result = "IntegerLiteralExpr" } + } + + // https://doc.rust-lang.org/reference/tokens.html#floating-point-literals + private module FloatLiteralRegexs { + private import IntegerLiteralRegexs + + string floatLiteral() { + result = + paren(decLiteral() + "\\.") + "|" + paren(floatLiteralSuffix1()) + "|" + + paren(floatLiteralSuffix2()) + } + + string floatLiteralSuffix1() { + result = decLiteral() + "\\." + decLiteral() + paren(suffix()) + "?" + } + + string floatLiteralSuffix2() { + result = + decLiteral() + paren("\\." + decLiteral()) + "?" + paren(exponent()) + paren(suffix()) + "?" + } + + string integerSuffixLiteral() { + result = + paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" + + paren(hexLiteral())) + paren(suffix()) + } + + private string suffix() { result = "f32|f64" } + + string exponent() { + result = "(e|E)(\\+|-)?(" + decDigit() + "|_)*" + decDigit() + "(" + decDigit() + "|_)*" + } + } + + /** + * A [floating-point literal][1]. For example: + * + * ```rust + * 42.0; + * ``` + * + * [1]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals + */ + class FloatLiteralExpr extends NumberLiteralExpr { + FloatLiteralExpr() { + this.getTextValue() + .regexpMatch([ + FloatLiteralRegexs::floatLiteral(), FloatLiteralRegexs::integerSuffixLiteral() + ]) and + // E.g. `0x01_f32` is an integer, not a float + not this instanceof IntegerLiteralExpr + } + + /** + * Get the suffix of this floating-point literal, if any. + * + * For example, `42.0f32` has the suffix `f32`. + */ + string getSuffix() { + exists(string s, string reg, int last | + s = this.getTextValue() and + reg = + [ + FloatLiteralRegexs::floatLiteralSuffix1(), FloatLiteralRegexs::floatLiteralSuffix2(), + FloatLiteralRegexs::integerSuffixLiteral() + ] and + last = strictcount(reg.indexOf("(")) and + result = s.regexpCapture(reg, last) + ) + } + + override string getAPrimaryQlClass() { result = "FloatLiteralExpr" } + } + + /** + * A Boolean literal. Either `true` or `false`. + */ + class BooleanLiteralExpr extends LiteralExpr { + BooleanLiteralExpr() { this.getTextValue() = ["false", "true"] } + + override string getAPrimaryQlClass() { result = "BooleanLiteralExpr" } + } } diff --git a/rust/ql/lib/rust.qll b/rust/ql/lib/rust.qll index ff97a37b5d1f..b64f37533aa1 100644 --- a/rust/ql/lib/rust.qll +++ b/rust/ql/lib/rust.qll @@ -4,6 +4,7 @@ import codeql.rust.elements import codeql.Locations import codeql.files.FileSystem import codeql.rust.elements.AssignmentOperation +import codeql.rust.elements.LiteralExprExt import codeql.rust.elements.LogicalOperation import codeql.rust.elements.AsyncBlockExpr import codeql.rust.elements.Variable diff --git a/rust/ql/test/extractor-tests/literal/literal.expected b/rust/ql/test/extractor-tests/literal/literal.expected new file mode 100644 index 000000000000..55d1ac11013c --- /dev/null +++ b/rust/ql/test/extractor-tests/literal/literal.expected @@ -0,0 +1,42 @@ +charLiteral +| literal.rs:2:5:2:7 | 'a' | +| literal.rs:3:5:3:7 | 'b' | +stringLiteral +| literal.rs:8:5:8:9 | "foo" | +| literal.rs:9:5:9:10 | r"foo" | +| literal.rs:10:5:10:13 | "\\"foo\\"" | +| literal.rs:11:5:11:14 | r#""foo""# | +| literal.rs:13:5:13:18 | "foo #\\"# bar" | +| literal.rs:14:5:14:22 | r##"foo #"# bar"## | +| literal.rs:16:5:16:10 | "\\x52" | +| literal.rs:17:5:17:7 | "R" | +| literal.rs:18:5:18:8 | r"R" | +| literal.rs:19:5:19:11 | "\\\\x52" | +| literal.rs:20:5:20:11 | r"\\x52" | +integerLiteral +| literal.rs:25:5:25:7 | 123 | | +| literal.rs:26:5:26:10 | 123i32 | i32 | +| literal.rs:27:5:27:10 | 123u32 | u32 | +| literal.rs:28:5:28:11 | 123_u32 | u32 | +| literal.rs:30:5:30:8 | 0xff | | +| literal.rs:31:5:31:11 | 0xff_u8 | u8 | +| literal.rs:32:5:32:12 | 0x01_f32 | | +| literal.rs:33:5:33:11 | 0x01_e3 | | +| literal.rs:35:5:35:8 | 0o70 | | +| literal.rs:36:5:36:12 | 0o70_i16 | i16 | +| literal.rs:38:5:38:25 | 0b1111_1111_1001_0000 | | +| literal.rs:39:5:39:28 | 0b1111_1111_1001_0000i64 | i64 | +| literal.rs:40:5:40:15 | 0b________1 | | +| literal.rs:42:5:42:10 | 0usize | usize | +| literal.rs:45:5:46:10 | 128_i8 | i8 | +| literal.rs:47:5:48:10 | 256_u8 | u8 | +floatLiteral +| literal.rs:53:5:53:8 | 5f32 | f32 | +| literal.rs:55:5:55:12 | 123.0f64 | f64 | +| literal.rs:56:5:56:10 | 0.1f64 | f64 | +| literal.rs:57:5:57:10 | 0.1f32 | f32 | +| literal.rs:58:5:58:14 | 12E+99_f64 | f64 | +| literal.rs:59:18:59:19 | 2. | | +booleanLiteral +| literal.rs:63:5:63:8 | true | +| literal.rs:64:5:64:9 | false | diff --git a/rust/ql/test/extractor-tests/literal/literal.ql b/rust/ql/test/extractor-tests/literal/literal.ql new file mode 100644 index 000000000000..3585ad2f5b91 --- /dev/null +++ b/rust/ql/test/extractor-tests/literal/literal.ql @@ -0,0 +1,13 @@ +import rust + +query predicate charLiteral(CharLiteralExpr e) { any() } + +query predicate stringLiteral(StringLiteralExpr e) { any() } + +query predicate integerLiteral(IntegerLiteralExpr e, string suffix) { + suffix = concat(e.getSuffix()) +} + +query predicate floatLiteral(FloatLiteralExpr e, string suffix) { suffix = concat(e.getSuffix()) } + +query predicate booleanLiteral(BooleanLiteralExpr e) { any() } diff --git a/rust/ql/test/extractor-tests/literal/literal.rs b/rust/ql/test/extractor-tests/literal/literal.rs new file mode 100644 index 000000000000..74503f2a24e4 --- /dev/null +++ b/rust/ql/test/extractor-tests/literal/literal.rs @@ -0,0 +1,65 @@ +fn char_literals() { + 'a'; + 'b'; +} + +fn string_literals() { + // from https://doc.rust-lang.org/reference/tokens.html#string-literals + "foo"; + r"foo"; // foo + "\"foo\""; + r#""foo""#; // "foo" + + "foo #\"# bar"; + r##"foo #"# bar"##; // foo #"# bar + + "\x52"; + "R"; + r"R"; // R + "\\x52"; + r"\x52"; // \x52 +} + +fn integer_literals() { + // from https://doc.rust-lang.org/reference/tokens.html#integer-literals + 123; + 123i32; + 123u32; + 123_u32; + + 0xff; + 0xff_u8; + 0x01_f32; // integer 7986, not floating-point 1.0 + 0x01_e3; // integer 483, not floating-point 1000.0 + + 0o70; + 0o70_i16; + + 0b1111_1111_1001_0000; + 0b1111_1111_1001_0000i64; + 0b________1; + + 0usize; + + // These are too big for their type, but are accepted as literal expressions. + #[allow(overflowing_literals)] + 128_i8; + #[allow(overflowing_literals)] + 256_u8; +} + +fn float_literals() { + // This is an integer literal, accepted as a floating-point literal expression. + 5f32; + + 123.0f64; + 0.1f64; + 0.1f32; + 12E+99_f64; + let x: f64 = 2.; +} + +fn boolean_literals() { + true; + false; +} From 7494eac35c4050563903d4426c10844fc28e7919 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Tue, 13 May 2025 12:47:49 +0200 Subject: [PATCH 2/3] Address review comments --- .../elements/internal/LiteralExprImpl.qll | 39 +++++----- .../extractor-tests/literal/literal.expected | 73 ++++++++++--------- .../test/extractor-tests/literal/literal.rs | 3 + 3 files changed, 58 insertions(+), 57 deletions(-) diff --git a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll index c424ff5471da..2a2a754e8899 100644 --- a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll +++ b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll @@ -53,10 +53,7 @@ module Impl { * [1]: https://doc.rust-lang.org/reference/tokens.html#character-literals */ class CharLiteralExpr extends LiteralExpr { - CharLiteralExpr() { - // todo: proper implementation - this.getTextValue().regexpMatch("'.*'") - } + CharLiteralExpr() { this.getTextValue().regexpMatch("'.*'") } override string getAPrimaryQlClass() { result = "CharLiteralExpr" } } @@ -71,10 +68,7 @@ module Impl { * [1]: https://doc.rust-lang.org/reference/tokens.html#string-literals */ class StringLiteralExpr extends LiteralExpr { - StringLiteralExpr() { - // todo: proper implementation - this.getTextValue().regexpMatch("r?#*\".*\"#*") - } + StringLiteralExpr() { this.getTextValue().regexpMatch("r?#*\".*\"#*") } override string getAPrimaryQlClass() { result = "StringLiteralExpr" } } @@ -138,11 +132,10 @@ module Impl { * For example, `42u8` has the suffix `u8`. */ string getSuffix() { - exists(string s, string reg, int last | + exists(string s, string reg | s = this.getTextValue() and reg = IntegerLiteralRegexs::integerLiteral() and - last = strictcount(reg.indexOf("(")) and - result = s.regexpCapture(reg, last) + result = s.regexpCapture(reg, 13) ) } @@ -193,9 +186,8 @@ module Impl { class FloatLiteralExpr extends NumberLiteralExpr { FloatLiteralExpr() { this.getTextValue() - .regexpMatch([ - FloatLiteralRegexs::floatLiteral(), FloatLiteralRegexs::integerSuffixLiteral() - ]) and + .regexpMatch(IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteral()) + "|" + + IntegerLiteralRegexs::paren(FloatLiteralRegexs::integerSuffixLiteral())) and // E.g. `0x01_f32` is an integer, not a float not this instanceof IntegerLiteralExpr } @@ -206,15 +198,18 @@ module Impl { * For example, `42.0f32` has the suffix `f32`. */ string getSuffix() { - exists(string s, string reg, int last | + exists(string s, string reg, int group | + reg = FloatLiteralRegexs::floatLiteralSuffix1() and + group = 3 + or + reg = FloatLiteralRegexs::floatLiteralSuffix2() and + group = 9 + or + reg = FloatLiteralRegexs::integerSuffixLiteral() and + group = 13 + | s = this.getTextValue() and - reg = - [ - FloatLiteralRegexs::floatLiteralSuffix1(), FloatLiteralRegexs::floatLiteralSuffix2(), - FloatLiteralRegexs::integerSuffixLiteral() - ] and - last = strictcount(reg.indexOf("(")) and - result = s.regexpCapture(reg, last) + result = s.regexpCapture(reg, group) ) } diff --git a/rust/ql/test/extractor-tests/literal/literal.expected b/rust/ql/test/extractor-tests/literal/literal.expected index 55d1ac11013c..d229d9684671 100644 --- a/rust/ql/test/extractor-tests/literal/literal.expected +++ b/rust/ql/test/extractor-tests/literal/literal.expected @@ -1,42 +1,45 @@ charLiteral | literal.rs:2:5:2:7 | 'a' | | literal.rs:3:5:3:7 | 'b' | +| literal.rs:4:5:4:8 | '\\'' | +| literal.rs:5:5:5:8 | '\\n' | +| literal.rs:6:5:6:15 | '\\u{1F600}' | stringLiteral -| literal.rs:8:5:8:9 | "foo" | -| literal.rs:9:5:9:10 | r"foo" | -| literal.rs:10:5:10:13 | "\\"foo\\"" | -| literal.rs:11:5:11:14 | r#""foo""# | -| literal.rs:13:5:13:18 | "foo #\\"# bar" | -| literal.rs:14:5:14:22 | r##"foo #"# bar"## | -| literal.rs:16:5:16:10 | "\\x52" | -| literal.rs:17:5:17:7 | "R" | -| literal.rs:18:5:18:8 | r"R" | -| literal.rs:19:5:19:11 | "\\\\x52" | -| literal.rs:20:5:20:11 | r"\\x52" | +| literal.rs:11:5:11:9 | "foo" | +| literal.rs:12:5:12:10 | r"foo" | +| literal.rs:13:5:13:13 | "\\"foo\\"" | +| literal.rs:14:5:14:14 | r#""foo""# | +| literal.rs:16:5:16:18 | "foo #\\"# bar" | +| literal.rs:17:5:17:22 | r##"foo #"# bar"## | +| literal.rs:19:5:19:10 | "\\x52" | +| literal.rs:20:5:20:7 | "R" | +| literal.rs:21:5:21:8 | r"R" | +| literal.rs:22:5:22:11 | "\\\\x52" | +| literal.rs:23:5:23:11 | r"\\x52" | integerLiteral -| literal.rs:25:5:25:7 | 123 | | -| literal.rs:26:5:26:10 | 123i32 | i32 | -| literal.rs:27:5:27:10 | 123u32 | u32 | -| literal.rs:28:5:28:11 | 123_u32 | u32 | -| literal.rs:30:5:30:8 | 0xff | | -| literal.rs:31:5:31:11 | 0xff_u8 | u8 | -| literal.rs:32:5:32:12 | 0x01_f32 | | -| literal.rs:33:5:33:11 | 0x01_e3 | | -| literal.rs:35:5:35:8 | 0o70 | | -| literal.rs:36:5:36:12 | 0o70_i16 | i16 | -| literal.rs:38:5:38:25 | 0b1111_1111_1001_0000 | | -| literal.rs:39:5:39:28 | 0b1111_1111_1001_0000i64 | i64 | -| literal.rs:40:5:40:15 | 0b________1 | | -| literal.rs:42:5:42:10 | 0usize | usize | -| literal.rs:45:5:46:10 | 128_i8 | i8 | -| literal.rs:47:5:48:10 | 256_u8 | u8 | +| literal.rs:28:5:28:7 | 123 | | +| literal.rs:29:5:29:10 | 123i32 | i32 | +| literal.rs:30:5:30:10 | 123u32 | u32 | +| literal.rs:31:5:31:11 | 123_u32 | u32 | +| literal.rs:33:5:33:8 | 0xff | | +| literal.rs:34:5:34:11 | 0xff_u8 | u8 | +| literal.rs:35:5:35:12 | 0x01_f32 | | +| literal.rs:36:5:36:11 | 0x01_e3 | | +| literal.rs:38:5:38:8 | 0o70 | | +| literal.rs:39:5:39:12 | 0o70_i16 | i16 | +| literal.rs:41:5:41:25 | 0b1111_1111_1001_0000 | | +| literal.rs:42:5:42:28 | 0b1111_1111_1001_0000i64 | i64 | +| literal.rs:43:5:43:15 | 0b________1 | | +| literal.rs:45:5:45:10 | 0usize | usize | +| literal.rs:48:5:49:10 | 128_i8 | i8 | +| literal.rs:50:5:51:10 | 256_u8 | u8 | floatLiteral -| literal.rs:53:5:53:8 | 5f32 | f32 | -| literal.rs:55:5:55:12 | 123.0f64 | f64 | -| literal.rs:56:5:56:10 | 0.1f64 | f64 | -| literal.rs:57:5:57:10 | 0.1f32 | f32 | -| literal.rs:58:5:58:14 | 12E+99_f64 | f64 | -| literal.rs:59:18:59:19 | 2. | | +| literal.rs:56:5:56:8 | 5f32 | f32 | +| literal.rs:58:5:58:12 | 123.0f64 | f64 | +| literal.rs:59:5:59:10 | 0.1f64 | f64 | +| literal.rs:60:5:60:10 | 0.1f32 | f32 | +| literal.rs:61:5:61:14 | 12E+99_f64 | f64 | +| literal.rs:62:18:62:19 | 2. | | booleanLiteral -| literal.rs:63:5:63:8 | true | -| literal.rs:64:5:64:9 | false | +| literal.rs:66:5:66:8 | true | +| literal.rs:67:5:67:9 | false | diff --git a/rust/ql/test/extractor-tests/literal/literal.rs b/rust/ql/test/extractor-tests/literal/literal.rs index 74503f2a24e4..ea4ccdece630 100644 --- a/rust/ql/test/extractor-tests/literal/literal.rs +++ b/rust/ql/test/extractor-tests/literal/literal.rs @@ -1,6 +1,9 @@ fn char_literals() { 'a'; 'b'; + '\''; + '\n'; + '\u{1F600}'; } fn string_literals() { From ae54c62001603dd9e4ec3e85d92251877e11c552 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Tue, 13 May 2025 15:06:43 +0200 Subject: [PATCH 3/3] Simplify using non-capturing groups --- .../elements/internal/LiteralExprImpl.qll | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll index 2a2a754e8899..f848663a99bb 100644 --- a/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll +++ b/rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll @@ -81,28 +81,28 @@ module Impl { // https://doc.rust-lang.org/reference/tokens.html#integer-literals private module IntegerLiteralRegexs { bindingset[s] - string paren(string s) { result = "(" + s + ")" } + string paren(string s) { result = "(?:" + s + ")" } string integerLiteral() { result = paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" + - paren(hexLiteral())) + paren(suffix()) + "?" + paren(hexLiteral())) + "(" + suffix() + ")?" } private string suffix() { result = "u8|i8|u16|i16|u32|i32|u64|i64|u128|i128|usize|isize" } - string decLiteral() { result = decDigit() + "(" + decDigit() + "|_)*" } + string decLiteral() { result = decDigit() + "(?:" + decDigit() + "|_)*" } string binLiteral() { - result = "0b(" + binDigit() + "|_)*" + binDigit() + "(" + binDigit() + "|_)*" + result = "0b(?:" + binDigit() + "|_)*" + binDigit() + "(?:" + binDigit() + "|_)*" } string octLiteral() { - result = "0o(" + octDigit() + "|_)*" + octDigit() + "(" + octDigit() + "|_)*" + result = "0o(?:" + octDigit() + "|_)*" + octDigit() + "(?:" + octDigit() + "|_)*" } string hexLiteral() { - result = "0x(" + hexDigit() + "|_)*" + hexDigit() + "(" + hexDigit() + "|_)*" + result = "0x(?:" + hexDigit() + "|_)*" + hexDigit() + "(?:" + hexDigit() + "|_)*" } string decDigit() { result = "[0-9]" } @@ -135,7 +135,7 @@ module Impl { exists(string s, string reg | s = this.getTextValue() and reg = IntegerLiteralRegexs::integerLiteral() and - result = s.regexpCapture(reg, 13) + result = s.regexpCapture(reg, 1) ) } @@ -153,24 +153,25 @@ module Impl { } string floatLiteralSuffix1() { - result = decLiteral() + "\\." + decLiteral() + paren(suffix()) + "?" + result = decLiteral() + "\\." + decLiteral() + "(" + suffix() + ")?" } string floatLiteralSuffix2() { result = - decLiteral() + paren("\\." + decLiteral()) + "?" + paren(exponent()) + paren(suffix()) + "?" + decLiteral() + paren("\\." + decLiteral()) + "?" + paren(exponent()) + "(" + suffix() + ")?" } string integerSuffixLiteral() { result = paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" + - paren(hexLiteral())) + paren(suffix()) + paren(hexLiteral())) + "(" + suffix() + ")" } private string suffix() { result = "f32|f64" } string exponent() { - result = "(e|E)(\\+|-)?(" + decDigit() + "|_)*" + decDigit() + "(" + decDigit() + "|_)*" + result = + "(?:e|E)(?:\\+|-)?(?:" + decDigit() + "|_)*" + decDigit() + "(?:" + decDigit() + "|_)*" } } @@ -198,18 +199,13 @@ module Impl { * For example, `42.0f32` has the suffix `f32`. */ string getSuffix() { - exists(string s, string reg, int group | - reg = FloatLiteralRegexs::floatLiteralSuffix1() and - group = 3 - or - reg = FloatLiteralRegexs::floatLiteralSuffix2() and - group = 9 - or - reg = FloatLiteralRegexs::integerSuffixLiteral() and - group = 13 - | + exists(string s, string reg | + reg = + IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteralSuffix1()) + "|" + + IntegerLiteralRegexs::paren(FloatLiteralRegexs::floatLiteralSuffix2()) + "|" + + IntegerLiteralRegexs::paren(FloatLiteralRegexs::integerSuffixLiteral()) and s = this.getTextValue() and - result = s.regexpCapture(reg, group) + result = s.regexpCapture(reg, [1, 2, 3]) ) }