From 76f15a890c7c5080d8e34e39490f96748ae68789 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 30 Jun 2025 14:23:53 +0000 Subject: [PATCH 1/8] Python: Update `tree-sitter` dependency Updates the Python extractor to depend on version 0.24.7 of tree-sitter (and 0.12.0 of tree-sitter-graph). A few changes were needed in order to make the code build and run after updating the dependencies: - In `main.rs`, the `Language` parameter is now passed as a reference. - In `python.tsg`, many queries had captures that were not actually used in the body of the stanza. This is no longer allowed (unless the captures start with an underscore), as it may indicate an error. To fix this, I added underscores in the appropriate places (and verified that none of these unused captures were in fact bugs). --- python/extractor/tsg-python/Cargo.lock | 52 ++++++++-------------- python/extractor/tsg-python/Cargo.toml | 4 +- python/extractor/tsg-python/python.tsg | 30 ++++++------- python/extractor/tsg-python/src/main.rs | 2 +- python/extractor/tsg-python/tsp/Cargo.toml | 2 +- 5 files changed, 37 insertions(+), 53 deletions(-) diff --git a/python/extractor/tsg-python/Cargo.lock b/python/extractor/tsg-python/Cargo.lock index 16849dc7f4d7..f3604d1a3870 100644 --- a/python/extractor/tsg-python/Cargo.lock +++ b/python/extractor/tsg-python/Cargo.lock @@ -1,12 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "ahash" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0453232ace82dee0dd0b4c87a59bd90f7b53b314f3e0f61fe2ee7c8a16482289" +version = 4 [[package]] name = "aho-corasick" @@ -82,12 +76,6 @@ dependencies = [ "shlex", ] -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - [[package]] name = "clap" version = "4.5.30" @@ -121,15 +109,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" -[[package]] -name = "hashbrown" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" -dependencies = [ - "ahash", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -258,14 +237,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] -name = "string-interner" -version = "0.12.2" +name = "streaming-iterator" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383196d1876517ee6f9f0864d1fc1070331b803335d3c6daaa04bbcccd823c08" -dependencies = [ - "cfg-if", - "hashbrown", -] +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" [[package]] name = "strsim" @@ -306,30 +281,39 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.20.4" +version = "0.24.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e34327f8eac545e3f037382471b2b19367725a242bba7bc45edb9efb49fe39a" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" dependencies = [ "cc", "regex", + "regex-syntax", + "streaming-iterator", + 
"tree-sitter-language", ] [[package]] name = "tree-sitter-graph" -version = "0.7.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "639d21e886f581d293de5f5081f09af003c54607ff3fa85efa159b243ba1f97a" +checksum = "63f86eb73c7d891c4b9b6fe4d4e63dd94c506e4788af7c2296afdcfbeea626cc" dependencies = [ "log", "regex", "serde", "serde_json", "smallvec", - "string-interner", + "streaming-iterator", "thiserror", "tree-sitter", ] +[[package]] +name = "tree-sitter-language" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" + [[package]] name = "tsg-python" version = "0.1.0" diff --git a/python/extractor/tsg-python/Cargo.toml b/python/extractor/tsg-python/Cargo.toml index 7ad2c1c949e6..f02fb06931b2 100644 --- a/python/extractor/tsg-python/Cargo.toml +++ b/python/extractor/tsg-python/Cargo.toml @@ -10,7 +10,7 @@ edition = "2024" [dependencies] anyhow = "1.0" regex = "1" -tree-sitter = "=0.20.4" -tree-sitter-graph = "0.7.0" +tree-sitter = "=0.24.7" +tree-sitter-graph = "0.12.0" tsp = {path = "tsp"} clap = "4.5" diff --git a/python/extractor/tsg-python/python.tsg b/python/extractor/tsg-python/python.tsg index 7ad0f3f14244..8dec9ad5d3ef 100644 --- a/python/extractor/tsg-python/python.tsg +++ b/python/extractor/tsg-python/python.tsg @@ -416,13 +416,13 @@ attr (@if.node) _location_end = (location-end @expr) } -(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr +(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr { attr (@child.node) _location_start = (location-start @start) attr (@child.node) _location_end = (location-end @end) } -(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . 
")" .) @genexpr +(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @_genexpr { attr (@end.node) _location_start = (location-start @start) attr (@end.node) _location_end = (location-end @end) @@ -524,7 +524,7 @@ attr (@del.node -> @target.node) targets = (named-child-index @target) } -(delete_statement target: (_) @target) @del +(delete_statement target: (_) @target) @_del { attr (@target.node) ctx = "del" } @@ -798,8 +798,8 @@ (dictionary_comprehension body: (pair - key: (_) @key - value: (_) @value + key: (_) @_key + value: (_) @_value ) ) @genexpr { @@ -1299,7 +1299,7 @@ ; the index of the left-hand side of the current assignment. ; Base case, for the outermost assignment we set the outermost node to this node, and the index to zero. -(expression_statement (assignment !type) @assign) @expr +(expression_statement (assignment !type) @assign) @_expr { let @assign.outermost_assignment = @assign.node let @assign.target_index = 0 @@ -1358,7 +1358,7 @@ } (assignment - left: (_) @target + left: (_) @_target type: (_) right: (_) @value ) @assign @@ -2330,7 +2330,7 @@ attr (@operand.node) ctx = "load" } -(unary_operator "~" @op) @unaryop +(unary_operator "~" @_op) @unaryop { attr (@unaryop.node) op = "~" } @@ -2614,7 +2614,7 @@ ; Async status ; NOTE: We only set the `is_async` field on the _first_ clause of the `with` statement, ; as this is the behaviour of the old parser. -(with_statement "async" "with" @with_keyword (with_clause . (with_item) @with)) +(with_statement "async" "with" @_with_keyword (with_clause . (with_item) @with)) { attr (@with.node) is_async = #true } @@ -2800,7 +2800,7 @@ (identifier) @obj . (identifier) @attr -) @match_value_pattern +) @_match_value_pattern { let attribute = (ast-node @attr "Attribute") attr (@attr.node) _skip_to = attribute @@ -2814,7 +2814,7 @@ (match_value_pattern . 
(identifier) @id -) @match_value_pattern +) @_match_value_pattern { attr (@id.node) ctx = "load" } @@ -3267,8 +3267,8 @@ (decorated_definition (decorator (expression) @exp1) @dec1 . (comment)* . - (decorator (expression) @exp2) @dec2 -) @decorator + (decorator (expression) @_exp2) @dec2 +) @_decorator { attr (@dec1.node) func = @exp1.node edge @dec1.node -> @dec2.node @@ -3279,7 +3279,7 @@ (decorator (expression) @exp) @last . (comment)* . definition: (function_definition) @funcdef -) @decorator +) @_decorator { attr (@last.node) func = @exp.node edge @last.node -> @funcdef.funcexpr @@ -3291,7 +3291,7 @@ (decorator (expression) @exp) @last . (comment)* . definition: (class_definition) @class -) @decorator +) @_decorator { attr (@last.node) func = @exp.node edge @last.node -> @class.class_expr diff --git a/python/extractor/tsg-python/src/main.rs b/python/extractor/tsg-python/src/main.rs index 6b72efdb6ef6..c99145132f76 100644 --- a/python/extractor/tsg-python/src/main.rs +++ b/python/extractor/tsg-python/src/main.rs @@ -502,7 +502,7 @@ fn main() -> Result<()> { let source_path = Path::new(matches.get_one::<String>("source").unwrap()); let language = tsp::language(); let mut parser = Parser::new(); - parser.set_language(language)?; + parser.set_language(&language)?; // Statically include `python.tsg`: let tsg = if matches.contains_id("tsg") { std::fs::read(&tsg_path).with_context(|| format!("Error reading TSG file {}", tsg_path))? 
diff --git a/python/extractor/tsg-python/tsp/Cargo.toml b/python/extractor/tsg-python/tsp/Cargo.toml index e36144566627..3d8587bbe5ab 100644 --- a/python/extractor/tsg-python/tsp/Cargo.toml +++ b/python/extractor/tsg-python/tsp/Cargo.toml @@ -26,7 +26,7 @@ path = "bindings/rust/lib.rs" ## When updating these dependencies, run `misc/bazel/3rdparty/update_cargo_deps.sh` [dependencies] -tree-sitter = ">= 0.20, < 0.21" +tree-sitter = "=0.24.7" [build-dependencies] cc = "1.2" From 5fb28b9f6d05535b9a87c6b29d398e5ac75235cc Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 30 Jun 2025 14:57:42 +0000 Subject: [PATCH 2/8] Python: Update bazel dependencies --- MODULE.bazel | 4 +- misc/bazel/3rdparty/py_deps/BUILD.bazel | 12 +-- .../py_deps/BUILD.hashbrown-0.9.1.bazel | 99 ----------------- ...l => BUILD.streaming-iterator-0.1.9.bazel} | 6 +- .../BUILD.string-interner-0.12.2.bazel | 101 ------------------ ...4.bazel => BUILD.tree-sitter-0.24.7.bazel} | 20 +++- ...l => BUILD.tree-sitter-graph-0.12.0.bazel} | 6 +- ...=> BUILD.tree-sitter-language-0.1.5.bazel} | 6 +- misc/bazel/3rdparty/py_deps/defs.bzl | 80 ++++++-------- 9 files changed, 63 insertions(+), 271 deletions(-) delete mode 100644 misc/bazel/3rdparty/py_deps/BUILD.hashbrown-0.9.1.bazel rename misc/bazel/3rdparty/py_deps/{BUILD.ahash-0.4.8.bazel => BUILD.streaming-iterator-0.1.9.bazel} (97%) delete mode 100644 misc/bazel/3rdparty/py_deps/BUILD.string-interner-0.12.2.bazel rename misc/bazel/3rdparty/py_deps/{BUILD.tree-sitter-0.20.4.bazel => BUILD.tree-sitter-0.24.7.bazel} (91%) rename misc/bazel/3rdparty/py_deps/{BUILD.tree-sitter-graph-0.7.0.bazel => BUILD.tree-sitter-graph-0.12.0.bazel} (96%) rename misc/bazel/3rdparty/py_deps/{BUILD.cfg-if-1.0.0.bazel => BUILD.tree-sitter-language-0.1.5.bazel} (97%) diff --git a/MODULE.bazel b/MODULE.bazel index 2f7a18b35fa1..4c22307da69e 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -89,8 +89,8 @@ use_repo( "vendor_py__cc-1.2.14", "vendor_py__clap-4.5.30", "vendor_py__regex-1.11.1", 
- "vendor_py__tree-sitter-0.20.4", - "vendor_py__tree-sitter-graph-0.7.0", + "vendor_py__tree-sitter-0.24.7", + "vendor_py__tree-sitter-graph-0.12.0", ) # deps for ruby+rust diff --git a/misc/bazel/3rdparty/py_deps/BUILD.bazel b/misc/bazel/3rdparty/py_deps/BUILD.bazel index f756b4a519f7..86bfde266419 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.bazel @@ -80,26 +80,26 @@ alias( ) alias( - name = "tree-sitter-0.20.4", - actual = "@vendor_py__tree-sitter-0.20.4//:tree_sitter", + name = "tree-sitter-0.24.7", + actual = "@vendor_py__tree-sitter-0.24.7//:tree_sitter", tags = ["manual"], ) alias( name = "tree-sitter", - actual = "@vendor_py__tree-sitter-0.20.4//:tree_sitter", + actual = "@vendor_py__tree-sitter-0.24.7//:tree_sitter", tags = ["manual"], ) alias( - name = "tree-sitter-graph-0.7.0", - actual = "@vendor_py__tree-sitter-graph-0.7.0//:tree_sitter_graph", + name = "tree-sitter-graph-0.12.0", + actual = "@vendor_py__tree-sitter-graph-0.12.0//:tree_sitter_graph", tags = ["manual"], ) alias( name = "tree-sitter-graph", - actual = "@vendor_py__tree-sitter-graph-0.7.0//:tree_sitter_graph", + actual = "@vendor_py__tree-sitter-graph-0.12.0//:tree_sitter_graph", tags = ["manual"], ) diff --git a/misc/bazel/3rdparty/py_deps/BUILD.hashbrown-0.9.1.bazel b/misc/bazel/3rdparty/py_deps/BUILD.hashbrown-0.9.1.bazel deleted file mode 100644 index 15fc24acbf1d..000000000000 --- a/misc/bazel/3rdparty/py_deps/BUILD.hashbrown-0.9.1.bazel +++ /dev/null @@ -1,99 +0,0 @@ -############################################################################### -# @generated -# DO NOT MODIFY: This file is auto-generated by a crate_universe tool. 
To -# regenerate this file, run the following: -# -# bazel run @@//misc/bazel/3rdparty:vendor_py_deps -############################################################################### - -load("@rules_rust//cargo:defs.bzl", "cargo_toml_env_vars") -load("@rules_rust//rust:defs.bzl", "rust_library") - -package(default_visibility = ["//visibility:public"]) - -cargo_toml_env_vars( - name = "cargo_toml_env_vars", - src = "Cargo.toml", -) - -rust_library( - name = "hashbrown", - srcs = glob( - include = ["**/*.rs"], - allow_empty = True, - ), - compile_data = glob( - include = ["**"], - allow_empty = True, - exclude = [ - "**/* *", - ".tmp_git_root/**/*", - "BUILD", - "BUILD.bazel", - "WORKSPACE", - "WORKSPACE.bazel", - ], - ), - crate_features = [ - "ahash", - "inline-more", - ], - crate_root = "src/lib.rs", - edition = "2018", - rustc_env_files = [ - ":cargo_toml_env_vars", - ], - rustc_flags = [ - "--cap-lints=allow", - ], - tags = [ - "cargo-bazel", - "crate-name=hashbrown", - "manual", - "noclippy", - "norustfmt", - ], - target_compatible_with = select({ - "@rules_rust//rust/platform:aarch64-apple-darwin": [], - "@rules_rust//rust/platform:aarch64-apple-ios": [], - "@rules_rust//rust/platform:aarch64-apple-ios-sim": [], - "@rules_rust//rust/platform:aarch64-linux-android": [], - "@rules_rust//rust/platform:aarch64-pc-windows-msvc": [], - "@rules_rust//rust/platform:aarch64-unknown-fuchsia": [], - "@rules_rust//rust/platform:aarch64-unknown-linux-gnu": [], - "@rules_rust//rust/platform:aarch64-unknown-nixos-gnu": [], - "@rules_rust//rust/platform:aarch64-unknown-nto-qnx710": [], - "@rules_rust//rust/platform:aarch64-unknown-uefi": [], - "@rules_rust//rust/platform:arm-unknown-linux-gnueabi": [], - "@rules_rust//rust/platform:armv7-linux-androideabi": [], - "@rules_rust//rust/platform:armv7-unknown-linux-gnueabi": [], - "@rules_rust//rust/platform:i686-apple-darwin": [], - "@rules_rust//rust/platform:i686-linux-android": [], - 
"@rules_rust//rust/platform:i686-pc-windows-msvc": [], - "@rules_rust//rust/platform:i686-unknown-freebsd": [], - "@rules_rust//rust/platform:i686-unknown-linux-gnu": [], - "@rules_rust//rust/platform:powerpc-unknown-linux-gnu": [], - "@rules_rust//rust/platform:riscv32imc-unknown-none-elf": [], - "@rules_rust//rust/platform:riscv64gc-unknown-none-elf": [], - "@rules_rust//rust/platform:s390x-unknown-linux-gnu": [], - "@rules_rust//rust/platform:thumbv7em-none-eabi": [], - "@rules_rust//rust/platform:thumbv8m.main-none-eabi": [], - "@rules_rust//rust/platform:wasm32-unknown-unknown": [], - "@rules_rust//rust/platform:wasm32-wasip1": [], - "@rules_rust//rust/platform:x86_64-apple-darwin": [], - "@rules_rust//rust/platform:x86_64-apple-ios": [], - "@rules_rust//rust/platform:x86_64-linux-android": [], - "@rules_rust//rust/platform:x86_64-pc-windows-msvc": [], - "@rules_rust//rust/platform:x86_64-unknown-freebsd": [], - "@rules_rust//rust/platform:x86_64-unknown-fuchsia": [], - "@rules_rust//rust/platform:x86_64-unknown-linux-gnu": [], - "@rules_rust//rust/platform:x86_64-unknown-nixos-gnu": [], - "@rules_rust//rust/platform:x86_64-unknown-none": [], - "@rules_rust//rust/platform:x86_64-unknown-uefi": [], - "//conditions:default": ["@platforms//:incompatible"], - }), - version = "0.9.1", - deps = [ - "@vendor_py__ahash-0.4.8//:ahash", - ], -) diff --git a/misc/bazel/3rdparty/py_deps/BUILD.ahash-0.4.8.bazel b/misc/bazel/3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel similarity index 97% rename from misc/bazel/3rdparty/py_deps/BUILD.ahash-0.4.8.bazel rename to misc/bazel/3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel index 057a5225197a..94d02b1de3f7 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.ahash-0.4.8.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel @@ -17,7 +17,7 @@ cargo_toml_env_vars( ) rust_library( - name = "ahash", + name = "streaming_iterator", srcs = glob( include = ["**/*.rs"], allow_empty = True, @@ -44,7 +44,7 
@@ rust_library( ], tags = [ "cargo-bazel", - "crate-name=ahash", + "crate-name=streaming-iterator", "manual", "noclippy", "norustfmt", @@ -88,5 +88,5 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-uefi": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "0.4.8", + version = "0.1.9", ) diff --git a/misc/bazel/3rdparty/py_deps/BUILD.string-interner-0.12.2.bazel b/misc/bazel/3rdparty/py_deps/BUILD.string-interner-0.12.2.bazel deleted file mode 100644 index 5e177a306e97..000000000000 --- a/misc/bazel/3rdparty/py_deps/BUILD.string-interner-0.12.2.bazel +++ /dev/null @@ -1,101 +0,0 @@ -############################################################################### -# @generated -# DO NOT MODIFY: This file is auto-generated by a crate_universe tool. To -# regenerate this file, run the following: -# -# bazel run @@//misc/bazel/3rdparty:vendor_py_deps -############################################################################### - -load("@rules_rust//cargo:defs.bzl", "cargo_toml_env_vars") -load("@rules_rust//rust:defs.bzl", "rust_library") - -package(default_visibility = ["//visibility:public"]) - -cargo_toml_env_vars( - name = "cargo_toml_env_vars", - src = "Cargo.toml", -) - -rust_library( - name = "string_interner", - srcs = glob( - include = ["**/*.rs"], - allow_empty = True, - ), - compile_data = glob( - include = ["**"], - allow_empty = True, - exclude = [ - "**/* *", - ".tmp_git_root/**/*", - "BUILD", - "BUILD.bazel", - "WORKSPACE", - "WORKSPACE.bazel", - ], - ), - crate_features = [ - "backends", - "inline-more", - "std", - ], - crate_root = "src/lib.rs", - edition = "2018", - rustc_env_files = [ - ":cargo_toml_env_vars", - ], - rustc_flags = [ - "--cap-lints=allow", - ], - tags = [ - "cargo-bazel", - "crate-name=string-interner", - "manual", - "noclippy", - "norustfmt", - ], - target_compatible_with = select({ - "@rules_rust//rust/platform:aarch64-apple-darwin": [], - "@rules_rust//rust/platform:aarch64-apple-ios": [], - 
"@rules_rust//rust/platform:aarch64-apple-ios-sim": [], - "@rules_rust//rust/platform:aarch64-linux-android": [], - "@rules_rust//rust/platform:aarch64-pc-windows-msvc": [], - "@rules_rust//rust/platform:aarch64-unknown-fuchsia": [], - "@rules_rust//rust/platform:aarch64-unknown-linux-gnu": [], - "@rules_rust//rust/platform:aarch64-unknown-nixos-gnu": [], - "@rules_rust//rust/platform:aarch64-unknown-nto-qnx710": [], - "@rules_rust//rust/platform:aarch64-unknown-uefi": [], - "@rules_rust//rust/platform:arm-unknown-linux-gnueabi": [], - "@rules_rust//rust/platform:armv7-linux-androideabi": [], - "@rules_rust//rust/platform:armv7-unknown-linux-gnueabi": [], - "@rules_rust//rust/platform:i686-apple-darwin": [], - "@rules_rust//rust/platform:i686-linux-android": [], - "@rules_rust//rust/platform:i686-pc-windows-msvc": [], - "@rules_rust//rust/platform:i686-unknown-freebsd": [], - "@rules_rust//rust/platform:i686-unknown-linux-gnu": [], - "@rules_rust//rust/platform:powerpc-unknown-linux-gnu": [], - "@rules_rust//rust/platform:riscv32imc-unknown-none-elf": [], - "@rules_rust//rust/platform:riscv64gc-unknown-none-elf": [], - "@rules_rust//rust/platform:s390x-unknown-linux-gnu": [], - "@rules_rust//rust/platform:thumbv7em-none-eabi": [], - "@rules_rust//rust/platform:thumbv8m.main-none-eabi": [], - "@rules_rust//rust/platform:wasm32-unknown-unknown": [], - "@rules_rust//rust/platform:wasm32-wasip1": [], - "@rules_rust//rust/platform:x86_64-apple-darwin": [], - "@rules_rust//rust/platform:x86_64-apple-ios": [], - "@rules_rust//rust/platform:x86_64-linux-android": [], - "@rules_rust//rust/platform:x86_64-pc-windows-msvc": [], - "@rules_rust//rust/platform:x86_64-unknown-freebsd": [], - "@rules_rust//rust/platform:x86_64-unknown-fuchsia": [], - "@rules_rust//rust/platform:x86_64-unknown-linux-gnu": [], - "@rules_rust//rust/platform:x86_64-unknown-nixos-gnu": [], - "@rules_rust//rust/platform:x86_64-unknown-none": [], - "@rules_rust//rust/platform:x86_64-unknown-uefi": [], - 
"//conditions:default": ["@platforms//:incompatible"], - }), - version = "0.12.2", - deps = [ - "@vendor_py__cfg-if-1.0.0//:cfg_if", - "@vendor_py__hashbrown-0.9.1//:hashbrown", - ], -) diff --git a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.20.4.bazel b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel similarity index 91% rename from misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.20.4.bazel rename to misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel index fea7b8b185e7..283db7e13449 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.20.4.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel @@ -38,6 +38,10 @@ rust_library( "WORKSPACE.bazel", ], ), + crate_features = [ + "default", + "std", + ], crate_root = "binding_rust/lib.rs", edition = "2018", rustc_env_files = [ @@ -92,10 +96,13 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-uefi": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "0.20.4", + version = "0.24.7", deps = [ "@vendor_py__regex-1.11.1//:regex", - "@vendor_py__tree-sitter-0.20.4//:build_script_build", + "@vendor_py__regex-syntax-0.8.5//:regex_syntax", + "@vendor_py__streaming-iterator-0.1.9//:streaming_iterator", + "@vendor_py__tree-sitter-0.24.7//:build_script_build", + "@vendor_py__tree-sitter-language-0.1.5//:tree_sitter_language", ], ) @@ -118,6 +125,10 @@ cargo_build_script( "WORKSPACE.bazel", ], ), + crate_features = [ + "default", + "std", + ], crate_name = "build_script_build", crate_root = "binding_rust/build.rs", data = glob( @@ -132,7 +143,8 @@ cargo_build_script( "WORKSPACE.bazel", ], ), - edition = "2018", + edition = "2021", + links = "tree-sitter", pkg_name = "tree-sitter", rustc_env_files = [ ":cargo_toml_env_vars", @@ -147,7 +159,7 @@ cargo_build_script( "noclippy", "norustfmt", ], - version = "0.20.4", + version = "0.24.7", visibility = ["//visibility:private"], deps = [ "@vendor_py__cc-1.2.14//:cc", diff --git 
a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-graph-0.7.0.bazel b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-graph-0.12.0.bazel similarity index 96% rename from misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-graph-0.7.0.bazel rename to misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-graph-0.12.0.bazel index 4db3e576764f..ad8dd864b008 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-graph-0.7.0.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-graph-0.12.0.bazel @@ -88,15 +88,15 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-uefi": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "0.7.0", + version = "0.12.0", deps = [ "@vendor_py__log-0.4.25//:log", "@vendor_py__regex-1.11.1//:regex", "@vendor_py__serde-1.0.217//:serde", "@vendor_py__serde_json-1.0.138//:serde_json", "@vendor_py__smallvec-1.14.0//:smallvec", - "@vendor_py__string-interner-0.12.2//:string_interner", + "@vendor_py__streaming-iterator-0.1.9//:streaming_iterator", "@vendor_py__thiserror-1.0.69//:thiserror", - "@vendor_py__tree-sitter-0.20.4//:tree_sitter", + "@vendor_py__tree-sitter-0.24.7//:tree_sitter", ], ) diff --git a/misc/bazel/3rdparty/py_deps/BUILD.cfg-if-1.0.0.bazel b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel similarity index 97% rename from misc/bazel/3rdparty/py_deps/BUILD.cfg-if-1.0.0.bazel rename to misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel index da9dbdfb977c..c6155d86164f 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.cfg-if-1.0.0.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel @@ -17,7 +17,7 @@ cargo_toml_env_vars( ) rust_library( - name = "cfg_if", + name = "tree_sitter_language", srcs = glob( include = ["**/*.rs"], allow_empty = True, @@ -44,7 +44,7 @@ rust_library( ], tags = [ "cargo-bazel", - "crate-name=cfg-if", + "crate-name=tree-sitter-language", "manual", "noclippy", "norustfmt", @@ -88,5 +88,5 @@ rust_library( 
"@rules_rust//rust/platform:x86_64-unknown-uefi": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "1.0.0", + version = "0.1.5", ) diff --git a/misc/bazel/3rdparty/py_deps/defs.bzl b/misc/bazel/3rdparty/py_deps/defs.bzl index 9d78e46b2f88..ea04edbae037 100644 --- a/misc/bazel/3rdparty/py_deps/defs.bzl +++ b/misc/bazel/3rdparty/py_deps/defs.bzl @@ -298,13 +298,13 @@ _NORMAL_DEPENDENCIES = { "anyhow": Label("@vendor_py__anyhow-1.0.95//:anyhow"), "clap": Label("@vendor_py__clap-4.5.30//:clap"), "regex": Label("@vendor_py__regex-1.11.1//:regex"), - "tree-sitter": Label("@vendor_py__tree-sitter-0.20.4//:tree_sitter"), - "tree-sitter-graph": Label("@vendor_py__tree-sitter-graph-0.7.0//:tree_sitter_graph"), + "tree-sitter": Label("@vendor_py__tree-sitter-0.24.7//:tree_sitter"), + "tree-sitter-graph": Label("@vendor_py__tree-sitter-graph-0.12.0//:tree_sitter_graph"), }, }, "python/extractor/tsg-python/tsp": { _COMMON_CONDITION: { - "tree-sitter": Label("@vendor_py__tree-sitter-0.20.4//:tree_sitter"), + "tree-sitter": Label("@vendor_py__tree-sitter-0.24.7//:tree_sitter"), }, }, } @@ -452,16 +452,6 @@ def crate_repositories(): Returns: A list of repos visible to the module through the module extension. 
""" - maybe( - http_archive, - name = "vendor_py__ahash-0.4.8", - sha256 = "0453232ace82dee0dd0b4c87a59bd90f7b53b314f3e0f61fe2ee7c8a16482289", - type = "tar.gz", - urls = ["https://static.crates.io/crates/ahash/0.4.8/download"], - strip_prefix = "ahash-0.4.8", - build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.ahash-0.4.8.bazel"), - ) - maybe( http_archive, name = "vendor_py__aho-corasick-1.1.3", @@ -542,16 +532,6 @@ def crate_repositories(): build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.cc-1.2.14.bazel"), ) - maybe( - http_archive, - name = "vendor_py__cfg-if-1.0.0", - sha256 = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd", - type = "tar.gz", - urls = ["https://static.crates.io/crates/cfg-if/1.0.0/download"], - strip_prefix = "cfg-if-1.0.0", - build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.cfg-if-1.0.0.bazel"), - ) - maybe( http_archive, name = "vendor_py__clap-4.5.30", @@ -592,16 +572,6 @@ def crate_repositories(): build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.colorchoice-1.0.3.bazel"), ) - maybe( - http_archive, - name = "vendor_py__hashbrown-0.9.1", - sha256 = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04", - type = "tar.gz", - urls = ["https://static.crates.io/crates/hashbrown/0.9.1/download"], - strip_prefix = "hashbrown-0.9.1", - build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.hashbrown-0.9.1.bazel"), - ) - maybe( http_archive, name = "vendor_py__is_terminal_polyfill-1.70.1", @@ -764,12 +734,12 @@ def crate_repositories(): maybe( http_archive, - name = "vendor_py__string-interner-0.12.2", - sha256 = "383196d1876517ee6f9f0864d1fc1070331b803335d3c6daaa04bbcccd823c08", + name = "vendor_py__streaming-iterator-0.1.9", + sha256 = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520", type = "tar.gz", - urls = ["https://static.crates.io/crates/string-interner/0.12.2/download"], - strip_prefix = "string-interner-0.12.2", - build_file = 
Label("//misc/bazel/3rdparty/py_deps:BUILD.string-interner-0.12.2.bazel"), + urls = ["https://static.crates.io/crates/streaming-iterator/0.1.9/download"], + strip_prefix = "streaming-iterator-0.1.9", + build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.streaming-iterator-0.1.9.bazel"), ) maybe( @@ -814,22 +784,32 @@ def crate_repositories(): maybe( http_archive, - name = "vendor_py__tree-sitter-0.20.4", - sha256 = "4e34327f8eac545e3f037382471b2b19367725a242bba7bc45edb9efb49fe39a", + name = "vendor_py__tree-sitter-0.24.7", + sha256 = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75", + type = "tar.gz", + urls = ["https://static.crates.io/crates/tree-sitter/0.24.7/download"], + strip_prefix = "tree-sitter-0.24.7", + build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.tree-sitter-0.24.7.bazel"), + ) + + maybe( + http_archive, + name = "vendor_py__tree-sitter-graph-0.12.0", + sha256 = "63f86eb73c7d891c4b9b6fe4d4e63dd94c506e4788af7c2296afdcfbeea626cc", type = "tar.gz", - urls = ["https://static.crates.io/crates/tree-sitter/0.20.4/download"], - strip_prefix = "tree-sitter-0.20.4", - build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.tree-sitter-0.20.4.bazel"), + urls = ["https://static.crates.io/crates/tree-sitter-graph/0.12.0/download"], + strip_prefix = "tree-sitter-graph-0.12.0", + build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.tree-sitter-graph-0.12.0.bazel"), ) maybe( http_archive, - name = "vendor_py__tree-sitter-graph-0.7.0", - sha256 = "639d21e886f581d293de5f5081f09af003c54607ff3fa85efa159b243ba1f97a", + name = "vendor_py__tree-sitter-language-0.1.5", + sha256 = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8", type = "tar.gz", - urls = ["https://static.crates.io/crates/tree-sitter-graph/0.7.0/download"], - strip_prefix = "tree-sitter-graph-0.7.0", - build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.tree-sitter-graph-0.7.0.bazel"), + urls = 
["https://static.crates.io/crates/tree-sitter-language/0.1.5/download"], + strip_prefix = "tree-sitter-language-0.1.5", + build_file = Label("//misc/bazel/3rdparty/py_deps:BUILD.tree-sitter-language-0.1.5.bazel"), ) maybe( @@ -957,6 +937,6 @@ def crate_repositories(): struct(repo = "vendor_py__cc-1.2.14", is_dev_dep = False), struct(repo = "vendor_py__clap-4.5.30", is_dev_dep = False), struct(repo = "vendor_py__regex-1.11.1", is_dev_dep = False), - struct(repo = "vendor_py__tree-sitter-0.20.4", is_dev_dep = False), - struct(repo = "vendor_py__tree-sitter-graph-0.7.0", is_dev_dep = False), + struct(repo = "vendor_py__tree-sitter-0.24.7", is_dev_dep = False), + struct(repo = "vendor_py__tree-sitter-graph-0.12.0", is_dev_dep = False), ] From b108d47b26fe64468e60f6e4a8d485065612ac7e Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 30 Jun 2025 15:10:27 +0000 Subject: [PATCH 3/8] Python: Update parser test output It seems that with a newer version of tree-sitter, we no longer parse the (not actually valid!) syntax `Spam[**P2]` as if the `**` is an exponentiation operation (with a missing left operand). 
--- python/extractor/tests/parser/types_new.expected | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/python/extractor/tests/parser/types_new.expected b/python/extractor/tests/parser/types_new.expected index a390ed1aae2a..ac470c08cf03 100644 --- a/python/extractor/tests/parser/types_new.expected +++ b/python/extractor/tests/parser/types_new.expected @@ -338,16 +338,9 @@ Module: [1, 0] - [23, 0] variable: Variable('Spam', None) ctx: Load index: - BinOp: [20, 36] - [20, 40] - left: - Name: [20, 36] - [20, 36] - variable: Variable('', None) - ctx: Load - op: Pow - right: - Name: [20, 38] - [20, 40] - variable: Variable('P2', None) - ctx: Load + Name: [20, 38] - [20, 40] + variable: Variable('P2', None) + ctx: Load ctx: Load TypeAlias: [21, 0] - [21, 41] name: From 235822d782ec3fbf70af81b582b598dee4a09a4c Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 2 Jul 2025 12:33:39 +0000 Subject: [PATCH 4/8] Python: Improve handling of syntax errors Rather than relying on matching arbitrary nodes inside tree-sitter-graph and then checking whether they are of type ERROR or MISSING (which seems to have stopped working in later versions of tree-sitter), we now explicitly go through the tree-sitter tree, locating all of the error and missing nodes along the way. We then add these on to the graph output in the same format as was previously produced by tree-sitter-graph. Note that it's very likely that some of the syntax errors will move around a bit as a consequence of this change. In general, we don't expect syntax errors to have stable locations, as small changes in the grammar can cause an error to appear in a different position, even if the underlying (erroneous) code has not changed. 
--- python/extractor/tsg-python/python.tsg | 10 --- python/extractor/tsg-python/src/main.rs | 107 +++++++++++++++++++++++- 2 files changed, 104 insertions(+), 13 deletions(-) diff --git a/python/extractor/tsg-python/python.tsg b/python/extractor/tsg-python/python.tsg index 8dec9ad5d3ef..963df06fd7c7 100644 --- a/python/extractor/tsg-python/python.tsg +++ b/python/extractor/tsg-python/python.tsg @@ -6,16 +6,6 @@ (module) @mod { let @mod.node = (ast-node @mod "Module") } -(_) @anynode -{ - scan (node-type @anynode) { - "^(ERROR|MISSING)$" { - let @anynode.node = (ast-node @anynode "SyntaxErrorNode") - attr (@anynode.node) source = (source-text @anynode) - } - } -} - (parenthesized_expression) @nd { let @nd.node = (ast-node @nd "Expr") } diff --git a/python/extractor/tsg-python/src/main.rs b/python/extractor/tsg-python/src/main.rs index c99145132f76..a94d93ba2092 100644 --- a/python/extractor/tsg-python/src/main.rs +++ b/python/extractor/tsg-python/src/main.rs @@ -480,6 +480,99 @@ pub mod extra_functions { } } +struct TreeIterator<'a> { + nodes_to_visit: Vec<tree_sitter::Node<'a>>, +} + +impl<'a> TreeIterator<'a> { + fn new(root: tree_sitter::Node<'a>) -> Self { + Self { + nodes_to_visit: vec![root], + } + } +} + +impl<'a> Iterator for TreeIterator<'a> { + type Item = tree_sitter::Node<'a>; + + fn next(&mut self) -> Option<Self::Item> { + if let Some(node) = self.nodes_to_visit.pop() { + // Add all children to the queue for processing + self.nodes_to_visit + .extend((0..node.child_count()).rev().filter_map(|i| node.child(i))); + Some(node) + } else { + None + } + } +} + +#[derive(Debug, Clone)] +struct SyntaxError { + start_pos: tree_sitter::Point, + end_pos: tree_sitter::Point, + source: String, +} + +fn syntax_errors_from_tree<'a>( + root: tree_sitter::Node<'a>, + source: &'a str, +) -> impl Iterator<Item = SyntaxError> + 'a { + TreeIterator::new(root) + .filter(|&node| node.is_error() || node.is_missing()) + .map(move |node| { + let start_pos = node.start_position(); + let end_pos = node.end_position(); + let text = 
&source[node.byte_range()]; + SyntaxError { + start_pos, + end_pos, + source: text.to_string(), + } + }) +} + +fn add_syntax_error_nodes(graph: &mut tree_sitter_graph::graph::Graph, errors: &[SyntaxError]) { + for error in errors { + let error_node = graph.add_graph_node(); + + // Add _kind attribute + graph[error_node] + .attributes + .add( + tree_sitter_graph::Identifier::from("_kind"), + tree_sitter_graph::graph::Value::String("SyntaxErrorNode".to_string()), + ) + .expect("Fresh node should not have duplicate attributes"); + + // Add _location attribute + let location = tree_sitter_graph::graph::Value::List( + vec![ + error.start_pos.row, + error.start_pos.column, + error.end_pos.row, + error.end_pos.column, + ] + .into_iter() + .map(|v| tree_sitter_graph::graph::Value::from(v as u32)) + .collect(), + ); + graph[error_node] + .attributes + .add(tree_sitter_graph::Identifier::from("_location"), location) + .expect("Fresh node should not have duplicate attributes"); + + // Add source attribute + graph[error_node] + .attributes + .add( + tree_sitter_graph::Identifier::from("source"), + tree_sitter_graph::graph::Value::String(error.source.clone()), + ) + .expect("Fresh node should not have duplicate attributes"); + } +} + fn main() -> Result<()> { let matches = Command::new("tsg-python") .version(BUILD_VERSION) @@ -581,10 +674,18 @@ fn main() -> Result<()> { ); let globals = Variables::new(); - let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false); - let graph = file - .execute(&tree, &source, &mut config, &NoCancellation) + let config = ExecutionConfig::new(&functions, &globals).lazy(false); + let mut graph = file + .execute(&tree, &source, &config, &NoCancellation) .with_context(|| format!("Could not execute TSG file {}", tsg_path))?; + + // Collect and add syntax error nodes to the graph + if tree.root_node().has_error() { + let syntax_errors: Vec<SyntaxError> = + syntax_errors_from_tree(tree.root_node(), &source).collect(); + add_syntax_error_nodes(&mut 
graph, &syntax_errors); + } + print!("{}", graph.pretty_print()); Ok(()) } From 9802ad77dc74201eb28d0af062acd1af024ad66c Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 2 Jul 2025 12:53:39 +0000 Subject: [PATCH 5/8] Python: Update `types_new.py` and test output --- .../extractor/tests/parser/types_new.expected | 34 ------------------- python/extractor/tests/parser/types_new.py | 2 +- 2 files changed, 1 insertion(+), 35 deletions(-) diff --git a/python/extractor/tests/parser/types_new.expected b/python/extractor/tests/parser/types_new.expected index ac470c08cf03..de4f44862800 100644 --- a/python/extractor/tests/parser/types_new.expected +++ b/python/extractor/tests/parser/types_new.expected @@ -308,40 +308,6 @@ Module: [1, 0] - [23, 0] ] ctx: Load ctx: Load - TypeAlias: [20, 0] - [20, 41] - name: - Name: [20, 5] - [20, 9] - variable: Variable('Baz2', None) - ctx: Store - type_parameters: [ - ParamSpec: [20, 10] - [20, 27] - name: - Name: [20, 12] - [20, 14] - variable: Variable('P2', None) - ctx: Store - default: - List: [20, 17] - [20, 27] - elts: [ - Name: [20, 18] - [20, 21] - variable: Variable('int', None) - ctx: Load - Name: [20, 23] - [20, 26] - variable: Variable('str', None) - ctx: Load - ] - ctx: Load - ] - value: - Subscript: [20, 31] - [20, 41] - value: - Name: [20, 31] - [20, 35] - variable: Variable('Spam', None) - ctx: Load - index: - Name: [20, 38] - [20, 40] - variable: Variable('P2', None) - ctx: Load - ctx: Load TypeAlias: [21, 0] - [21, 41] name: Name: [21, 5] - [21, 9] diff --git a/python/extractor/tests/parser/types_new.py b/python/extractor/tests/parser/types_new.py index 12e5eac0556c..9709d0f30449 100644 --- a/python/extractor/tests/parser/types_new.py +++ b/python/extractor/tests/parser/types_new.py @@ -17,6 +17,6 @@ class Qux1[*Ts1 = *tuple[int, bool]]: ... 
# TypeAliases type Foo2[T15, U1 = str] = Bar1[T15, U1] -type Baz2[**P2 = [int, str]] = Spam[**P2] +# type Baz2[**P2 = [int, str]] = Spam[**P2] # From the PEP, but this is not actually valid syntax! type Qux2[*Ts2 = *tuple[str]] = Ham[*Ts2] type Rab[U2, T15 = str] = Bar2[T15, U2] From bda522052b1d1fb3470ec7c01c5d13b2ccd5af71 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 2 Sep 2025 12:51:36 +0000 Subject: [PATCH 6/8] Python: Update bazel dependencies --- .../3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel | 2 +- misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel | 2 +- .../3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/misc/bazel/3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel b/misc/bazel/3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel index 94d02b1de3f7..2e3575479e84 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.streaming-iterator-0.1.9.bazel @@ -35,7 +35,7 @@ rust_library( ], ), crate_root = "src/lib.rs", - edition = "2018", + edition = "2021", rustc_env_files = [ ":cargo_toml_env_vars", ], diff --git a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel index 283db7e13449..312b70e23379 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-0.24.7.bazel @@ -43,7 +43,7 @@ rust_library( "std", ], crate_root = "binding_rust/lib.rs", - edition = "2018", + edition = "2021", rustc_env_files = [ ":cargo_toml_env_vars", ], diff --git a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel index c6155d86164f..409bc07da9f9 100644 --- a/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel +++ b/misc/bazel/3rdparty/py_deps/BUILD.tree-sitter-language-0.1.5.bazel @@ -34,8 
+34,8 @@ rust_library( "WORKSPACE.bazel", ], ), - crate_root = "src/lib.rs", - edition = "2018", + crate_root = "language.rs", + edition = "2021", rustc_env_files = [ ":cargo_toml_env_vars", ], From 13a93c7e3236440bb0ef750a5673bb6fd6dd07b9 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 3 Sep 2025 11:55:49 +0000 Subject: [PATCH 7/8] Python: Add suggestions from Copilot --- python/extractor/tsg-python/src/main.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/extractor/tsg-python/src/main.rs b/python/extractor/tsg-python/src/main.rs index a94d93ba2092..d71c93d7f2c5 100644 --- a/python/extractor/tsg-python/src/main.rs +++ b/python/extractor/tsg-python/src/main.rs @@ -498,8 +498,11 @@ impl<'a> Iterator for TreeIterator<'a> { fn next(&mut self) -> Option<Self::Item> { if let Some(node) = self.nodes_to_visit.pop() { // Add all children to the queue for processing - self.nodes_to_visit - .extend((0..node.child_count()).rev().filter_map(|i| node.child(i))); + let children: Vec<_> = (0..node.child_count()) + .rev() + .filter_map(|i| node.child(i)) + .collect(); + self.nodes_to_visit.extend(children); Some(node) } else { None @@ -523,7 +526,7 @@ fn syntax_errors_from_tree<'a>( .map(move |node| { let start_pos = node.start_position(); let end_pos = node.end_position(); - let text = &source[node.byte_range()]; + let text = &source.get(node.byte_range()).unwrap_or(""); SyntaxError { start_pos, end_pos, From f6732a927b22b499826319fe349e0a7846fba33f Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 3 Sep 2025 11:56:54 +0000 Subject: [PATCH 8/8] Python: Bump extractor version --- python/extractor/semmle/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/extractor/semmle/util.py b/python/extractor/semmle/util.py index 56f7889ae231..8196f76e4375 100644 --- a/python/extractor/semmle/util.py +++ b/python/extractor/semmle/util.py @@ -10,7 +10,7 @@ #Semantic version of extractor. 
#Update this if any changes are made -VERSION = "7.1.3" +VERSION = "7.1.4" PY_EXTENSIONS = ".py", ".pyw"