diff --git a/.cargo/config.toml b/.cargo/config.toml index e0e3f5b..4036f31 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -4,5 +4,9 @@ test_details = ["test", "--target", "aarch64-apple-darwin"] [build] target = "wasm32-wasip1" +[env] +CC_wasm32_wasip1 = { value = "/Users/christian/.wasi-sdk/bin/clang", force = true } +CFLAGS_wasm32_wasip1 = { value = "--sysroot=/Users/christian/.wasi-sdk/share/wasi-sysroot", force = true } + [target.'cfg(all(target_arch = "wasm32"))'] runner = "viceroy run -C ../../fastly.toml -- " diff --git a/Cargo.lock b/Cargo.lock index 1a2227d..cb44e36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1989,6 +1989,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", @@ -2123,6 +2124,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.11.1" @@ -2352,6 +2359,36 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2" +[[package]] +name = "tree-sitter" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2135256a14d38ef69702cf6afdb6a36805d8914523de06eb4e2756cee08736f2" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" + [[package]] name = "trusted-server-common" version = "0.1.0" @@ -2390,6 +2427,8 @@ dependencies = [ "tokio", "tokio-test", "toml", + "tree-sitter", + "tree-sitter-javascript", "trusted-server-js", "url", "urlencoding", diff --git a/Cargo.toml b/Cargo.toml index 9545c84..566d46c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,3 +56,6 @@ urlencoding = "2.1" uuid = { version = "1.18", features = ["v4"] } validator = { version = "0.20", features = ["derive"] } which = "8" +tree-sitter = { version = "0.26.2" } +tree-sitter-javascript = { version = "0.25.0" } + diff --git a/README.md b/README.md index 49f1dc8..bd5457c 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,26 @@ See https://asdf-vm.com/guide/getting-started.html#_2-configure-asdf git clone git@github.com:IABTechLab/trusted-server.git ``` +### WASI SDK (Required for Tree-sitter) + +This project uses tree-sitter for JavaScript parsing, which requires the WASI SDK to compile C code for WebAssembly. + +#### Download and Install WASI SDK + +```sh +cd /tmp +curl -LO https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-24/wasi-sdk-24.0-arm64-macos.tar.gz +tar -xzf wasi-sdk-24.0-arm64-macos.tar.gz +mv wasi-sdk-24.0-arm64-macos ~/.wasi-sdk +``` + +:warning: For Linux or x86_64 macOS, download the appropriate release from [WASI SDK releases](https://github.com/WebAssembly/wasi-sdk/releases/tag/wasi-sdk-24): +- Linux x86_64: `wasi-sdk-24.0-x86_64-linux.tar.gz` +- Linux arm64: `wasi-sdk-24.0-arm64-linux.tar.gz` +- macOS x86_64: `wasi-sdk-24.0-x86_64-macos.tar.gz` + +The WASI SDK paths in `.cargo/config.toml` (`CC_wasm32_wasip1` and `CFLAGS_wasm32_wasip1`) are absolute paths under `/Users/christian/.wasi-sdk`; change them to the absolute path of your own install (for example, the `~/.wasi-sdk` directory created above) before building.
+
 ### Configure
 
 #### Edit configuration files
diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml
index bbb8a2a..d1537ab 100644
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -45,6 +45,8 @@ uuid = { workspace = true }
 validator = { workspace = true }
 ed25519-dalek = { workspace = true }
 once_cell = { workspace = true }
+tree-sitter = { workspace = true }
+tree-sitter-javascript = { workspace = true }
 
 [build-dependencies]
 config = { workspace = true }
diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs
index fc0b888..7e5ba7b 100644
--- a/crates/common/src/lib.rs
+++ b/crates/common/src/lib.rs
@@ -43,4 +43,5 @@ pub mod streaming_processor;
 pub mod streaming_replacer;
 pub mod synthetic;
 pub mod test_support;
+pub mod tree_sitter_test;
 pub mod tsjs;
diff --git a/crates/common/src/tree_sitter_test.rs b/crates/common/src/tree_sitter_test.rs
new file mode 100644
index 0000000..6d72e2a
--- /dev/null
+++ b/crates/common/src/tree_sitter_test.rs
@@ -0,0 +1,120 @@
+//! Helpers for parsing JavaScript with tree-sitter, plus smoke tests for them.
+
+use tree_sitter::{Parser, Tree};
+
+/// Creates a tree-sitter [`Parser`] configured with the JavaScript grammar.
+///
+/// # Panics
+///
+/// Panics with "Failed to set JavaScript language" if `set_language` returns
+/// an error for the grammar exported by `tree_sitter_javascript`.
+pub fn create_js_parser() -> Parser {
+    let mut parser = Parser::new();
+    let language = tree_sitter_javascript::LANGUAGE.into();
+    parser
+        .set_language(&language)
+        .expect("Failed to set JavaScript language");
+    parser
+}
+
+/// Parses JavaScript `source` and returns the resulting syntax tree, if any.
+///
+/// Builds a fresh parser with [`create_js_parser`] on every call and returns
+/// the result of `Parser::parse` unchanged. As `test_parse_with_syntax_error`
+/// exercises, malformed input is expected to still yield `Some(tree)`; use
+/// `tree.root_node().has_error()` to detect syntax errors.
+pub fn parse_js(source: &str) -> Option<Tree> {
+    let mut parser = create_js_parser();
+    parser.parse(source, None)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parser_creation() {
+        let parser = create_js_parser();
+        // Parser should be created successfully with JavaScript language
+        assert!(parser.language().is_some());
+    }
+
+    #[test]
+    fn test_parse_simple_function() {
+        let source = "function add(a, b) { return a + b; }";
+        let tree = parse_js(source).expect("Failed to parse JavaScript");
+
+        let root_node = tree.root_node();
+        assert_eq!(root_node.kind(), "program");
+        assert_eq!(root_node.child_count(), 1);
+
+        // First child should be a function declaration
+        let function_node = root_node.child(0).expect("Should have a child");
+        assert_eq!(function_node.kind(), "function_declaration");
+    }
+
+    #[test]
+    fn test_parse_variable_declaration() {
+        let source = "const x = 42;";
+        let tree = parse_js(source).expect("Failed to parse JavaScript");
+
+        let root_node = tree.root_node();
+        assert_eq!(root_node.kind(), "program");
+
+        // First child should be a lexical declaration
+        let declaration = root_node.child(0).expect("Should have a child");
+        assert_eq!(declaration.kind(), "lexical_declaration");
+    }
+
+    #[test]
+    fn test_parse_complex_code() {
+        let source = r#"
+            class Calculator {
+                constructor() {
+                    this.result = 0;
+                }
+
+                add(x, y) {
+                    return x + y;
+                }
+            }
+
+            const calc = new Calculator();
+            console.log(calc.add(5, 3));
+        "#;
+
+        let tree = parse_js(source).expect("Failed to parse JavaScript");
+        let root_node = tree.root_node();
+
+        assert_eq!(root_node.kind(), "program");
+        // Should have at least 3 children: class, const declaration, expression statement
+        assert!(root_node.child_count() >= 3);
+
+        // Verify the class declaration
+        let class_node = root_node.child(0).expect("Should have first child");
+        assert_eq!(class_node.kind(), "class_declaration");
+    }
+
+    #[test]
+    fn test_parse_arrow_function() {
+        let source = "const multiply = (a, b) => a * b;";
+        let tree = parse_js(source).expect("Failed to parse JavaScript");
+
+        let root_node = tree.root_node();
+        assert_eq!(root_node.kind(), "program");
+
+        let declaration = root_node.child(0).expect("Should have a child");
+        assert_eq!(declaration.kind(), "lexical_declaration");
+    }
+
+    #[test]
+    fn test_parse_with_syntax_error() {
+        // This should still produce a tree, but with error nodes
+        let source = "function broken( { return x; }";
+        let tree = parse_js(source);
+
+        assert!(tree.is_some());
+        let tree = tree.unwrap();
+        assert!(tree.root_node().has_error());
+    }
+}