diff --git a/crates/codegraph-core/src/cfg.rs b/crates/codegraph-core/src/cfg.rs index 0ae0503a..cc24a842 100644 --- a/crates/codegraph-core/src/cfg.rs +++ b/crates/codegraph-core/src/cfg.rs @@ -1,4 +1,5 @@ use tree_sitter::Node; +use crate::constants::MAX_WALK_DEPTH; use crate::types::{CfgBlock, CfgData, CfgEdge}; // ─── CFG Rules ────────────────────────────────────────────────────────── @@ -617,6 +618,17 @@ impl<'a> CfgBuilder<'a> { /// Process if/else-if/else chain (handles patterns A, B, C). fn process_if(&mut self, if_stmt: &Node, current: u32) -> Option { + self.process_if_depth(if_stmt, current, 0) + } + + fn process_if_depth(&mut self, if_stmt: &Node, current: u32, depth: usize) -> Option { + if depth >= MAX_WALK_DEPTH { + // Depth limit reached: return `current` so the caller can still + // wire up a fallthrough edge to its join block. The else-if chain + // will be silently truncated — the resulting CFG is structurally + // valid but incomplete for very deeply nested if-else ladders. + return Some(current); + } self.set_end_line(current, node_line(if_stmt)); let cond_block = self.make_block("condition", Some(node_line(if_stmt)), Some(node_line(if_stmt)), Some("if")); @@ -653,7 +665,7 @@ impl<'a> CfgBuilder<'a> { if matches_opt(alt_kind, self.rules.if_node) || matches_slice(alt_kind, self.rules.if_nodes) { let false_block = self.make_block("branch_false", None, None, Some("else-if")); self.add_edge(cond_block, false_block, "branch_false"); - let else_if_end = self.process_if(&alternative, false_block); + let else_if_end = self.process_if_depth(&alternative, false_block, depth + 1); if let Some(eie) = else_if_end { self.add_edge(eie, join_block, "fallthrough"); } @@ -679,7 +691,7 @@ impl<'a> CfgBuilder<'a> { // else-if: recurse let false_block = self.make_block("branch_false", None, None, Some("else-if")); self.add_edge(cond_block, false_block, "branch_false"); - let else_if_end = self.process_if(&else_children[0], false_block); + let else_if_end = self.process_if_depth(&else_children[0], false_block, depth + 1); if let Some(eie) = else_if_end { self.add_edge(eie, join_block, "fallthrough"); } diff --git a/crates/codegraph-core/src/complexity.rs b/crates/codegraph-core/src/complexity.rs index 93458e0e..9b8f4f49 100644 --- a/crates/codegraph-core/src/complexity.rs +++ b/crates/codegraph-core/src/complexity.rs @@ -1,5 +1,6 @@ use tree_sitter::Node; +use crate::constants::MAX_WALK_DEPTH; use crate::types::ComplexityMetrics; // ─── Language-Configurable Complexity Rules ─────────────────────────────── @@ -373,6 +374,7 @@ pub fn compute_function_complexity( &mut cognitive, &mut cyclomatic, &mut max_nesting, + 0, ); ComplexityMetrics::basic(cognitive, cyclomatic, max_nesting) @@ -386,7 +388,11 @@ fn walk_children( cognitive: &mut u32, cyclomatic: &mut u32, max_nesting: &mut u32, + depth: usize, ) { + if depth >= MAX_WALK_DEPTH { + return; + } for i in 0..node.child_count() { if let Some(child) = node.child(i) { walk( @@ -397,6 +403,7 @@ fn walk_children( cognitive, cyclomatic, max_nesting, + depth + 1, ); } } @@ -410,7 +417,11 @@ fn walk( cognitive: &mut u32, cyclomatic: &mut u32, max_nesting: &mut u32, + depth: usize, ) { + if depth >= MAX_WALK_DEPTH { + return; + } let kind = node.kind(); // Track nesting depth @@ -450,6 +461,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -481,6 +493,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -494,6 +507,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -512,6 +526,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -558,6 +573,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -580,6 +596,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -604,6 +621,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -628,6 +646,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); return; } @@ -641,6 +660,7 @@ fn walk( cognitive, cyclomatic, max_nesting, + depth, ); } diff --git a/crates/codegraph-core/src/constants.rs b/crates/codegraph-core/src/constants.rs new file mode 100644 index 00000000..d1156147 --- /dev/null +++ b/crates/codegraph-core/src/constants.rs @@ -0,0 +1,3 @@ +/// Maximum recursion depth for AST traversal to prevent stack overflow +/// on deeply nested trees. Used by extractors, complexity, CFG, and dataflow. +pub const MAX_WALK_DEPTH: usize = 200; diff --git a/crates/codegraph-core/src/dataflow.rs b/crates/codegraph-core/src/dataflow.rs index 82c30220..35bff96a 100644 --- a/crates/codegraph-core/src/dataflow.rs +++ b/crates/codegraph-core/src/dataflow.rs @@ -1,15 +1,12 @@ use std::collections::HashMap; use tree_sitter::{Node, Tree}; +use crate::constants::MAX_WALK_DEPTH; use crate::types::{ DataflowArgFlow, DataflowAssignment, DataflowMutation, DataflowParam, DataflowResult, DataflowReturn, }; -/// Maximum recursion depth for AST traversal to prevent stack overflow -/// on deeply nested trees. Matches the approach used in cfg.rs. -const MAX_VISIT_DEPTH: usize = 200; - // ─── Param Strategy ────────────────────────────────────────────────────── /// Per-language parameter extraction strategy. @@ -852,7 +849,7 @@ fn member_receiver(member_expr: &Node, rules: &DataflowRules, source: &[u8]) -> /// Collect all identifier names referenced within a node. fn collect_identifiers(node: &Node, out: &mut Vec, rules: &DataflowRules, source: &[u8], depth: usize) { - if depth >= MAX_VISIT_DEPTH { + if depth >= MAX_WALK_DEPTH { return; } if is_ident(rules, node.kind()) { @@ -964,7 +961,7 @@ fn visit( mutations: &mut Vec, depth: usize, ) { - if depth >= MAX_VISIT_DEPTH { + if depth >= MAX_WALK_DEPTH { return; } diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 77c14cb7..8ef0cc16 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -34,6 +34,13 @@ fn find_csharp_parent_type<'a>(node: &Node<'a>, source: &[u8]) -> Option } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "class_declaration" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -293,7 +300,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index 19a0d31e..b823935c 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -17,6 +17,13 @@ impl SymbolExtractor for GoExtractor { } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "function_declaration" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -228,7 +235,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/hcl.rs b/crates/codegraph-core/src/extractors/hcl.rs index 349bc827..3ff48559 100644 --- a/crates/codegraph-core/src/extractors/hcl.rs +++ b/crates/codegraph-core/src/extractors/hcl.rs @@ -14,6 +14,13 @@ impl SymbolExtractor for HclExtractor { } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } if node.kind() == "block" { let mut identifiers = Vec::new(); let mut strings = Vec::new(); @@ -111,7 +118,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index 7dd10394..a9ac2524 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -1,6 +1,9 @@ use tree_sitter::Node; use crate::types::{AstNode, Definition}; +// Re-export so extractors that `use super::helpers::*` still see it. +pub use crate::constants::MAX_WALK_DEPTH; + /// Get the text of a node from the source bytes. pub fn node_text<'a>(node: &Node, source: &'a [u8]) -> &'a str { node.utf8_text(source).unwrap_or("") @@ -211,6 +214,19 @@ pub fn walk_ast_nodes_with_config( ast_nodes: &mut Vec, config: &LangAstConfig, ) { + walk_ast_nodes_with_config_depth(node, source, ast_nodes, config, 0); +} + +fn walk_ast_nodes_with_config_depth( + node: &Node, + source: &[u8], + ast_nodes: &mut Vec, + config: &LangAstConfig, + depth: usize, +) { + if depth >= MAX_WALK_DEPTH { + return; + } let kind = node.kind(); if config.new_types.contains(&kind) { @@ -276,7 +292,7 @@ pub fn walk_ast_nodes_with_config( if content.chars().count() < 2 { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_ast_nodes_with_config(&child, source, ast_nodes, config); + walk_ast_nodes_with_config_depth(&child, source, ast_nodes, config, depth + 1); } } return; @@ -307,7 +323,7 @@ pub fn walk_ast_nodes_with_config( for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_ast_nodes_with_config(&child, source, ast_nodes, config); + walk_ast_nodes_with_config_depth(&child, source, ast_nodes, config, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 6b6f784a..2fefde95 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -33,6 +33,13 @@ fn find_java_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "class_declaration" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -252,7 +259,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 50da1ca9..51e94a40 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -17,6 +17,13 @@ impl SymbolExtractor for JsExtractor { } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "function_declaration" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -397,7 +404,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } @@ -409,6 +416,13 @@ const TEXT_MAX: usize = 200; /// Walk the tree collecting new/throw/await/string/regex AST nodes. /// Mirrors `walkAst()` in `ast.js:216-276`. fn walk_ast_nodes(node: &Node, source: &[u8], ast_nodes: &mut Vec) { + walk_ast_nodes_depth(node, source, ast_nodes, 0); +} + +fn walk_ast_nodes_depth(node: &Node, source: &[u8], ast_nodes: &mut Vec, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "new_expression" => { let name = extract_new_name(node, source); @@ -459,7 +473,7 @@ fn walk_ast_nodes(node: &Node, source: &[u8], ast_nodes: &mut Vec) { // Still recurse children (template_string may have nested expressions) for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_ast_nodes(&child, source, ast_nodes); + walk_ast_nodes_depth(&child, source, ast_nodes, depth + 1); } } return; @@ -493,7 +507,7 @@ fn walk_ast_nodes(node: &Node, source: &[u8], ast_nodes: &mut Vec) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_ast_nodes(&child, source, ast_nodes); + walk_ast_nodes_depth(&child, source, ast_nodes, depth + 1); } } } @@ -763,13 +777,20 @@ fn extract_implements(heritage: &Node, source: &[u8]) -> Vec { } fn extract_implements_from_node(node: &Node, source: &[u8], result: &mut Vec) { + extract_implements_depth(node, source, result, 0); +} + +fn extract_implements_depth(node: &Node, source: &[u8], result: &mut Vec, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } for i in 0..node.child_count() { if let Some(child) = node.child(i) { if child.kind() == "identifier" || child.kind() == "type_identifier" { result.push(node_text(&child, source).to_string()); } if child.child_count() > 0 { - extract_implements_from_node(&child, source, result); + extract_implements_depth(&child, source, result, depth + 1); } } } @@ -1113,6 +1134,13 @@ fn extract_import_names(node: &Node, source: &[u8]) -> Vec { } fn scan_import_names(node: &Node, source: &[u8], names: &mut Vec) { + scan_import_names_depth(node, source, names, 0); +} + +fn scan_import_names_depth(node: &Node, source: &[u8], names: &mut Vec, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "import_specifier" | "export_specifier" => { let name_node = node @@ -1138,7 +1166,7 @@ fn scan_import_names(node: &Node, source: &[u8], names: &mut Vec) { } for i in 0..node.child_count() { if let Some(child) = node.child(i) { - scan_import_names(&child, source, names); + scan_import_names_depth(&child, source, names, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/php.rs b/crates/codegraph-core/src/extractors/php.rs index bd31619b..432f3c3e 100644 --- a/crates/codegraph-core/src/extractors/php.rs +++ b/crates/codegraph-core/src/extractors/php.rs @@ -33,6 +33,13 @@ fn find_php_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "function_definition" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -306,7 +313,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs index 70406c67..01f3df7b 100644 --- a/crates/codegraph-core/src/extractors/python.rs +++ b/crates/codegraph-core/src/extractors/python.rs @@ -17,6 +17,13 @@ impl SymbolExtractor for PythonExtractor { } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "function_definition" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -88,7 +95,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { // Walk children directly to handle decorated functions/classes for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } return; @@ -212,7 +219,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/ruby.rs b/crates/codegraph-core/src/extractors/ruby.rs index b09ad93d..4e592815 100644 --- a/crates/codegraph-core/src/extractors/ruby.rs +++ b/crates/codegraph-core/src/extractors/ruby.rs @@ -33,6 +33,13 @@ fn find_ruby_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "class" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -176,7 +183,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs index edbf88e9..1a1e2d25 100644 --- a/crates/codegraph-core/src/extractors/rust_lang.rs +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -30,6 +30,13 @@ fn find_current_impl<'a>(node: &Node<'a>, source: &[u8]) -> Option { } fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + walk_node_depth(node, source, symbols, 0); +} + +fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth: usize) { + if depth >= MAX_WALK_DEPTH { + return; + } match node.kind() { "function_item" => { if let Some(name_node) = node.child_by_field_name("name") { @@ -222,7 +229,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { for i in 0..node.child_count() { if let Some(child) = node.child(i) { - walk_node(&child, source, symbols); + walk_node_depth(&child, source, symbols, depth + 1); } } } diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index c3cb6f54..6d3aa6d0 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -1,5 +1,6 @@ pub mod cfg; pub mod complexity; +pub mod constants; pub mod cycles; pub mod dataflow; pub mod edge_builder;