Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,5 @@ venv.bak/

# IDEs
.idea/
.vscode/
.vscode/target/
Cargo.lock
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async fn scan(req: web::Json<ScanRequest>) -> impl Responder {
path.clone().unwrap()
};

let result = Python::with_gil(|py| -> Result<String, String> {
let result = Python::attach(|py| -> Result<String, String> {
// Import the required modules
let pyspector_cli = py.import("pyspector.cli").map_err(|e| {
format!("Failed to import pyspector.cli: {}. Is PySpector installed?", e)
Expand Down
24 changes: 15 additions & 9 deletions src/pyspector/_rust_core/src/analysis/ast_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ use crate::rules::{RuleSet, Rule, Defaults};

// Main entry point for AST scanning
pub fn scan_ast(ast: &AstNode, file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue> {
let mut issues = Vec::new();
// Pre-filter applicable rules ONCE per file — not per AST node.
// This is critical for performance: file_content_exclude runs a regex against
// the full file content. Calling it inside walk_ast meant it ran O(nodes × rules)
// times — 5M+ times for large files. Pre-filtering reduces this to O(rules) = ~100.
let ast_rules: Vec<&Rule> = ruleset.rules.iter()
.filter(|r| r.ast_match.is_some())
.filter(|r| !r.is_excluded(file_path, content, &ruleset.defaults))
.collect();

if ast_rules.is_empty() { return issues; }
if ast_rules.is_empty() { return Vec::new(); }

walk_ast(ast, file_path, content, &ast_rules, &ruleset.defaults, &mut issues);
let mut issues = Vec::new();
walk_ast(ast, file_path, content, &ast_rules, &mut issues);
issues
}

// Recursively walks the AST, checking each node against the rules
fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], defaults: &Defaults, issues: &mut Vec<Issue>) {
// Recursively walks the AST, checking each node against pre-filtered rules.
// Rules are already filtered for this file — no exclusion checks needed here.
fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], issues: &mut Vec<Issue>) {
for rule in rules.iter() {
// Respect global defaults + rule-level exclude_file_pattern
if rule.is_file_excluded(file_path, defaults) {
Expand All @@ -27,7 +33,7 @@ fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], def
if check_node_match(node, match_pattern) {
let line_content = content.lines().nth(node.lineno.saturating_sub(1) as usize).unwrap_or("").to_string();

// Respect exclude_pattern on the matched line
// Respect line-level exclude_pattern on the matched line
if let Some(exclude) = &rule.exclude_pattern {
if exclude.is_match(&line_content) {
continue;
Expand Down Expand Up @@ -77,7 +83,7 @@ fn check_node_match(node: &AstNode, match_pattern: &str) -> bool {
}
}
}

true
}

Expand Down Expand Up @@ -112,6 +118,6 @@ fn node_has_property(node: &AstNode, path: &[&str], expected_value: &str) -> boo
}
}
}

false
}
}
4 changes: 2 additions & 2 deletions src/pyspector/_rust_core/src/analysis/config_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ pub fn scan_file(file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue
}
}

// Respect global defaults + rule-level exclude_file_pattern
if rule.is_file_excluded(file_path, &ruleset.defaults) {
// Respect global defaults + rule-level file exclusions (path + content)
if rule.is_excluded(file_path, content, &ruleset.defaults) {
continue;
}

Expand Down
26 changes: 13 additions & 13 deletions src/pyspector/_rust_core/src/analysis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,46 +55,46 @@ pub fn run_analysis(mut context: AnalysisContext) -> Vec<Issue> {
}
}

println!("[+] Found {} files to scan", files_to_scan.len());

println!("[+] Found {} files to scan ({} non-Python)", files_to_scan.len(),
files_to_scan.iter().filter(|f| !f.ends_with(".py")).count());

// Scan all files with regex patterns
let t_config = std::time::Instant::now();
let mut issues: Vec<Issue> = files_to_scan
.par_iter()
.flat_map(|file_path| {
if let Ok(content) = fs::read_to_string(file_path) {
config_analysis::scan_file(file_path, &content, &context.ruleset)
} else {
Vec::new()
} else {
Vec::new()
}
})
.collect();

println!("[+] Found {} issues from config analysis", issues.len());
println!("[*] Pattern/config scan: {:.2}s → {} issues", t_config.elapsed().as_secs_f64(), issues.len());

// Process Python files with AST analysis
let t_ast = std::time::Instant::now();
let python_issues: Vec<Issue> = context.py_files
.par_iter()
.flat_map(|py_file| {
let mut findings = Vec::new();
if is_excluded(Path::new(&py_file.file_path), &enhanced_exclusions) {
return findings;
if is_excluded(Path::new(&py_file.file_path), &enhanced_exclusions) {
return findings;
}

// Skip regex scan for Python files (already done above)

if let Some(ast) = &py_file.ast {
let ast_findings = ast_analysis::scan_ast(ast, &py_file.file_path, &py_file.content, &context.ruleset);
findings.extend(ast_findings);
}
findings
})
.collect();

println!("[+] {} issues from Python AST analysis", python_issues.len());
println!("[*] AST analysis: {:.2}s → {} issues", t_ast.elapsed().as_secs_f64(), python_issues.len());
issues.extend(python_issues);

// Build the call graph and run taint analysis
let t_callgraph = std::time::Instant::now();
let call_graph = call_graph_builder::build_call_graph(context.py_files);
println!("[*] Call graph build: {:.2}s", t_callgraph.elapsed().as_secs_f64());
let taint_issues = taint_analysis::analyze_program_for_taint(&call_graph, &context.ruleset);
println!("[+] Found {} issues from taint analysis", taint_issues.len());
issues.extend(taint_issues);
Expand Down
Loading
Loading