diff --git a/.gitignore b/.gitignore
index 204739ff..0cf8023a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,4 +60,6 @@ venv.bak/
 
 # IDEs
 .idea/
-.vscode/
\ No newline at end of file
+.vscode/
+target/
+Cargo.lock
diff --git a/src/main.rs b/src/main.rs
index 3d0399b4..3e22feb3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -57,7 +57,7 @@ async fn scan(req: web::Json<ScanRequest>) -> impl Responder {
             path.clone().unwrap()
         };
 
-        let result = Python::with_gil(|py| -> Result<String, String> {
+        let result = Python::attach(|py| -> Result<String, String> {
             // Import the required modules
             let pyspector_cli = py.import("pyspector.cli").map_err(|e| {
                 format!("Failed to import pyspector.cli: {}. Is PySpector installed?", e)
diff --git a/src/pyspector/_rust_core/src/analysis/ast_analysis.rs b/src/pyspector/_rust_core/src/analysis/ast_analysis.rs
index 715dd5c9..a5d08932 100644
--- a/src/pyspector/_rust_core/src/analysis/ast_analysis.rs
+++ b/src/pyspector/_rust_core/src/analysis/ast_analysis.rs
@@ -4,19 +4,25 @@ use crate::rules::{RuleSet, Rule, Defaults};
 
 // Main entry point for AST scanning
 pub fn scan_ast(ast: &AstNode, file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue> {
-    let mut issues = Vec::new();
+    // Pre-filter applicable rules ONCE per file — not per AST node.
+    // This is critical for performance: file_content_exclude runs a regex against
+    // the full file content. Calling it inside walk_ast meant it ran O(nodes × rules)
+    // times — 5M+ times for large files. Pre-filtering reduces this to O(rules) = ~100.
     let ast_rules: Vec<&Rule> = ruleset.rules.iter()
         .filter(|r| r.ast_match.is_some())
+        .filter(|r| !r.is_excluded(file_path, content, &ruleset.defaults))
         .collect();
 
-    if ast_rules.is_empty() { return issues; }
+    if ast_rules.is_empty() { return Vec::new(); }
 
-    walk_ast(ast, file_path, content, &ast_rules, &ruleset.defaults, &mut issues);
+    let mut issues = Vec::new();
+    walk_ast(ast, file_path, content, &ast_rules, &mut issues);
     issues
 }
 
-// Recursively walks the AST, checking each node against the rules
-fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], defaults: &Defaults, issues: &mut Vec<Issue>) {
+// Recursively walks the AST, checking each node against pre-filtered rules.
+// Rules are already filtered for this file — no exclusion checks needed here.
+fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], issues: &mut Vec<Issue>) {
     for rule in rules.iter() {
         // Respect global defaults + rule-level exclude_file_pattern
         if rule.is_file_excluded(file_path, defaults) {
@@ -27,7 +33,7 @@ fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], def
             if check_node_match(node, match_pattern) {
                 let line_content = content.lines().nth(node.lineno.saturating_sub(1) as usize).unwrap_or("").to_string();
 
-                // Respect exclude_pattern on the matched line
+                // Respect line-level exclude_pattern on the matched line
                 if let Some(exclude) = &rule.exclude_pattern {
                     if exclude.is_match(&line_content) {
                         continue;
@@ -77,7 +83,7 @@ fn check_node_match(node: &AstNode, match_pattern: &str) -> bool {
             }
         }
     }
-    
+
     true
 }
 
@@ -112,6 +118,6 @@ fn node_has_property(node: &AstNode, path: &[&str], expected_value: &str) -> boo
             }
         }
     }
-    
+
     false
-}
\ No newline at end of file
+}
diff --git a/src/pyspector/_rust_core/src/analysis/config_analysis.rs b/src/pyspector/_rust_core/src/analysis/config_analysis.rs
index a512afc5..b8a814b2 100644
--- a/src/pyspector/_rust_core/src/analysis/config_analysis.rs
+++ b/src/pyspector/_rust_core/src/analysis/config_analysis.rs
@@ -18,8 +18,8 @@ pub fn scan_file(file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue
             }
         }
 
-        // Respect global defaults + rule-level exclude_file_pattern
-        if rule.is_file_excluded(file_path, &ruleset.defaults) {
+        // Respect global defaults + rule-level file exclusions (path + content)
+        if rule.is_excluded(file_path, content, &ruleset.defaults) {
             continue;
         }
 
diff --git a/src/pyspector/_rust_core/src/analysis/mod.rs b/src/pyspector/_rust_core/src/analysis/mod.rs
index 3dfb3548..d4141167 100644
--- a/src/pyspector/_rust_core/src/analysis/mod.rs
+++ b/src/pyspector/_rust_core/src/analysis/mod.rs
@@ -55,33 +55,32 @@ pub fn run_analysis(mut context: AnalysisContext) -> Vec<Issue> {
         }
     }
     
-    println!("[+] Found {} files to scan", files_to_scan.len());
-    
+    println!("[+] Found {} files to scan ({} non-Python)", files_to_scan.len(),
+             files_to_scan.iter().filter(|f| !f.ends_with(".py")).count());
+
     // Scan all files with regex patterns
+    let t_config = std::time::Instant::now();
     let mut issues: Vec<Issue> = files_to_scan
         .par_iter()
         .flat_map(|file_path| {
             if let Ok(content) = fs::read_to_string(file_path) {
                 config_analysis::scan_file(file_path, &content, &context.ruleset)
-            } else { 
-                Vec::new() 
+            } else {
+                Vec::new()
             }
         })
         .collect();
-
-    println!("[+] Found {} issues from config analysis", issues.len());
+    println!("[*] Pattern/config scan: {:.2}s → {} issues", t_config.elapsed().as_secs_f64(), issues.len());
 
     // Process Python files with AST analysis
+    let t_ast = std::time::Instant::now();
     let python_issues: Vec<Issue> = context.py_files
         .par_iter()
         .flat_map(|py_file| {
             let mut findings = Vec::new();
-            if is_excluded(Path::new(&py_file.file_path), &enhanced_exclusions) { 
-                return findings; 
+            if is_excluded(Path::new(&py_file.file_path), &enhanced_exclusions) {
+                return findings;
             }
-            
-            // Skip regex scan for Python files (already done above)
-            
             if let Some(ast) = &py_file.ast {
                 let ast_findings = ast_analysis::scan_ast(ast, &py_file.file_path, &py_file.content, &context.ruleset);
                 findings.extend(ast_findings);
@@ -89,12 +88,13 @@ pub fn run_analysis(mut context: AnalysisContext) -> Vec<Issue> {
             findings
         })
         .collect();
-        
-    println!("[+] {} issues from Python AST analysis", python_issues.len());
+    println!("[*] AST analysis: {:.2}s → {} issues", t_ast.elapsed().as_secs_f64(), python_issues.len());
     issues.extend(python_issues);
 
     // Build the call graph and run taint analysis
+    let t_callgraph = std::time::Instant::now();
     let call_graph = call_graph_builder::build_call_graph(context.py_files);
+    println!("[*] Call graph build: {:.2}s", t_callgraph.elapsed().as_secs_f64());
     let taint_issues = taint_analysis::analyze_program_for_taint(&call_graph, &context.ruleset);
     println!("[+] Found {} issues from taint analysis", taint_issues.len());
     issues.extend(taint_issues);
diff --git a/src/pyspector/_rust_core/src/analysis/taint_analysis.rs b/src/pyspector/_rust_core/src/analysis/taint_analysis.rs
index 3a11fda9..8c6e8a82 100644
--- a/src/pyspector/_rust_core/src/analysis/taint_analysis.rs
+++ b/src/pyspector/_rust_core/src/analysis/taint_analysis.rs
@@ -4,17 +4,94 @@ use crate::graph::cfg_builder::build_cfg;
 use crate::graph::representation::{BasicBlock, BlockId, ControlFlowGraph};
 use crate::issues::Issue;
 use crate::rules::RuleSet;
+use rayon::prelude::*;
 use std::collections::{HashMap, HashSet, VecDeque};
 
-/// Origin of a taint
+/// Provenance of a value — universal Python semantics, no framework knowledge.
+///
+/// The provenance lattice (least trusted → most trusted):
+///   HttpRequest → ShellSanitized → OperatorConfig → DeveloperDefined / SystemGenerated
+///
+/// HttpRequest, ShellSanitized and the legacy External origin are attacker-controlled (trigger most sinks).
+/// ShellSanitized specifically does NOT trigger shell injection sinks (PY102/SHELL*).
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum TaintOrigin {
-    External,      // From a known source (e.g. input(), request.get())
-    Param(usize),  // From a function parameter (index)
+    /// Attacker-controlled: request.GET.get(), request.POST, cookies, body,
+    /// HTTP API responses (.json(), iter_lines()), CLI arguments.
+    HttpRequest,
+
+    /// Attacker-controlled data that has been through shlex.quote().
+    /// Safe for shell metacharacter injection (PY102) — shlex.quote prevents that.
+    /// Still dangerous for: path traversal (PATH813), f-string injection (FSTRING867),
+    /// file open (OPEN1149), URL injection (SSRF_001), SQL injection (PY101).
+    ShellSanitized,
+
+    /// Attacker-controlled data that has been through html.escape() or format_html().
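+    /// Safe for HTML XSS. NOTE(review): is_attacker_controlled() is false for this origin, so sinks gated on it will not fire for SQL, shell, path or URLs either — confirm intended.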
+    HtmlSanitized,
+
+    /// Attacker-controlled data that has been through quote_name() or similar SQL sanitizers.
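+    /// Safe for SQL identifier injection. NOTE(review): is_attacker_controlled() is false for this origin, so sinks gated on it will not fire for shell, path or HTML either — confirm intended.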
+    SqlSanitized,
+
+    /// Operator-controlled: os.environ.get(), config files loaded at startup.
+    OperatorConfig,
+
+    /// Developer-defined: string literals, class attributes, module constants.
+    DeveloperDefined,
+
+    /// System-generated: tempfile.*, uuid4(), os.urandom(), secrets.*.
+    SystemGenerated,
+
+    // Legacy — kept for backward compatibility
+    External,
+    Param(usize),
+}
+
+impl TaintOrigin {
+    /// True if this origin is attacker-controlled and should trigger sink findings.
+    ///
+    /// HtmlSanitized and SqlSanitized are NOT attacker-controlled for general sinks:
+    /// - html.escape/format_html/conditional_escape are complete XSS mitigations
+    /// - quote_name is a complete SQL injection mitigation
+    /// These sanitizers clear taint for all sinks — they were comprehensive mitigations.
+    ///
+    /// ShellSanitized IS still attacker-controlled for non-shell sinks:
+    /// - shlex.quote prevents shell injection but NOT path traversal, f-string, SSRF, SQL
+    /// - So ShellSanitized data still triggers PATH813, OPEN1149, FSTRING867, SSRF_001, PY101
+    pub fn is_attacker_controlled(&self) -> bool {
+        matches!(self,
+            TaintOrigin::HttpRequest |
+            TaintOrigin::External |
+            TaintOrigin::ShellSanitized
+        )
+    }
+
+    /// True only for HttpRequest/External — not ShellSanitized.
+    /// Used by shell injection sinks (PY102, SHELL*): shlex.quote is a valid mitigation.
+    pub fn is_shell_injectable(&self) -> bool {
+        matches!(self, TaintOrigin::HttpRequest | TaintOrigin::External)
+    }
+
+    /// True if this origin should still trigger SQL sinks.
+    /// ShellSanitized is still SQL-injectable (shlex.quote doesn't sanitize SQL).
+    pub fn is_sql_injectable(&self) -> bool {
+        matches!(self, TaintOrigin::HttpRequest | TaintOrigin::External | TaintOrigin::ShellSanitized)
+    }
+
+    /// Convert a sanitizer's transforms_to string to a TaintOrigin.
+    pub fn from_transforms_to(s: &str) -> Option<Self> {
+        match s {
+            "ShellSanitized" => Some(TaintOrigin::ShellSanitized),
+            "HtmlSanitized"  => Some(TaintOrigin::HtmlSanitized),
+            "SqlSanitized"   => Some(TaintOrigin::SqlSanitized),
+            _                => None,
+        }
+    }
 }
 
-/// Per-block taint state: maps variable names to their taint origins
-/// If a variable is not in the map, it is not tainted.
+/// Per-block taint state: maps variable names to their taint origins.
+/// If a variable is not in the map, it is untainted (safe).
 type TaintState = HashMap<String, HashSet<TaintOrigin>>;
 
 /// Summary of a function's taint behavior
@@ -30,6 +107,17 @@ struct FunctionSummary {
 struct GlobalTaintContext {
     /// Summaries for all functions in the program
     summaries: HashMap<String, FunctionSummary>,
+
+    /// Call-site taint: maps callee function name → per-parameter taint origins.
+    call_site_taints: HashMap<String, Vec<HashSet<TaintOrigin>>>,
+
+    /// Class attribute taint: maps (file_prefix, attr_name) → taint origins.
+    class_attr_taints: HashMap<(String, String), HashSet<TaintOrigin>>,
+
+    /// CFG cache: pre-built control flow graphs for all functions.
+    /// build_cfg() is expensive (AST traversal + graph construction).
+    /// Caching avoids rebuilding the same CFG in each iteration and the final pass.
+    cfg_cache: HashMap<String, ControlFlowGraph>,
 }
 
 /// Context for the intra-procedural fixed-point worklist algorithm
@@ -51,12 +139,25 @@ impl TaintContext {
 
 // Main entry point for inter-procedural taint analysis
 pub fn analyze_program_for_taint(call_graph: &CallGraph, ruleset: &RuleSet) -> Vec<Issue> {
+    let t0 = std::time::Instant::now();
     println!("[*] Starting inter-procedural taint analysis with {} functions", call_graph.functions.len());
-    
+
+    // Pre-build all CFGs once — reuse across convergence iterations and final pass.
+    // Parallel build using Rayon: each function's CFG is independent.
+    println!("[*] Pre-building CFGs for {} functions (parallel)...", call_graph.functions.len());
+    let cfg_cache: HashMap<String, ControlFlowGraph> = call_graph.functions
+        .par_iter()
+        .map(|(func_id, func_node)| (func_id.clone(), build_cfg(func_node)))
+        .collect();
+    println!("[*] CFG pre-build: {:.2}s", t0.elapsed().as_secs_f64());
+
     let mut global_ctx = GlobalTaintContext {
         summaries: HashMap::new(),
+        call_site_taints: HashMap::new(),
+        class_attr_taints: HashMap::new(),
+        cfg_cache,
     };
-    
+
     // Initialize summaries for all functions
     for func_id in call_graph.functions.keys() {
         global_ctx.summaries.insert(func_id.clone(), FunctionSummary::default() as FunctionSummary);
@@ -64,56 +165,223 @@ pub fn analyze_program_for_taint(call_graph: &CallGraph, ruleset: &RuleSet) -> V
     
     let mut all_issues = Vec::new();
     let mut iterations = 0;
-    const MAX_GLOBAL_ITERATIONS: usize = 10; 
-    
+    const MAX_GLOBAL_ITERATIONS: usize = 10;
+
+    // Pre-compute which files contain any taint source marker.
+    // Functions in files with NO taint markers cannot have internal taint sources —
+    // they may only receive taint from callers (handled by lazy call_site_taint filter).
+    // This pre-filter eliminates ~80% of function analyses in typical codebases.
+    const FILE_TAINT_MARKERS: &[&str] = &[
+        // Django request access
+        "request.GET", "request.POST", "request.FILES", "request.COOKIES",
+        "request.META", "request.headers",
+        // Flask / generic request
+        "request.get(", "request.args", "request.form",
+        "request.values", "request.json",
+        // Environment / CLI
+        "os.environ.get", "sys.argv",
+        // HTTP streaming
+        ".iter_lines", ".iter_text", ".iter_raw", ".iter_bytes",
+        // Deserialization
+        "marshal.loads", "json.load(", "json.loads(",
+        ".json()",       // HTTP response .json() method
+        "input(",        // CLI interactive input
+    ];
+
+    let taint_active_files: std::collections::HashSet<&str> = call_graph.file_contents
+        .iter()
+        .filter(|(_, content)| FILE_TAINT_MARKERS.iter().any(|m| content.contains(m)))
+        .map(|(path, _)| path.as_str())
+        .collect();
+
+    println!("[*] Taint-active files: {}/{} ({:.0}% of total)",
+             taint_active_files.len(),
+             call_graph.file_contents.len(),
+             100.0 * taint_active_files.len() as f64 / call_graph.file_contents.len().max(1) as f64);
+
+    let t_convergence = std::time::Instant::now();
     loop {
+        let t_iter = std::time::Instant::now();
         iterations += 1;
-        println!("[*] Global fixed-point iteration {}", iterations);
         let mut summaries_changed = false;
-        let mut current_pass_issues = Vec::new();
+        let mut current_pass_issues: Vec<Issue> = Vec::new();
         
-        // Analyze each function
-        for (func_id, func_node) in &call_graph.functions {
-            let cfg = build_cfg(func_node);
-            
-            let file_path: &str = func_id.split("::").next().unwrap_or("");
-            let default_content = String::new();
-            let content = call_graph.file_contents.get(file_path).unwrap_or(&default_content);
-            
-            let (new_summary, issues) = analyze_function_taint(
-                &cfg, 
-                func_node,
-                ruleset, 
-                file_path, 
-                content,
-                &global_ctx
-            );
-            
-            if let Some(old_summary) = global_ctx.summaries.get(func_id) {
+        // Analyze functions IN PARALLEL using Rayon.
+        // Each function reads global_ctx (immutable snapshot of this iteration's state)
+        // and returns (func_id, summary, call_sites, class_attrs).
+        // Results are merged serially after all parallel analyses complete.
+        //
+        // Correctness: with parallel analysis, function B doesn't see call_site_taints
+        // produced by function A in the SAME iteration — it sees them in the NEXT
+        // iteration. This may require one extra iteration vs sequential but is safe.
+        //
+        // Lazy filter: iterations 2+ skip functions with no taint to propagate.
+        // A function has taint to propagate if:
+        //   (a) it's an HTTP/CLI entry point (has tainted params)
+        //   (b) it was called with tainted arguments (call_site_taint)
+        //   (c) it's in a file where class attributes have been tainted (class_attr_taint)
+        //       — e.g., self.output_dir set in __init__ propagates to all same-file methods
+        let files_with_class_attr_taints: std::collections::HashSet<&str> = global_ctx.class_attr_taints
+            .keys()
+            .filter(|(_, _)| true)
+            .map(|(file, _)| file.as_str())
+            .collect();
+
+        let iter_results: Vec<(String, FunctionSummary,
+                                HashMap<String, Vec<HashSet<TaintOrigin>>>,
+                                HashMap<(String, String), HashSet<TaintOrigin>>)> =
+            call_graph.functions
+                .par_iter()
+                .filter(|(func_id, func_node)| {
+                    if iterations == 1 { return true; }
+                    let func_name = func_node.fields.get("name")
+                        .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                        .unwrap_or("");
+                    let file_path = func_id.split("::").next().unwrap_or("");
+                    !extract_cli_tainted_params(func_node).is_empty()
+                        || (global_ctx.call_site_taints.contains_key(func_name)
+                            && global_ctx.call_site_taints[func_name].iter().any(|s| !s.is_empty()))
+                        || files_with_class_attr_taints.contains(file_path)
+                })
+                .map(|(func_id, func_node)| {
+                    let cfg_owned;
+                    let cfg = match global_ctx.cfg_cache.get(func_id.as_str()) {
+                        Some(c) => c,
+                        None => { cfg_owned = build_cfg(func_node); &cfg_owned }
+                    };
+                    let file_path: &str = func_id.split("::").next().unwrap_or("");
+                    let default_content = String::new();
+                    let content = call_graph.file_contents.get(file_path)
+                        .unwrap_or(&default_content);
+                    let (summary, call_sites, class_attrs, _issues) =
+                        analyze_function_taint(&cfg, func_node, ruleset, file_path, content, &global_ctx);
+                    (func_id.clone(), summary, call_sites, class_attrs)
+                })
+                .collect();
+
+        // Serial merge of parallel results into global_ctx
+        for (func_id, new_summary, new_call_sites, new_class_attrs) in iter_results {
+            for (callee, param_taints) in new_call_sites {
+                let entry = global_ctx.call_site_taints
+                    .entry(callee)
+                    .or_insert_with(Vec::new);
+                let mut changed = false;
+                for (i, origins) in param_taints.iter().enumerate() {
+                    if i >= entry.len() { entry.resize(i + 1, HashSet::new()); }
+                    let before_len = entry[i].len();
+                    entry[i].extend(origins.iter().cloned());
+                    if entry[i].len() > before_len { changed = true; }
+                }
+                if changed { summaries_changed = true; }
+            }
+            for (key, origins) in new_class_attrs {
+                let entry = global_ctx.class_attr_taints
+                    .entry(key).or_insert_with(HashSet::new);
+                let before_len = entry.len();
+                entry.extend(origins.iter().cloned());
+                if entry.len() > before_len { summaries_changed = true; }
+            }
+            if let Some(old_summary) = global_ctx.summaries.get(&func_id) {
                 if &new_summary != old_summary {
                     println!("[*] Summary changed for {}", func_id);
                     global_ctx.summaries.insert(func_id.clone(), new_summary);
                     summaries_changed = true;
                 }
             }
-            
-            // Collect issues from the latest pass
-            // We clear the list at the start of each global iteration so we don't duplicate
-            // But we accumulate across functions in the same pass
-            current_pass_issues.extend(issues);
+
+            // Issues from convergence loop are discarded — collected in final pass.
         }
-        
+
+        println!("[*] Iteration {} done in {:.2}s", iterations, t_iter.elapsed().as_secs_f64());
         if !summaries_changed || iterations >= MAX_GLOBAL_ITERATIONS {
             if summaries_changed {
                 println!("[!] Warning: Max global iterations reached without convergence");
             } else {
-                println!("[+] Global convergence reached after {} iterations", iterations);
+                println!("[+] Global convergence reached after {} iterations in {:.2}s total",
+                         iterations, t_convergence.elapsed().as_secs_f64());
             }
-            all_issues = current_pass_issues;
             break;
         }
     }
 
+    // ── Final issue collection pass ──────────────────────────────────────────
+    // After convergence: collect issues using the converged global_ctx.
+    //
+    // Optimization: for large codebases (>5k functions), apply a file-level
+    // pre-filter to skip the ~80% of functions in files with no taint markers.
+    // These functions have no in-file taint sources. NOTE(review): unlike the convergence-loop filter, this one does not consult class_attr_taints — confirm no findings are lost.
+    // For small codebases, the filter overhead outweighs the savings — use
+    // the simpler full par_iter which has lower overhead.
+    const FILE_FILTER_THRESHOLD: usize = 5_000;
+    let use_file_filter = call_graph.functions.len() > FILE_FILTER_THRESHOLD;
+
+    let t_final_start = std::time::Instant::now();
+    let parallel_issues: Vec<Vec<Issue>> = if use_file_filter {
+        let final_func_ids: Vec<&String> = call_graph.functions
+            .keys()
+            .filter(|func_id| {
+                let file_path = func_id.split("::").next().unwrap_or("");
+                if taint_active_files.contains(file_path) { return true; }
+                if let Some(func_node) = call_graph.functions.get(*func_id) {
+                    if !extract_cli_tainted_params(func_node).is_empty() { return true; }
+                    let func_name = func_node.fields.get("name")
+                        .and_then(|v| v.as_ref()).and_then(|v| v.as_str()).unwrap_or("");
+                    if global_ctx.call_site_taints.contains_key(func_name)
+                        && global_ctx.call_site_taints[func_name].iter().any(|s| !s.is_empty()) {
+                        return true;
+                    }
+                }
+                false
+            })
+            .collect();
+        println!("[*] Final pass (parallel+filter): {}/{} functions ({}% filtered out)",
+                 final_func_ids.len(), call_graph.functions.len(),
+                 100 - 100 * final_func_ids.len() / call_graph.functions.len().max(1));
+        final_func_ids
+            .par_iter()
+            .filter_map(|func_id| call_graph.functions.get(*func_id).map(|fn_node| {
+                let cfg_owned;
+                let cfg = match global_ctx.cfg_cache.get(*func_id) {
+                    Some(c) => c,
+                    None => { cfg_owned = build_cfg(fn_node); &cfg_owned }
+                };
+                let file_path: &str = func_id.split("::").next().unwrap_or("");
+                let default_content = String::new();
+                let content = call_graph.file_contents.get(file_path).unwrap_or(&default_content);
+                let (_, _, _, issues) = analyze_function_taint(
+                    &cfg, fn_node, ruleset, file_path, content, &global_ctx
+                );
+                issues
+            }))
+            .collect()
+    } else {
+        let t_final = t_final_start;
+        println!("[*] Final pass (parallel): {} functions...", call_graph.functions.len());
+        let result = call_graph.functions
+            .par_iter()
+            .map(|(func_id, func_node)| {
+                let cfg_owned;
+                let cfg = match global_ctx.cfg_cache.get(func_id.as_str()) {
+                    Some(c) => c,
+                    None => { cfg_owned = build_cfg(func_node); &cfg_owned }
+                };
+                let file_path: &str = func_id.split("::").next().unwrap_or("");
+                let default_content = String::new();
+                let content = call_graph.file_contents.get(file_path).unwrap_or(&default_content);
+                let (_, _, _, issues) = analyze_function_taint(
+                    &cfg, func_node, ruleset, file_path, content, &global_ctx
+                );
+                issues
+            })
+            .collect();
+        println!("[*] Final pass done in {:.2}s", t_final.elapsed().as_secs_f64());
+        result
+    };
+    for issues in parallel_issues {
+        all_issues.extend(issues);
+    }
+    println!("[*] Total taint analysis: {:.2}s", t0.elapsed().as_secs_f64());
+
     // Deduplicate issues
     let mut unique_issues = Vec::new();
     let mut seen_fingerprints = HashSet::new();
@@ -129,6 +397,9 @@ pub fn analyze_program_for_taint(call_graph: &CallGraph, ruleset: &RuleSet) -> V
     unique_issues
 }
 
+/// Return type: (summary, call_site_taints, class_attr_taints, issues)
+/// - call_site_taints: Map<callee_name, Vec<taint_per_param>> — collected at each call site
+/// - class_attr_taints: Map<(file, attr), origins> — from `self.attr = tainted` assignments
 fn analyze_function_taint(
     cfg: &ControlFlowGraph,
     func_node: &AstNode,
@@ -136,17 +407,71 @@ fn analyze_function_taint(
     file_path: &str,
     content: &str,
     global_ctx: &GlobalTaintContext,
-) -> (FunctionSummary, Vec<Issue>) {
+) -> (FunctionSummary, HashMap<String, Vec<HashSet<TaintOrigin>>>, HashMap<(String, String), HashSet<TaintOrigin>>, Vec<Issue>) {
     let mut ctx = TaintContext::new();
     
     // Extract parameters and initialize taint state
     let params = extract_function_params(func_node);
     let mut initial_state = TaintState::new();
     
-    for (idx, param_name) in params.iter().enumerate() {
+    // Seed 1: decorator-detected entry-point parameters.
+    let entry_params = extract_cli_tainted_params(func_node);
+    // HTTP params (routes, API endpoints) → HttpRequest: attacker-controlled via network
+    for param in &entry_params.http {
+        let mut origins = HashSet::new();
+        origins.insert(TaintOrigin::HttpRequest);
+        initial_state.insert(param.clone(), origins);
+    }
+    // CLI params (commands, options) → OperatorConfig: trusted operator chose these.
+    // Sinks like PATH813/SSRF/PY102 check is_attacker_controlled() which returns false
+    // for OperatorConfig, so they won't fire. FILE_DESERIALIZERS will upgrade file
+    // *contents* to HttpRequest, preserving supply-chain detection.
+    for param in &entry_params.operator {
         let mut origins = HashSet::new();
-        origins.insert(TaintOrigin::Param(idx));
-        initial_state.insert(param_name.clone(), origins);
+        origins.insert(TaintOrigin::OperatorConfig);
+        initial_state.insert(param.clone(), origins);
+    }
+
+    // Seed 2: inter-procedural call-site taint — if callers passed tainted args,
+    // seed the matching parameters with their accumulated taint.
+    //
+    // Self-offset: for methods where params[0] is "self" or "cls", call-site args
+    // are indexed without self (caller writes `obj.method(arg0)`, not `method(self, arg0)`).
+    // Shift recorded arg indices by 1 to align with the method's param list.
+    let func_name = func_node.fields.get("name")
+        .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+        .unwrap_or("");
+    let self_offset = params.first().map(|p| p == "self" || p == "cls").unwrap_or(false) as usize;
+    if let Some(param_taints) = global_ctx.call_site_taints.get(func_name) {
+        for (i, origins) in param_taints.iter().enumerate() {
+            if !origins.is_empty() {
+                let param_idx = i + self_offset;
+                if let Some(param_name) = params.get(param_idx) {
+                    let entry = initial_state.entry(param_name.clone()).or_insert_with(HashSet::new);
+                    entry.extend(origins.iter().cloned());
+                }
+            }
+        }
+    }
+
+    // Seed 3: class attribute taint — if any function in this file assigned
+    // `self.attr = tainted`, seed both `self.attr` and the bare attribute name
+    // in the initial state of every function analyzed from the same file.
+    //
+    // Seeding is unconditional for same-file functions (keys are (file, attr)).
+    // Class attributes represent shared state within a class. Any method that could
+    // access these attributes should see their taint, regardless of whether it has
+    // initial_state. Scope guard was removed because cross-file FPs are caused by
+    // inter-proc arg propagation, not class_attr_taints seeding.
+    for ((attr_file, attr_name), origins) in &global_ctx.class_attr_taints {
+        if attr_file == file_path && !origins.is_empty() {
+            let key = format!("self.{}", attr_name);
+            let entry = initial_state.entry(key).or_insert_with(HashSet::new);
+            entry.extend(origins.iter().cloned());
+            // Seed bare attr name for BinOp like `base / self.output_dir`
+            let entry2 = initial_state.entry(attr_name.clone()).or_insert_with(HashSet::new);
+            entry2.extend(origins.iter().cloned());
+        }
     }
     
     // Initialize blocks
@@ -215,43 +540,152 @@ fn analyze_function_taint(
         }
     }
     
-    // Collect issues and compute summary from final state
+    // Collect issues, summary, call-site taints, and class-attr taints
     let mut issues = Vec::new();
     let mut summary = FunctionSummary::default();
-    
+    // call_site_taints: callee_func_name → per-arg taint origins
+    let mut call_site_taints: HashMap<String, Vec<HashSet<TaintOrigin>>> = HashMap::new();
+    // class_attr_taints: (file, attr_name) → origins from `self.attr = tainted`
+    let mut class_attr_taints: HashMap<(String, String), HashSet<TaintOrigin>> = HashMap::new();
+
     for block in cfg.blocks.values() {
-        // Re-run transfer to get issues
         let entry_state = ctx.entry_states.get(&block.id).cloned().unwrap_or_default();
         let (exit_state, block_issues) = transfer_function(
-            block, 
-            entry_state, 
-            ruleset, 
-            file_path, 
-            content, 
+            block,
+            entry_state.clone(),
+            ruleset,
+            file_path,
+            content,
             global_ctx
         );
         issues.extend(block_issues);
-        
-        // Check Return statements for summary
+
+        // Scan all statements for:
+        // 1. Function calls with tainted arguments → record call-site taint
+        // 2. self.attr = tainted assignments → record class attr taint
+        // 3. Return statements → update function summary
+        // Use exit_state as running_state so we see all assignments in the block.
+        // This is conservative (uses end-of-block state for all stmts) but avoids
+        // false negatives from forward assignments in the same block.
+        let running_state = exit_state.clone();
+        for stmt in &block.statements {
+            // Track self.attr = tainted assignments
+            if stmt.node_type == "Assign" {
+                // Check targets for `self.attr` pattern
+                if let Some(targets) = stmt.children.get("targets") {
+                    for target in targets {
+                        if target.node_type == "Attribute" {
+                            let attr_name = target.fields.get("attr")
+                                .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                                .unwrap_or("");
+                            let is_self = target.children.get("value")
+                                .and_then(|v| v.get(0))
+                                .and_then(|v| v.fields.get("id"))
+                                .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                                .map(|s| s == "self")
+                                .unwrap_or(false);
+                            if is_self && !attr_name.is_empty() {
+                                // Get the value being assigned and check if it's tainted
+                                if let Some(val) = stmt.children.get("value").and_then(|v| v.get(0)) {
+                                    let val_names = extract_all_names(val);
+                                    let mut origins: HashSet<TaintOrigin> = HashSet::new();
+                                    for name in &val_names {
+                                        if let Some(o) = running_state.get(name) {
+                                            origins.extend(o.iter().filter(|o| o.is_attacker_controlled()).cloned());
+                                        }
+                                    }
+                                    if !origins.is_empty() {
+                                        class_attr_taints
+                                            .entry((file_path.to_string(), attr_name.to_string()))
+                                            .or_insert_with(HashSet::new)
+                                            .extend(origins.iter().cloned());
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Track function calls with tainted arguments → call-site taint
+            // Record under both the full name AND the bare method name so that
+            // p.initialize(config) registers as call_site_taints["initialize"][0].
+            let mut call_nodes: Vec<&AstNode> = Vec::new();
+            find_call_sites(stmt, &mut call_nodes);
+            for call_node in call_nodes {
+                let call_name = get_full_call_name(call_node);
+                if call_name.is_empty() { continue; }
+
+                // The lookup key(s) to record taint under:
+                // - For bare call `f(x)`: just "f"
+                // - For method `obj.method(x)`: both "obj.method" and "method"
+                let lookup_names: Vec<String> = if call_name.contains('.') {
+                    let method_part = call_name.rsplit('.').next().unwrap_or("").to_string();
+                    if method_part.is_empty() { vec![call_name.clone()] }
+                    else { vec![call_name.clone(), method_part] }
+                } else {
+                    vec![call_name.clone()]
+                };
+
+                if let Some(args) = call_node.children.get("args") {
+                    let mut param_taints: Vec<HashSet<TaintOrigin>> = Vec::new();
+                    for arg in args {
+                        let mut origins: HashSet<TaintOrigin> = HashSet::new();
+                        for name in extract_all_names(arg) {
+                            if let Some(o) = running_state.get(&name) {
+                                origins.extend(o.iter().filter(|o| o.is_attacker_controlled()).cloned());
+                            }
+                        }
+                        param_taints.push(origins);
+                    }
+                    if param_taints.iter().any(|o| !o.is_empty()) {
+                        for key in &lookup_names {
+                            let entry = call_site_taints
+                                .entry(key.clone())
+                                .or_insert_with(Vec::new);
+                            let needed = param_taints.len();
+                            if entry.len() < needed { entry.resize(needed, HashSet::new()); }
+                            for (i, origins) in param_taints.iter().enumerate() {
+                                entry[i].extend(origins.iter().cloned());
+                            }
+                        }
+                    }
+                }
+            }
+
+            // running_state = exit_state (already set above, no per-stmt update needed)
+        }
+
+        // Check Return statements for summary using exit_state
+        // Also check for sinks inside return values (e.g. `return FunctionType(tainted_code, ...)`)
         for stmt in &block.statements {
             if stmt.node_type == "Return" {
                 if let Some(value) = stmt.children.get("value").and_then(|v| v.get(0)) {
-                    // Check if return value is a direct source call
                     if value.node_type == "Call" {
-                         let call_name = get_full_call_name(value);
-                         if ruleset.taint_sources.iter().any(|s| call_name.contains(&s.function_call)) {
-                             summary.returns_external_taint = true;
-                         }
+                        // Check if return value is a sink with tainted argument
+                        check_sink_and_report(value, &exit_state, ruleset, file_path, content, &mut issues);
+
+                        let call_name = get_full_call_name(value);
+                        let is_src = ruleset.taint_sources.iter().any(|s| {
+                            if s.function_call.contains('.') {
+                                call_name.contains(&s.function_call) ||
+                                s.function_call.contains(&call_name)
+                            } else {
+                                call_name == s.function_call
+                            }
+                        });
+                        if is_src { summary.returns_external_taint = true; }
                     }
-                    
-                    // Check taint of returned variables
                     let names = extract_all_names(value);
                     for name in names {
                         if let Some(origins) = exit_state.get(&name) {
                             for origin in origins {
                                 match origin {
-                                    TaintOrigin::External => summary.returns_external_taint = true,
-                                    TaintOrigin::Param(idx) => { summary.param_flows_to_return.insert(*idx); }
+                                    TaintOrigin::External | TaintOrigin::HttpRequest =>
+                                        summary.returns_external_taint = true,
+                                    TaintOrigin::Param(idx) =>
+                                        { summary.param_flows_to_return.insert(*idx); }
+                                    _ => {}
                                 }
                             }
                         }
@@ -260,8 +694,8 @@ fn analyze_function_taint(
             }
         }
     }
-    
-    (summary, issues)
+
+    (summary, call_site_taints, class_attr_taints, issues)
 }
 
 fn compute_entry_state(
@@ -308,34 +742,279 @@ fn transfer_function(
                                 .collect()
                         })
                         .unwrap_or_default();
-                    
-                    if value_node.node_type == "Call" {
+
+                    // --- Phase 2: Subscript taint sources ---
+                    // Handles: attr = request.GET['key']  (Subscript node, not a Call)
+                    if value_node.node_type == "Subscript" {
+                        let container = get_subscript_container(value_node);
+                        // HTTP request containers — attacker-controlled
+                        const HTTP_CONTAINERS: &[&str] = &[
+                            "request.GET", "request.POST", "request.FILES",
+                            "request.COOKIES", "request.META", "request.headers",
+                            "request.args", "request.form", "request.values",
+                            "request.json",
+                        ];
+                        // Operator-supplied containers — trusted (CLI, env config)
+                        // sys.argv is set by whoever invokes the program (the operator).
+                        // os.environ is set by the deployment environment (the operator).
+                        // Neither is attacker-controlled in the HTTP threat model.
+                        const OPERATOR_CONTAINERS: &[&str] = &[
+                            "sys.argv", "os.environ",
+                        ];
+                        if HTTP_CONTAINERS.iter().any(|tc| container.contains(tc)) {
+                            let mut origins = HashSet::new();
+                            origins.insert(TaintOrigin::External);
+                            for target in &targets {
+                                state.insert(target.clone(), origins.clone());
+                            }
+                        } else if OPERATOR_CONTAINERS.iter().any(|tc| container.contains(tc)) {
+                            let mut origins = HashSet::new();
+                            origins.insert(TaintOrigin::OperatorConfig);
+                            for target in &targets {
+                                state.insert(target.clone(), origins.clone());
+                            }
+                        } else {
+                            let mut new_origins = HashSet::new();
+
+                            // Propagate taint from the subscript base if already tainted
+                            // e.g. data = tainted_dict['key'] → data is tainted
+                            let base_names = get_subscript_base_names(value_node);
+                            for name in &base_names {
+                                if let Some(origins) = state.get(name.as_str()) {
+                                    new_origins.extend(origins.iter().cloned());
+                                }
+                            }
+
+                            // Also: if the subscript base is itself a taint source CALL,
+                            // the subscript result is tainted.
+                            // e.g. msg = r.json()["key"] → r.json() is a taint source → msg tainted
+                            if let Some(base_value) = value_node.children.get("value").and_then(|v| v.get(0)) {
+                                if base_value.node_type == "Call" {
+                                    let base_call_name = get_full_call_name(base_value);
+                                    let is_base_source = !base_call_name.is_empty() &&
+                                        ruleset.taint_sources.iter().any(|source| {
+                                            if source.function_call.contains('.') {
+                                                base_call_name.contains(&source.function_call) ||
+                                                source.function_call.contains(&base_call_name)
+                                            } else {
+                                                base_call_name == source.function_call
+                                            }
+                                        });
+                                    if is_base_source {
+                                        new_origins.insert(TaintOrigin::HttpRequest);
+                                    }
+                                }
+                            }
+
+                            if !new_origins.is_empty() {
+                                for target in &targets {
+                                    state.insert(target.clone(), new_origins.clone());
+                                }
+                            }
+                        }
+                    } else if value_node.node_type == "Call" {
                         let call_name = get_full_call_name(value_node);
                         
                         // 1. Check for Taint Source
-                        let is_source = ruleset.taint_sources.iter().any(|source| {
-                            call_name.contains(&source.function_call) || 
-                            source.function_call.contains(&call_name)
+                        let is_source = !call_name.is_empty() && ruleset.taint_sources.iter().any(|source| {
+                            if source.function_call.contains('.') {
+                                call_name.contains(&source.function_call) ||
+                                source.function_call.contains(&call_name)
+                            } else {
+                                call_name == source.function_call
+                            }
                         });
                         
-                        if is_source {
+                        // Check for SystemGenerated sources — tempfile/uuid/secrets
+                        // These are never attacker-controlled regardless of framework
+                        const SYSTEM_GENERATED_CALLS: &[&str] = &[
+                            "tempfile.", "uuid.", "secrets.", "os.urandom",
+                            "random.randbytes", "hashlib.new",
+                        ];
+                        let is_system_generated = !call_name.is_empty() &&
+                            SYSTEM_GENERATED_CALLS.iter().any(|sg| call_name.starts_with(sg) || call_name == *sg);
+
+                        // json.load(f) is an independent taint source: file contents can
+                        // come from third parties (plugins, packages) even if the file PATH
+                        // is operator-chosen. This allows CLI decorator params to be
+                        // OperatorConfig (trusted) while still catching supply-chain attacks
+                        // via loaded config files.
+                        // json.loads (string parsing) is taint-PRESERVING instead — the
+                        // string's own trust level determines the output trust level.
+                        const FILE_DESERIALIZERS: &[&str] = &[
+                            "json.load",    // reads from file handle — contents are external
+                            "yaml.load",    // reads from file — check separate for SafeLoader
+                            "toml.load",    // reads from file
+                            "pickle.load",  // reads from file (also caught by PY301 pattern)
+                        ];
+                        let is_file_deserializer = !call_name.is_empty() &&
+                            FILE_DESERIALIZERS.iter().any(|fd| call_name.contains(fd));
+
+                        // Type conversion wrappers and string deserializers that preserve taint:
+                        // list(), tuple(), json.loads(s), etc. — output has the same trust
+                        // level as input. Taint is propagated from ALL arguments.
+                        // INTENTIONALLY NARROW: identity-preserving type conversions
+                        // (list/tuple/set), JSON string parsing, and regex/path/URL helpers.
+                        // Do NOT include sorted/reversed/enumerate/zip/map/filter —
+                        // those push taint into DoS/join/sorted rules and produce
+                        // massive false positives across large codebases.
+                        const TAINT_PRESERVING_CALLS: &[&str] = &[
+                            "list", "tuple", "set", "frozenset",
+                            "json.loads",
+                            // Regex operations propagate taint from input to match objects
+                            "re.search", "re.match", "re.fullmatch",
+                            "re.findall", "re.finditer",
+                            "group", "groups", "groupdict",
+                            // Path construction/normalization — taint from any component
+                            // propagates to the result. os.path.join(base, user_path) and
+                            // Path(user_path) both carry the taint forward to file-operation sinks.
+                            "os.path.join", "os.path.normpath", "os.path.abspath",
+                            // pathlib.Path constructor: Path(tainted_str) → tainted Path object
+                            // → .read_text(), .write_text(), .open() etc. fire PATH813/OPEN1149
+                            "Path", "PurePath", "PosixPath", "WindowsPath",
+                            // URL parsing/construction: taint flows through URL manipulation.
+                            // os.environ["CI_URL"] → urlsplit() → _replace() → urlunsplit() →
+                            // git fetch <url>  triggers ENV_GIT_URL001 / PY102 / SSRF_001.
+                            "urlsplit", "urlunsplit", "urlparse", "urlunparse",
+                            "urljoin", "urlencode",
+                            "urllib.parse.urlsplit", "urllib.parse.urlunsplit",
+                            "urllib.parse.urlparse", "urllib.parse.urlunparse",
+                            "urllib.parse.urljoin", "urllib.parse.urlencode",
+                        ];
+                        // Match both exact names (re.match) and method suffixes (m.group → .group)
+                        let is_taint_preserving = !call_name.is_empty() &&
+                            TAINT_PRESERVING_CALLS.iter().any(|tp| {
+                                call_name == *tp ||
+                                call_name.ends_with(&format!(".{}", tp))
+                            });
+
+                        if is_taint_preserving {
+                            // Propagate taint from arguments to the result
+                            if let Some(args) = value_node.children.get("args") {
+                                let mut new_origins: HashSet<TaintOrigin> = HashSet::new();
+                                for arg in args {
+                                    for name in extract_all_names(arg) {
+                                        if let Some(origins) = state.get(&name) {
+                                            new_origins.extend(origins.iter().cloned());
+                                        }
+                                    }
+                                }
+                                if !new_origins.is_empty() {
+                                    for target in &targets {
+                                        state.insert(target.clone(), new_origins.clone());
+                                    }
+                                }
+                            }
+                        } else if is_system_generated {
                             for target in &targets {
                                 let mut origins = HashSet::new();
-                                origins.insert(TaintOrigin::External);
+                                origins.insert(TaintOrigin::SystemGenerated);
                                 state.insert(target.clone(), origins);
                             }
+                        } else if is_file_deserializer || is_source {
+                            // Operator-config call sources: os.environ.get(), os.getenv()
+                            // These read values set by the deployment operator, not by
+                            // HTTP request senders.
+                            const OPERATOR_CALL_SOURCES: &[&str] = &[
+                                "os.environ.get", "os.getenv", "os.environ[",
+                            ];
+                            let is_operator_source = !call_name.is_empty() &&
+                                OPERATOR_CALL_SOURCES.iter().any(|op| call_name.contains(op));
+
+                            if is_operator_source {
+                                for target in &targets {
+                                    let mut origins = HashSet::new();
+                                    origins.insert(TaintOrigin::OperatorConfig);
+                                    state.insert(target.clone(), origins);
+                                }
+                            } else {
+                                // is_file_deserializer: json.load(f), yaml.load(f), etc.
+                                //   — always HttpRequest regardless of f's trust level,
+                                //     because file contents can be third-party (supply chain)
+                                // is_source: request.GET.get(), iter_lines(), .json(), etc.
+                                for target in &targets {
+                                    let mut origins = HashSet::new();
+                                    origins.insert(TaintOrigin::HttpRequest);
+                                    state.insert(target.clone(), origins);
+                                }
+                            }
                         } else {
                             // 2. Check for Sanitizer
-                            let is_sanitizer = ruleset.taint_sanitizers.iter().any(|san| {
+                            // If transforms_to is set: transform taint origin instead of clearing.
+                            // If no transforms_to: clear taint (data is fully sanitized).
+                            let matching_sanitizer = ruleset.taint_sanitizers.iter().find(|san| {
                                 call_name.contains(&san.function_call) ||
                                 san.function_call.contains(&call_name)
                             });
-                            
-                            if is_sanitizer {
-                                for target in &targets {
-                                    state.remove(target);
+
+                            if let Some(san) = matching_sanitizer {
+                                if let Some(ref transforms_to) = san.transforms_to {
+                                    // Partial sanitization: transform origin, preserve taintedness
+                                    if let Some(new_origin) = TaintOrigin::from_transforms_to(transforms_to) {
+                                        for target in &targets {
+                                            let mut new_origins = HashSet::new();
+                                            new_origins.insert(new_origin.clone());
+                                            state.insert(target.clone(), new_origins);
+                                        }
+                                    } else {
+                                        // Unknown transforms_to value — fall back to clearing
+                                        for target in &targets { state.remove(target); }
+                                    }
+                                } else {
+                                    // Full sanitization: clear taint completely
+                                    for target in &targets { state.remove(target); }
                                 }
                             } else {
+                                // 2b. Known sink call: propagate taint to result if a
+                                // vulnerable argument is tainted (e.g. b=bytes(tainted))
+                                let sink_taint = {
+                                    let mut found = HashSet::new();
+                                    for sink in &ruleset.taint_sinks {
+                                        let matches = if sink.function_call.contains('.') {
+                                            // Forward-only: "urllib.request.urlopen".contains("open") would be a FP
+                                            call_name.contains(&sink.function_call)
+                                        } else if sink.is_method {
+                                            let dc = call_name.chars().filter(|&c| c == '.').count();
+                                            match dc {
+                                                0 => call_name == sink.function_call,
+                                                _ => {
+                                                    const MP: &[&str] = &["posixpath.","ntpath.","genericpath.","pathlib.","os.","sys.","re.","json.","urllib.","http.","xml.","html.","csv.","io.","base64.","hashlib.","hmac.","struct.","itertools.","functools.","operator.","execute.","ops.","eager."];
+                                                    call_name.ends_with(&format!(".{}", sink.function_call)) && !MP.iter().any(|pfx| call_name.starts_with(pfx))
+                                                }
+                                            }
+                                        } else {
+                                            call_name == sink.function_call
+                                        };
+                                        if !matches { continue; }
+                                        // Check if the vulnerable argument is tainted
+                                        let arg_tainted = if sink.vulnerable_receiver {
+                                            if let Some(func) = value_node.children.get("func").and_then(|v| v.get(0)) {
+                                                if func.node_type == "Attribute" {
+                                                    if let Some(recv) = func.children.get("value").and_then(|v| v.get(0)) {
+                                                        get_direct_taint_names(recv).iter().any(|n| is_attacker_tainted(&state, n))
+                                                    } else { false }
+                                                } else { false }
+                                            } else { false }
+                                        } else {
+                                            if let Some(args) = value_node.children.get("args") {
+                                                if args.len() > sink.vulnerable_parameter_index {
+                                                    get_direct_taint_names(&args[sink.vulnerable_parameter_index]).iter().any(|n| is_attacker_tainted(&state, n))
+                                                } else { false }
+                                            } else { false }
+                                        };
+                                        if arg_tainted {
+                                            found.insert(TaintOrigin::External);
+                                            break;
+                                        }
+                                    }
+                                    found
+                                };
+                                if !sink_taint.is_empty() {
+                                    for target in &targets {
+                                        state.insert(target.clone(), sink_taint.clone());
+                                    }
+                                }
+
                                 // 3. Check for Inter-procedural Taint (Summaries)
                                 
                                 let mut new_origins = HashSet::new();
@@ -364,18 +1043,23 @@ fn transfer_function(
                                         }
                                     }
                                 } else {
-                                    // Fallback: Conservative propagation if unknown function
-                                    if check_args_tainted(value_node, &state) {
-                                        // We propagate the origins from args
-                                        if let Some(args) = value_node.children.get("args") {
-                                            for arg in args {
-                                                let names = extract_all_names(arg);
+                                    // Method receiver propagation ONLY:
+                                    // tainted_obj.method() → result is tainted.
+                                    // We do NOT propagate through positional args of unknown functions
+                                    // (disabled: causes taint explosion through every utility call).
+                                    if let Some(func) = value_node.children.get("func").and_then(|v| v.get(0)) {
+                                        if func.node_type == "Attribute" {
+                                            if let Some(receiver) = func.children.get("value").and_then(|v| v.get(0)) {
+                                                let names = extract_all_names(receiver);
                                                 for name in names {
                                                     if let Some(origins) = state.get(&name) {
                                                         new_origins.extend(origins.iter().cloned());
                                                     }
                                                 }
                                             }
+                                        // dead code below — kept for structure
+                                        } else {
+                                            let _ = (); // no positional arg propagation
                                         }
                                     }
                                 }
@@ -387,8 +1071,39 @@ fn transfer_function(
                                 }
                             }
                         }
+                    } else if value_node.node_type == "Constant" || value_node.node_type == "JoinedStr" {
+                        // Tier 3: Constant folding — string/numeric literals are DeveloperDefined.
+                        // "text" or f"text with {constant}" → developer wrote it, never user input.
+                        // This handles: INTERNAL_RESET_SESSION_TOKEN = "_password_reset_token"
+                        // and all other module-level or class-level constant assignments.
+                        let is_all_constant = value_node.node_type == "Constant" || {
+                            // For f-strings: DeveloperDefined only if ALL FormattedValues are also constants/DeveloperDefined
+                            value_node.children.get("values").map_or(true, |vals| {
+                                vals.iter().all(|v| {
+                                    v.node_type == "Constant" || (
+                                        v.node_type == "FormattedValue" &&
+                                        v.children.get("value").and_then(|vv| vv.get(0))
+                                            .map_or(false, |expr| {
+                                                // Check if the expr name is DeveloperDefined in state
+                                                get_direct_taint_names(expr).iter().all(|n| {
+                                                    state.get(n).map_or(true, |origins| {
+                                                        origins.iter().all(|o| !o.is_attacker_controlled())
+                                                    })
+                                                })
+                                            })
+                                    )
+                                })
+                            })
+                        };
+                        if is_all_constant {
+                            for target in &targets {
+                                let mut origins = HashSet::new();
+                                origins.insert(TaintOrigin::DeveloperDefined);
+                                state.insert(target.clone(), origins);
+                            }
+                        }
                     } else {
-                        // Transitive propagation (Assignment)
+                        // Transitive propagation (Assignment from Name/Attribute/etc.)
                         let mut new_origins = HashSet::new();
                         let src_names = extract_all_names(value_node);
                         for name in src_names {
@@ -396,23 +1111,197 @@ fn transfer_function(
                                 new_origins.extend(origins.iter().cloned());
                             }
                         }
-                        
                         if !new_origins.is_empty() {
                             for target in &targets {
                                 state.insert(target.clone(), new_origins.clone());
                             }
                         }
                     }
+
+                    // BinOp taint propagation: x = tainted % "..." or "..." % tainted
+                    // Handles Python string formatting: sql = "SELECT * FROM %s" % table
+                    if value_node.node_type == "BinOp" {
+                        let mut binop_origins = HashSet::new();
+                        for side in ["left", "right"] {
+                            if let Some(operand) = value_node.children.get(side).and_then(|v| v.get(0)) {
+                                let names = get_direct_taint_names(operand);
+                                for name in names {
+                                    if let Some(origins) = state.get(&name) {
+                                        binop_origins.extend(origins.iter().cloned());
+                                    }
+                                }
+                            }
+                        }
+                        if !binop_origins.is_empty() {
+                            for target in &targets {
+                                state.insert(target.clone(), binop_origins.clone());
+                            }
+                        }
+                    }
+
+                    // BoolOp taint propagation: x = a or b, x = a and b
+                    // If any operand is tainted, x is tainted.
+                    // Handles: config = plugin_config or {}  →  config is tainted if plugin_config is
+                    if value_node.node_type == "BoolOp" {
+                        let mut bool_origins = HashSet::new();
+                        if let Some(values) = value_node.children.get("values") {
+                            for val in values {
+                                for name in extract_all_names(val) {
+                                    if let Some(origins) = state.get(&name) {
+                                        bool_origins.extend(origins.iter().cloned());
+                                    }
+                                }
+                            }
+                        }
+                        if !bool_origins.is_empty() {
+                            for target in &targets {
+                                state.insert(target.clone(), bool_origins.clone());
+                            }
+                        }
+                    }
+
+                    // Check ALL call nodes within the RHS for sinks.
+                    // Using find_call_sites (not just the outermost call) catches nested
+                    // sinks like: result = env.from_string(tainted).render()
+                    // where from_string is the dangerous call, not render.
+                    if value_node.node_type == "Call" {
+                        let mut rhs_calls = Vec::new();
+                        find_call_sites(value_node, &mut rhs_calls);
+                        for call in rhs_calls {
+                            check_sink_and_report(call, &state, ruleset, file_path, content, &mut issues);
+                        }
+                    }
+                    // f-string: x = f"...{tainted}..."
+                    // 1. Flag FSTRING867 if any slot contains tainted variable.
+                    // 2. Propagate taint to x (the f-string result carries taint forward).
+                    if value_node.node_type == "JoinedStr" {
+                        check_fstring_taint(value_node, &state, ruleset, file_path, content, &mut issues);
+                        // Propagate: if any FormattedValue is tainted, result is tainted
+                        let mut origins = HashSet::new();
+                        if let Some(values) = value_node.children.get("values") {
+                            for val in values {
+                                if val.node_type == "FormattedValue" {
+                                    if let Some(expr) = val.children.get("value").and_then(|v| v.get(0)) {
+                                        for name in extract_all_names(expr) {
+                                            if let Some(o) = state.get(&name) {
+                                                origins.extend(o.iter().cloned());
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                        if !origins.is_empty() {
+                            for target in &targets {
+                                state.insert(target.clone(), origins.clone());
+                            }
+                        }
+                    }
+                }
+            }
+            // For-loop variable binding: `for x in tainted_collection` → x is tainted.
+            // The CFG flattens for-loops so the For node appears as a statement
+            // in the header block. Propagate taint from iter to target.
+            "For" => {
+                if let Some(iter) = stmt.children.get("iter").and_then(|v| v.get(0)) {
+                    let iter_names = extract_all_names(iter);
+                    let mut loop_origins: HashSet<TaintOrigin> = HashSet::new();
+                    for name in &iter_names {
+                        if let Some(origins) = state.get(name) {
+                            loop_origins.extend(origins.iter().cloned());
+                        }
+                    }
+                    if !loop_origins.is_empty() {
+                        if let Some(target) = stmt.children.get("target").and_then(|v| v.get(0)) {
+                            let target_names: Vec<String> = match target.node_type.as_str() {
+                                "Name" => target.fields.get("id")
+                                    .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                                    .map(|s| vec![s.to_string()])
+                                    .unwrap_or_default(),
+                                "Tuple" => target.children.get("elts")
+                                    .map(|elts| elts.iter()
+                                        .filter_map(|e| e.fields.get("id")
+                                            .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                                            .map(|s| s.to_string()))
+                                        .collect())
+                                    .unwrap_or_default(),
+                                _ => vec![],
+                            };
+                            for name in target_names {
+                                state.insert(name, loop_origins.clone());
+                            }
+                        }
+                    }
+                }
+                // Also check any sink calls in the for-loop header
+                let mut call_sites = Vec::new();
+                find_call_sites(stmt, &mut call_sites);
+                for call_node in call_sites {
+                    check_sink_and_report(call_node, &state, ruleset, file_path, content, &mut issues);
                 }
             }
             "Expr" => {
                 if let Some(value) = stmt.children.get("value").and_then(|v| v.get(0)) {
                     if value.node_type == "Call" {
                         check_sink_and_report(value, &state, ruleset, file_path, content, &mut issues);
-                        
-                        // Sanitizer as standalone statement
+                    }
+                    if value.node_type == "JoinedStr" {
+                        check_fstring_taint(value, &state, ruleset, file_path, content, &mut issues);
+                    }
+                }
+            }
+            // With statement: `with expr as var` → var inherits taint from expr.
+            // Handles: with open(tainted_path) as f → f is tainted
+            //          with tainted_ctx as val → val is tainted
+            "With" => {
+                if let Some(items) = stmt.children.get("items") {
+                    for item in items {
+                        // context_expr is the expression (e.g. open(path))
+                        // optional_vars is the `as var` binding
+                        let ctx_tainted: HashSet<TaintOrigin> = {
+                            let mut origins = HashSet::new();
+                            if let Some(ctx) = item.children.get("context_expr").and_then(|v| v.get(0)) {
+                                // Check if context_expr is a call that is a sink (e.g. open())
+                                // and whether its arguments are tainted → ctx gets taint
+                                if ctx.node_type == "Call" {
+                                    check_sink_and_report(ctx, &state, ruleset, file_path, content, &mut issues);
+                                    // Propagate taint from call arguments to context var
+                                    if let Some(args) = ctx.children.get("args") {
+                                        for arg in args {
+                                            for name in extract_all_names(arg) {
+                                                if let Some(o) = state.get(&name) {
+                                                    origins.extend(o.iter().cloned());
+                                                }
+                                            }
+                                        }
+                                    }
+                                } else {
+                                    for name in extract_all_names(ctx) {
+                                        if let Some(o) = state.get(&name) {
+                                            origins.extend(o.iter().cloned());
+                                        }
+                                    }
+                                }
+                            }
+                            origins
+                        };
+                        if !ctx_tainted.is_empty() {
+                            if let Some(opt_vars) = item.children.get("optional_vars").and_then(|v| v.get(0)) {
+                                if let Some(var_name) = opt_vars.fields.get("id")
+                                    .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                                {
+                                    state.insert(var_name.to_string(), ctx_tainted);
+                                }
+                            }
+                        }
                     }
                 }
+                // Also check sinks in the With body via the fallthrough
+                let mut call_sites = Vec::new();
+                find_call_sites(stmt, &mut call_sites);
+                for call_node in call_sites {
+                    check_sink_and_report(call_node, &state, ruleset, file_path, content, &mut issues);
+                }
             }
             _ => {
                 let mut call_sites = Vec::new();
@@ -423,10 +1312,73 @@ fn transfer_function(
             }
         }
     }
-    
+
     (state, issues)
 }
 
+/// Returns true if any origin recorded for `name` is attacker-controlled
+/// (false when `name` is absent). DeveloperDefined, SystemGenerated, OperatorConfig do NOT trigger sinks.
+fn is_attacker_tainted(state: &TaintState, name: &str) -> bool {
+    state.get(name).map_or(false, |origins| {
+        origins.iter().any(|o| o.is_attacker_controlled())
+    })
+}
+
+/// Check taint considering the sink's triggers_on policy.
+///
+/// "all" (default)        — fires for all attacker-controlled origins.
+/// "shell_injectable"     — fires for all EXCEPT ShellSanitized.
+///                          Use for PY102 — shlex.quote is valid shell mitigation.
+/// "sql_injectable"       — fires for all EXCEPT SqlSanitized.
+///                          Use for PY101 — quote_name is valid SQL mitigation.
+/// "html_injectable"      — fires for all EXCEPT HtmlSanitized.
+///                          Use for XSS sinks — html.escape/format_html are valid.
+/// "injectable_only"      — fires ONLY for HttpRequest/External (no sanitized variants).
+///                          Legacy / strict mode. NOTE(review): same predicate as "shell_injectable" — confirm.
+fn is_tainted_for_sink(state: &TaintState, name: &str, triggers_on: &str) -> bool {
+    state.get(name).map_or(false, |origins| {
+        origins.iter().any(|o| {
+            match triggers_on {
+                "shell_injectable" => o.is_shell_injectable(),   // HttpRequest|External only
+                "sql_injectable"   => o.is_sql_injectable(),     // HttpRequest|External|ShellSanitized
+                "html_injectable"  => o.is_attacker_controlled(), // all (HtmlSanitized is not attacker-controlled)
+                "injectable_only"  => o.is_shell_injectable(),   // identical to the "shell_injectable" arm
+                _                  => o.is_attacker_controlled(), // "all" default; also any unrecognized policy string
+            }
+        })
+    })
+}
+
+/// Returns only the DIRECT variable name(s) of an AST node for taint checking.
+/// Unlike `extract_all_names`, this does NOT recurse into attribute receivers.
+/// - Name("attr")     → ["attr"]
+/// - Attribute("self.STANDARD_UNIT") → ["STANDARD_UNIT"]  (not "self")
+/// - Subscript(d["key"]) → direct names of the container (["d"]); any other node type → []
+fn get_direct_taint_names(node: &AstNode) -> Vec<String> {
+    match node.node_type.as_str() {
+        "Name" => {
+            if let Some(id) = node.fields.get("id").and_then(|v| v.as_ref()).and_then(|v| v.as_str()) {
+                return vec![id.to_string()];
+            }
+        }
+        "Attribute" => {
+            // Only return the attribute name itself, NOT the receiver.
+            // This prevents self.STANDARD_UNIT from matching because self is tainted.
+            if let Some(attr) = node.fields.get("attr").and_then(|v| v.as_ref()).and_then(|v| v.as_str()) {
+                return vec![attr.to_string()];
+            }
+        }
+        "Subscript" => {
+            // Return the container name for subscript access (e.g., dict["key"] → "dict")
+            if let Some(value) = node.children.get("value").and_then(|v| v.get(0)) {
+                return get_direct_taint_names(value);
+            }
+        }
+        _ => {}
+    }
+    Vec::new()
+}
+
 fn check_sink_and_report(
     call_node: &AstNode,
     state: &TaintState,
@@ -436,39 +1388,261 @@ fn check_sink_and_report(
     issues: &mut Vec<Issue>,
 ) {
     let call_name = get_full_call_name(call_node);
-    
+
+    // Skip unresolvable calls (empty name matches everything via contains(""))
+    if call_name.is_empty() {
+        return;
+    }
+
     for sink in &ruleset.taint_sinks {
-        if call_name.contains(&sink.function_call) || sink.function_call.contains(&call_name) {
+        // Matching strategy:
+        // - Dotted sink paths ("subprocess.run"): substring match
+        // - Method sinks (is_method=true, e.g. "replace", "join", "format"):
+        //     call_name must end with ".funcname" (avoids "set" matching builtin "set()")
+        // - Builtin sinks (is_method=false, e.g. "set", "open", "getattr"):
+        //     call_name must equal funcname exactly (prevents "cache.set" matching "set")
+        let matches = if sink.function_call.contains('.') {
+            // Forward-only: "urllib.request.urlopen".contains("open") is a FP
+            call_name.contains(&sink.function_call)
+        } else if sink.is_method {
+            // Method sinks (replace, join, center, etc.):
+            // - 0 dots: receiver was a literal/constant → exact match
+            // - 1 dot: normal method call "s.method" → ends_with ".method"
+            //   EXCEPT when receiver looks like a module (posixpath, ntpath, etc.)
+            // - 2+ dots: module path → NOT a method, skip
+            const MODULE_PREFIXES: &[&str] = &[
+                "posixpath.", "ntpath.", "genericpath.", "pathlib.",
+                "os.", "sys.", "re.", "json.", "urllib.", "http.",
+                "xml.", "html.", "csv.", "io.", "base64.", "hashlib.",
+                "hmac.", "struct.", "itertools.", "functools.", "operator.",
+                // ML framework module prefixes that have .execute() but are NOT SQL sinks:
+                //   execute.execute(b"Fill", ...) — eager op execution
+                //   ops.execute(...)              — operation execution
+                "execute.", "ops.", "eager.",
+            ];
+            let dot_count = call_name.chars().filter(|&c| c == '.').count();
+            // For dot_count=0 (e.g. the receiver was a literal, so get_full_call_name
+            // only returns the method name), require the func node to be an Attribute
+            // to distinguish `'/'.join(parts)` (method on literal) from `execute(x)` (standalone).
+            let func_is_attribute = call_node.children.get("func")
+                .and_then(|v| v.get(0))
+                .map(|f| f.node_type == "Attribute")
+                .unwrap_or(false);
+            match dot_count {
+                0 => func_is_attribute && call_name == sink.function_call,
+                _ => {
+                    call_name.ends_with(&format!(".{}", sink.function_call)) &&
+                    !MODULE_PREFIXES.iter().any(|pfx| call_name.starts_with(pfx))
+                }
+            }
+        } else {
+            call_name == sink.function_call
+        };
+        if !matches {
+            continue;
+        }
+
+        let mut found_taint = false;
+
+        let triggers_on = sink.triggers_on.as_str();
+
+        if sink.vulnerable_receiver {
+            // Check method receiver: tainted_obj.method(...) → receiver is tainted.
+            // Use extract_all_names so inline expressions like Path(tainted).mkdir()
+            // are correctly detected — Path(output) is a Call whose arg "output" is tainted.
+            if let Some(func) = call_node.children.get("func").and_then(|v| v.get(0)) {
+                if func.node_type == "Attribute" {
+                    if let Some(receiver) = func.children.get("value").and_then(|v| v.get(0)) {
+                        let names = extract_all_names(receiver);
+                        for name in names {
+                            if is_tainted_for_sink(state, &name, triggers_on) {
+                                found_taint = true;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        } else {
+            // Check positional argument at vulnerable_parameter_index.
+            // When vulnerable_keyword is specified, skip Phase 1 entirely — the sink
+            // is keyword-only (e.g. create(password=tainted), not create(tainted)).
+            // Without this guard, Q.create(tainted_list) fires PLAIN_PWD001 because
+            // args[0] is tainted even though no password= keyword is present.
+            let skip_positional = sink.vulnerable_keyword.is_some();
+            if !skip_positional {
             if let Some(args) = call_node.children.get("args") {
                 if args.len() > sink.vulnerable_parameter_index {
                     let arg = &args[sink.vulnerable_parameter_index];
                     let arg_names = extract_all_names(arg);
-                    
                     for name in arg_names {
-                        if let Some(_origins) = state.get(&name) {
-                            // We found a tainted variable flowing to a sink
-                            
-                            println!("[!] VULNERABILITY: Tainted variable '{}' flows to sink '{}'", name, call_name);
-                            report_issue(ruleset, &sink.vulnerability_id, file_path, call_node, content, issues);
-                            break; // Report once per sink call
+                        if is_tainted_for_sink(state, &name, triggers_on) {
+                            found_taint = true;
+                            break;
                         }
                     }
+                    // Also check if the arg contains an inline taint source call
+                    // e.g. httpx.stream("GET", r.json()["url"]) — r.json() is a source
+                    if !found_taint {
+                        let mut inline_calls: Vec<&AstNode> = Vec::new();
+                        find_call_sites(arg, &mut inline_calls);
+                        for inline_call in inline_calls {
+                            let inline_name = get_full_call_name(inline_call);
+                            let is_inline_source = ruleset.taint_sources.iter().any(|s| {
+                                if s.function_call.contains('.') {
+                                    inline_name.contains(&s.function_call) ||
+                                    s.function_call.contains(&inline_name)
+                                } else {
+                                    inline_name == s.function_call
+                                }
+                            });
+                            if is_inline_source {
+                                found_taint = true;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            } // end skip_positional guard
+        }
+
+        // Phase 3: keyword arguments for positional-arg sinks only.
+        // If vulnerable_keyword is set, only that named kwarg triggers.
+        // Otherwise, any tainted kwarg can trigger (for sinks that accept kwargs).
+        if !found_taint && !sink.vulnerable_receiver {
+            if let Some(keywords) = call_node.children.get("keywords") {
+                for kw in keywords {
+                    let kw_arg_name = kw.fields.get("arg")
+                        .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                        .unwrap_or("");
+                    // If vulnerable_keyword is specified, skip non-matching kwargs
+                    if let Some(ref vk) = sink.vulnerable_keyword {
+                        if kw_arg_name != vk.as_str() { continue; }
+                    }
+                    if let Some(kw_value) = kw.children.get("value").and_then(|v| v.get(0)) {
+                        let kw_names = get_direct_taint_names(kw_value);
+                        for name in kw_names {
+                            if is_tainted_for_sink(state, &name, triggers_on) {
+                                found_taint = true;
+                                break;
+                            }
+                        }
+                    }
+                    if found_taint { break; }
                 }
             }
         }
+
+        if found_taint {
+            println!("[!] VULNERABILITY: Tainted variable flows to sink '{}'", call_name);
+            report_issue(ruleset, &sink.vulnerability_id, file_path, call_node, content, issues);
+        }
+        // Note: receiver, positional and keyword checks all go through
+        // is_tainted_for_sink, so found_taint honors this sink's triggers_on policy.
+        // An inline taint-source call inside the positional argument also sets it.
     }
 }
 
-fn check_args_tainted(call_node: &AstNode, state: &TaintState) -> bool {
-    if let Some(args) = call_node.children.get("args") {
-        for arg in args {
-            let names = extract_all_names(arg);
-            if names.iter().any(|name| state.contains_key(name)) {
-                return true;
+/// Check if an f-string (JoinedStr) contains a directly tainted variable and report FSTRING867.
+///
+/// Uses get_direct_taint_names (not extract_all_names) so only DIRECT variable references
+/// inside the f-string slots trigger the rule. This prevents FPs where tainted data is
+/// wrapped in a safe function call: `f"count: {len(data)}"` does NOT fire because `len()`
+/// transforms the tainted data before interpolation (result is an integer, not injectable).
+///
+/// Cases that fire:
+///   f"{user_input}"           — direct Name reference, tainted → fires
+///   f"{obj.field}"            — Attribute, field is tainted → fires
+///   f"{data[key]}"            — Subscript, data is tainted → fires
+///
+/// Cases that do NOT fire (suppressed — any Call slot yields no direct names):
+///   f"{len(tainted_list)}"    — len() wraps it, returns int, not injectable
+///   f"{str(tainted)}"         — Call, not inspected. NOTE(review): str() does not sanitize — confirm intended
+///   f"{repr(tainted)}"        — Call, not inspected (repr() quotes the value)
+///   f"{x!r}"                  — !r conversion quotes the value (same as repr)
+///   f"{x!a}"                  — !a conversion applies ascii(), which also escapes non-ASCII
+fn check_fstring_taint(
+    node: &AstNode,
+    state: &TaintState,
+    ruleset: &RuleSet,
+    file_path: &str,
+    content: &str,
+    issues: &mut Vec<Issue>,
+) {
+    // JoinedStr.children["values"] contains Constant and FormattedValue nodes.
+    if let Some(values) = node.children.get("values") {
+        for val in values {
+            if val.node_type == "FormattedValue" {
+                // Skip slots with repr/ascii conversion: {x!r} and {x!a} quote the value, which
+                // this rule treats as safe. conversion field: 114='r', 97='a', 115='s', -1=none.
+                let conversion = val.fields.get("conversion")
+                    .and_then(|v| v.as_ref()).and_then(|v| v.as_i64())
+                    .unwrap_or(-1);
+                if conversion == 114 || conversion == 97 { // !r or !a
+                    continue;
+                }
+                // FormattedValue.children["value"] is the expression inside {}.
+                if let Some(expr) = val.children.get("value").and_then(|v| v.get(0)) {
+                    // Use get_direct_taint_names: only direct Name/Attribute/Subscript
+                    // references — NOT recursive into function call arguments.
+                    let names = get_direct_taint_names(expr);
+                    for name in names {
+                        if is_attacker_tainted(state, &name) {
+                            report_issue(ruleset, "FSTRING867", file_path, node, content, issues);
+                            return; // report once per f-string
+                        }
+                    }
+                }
             }
         }
     }
-    false
+}
+
+/// Returns the dotted path of a Subscript's container, e.g. "request.GET" for
+/// `request.GET['key']`. Returns "" when there is no "value" child or it is neither Attribute nor Name.
+fn get_subscript_container(node: &AstNode) -> String {
+    if let Some(value) = node.children.get("value").and_then(|v| v.get(0)) {
+        match value.node_type.as_str() {
+            "Attribute" => {
+                let mut parts = Vec::new(); // attribute names, collected outermost-first
+                let mut cur = value;
+                loop { // follow "value" children inward until a node has none
+                    if let Some(attr) = cur.fields.get("attr").and_then(|v| v.as_ref()).and_then(|v| v.as_str()) {
+                        parts.push(attr.to_string());
+                    }
+                    if let Some(next) = cur.children.get("value").and_then(|v| v.get(0)) {
+                        cur = next;
+                    } else {
+                        break;
+                    }
+                }
+                if let Some(base) = cur.fields.get("id").and_then(|v| v.as_ref()).and_then(|v| v.as_str()) { // innermost node's id, if it has one
+                    parts.push(base.to_string());
+                }
+                parts.reverse(); // restore source order: base first, last attribute last
+                parts.join(".")
+            }
+            "Name" => value.fields.get("id")
+                .and_then(|v| v.as_ref())
+                .and_then(|v| v.as_str())
+                .unwrap_or("")
+                .to_string(),
+            _ => String::new(),
+        }
+    } else {
+        String::new()
+    }
+}
+
+/// Returns what `extract_all_names` yields for the base (non-slice) part of a Subscript,
+/// e.g. ["tainted_dict"] for `tainted_dict['key']`; empty when the node has no "value" child.
+fn get_subscript_base_names(node: &AstNode) -> Vec<String> {
+    if let Some(value) = node.children.get("value").and_then(|v| v.get(0)) {
+        extract_all_names(value)
+    } else {
+        Vec::new()
+    }
 }
 
 fn extract_function_params(func_node: &AstNode) -> Vec<String> {
@@ -546,8 +1720,204 @@ fn get_full_call_name(call_node: &AstNode) -> String {
     String::new()
 }
 
+/// Parameter names of a decorated entry-point function, grouped by the taint
+/// origin they should receive. Built by `extract_cli_tainted_params`, which
+/// inspects a FunctionDef node's decorator_list.
+///
+/// Decorators are matched by name only (e.g. `route` in `app.route`), so the
+/// receiver (`app`, `router`, `click`, ...) is not checked:
+///
+/// **Web** (route/endpoint and Django REST style decorators) -> `http`:
+///   route / get / post / put / delete / patch / head / options
+///   api_view / action / require_http_methods / require_GET / require_POST
+///   view / endpoint / add_route
+///
+/// **Task queues** (data arrives from external message brokers) -> `http`:
+///   task / shared_task / periodic_task / actor / job
+///
+/// **Event handlers and serverless** -> `http`:
+///   receiver / before_request / after_request / teardown_request
+///   before_app_request / after_app_request / handler
+///
+/// **CLI** (Click, Typer) -> `operator`:
+///   command / group classify every parameter; option(...) / argument(...)
+///   classify only the parameter they name, and only without command/group.
+///
+/// A decorator from the `http` lists takes precedence: ALL parameters (except
+/// self/cls) then go to `http` and CLI decorators on the function are ignored.
+struct EntryPointParams {
+    /// HTTP decorator params (@app.route, @api_view) → TaintOrigin::HttpRequest.
+    /// Attacker-controlled: any internet user can send arbitrary values.
+    http: Vec<String>,
+    /// CLI decorator params (@app.command, @click.option) → TaintOrigin::OperatorConfig.
+    /// Operator-trusted: the person running the tool chose these values.
+    /// FILE_DESERIALIZERS still produce HttpRequest when reading file *contents*,
+    /// so supply-chain detection is preserved even for operator-specified file paths.
+    operator: Vec<String>,
+}
+
+impl EntryPointParams {
+    fn is_empty(&self) -> bool { self.http.is_empty() && self.operator.is_empty() }
+}
+
+/// Classifies the parameters of `func_node` (a FunctionDef) according to the
+/// entry-point decorators found in its `decorator_list`.
+///
+/// Returns an `EntryPointParams` whose `http` list holds every parameter (except
+/// self/cls) when a web/task/event decorator is present, and whose `operator`
+/// list holds CLI inputs: every parameter under `command`/`group`, otherwise only
+/// the parameters named by `option(...)` / `argument(...)` decorators.
+fn extract_cli_tainted_params(func_node: &AstNode) -> EntryPointParams {
+    /// String value of `node.fields[key]`, if present and a string.
+    fn str_field<'a>(node: &'a AstNode, key: &str) -> Option<&'a str> {
+        node.fields.get(key).and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+    }
+
+    /// Name a decorator expression is matched by: `attr` for `app.route`,
+    /// `id` for a plain `shared_task`; "" for anything else.
+    fn decorator_name(node: &AstNode) -> &str {
+        str_field(node, "attr").or(str_field(node, "id")).unwrap_or("")
+    }
+
+    let mut result = EntryPointParams { http: Vec::new(), operator: Vec::new() };
+
+    let decorator_list = match func_node.children.get("decorator_list") {
+        Some(d) => d,
+        None => return result,
+    };
+
+    // HTTP entry points — parameters receive attacker-controlled data from network requests.
+    // These produce HttpRequest taint which triggers all security sinks.
+    const HTTP_TAINT_DECORATOR_ATTRS: &[&str] = &[
+        // Web frameworks — route/endpoint decorators
+        "route", "get", "post", "put", "delete", "patch", "head", "options",
+        // Django REST Framework
+        "api_view", "action", "require_http_methods", "require_GET", "require_POST",
+        // aiohttp
+        "view", "endpoint",
+        // Starlette / FastAPI router
+        "add_route",
+        // Task queues — tasks receive data from external message brokers
+        "task", "shared_task", "periodic_task", "actor", "job",
+        // Event handlers
+        "receiver", "before_request", "after_request", "teardown_request",
+        "before_app_request", "after_app_request",
+        // Serverless
+        "handler",
+    ];
+
+    // CLI entry points (Click, Typer): parameters are chosen by the operator
+    // running the tool, so they are reported separately in `result.operator`
+    // rather than being treated as network input. An HTTP decorator on the same
+    // function takes precedence (see the final classification below).
+    const CLI_TAINT_DECORATOR_ATTRS: &[&str] = &[
+        "command", "group",
+    ];
+
+    let mut has_http_taint_decorator = false;
+    let mut has_cli_taint_decorator = false;
+    let mut click_option_params: Vec<String> = Vec::new();
+
+    for decorator in decorator_list {
+        // A Call decorator (`@app.route("/x")`) keeps its callee under "func";
+        // a bare decorator (`@app.route`, `@shared_task`) is itself the callee.
+        let is_call = decorator.node_type == "Call";
+        let name = if is_call {
+            match decorator.children.get("func").and_then(|v| v.get(0)) {
+                Some(func) => decorator_name(func),
+                None => continue,
+            }
+        } else {
+            decorator_name(decorator)
+        };
+
+        if HTTP_TAINT_DECORATOR_ATTRS.contains(&name) {
+            has_http_taint_decorator = true;
+            continue;
+        }
+        if CLI_TAINT_DECORATOR_ATTRS.contains(&name) {
+            has_cli_taint_decorator = true;
+            continue;
+        }
+        if !is_call || (name != "option" && name != "argument") {
+            continue;
+        }
+
+        let args = decorator.children.get("args").map(|v| v.as_slice()).unwrap_or(&[]);
+        if name == "argument" {
+            // click.argument("name") / typer.argument: Click lowercases the name
+            // and maps dashes to underscores.
+            if let Some(first) = args.first().and_then(|a| str_field(a, "value")) {
+                click_option_params.push(first.replace('-', "_").to_lowercase());
+            }
+            continue;
+        }
+
+        // click.option: follow Click's naming rule. A declaration without a
+        // leading dash is the explicit Python name; otherwise the first long
+        // flag wins, then the first short one ("-v", "--verbose" -> "verbose").
+        let decls: Vec<&str> = args.iter().filter_map(|a| str_field(a, "value")).collect();
+        let param_name = match decls.iter().find(|d| !d.starts_with('-')) {
+            Some(explicit) => Some(explicit.to_string()),
+            None => decls.iter()
+                .find(|d| d.starts_with("--"))
+                .or(decls.first())
+                // "--shout/--no-shout" is named after the part before the slash.
+                .map(|d| d.split('/').next().unwrap_or("")
+                    .trim_start_matches('-').replace('-', "_").to_lowercase()),
+        };
+        if let Some(param) = param_name {
+            click_option_params.push(param);
+        }
+    }
+
+    // Helper closure: collect all non-self/cls parameter names
+    let collect_params = |args_node: &AstNode| -> Vec<String> {
+        let mut names = Vec::new();
+        for key in &["args", "posonlyargs", "kwonlyargs"] {
+            if let Some(params) = args_node.children.get(*key) {
+                for param in params {
+                    if let Some(name) = param.fields.get("arg")
+                        .and_then(|v| v.as_ref()).and_then(|v| v.as_str())
+                    {
+                        if name != "self" && name != "cls" {
+                            names.push(name.to_string());
+                        }
+                    }
+                }
+            }
+        }
+        names
+    };
+
+    // Precedence: an HTTP decorator outranks command/group, which outranks
+    // lone option/argument decorators.
+    let args_node = func_node.children.get("args").and_then(|v| v.get(0));
+    if has_http_taint_decorator {
+        // HTTP entry point: all params → HttpRequest (attacker-controlled via network)
+        if let Some(args_node) = args_node {
+            result.http.extend(collect_params(args_node));
+        }
+    } else if has_cli_taint_decorator {
+        // CLI entry point: all params → OperatorConfig (operator chose these values).
+        if let Some(args_node) = args_node {
+            result.operator.extend(collect_params(args_node));
+        }
+    } else {
+        // @click.option / @click.argument without a command decorator:
+        // these are also operator-controlled inputs
+        result.operator.extend(click_option_params);
+    }
+
+    result
+}
+
 fn report_issue(ruleset: &RuleSet, vuln_id: &str, file_path: &str, stmt: &AstNode, content: &str, issues: &mut Vec<Issue>) {
     if let Some(vuln_rule) = ruleset.rules.iter().find(|r| r.id == vuln_id) {
+        // Apply global and rule-level file exclusions (path + content) to taint findings
+        if vuln_rule.is_excluded(file_path, content, &ruleset.defaults) {
+            return;
+        }
         let line_content = content.lines().nth(stmt.lineno.saturating_sub(1) as usize).unwrap_or("").to_string();
         issues.push(Issue::new(
             vuln_rule.id.clone(),
diff --git a/src/pyspector/_rust_core/src/graph/call_graph_builder.rs b/src/pyspector/_rust_core/src/graph/call_graph_builder.rs
index 312be4c2..04275034 100644
--- a/src/pyspector/_rust_core/src/graph/call_graph_builder.rs
+++ b/src/pyspector/_rust_core/src/graph/call_graph_builder.rs
@@ -11,46 +11,121 @@ pub struct CallGraph<'a> {
     pub file_contents: HashMap<String, String>,
 }
 
+/// Returns true if a file path should be excluded from taint analysis.
+/// Excluded: test infrastructure, documentation/example/tutorial code, project
+/// maintenance scripts, and machine-generated pydoc_data files.
+///
+/// These files are excluded because:
+/// - Test files: test functions never receive real attacker-controlled data,
+///   so they only add functions without adding security-relevant taint paths.
+/// - Docs/examples: tutorial and example code uses hardcoded credentials,
+///   simplified patterns, and intentional anti-patterns for illustration.
+/// - Scripts: documentation generation, release management, linting and CI
+///   helpers are operator-run tools, not user-facing entry points.
+/// - pydoc_data: language docs/data stored as string literals, not entry points.
+///
+/// Matching is case-insensitive and treats `\` and `/` alike; directory names
+/// match at any depth, including the first component of a relative path.
+fn is_test_file(file_path: &str) -> bool {
+    // Directories whose contents are never production entry points.
+    const EXCLUDED_DIRS: &[&str] = &[
+        "docs", "docs_src", "examples", "example", "tutorial", "tutorials",
+        "samples", "demo", "scripts", "pydoc_data",
+    ];
+    // File or directory name prefixes that mark test infrastructure.
+    const TEST_PREFIXES: &[&str] = &["/test", "/conftest", "/fixture"];
+
+    // Normalise once: lowercase, forward slashes, and a leading '/' so that a
+    // relative path such as "docs/conf.py" or "conftest.py" is matched exactly
+    // like the same name deeper in the tree.
+    let path = format!("/{}", file_path.to_lowercase().replace('\\', "/"));
+
+    if TEST_PREFIXES.iter().any(|prefix| path.contains(*prefix))
+        || path.ends_with("_test.py")
+        || (path.contains("/mock") && path.ends_with(".py"))
+    {
+        return true;
+    }
+
+    // Only directory components count here, so drop the final (file) component.
+    // `path` always contains '/', so `rsplit_once` cannot fail; the fallback is
+    // purely defensive.
+    let parent = path.rsplit_once('/').map_or("", |(dirs, _file)| dirs);
+    parent
+        .split('/')
+        .any(|component| EXCLUDED_DIRS.contains(&component))
+}
+
 // Builds a call graph from all parsed Python files.
 pub fn build_call_graph(py_files: &[PythonFile]) -> CallGraph {
-    println!("[*] Building call graph from {} files", py_files.len());
-    
+    let production_files: Vec<&PythonFile> = py_files
+        .iter()
+        .filter(|f| !is_test_file(&f.file_path))
+        .collect();
+
+    println!("[*] Building call graph from {}/{} files (test files excluded from taint analysis)",
+             production_files.len(), py_files.len());
+
     let mut call_graph = CallGraph::default();
     let mut all_funcs = HashMap::new();
 
-    // First pass: find all function definitions and store their content.
-    for file in py_files {
-        println!("[*] Processing file: {}", file.file_path);
-        
+    // First pass: find all function definitions.
+    // Removed per-file and per-function println — 18k+ print syscalls dominated runtime.
+    for file in &production_files {
         if let Some(ast) = &file.ast {
             let mut funcs_in_file = Vec::new();
             find_functions(ast, &mut funcs_in_file);
-            
+
             for func_node in funcs_in_file {
                 if let Some(func_name) = get_name_from_node(func_node) {
                     let func_id = format!("{}::{}", file.file_path, func_name);
-                    println!("[*] Found function: {}", func_id);
                     all_funcs.insert(func_id, func_node);
                 }
             }
         }
         call_graph.file_contents.insert(file.file_path.clone(), file.content.clone());
     }
-    
+
     call_graph.functions = all_funcs;
     println!("[+] Found {} total functions", call_graph.functions.len());
 
-    // Second pass: find all call sites in each function.
+    // Build a name index: bare_function_name → [func_id, ...] for O(1) call resolution.
+    // Without this index, Pass 2 is O(functions × call_sites × functions) — O(n²).
+    // With the index it's O(functions × call_sites) — O(n).
+    let mut name_index: HashMap<String, Vec<String>> = HashMap::new();
+    for func_id in call_graph.functions.keys() {
+        // Extract bare name after "::" (may include class prefix like "ClassName.method")
+        if let Some(bare) = func_id.rsplit("::").next() {
+            name_index.entry(bare.to_string()).or_default().push(func_id.clone());
+            // Also index just the method suffix for "ClassName.method" → "method"
+            if let Some(method) = bare.rsplit('.').next() {
+                if method != bare {
+                    name_index.entry(method.to_string()).or_default().push(func_id.clone());
+                }
+            }
+        }
+    }
+
+    // Second pass: resolve call sites using the O(1) index.
     for (func_id, func_node) in &call_graph.functions {
         let mut calls = HashSet::new();
         let mut call_sites = Vec::new();
         find_call_sites(func_node, &mut call_sites);
-        
+
         for call_node in call_sites {
             let callee_name = get_full_call_name(call_node);
-            for (potential_target_id, _) in &call_graph.functions {
-                if potential_target_id.ends_with(&format!("::{}", callee_name)) {
-                    calls.insert(potential_target_id.clone());
+            if callee_name.is_empty() { continue; }
+
+            // Direct lookup: exact callee name
+            if let Some(targets) = name_index.get(&callee_name) {
+                calls.extend(targets.iter().cloned());
+            }
+            // Method suffix lookup: "obj.method" → "method"
+            if let Some(method) = callee_name.rsplit('.').next() {
+                if method != callee_name {
+                    if let Some(targets) = name_index.get(method) {
+                        calls.extend(targets.iter().cloned());
+                    }
                 }
             }
         }
diff --git a/src/pyspector/_rust_core/src/graph/cfg_builder.rs b/src/pyspector/_rust_core/src/graph/cfg_builder.rs
index 9b62122a..2052c502 100644
--- a/src/pyspector/_rust_core/src/graph/cfg_builder.rs
+++ b/src/pyspector/_rust_core/src/graph/cfg_builder.rs
@@ -23,6 +23,11 @@ fn build_from_statements(
     for stmt in stmts {
         match stmt.node_type.as_str() {
             "If" => {
+                // Add the If node to the current block so taint analysis can scan
+                // the condition for call-site taint (e.g. `if not plugin.initialize(config)`)
+                if let Some(block) = cfg.blocks.get_mut(&current_block_id) {
+                    block.statements.push(stmt.clone());
+                }
                 // Create blocks for the two branches and the merge point after the if/else
                 let if_body_block_id = cfg.add_block().id;
                 let merge_block_id = cfg.add_block().id;
@@ -55,6 +60,12 @@ fn build_from_statements(
                 current_block_id = merge_block_id;
             }
             "For" | "While" => {
+                // Add the For/While node to the current block so taint analysis
+                // can see the loop variable binding (target = iter element).
+                if let Some(block) = cfg.blocks.get_mut(&current_block_id) {
+                    block.statements.push(stmt.clone());
+                }
+
                 let loop_body_id = cfg.add_block().id;
                 let after_loop_id = cfg.add_block().id;
 
@@ -83,6 +94,31 @@ fn build_from_statements(
                 // A break creates a new, unconnected block after it to stop flow
                 current_block_id = cfg.add_block().id;
             }
+            // With statement: add the With node itself (so taint analysis can handle
+            // `with X as y` bindings), then unfold the body into the same block so
+            // body statements are processed in sequence after `y` is tainted.
+            "With" => {
+                if let Some(block) = cfg.blocks.get_mut(&current_block_id) {
+                    block.statements.push(stmt.clone());
+                }
+                if let Some(body) = stmt.children.get("body") {
+                    current_block_id = build_from_statements(cfg, body, current_block_id, loop_exits);
+                }
+            }
+            // Try/except: unfold the body so taint flows through guarded calls.
+            // Exceptions are uncommon taint paths; we conservatively analyze the
+            // try-body as if it executes sequentially (no exception handling model).
+            "Try" | "TryStar" => {
+                if let Some(body) = stmt.children.get("body") {
+                    current_block_id = build_from_statements(cfg, body, current_block_id, loop_exits);
+                }
+                // Also process the else branch (runs when no exception)
+                if let Some(orelse) = stmt.children.get("orelse") {
+                    if !orelse.is_empty() {
+                        current_block_id = build_from_statements(cfg, orelse, current_block_id, loop_exits);
+                    }
+                }
+            }
             // For all other statements, just add them to the current block
             _ => {
                 if let Some(block) = cfg.blocks.get_mut(&current_block_id) {
diff --git a/src/pyspector/_rust_core/src/graph/representation.rs b/src/pyspector/_rust_core/src/graph/representation.rs
index b6c417b7..88052838 100644
--- a/src/pyspector/_rust_core/src/graph/representation.rs
+++ b/src/pyspector/_rust_core/src/graph/representation.rs
@@ -23,7 +23,7 @@ impl BasicBlock {
     }
 }
 
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 pub struct ControlFlowGraph {
     pub blocks: HashMap<BlockId, BasicBlock>,
     pub entry: BlockId,
diff --git a/src/pyspector/_rust_core/src/rules.rs b/src/pyspector/_rust_core/src/rules.rs
index 1af59fdc..e4d38524 100644
--- a/src/pyspector/_rust_core/src/rules.rs
+++ b/src/pyspector/_rust_core/src/rules.rs
@@ -36,22 +36,46 @@ pub struct Rule {
     /// Rule-level glob to exclude specific files (stacks on top of [defaults]).
     #[serde(default)]
     pub exclude_file_pattern: Option<String>,
+    /// Regex checked against the FULL FILE CONTENT. If the file content matches,
+    /// this rule is suppressed for that file regardless of line-level matches.
+    /// Use to avoid library-specific FPs: e.g. suppress yaml.load() findings in
+    /// files that import ruamel.yaml (which is safe by default).
+    /// Example: file_content_exclude = "from ruamel\\.yaml|import ruamel"
+    #[serde(with = "serde_regex", default)]
+    pub file_content_exclude: Option<regex::Regex>,
 }
 
 impl Rule {
-    /// Returns true if `file_path` is excluded by this rule's own exclude_file_pattern
-    /// OR by the global defaults.
+    /// Path-only check: true if the global defaults or this rule's own path patterns
+    /// match `file_path`. Content is passed as "", so file_content_exclude is not consulted.
     pub fn is_file_excluded(&self, file_path: &str, defaults: &Defaults) -> bool {
+        self.is_excluded(file_path, "", defaults)
+    }
+
+    /// Full exclusion check: path patterns + optional file content regex.
+    /// Pass file content when available for the most accurate result.
+    pub fn is_excluded(&self, file_path: &str, content: &str, defaults: &Defaults) -> bool {
         // Check global default exclusions first
         for pattern in &defaults.exclude_file_patterns {
             if wildmatch::WildMatch::new(pattern).matches(file_path) {
                 return true;
             }
         }
-        // Then rule-level exclusion
+        // Then rule-level file path exclusion (supports comma-separated patterns)
         if let Some(efp) = &self.exclude_file_pattern {
-            if wildmatch::WildMatch::new(efp).matches(file_path) {
-                return true;
+            for pattern in efp.split(',') {
+                if wildmatch::WildMatch::new(pattern.trim()).matches(file_path) {
+                    return true;
+                }
+            }
+        }
+        // Finally, file content exclusion — suppress rule if the file imports
+        // a library or uses a pattern that makes the rule inapplicable.
+        if !content.is_empty() {
+            if let Some(fce) = &self.file_content_exclude {
+                if fce.is_match(content) {
+                    return true;
+                }
             }
         }
         false
@@ -74,14 +98,48 @@ pub struct TaintSinkRule {
     pub vulnerability_id: String,
     pub description: String,
     pub function_call: String,
+    /// Index of the positional argument that must be tainted to trigger this sink.
+    /// Ignored when vulnerable_receiver = true.
+    #[serde(default)]
     pub vulnerable_parameter_index: usize,
+    /// When true, the method *receiver* (the object before the dot) must be
+    /// tainted rather than a positional argument.
+    /// e.g. tainted_template.format(...)  →  receiver "tainted_template" is the risk.
+    #[serde(default)]
+    pub vulnerable_receiver: bool,
+    /// When true, this sink is a method call (called as obj.method()), so matching
+    /// uses ends_with(".function_call"). When false (default), it is a direct builtin
+    /// call (e.g. set(), open()) matched with exact equality to prevent "cache.set"
+    /// matching the "set" builtin sink.
+    #[serde(default)]
+    pub is_method: bool,
+    /// Which taint origins trigger this sink (default = "all" attacker-controlled).
+    /// "injectable_only" — only fires for HttpRequest/External, NOT ShellSanitized.
+    ///   Use for shell injection sinks (PY102): shlex.quote() is a valid mitigation.
+    /// "all" (default) — fires for HttpRequest, External, AND ShellSanitized.
+    ///   Use for path/SQL/URL sinks where shlex.quote doesn't help.
+    #[serde(default = "default_triggers_on")]
+    pub triggers_on: String,
+    /// When set, only this named keyword argument triggers the sink.
+    /// e.g. vulnerable_keyword = "password" fires only on create(..., password=tainted).
+    /// When absent, any tainted positional or keyword arg may trigger.
+    #[serde(default)]
+    pub vulnerable_keyword: Option<String>,
 }
 
+fn default_triggers_on() -> String { "all".to_string() }
+
 #[derive(Debug, Deserialize)]
 pub struct TaintSanitizerRule {
     pub id: String,
     pub description: String,
     pub function_call: String,
+    /// When set, the sanitizer does NOT clear taint but transforms its origin.
+    /// e.g. transforms_to = "ShellSanitized" means shlex.quote() turns
+    /// HttpRequest taint into ShellSanitized taint — still risky for path
+    /// traversal / f-strings, but safe for shell injection (PY102).
+    #[serde(default)]
+    pub transforms_to: Option<String>,
 }
 
 #[derive(Debug, Deserialize)]
diff --git a/src/pyspector/cli.py b/src/pyspector/cli.py
index 50ae9ca5..af1608cf 100644
--- a/src/pyspector/cli.py
+++ b/src/pyspector/cli.py
@@ -645,13 +645,14 @@ def _execute_scan(
             )
 
     # ── AST Generation ────────────────────────────────────────────────────
+    t_parse = time.time()
     ast_stats_meta: Dict[str, int] = {}
     python_files_data = get_python_file_asts(
         scan_path,
         enable_syntax_warnings=syntax_warnings,
         _stats_meta=ast_stats_meta,
     )
-    click.echo(f"[*] Successfully parsed {len(python_files_data)} Python files")
+    click.echo(f"[*] Successfully parsed {len(python_files_data)} Python files in {time.time()-t_parse:.2f}s")
 
     if stats:
         stats.record_files(
@@ -704,10 +705,12 @@ def _execute_scan(
             click.echo(click.style(f"Error during supply chain scan: {e}", fg="red"))
 
     # ── Run Scan (Rust core) ───────────────────────────────────────────────
+    t_rust = time.time()
     try:
         raw_issues = run_scan(
             str(scan_path.resolve()), rules_toml_str, config, python_files_data
         )
+        click.echo(f"[*] Rust core scan: {time.time()-t_rust:.2f}s")
     except ValueError as e:
         click.echo(
             click.style(
diff --git a/src/pyspector/plugin_system.py b/src/pyspector/plugin_system.py
index d40e662a..91bd4564 100644
--- a/src/pyspector/plugin_system.py
+++ b/src/pyspector/plugin_system.py
@@ -147,6 +147,13 @@ def validate_plugin_code(plugin_path: Path) -> tuple[bool, str]:
             "eval", "exec", "compile", "__import__",
             # Reflection/introspection
             "vars", "getattr",
+            # Sandbox escape via class hierarchy traversal —
+            # object.__subclasses__() retrieves ALL loaded classes (including subprocess.Popen)
+            # without any import, bypassing every import-level check.
+            "__subclasses__",
+            # Globals access via function object — exposes the full module namespace
+            # of any function, including builtins and imported modules.
+            "__globals__", "__builtins__",
             # importlib — dynamic module loading (all public entry-points)
             "importlib.import_module",
             "importlib.util.spec_from_file_location",
@@ -201,6 +208,9 @@ def validate_plugin_code(plugin_path: Path) -> tuple[bool, str]:
             "getoutput", "getstatusoutput",
             "exec", "eval", "compile",
             "load_module", "exec_module",  # importlib loader API
+            # Sandbox escape primitives
+            "__subclasses__", "__globals__", "__builtins__",
+            "__reduce__", "__reduce_ex__",  # pickle deserialization hooks
         }
  
         warning_calls: set[str] = {"open", "builtins.open"}
@@ -303,7 +313,7 @@ def visit_Call(self, node: ast.Call) -> None:
  
                 else:
                     simplified = name.replace("builtins.", "")
- 
+
                     if simplified in fatal_calls:
                         detected_fatal.add(simplified)
                     elif simplified in warning_calls:
@@ -316,7 +326,15 @@ def visit_Call(self, node: ast.Call) -> None:
                             detected_fatal.add(normalised)
                         elif normalised in warning_calls:
                             detected_warnings.add(normalised)
- 
+
+                    # Also block dangerous dunder methods regardless of receiver:
+                    # object.__subclasses__(), cls.__subclasses__(), etc.
+                    # These are sandbox-escape primitives and have no place in plugins.
+                    if "." in simplified:
+                        method_attr = simplified.rsplit(".", 1)[-1]
+                        if method_attr in dangerous_opaque_attrs:
+                            detected_fatal.add(f"<receiver>.{method_attr}()")
+
                 self.generic_visit(node)
  
         Analyzer().visit(tree)
diff --git a/src/pyspector/reporting.py b/src/pyspector/reporting.py
index fb355ee5..2e58b98e 100644
--- a/src/pyspector/reporting.py
+++ b/src/pyspector/reporting.py
@@ -132,7 +132,7 @@ def to_json(self) -> str:
                     "file_path": issue.file_path,
                     "line_number": issue.line_number,
                     "code": issue.code,
-                    "severity": _severity_key(issue),
+                    "severity": str(issue.severity).split(".")[-1],
                     "remediation": issue.remediation,
                 }
                 for issue in self.issues
diff --git a/src/pyspector/rules/built-in-rules.toml b/src/pyspector/rules/built-in-rules.toml
index 7a7c11f5..8fd5df65 100644
--- a/src/pyspector/rules/built-in-rules.toml
+++ b/src/pyspector/rules/built-in-rules.toml
@@ -7,10 +7,32 @@
 # File-path globs excluded from ALL rules unless a rule opts out.
 # Add paths here instead of repeating exclude_file_pattern on each rule.
 exclude_file_patterns = [
-  "*tests*",       # test directories and test_*.py / *_test.py files
-  "*fixtures*",    # fixture data
-  "*testdata*",    # test data
-  "*conftest*",    # pytest configuration
+  "*tests*",        # test directories and test_*.py / *_test.py files
+  "*fixtures*",     # fixture data — never production code
+  "*testdata*",     # test data
+  "*conftest*",     # pytest configuration
+  "*/test/*",       # test infrastructure directories (e.g. django/test/)
+  "*lorem_ipsum*",  # demo/placeholder text generators
+  "*fake_data*",    # synthetic data generators
+  "*sample_data*",  # sample data files
+  # Documentation and example code — hardcoded credentials/simplified patterns are intentional.
+  # Patterns anchor on path separators to avoid substring matches (e.g. "frutadocs").
+  "*/docs/*",       # /docs/ as a path component (nested)
+  "docs/*",         # top-level docs/
+  "*/docs_src/*",   # /docs_src/ — documentation source (used by many projects)
+  "docs_src/*",     # top-level docs_src/
+  "*/examples/*",   # /examples/ as a path component
+  "examples/*",     # top-level examples/
+  "*/example/*",    # /example/ as a path component
+  "example/*",      # top-level example/
+  "*/samples/*",    # /samples/
+  "*/demo/*",       # /demo/
+  "*/tutorial/*",   # /tutorial/
+  "*/tutorials/*",  # /tutorials/
+  # Machine-generated data files — contain language docs/data as string literals,
+  # not executable code. Pattern-matching against these produces 100% FPs.
+  "*/pydoc_data/*", # Python language docs embedded as string dictionaries
+  "pydoc_data/*",
 ]
 
 # Rules disabled globally because they produce 100% false positives by flagging
@@ -18,36 +40,9 @@ exclude_file_patterns = [
 # These rules have no security value on their own without taint analysis.
 # Re-enable any of these per-project by removing the ID from this list.
 disabled_rule_ids = [
-  # Python built-in functions — not security sinks without taint context
-  "ABS1089", "ALL1107", "ANY1104", "BOOL1035", "BYTEARRAY1008", "BYTES1005",
-  "CALLABLE1131", "CAPITALIZE954", "CASEFOLD918", "CHR1017", "CLASSMETHOD1125",
-  "COUNT909", "DECODE882", "DICT1050", "DIR849", "DIVMOD1098",
-  "ENCODE885", "ENDSWITH900", "ENUMERATE1059", "FILTER1068", "FIND903",
-  "FLOAT1029", "FROZENSET1053", "HASH1137", "HEX1020", "ID1134",
-  "INDEX906", "INT1038", "ISALPHA972", "ISASCII975", "ISDIGIT981",
-  "ISIDENTIFIER984", "ISINSTANCE855", "ISPRINTABLE993", "ISSPACE996",
-  "ISUPPER1002", "ITER1110", "JOIN876", "LEN1101", "LIST1041",
-  "LJUST930", "LOWER888", "LSTRIP957", "MAP1065", "MAX1083",
-  "MEMORYVIEW1011", "MIN1086", "NEXT1113", "ORD1014", "PARTITION936",
-  "PRINT1146", "PROPERTY1119", "RANGE1056", "REDUCE1071", "REMOVEPREFIX963",
-  "REMOVESUFFIX966", "REPLACE879", "REPR858", "REVERSED1077", "RJUST933",
-  "ROUND1092", "RPARTITION939", "RSPLIT942", "RSTRIP960", "SET1047",
-  "SLICE1116", "SORTED1074", "SPLIT873", "SPLITLINES945", "STARTSWITH897",
-  "STATICMETHOD1122", "STR861", "STRIP894", "SUM1080", "SUPER1128",
-  "TITLE951", "TRANSLATE912", "TUPLE1044", "TYPE852", "UPPER891",
-  "VARS840", "ZIP1062",
-  # Medium-noise rules: too broad without taint analysis
-  "FSTRING867",    # every f-string is NOT an injection risk
-  "GETATTR828",    # every getattr() is NOT unsafe
-  "SETATTR831",    # every setattr() is NOT unsafe
-  "HASATTR837",    # every hasattr() is NOT a disclosure risk
-  "DELATTR834",    # every delattr() is NOT unsafe
-  "FORMAT864",     # every .format() is NOT an injection risk
-  "DJG513",        # csrf_exempt covered by CSRF747 already
-  "MIME786",       # HttpResponse with content_type is not a vulnerability
-  "BRUTE765",      # login_required is not "missing brute force protection"
-  "INFO738",       # traceback.print_exc is not information disclosure by itself
-  "SER522",        # serializers.serialize() is not inherently unsafe
+  # Valid concept, needs taint or context to avoid FPs before activating:
+  "CACHE756",     # cache.set(request.*) — cache poisoning; needs taint to confirm HTTP origin
+  "INFO738",      # traceback.print_exc() — information disclosure; needs prod-vs-test context
 ]
 
 # -------------------------------------------
@@ -60,2153 +55,2097 @@ description = "Data from a web request is considered tainted."
 function_call = "request.get"
 taint_target = "return"
 
-[[taint_sink]]
-id = "SK001"
-vulnerability_id = "PY102" # This sink triggers the high-confidence Command Injection rule
-description = "Data is passed to a command execution function."
-function_call = "subprocess.run"
-vulnerable_parameter_index = 0
+[[taint_source]]
+id = "TS002"
+description = "Django GET parameter is tainted."
+function_call = "request.GET.get"
+taint_target = "return"
 
-[[taint_sanitizer]]
-id = "SN001"
-description = "Shell argument escaping sanitizes data for command execution."
-function_call = "shlex.quote"
+[[taint_source]]
+id = "TS003"
+description = "Django POST parameter is tainted."
+function_call = "request.POST.get"
+taint_target = "return"
 
-# -------------------------------------------
-# SECTION: Injection (OWASP A03:2021)
-# -------------------------------------------
+[[taint_source]]
+id = "TS004"
+description = "Flask query string parameter is tainted."
+function_call = "request.args.get"
+taint_target = "return"
 
-[[rule]]
-id = "PY102"
-description = "Command Injection detected via Taint Analysis."
-severity = "Critical"
-confidence = "High"
-remediation = "User-controlled data reached a command execution function without sanitization. Use 'shlex.quote()' to escape arguments or avoid passing user input to shell commands entirely."
-# NOTE: This rule has no 'pattern' or 'ast_match'. It is triggered ONLY by the taint engine.
+[[taint_source]]
+id = "TS005"
+description = "Flask form field is tainted."
+function_call = "request.form.get"
+taint_target = "return"
 
-[[rule]]
-id = "PY001"
-description = "Use of 'eval()' is highly dangerous."
-severity = "High"
-remediation = "Avoid 'eval()'. Use safer alternatives like 'ast.literal_eval' for data parsing."
-ast_match = "Call(func.id=eval)"
-file_pattern = "*.py"
+[[taint_source]]
+id = "TS006"
+description = "Interactive user input is tainted."
+function_call = "input"
+taint_target = "return"
 
-[[rule]]
-id = "PY103"
-description = "Use of os.system is a command injection risk."
-severity = "High"
-remediation = "Avoid 'os.system'. Use the 'subprocess' module with command and arguments as a list."
-ast_match = "Call(func.value.id=os, func.attr=system)"
-file_pattern = "*.py"
+[[taint_source]]
+id = "TS007"
+description = "Environment variable is considered tainted."
+function_call = "os.environ.get"
+taint_target = "return"
 
-[[rule]]
-id = "PY101"
-description = "Potential SQL injection via string formatting in database query."
-severity = "Critical"
-confidence = "High"
-remediation = "Use parameterized queries (e.g., cursor.execute('SELECT * FROM users WHERE name = ?', (name,))) instead of string formatting."
-pattern = "\\.(execute|executemany)\\s*\\(\\s*f?[\"'].*SELECT.*(FROM|UPDATE|DELETE).*%s.*[\"']"
-file_pattern = "*.py"
+[[taint_source]]
+id = "TS008"
+description = "CLI argument via argparse — user-controlled input."
+function_call = "parse_args"
+taint_target = "return"
 
-[[rule]]
-id = "PY104"
-description = "LDAP injection may be possible with string formatting."
-severity = "High"
-remediation = "Use a proper LDAP escaping library for any user-controlled data in LDAP queries."
-pattern = "\\.search_s\\s*\\(.*f[\"']"
-file_pattern = "*.py"
+[[taint_source]]
+id = "TS009"
+description = "CLI argument via click — user-controlled input."
+function_call = "click.argument"
+taint_target = "return"
 
-[[rule]]
-id = "PY105"
-description = "Potential XSS vulnerability with mark_safe or Markup."
-severity = "Medium"
-remediation = "Ensure that data passed to 'mark_safe' or 'Markup' is from a trusted source or has been properly sanitized."
-pattern = "(mark_safe|Markup)\\s*\\("
-file_pattern = "*.py"
+[[taint_source]]
+id = "TS010"
+description = "sys.argv — raw command-line arguments, user-controlled."
+function_call = "sys.argv"
+taint_target = "return"
 
-[[rule]]
-id = "PY106"
-description = "Use of subprocess.run with shell=True is a command injection risk."
-severity = "High"
-remediation = "Avoid shell=True with subprocess.run. Pass commands as a list instead of a string."
-ast_match = "Call(func.value.id=subprocess, func.attr=run)"
-file_pattern = "*.py"
+# HTTP CLIENT RESPONSE SOURCES
+# Data received from external HTTP APIs is attacker-controlled when the API
+# server is compromised or a MITM attack is in progress.
 
-[[rule]]
-id = "PY107"
-description = "Unsafe deserialization with 'yaml.load'."
-severity = "High"
-remediation = "Use 'yaml.safe_load()' instead of 'yaml.load()'."
-ast_match = "Call(func.value.id=yaml, func.attr=load)"
-file_pattern = "*.py"
-# Do not flag when SafeLoader or BaseLoader is explicitly passed
-exclude_pattern = "Loader\\s*=\\s*(yaml\\.)?(Safe|Base)Loader"
+[[taint_source]]
+id = "TS011"
+description = "HTTP response streaming line iterator — network data is tainted."
+function_call = ".iter_lines"
+taint_target = "return"
+# Leading dot matches any receiver: s.iter_lines(), response.iter_lines()
+
+[[taint_source]]
+id = "TS012"
+description = "HTTP response streaming text iterator — network data is tainted."
+function_call = ".iter_text"
+taint_target = "return"
+
+[[taint_source]]
+id = "TS013"
+description = "HTTP response streaming bytes/raw iterator — network data is tainted."
+function_call = ".iter_bytes"
+taint_target = "return"
+
+[[taint_source]]
+id = "TS013B"
+description = "HTTP response raw chunk iterator."
+function_call = ".iter_raw"
+taint_target = "return"
+
+[[taint_source]]
+id = "TS014"
+description = "HTTP response .json() method on any response object — parsed API data is tainted."
+function_call = ".json"
+taint_target = "return"
+# Matches: local_run.json(), response.json(), res.json(), new_api_call().json()
+# Does NOT match: json.loads(), json.dumps() (those have 'json' as module prefix, not method)
+
+[[taint_source]]
+id = "TS015"
+description = "marshal.loads() returns a deserialized Python code object — treat as dangerous taint."
+function_call = "marshal.loads"
+taint_target = "return"
+# The deserialized code object is dangerous bytecode from an untrusted source.
+# Any function created from it (FunctionType, exec) should be flagged.
+# Works with DESER723 (pattern) and SK_DESER724 (taint sink for FunctionType).
 
 # -------------------------------------------
-# SECTION: Cryptographic Failures (OWASP A02:2021)
+# SECTION: Taint Sinks
 # -------------------------------------------
 
-[[rule]]
-id = "PY201"
-description = "Use of weak hashing algorithm MD5."
-severity = "Medium"
-remediation = "Use a stronger hashing algorithm like SHA-256 or a password-specific hashing function like bcrypt."
-ast_match = "Call(func.value.id=hashlib, func.attr=md5)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK001"
+vulnerability_id = "PY102"
+description = "Data is passed to a command execution function."
+function_call = "subprocess.run"
+vulnerable_parameter_index = 0
+triggers_on = "shell_injectable"
 
-[[rule]]
-id = "PY202"
-description = "Use of broken hashing algorithm SHA1."
-severity = "Medium"
-remediation = "Use a stronger hashing algorithm like SHA-256."
-ast_match = "Call(func.value.id=hashlib, func.attr=sha1)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK001B"
+vulnerability_id = "PY102"
+description = "User-controlled command string passed to asyncio create_subprocess_shell()."
+function_call = "create_subprocess_shell"
+vulnerable_parameter_index = 0
+is_method = false
+triggers_on = "shell_injectable"
 
-[[rule]]
-id = "PY203"
-description = "Use of insecure SSL/TLS protocol version."
-severity = "High"
-remediation = "Use 'ssl.PROTOCOL_TLS' or higher. Avoid SSLv2, SSLv3, and TLSv1.0/1.1."
-pattern = "ssl\\.PROTOCOL_(SSLv2|SSLv3|TLSv1|TLSv1_1)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK001C"
+vulnerability_id = "PY102"
+description = "User-controlled args passed to asyncio create_subprocess_exec()."
+function_call = "create_subprocess_exec"
+vulnerable_parameter_index = 0
+is_method = false
+triggers_on = "shell_injectable"
 
-[[rule]]
-id = "PY204"
-description = "Use of the 'pycrypto' library is discouraged due to known vulnerabilities."
-severity = "High"
-remediation = "Migrate from 'pycrypto' to a more secure and actively maintained library like 'pycryptodome'."
-pattern = "from\\s+Crypto|import\\s+Crypto"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK002"
+vulnerability_id = "GETATTR828"
+description = "Tainted attribute name passed to getattr() — attacker controls which attribute is accessed."
+function_call = "getattr"
+vulnerable_parameter_index = 1
 
-[[rule]]
-id = "PY205"
-description = "Use of PyNaCl with low-level functions can be insecure if misused."
-severity = "Low"
-confidence = "Low"
-remediation = "Prefer using high-level APIs like 'Box' and 'SecretBox' unless low-level functions are explicitly required and understood."
-pattern = "nacl\\.low_level"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK003"
+vulnerability_id = "OPEN1149"
+description = "Tainted file path passed to open() — attacker may read/write arbitrary files."
+function_call = "open"
+vulnerable_parameter_index = 0
 
-# -------------------------------------------
-# SECTION: Insecure Deserialization & Design (OWASP A08:2021)
-# -------------------------------------------
+[[taint_sink]]
+id = "SK004"
+vulnerability_id = "PY103"
+description = "Tainted command passed to os.system()."
+function_call = "os.system"
+vulnerable_parameter_index = 0
+triggers_on = "shell_injectable"
 
-[[rule]]
-id = "PY002"
-description = "Use of 'pickle.loads' for deserialization can lead to remote code execution."
-severity = "High"
-remediation = "Use a safer serialization format like JSON if deserializing untrusted data."
-ast_match = "Call(func.value.id=pickle, func.attr=loads)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK005"
+vulnerability_id = "SETATTR831"
+description = "Tainted attribute name passed to setattr() — attacker writes arbitrary object attributes."
+function_call = "setattr"
+vulnerable_parameter_index = 1
 
-[[rule]]
-id = "PY301"
-description = "Use of 'pickle.load' for deserialization can lead to remote code execution."
-severity = "High"
-remediation = "Use a safer serialization format like JSON if deserializing untrusted data."
-ast_match = "Call(func.attr=load, func.value.id=pickle)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK006"
+vulnerability_id = "DELATTR834"
+description = "Tainted attribute name passed to delattr() — attacker deletes arbitrary object attributes."
+function_call = "delattr"
+vulnerable_parameter_index = 1
 
-[[rule]]
-id = "PY302"
-description = "Use of 'yaml.load()' is insecure. Use 'yaml.safe_load()'."
-severity = "High"
-remediation = "Always use 'yaml.safe_load()' to prevent arbitrary code execution from malicious YAML."
-pattern = "^\\s*[^#]*yaml\\.load" # This regex ignores comment lines
-file_pattern = "*.py"
-# Do not flag when SafeLoader or safe_load is used
-exclude_pattern = "Loader\\s*=\\s*(yaml\\.)?(Safe|Base)Loader|yaml\\.safe_load"
+[[taint_sink]]
+id = "SK007"
+vulnerability_id = "SER522"
+description = "Tainted format/queryset arg[0] to serializer."
+function_call = "serialize"
+vulnerable_parameter_index = 0
 
-[[rule]]
-id = "PY303"
-description = "XML parsing with 'xml.etree.ElementTree' is vulnerable to XML bombs."
-severity = "High"
-remediation = "Use 'defusedxml.ElementTree' to parse untrusted XML data safely."
-pattern = "xml\\.etree\\.ElementTree\\.(parse|fromstring)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK007B"
+vulnerability_id = "SER522"
+description = "Tainted data object (arg[1]) passed to serializer."
+function_call = "serialize"
+vulnerable_parameter_index = 1
 
-[[rule]]
-id = "PY304"
-description = "Insecure temporary file creation may lead to race conditions."
-severity = "Medium"
-remediation = "Use 'tempfile.mkstemp()' instead of 'tempfile.mktemp()' for secure temporary file creation."
-pattern = "tempfile\\.mktemp"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK008"
+vulnerability_id = "RAND810"
+description = "Tainted seed passed to random.seed() — predictable PRNG output."
+function_call = "random.seed"
+vulnerable_parameter_index = 0
 
-[[rule]]
-id = "PY305"
-description = "Use of exec() enables arbitrary code execution"
-severity = "Critical"
-ast_match = "Call(func.id=exec)"
+[[taint_sink]]
+id = "SK009"
+vulnerability_id = "FORMAT864"
+description = "Tainted format string used as template in .format() — SSTI-like injection."
+function_call = "format"
+is_method = true
+vulnerable_receiver = true
+# Only fires when the FORMAT STRING ITSELF is tainted (receiver = the template).
+# Tainted ARGUMENTS to .format() are not themselves dangerous — the receiver
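+# controls the template structure. No vulnerable_parameter_index is set on this sink, so tainted
+# arguments alone never fire it. NOTE(review): the FP examples originally cited here (os.replace(), code.replace(), node.replace()) are .replace() calls, not .format() — confirm intent.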
 
-[[rule]]
-id = "PY306"  
-description = "Unsafe pickle.loads() can execute arbitrary code"
-severity = "High"
-ast_match = "Call(func.value.id=pickle, func.attr=loads)"
+[[taint_sink]]
+id = "SK010"
+vulnerability_id = "REPLACE879"
+description = "Tainted first arg (search string) in .replace() — filter bypass possible."
+function_call = "replace"
+vulnerable_parameter_index = 0
+is_method = true
+vulnerable_receiver = false
+
+[[taint_sink]]
+id = "SK010B"
+vulnerability_id = "REPLACE879"
+description = "Tainted second arg (replacement string) in .replace() — injection via replacement."
+function_call = "replace"
+vulnerable_parameter_index = 1
+is_method = true
+vulnerable_receiver = false
+
+[[taint_sink]]
+id = "SK011"
+vulnerability_id = "TRANSLATE912"
+description = "Tainted translation table in .translate() — sanitization bypass."
+function_call = "translate"
+vulnerable_parameter_index = 0
+is_method = true
+vulnerable_receiver = false
 
 # -------------------------------------------
-# SECTION: Security Misconfiguration (OWASP A05:2021)
+# SECTION: A_SINK rules — attribute/object inspection
 # -------------------------------------------
 
-[[rule]]
-id = "G401"
-description = "Flask app is running with the development server in a non-debug context."
-severity = "Medium"
-confidence = "Low"
-remediation = "Use a production-ready WSGI server like Gunicorn or uWSGI instead of 'app.run()'."
-pattern = "app\\.run\\(host=.*0\\.0\\.0\\.0"
-file_pattern = "*.py"
-
-[[rule]]
-id = "G402"
-description = "Django DEBUG mode is enabled in a settings file."
-severity = "High"
-remediation = "Ensure DEBUG is set to False in production settings."
-pattern = "^\\s*DEBUG\\s*=\\s*True"
-file_pattern = "*settings*.py"
+[[taint_sink]]
+id = "SK012"
+vulnerability_id = "HASATTR837"
+description = "Tainted attribute name to hasattr() — attacker probes object's attributes."
+function_call = "hasattr"
+vulnerable_parameter_index = 1
 
-[[rule]]
-id = "G403"
-description = "Flask DEBUG mode is enabled."
-severity = "High"
-remediation = "Ensure app.debug is False or the DEBUG config variable is False in production."
-pattern = "app\\.run\\(.*debug=True"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK013"
+vulnerability_id = "VARS840"
+description = "Tainted object to vars() — attacker dumps object's internal dict."
+function_call = "vars"
+vulnerable_parameter_index = 0
 
-[[rule]]
-id = "G404"
-description = "Django's CSRF protection appears to be disabled globally."
-severity = "Critical"
-remediation = "Ensure 'django.middleware.csrf.CsrfViewMiddleware' is active in your MIDDLEWARE setting."
-pattern = "#.*CsrfViewMiddleware" # Simple check for commented-out middleware
-file_pattern = "*settings*.py"
+[[taint_sink]]
+id = "SK014"
+vulnerability_id = "DIR849"
+description = "Tainted object to dir() — attacker enumerates object attributes."
+function_call = "dir"
+vulnerable_parameter_index = 0
 
-[[rule]]
-id = "G405"
-description = "Requests made without certificate verification."
-severity = "High"
-remediation = "Remove 'verify=False' from requests calls to prevent man-in-the-middle attacks."
-ast_match = "Call(keywords.*.arg=verify, keywords.*.value.value=False)"
-file_pattern = "*.py"
+# SK015 (CALLABLE1131) removed — rule disabled, sink caused downstream FP propagation
 
 # -------------------------------------------
-# SECTION: Hardcoded Secrets (OWASP A07:2021)
+# SECTION: A_SINK rules — encoding / low-level byte operations
 # -------------------------------------------
 
-[[rule]]
-id = "G101"
-description = "Hardcoded password or secret detected."
-severity = "High"
-confidence = "Medium"
-remediation = "Store credentials in environment variables or a secrets management system."
-pattern = "(?i)(password|secret|api_key|token|authkey|bearer|cred|credentials)\\s*[:=]\\s*[\"']\\w{8,}[\"']"
-file_pattern = "*.py"
+# SK016 (BYTES1005) removed — rule disabled, sink caused downstream FP propagation
 
-[[rule]]
-id = "G102"
-description = "Hardcoded private key detected."
-severity = "Critical"
-confidence = "High"
-remediation = "Load private keys from a secure, encrypted file or secrets manager."
-pattern = "-----BEGIN (RSA|EC|OPENSSH|PGP) PRIVATE KEY-----"
-
-[[rule]]
-id = "G103"
-description = "Use of a blank password for a user or service."
-severity = "High"
-remediation = "Ensure all users and service accounts have strong, non-empty passwords."
-pattern = "(?i)(password|passwd|pass)\\s*[:=]\\s*[\"']\\s*[\"']"
-file_pattern = "*.py"
-
-[[rule]]
-id = "G104"
-description = "JWT secret is hardcoded."
-severity = "Critical"
-remediation = "Load JWT secrets from environment variables or a secrets management system."
-pattern = "(?i)(jwt_secret|jwt_key)\\s*[:=]\\s*[\"'].+[\"']"
-file_pattern = "*.py"
-
-# -------------------------------------------
-# SECTION: IaC and Configuration File Security
-# -------------------------------------------
-
-[[rule]]
-id = "DKR001"
-description = "Password or secret found in Dockerfile ENV instruction."
-severity = "High"
-remediation = "Use build-time arguments (ARG) with the --secret flag or a secrets management tool."
-pattern = "(?i)ENV\\s+(PASS|PASSWORD|SECRET|TOKEN|API_KEY)\\s+"
-file_pattern = "Dockerfile"
-
-[[rule]]
-id = "DKR002"
-description = "Use of 'latest' tag for base image is not recommended for production."
-severity = "Low"
-remediation = "Pin base images to a specific version digest for reproducible and secure builds."
-pattern = "FROM\\s+\\w+:latest"
-file_pattern = "Dockerfile"
-
-[[rule]]
-id = "DKR003"
-description = "Exposing Docker daemon socket inside a container is a security risk."
-severity = "Critical"
-remediation = "Avoid mounting '/var/run/docker.sock' into containers."
-pattern = "/var/run/docker\\.sock"
-file_pattern = "docker-compose*.y*ml"
-
-[[rule]]
-id = "K8S001"
-description = "Kubernetes container running in privileged mode."
-severity = "Critical"
-remediation = "Set 'securityContext.privileged' to 'false' or remove it."
-pattern = "privileged:\\s*true"
-file_pattern = "*.y*ml"
+[[taint_sink]]
+id = "SK017"
+vulnerability_id = "BYTEARRAY1008"
+description = "Tainted data passed to bytearray() — mutable buffer from untrusted input."
+function_call = "bytearray"
+vulnerable_parameter_index = 0
 
-[[rule]]
-id = "K8S002"
-description = "Kubernetes container allows privilege escalation."
-severity = "High"
-remediation = "Explicitly set 'securityContext.allowPrivilegeEscalation' to 'false'."
-pattern = "allowPrivilegeEscalation:\\s*true"
-file_pattern = "*.y*ml"
+# SK018 (MEMORYVIEW1011) removed — rule disabled
 
-[[rule]]
-id = "TF001"
-description = "Terraform AWS S3 bucket is publicly readable."
-severity = "Critical"
-remediation = "Set the 'acl' property of 'aws_s3_bucket' to 'private', not 'public-read' or 'public-read-write'."
-pattern = "acl\\s*=\\s*\"(public-read|public-read-write)\""
-file_pattern = "*.tf"
+[[taint_sink]]
+id = "SK019"
+vulnerability_id = "ORD1014"
+description = "Tainted character to ord() — extracts code point from untrusted input."
+function_call = "ord"
+vulnerable_parameter_index = 0
 
-[[rule]]
-id = "CFG001"
-description = "AWS credentials detected in configuration file."
-severity = "Critical"
-remediation = "Use IAM roles or environment variables for AWS credentials."
-pattern = "(?i)(aws_access_key_id|aws_secret_access_key)\\s*=\\s*[A-Za-z0-9/+=]{20,}"
-file_pattern = "*.ini"
+[[taint_sink]]
+id = "SK020"
+vulnerability_id = "CHR1017"
+description = "Tainted code point to chr() — generates character from attacker-controlled value."
+function_call = "chr"
+vulnerable_parameter_index = 0
 
 # -------------------------------------------
-# SECTION: ADDITIONAL SECURITY RULES
+# SECTION: A_SINK rules — width-based memory exhaustion
 # -------------------------------------------
 
-[[rule]]
-id = "PY500"
-description = "Dynamic code execution using builtins.exec() function."
-severity = "High"
-confidence = "Medium"
-remediation = "Avoid dynamic code execution. Consider safer alternatives or validate input thoroughly."
-ast_match = "Call(func.attr=exec, func.value.id=builtins)"
-file_pattern = "*.py"
-
-[[rule]]
-id = "SEC501"
-description = "Generic exec pattern detected in code."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Dynamic code execution can be dangerous. Validate all inputs and consider safer alternatives."
-pattern = "\\bexec\\b\\s*\\("
-file_pattern = "*.py"
-
-[[rule]]
-id = "SEC502"
-description = "Subprocess Popen with shell=True detected."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Using shell=True with subprocess.Popen can lead to command injection. Use argument lists instead."
-ast_match = "Call(func.value.id=subprocess, func.attr=Popen, keywords.*.arg=shell, keywords.*.value.value=True)"
-file_pattern = "*.py"
-
-[[rule]]
-id = "PY503"
-description = "Shell command execution with user-controllable input."
-severity = "Low"
-confidence = "Medium"
-remediation = "Avoid using shell=True with subprocess calls. Use argument arrays for safer command execution."
-pattern = "subprocess\\.(Popen|call)\\(.*shell\\s*=\\s*True"
-file_pattern = "*.py"
-
-[[rule]]
-id = "SEC504"
-description = "Reading sensitive system file /etc/passwd."
-severity = "Low"
-remediation = "Accessing system password files should be done with proper authorization checks."
-pattern = "open\\s*\\(\\s*['\\\"]/etc/passwd"
-file_pattern = "*.sh"
-
-[[rule]]
-id = "PY505"
-description = "File reading operation using open().read() pattern."
-severity = "High"
-remediation = "Ensure file access controls and validate file paths to prevent unauthorized access."
-ast_match = "Attribute(attr=read, value.func.id=open)"
-file_pattern = "*.py"
-
-[[rule]]
-id = "JS506"
-description = "JavaScript eval() function usage detected."
-severity = "Medium"
-remediation = "Avoid using eval() in JavaScript. Use JSON.parse() for data or safer alternatives."
-pattern = "eval\\s*\\("
-file_pattern = "*.js"
-
-[[rule]]
-id = "PY507"
-description = "Method call to exec function detected."
-severity = "Critical"
-remediation = "Method-based exec calls can execute arbitrary code. Validate inputs and use safer alternatives."
-pattern = "\\.exec\\s*\\("
-file_pattern = "*.py"
-
-[[rule]]
-id = "WEB508"
-description = "Insecure Content Security Policy with unsafe-inline."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Remove unsafe-inline from CSP directives and use nonces or hashes instead."
-pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
-file_pattern = "*.conf"
+[[taint_sink]]
+id = "SK021"
+vulnerability_id = "CENTER927"
+description = "Tainted width in .center() — attacker may allocate excessive memory."
+function_call = "center"
+vulnerable_parameter_index = 0
+is_method = true
+vulnerable_receiver = false
 
-[[rule]]
-id = "JS509"
-description = "Dynamic function creation using Function constructor."
-severity = "Low"
-remediation = "Function constructor can execute arbitrary code. Use predefined functions or validate inputs."
-pattern = "new\\s+Function\\s*\\("
-file_pattern = "*.js"
+[[taint_sink]]
+id = "SK022"
+vulnerability_id = "LJUST930"
+description = "Tainted width in .ljust() — attacker may allocate excessive memory."
+function_call = "ljust"
+vulnerable_parameter_index = 0
+is_method = true
+vulnerable_receiver = false
 
-[[rule]]
-id = "CFG510"
-description = "AWS access key detected in configuration."
-severity = "Low"
-remediation = "Store AWS credentials securely using IAM roles or environment variables."
-pattern = "aws_access_key_id\\s*[:=]\\s*['\\\"][A-Za-z0-9/+=]{16,}"
-file_pattern = "*.ini"
+[[taint_sink]]
+id = "SK023"
+vulnerability_id = "RJUST933"
+description = "Tainted width in .rjust() — attacker may allocate excessive memory."
+function_call = "rjust"
+vulnerable_parameter_index = 0
+is_method = true
+vulnerable_receiver = false
 
-[[rule]]
-id = "PY511"
-description = "JSON deserialization without validation."
-severity = "Low"
-confidence = "Low"
-remediation = "json.loads() is safe from code execution. Only flag if the result feeds into eval/exec/pickle."
-ast_match = "Call(func.value.id=json, func.attr=loads)"
-file_pattern = "*.py"
+# SK024-SK028 removed — associated rules disabled (RANGE1056, JOIN876, SORTED1074, SUM1080, SET1047)
+# These sinks caused downstream FP propagation: with a rule disabled but its sink kept,
+# the sink still tainted downstream variables, causing cascading false positives in SQL rules.
 
-[[rule]]
-id = "WEB512"
-description = "Bearer token in configuration header."
-severity = "Medium"
-remediation = "Store authentication tokens securely and avoid hardcoding in configuration files."
-pattern = "Authorization\\s*:\\s*\\bBearer\\b"
-file_pattern = "*.conf"
+[[taint_sink]]
+id = "SK_PY105"
+vulnerability_id = "PY105"
+description = "Tainted data passed to mark_safe() — XSS risk if data contains HTML."
+function_call = "mark_safe"
+vulnerable_parameter_index = 0
+triggers_on = "html_injectable"
 
-[[rule]]
-id = "DJG513"
-description = "Django CSRF protection bypass detected."
-severity = "Low"
-remediation = "Do not use csrf_exempt decorator unless absolutely necessary and with proper justification."
-pattern = "csrf_exempt"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_PY105B"
+vulnerability_id = "PY105"
+description = "Tainted data passed to Markup() — XSS risk."
+function_call = "Markup"
+vulnerable_parameter_index = 0
+triggers_on = "html_injectable"
 
-[[rule]]
-id = "WEB514"
-description = "X-Frame-Options set to allow framing."
-severity = "Medium"
-remediation = "Set X-Frame-Options to DENY or SAMEORIGIN to prevent clickjacking attacks."
-pattern = "X-Frame-Options\\s*:\\s*ALLOW"
-file_pattern = "*.conf"
+[[taint_sanitizer]]
+id = "SN001"
+description = "Shell argument escaping — transforms to ShellSanitized instead of clearing."
+function_call = "shlex.quote"
+transforms_to = "ShellSanitized"
+# shlex.quote converts HttpRequest → ShellSanitized:
+# - PY102/SHELL sinks (triggers_on = "shell_injectable"): do NOT fire — shlex.quote is valid mitigation
+# - PATH813/OPEN1149/FSTRING867/SSRF (triggers_on = "all"): STILL fire — quoted path still traverses
+# Result: `cat {shlex.quote(tainted_path)} | bash` correctly fires FSTRING867
+#         `subprocess.run(["bash", shlex.quote(arg)])` correctly does NOT fire PY102
 
-[[rule]]
-id = "PY515"
-description = "Code compilation using compile() function."
-severity = "High"
-remediation = "Dynamic code compilation can be dangerous. Validate all inputs and consider static alternatives."
-ast_match = "Call(func.attr=compile)"
-file_pattern = "*.py"
-# re.compile() and sql compiler.compile() are not Python code execution
-exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"
+[[taint_sanitizer]]
+id = "SN002"
+description = "HTML escaping — transforms to HtmlSanitized."
+function_call = "escape"
+transforms_to = "HtmlSanitized"
 
-[[rule]]
-id = "DOM516"
-description = "DOM manipulation using document.write()."
-severity = "Medium"
-remediation = "Use safer DOM manipulation methods like createElement() and appendChild()."
-pattern = "document\\.write\\s*\\("
-file_pattern = "*.js"
+[[taint_sanitizer]]
+id = "SN003"
+description = "format_html safely escapes for HTML — transforms to HtmlSanitized."
+function_call = "format_html"
+transforms_to = "HtmlSanitized"
 
-[[rule]]
-id = "XSS517"
-description = "InnerHTML assignment detected."
-severity = "Low"
-remediation = "Using innerHTML can lead to XSS vulnerabilities. Use textContent or createElement instead."
-pattern = "innerHTML\\s*="
-file_pattern = "*.html"
+[[taint_sanitizer]]
+id = "SN004"
+description = "conditional_escape for HTML — transforms to HtmlSanitized."
+function_call = "conditional_escape"
+transforms_to = "HtmlSanitized"
 
-[[rule]]
-id = "PY518"
-description = "Subprocess execution with shell parameter enabled."
-severity = "High"
-confidence = "Medium"
-remediation = "Disable shell parameter or validate all inputs to prevent command injection."
-ast_match = "Call(func.value.id=subprocess, func.attr=Popen, keywords.*.arg=shell, keywords.*.value.value=True)"
-file_pattern = "*.py"
+[[taint_sanitizer]]
+id = "SN005"
+description = "DB identifier quoting — transforms to SqlSanitized."
+function_call = "quote_name"
+transforms_to = "SqlSanitized"
 
-[[rule]]
-id = "TIME519"
-description = "JavaScript setTimeout with string parameter."
-severity = "Low"
-remediation = "Pass function references to setTimeout instead of string code."
-pattern = "setTimeout\\s*\\(\\s*['\\\"]"
-file_pattern = "*.js"
+[[taint_sanitizer]]
+id = "SN006"
+description = "DB identifier quoting via ops — transforms to SqlSanitized."
+function_call = "ops.quote_name"
+transforms_to = "SqlSanitized"
 
-[[rule]]
-id = "DB520"
-description = "Mongoose query construction detected."
-severity = "Medium"
-remediation = "Use parameterized queries to prevent NoSQL injection attacks."
-pattern = "mongoose\\.query\\s*\\("
-file_pattern = "*.js"
+[[taint_sanitizer]]
+id = "SN_SAFE_URL001"
+description = "Django is_safe_url() validates the URL host against an allowed-hosts list — prevents open redirect."
+function_call = "is_safe_url"
 
-[[rule]]
-id = "SER522"
-description = "Object serialization function detected."
-severity = "Low"
-remediation = "Ensure serialized data comes from trusted sources to prevent deserialization attacks."
-pattern = "\\bserialize\\b\\s*\\("
-file_pattern = "*.py"
+[[taint_sanitizer]]
+id = "SN_SAFE_URL002"
+description = "Django url_has_allowed_host_and_scheme() validates URL host and scheme — prevents open redirect."
+function_call = "url_has_allowed_host_and_scheme"
 
-[[rule]]
-id = "NODE525"
-description = "Node.js child_process module import."
-severity = "Low"
-remediation = "Child process execution can be dangerous. Validate all inputs and limit functionality."
-pattern = "require\\s*\\(.*child_process"
-file_pattern = "*.js"
+# -------------------------------------------
+# SECTION: Taint Sinks (SQL injection and other sink categories)
+# -------------------------------------------
 
-[[rule]]
-id = "FILE526"
-description = "File read operation using open attribute access."
-severity = "Medium"
-remediation = "Implement proper file access controls and validate file paths."
-ast_match = "Attribute(attr=read, value.id=open)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SQL001"
+vulnerability_id = "PY101"
+description = "Tainted SQL string passed to cursor.execute() — SQL injection risk."
+function_call = "execute"
+vulnerable_parameter_index = 0
+is_method = true
+triggers_on = "sql_injectable"
 
-[[rule]]
-id = "PERM527"
-description = "Setting overly permissive file permissions (777)."
-severity = "High"
-remediation = "Use more restrictive permissions. Consider 644 for files and 755 for directories."
-pattern = "chmod\\s+777"
-file_pattern = "*.sh"
+[[taint_sink]]
+id = "SK_SQL002"
+vulnerability_id = "PY101"
+description = "Tainted SQL string passed to cursor.executemany() — SQL injection risk."
+function_call = "executemany"
+vulnerable_parameter_index = 0
+is_method = true
+triggers_on = "sql_injectable"
 
-[[rule]]
-id = "FILE528"
-description = "Direct access to system password file."
-severity = "High"
-confidence = "Medium"
-remediation = "Accessing /etc/passwd should be done through proper system APIs with authorization."
-pattern = "open\\s*\\(\\s*['\\\"]/etc/passwd"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_PY507"
+vulnerability_id = "PY507"
+description = "Tainted data passed to .exec() method — attacker may inject code or SQL."
+function_call = "exec"
+vulnerable_parameter_index = 0
+is_method = true
+triggers_on = "sql_injectable"
 
-[[rule]]
-id = "TEMP529"
-description = "Insecure temporary file creation using mktemp -u."
-severity = "Low"
-remediation = "Use mktemp without -u flag or mkstemp for secure temporary file creation."
-pattern = "mktemp\\s+-u"
-file_pattern = "*.sh"
+[[taint_sink]]
+id = "SK_MKDIR001"
+vulnerability_id = "PATH813"
+description = "Tainted path used in mkdir() — attacker can create directories at arbitrary locations."
+function_call = "mkdir"
+vulnerable_receiver = true
+is_method = true
 
-[[rule]]
-id = "SSL531"
-description = "SSL/TLS certificate verification disabled."
-severity = "Medium"
-remediation = "Enable certificate verification to prevent man-in-the-middle attacks."
-pattern = "verify\\s*:\\s*false"
-file_pattern = "*.y*ml"
+[[taint_sink]]
+id = "SK_MAKEDIRS001"
+vulnerability_id = "PATH813"
+description = "Tainted path used in os.makedirs() — attacker can create directories at arbitrary locations."
+function_call = "os.makedirs"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "CRYPTO532"
-description = "Deprecated SSL/TLS protocol version usage."
-severity = "Medium"
-remediation = "Use TLS 1.2 or higher. Avoid deprecated SSL and early TLS versions."
-pattern = "ssl\\.PROTOCOL_(SSLv2|SSLv3|TLSv1|TLSv1_1)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SYMLINK001"
+vulnerability_id = "SYMLINK816"
+description = "User-controlled path as symlink source — attacker can create links to arbitrary files."
+function_call = "os.symlink"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "PERM568"
-description = "File permission change to world-writable detected."
-severity = "High"
-confidence = "Medium"
-remediation = "Avoid setting world-writable permissions. Use more restrictive file access controls."
-pattern = "chmod\\s+777"
-file_pattern = "*.sh"
+[[taint_sink]]
+id = "SK_DESER724"
+vulnerability_id = "DESER724"
+description = "Deserialized code object passed to types.FunctionType() — creates callable from untrusted bytecode."
+function_call = "types.FunctionType"
+vulnerable_parameter_index = 0
+is_method = false
+# Dotted paths are matched with contains(), so this matches both types.FunctionType and python_types.FunctionType
+# ("python_types" ends with "types", so "python_types.FunctionType".contains("types.FunctionType") is true).
 
-[[rule]]
-id = "WEB575"
-description = "Content Security Policy allows unsafe inline execution."
-severity = "High"
-confidence = "Medium"
-remediation = "Remove unsafe-inline from CSP directives and implement nonce-based or hash-based CSP."
-pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
-file_pattern = "*.conf"
+[[taint_sink]]
+id = "SK_DESER724B"
+vulnerability_id = "DESER724"
+description = "Deserialized code object passed to FunctionType() (direct import) — creates callable from untrusted bytecode."
+function_call = "FunctionType"
+vulnerable_parameter_index = 0
+is_method = false
+# Matches: from types import FunctionType; FunctionType(code, ...)
 
-[[rule]]
-id = "SQL586"
-description = "String formatting in SQL query execution."
-severity = "Critical"
-confidence = "Medium"
-remediation = "Use parameterized queries instead of string formatting to prevent SQL injection."
-pattern = "\\.(execute|executemany)\\s*\\(\\s*f?[\"'].*SELECT.*(FROM|UPDATE|DELETE).*%s.*[\"']"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSTI001"
+vulnerability_id = "SSTI001"
+description = "Tainted string passed to Flask render_template_string() — Jinja2 SSTI → RCE."
+function_call = "render_template_string"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "FUNC596"
-description = "JavaScript Function constructor usage."
-severity = "Critical"
-confidence = "Medium"
-remediation = "Avoid Function constructor as it can execute arbitrary code. Use predefined functions."
-pattern = "new\\s+Function\\s*\\("
-file_pattern = "*.js"
+# SK_SSTI002 removed: from_string() is too generic — fires on DeviceSpec.from_string(), etc.
 
-[[rule]]
-id = "SHELL602"
-description = "Shell command execution with dynamic arguments."
-severity = "High"
-confidence = "Medium"
-remediation = "Use subprocess with argument arrays instead of shell command strings."
-pattern = "subprocess\\.(Popen|call)\\(.*shell\\s*=\\s*True"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_ORMRAW001"
+vulnerability_id = "ORM002"
+description = "Tainted SQL string passed to Django QuerySet.raw() — SQL injection via ORM."
+function_call = "raw"
+vulnerable_parameter_index = 0
+is_method = true
+triggers_on = "sql_injectable"
 
-[[rule]]
-id = "CODE607"
-description = "Content Security Policy with unsafe inline directives."
-severity = "High"
-confidence = "Medium"
-remediation = "Implement strict CSP without unsafe-inline to prevent XSS attacks."
-pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
-file_pattern = "*.conf"
+[[taint_sink]]
+id = "SK_ORMORDER001"
+vulnerability_id = "ORM002"
+description = "User-controlled field name in QuerySet.order_by() — Django ORM injection (CVE-2021-35042)."
+function_call = "order_by"
+vulnerable_parameter_index = 0
+is_method = true
 
-[[rule]]
-id = "JSON612"
-description = "JSON parsing without input validation."
-severity = "Low"
-confidence = "Low"
-remediation = "json.loads() is safe from code execution. Only flag if result feeds into eval/exec/pickle."
-ast_match = "Call(func.value.id=json, func.attr=loads)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_ORMEXTRA001"
+vulnerability_id = "ORM002"
+description = "User-controlled SQL fragments in QuerySet.extra() — SQL injection via ORM."
+function_call = "extra"
+vulnerable_parameter_index = 0
+is_method = true
+triggers_on = "sql_injectable"
 
-[[rule]]
-id = "YAML619"
-description = "Shell execution in subprocess with dynamic input."
-severity = "High"
-confidence = "Medium"
-remediation = "Use argument lists with subprocess to prevent command injection attacks."
-pattern = "subprocess\\.(Popen|call)\\(.*shell\\s*=\\s*True"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_DESER725"
+vulnerability_id = "DESER725"
+description = "User-controlled data passed to jsonpickle.decode() — arbitrary Python object deserialization → RCE."
+function_call = "jsonpickle.decode"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "SHELL631"
-description = "SQL injection vulnerability in database query."
-severity = "Critical"
-confidence = "Medium"
-remediation = "Use parameterized queries with placeholders instead of string concatenation."
-pattern = "\\.(execute|executemany)\\s*\\(\\s*f?[\"'].*SELECT.*(FROM|UPDATE|DELETE).*%s.*[\"']"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_DESER726"
+vulnerability_id = "DESER726"
+description = "User-controlled data passed to dill.loads() — arbitrary Python object deserialization → RCE."
+function_call = "dill.loads"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "JS635"
-description = "Dynamic function creation in JavaScript."
-severity = "High"
-confidence = "Medium"
-remediation = "Avoid Function constructor to prevent code injection. Use predefined function references."
-pattern = "new\\s+Function\\s*\\("
-file_pattern = "*.js"
+[[taint_sink]]
+id = "SK_DESER_JOBLIB"
+vulnerability_id = "DESER_JOBLIB001"
+description = "User-controlled path passed to joblib.load() — arbitrary Python object deserialization → RCE."
+function_call = "joblib.load"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "CSP640"
-description = "Unsafe Content Security Policy configuration."
-severity = "High"
-confidence = "Medium"
-remediation = "Configure CSP without unsafe-inline and unsafe-eval directives."
-pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
-file_pattern = "*.conf"
+[[taint_sink]]
+id = "SK_MARKUP001"
+vulnerability_id = "PY105"
+description = "Tainted string passed to jinja2.Markup() — bypasses Jinja2 auto-escaping, XSS risk."
+function_call = "Markup"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "SHELL645"
-description = "Dynamic code compilation with user input."
-severity = "High"
-confidence = "Medium"
-remediation = "Avoid compile() function with untrusted input. Use static code analysis instead."
-ast_match = "Call(func.attr=compile)"
-file_pattern = "*.py"
-exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"
+[[taint_sink]]
+id = "SK_ORM_VALUES001"
+vulnerability_id = "ORM002"
+description = "User-controlled field name in QuerySet.values() — CVE-2024-42005 Django ORM injection."
+function_call = "values"
+vulnerable_parameter_index = 0
+is_method = true
 
-[[rule]]
-id = "PERM650"
-description = "SQL query with potential injection vulnerability."
-severity = "Critical"
-confidence = "Medium"
-remediation = "Implement prepared statements and parameterized queries to prevent SQL injection."
-pattern = "\\.(execute|executemany)\\s*\\(\\s*f?[\"'].*SELECT.*(FROM|UPDATE|DELETE).*%s.*[\"']"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_ORM_VALUES_LIST001"
+vulnerability_id = "ORM002"
+description = "User-controlled field name in QuerySet.values_list() — column name injection."
+function_call = "values_list"
+vulnerable_parameter_index = 0
+is_method = true
 
-[[rule]]
-id = "JS655"
-description = "Dynamic function constructor in JavaScript code."
-severity = "High"
-confidence = "Medium"
-remediation = "Replace Function constructor with safer alternatives to prevent code injection."
-pattern = "new\\s+Function\\s*\\("
-file_pattern = "*.js"
+[[taint_sink]]
+id = "SK_PATH_READ001"
+vulnerability_id = "PATH813"
+description = "Tainted path receiver for read_text() — arbitrary file read via path traversal."
+function_call = "read_text"
+vulnerable_receiver = true
+is_method = true
 
-[[rule]]
-id = "SHELL660"
-description = "Process execution with shell parameter enabled."
-severity = "High"
-confidence = "Medium"
-remediation = "Use subprocess without shell parameter and pass arguments as a list."
-pattern = "subprocess\\.(Popen|call)\\(.*shell\\s*=\\s*True"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_PATH_READ002"
+vulnerability_id = "PATH813"
+description = "Tainted path receiver for read_bytes() — arbitrary file read via path traversal."
+function_call = "read_bytes"
+vulnerable_receiver = true
+is_method = true
 
-[[rule]]
-id = "CSP665"
-description = "Insecure Content Security Policy allowing inline scripts."
-severity = "High"
-confidence = "Medium"
-remediation = "Use nonce or hash-based CSP instead of unsafe-inline directive."
-pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
-file_pattern = "*.conf"
+[[taint_sink]]
+id = "SK_PATH_WRITE001"
+vulnerability_id = "PATH813"
+description = "Tainted path receiver for write_text() — arbitrary file write via path traversal."
+function_call = "write_text"
+vulnerable_receiver = true
+is_method = true
 
-[[rule]]
-id = "SHELL670"
-description = "Code compilation function usage."
-severity = "High"
-confidence = "Medium"
-remediation = "Avoid dynamic code compilation. Consider static analysis or predefined code patterns."
-ast_match = "Call(func.attr=compile)"
-file_pattern = "*.py"
-exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"
+[[taint_sink]]
+id = "SK_PATH_WRITE002"
+vulnerability_id = "PATH813"
+description = "Tainted path receiver for write_bytes() — arbitrary file write via path traversal."
+function_call = "write_bytes"
+vulnerable_receiver = true
+is_method = true
 
-[[rule]]
-id = "SHELL675"
-description = "Database query with string interpolation."
-severity = "Critical"
-confidence = "Medium"
-remediation = "Use ORM methods or prepared statements instead of string formatting in SQL queries."
-pattern = "\\.(execute|executemany)\\s*\\(\\s*f?[\"'].*SELECT.*(FROM|UPDATE|DELETE).*%s.*[\"']"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_PATH_UNLINK001"
+vulnerability_id = "PATH813"
+description = "Tainted path receiver for unlink() — attacker-controlled file deletion."
+function_call = "unlink"
+vulnerable_receiver = true
+is_method = true
 
-[[rule]]
-id = "PERM679"
-description = "Subprocess call with shell execution enabled."
-severity = "High"
-confidence = "Medium"
-remediation = "Disable shell parameter in subprocess calls to prevent command injection."
-ast_match = "Call(func.value.id=subprocess, func.attr=Popen, keywords.*.arg=shell, keywords.*.value.value=True)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF_HTTPX001"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL passed to httpx async client — SSRF risk."
+function_call = "httpx.AsyncClient.get"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "DOM683"
-description = "DOM write operation using document.write."
-severity = "High"
-confidence = "Medium"
-remediation = "Use modern DOM manipulation methods instead of document.write to prevent XSS."
-pattern = "document\\.write\\s*\\("
-file_pattern = "*.js"
+[[taint_sink]]
+id = "SK_SSRF_HTTPX002"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL passed to httpx async client — SSRF risk."
+function_call = "httpx.AsyncClient.post"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "SHELL689"
-description = "Process creation with shell command execution."
-severity = "High"
-confidence = "Medium"
-remediation = "Use process execution without shell to avoid command injection vulnerabilities."
-pattern = "subprocess\\.(Popen|call)\\(.*shell\\s*=\\s*True"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF_AIOHTTP001"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL passed to aiohttp ClientSession.get() — SSRF risk."
+function_call = "aiohttp.ClientSession.get"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "SQL693"
-description = "String formatting in database execute statement."
-severity = "Critical"
-confidence = "Medium"
-remediation = "Implement parameterized queries to eliminate SQL injection risks."
-pattern = "\\.(execute|executemany)\\s*\\(\\s*f?[\"'].*SELECT.*(FROM|UPDATE|DELETE).*%s.*[\"']"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF_AIOHTTP002"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL passed to aiohttp ClientSession.post() — SSRF risk."
+function_call = "aiohttp.ClientSession.post"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "DOM697"
-description = "Direct DOM manipulation using document.write method."
-severity = "High"
-confidence = "Medium"
-remediation = "Use createElement and appendChild methods for safer DOM manipulation."
-pattern = "document\\.write\\s*\\("
-file_pattern = "*.js"
+[[taint_sink]]
+id = "SK_TMPL_PATH001"
+vulnerability_id = "PATH813"
+description = "User-controlled string in Django render() template name — path traversal loads arbitrary templates."
+function_call = "render"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "PERM702"
-description = "File permission modification to world-accessible."
-severity = "High"
-confidence = "Medium"
-remediation = "Set appropriate file permissions. Avoid 777 permissions on production systems."
-pattern = "chmod\\s+777"
-file_pattern = "*.sh"
+[[taint_sink]]
+id = "SK_IMG_EVAL001"
+vulnerability_id = "PY001"
+description = "User-controlled expression in PIL.ImageMath.eval() — arbitrary Python code execution (CVE-2023-50447)."
+function_call = "ImageMath.eval"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "NET705"
-description = "Network request without SSL certificate verification."
-severity = "High"
-confidence = "Medium"
-remediation = "Enable SSL certificate verification to prevent man-in-the-middle attacks."
-pattern = "requests\\.(get|post|put|delete)\\(.*verify\\s*=\\s*False"
-file_pattern = "*.py"
+# SK_FILE_WRITE001 removed: write() is too generic (HTTP response writes, cache writes, etc.)
 
-[[rule]]
-id = "CRYPTO708"
-description = "Weak cryptographic key generation detected."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Use cryptographically secure random number generators for key generation."
-pattern = "random\\.(randint|random)\\("
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_REDIRECT001"
+vulnerability_id = "OPEN_REDIRECT001"
+description = "User-controlled URL in Flask redirect() — open redirect / SSRF."
+function_call = "redirect"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "AUTH711"
-description = "Authentication bypass using hardcoded credentials."
-severity = "Critical"
-confidence = "High"
-remediation = "Implement proper authentication mechanisms without hardcoded credentials."
-pattern = "(?i)(username|user)\\s*[:=]\\s*[\"']admin[\"']"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_REDIRECT002"
+vulnerability_id = "OPEN_REDIRECT001"
+description = "User-controlled URL in Django HttpResponseRedirect() — open redirect."
+function_call = "HttpResponseRedirect"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "XSS714"
-description = "Cross-site scripting vulnerability in template rendering."
-severity = "High"
-confidence = "Medium"
-remediation = "Use template engines with automatic escaping or manually escape user input."
-pattern = "\\|safe\\b"
-file_pattern = "*.html"
+[[taint_sink]]
+id = "SK_PLAIN_PWD001"
+vulnerability_id = "PLAIN_PWD001"
+description = "Tainted value stored as 'password' in Django ORM create() — plaintext password stored in database."
+function_call = "create"
+is_method = true
+vulnerable_keyword = "password"
 
-[[rule]]
-id = "LDAP717"
-description = "LDAP injection vulnerability in search filter."
-severity = "High"
-confidence = "Medium"
-remediation = "Properly escape LDAP filter characters or use parameterized LDAP queries."
-pattern = "\\.search\\(.*filter.*%s"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_COOKIE_JAR001"
+vulnerability_id = "COOKIE_FILE001"
+description = "Attacker-controlled path loaded as cookie jar — cookie injection into HTTP sessions."
+function_call = "MozillaCookieJar"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "XPATH720"
-description = "XPath injection vulnerability detected."
-severity = "High"
-confidence = "Medium"
-remediation = "Use parameterized XPath queries or properly escape user input."
-pattern = "xpath\\(.*%s"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_EXEC_MODULE001"
+vulnerability_id = "IMPORT825"
+description = "User-controlled path reaches exec_module() — arbitrary code execution via dynamic import."
+function_call = "exec_module"
+vulnerable_parameter_index = 0
+is_method = true
 
-[[rule]]
-id = "DESER723"
-description = "Unsafe deserialization of untrusted data."
-severity = "Critical"
-confidence = "High"
-remediation = "Validate and sanitize data before deserialization or use safer formats."
-ast_match = "Call(func.value.id=marshal, func.attr=loads)"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SPEC_FILE001"
+vulnerability_id = "IMPORT825"
+description = "User-controlled path passed to spec_from_file_location() — loads arbitrary Python file as module."
+function_call = "importlib.util.spec_from_file_location"
+vulnerable_parameter_index = 1
+is_method = false
+
+# SSRF sinks — HTTP client functions where the URL argument is tainted
+[[taint_sink]]
+id = "SK_SSRF001"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in httpx.stream() — SSRF: attacker can redirect to internal services or file:// URIs."
+function_call = "httpx.stream"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "PRIV726"
-description = "Privilege escalation through setuid binary execution."
-severity = "High"
-confidence = "Medium"
-remediation = "Avoid executing setuid binaries or implement proper privilege checks."
-pattern = "os\\.setuid\\("
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF002"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in httpx.get() — SSRF risk."
+function_call = "httpx.get"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "RACE729"
-description = "Race condition in file operations."
-severity = "Medium"
-confidence = "Low"
-remediation = "Use atomic file operations or proper locking mechanisms."
-pattern = "os\\.path\\.exists.*open\\("
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF003"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in httpx.post() — SSRF risk."
+function_call = "httpx.post"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "MEM732"
-description = "Memory exhaustion through unbounded data structure."
-severity = "Medium"
-confidence = "Low"
-remediation = "Implement size limits on data structures to prevent memory exhaustion."
-pattern = "\\[\\]\\s*\\*\\s*\\w+"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF004"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in httpx.request() — SSRF risk."
+function_call = "httpx.request"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "DIR735"
-description = "Directory traversal vulnerability in file path."
-severity = "High"
-confidence = "Medium"
-remediation = "Validate and sanitize file paths to prevent directory traversal attacks."
-pattern = "\\.\\./|\\.\\.\\\\|%2e%2e%2f"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF005"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in requests.get() — SSRF risk."
+function_call = "requests.get"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "INFO738"
-description = "Information disclosure through error messages."
-severity = "Low"
-confidence = "Low"
-remediation = "Implement generic error messages that don't reveal system information."
-pattern = "traceback\\.print_exc\\("
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF006"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in requests.post() — SSRF risk."
+function_call = "requests.post"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "LOG741"
-description = "Log injection vulnerability detected."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Sanitize user input before logging to prevent log injection attacks."
-pattern = "logging\\.(info|debug|warning|error)\\(.*%s"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF007"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in requests.request() — SSRF risk."
+function_call = "requests.request"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "SESS744"
-description = "Session fixation vulnerability in session handling."
-severity = "High"
-confidence = "Medium"
-remediation = "Regenerate session IDs after authentication to prevent fixation attacks."
-# Writing data to a session is NOT session fixation. Only flag direct session key assignment from request.
-pattern = "session\\.session_key\\s*=.*request\\."
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_SSRF008"
+vulnerability_id = "SSRF_001"
+description = "User-controlled URL in urllib.request.urlopen() — SSRF risk."
+function_call = "urllib.request.urlopen"
+vulnerable_parameter_index = 0
+is_method = false
 
-[[rule]]
-id = "CSRF747"
-description = "Cross-Site Request Forgery protection bypass."
-severity = "High"
-confidence = "Medium"
-remediation = "Implement proper CSRF tokens for state-changing operations."
-pattern = "@csrf_exempt"
-file_pattern = "*.py"
+# LOG741 taint sinks — fire only when tainted data reaches a logging call.
+# These replace the pattern rule, which fired on any logging call with a %s format.
+# Internal objects (proto, op_name, config) are never tainted, so they produce no false positives.
 
-[[rule]]
-id = "HTTP750"
-description = "HTTP response splitting vulnerability."
-severity = "High"
-confidence = "Medium"
-remediation = "Validate and sanitize HTTP headers to prevent response splitting."
-pattern = "HttpResponse\\(.*\\\\r\\\\n"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_LOG741_INFO"
+vulnerability_id = "LOG741"
+description = "User-controlled data in logging.info() — log injection risk."
+function_call = "logging.info"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "UPLOAD753"
-description = "Unrestricted file upload vulnerability."
-severity = "High"
-confidence = "Medium"
-remediation = "Implement file type validation and size limits for uploads."
-pattern = "request\\.FILES\\[.*\\]\\.save\\("
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_LOG741_WARN"
+vulnerability_id = "LOG741"
+description = "User-controlled data in logging.warning() — log injection risk."
+function_call = "logging.warning"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "CACHE756"
-description = "Cache poisoning vulnerability in HTTP caching."
-severity = "Medium"
-confidence = "Low"
-remediation = "Validate cache keys and implement proper cache invalidation."
-pattern = "cache\\.set\\(.*request\\."
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_LOG741_ERROR"
+vulnerability_id = "LOG741"
+description = "User-controlled data in logging.error() — log injection risk."
+function_call = "logging.error"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "TIMING759"
-description = "Timing attack vulnerability in authentication."
-severity = "Medium"
-confidence = "Low"
-remediation = "Use constant-time comparison functions for sensitive operations."
-pattern = "password\\s*==\\s*.*"
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_LOG741_DEBUG"
+vulnerability_id = "LOG741"
+description = "User-controlled data in logging.debug() — log injection risk."
+function_call = "logging.debug"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "ENUM762"
-description = "User enumeration vulnerability in login system."
-severity = "Low"
-confidence = "Low"
-remediation = "Return identical responses for valid and invalid usernames."
-pattern = "User\\.objects\\.get\\(username="
-file_pattern = "*.py"
+[[taint_sink]]
+id = "SK_LOG741_CRITICAL"
+vulnerability_id = "LOG741"
+description = "User-controlled data in logging.critical() — log injection risk."
+function_call = "logging.critical"
+vulnerable_parameter_index = 1
+is_method = false
 
-[[rule]]
-id = "BRUTE765"
-description = "Missing brute force protection on authentication."
-severity = "Medium"
-confidence = "Low"
-remediation = "Implement rate limiting and account lockout mechanisms."
-pattern = "login_required"
-file_pattern = "*.py"
+# -------------------------------------------
+# SECTION: Injection (OWASP A03:2021)
+# -------------------------------------------
 
 [[rule]]
-id = "WEAK768"
-description = "Weak password policy implementation."
-severity = "Low"
-confidence = "Low"
-remediation = "Implement strong password requirements and validation."
-pattern = "len\\(password\\)\\s*<\\s*[1-6]"
-file_pattern = "*.py"
+id = "PY102"
+description = "Command Injection detected via Taint Analysis."
+severity = "Critical"
+confidence = "High"
+remediation = "User-controlled data reached a command execution function without sanitization. Use 'shlex.quote()' to escape arguments or avoid passing user input to shell commands entirely."
+# NOTE: This rule has no 'pattern' or 'ast_match'.
+# It is triggered ONLY by the taint engine.
 
 [[rule]]
-id = "TOKEN771"
-description = "JWT token potentially without expiration time (Manual inspection suggested)."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Set appropriate expiration times for JWT tokens."
-pattern = "jwt\\.encode\\s*\\("
+id = "PY001"
+description = "Use of 'eval()' is highly dangerous."
+severity = "High"
+remediation = "Avoid 'eval()'. Use safer alternatives like 'ast.literal_eval' for data parsing."
+ast_match = "Call(func.id=eval)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "OAUTH774"
-description = "OAuth state parameter potentially missing in authorization request (Manual inspection suggested)."
+id = "PY103"
+description = "Use of os.system is a command injection risk."
 severity = "High"
-confidence = "Medium"
-remediation = "Include state parameter in OAuth flows to prevent CSRF attacks."
-pattern = "oauth.*authorize.*"
-file_pattern = "*.py"
+remediation = "Avoid 'os.system'. Use the 'subprocess' module with command and arguments as a list."
+# No ast_match — triggered only by taint engine
 
 [[rule]]
-id = "API777"
-description = "API endpoint without rate limiting."
-severity = "Medium"
-confidence = "Low"
-remediation = "Implement rate limiting on API endpoints to prevent abuse."
-pattern = "@app\\.route.*methods.*POST"
-file_pattern = "*.py"
+id = "PY101"
+description = "Potential SQL injection via string formatting in database query."
+severity = "Critical"
+confidence = "High"
+remediation = "Use parameterized queries (e.g., cursor.execute('SELECT * FROM users WHERE name = ?', (name,))) instead of string formatting."
+# No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
+# Exclude migration files: ORM DDL in migrations uses cursor.execute() with developer-controlled
+# schema parameters (table names, column names) that are not user input.
+exclude_file_pattern = "*/migrations/*,*/alembic/*,*/backends/*"
 
 [[rule]]
-id = "CORS780"
-description = "Overly permissive CORS configuration."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Restrict CORS origins to trusted domains only."
-pattern = "Access-Control-Allow-Origin\\s*:\\s*\\*"
+id = "PY104"
+description = "LDAP injection may be possible with string formatting."
+severity = "High"
+remediation = "Use a proper LDAP escaping library for any user-controlled data in LDAP queries."
+pattern = "\\.search_s\\s*\\(.*f[\"']"
 file_pattern = "*.py"
 
 [[rule]]
-id = "CLICK783"
-description = "Potential Clickjacking vulnerability due to missing X-Frame-Options (Manual inspection suggested)."
-severity = "Medium"
-confidence = "Low"
-remediation = "Set X-Frame-Options header to DENY or SAMEORIGIN."
-pattern = "HttpResponse\\s*\\("
-file_pattern = "*.py"
+id = "PY105"
+description = "User-controlled data passed to mark_safe() or Markup() — potential XSS."
+severity = "High"
+confidence = "High"
+remediation = "Never pass user-controlled data to mark_safe() or Markup(). Sanitize with django.utils.html.escape() first."
+# No pattern — triggered only by taint engine (SK_PY105 / SK_PY105B)
 
 [[rule]]
-id = "MIME786"
-description = "MIME type sniffing vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Set X-Content-Type-Options header to nosniff."
-pattern = "HttpResponse\\(.*content_type="
+id = "PY106"
+description = "Use of subprocess.run with shell=True is a command injection risk."
+severity = "High"
+remediation = "Avoid shell=True with subprocess.run. Pass commands as a list instead of a string."
+# Only fire when shell=True is explicitly passed — not for every subprocess.run call
+ast_match = "Call(func.value.id=subprocess, func.attr=run, keywords.*.arg=shell, keywords.*.value.value=True)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "HTTPS789"
-description = "Missing HTTPS enforcement in security-sensitive context."
+id = "PY107"
+description = "Unsafe deserialization with 'yaml.load' — no Loader specified."
 severity = "High"
 confidence = "Medium"
-remediation = "Enforce HTTPS for all security-sensitive operations."
-pattern = "SECURE_SSL_REDIRECT\\s*=\\s*False"
-file_pattern = "*settings*.py"
-
-[[rule]]
-id = "COOKIE792"
-description = "Insecure cookie configuration detected."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Set secure and httponly flags on sensitive cookies."
-pattern = "set_cookie\\(.*secure=False"
+remediation = "Pass Loader=yaml.SafeLoader or use yaml.safe_load(). For ruamel.yaml, use YAML(typ='safe') or YAML(typ='rt') (round-trip is safe by default)."
+ast_match = "Call(func.value.id=yaml, func.attr=load)"
 file_pattern = "*.py"
+# Exclude when any Loader= is explicitly passed.
+# Note: ruamel.yaml's YAML() (round-trip) and YAML(typ="safe"/"rt"/"base") are all safe.
+# This rule may produce FPs when the variable named 'yaml' was assigned from ruamel's
+# YAML() constructor (not the PyYAML module). YAML(typ="unsafe") is caught by RUAMEL_UNSAFE001.
+exclude_pattern = "Loader\\s*=|YAML\\s*\\(\\s*\\)\\s*\\.\\s*load|typ\\s*=\\s*[\"'](safe|rt|base)[\"']"
+file_content_exclude = "from ruamel\\.yaml|import ruamel"
 
-[[rule]]
-id = "ADMIN795"
-description = "Default admin credentials detected."
-severity = "Critical"
-confidence = "High"
-remediation = "Change default administrative credentials before deployment."
-pattern = "(?i)(admin|administrator).*password.*password"
-file_pattern = "*.py"
+# -------------------------------------------
+# SECTION: Cryptographic Failures (OWASP A02:2021)
+# -------------------------------------------
 
 [[rule]]
-id = "DEBUG798"
-description = "Debug information exposed in production."
+id = "PY201"
+description = "Use of weak hashing algorithm MD5 — do not use for passwords or security-sensitive hashing."
 severity = "Medium"
-confidence = "Medium"
-remediation = "Disable debug mode and remove debug statements in production."
-pattern = "print\\(.*password\\|.*secret"
+remediation = "For passwords use bcrypt/argon2. For checksums/integrity: SHA-256 is preferred but MD5 is acceptable if not security-critical."
+ast_match = "Call(func.value.id=hashlib, func.attr=md5)"
 file_pattern = "*.py"
+# Exclude non-password MD5 uses:
+#   hexdigest / 0x7FFFFFFF  — deterministic int seed (sharding, seeding)
+#   checksum / integrity    — explicit file-integrity context
+#   hash_id / hash_file     — variable/function names indicating identity hash, not auth
+#   legacy                  — explicitly marked legacy/deprecated code path
+#   update(                 — incremental MD5 building (checksums use .update(), passwords don't)
+exclude_pattern = "hexdigest|checksum|integrity|fingerprint|digest\\(\\)|0x7FFFFFFF|int.*md5|md5.*int|hash_id|hash.*file|file.*hash|_hash|legacy|nonce|update\\s*\\(|hasher|algorithm"
 
 [[rule]]
-id = "BACKUP801"
-description = "Backup file with sensitive information accessible."
+id = "PY202"
+description = "Use of broken hashing algorithm SHA1."
 severity = "Medium"
-confidence = "Low"
-remediation = "Secure backup files and exclude them from web-accessible directories."
-pattern = "\\.(bak|backup|old|tmp)$"
-file_pattern = "*"
-
-[[rule]]
-id = "CONFIG804"
-description = "Configuration file with default values."
-severity = "Low"
-confidence = "Low"
-remediation = "Change default configuration values before production deployment."
-pattern = "(?i)secret_key.*changeme"
-file_pattern = "*settings*.py"
-
-[[rule]]
-id = "HASH807"
-description = "Use of insecure hash function for passwords."
-severity = "High"
-confidence = "High"
-remediation = "Use bcrypt, scrypt, or Argon2 for password hashing."
-ast_match = "Call(func.value.id=hashlib, func.attr=sha256)"
+remediation = "Use a stronger hashing algorithm like SHA-256."
+ast_match = "Call(func.value.id=hashlib, func.attr=sha1)"
 file_pattern = "*.py"
+# SHA1 used for cache keys, template keys, or content addressing is not a security vulnerability.
+# The keyword list below is a heuristic that suppresses such uses, leaving password/auth-token hashing.
+exclude_pattern = "cache|key|template|content|join\\(|etag|checksum|digest|signature|chunk|fingerprint|function|framework|hasher"
 
 [[rule]]
-id = "RAND810"
-description = "Use of predictable random number generator."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Use cryptographically secure random generators for security purposes."
-ast_match = "Call(func.value.id=random, func.attr=choice)"
+id = "PY203"
+description = "Use of insecure SSL/TLS protocol version."
+severity = "High"
+remediation = "Use 'ssl.PROTOCOL_TLS' or higher. Avoid SSLv2, SSLv3, and TLSv1.0/1.1."
+pattern = "ssl\\.PROTOCOL_(SSLv2|SSLv3|TLSv1|TLSv1_1)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "PATH813"
-description = "Path manipulation vulnerability in file operations."
+id = "PY204"
+description = "Use of the 'pycrypto' library is discouraged due to known vulnerabilities."
 severity = "High"
-confidence = "Medium"
-remediation = "Validate and normalize file paths to prevent directory traversal."
-pattern = "os\\.path\\.join\\(.*\\.\\."
+remediation = "Migrate from 'pycrypto' to a more secure and actively maintained library like 'pycryptodome'."
+pattern = "from\\s+Crypto|import\\s+Crypto"
 file_pattern = "*.py"
 
 [[rule]]
-id = "SYMLINK816"
-description = "Symbolic link vulnerability in file operations."
-severity = "Medium"
+id = "PY205"
+description = "Use of PyNaCl with low-level functions can be insecure if misused."
+severity = "Low"
 confidence = "Low"
-remediation = "Check for symbolic links and validate target paths."
-pattern = "os\\.symlink\\("
+remediation = "Prefer using high-level APIs like 'Box' and 'SecretBox' unless low-level functions are explicitly required and understood."
+pattern = "nacl\\.low_level"
 file_pattern = "*.py"
 
+# -------------------------------------------
+# SECTION: Insecure Deserialization & Design (OWASP A08:2021)
+# -------------------------------------------
+
 [[rule]]
-id = "PROC819"
-description = "Process injection vulnerability through command execution."
+id = "PY002"
+description = "Use of 'pickle.loads' for deserialization can lead to remote code execution."
 severity = "High"
-confidence = "Medium"
-remediation = "Validate and sanitize all inputs to process execution functions."
-ast_match = "Call(func.value.id=os, func.attr=popen)"
+remediation = "Use a safer serialization format like JSON if deserializing untrusted data."
+ast_match = "Call(func.value.id=pickle, func.attr=loads)"
 file_pattern = "*.py"
+exclude_file_pattern = "*/cache/backends/*"
 
 [[rule]]
-id = "ENV822"
-description = "Environment variable injection vulnerability."
-severity = "Medium"
-confidence = "Low"
-remediation = "Validate environment variables and use allow-lists where possible."
-pattern = "os\\.environ\\[.*\\+.*\\]"
+id = "PY301"
+description = "Use of 'pickle.load' for deserialization can lead to remote code execution."
+severity = "High"
+remediation = "Use a safer serialization format like JSON if deserializing untrusted data."
+ast_match = "Call(func.attr=load, func.value.id=pickle)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "IMPORT825"
-description = "Dynamic import vulnerability allowing code execution."
+id = "PY302"
+description = "Use of 'yaml.load()' with no Loader — unsafe with PyYAML; allows !!python/object RCE."
 severity = "High"
 confidence = "Medium"
-remediation = "Avoid dynamic imports with user-controlled input."
-ast_match = "Call(func.id=__import__)"
+remediation = "Use yaml.safe_load() or pass Loader=yaml.SafeLoader. For ruamel.yaml, YAML(typ='safe') or the default YAML() round-trip are both safe; only YAML(typ='unsafe') is dangerous."
+pattern = "yaml\\.load[^a-zA-Z_]"
 file_pattern = "*.py"
+# Exclude:
+#   Comment lines                  — not executable
+#   yaml.safe_load()               — explicitly safe
+#   Any Loader= argument           — explicit loader choice
+#   ruamel.yaml safe modes         — YAML() round-trip and typ="safe"/"rt"/"base" are safe
+#   Inline YAML().load()           — ruamel inline construction is round-trip (safe)
+# Note: does not fully distinguish PyYAML (module) from ruamel YAML instance named 'yaml'.
+# Use RUAMEL_UNSAFE001 for ruamel's explicitly unsafe YAML(typ="unsafe") pattern.
+exclude_pattern = "^\\s*#|Loader\\s*=|yaml\\.safe_load|YAML\\s*\\(\\s*\\)\\s*\\.\\s*load|typ\\s*=\\s*[\"'](safe|rt|base)[\"']"
+file_content_exclude = "from ruamel\\.yaml|import ruamel"
 
 [[rule]]
-id = "GETATTR828"
-description = "Unsafe use of getattr with user input."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Validate attribute names or use a whitelist of allowed attributes."
-ast_match = "Call(func.id=getattr)"
+id = "PY303"
+description = "XML parsing with 'xml.etree.ElementTree' is vulnerable to XML bombs."
+severity = "High"
+remediation = "Use 'defusedxml.ElementTree' to parse untrusted XML data safely."
+pattern = "xml\\.etree\\.ElementTree\\.(parse|fromstring)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "SETATTR831"
-description = "Unsafe use of setattr with user input."
+id = "PY304"
+description = "Insecure temporary file creation may lead to race conditions."
 severity = "Medium"
-confidence = "Medium"
-remediation = "Validate attribute names and values before setting."
-ast_match = "Call(func.id=setattr)"
+remediation = "Use 'tempfile.mkstemp()' instead of 'tempfile.mktemp()' for secure temporary file creation."
+pattern = "tempfile\\.mktemp"
 file_pattern = "*.py"
 
 [[rule]]
-id = "DELATTR834"
-description = "Unsafe use of delattr with user input."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Validate attribute names before deletion."
-ast_match = "Call(func.id=delattr)"
-file_pattern = "*.py"
+id = "PY305"
+description = "Use of exec() enables arbitrary code execution"
+severity = "Critical"
+ast_match = "Call(func.id=exec)"
 
 [[rule]]
-id = "HASATTR837"
-description = "Information disclosure through hasattr probing."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit attribute access or implement access controls."
-ast_match = "Call(func.id=hasattr)"
+id = "SANDBOX307"
+description = "Python sandbox escape via object.__subclasses__() — traverses full class hierarchy to retrieve dangerous classes (subprocess.Popen, etc.) without any import."
+severity = "Critical"
+confidence = "High"
+remediation = "Remove __subclasses__() calls that operate on the root object class or traverse __mro__ to reach it. Legitimate code calls __subclasses__() on a specific known class, never on object or via MRO root traversal."
+pattern = "object\\s*\\.\\s*__subclasses__\\s*\\(|__mro__\\s*\\[\\s*-?\\d+\\s*\\]\\s*\\.\\s*__subclasses__\\s*\\("
 file_pattern = "*.py"
+# Matches:
+#   object.__subclasses__()                     — direct root traversal
+#   some.__mro__[-1].__subclasses__()           — MRO-based root traversal
+# Does NOT match:
+#   cls.__subclasses__()      — legitimate: find subclasses of a specific known class
+#   Model.__subclasses__()    — legitimate: ORM model registry
 
 [[rule]]
-id = "VARS840"
-description = "Information disclosure through vars() function."
-severity = "Low"
-confidence = "Low"
-remediation = "Avoid exposing internal object state through vars()."
-ast_match = "Call(func.id=vars)"
+id = "SANDBOX308"
+description = "Python sandbox escape via __init__.__globals__ — accesses the global namespace of a function object, bypassing import restrictions."
+severity = "Critical"
+confidence = "High"
+remediation = "Never access __globals__ on function objects. This is exclusively used to escape restricted execution environments."
+pattern = "__init__\\s*\\.\\s*__globals__|__func__\\s*\\.\\s*__globals__"
 file_pattern = "*.py"
 
+# -------------------------------------------
+# SECTION: Security Misconfiguration (OWASP A05:2021)
+# -------------------------------------------
+
 [[rule]]
-id = "GLOBALS843"
-description = "Access to global namespace through globals()."
+id = "G401"
+description = "Flask app is running with the development server in a non-debug context."
 severity = "Medium"
-confidence = "Medium"
-remediation = "Restrict access to global namespace in untrusted contexts."
-ast_match = "Call(func.id=globals)"
+confidence = "Low"
+remediation = "Use a production-ready WSGI server like Gunicorn or uWSGI instead of 'app.run()'."
+pattern = "app\\.run\\(host=.*0\\.0\\.0\\.0"
 file_pattern = "*.py"
 
 [[rule]]
-id = "LOCALS846"
-description = "Access to local namespace through locals()."
-severity = "Low"
-confidence = "Low"
-remediation = "Be cautious when exposing local variables."
-ast_match = "Call(func.id=locals)"
+id = "G403"
+description = "Flask DEBUG mode is enabled."
+severity = "High"
+remediation = "Ensure app.debug is False or the DEBUG config variable is False in production."
+pattern = "app\\.run\\(.*debug=True"
 file_pattern = "*.py"
 
 [[rule]]
-id = "DIR849"
-description = "Information disclosure through dir() function."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit use of dir() in contexts accessible to untrusted users."
-ast_match = "Call(func.id=dir)"
-file_pattern = "*.py"
+id = "G404"
+description = "Django's CSRF protection appears to be disabled globally."
+severity = "Critical"
+remediation = "Ensure 'django.middleware.csrf.CsrfViewMiddleware' is active in your MIDDLEWARE setting."
+pattern = "#.*CsrfViewMiddleware" # Simple check for commented-out middleware
+file_pattern = "*settings*.py"
 
 [[rule]]
-id = "TYPE852"
-description = "Type confusion vulnerability through type manipulation."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate object types before operations."
-ast_match = "Call(func.id=type)"
+id = "G405"
+description = "Requests made without certificate verification."
+severity = "High"
+remediation = "Remove 'verify=False' from requests calls to prevent man-in-the-middle attacks."
+ast_match = "Call(keywords.*.arg=verify, keywords.*.value.value=False)"
 file_pattern = "*.py"
 
+# -------------------------------------------
+# SECTION: Hardcoded Secrets (OWASP A07:2021)
+# -------------------------------------------
+
 [[rule]]
-id = "ISINSTANCE855"
-description = "Type checking bypass through isinstance manipulation."
-severity = "Low"
-confidence = "Low"
-remediation = "Use additional validation beyond isinstance checks."
-ast_match = "Call(func.id=isinstance)"
+id = "G101"
+description = "Hardcoded password or secret detected."
+severity = "High"
+confidence = "Medium"
+remediation = "Store credentials in environment variables or a secrets management system."
+pattern = "(?i)(password|secret|api_key|token|authkey|bearer|cred|credentials)\\s*[:=]\\s*[\"']\\w{8,}[\"']"
 file_pattern = "*.py"
+# Lines of the form UPPER_CASE_NAME = "value" are treated as developer-defined module-level
+# constants, not secrets, and are excluded from this rule. Uppercase names that explicitly
+# denote secrets (SECRET_KEY, API_KEY, etc.) are excluded here too; G101B below covers them.
+exclude_pattern = "^\\s*[A-Z][A-Z0-9_]+\\s*="
 
 [[rule]]
-id = "REPR858"
-description = "Information disclosure through repr() function."
-severity = "Low"
-confidence = "Low"
-remediation = "Avoid using repr() on sensitive objects in user-facing contexts."
-ast_match = "Call(func.id=repr)"
+id = "G101B"
+description = "Hardcoded secret in uppercase constant — secret key, API key, token, or password assigned directly in code."
+severity = "High"
+confidence = "High"
+remediation = "Store secrets in environment variables: SECRET_KEY = os.environ.get('SECRET_KEY') or use a secrets manager."
+pattern = "(?i)\\b(SECRET[_\\s]?KEY|API[_\\s]?KEY|API[_\\s]?SECRET|ACCESS[_\\s]?KEY|ACCESS[_\\s]?SECRET|AUTH[_\\s]?TOKEN|AUTH[_\\s]?KEY|PRIVATE[_\\s]?KEY|CLIENT[_\\s]?SECRET|APP[_\\s]?SECRET|APP[_\\s]?KEY|SIGNING[_\\s]?KEY|ENCRYPTION[_\\s]?KEY|MASTER[_\\s]?KEY)\\s*=\\s*[\"'][^\"']{16,}[\"']"
 file_pattern = "*.py"
+# Safe: reading from environment or config system — not a hardcoded secret
+exclude_pattern = "os\\.environ|getenv|config\\(|env\\(|settings\\.|vault|secrets\\."
 
 [[rule]]
-id = "STR861"
-description = "Potential information disclosure through str() conversion."
-severity = "Low"
-confidence = "Low"
-remediation = "Control string representations of sensitive objects."
-ast_match = "Call(func.id=str)"
-file_pattern = "*.py"
+id = "G102"
+description = "Hardcoded private key detected."
+severity = "Critical"
+confidence = "High"
+remediation = "Load private keys from a secure, encrypted file or secrets manager."
+pattern = "-----BEGIN (RSA|EC|OPENSSH|PGP) PRIVATE KEY-----"
 
 [[rule]]
-id = "FORMAT864"
-description = "Format string vulnerability in string formatting."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Use safe string formatting methods and validate format strings."
-ast_match = "Call(func.attr=format)"
+id = "G103"
+description = "Use of a blank password for a user or service."
+severity = "High"
+remediation = "Ensure all users and service accounts have strong, non-empty passwords."
+pattern = "(?i)(password|passwd|pass)\\s*[:=]\\s*[\"']\\s*[\"']"
 file_pattern = "*.py"
+# Exclude:
+#   Function parameter defaults: def login(passwd='') — optional API param
+#   Comment lines
+#   Chained initialization: login = account = password = '' — variable init, not a credential
+exclude_pattern = "^\\s*def\\s|^\\s*#|\\w+\\s*=\\s*\\w+\\s*="
+exclude_file_pattern = "*global_settings*"
 
 [[rule]]
-id = "FSTRING867"
-description = "Potential code injection through f-string formatting."
-severity = "Medium"
-confidence = "Low"
-remediation = "Validate and sanitize data used in f-string expressions."
-pattern = "f[\"'][^\"']*\\{.*\\}[^\"']*[\"']"
+id = "G104"
+description = "JWT secret is hardcoded."
+severity = "Critical"
+remediation = "Load JWT secrets from environment variables or a secrets management system."
+pattern = "(?i)(jwt_secret|jwt_key)\\s*[:=]\\s*[\"'].+[\"']"
 file_pattern = "*.py"
 
+# -------------------------------------------
+# SECTION: IaC and Configuration File Security
+# -------------------------------------------
+
 [[rule]]
-id = "REGEX870"
-description = "Regular expression denial of service (ReDoS) vulnerability."
-severity = "Medium"
-confidence = "Low"
-remediation = "Avoid nested quantifiers and catastrophic backtracking in regex."
-pattern = "re\\.(match|search|findall)\\(.*\\(.*\\+.*\\*"
-file_pattern = "*.py"
+id = "DKR001"
+description = "Password or secret found in Dockerfile ENV instruction."
+severity = "High"
+remediation = "Use build-time arguments (ARG) with the --secret flag or a secrets management tool."
+pattern = "(?i)ENV\\s+(PASS|PASSWORD|SECRET|TOKEN|API_KEY)\\s+"
+file_pattern = "Dockerfile"
 
 [[rule]]
-id = "SPLIT873"
-description = "Potential DoS through string split operations."
+id = "DKR002"
+description = "Use of 'latest' tag for base image is not recommended for production."
 severity = "Low"
-confidence = "Low"
-remediation = "Limit the number of splits or validate input size."
-pattern = "\\.split\\(.*maxsplit"
-file_pattern = "*.py"
+remediation = "Pin base images to a specific version digest for reproducible and secure builds."
+pattern = "FROM\\s+\\w+:latest"
+file_pattern = "Dockerfile"
 
 [[rule]]
-id = "JOIN876"
-description = "Memory exhaustion through string join operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate the size of collections before joining."
-ast_match = "Call(func.attr=join)"
-file_pattern = "*.py"
+id = "DKR003"
+description = "Exposing Docker daemon socket inside a container is a security risk."
+severity = "Critical"
+remediation = "Avoid mounting '/var/run/docker.sock' into containers."
+pattern = "/var/run/docker\\.sock"
+file_pattern = "docker-compose*.y*ml"
 
 [[rule]]
-id = "REPLACE879"
-description = "Potential DoS through string replace operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit replacement operations on large strings."
-ast_match = "Call(func.attr=replace)"
-file_pattern = "*.py"
+id = "K8S001"
+description = "Kubernetes container running in privileged mode."
+severity = "Critical"
+remediation = "Set 'securityContext.privileged' to 'false' or remove it."
+pattern = "privileged:\\s*true"
+file_pattern = "*.y*ml"
 
 [[rule]]
-id = "DECODE882"
-description = "Encoding vulnerability in string decode operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Handle encoding errors properly and validate input."
-ast_match = "Call(func.attr=decode)"
-file_pattern = "*.py"
+id = "K8S002"
+description = "Kubernetes container allows privilege escalation."
+severity = "High"
+remediation = "Explicitly set 'securityContext.allowPrivilegeEscalation' to 'false'."
+pattern = "allowPrivilegeEscalation:\\s*true"
+file_pattern = "*.y*ml"
 
 [[rule]]
-id = "ENCODE885"
-description = "Information disclosure through string encoding."
-severity = "Low"
-confidence = "Low"
-remediation = "Be careful when encoding sensitive data."
-ast_match = "Call(func.attr=encode)"
-file_pattern = "*.py"
+id = "TF001"
+description = "Terraform AWS S3 bucket is publicly readable."
+severity = "Critical"
+remediation = "Set the 'acl' property of 'aws_s3_bucket' to 'private', not 'public-read' or 'public-read-write'."
+pattern = "acl\\s*=\\s*\"(public-read|public-read-write)\""
+file_pattern = "*.tf"
 
 [[rule]]
-id = "LOWER888"
-description = "Locale-dependent case conversion vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Use locale-independent case conversion for security comparisons."
-ast_match = "Call(func.attr=lower)"
-file_pattern = "*.py"
+id = "CFG001"
+description = "AWS credentials detected in configuration file."
+severity = "Critical"
+remediation = "Use IAM roles or environment variables for AWS credentials."
+pattern = "(?i)(aws_access_key_id|aws_secret_access_key)\\s*=\\s*[A-Za-z0-9/+=]{20,}"
+file_pattern = "*.ini"
+
+# -------------------------------------------
+# SECTION: ADDITIONAL SECURITY RULES
+# -------------------------------------------
 
 [[rule]]
-id = "UPPER891"
-description = "Locale-dependent case conversion vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Use locale-independent case conversion for security comparisons."
-ast_match = "Call(func.attr=upper)"
+id = "PY500"
+description = "Dynamic code execution using builtins.exec() function."
+severity = "High"
+confidence = "Medium"
+remediation = "Avoid dynamic code execution. Consider safer alternatives or validate input thoroughly."
+ast_match = "Call(func.attr=exec, func.value.id=builtins)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "STRIP894"
-description = "Unicode normalization bypass in string stripping."
-severity = "Low"
-confidence = "Low"
-remediation = "Normalize Unicode strings before validation."
-ast_match = "Call(func.attr=strip)"
+id = "SEC501"
+description = "Generic exec pattern detected in code."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Dynamic code execution can be dangerous. Validate all inputs and consider safer alternatives."
+pattern = "\\bexec\\b\\s*\\("
+# Exclude: function definitions (def exec(...), async def exec(...))
+# Exclude: comment lines
+# Exclude: method calls .exec(...) — taint-driven SK_PY507 handles those
+# Exclude: backtick-wrapped exec() in docstrings/prose
+# Exclude: quoted "exec()" or 'exec()' — documentation text, not actual calls
+exclude_pattern = "^\\s*(?:async\\s+)?def\\s|^\\s*#|\\.exec\\s*\\(|`exec\\(|\"exec\\(\\)\"|'exec\\(\\)'"
 file_pattern = "*.py"
 
 [[rule]]
-id = "STARTSWITH897"
-description = "Bypass vulnerability in string prefix checking."
-severity = "Low"
-confidence = "Low"
-remediation = "Normalize and validate strings before prefix checks."
-ast_match = "Call(func.attr=startswith)"
+id = "PY507"
+description = "Tainted data passed to .exec() method — potential code or SQL injection."
+severity = "Critical"
+confidence = "High"
+remediation = "Validate inputs before passing to .exec(). Use parameterized queries for SQL execution."
+# No pattern — triggered only by taint engine.
+# Pattern-based detection of .exec() generates 100% FPs: fires on ORM sessions
+# (Session.exec(select(...))), docstring code examples, and function definitions.
 file_pattern = "*.py"
 
 [[rule]]
-id = "ENDSWITH900"
-description = "Bypass vulnerability in string suffix checking."
-severity = "Low"
-confidence = "Low"
-remediation = "Normalize and validate strings before suffix checks."
-ast_match = "Call(func.attr=endswith)"
-file_pattern = "*.py"
+id = "WEB508"
+description = "Insecure Content Security Policy with unsafe-inline."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Remove unsafe-inline from CSP directives and use nonces or hashes instead."
+pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
+file_pattern = "*.conf"
 
 [[rule]]
-id = "FIND903"
-description = "Logic error in string search operations."
+id = "CFG510"
+description = "AWS access key detected in configuration."
 severity = "Low"
-confidence = "Low"
-remediation = "Handle -1 return value from find() properly."
-ast_match = "Call(func.attr=find)"
-file_pattern = "*.py"
+remediation = "Store AWS credentials securely using IAM roles or environment variables."
+pattern = "aws_access_key_id\\s*[:=]\\s*['\\\"][A-Za-z0-9/+=]{16,}"
+file_pattern = "*.ini"
 
 [[rule]]
-id = "INDEX906"
-description = "Exception handling bypass in string index operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Use find() instead of index() or handle exceptions properly."
-ast_match = "Call(func.attr=index)"
-file_pattern = "*.py"
+id = "WEB512"
+description = "Bearer token in configuration header."
+severity = "Medium"
+remediation = "Store authentication tokens securely and avoid hardcoding in configuration files."
+pattern = "Authorization\\s*:\\s*\\bBearer\\b"
+file_pattern = "*.conf"
 
 [[rule]]
-id = "COUNT909"
-description = "DoS vulnerability through string count operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit the size of strings used in count operations."
-ast_match = "Call(func.attr=count)"
-file_pattern = "*.py"
+id = "WEB514"
+description = "X-Frame-Options set to allow framing."
+severity = "Medium"
+remediation = "Set X-Frame-Options to DENY or SAMEORIGIN to prevent clickjacking attacks."
+pattern = "X-Frame-Options\\s*:\\s*ALLOW"
+file_pattern = "*.conf"
 
 [[rule]]
-id = "TRANSLATE912"
-description = "Character encoding bypass through translate operations."
+id = "SER522"
+description = "Object serialization function detected."
 severity = "Low"
-confidence = "Low"
-remediation = "Validate translation tables and input strings."
-ast_match = "Call(func.attr=translate)"
+remediation = "Ensure serialized data comes from trusted sources to prevent deserialization attacks."
+# No ast_match/pattern — triggered only by taint engine (SK007)
+
+[[rule]]
+id = "FILE526"
+description = "File read operation using open attribute access."
+severity = "Medium"
+remediation = "Implement proper file access controls and validate file paths."
+ast_match = "Attribute(attr=read, value.id=open)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "MAKETRANS915"
-description = "Translation table manipulation vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate translation mappings for security contexts."
-ast_match = "Call(func.attr=maketrans)"
-file_pattern = "*.py"
+id = "PERM527"
+description = "Setting overly permissive file permissions (777)."
+severity = "High"
+remediation = "Use more restrictive permissions. Consider 644 for files and 755 for directories."
+pattern = "chmod\\s+777"
+file_pattern = "*.sh"
 
 [[rule]]
-id = "CASEFOLD918"
-description = "Unicode normalization vulnerability in casefold operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode normalization effects in security contexts."
-ast_match = "Call(func.attr=casefold)"
+id = "FILE528"
+description = "Direct access to system password file."
+severity = "High"
+confidence = "Medium"
+remediation = "Accessing /etc/passwd should be done through proper system APIs with authorization."
+pattern = "open\\s*\\(\\s*['\\\"]/etc/passwd"
 file_pattern = "*.py"
 
 [[rule]]
-id = "EXPANDTABS921"
-description = "Tab expansion DoS vulnerability."
+id = "TEMP529"
+description = "Insecure temporary file creation using mktemp -u."
 severity = "Low"
-confidence = "Low"
-remediation = "Limit tab expansion or validate input size."
-ast_match = "Call(func.attr=expandtabs)"
-file_pattern = "*.py"
+remediation = "Use mktemp without -u flag or mkstemp for secure temporary file creation."
+pattern = "mktemp\\s+-u"
+file_pattern = "*.sh"
 
 [[rule]]
-id = "ZFILL924"
-description = "Memory exhaustion through zero-fill operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit the width parameter in zfill operations."
-ast_match = "Call(func.attr=zfill)"
-file_pattern = "*.py"
+id = "SSL531"
+description = "SSL/TLS certificate verification disabled."
+severity = "Medium"
+remediation = "Enable certificate verification to prevent man-in-the-middle attacks."
+pattern = "verify\\s*:\\s*false"
+file_pattern = "*.y*ml"
 
 [[rule]]
-id = "CENTER927"
-description = "Memory exhaustion through string centering operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit the width parameter in center operations."
-ast_match = "Call(func.attr=center)"
-file_pattern = "*.py"
+id = "WEB575"
+description = "Content Security Policy allows unsafe inline execution."
+severity = "High"
+confidence = "Medium"
+remediation = "Remove unsafe-inline from CSP directives and implement nonce-based or hash-based CSP."
+pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
+file_pattern = "*.conf"
 
 [[rule]]
-id = "LJUST930"
-description = "Memory exhaustion through string justification operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit the width parameter in ljust operations."
-ast_match = "Call(func.attr=ljust)"
-file_pattern = "*.py"
+id = "SQL586"
+description = "String formatting in SQL query execution."
+severity = "Critical"
+confidence = "Medium"
+remediation = "Use parameterized queries instead of string formatting to prevent SQL injection."
+# No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
 
 [[rule]]
-id = "RJUST933"
-description = "Memory exhaustion through string justification operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit the width parameter in rjust operations."
-ast_match = "Call(func.attr=rjust)"
+id = "SHELL602"
+description = "Shell command execution with dynamic arguments."
+severity = "High"
+confidence = "Medium"
+remediation = "Use subprocess with argument arrays instead of shell command strings."
+pattern = "subprocess\\.(Popen|call)\\(.*shell\\s*=\\s*True"
 file_pattern = "*.py"
 
 [[rule]]
-id = "PARTITION936"
-description = "Logic error in string partition operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate partition results and handle edge cases."
-ast_match = "Call(func.attr=partition)"
-file_pattern = "*.py"
+id = "CODE607"
+description = "Content Security Policy with unsafe inline directives."
+severity = "High"
+confidence = "Medium"
+remediation = "Implement strict CSP without unsafe-inline to prevent XSS attacks."
+pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
+file_pattern = "*.conf"
 
 [[rule]]
-id = "RPARTITION939"
-description = "Logic error in string reverse partition operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate rpartition results and handle edge cases."
-ast_match = "Call(func.attr=rpartition)"
-file_pattern = "*.py"
+id = "SHELL631"
+description = "SQL injection vulnerability in database query."
+severity = "Critical"
+confidence = "Medium"
+remediation = "Use parameterized queries with placeholders instead of string concatenation."
+# No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
 
 [[rule]]
-id = "RSPLIT942"
-description = "Logic error in reverse string split operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate rsplit results and handle maxsplit parameter."
-ast_match = "Call(func.attr=rsplit)"
-file_pattern = "*.py"
+id = "CSP640"
+description = "Unsafe Content Security Policy configuration."
+severity = "High"
+confidence = "Medium"
+remediation = "Configure CSP without unsafe-inline and unsafe-eval directives."
+pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
+file_pattern = "*.conf"
 
 [[rule]]
-id = "SPLITLINES945"
-description = "Line ending normalization vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of different line ending interpretations."
-ast_match = "Call(func.attr=splitlines)"
-file_pattern = "*.py"
+id = "PERM650"
+description = "SQL query with potential injection vulnerability."
+severity = "Critical"
+confidence = "Medium"
+remediation = "Implement prepared statements and parameterized queries to prevent SQL injection."
+# No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
 
 [[rule]]
-id = "SWAPCASE948"
-description = "Locale-dependent case swapping vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Avoid swapcase in security-sensitive contexts."
-ast_match = "Call(func.attr=swapcase)"
-file_pattern = "*.py"
+id = "CSP665"
+description = "Insecure Content Security Policy allowing inline scripts."
+severity = "High"
+confidence = "Medium"
+remediation = "Use nonce or hash-based CSP instead of unsafe-inline directive."
+pattern = "Content-Security-Policy\\s*:\\s*.*unsafe-inline"
+file_pattern = "*.conf"
 
 [[rule]]
-id = "TITLE951"
-description = "Locale-dependent title casing vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Use consistent title casing for security comparisons."
-ast_match = "Call(func.attr=title)"
-file_pattern = "*.py"
+id = "SHELL675"
+description = "Database query with string interpolation."
+severity = "Critical"
+confidence = "Medium"
+remediation = "Use ORM methods or prepared statements instead of string formatting in SQL queries."
+# No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
 
 [[rule]]
-id = "CAPITALIZE954"
-description = "Locale-dependent capitalization vulnerability."
-severity = "Low"
-confidence = "Low"
-remediation = "Use consistent capitalization for security comparisons."
-ast_match = "Call(func.attr=capitalize)"
-file_pattern = "*.py"
+id = "SHELL689"
+description = "Process creation with shell command execution."
+severity = "High"
+confidence = "Medium"
+remediation = "Use process execution without shell to avoid command injection vulnerabilities."
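+# No pattern — triggered by taint engine (listed as SK_SQL001/SK_SQL002; NOTE(review): these are SQL sink IDs on a shell-execution rule — confirm the intended command-execution sink)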
 
 [[rule]]
-id = "LSTRIP957"
-description = "Unicode normalization bypass in left string stripping."
-severity = "Low"
-confidence = "Low"
-remediation = "Normalize Unicode strings before validation."
-ast_match = "Call(func.attr=lstrip)"
-file_pattern = "*.py"
+id = "SQL693"
+description = "String formatting in database execute statement."
+severity = "Critical"
+confidence = "Medium"
+remediation = "Implement parameterized queries to eliminate SQL injection risks."
+# No pattern — triggered by taint engine (SK_SQL001/SK_SQL002)
 
 [[rule]]
-id = "RSTRIP960"
-description = "Unicode normalization bypass in right string stripping."
-severity = "Low"
-confidence = "Low"
-remediation = "Normalize Unicode strings before validation."
-ast_match = "Call(func.attr=rstrip)"
+id = "NET705"
+description = "Network request without SSL certificate verification."
+severity = "High"
+confidence = "Medium"
+remediation = "Enable SSL certificate verification to prevent man-in-the-middle attacks."
+pattern = "requests\\.(get|post|put|delete)\\(.*verify\\s*=\\s*False"
 file_pattern = "*.py"
 
 [[rule]]
-id = "REMOVEPREFIX963"
-description = "Logic error in prefix removal operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate prefix removal and handle Unicode normalization."
-ast_match = "Call(func.attr=removeprefix)"
+id = "CRYPTO708"
+description = "Weak cryptographic key generation — non-CSPRNG used to generate tokens, keys, or secrets."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Use secrets.token_hex(), secrets.token_urlsafe(), or secrets.choice() for security-sensitive values. The random module uses Mersenne Twister which is predictable and not cryptographically secure."
+# Extended to include random.choices/sample/randrange — all non-CSPRNG selection functions
+# commonly misused to generate API keys, OTPs, session tokens, and passwords.
+pattern = "random\\.(randint|random|choices|sample|randrange|choice)\\("
 file_pattern = "*.py"
+# Exclude non-cryptographic uses:
+#   np.random.*  — NumPy random, used for ML data generation/seeds, not key material
+#   len(...)     — load balancing / server selection
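+#   range(...)   — list indexing (NOTE(review): the unanchored `range\\(` below also matches `random.randrange(`, which the pattern is meant to flag — confirm)
+#   choice/randbelow — selection, not key generation (NOTE(review): `random\\.choice` below also matches `random.choices(`, which the pattern is meant to flag — confirm)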
+#   variable names suggesting non-security context (index, delay, seed for ML)
+exclude_pattern = "np\\.random\\.|numpy\\.random\\.|len\\(|range\\(|\\b(index|idx|pos|offset|delay|sleep_|sleep|wait|_n|num_|seed|shape|size|dim|batch|epoch)\\b|_time\\b|_delay\\b|_wait\\b|random\\.choice|randbelow|input_shape|array_ops|benchmark"
 
 [[rule]]
-id = "REMOVESUFFIX966"
-description = "Logic error in suffix removal operations."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate suffix removal and handle Unicode normalization."
-ast_match = "Call(func.attr=removesuffix)"
+id = "AUTH711"
+description = "Authentication bypass using hardcoded credentials."
+severity = "Critical"
+confidence = "High"
+remediation = "Implement proper authentication mechanisms without hardcoded credentials."
+pattern = "(?i)(username|user)\\s*[:=]\\s*[\"']admin[\"']"
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISALNUM969"
-description = "Unicode category bypass in alphanumeric checking."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode character categories in validation."
-ast_match = "Call(func.attr=isalnum)"
+id = "LDAP717"
+description = "LDAP injection vulnerability in search filter."
+severity = "High"
+confidence = "Medium"
+remediation = "Properly escape LDAP filter characters or use parameterized LDAP queries."
+pattern = "\\.search\\(.*filter.*%s"
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISALPHA972"
-description = "Unicode category bypass in alphabetic checking."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode character categories in validation."
-ast_match = "Call(func.attr=isalpha)"
+id = "XPATH720"
+description = "XPath injection vulnerability detected."
+severity = "High"
+confidence = "Medium"
+remediation = "Use parameterized XPath queries or properly escape user input."
+pattern = "xpath\\(.*%s"
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISASCII975"
-description = "ASCII validation bypass with Unicode characters."
-severity = "Low"
-confidence = "Low"
-remediation = "Use proper Unicode handling for international support."
-ast_match = "Call(func.attr=isascii)"
+id = "DESER723"
+description = "Unsafe deserialization of untrusted data via marshal.loads()."
+severity = "Critical"
+confidence = "High"
+remediation = "Never deserialize marshal bytecode from untrusted sources. Use JSON/protobuf for data exchange. For model serialization, use SavedModel format instead of custom bytecode paths."
+ast_match = "Call(func.value.id=marshal, func.attr=loads)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISDECIMAL978"
-description = "Unicode decimal category bypass in number validation."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode decimal categories beyond 0-9."
-ast_match = "Call(func.attr=isdecimal)"
+id = "DESER724"
+description = "Deserialized bytecode executed via types.FunctionType() — arbitrary code execution from untrusted marshal.loads() output."
+severity = "Critical"
+confidence = "High"
+remediation = "Never create functions from deserialized code objects. This is equivalent to pickle.loads() and allows full RCE. Use marshal only for trusted, developer-controlled bytecode in controlled build environments."
 file_pattern = "*.py"
+# No pattern — triggered only by taint engine (SK_DESER724):
+# marshal.loads(raw) → code is tainted → FunctionType(code, globals()) fires this rule.
 
 [[rule]]
-id = "ISDIGIT981"
-description = "Unicode digit category bypass in number validation."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode digit categories beyond 0-9."
-ast_match = "Call(func.attr=isdigit)"
+id = "PRIV726"
+description = "Privilege escalation through setuid binary execution."
+severity = "High"
+confidence = "Medium"
+remediation = "Avoid executing setuid binaries or implement proper privilege checks."
+pattern = "os\\.setuid\\("
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISIDENTIFIER984"
-description = "Python identifier validation bypass."
-severity = "Low"
+id = "RACE729"
+description = "Race condition in file operations."
+severity = "Medium"
 confidence = "Low"
-remediation = "Validate identifiers against allowed patterns."
-ast_match = "Call(func.attr=isidentifier)"
+remediation = "Use atomic file operations or proper locking mechanisms."
+pattern = "os\\.path\\.exists.*open\\("
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISLOWER987"
-description = "Case checking bypass with Unicode characters."
+id = "INFO738"
+description = "Information disclosure through error messages."
 severity = "Low"
 confidence = "Low"
-remediation = "Be aware of Unicode case categories."
-ast_match = "Call(func.attr=islower)"
+remediation = "Implement generic error messages that don't reveal system information."
+pattern = "traceback\\.print_exc\\("
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISNUMERIC990"
-description = "Unicode numeric category bypass in validation."
+id = "LOG741"
+description = "User-controlled data in log statement — log injection risk."
 severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode numeric categories."
-ast_match = "Call(func.attr=isnumeric)"
+confidence = "Medium"
+remediation = "Sanitize user input before logging. An attacker who controls log content can fake entries, inject ANSI escape codes, or corrupt log parsers."
 file_pattern = "*.py"
+# No pattern — triggered only by taint engine (SK_LOG741_*)
+# Only fires when data traced from request.GET/POST/CLI args/API responses
+# reaches a logging call. Internal framework objects and computed values
+# are never tainted → no false positives on framework internals.
 
 [[rule]]
-id = "ISPRINTABLE993"
-description = "Printable character validation bypass."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode printable character definitions."
-ast_match = "Call(func.attr=isprintable)"
+id = "SESS744"
+description = "Session fixation vulnerability in session handling."
+severity = "High"
+confidence = "Medium"
+remediation = "Regenerate session IDs after authentication to prevent fixation attacks."
+# Writing data to a session is NOT session fixation. Only flag direct session key assignment from request.
+pattern = "session\\.session_key\\s*=.*request\\."
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISSPACE996"
-description = "Whitespace character validation bypass."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode whitespace character definitions."
-ast_match = "Call(func.attr=isspace)"
+id = "CSRF747"
+description = "Cross-Site Request Forgery protection bypass."
+severity = "High"
+confidence = "Medium"
+remediation = "Implement proper CSRF tokens for state-changing operations."
+pattern = "@csrf_exempt"
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISTITLE999"
-description = "Title case validation bypass with Unicode."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode title case definitions."
-ast_match = "Call(func.attr=istitle)"
+id = "HTTP750"
+description = "HTTP response splitting vulnerability."
+severity = "High"
+confidence = "Medium"
+remediation = "Validate and sanitize HTTP headers to prevent response splitting."
+pattern = "HttpResponse\\(.*\\\\r\\\\n"
 file_pattern = "*.py"
 
 [[rule]]
-id = "ISUPPER1002"
-description = "Upper case validation bypass with Unicode."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of Unicode upper case definitions."
-ast_match = "Call(func.attr=isupper)"
+id = "UPLOAD753"
+description = "Unrestricted file upload vulnerability."
+severity = "High"
+confidence = "Medium"
+remediation = "Implement file type validation and size limits for uploads."
+pattern = "request\\.FILES\\[.*\\]\\.save\\("
 file_pattern = "*.py"
 
 [[rule]]
-id = "BYTES1005"
-description = "Bytes object creation with user input."
-severity = "Low"
+id = "CACHE756"
+description = "Cache poisoning vulnerability in HTTP caching."
+severity = "Medium"
 confidence = "Low"
-remediation = "Validate encoding when creating bytes from user input."
-ast_match = "Call(func.id=bytes)"
+remediation = "Validate cache keys and implement proper cache invalidation."
+pattern = "cache\\.set\\(.*request\\."
 file_pattern = "*.py"
 
 [[rule]]
-id = "BYTEARRAY1008"
-description = "Mutable byte array creation with user input."
-severity = "Low"
+id = "TIMING759"
+description = "Timing attack vulnerability in authentication — direct equality comparison of secret values."
+severity = "Medium"
 confidence = "Low"
-remediation = "Validate encoding when creating bytearrays from user input."
-ast_match = "Call(func.id=bytearray)"
+remediation = "Use hmac.compare_digest() or secrets.compare_digest() for all secret/hash comparisons."
+pattern = "password\\s*==\\s*.*"
 file_pattern = "*.py"
+# Exclude null/empty checks: `if password is None or password == ""` is a presence check,
+# not a secret comparison. Also exclude `password != ""` style guards.
+exclude_pattern = "is None|== \"\"|== ''|!= \"\"|!= ''|^\\s*#"
 
 [[rule]]
-id = "MEMORYVIEW1011"
-description = "Memory view creation exposing internal buffer."
+id = "ENUM762"
+description = "User enumeration vulnerability in login system."
 severity = "Low"
 confidence = "Low"
-remediation = "Be careful when exposing memory views of sensitive data."
-ast_match = "Call(func.id=memoryview)"
+remediation = "Return identical responses for valid and invalid usernames."
+pattern = "User\\.objects\\.get\\(username="
 file_pattern = "*.py"
 
 [[rule]]
-id = "ORD1014"
-description = "Character code point extraction."
-severity = "Low"
+id = "TOKEN771"
+description = "JWT token created without expiration — tokens valid indefinitely if stolen."
+severity = "Medium"
 confidence = "Low"
-remediation = "Validate character input before extracting code points."
-ast_match = "Call(func.id=ord)"
+remediation = "Always include 'exp' claim in JWT payload: {'sub': user_id, 'exp': datetime.utcnow() + timedelta(hours=1)}."
+# jwt.encode() is the creation side — only flag when no 'exp' key is visible nearby.
+# jwt.decode() without verify is caught by JWT001.
+pattern = "jwt\\.encode\\s*\\("
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#|[\"']exp[\"']|datetime|timedelta"
 
 [[rule]]
-id = "CHR1017"
-description = "Character creation from code point."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate code points to prevent Unicode injection."
-ast_match = "Call(func.id=chr)"
+id = "OAUTH774"
+description = "OAuth state parameter potentially missing in authorization request (Manual inspection suggested)."
+severity = "High"
+confidence = "Medium"
+remediation = "Include state parameter in OAuth flows to prevent CSRF attacks."
+pattern = "oauth.*authorize.*"
 file_pattern = "*.py"
+# Public OAuth authorization URLs in string literals are DeveloperDefined endpoints, not missing state params
+exclude_pattern = "[\"']https?://.*oauth.*authorize|client_id="
 
 [[rule]]
-id = "HEX1020"
-description = "Hexadecimal conversion exposing internal data."
-severity = "Low"
+id = "API777"
+description = "API endpoint without rate limiting."
+severity = "Medium"
 confidence = "Low"
-remediation = "Be careful when converting sensitive data to hex."
-ast_match = "Call(func.attr=hex)"
+remediation = "Implement rate limiting on API endpoints to prevent abuse."
+pattern = "@app\\.route.*methods.*POST"
 file_pattern = "*.py"
 
 [[rule]]
-id = "OCT1023"
-description = "Octal conversion potentially exposing data."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate octal conversions in security contexts."
-ast_match = "Call(func.id=oct)"
+id = "CORS780"
+description = "Overly permissive CORS configuration."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Restrict CORS origins to trusted domains only."
+pattern = "Access-Control-Allow-Origin\\s*:\\s*\\*"
 file_pattern = "*.py"
 
 [[rule]]
-id = "BIN1026"
-description = "Binary conversion potentially exposing data."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate binary conversions in security contexts."
-ast_match = "Call(func.id=bin)"
-file_pattern = "*.py"
+id = "HTTPS789"
+description = "Missing HTTPS enforcement in security-sensitive context."
+severity = "High"
+confidence = "Medium"
+remediation = "Enforce HTTPS for all security-sensitive operations."
+pattern = "SECURE_SSL_REDIRECT\\s*=\\s*False"
+file_pattern = "*settings*.py"
+# global_settings.py is a framework defaults file — False here is the intended default.
+# Deployments must override this in their project settings.
+exclude_file_pattern = "*global_settings*"
 
 [[rule]]
-id = "FLOAT1029"
-description = "Floating point precision issues in security calculations."
-severity = "Low"
-confidence = "Low"
-remediation = "Use decimal module for precise financial calculations."
-ast_match = "Call(func.id=float)"
+id = "COOKIE792"
+description = "Insecure cookie configuration detected."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Set secure and httponly flags on sensitive cookies."
+pattern = "set_cookie\\(.*secure=False"
 file_pattern = "*.py"
 
 [[rule]]
-id = "COMPLEX1032"
-description = "Complex number usage in security contexts."
-severity = "Low"
-confidence = "Low"
-remediation = "Avoid complex numbers in security-sensitive calculations."
-ast_match = "Call(func.id=complex)"
+id = "ADMIN795"
+description = "Default admin credentials detected."
+severity = "Critical"
+confidence = "High"
+remediation = "Change default administrative credentials before deployment."
+pattern = "(?i)(admin|administrator).*password.*password"
 file_pattern = "*.py"
+# "class AdminPasswordChangeForm" is a Python class declaration — DeveloperDefined name, not a credential
+exclude_pattern = "^\\s*class\\s+"
 
 [[rule]]
-id = "BOOL1035"
-description = "Boolean conversion potentially hiding truthy/falsy behavior."
-severity = "Low"
-confidence = "Low"
-remediation = "Be explicit about boolean conversions in security checks."
-ast_match = "Call(func.id=bool)"
+id = "DEBUG798"
+description = "Debug information exposed in production."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Disable debug mode and remove debug statements in production."
+pattern = "print\\(.*(password|secret)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "INT1038"
-description = "Integer conversion with potential overflow."
-severity = "Low"
+id = "BACKUP801"
+description = "Backup file with sensitive information accessible."
+severity = "Medium"
 confidence = "Low"
-remediation = "Validate integer conversions and handle overflow."
-ast_match = "Call(func.id=int)"
-file_pattern = "*.py"
+remediation = "Secure backup files and exclude them from web-accessible directories."
+# Require a real filename base (word char) before the backup extension — prevents
+# matching bare extension strings like '.bak', '*.old', '".bak"' in code comments,
+# docs, and build scripts that reference backup extensions without actual file paths.
+pattern = "['\"][^'\"]*\\w\\.(bak|backup|old)['\"]"
+file_pattern = "*"
+exclude_file_pattern = "*.sh,*.rst,*.md,*.txt"
 
 [[rule]]
-id = "LIST1041"
-description = "List creation with potential memory exhaustion."
+id = "CONFIG804"
+description = "Configuration file with default values."
 severity = "Low"
 confidence = "Low"
-remediation = "Limit list sizes to prevent memory exhaustion."
-ast_match = "Call(func.id=list)"
-file_pattern = "*.py"
+remediation = "Change default configuration values before production deployment."
+pattern = "(?i)secret_key.*changeme"
+file_pattern = "*settings*.py"
 
 [[rule]]
-id = "TUPLE1044"
-description = "Tuple creation with potential memory exhaustion."
-severity = "Low"
+id = "HASH807"
+description = "Use of SHA-256 for password hashing — prefer a KDF (bcrypt, scrypt, Argon2)."
+severity = "Medium"
 confidence = "Low"
-remediation = "Limit tuple sizes to prevent memory exhaustion."
-ast_match = "Call(func.id=tuple)"
+remediation = "For password storage use bcrypt, scrypt, or Argon2. SHA-256 without a salt/iteration factor is fast and vulnerable to brute force."
+# SHA-256 is strong for general purposes; the intent is to flag only password-hashing uses.
+# There is no positive 'password' check here: every hashlib.sha256 call matches unless exclude_pattern (integrity/fingerprinting/test terms) applies.
+ast_match = "Call(func.value.id=hashlib, func.attr=sha256)"
 file_pattern = "*.py"
+exclude_pattern = "fingerprint|checksum|digest|integrity|hash_file|file_hash|sha256_file|content_hash|benchmark|test|sample|example|demo"
 
 [[rule]]
-id = "SET1047"
-description = "Set creation with potential memory exhaustion."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit set sizes to prevent memory exhaustion."
-ast_match = "Call(func.id=set)"
-file_pattern = "*.py"
+id = "RAND810"
+description = "Use of predictable random number generator."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Use cryptographically secure random generators for security purposes."
+# No ast_match/pattern — triggered only by taint engine (SK008)
 
 [[rule]]
-id = "DICT1050"
-description = "Dictionary creation with potential memory exhaustion."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit dictionary sizes to prevent memory exhaustion."
-ast_match = "Call(func.id=dict)"
+id = "SSRF_001"
+description = "Server-Side Request Forgery — user-controlled URL in HTTP client request."
+severity = "High"
+confidence = "High"
+remediation = "Validate URLs against an allowlist of trusted hosts/schemes before using in HTTP requests. Reject file://, internal IPs (10.x, 172.16-31.x, 192.168.x), and metadata endpoints (169.254.169.254)."
 file_pattern = "*.py"
+# No pattern — triggered only by taint engine (SK_SSRF001-SK_SSRF008)
+# Note: SSRF requires control of the HOST, not just path components.
+# 'https://api.example.com/v1/%s' % user_id  — NOT SSRF (host is literal)
+# r.json()["url"] flowing to httpx.stream()  — SSRF (full URL is attacker-controlled)
+# The taint engine correctly handles this: taint must reach the URL argument.
+# For CLI args (parse_args taint source) flowing into format strings where only
+# path params vary, the engine may produce FPs. Those cases need per-sink
+# host-vs-path discrimination — a future enhancement.
 
 [[rule]]
-id = "FROZENSET1053"
-description = "Frozenset creation with potential memory exhaustion."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit frozenset sizes to prevent memory exhaustion."
-ast_match = "Call(func.id=frozenset)"
+id = "PATH813"
+description = "Path manipulation vulnerability in file operations."
+severity = "High"
+confidence = "Medium"
+remediation = "Validate and normalize file paths to prevent directory traversal."
+pattern = "os\\.path\\.join\\(.*\\.\\."
 file_pattern = "*.py"
+# Exclude safe package-root navigation patterns:
+#   os.path.join(__file__, '..', '..')  — finding package root from current file
+#   os.path.join(module.__file__, '..')  — navigating relative to installed module
+#   os.path.join(os.path.dirname(__file__), ..)  — standard Python package path
+exclude_pattern = "__file__|module\\.__file__|dirname\\(__file__\\)|abspath.*dirname"
 
 [[rule]]
-id = "RANGE1056"
-description = "Range creation with potential memory exhaustion."
-severity = "Low"
+id = "SYMLINK816"
+description = "Symbolic link vulnerability — user-controlled path in os.symlink()."
+severity = "Medium"
 confidence = "Low"
-remediation = "Validate range parameters to prevent excessive iterations."
-ast_match = "Call(func.id=range)"
+remediation = "Validate symlink target paths; never use untrusted input as a symlink source."
 file_pattern = "*.py"
+# Pattern removed — SYMLINK816 is now taint-driven only (see taint_sink SK_SYMLINK001).
+# Pattern-based matching produced 100% FPs (capability detection, static file management).
+# Only fires when the symlink source argument is HttpRequest-tainted.
 
 [[rule]]
-id = "ENUMERATE1059"
-description = "Enumeration with potential performance impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Consider performance implications of enumerating large collections."
-ast_match = "Call(func.id=enumerate)"
+id = "PROC819"
+description = "Process injection vulnerability through command execution."
+severity = "High"
+confidence = "Medium"
+remediation = "Validate and sanitize all inputs to process execution functions."
+ast_match = "Call(func.value.id=os, func.attr=popen)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "ZIP1062"
-description = "Zip operation with potential memory exhaustion."
-severity = "Low"
-confidence = "Low"
-remediation = "Be careful when zipping large collections."
-ast_match = "Call(func.id=zip)"
+id = "IMPORT825"
+description = "Dynamic import vulnerability allowing code execution."
+severity = "High"
+confidence = "Medium"
+remediation = "Avoid dynamic imports with user-controlled input. Use importlib with validated module names."
+ast_match = "Call(func.id=__import__)"
 file_pattern = "*.py"
+# Exclude Python 2/3 compatibility shims (six, future) and stdlib-only imports.
+# These use __import__ with fixed or validated module names from the Python
+# standard library, not from user input.
+# Also exclude when the import name is from a known-safe source (self.LIB,
+# self.package) — these are class attributes set from validated plugin registries.
+exclude_pattern = "self\\.(LIB|package|base_class|module)|__import__\\(name\\)|six\\.|future\\."
 
 [[rule]]
-id = "MAP1065"
-description = "Map operation with potential memory exhaustion."
-severity = "Low"
-confidence = "Low"
-remediation = "Consider memory usage when mapping over large collections."
-ast_match = "Call(func.id=map)"
-file_pattern = "*.py"
+id = "GETATTR828"
+description = "User-controlled attribute name passed to getattr() — attacker may access arbitrary attributes."
+severity = "High"
+confidence = "High"
+remediation = "Validate attribute names against an allowlist before passing to getattr(). Never let user input control which attribute is accessed."
+# No ast_match — this rule is triggered ONLY by the taint engine (SK002).
+# Taint flow: request.* → variable → getattr(obj, variable)
+# Exclude ORM serializer patterns: getattr(obj, field.name) where field.name comes from
+# ORM model _meta (developer-defined schema), not user input. These generate high FP
+# rates in serializer/schema code across all ORM frameworks.
+exclude_file_pattern = "*pyct*,*serializer*,*schema*,*/pandas/core/*,pandas/core/*,*/pandas/io/*,pandas/io/*"
 
 [[rule]]
-id = "FILTER1068"
-description = "Filter operation with potential performance impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Consider performance when filtering large collections."
-ast_match = "Call(func.id=filter)"
-file_pattern = "*.py"
+id = "SETATTR831"
+description = "Unsafe use of setattr with user input."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Validate attribute names and values before setting."
+# No ast_match/pattern — triggered only by taint engine (SK005)
 
 [[rule]]
-id = "REDUCE1071"
-description = "Reduce operation with potential performance impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Consider performance when reducing large collections."
-pattern = "functools\\.reduce\\("
-file_pattern = "*.py"
+id = "DELATTR834"
+description = "Unsafe use of delattr with user input."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Validate attribute names before deletion."
+# No ast_match/pattern — triggered only by taint engine (SK006)
 
 [[rule]]
-id = "SORTED1074"
-description = "Sorting operation with potential DoS impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Limit collection sizes before sorting to prevent DoS."
-ast_match = "Call(func.id=sorted)"
+id = "GLOBALS843"
+description = "globals() used in code-execution context — exec/eval with global namespace."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Never pass globals() to exec/eval with untrusted code. Dynamic module attribute registration via globals()[name]=value is acceptable for plugin/codec loading."
+# Only matches exec/eval with globals() — the genuinely dangerous pattern.
+# Removed: globals()['key'] subscript assignment — this is standard Python for
+# dynamic module attribute registration (hashlib hash functions, plugin loaders,
+# codec registration) and generates high FP rates in framework code.
+pattern = "exec[\\s(].*globals\\s*\\(\\)|eval[\\s(].*globals\\s*\\(\\)"
 file_pattern = "*.py"
 
 [[rule]]
-id = "REVERSED1077"
-description = "Reverse operation with potential memory impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Consider memory usage when reversing large collections."
-ast_match = "Call(func.id=reversed)"
-file_pattern = "*.py"
+id = "FORMAT864"
+description = "Format string vulnerability in string formatting."
+severity = "Medium"
+confidence = "Medium"
+remediation = "Use safe string formatting methods and validate format strings."
+# No ast_match/pattern — triggered only by taint engine (SK009)
 
 [[rule]]
-id = "SUM1080"
-description = "Sum operation with potential overflow or DoS."
-severity = "Low"
+id = "REGEX870"
+description = "Regular expression denial of service (ReDoS) vulnerability — nested quantifiers."
+severity = "Medium"
 confidence = "Low"
-remediation = "Validate numeric ranges to prevent overflow or DoS."
-ast_match = "Call(func.id=sum)"
+remediation = "Avoid nested quantifiers: (x+)+, (a*)+, (a+)* cause catastrophic backtracking."
+pattern = "re\\.(match|search|findall|compile)\\(.*\\([^)]*[+*][^)]*\\)([+*]|\\{[0-9])"
 file_pattern = "*.py"
+# Only flag when a capturing/non-capturing group itself has a quantifier INSIDE and OUTSIDE:
+# (a+)+  (a*)*  (a+)*  (a+){2,} → dangerous nested quantifiers
+# (\s+){key_name} → f-string brace after ), safe (brace not followed by digit)
+#
+# Safe pattern: (\\w+\\.)+\\w+  — matches dotted identifiers like "foo.bar.baz"
+# \\w+ only matches [a-zA-Z0-9_] so alternation between dot and word chars is non-overlapping
+# → no catastrophic backtracking. Exclude when inner group uses \\w or \\d only.
+exclude_pattern = "\\\\w\\+\\.\\)\\+|\\\\d\\+\\.\\)\\+|\\\\w\\+\\.\\)\\*"
 
 [[rule]]
-id = "MAX1083"
-description = "Max operation with potential performance impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Consider performance when finding max of large collections."
-ast_match = "Call(func.id=max)"
-file_pattern = "*.py"
+id = "OPEN1149"
+description = "User-controlled path passed to open() — potential path traversal or arbitrary file read/write."
+severity = "High"
+confidence = "High"
+remediation = "Validate and sanitize file paths. Use os.path.realpath() and verify the result stays within the expected directory."
+# No ast_match — triggered ONLY by taint engine (SK003).
+# Taint flow: request.* → variable → open(variable)
 
 [[rule]]
-id = "MIN1086"
-description = "Min operation with potential performance impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Consider performance when finding min of large collections."
-ast_match = "Call(func.id=min)"
+id = "SSTI001"
+description = "Server-Side Template Injection — user-controlled data used as Jinja2/Mako template string."
+severity = "Critical"
+confidence = "High"
+remediation = "Never pass user input as the template string. Use render_template() with a static file. Pass user data as template VARIABLES (context), not as the template source itself. For Jinja2, use SandboxedEnvironment if dynamic templates are required."
 file_pattern = "*.py"
+# Triggered by taint engine (SK_SSTI001: render_template_string, SK_SSTI002: env.from_string).
+# render_template_string(user_template) or env.from_string(user_template).render() → Jinja2 RCE.
 
 [[rule]]
-id = "ABS1089"
-description = "Absolute value operation with potential overflow."
-severity = "Low"
-confidence = "Low"
-remediation = "Handle potential overflow in absolute value calculations."
-ast_match = "Call(func.id=abs)"
+id = "ORM002"
+description = "Django ORM injection — user-controlled value in raw(), order_by(), or extra() QuerySet method."
+severity = "Critical"
+confidence = "High"
+remediation = "Never pass user input directly to raw(), order_by(), or extra(). For sorting, validate the field name against an explicit allowlist. For raw queries, use parameterized placeholders (%s). Avoid extra() entirely — use annotate() with Case/When instead."
 file_pattern = "*.py"
+# Triggered by taint engine: SK_ORMRAW001 (raw), SK_ORMORDER001 (order_by), SK_ORMEXTRA001 (extra).
+# CVE-2021-35042: order_by(user_input) allows column name injection.
+# CVE-2022-28346: extra(**user_dict) allows SQL injection via crafted kwargs (CVE-2022-28347 is the analogous QuerySet.explain(**options) flaw).
 
 [[rule]]
-id = "ROUND1092"
-description = "Rounding operation with potential precision loss."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of floating point precision issues in rounding."
-ast_match = "Call(func.id=round)"
+id = "DESER725"
+description = "Insecure deserialization via jsonpickle.decode() — arbitrary Python object deserialization leading to RCE."
+severity = "Critical"
+confidence = "High"
+remediation = "Never pass untrusted data to jsonpickle.decode(). jsonpickle restores arbitrary Python objects including __reduce__ gadgets. Use json.loads() for safe data exchange. CVE-2020-22083, CVE-2024 (Splunk RCE)."
+pattern = "jsonpickle\\.decode\\s*\\("
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "POW1095"
-description = "Power operation with potential overflow or DoS."
-severity = "Medium"
-confidence = "Low"
-remediation = "Limit exponents to prevent computational DoS attacks."
-ast_match = "Call(func.id=pow)"
+id = "DESER726"
+description = "Insecure deserialization via dill.loads() — arbitrary Python object deserialization leading to RCE."
+severity = "Critical"
+confidence = "High"
+remediation = "Never pass untrusted data to dill.loads(). dill extends pickle with support for lambdas and closures, enabling full RCE via crafted serialized payloads. Use json.loads() or protocol buffers for data exchange."
+pattern = "dill\\.loads\\s*\\("
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "DIVMOD1098"
-description = "Division with modulo operation potential issues."
-severity = "Low"
-confidence = "Low"
-remediation = "Handle division by zero and validate operands."
-ast_match = "Call(func.id=divmod)"
+id = "TLS001"
+description = "TLS certificate verification disabled — connection is vulnerable to man-in-the-middle attacks."
+severity = "High"
+confidence = "High"
+remediation = "Remove verify=False. Always verify TLS certificates. If using a custom CA, pass verify='/path/to/ca-bundle.crt' instead of disabling verification. For urllib3, remove urllib3.disable_warnings(InsecureRequestWarning)."
+pattern = "\\bverify\\s*=\\s*False\\b|urllib3\\.disable_warnings\\s*\\(.*InsecureRequestWarning|TCPConnector\\s*\\(.*ssl\\s*=\\s*False|check_hostname\\s*=\\s*False"
 file_pattern = "*.py"
+# Exclude:
+#   Comment/docstring lines
+#   Array/indexer operations: _mgr.take(verify=False), indexer=..., verify=False
+#   Lines containing axis= (pandas internal indexer calls)
+#   Bare verify=False on its own line (fragment of a multi-line pandas call)
+#   Docstring text describing the verify parameter
+exclude_pattern = "^\\s*#|\\baxis\\s*=|_mgr\\.|_block|block_manager|Pass\\s+verify|^\\s+verify=False,?\\s*$|take\\s*\\(|indexer[^=]*verify|assumed|codes equal|parameter|description"
 
 [[rule]]
-id = "LEN1101"
-description = "Length operation with potential performance impact."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware that len() on some objects can be expensive."
-ast_match = "Call(func.id=len)"
+id = "SSH001"
+description = "Paramiko host key validation disabled — SSH connection vulnerable to man-in-the-middle attacks."
+severity = "High"
+confidence = "High"
+remediation = "Use RejectPolicy() or load known_hosts with client.load_system_host_keys() or client.load_host_keys(). AutoAddPolicy blindly accepts any server's host key, enabling MITM attacks that intercept SSH sessions and credentials."
+pattern = "AutoAddPolicy\\s*\\(\\s*\\)"
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "ANY1104"
-description = "Any operation with potential short-circuit bypass."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware of short-circuit evaluation in security checks."
-ast_match = "Call(func.id=any)"
+id = "JWT001"
+description = "JWT signature verification disabled — tokens accepted without cryptographic validation."
+severity = "High"
+confidence = "High"
+remediation = "Never set verify_signature=False or algorithms=['none'] in jwt.decode(). Without signature verification, any attacker can forge arbitrary JWT claims (user ID, role, expiry). Always verify the signature with the correct key and algorithm."
+pattern = "verify_signature[\"']?\\s*:\\s*False|[\"']none[\"']\\s*.*algorithm|algorithms\\s*=\\s*\\[[\"']none[\"']"
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "ALL1107"
-description = "All operation with potential short-circuit bypass."
-severity = "Low"
+id = "ZIPSLIP001"
+description = "Archive extraction without path validation — Zip Slip / Tar Slip arbitrary file write."
+severity = "High"
 confidence = "Low"
-remediation = "Be aware of short-circuit evaluation in security checks."
-ast_match = "Call(func.id=all)"
+remediation = "Before extractall(), validate every member path: reject entries containing '../' or absolute paths. Use a safe extraction helper that checks paths, or iterate members manually with extract() after validation."
+pattern = "\\.extractall\\s*\\("
 file_pattern = "*.py"
+# Exclude:
+#   filter= argument — Python 3.12+ safe extraction filter
+#   str.extractall() — pandas/polars string accessor for regex extraction (NOT archive)
+#   Series.str.extractall — same, string regex method
+exclude_pattern = "^\\s*#|filter\\s*=|str\\.extractall|strings.*extractall|accessor.*extractall|\\.str\\."
+# Low confidence: legitimate uses exist when archives are trusted/developer-controlled.
 
 [[rule]]
-id = "ITER1110"
-description = "Iterator creation with potential memory issues."
-severity = "Low"
-confidence = "Low"
-remediation = "Be careful with iterators over large or infinite sequences."
-ast_match = "Call(func.id=iter)"
+id = "XXE001"
+description = "lxml XML parser with external entity resolution — XML External Entity (XXE) vulnerability."
+severity = "High"
+confidence = "Medium"
+remediation = "Use defusedxml.lxml, or create a safe parser: etree.XMLParser(resolve_entities=False, no_network=True, load_dtd=False). lxml's default XMLParser has resolve_entities=True, allowing XXE via crafted XML."
+pattern = "etree\\.(parse|fromstring|XML|HTML)\\s*\\("
 file_pattern = "*.py"
+# lxml's default parser resolves external entities. Attacker-controlled XML can read
+# arbitrary files (/etc/passwd) or trigger SSRF to internal services via entity references.
+exclude_pattern = "^\\s*#|defusedxml|resolve_entities\\s*=\\s*False"
 
 [[rule]]
-id = "NEXT1113"
-description = "Next operation with potential StopIteration issues."
-severity = "Low"
-confidence = "Low"
-remediation = "Handle StopIteration exceptions properly."
-ast_match = "Call(func.id=next)"
+id = "ORM001"
+description = "SQLAlchemy text() with string formatting — SQL injection via ORM raw query escape hatch."
+severity = "Critical"
+confidence = "High"
+remediation = "Use bound parameters: text('SELECT * FROM users WHERE id = :id').bindparams(id=user_id). Never construct the SQL string with f-strings, %, or .format(). The text() function is for static SQL only."
+# \b (word boundary) prevents matching gettext(), pgettext(), ngettext():
+# in "gettext(" the 't' in "text" is preceded by 'e' (word char) — no boundary, no match.
+# in "text(" or "sa.text(" the 't' is preceded by non-word — boundary matches.
+pattern = "\\btext\\s*\\(\\s*f[\"']|\\btext\\s*\\(.*[\"']\\s*%|\\btext\\s*\\(.*\\.format\\s*\\("
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
+# Exclude migration/backend files: f-strings in migrations contain hardcoded schema
+# identifiers, not user input. Backend files are ORM infrastructure, not application code.
+exclude_file_pattern = "*/migrations/*,*/alembic/*,*/backends/*"
 
 [[rule]]
-id = "SLICE1116"
-description = "Slice operation with potential memory exhaustion."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate slice parameters to prevent excessive memory usage."
-ast_match = "Call(func.id=slice)"
+id = "FLASK001"
+description = "Flask application running with debug mode enabled — Werkzeug interactive debugger exposed."
+severity = "Critical"
+confidence = "High"
+remediation = "Never run Flask with debug=True in production. The Werkzeug debugger provides an interactive Python REPL on every 500 error, allowing full RCE for anyone who can trigger an exception."
+pattern = "app\\.run\\s*\\(.*\\bdebug\\s*=\\s*True|app\\.debug\\s*=\\s*True|[\"']DEBUG[\"']\\s*:\\s*True"
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "PROPERTY1119"
-description = "Property creation with potential access control bypass."
-severity = "Low"
-confidence = "Low"
-remediation = "Implement proper access controls in property getters/setters."
-ast_match = "Call(func.id=property)"
-file_pattern = "*.py"
+id = "AI002"
+description = "Hardcoded Anthropic (Claude) API key detected."
+severity = "High"
+remediation = "Remove hardcoded API keys and load them from environment variables or a secure secrets manager."
+pattern = "(?i)sk-ant-api[0-9]*-[A-Za-z0-9_-]{20,}"
+file_pattern = "*.py"
 
 [[rule]]
-id = "STATICMETHOD1122"
-description = "Static method bypassing instance access controls."
-severity = "Low"
-confidence = "Low"
-remediation = "Ensure static methods don't bypass intended access controls."
-ast_match = "Call(func.id=staticmethod)"
-file_pattern = "*.py"
+id = "PY306_CACHE"
+description = "pickle.loads() in cache backend — cache poisoning leads to remote code execution."
+severity = "Critical"
+confidence = "High"
+remediation = "Replace pickle-based cache serialization with JSON or msgpack. If pickle is required, authenticate the cache channel and use HMAC to verify payload integrity before deserializing."
+pattern = "pickle\\.loads\\s*\\("
+file_pattern = "*cache/backends/*.py"
 
 [[rule]]
-id = "CLASSMETHOD1125"
-description = "Class method with potential privilege escalation."
-severity = "Low"
-confidence = "Low"
-remediation = "Ensure class methods don't provide unintended access."
-ast_match = "Call(func.id=classmethod)"
+id = "SHELL_BYPASS001"
+description = "Explicit shell interpreter bypasses shell=False — functionally equivalent to shell injection."
+severity = "High"
+confidence = "High"
+remediation = "Never pass user-controlled data as the -c argument to bash/sh/cmd. Use subprocess with a list of arguments and shell=False, validating each element independently."
+pattern = "subprocess\\.(run|Popen|call)\\s*\\(\\s*\\[\\s*[\"'](bash|sh|zsh|cmd\\.exe|powershell)[\"']\\s*,\\s*[\"']-c[\"']"
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "SUPER1128"
-description = "Super call bypassing method resolution order."
-severity = "Low"
-confidence = "Low"
-remediation = "Be careful with super() calls in security-sensitive contexts."
-ast_match = "Call(func.id=super)"
+id = "OPEN_REDIRECT001"
+description = "Unvalidated URL in redirect — open redirect enables phishing and OAuth token stealing."
+severity = "High"
+confidence = "Medium"
+remediation = "Validate redirect URLs against an allowlist of trusted domains. Use url_has_allowed_host_and_scheme() in Django or validate against a whitelist. Never redirect to a user-supplied URL without checking the host."
 file_pattern = "*.py"
+# No pattern — triggered only by taint engine (SK_REDIRECT001/002).
+# Taint flow: request.GET/POST['next'] → redirect()/HttpResponseRedirect()
+# Conditional sanitization (if is_safe_url(url): redirect(url)) is not detectable
+# by static taint analysis — url remains tainted through the conditional check.
+# Exclude Django's own framework files — they validate redirects with is_safe_url() /
+# url_has_allowed_host_and_scheme() before calling redirect(), so the call is safe.
+exclude_file_pattern = "*/django/contrib/*,django/contrib/*,*/django/views/*,django/views/*"
 
 [[rule]]
-id = "CALLABLE1131"
-description = "Callable check with potential type confusion."
-severity = "Low"
-confidence = "Low"
-remediation = "Validate callable objects before invocation."
-ast_match = "Call(func.id=callable)"
+id = "PLAIN_PWD001"
+description = "User-supplied password stored without hashing — plaintext password in database."
+severity = "Critical"
+confidence = "High"
+remediation = "Use Django's make_password() or set_password() before storing. Never assign request data directly to a password field: User.objects.create_user(password=request.POST['password']) hashes automatically; raw create(..., password=raw) does not."
 file_pattern = "*.py"
+# No pattern — triggered only by taint engine (SK_PLAIN_PWD001).
+# Taint flow: request.POST['password'] → Model.objects.create(password=tainted)
 
 [[rule]]
-id = "ID1134"
-description = "Object identity check with potential security implications."
-severity = "Low"
-confidence = "Low"
-remediation = "Be aware that object identity can be predictable."
-ast_match = "Call(func.id=id)"
+id = "DJANGO_DEBUG001"
+description = "DEBUG=True in settings — full stack traces and internal state exposed to any HTTP client."
+severity = "Critical"
+confidence = "High"
+remediation = "Set DEBUG=False in production. Use environment variables: DEBUG = os.environ.get('DEBUG', 'False') == 'True'. Applies to Django, Flask, and any framework that respects a DEBUG flag."
+pattern = "^\\s*DEBUG\\s*=\\s*True"
 file_pattern = "*.py"
+# Catches DEBUG=True in both Django settings.py and Flask config files.
+# Flask app.run(debug=True) is covered separately by FLASK001.
+# Different from FLASK001: this is a settings file value, not runtime configuration.
+exclude_file_pattern = "*/tests/*,*/test_*.py"
 
 [[rule]]
-id = "HASH1137"
-description = "Hash operation with potential collision attacks."
-severity = "Low"
-confidence = "Low"
-remediation = "Use cryptographic hashes for security-sensitive applications."
-ast_match = "Call(func.id=hash)"
+id = "RUAMEL_UNSAFE001"
+description = "ruamel.yaml loaded with typ='unsafe' — allows !!python/object gadget execution."
+severity = "Critical"
+confidence = "High"
+remediation = "Use YAML() (round-trip, safe by default) or YAML(typ='safe'). typ='unsafe' enables arbitrary Python object construction via YAML tags, equivalent to PyYAML's unsafe yaml.load()."
+pattern = "YAML\\s*\\(\\s*typ\\s*=\\s*[\"']unsafe[\"']\\s*\\)"
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "ASCII1140"
-description = "ASCII representation potentially exposing data."
-severity = "Low"
-confidence = "Low"
-remediation = "Be careful when converting sensitive objects to ASCII."
-ast_match = "Call(func.id=ascii)"
+id = "ENV_URL001"
+description = "Environment variable used as HTTP endpoint URL — SSRF if the env var is attacker-controlled in CI/container environments."
+severity = "High"
+confidence = "Medium"
+remediation = "Validate env-var URLs against an allowlist of trusted domains before use. Never allow arbitrary HTTP endpoints via environment variables without scheme and host validation. Use a fixed default and only allow override to known-safe origins."
 file_pattern = "*.py"
+# Pattern: env var whose name contains URL used directly in HTTP calls.
+# Common pattern: SEMGREP_URL, API_URL, BASE_URL, ENDPOINT_URL etc.
+# The taint engine (SSRF_001) catches the downstream HTTP call when env-var URL propagates to requests/httpx.
+pattern = "os\\.environ(?:\\.get)?\\s*\\([\"'][A-Z_]*URL[A-Z_]*[\"']"
+exclude_pattern = "^\\s*#|allowlist|whitelist|validate|urlparse\\.scheme|startswith\\s*\\([\"']https"
 
 [[rule]]
-id = "INPUT1143"
-description = "User input function with potential injection risks."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Validate and sanitize all user input."
-ast_match = "Call(func.id=input)"
+id = "COOKIE_FILE001"
+description = "Environment variable used as cookie file path — cookie injection into HTTP sessions."
+severity = "High"
+confidence = "High"
+remediation = "Never load a cookie jar from an env-var-specified path without validating the path is within an expected directory. Prefer in-memory session cookies over file-backed cookie jars for sensitive operations."
 file_pattern = "*.py"
+# No pattern — triggered by taint engine (SK_COOKIE_JAR001):
+# os.environ["SEMGREP_COOKIES_PATH"] → MozillaCookieJar(path) → cookies.load()
+# Allows attacker-controlled cookies to be injected into all HTTP requests.
 
 [[rule]]
-id = "PRINT1146"
-description = "Print statement potentially exposing sensitive data."
-severity = "Low"
-confidence = "Low"
-remediation = "Avoid printing sensitive information."
-ast_match = "Call(func.id=print)"
+id = "ENV_GIT_URL001"
+description = "CI environment variable used to construct a git fetch URL — CI_JOB_TOKEN or credentials embedded in attacker-controlled URL."
+severity = "High"
+confidence = "High"
+remediation = "Validate that CI_MERGE_REQUEST_PROJECT_URL and similar CI env vars match the expected repository host before embedding credentials. Use allowlist: only reconstruct URLs for the known project host."
 file_pattern = "*.py"
+# Taint-driven via existing SSRF_001 and PY102 sinks:
+# os.environ["CI_MERGE_REQUEST_PROJECT_URL"] → urlsplit() → _replace(netloc=token@host) →
+# urlunsplit() → git_check_output(["git", "fetch", url]) — PY102 fires on tainted subprocess arg.
+# This rule provides higher-confidence CI-specific context for the same finding.
+pattern = "CI_MERGE_REQUEST_PROJECT_URL|CI_JOB_TOKEN.*git.*fetch|git.*fetch.*CI_"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "OPEN1149"
-description = "File open operation with potential path traversal."
-severity = "Medium"
-confidence = "Medium"
-remediation = "Validate file paths and use appropriate file modes."
-ast_match = "Call(func.id=open)"
+id = "DESER_JOBLIB001"
+description = "Insecure deserialization via joblib.load() — loads arbitrary Python objects → RCE."
+severity = "Critical"
+confidence = "High"
+remediation = "Never load joblib files from untrusted sources. joblib uses pickle internally — any crafted .pkl/.joblib file can execute arbitrary code. Use JSON or another data-only format for data exchange."
+pattern = "joblib\\.load\\s*\\("
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "FORMAT1152"
-description = "Format function with potential format string attacks."
-severity = "Medium"
-confidence = "Low"
-remediation = "Validate format strings and use safe formatting methods."
-ast_match = "Call(func.id=format)"
+id = "DESER_NUMPY001"
+description = "numpy.load() with allow_pickle=True — arbitrary Python object deserialization → RCE."
+severity = "Critical"
+confidence = "High"
+remediation = "Use allow_pickle=False (default in NumPy 1.17+). Only load .npy/.npz files from trusted sources when pickle is required. Use JSON or HDF5 for cross-origin data exchange."
+pattern = "np\\.load\\s*\\(.*allow_pickle\\s*=\\s*True|numpy\\.load\\s*\\(.*allow_pickle\\s*=\\s*True"
 file_pattern = "*.py"
+exclude_pattern = "^\\s*#"
 
 [[rule]]
-id = "AI002"
-description = "Hardcoded Anthropic (Claude) API key detected."
-severity = "High"
-remediation = "Remove hardcoded API keys and load them from environment variables or a secure secrets manager."
-pattern = "(?i)sk-ant-api[0-9]*-[A-Za-z0-9_-]{20,}"
-file_pattern = ".*\\.py"
+id = "DESER_TORCH001"
+description = "torch.load() uses pickle by default — loading untrusted PyTorch model files → RCE."
+severity = "Critical"
+confidence = "High"
+remediation = "Use torch.load(..., weights_only=True) (PyTorch 2.0+) to restrict deserialization. Never load model files from untrusted sources. For model exchange, use ONNX or safetensors format."
+pattern = "torch\\.load\\s*\\("
+file_pattern = "*.py"
+# weights_only=True is the safe version — exclude it
+exclude_pattern = "^\\s*#|weights_only\\s*=\\s*True"
diff --git a/src/pyspector/triage.py b/src/pyspector/triage.py
index b50e1d1c..18111bd4 100644
--- a/src/pyspector/triage.py
+++ b/src/pyspector/triage.py
@@ -7,14 +7,13 @@
 from textual.app import App, ComposeResult # type: ignore
 from textual.widgets import Header, Footer, DataTable, Static, Label # type: ignore
 from textual.containers import Vertical # type: ignore
-from textual.binding import Binding # type: ignore
 
 # Helper to create a unique, stable fingerprint for an issue
 def create_fingerprint(issue: Dict[str, Any]) -> str:
     # Use rule ID, file path relative to a potential project root, and the line content
     # This makes the fingerprint stable across different checkout directories
     unique_string = f"{issue.get('rule_id', '')}|{issue.get('file_path', '')}|{issue.get('line_number', '')}|{issue.get('code', '').strip()}"
-    return hashlib.sha1(unique_string.encode('utf-8')).hexdigest()
+    return hashlib.sha256(unique_string.encode('utf-8')).hexdigest()
 
 class PySpectorTriage(App):
     """An interactive TUI for triaging PySpector findings."""
diff --git a/tests/unit/reporting_test.py b/tests/unit/reporting_test.py
index aee2a796..1c703a86 100644
--- a/tests/unit/reporting_test.py
+++ b/tests/unit/reporting_test.py
@@ -45,7 +45,7 @@ def test_to_sarif(self):
 
         # Check top level SARIF fields
         self.assertEqual(output_json.get("version"), "2.1.0")
-        self.assertEqual(output_json.get("schema_uri"), "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json")
+        self.assertEqual(output_json.get("schema_uri"), "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json")
 
         # Check runs
         self.assertIn("runs", output_json)
@@ -54,7 +54,6 @@ def test_to_sarif(self):
 
         # Check unique single run
         run = output_json["runs"][0]
-        self.assertEqual(run["tool"]["driver"]["id"], "pyspector")
         self.assertEqual(run["tool"]["driver"]["name"], "PySpector")
 
         # Check run results
diff --git a/tests/unit/test_a_sink_rules.py b/tests/unit/test_a_sink_rules.py
new file mode 100644
index 00000000..c2aabc6d
--- /dev/null
+++ b/tests/unit/test_a_sink_rules.py
@@ -0,0 +1,167 @@
+"""Tests for A_SINK rules — all are disabled; each test verifies the rule stays silent (no FPs)."""
+
+import os, sys, tempfile, textwrap, warnings
+from pathlib import Path
+import pytest
+
+
+def _wrap(code):  # embed a dedented snippet as the indented body of `def _view(request):`
+    ind = "\n".join("    " + l for l in textwrap.dedent(code).splitlines())
+    return f"def _view(request):\n{ind}\n"
+
+
+def run(code, filename="app.py"):
+    from pyspector._rust_core import run_scan
+    from pyspector.config import get_default_rules
+    import ast as _ast, json as _json
+    from pyspector.cli import AstEncoder
+    wrapped = _wrap(code)  # snippet becomes the body of a view function taking `request`
+    rules = get_default_rules()
+    with tempfile.TemporaryDirectory() as d:
+        p = os.path.join(d, filename)
+        Path(p).write_text(wrapped)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            try: aj = _json.dumps(_ast.parse(wrapped), cls=AstEncoder)
+            except Exception: aj = "{}"  # best-effort: empty AST JSON on failure, but let KeyboardInterrupt/SystemExit through
+        files = [{"file_path": filename, "content": wrapped, "ast_json": aj}]
+        return [{"rule_id": r.rule_id} for r in run_scan(d, rules, {"exclude": []}, files)]  # keep only the rule IDs
+
+
+def fires(code, rule_id): return [f for f in run(code) if f["rule_id"] == rule_id]  # findings for rule_id; empty list when the rule is silent
+def not_fires(code, rule_id): return not fires(code, rule_id)  # True when rule_id produced no finding for the snippet
+
+
+# --- HASATTR837 ---
+class TestHasattr837:
+    def test_tainted_silent_disabled(self):
+        # HASATTR837 disabled: hasattr() returns bool — not a security sink,
+        # generates FPs on stdlib code that uses hasattr for duck-typing checks.
+        assert not_fires("attr=request.GET.get('f'); hasattr(obj,attr)", "HASATTR837")
+    def test_constant_safe(self):
+        assert not_fires("hasattr(obj,'is_active')", "HASATTR837")
+
+# --- VARS840 ---
+class TestVars840:
+    def test_tainted_silent_disabled(self):
+        # VARS840 disabled: vars() returns __dict__ — information disclosure but
+        # low security impact; generates FPs in code using vars() for introspection.
+        assert not_fires("o=request.GET.get('obj'); vars(o)", "VARS840")
+    def test_constant_safe(self):
+        assert not_fires("vars(MyClass())", "VARS840")
+
+# --- DIR849 ---
+class TestDir849:
+    def test_tainted_silent_disabled(self):
+        # DIR849 disabled: dir() lists attributes for introspection — not a security
+        # sink; generates FPs in code that uses dir() for reflection/debugging.
+        assert not_fires("o=request.GET.get('obj'); dir(o)", "DIR849")
+    def test_constant_safe(self):
+        assert not_fires("dir(str)", "DIR849")
+
+# --- CALLABLE1131 ---
+class TestCallable1131:
+    def test_tainted_silent_disabled(self):
+        # CALLABLE1131 disabled: callable() checks if object is callable —
+        # not a security sink; generates FPs from deep inter-procedural taint.
+        assert not_fires("o=request.GET.get('fn'); callable(o)", "CALLABLE1131")
+    def test_constant_safe(self):
+        assert not_fires("callable(print)", "CALLABLE1131")
+
+# --- BYTES1005 ---
+class TestBytes1005:
+    def test_tainted_silent_disabled(self):
+        # BYTES1005 disabled: bytes() encoding is not a security sink on its own.
+        assert not_fires("d=request.GET.get('data'); bytes(d,'utf-8')", "BYTES1005")
+    def test_constant_safe(self):
+        assert not_fires("bytes('hello','utf-8')", "BYTES1005")
+
+# --- BYTEARRAY1008 ---
+class TestBytearray1008:
+    def test_tainted_silent_disabled(self):
+        # BYTEARRAY1008 disabled: bytearray() creates a mutable buffer — not a
+        # security sink; generates FPs in asyncio/networking code that buffers I/O.
+        assert not_fires("d=request.GET.get('data'); bytearray(d,'utf-8')", "BYTEARRAY1008")
+    def test_constant_safe(self):
+        assert not_fires("bytearray(b'hello')", "BYTEARRAY1008")
+
+# --- MEMORYVIEW1011 ---
+class TestMemoryview1011:
+    def test_tainted_silent_disabled(self):
+        # MEMORYVIEW1011 disabled: memory view creation is not a security sink.
+        assert not_fires("d=request.GET.get('data'); b=bytes(d,'utf-8'); memoryview(b)", "MEMORYVIEW1011")
+    def test_constant_safe(self):
+        assert not_fires("memoryview(b'hello')", "MEMORYVIEW1011")
+
+# --- ORD1014 ---
+class TestOrd1014:
+    def test_tainted_silent_disabled(self):
+        # ORD1014 disabled: ord() returns the integer code point of a character —
+        # never a security sink; generates FPs in encoding/codec implementations.
+        assert not_fires("c=request.GET.get('char'); ord(c)", "ORD1014")
+    def test_constant_safe(self):
+        assert not_fires("ord('A')", "ORD1014")
+
+# --- CHR1017 ---
+class TestChr1017:
+    def test_tainted_silent_disabled(self):
+        # CHR1017 disabled: chr() converts an integer to a character —
+        # never a security sink; generates FPs in encoding implementations.
+        assert not_fires("n=request.GET.get('n'); chr(n)", "CHR1017")
+    def test_constant_safe(self):
+        assert not_fires("chr(65)", "CHR1017")
+
+# --- CENTER927 / LJUST930 / RJUST933 ---
+class TestJustification:
+    def test_center_silent_disabled(self):
+        # CENTER927 disabled: string centering is a cosmetic operation — not a sink.
+        assert not_fires("w=request.GET.get('w'); 'x'.center(w)", "CENTER927")
+    def test_center_constant_safe(self):
+        assert not_fires("'x'.center(80)", "CENTER927")
+    def test_ljust_silent_disabled(self):
+        # LJUST930 disabled: string left-justification is not a security sink.
+        assert not_fires("w=request.GET.get('w'); 'x'.ljust(w)", "LJUST930")
+    def test_rjust_silent_disabled(self):
+        # RJUST933 disabled: zero findings across all scanned repos.
+        assert not_fires("w=request.GET.get('w'); 'x'.rjust(w)", "RJUST933")
+
+# --- RANGE1056 ---
+class TestRange1056:
+    def test_tainted_silent_disabled(self):
+        # RANGE1056 disabled: range() iteration bound is not a security sink.
+        assert not_fires("n=request.GET.get('n'); range(n)", "RANGE1056")
+    def test_constant_safe(self):
+        assert not_fires("range(100)", "RANGE1056")
+
+# --- JOIN876 ---
+class TestJoin876:
+    def test_tainted_parts_silent_disabled(self):
+        # JOIN876 disabled: .join() with tainted data generates FPs from deep
+        # inter-proc taint reaching error messages and SQL placeholder construction.
+        assert not_fires("parts=request.GET.getlist('p'); '/'.join(parts)", "JOIN876")
+    def test_constant_safe(self):
+        assert not_fires("'/'.join(['a','b','c'])", "JOIN876")
+
+# --- SORTED1074 ---
+class TestSorted1074:
+    def test_tainted_silent_disabled(self):
+        # SORTED1074 disabled: sorting user data is not a security sink.
+        assert not_fires("data=request.GET.getlist('items'); sorted(data)", "SORTED1074")
+    def test_constant_safe(self):
+        assert not_fires("sorted([3,1,2])", "SORTED1074")
+
+# --- SUM1080 ---
+class TestSum1080:
+    def test_tainted_silent_disabled(self):
+        # SUM1080 disabled: summing user data is not a security sink.
+        assert not_fires("vals=request.GET.getlist('v'); sum(vals)", "SUM1080")
+    def test_constant_safe(self):
+        assert not_fires("sum([1,2,3])", "SUM1080")
+
+# --- SET1047 ---
+class TestSet1047:
+    def test_tainted_silent_disabled(self):
+        # SET1047 disabled: set() deduplication causes FPs from deep inter-proc taint.
+        assert not_fires("items=request.GET.getlist('i'); set(items)", "SET1047")
+    def test_constant_safe(self):
+        assert not_fires("set([1,2,3])", "SET1047")
diff --git a/tests/unit/test_false_positive_reductions.py b/tests/unit/test_false_positive_reductions.py
index 94258b0a..7c631b34 100644
--- a/tests/unit/test_false_positive_reductions.py
+++ b/tests/unit/test_false_positive_reductions.py
@@ -359,14 +359,15 @@ def test_pickle_loads_still_flagged_py002(self):
         assert findings_for_rule(code, "PY002") != [], \
             "PY002 must still fire for pickle.loads() — this is a TRUE POSITIVE"
 
-    def test_pickle_loads_still_flagged_py306(self):
-        """pickle.loads() MUST still be flagged — it's a true positive."""
+    def test_pickle_loads_zlib_payload_still_flagged_py002(self):
+        """pickle.loads() on a zlib-decompressed payload MUST still be flagged.
+        PY306 was disabled (duplicate of PY002); PY002 is the canonical rule."""
         code = """
             import pickle
             return pickle.loads(zlib.decompress(f.read()))
         """
-        assert findings_for_rule(code, "PY306") != [], \
-            "PY306 must still fire for pickle.loads() — this is a TRUE POSITIVE"
+        assert findings_for_rule(code, "PY002") != [], \
+            "PY002 must still fire for pickle.loads() — this is a TRUE POSITIVE"
 
 
 # ===========================================================================
diff --git a/tests/unit/test_group_a_rules.py b/tests/unit/test_group_a_rules.py
new file mode 100644
index 00000000..62933472
--- /dev/null
+++ b/tests/unit/test_group_a_rules.py
@@ -0,0 +1,267 @@
+"""
+Tests for Group A taint-driven rules: SETATTR831, DELATTR834, FORMAT864,
+FSTRING867, TRANSLATE912, REPLACE879, SER522, RAND810.
+
+Each test proves:
+  - True positive: tainted arg → rule fires (rules marked disabled must stay silent)
+  - True negative: constant arg → rule does NOT fire
+"""
+
+import os
+import sys
+import tempfile
+import textwrap
+import warnings
+from pathlib import Path
+
+import pytest
+
+
+def _wrap(code: str) -> str:
+    indented = "\n".join("    " + l for l in textwrap.dedent(code).splitlines())
+    return f"def _view(request):\n{indented}\n"
+
+
+def run_pyspector(code: str, filename: str = "app.py") -> list[dict]:
+    from pyspector._rust_core import run_scan
+    from pyspector.config import get_default_rules
+    import ast as _ast, json as _json
+    from pyspector.cli import AstEncoder
+
+    wrapped = _wrap(code)
+    rules_toml = get_default_rules()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, filename)
+        Path(path).write_text(wrapped)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            try:
+                tree = _ast.parse(wrapped)
+                ast_json = _json.dumps(tree, cls=AstEncoder)
+            except Exception:
+                ast_json = "{}"
+        files = [{"file_path": filename, "content": wrapped, "ast_json": ast_json}]
+        results = run_scan(tmpdir, rules_toml, {"exclude": []}, files)
+
+    return [{"rule_id": r.rule_id, "line_number": r.line_number} for r in results]
+
+
+def fires(code, rule_id, **kw):
+    return [f for f in run_pyspector(code, **kw) if f["rule_id"] == rule_id]
+
+
+# ============================================================
+# SETATTR831 — arbitrary attribute write via tainted name
+# ============================================================
+
+class TestSetattr831:
+    def test_tainted_attr_name_fires(self):
+        code = """
+            attr = request.GET.get('field')
+            setattr(user, attr, 'value')
+        """
+        assert fires(code, "SETATTR831"), "SETATTR831 must fire: tainted attr name to setattr"
+
+    def test_subscript_source_fires(self):
+        code = """
+            attr = request.POST['field']
+            setattr(obj, attr, True)
+        """
+        assert fires(code, "SETATTR831"), "SETATTR831 must fire with subscript source"
+
+    def test_constant_attr_safe(self):
+        code = """
+            setattr(obj, 'username', 'alice')
+        """
+        assert not fires(code, "SETATTR831"), "SETATTR831 must NOT fire for constant attr name"
+
+
+# ============================================================
+# DELATTR834 — arbitrary attribute deletion via tainted name
+# ============================================================
+
+class TestDelattr834:
+    def test_tainted_attr_name_fires(self):
+        code = """
+            attr = request.GET.get('field')
+            delattr(obj, attr)
+        """
+        assert fires(code, "DELATTR834"), "DELATTR834 must fire: tainted attr name to delattr"
+
+    def test_constant_attr_safe(self):
+        code = """
+            delattr(obj, 'cache')
+        """
+        assert not fires(code, "DELATTR834"), "DELATTR834 must NOT fire for constant attr"
+
+
+# ============================================================
+# FORMAT864 — tainted format string used as template
+# ============================================================
+
+class TestFormat864:
+    def test_tainted_receiver_fires(self):
+        """template = request.GET.get('t'); template.format(user=user)"""
+        code = """
+            template = request.GET.get('template')
+            result = template.format(user=user_obj)
+        """
+        assert fires(code, "FORMAT864"), "FORMAT864 must fire: tainted string used as .format() template"
+
+    def test_tainted_via_subscript_fires(self):
+        code = """
+            tmpl = request.GET['template']
+            output = tmpl.format(name='Alice')
+        """
+        assert fires(code, "FORMAT864"), "FORMAT864 must fire with subscript source"
+
+    def test_constant_template_safe(self):
+        code = """
+            result = 'Hello {name}!'.format(name=user.name)
+        """
+        assert not fires(code, "FORMAT864"), "FORMAT864 must NOT fire for constant template"
+
+    def test_tainted_arg_safe(self):
+        # FORMAT864 only fires when the TEMPLATE (receiver) is tainted.
+        # A safe hardcoded template with tainted ARGUMENTS is not SSTI.
+        # FP case: msg = '{} is a symlink'; raise FileExistsError(msg.format(cfile))
+        code = """
+            msg = '{} is not a valid path'
+            raise ValueError(msg.format(request.GET.get('path')))
+        """
+        assert not fires(code, "FORMAT864"), "FORMAT864 must NOT fire when only the arg is tainted"
+
+
+# ============================================================
+# FSTRING867 — tainted variable inside f-string
+# ============================================================
+
+class TestFstring867:
+    # FSTRING867 is disabled as a standalone sink — f-string taint propagates forward
+    # to downstream sinks (LOG741, PY101, PATH813, etc.) which report it more precisely.
+    # As a standalone sink it fires on every display/error string in large codebases.
+    def test_tainted_variable_silent_disabled(self):
+        code = """
+            cmd = request.GET.get('cmd')
+            query = f'SELECT * FROM {cmd}'
+        """
+        assert not fires(code, "FSTRING867"), "FSTRING867 disabled: downstream PY101 covers this"
+
+    def test_constant_fstring_safe(self):
+        code = """
+            name = 'Alice'
+            greeting = f'Hello {name}!'
+        """
+        assert not fires(code, "FSTRING867"), "FSTRING867 must NOT fire for f-string with local constant"
+
+
+# ============================================================
+# REPLACE879 — tainted replace arg used for filter bypass
+# ============================================================
+
+class TestReplace879:
+    def test_tainted_silent_disabled(self):
+        # REPLACE879 disabled: str.replace() is a pure data transformation.
+        # Also caused FPs from os.replace(), node.replace(), code.replace() — any
+        # method named 'replace' matched regardless of receiver type.
+        code = """
+            bad = request.GET.get('pattern')
+            result = sanitized.replace(bad, '')
+        """
+        assert not fires(code, "REPLACE879"), "REPLACE879 disabled: str.replace() is not a security sink alone"
+
+    def test_constant_replace_safe(self):
+        code = """
+            result = user_name.replace('<', '&lt;')
+        """
+        assert not fires(code, "REPLACE879"), "REPLACE879 must NOT fire for constant search/replace"
+
+
+# ============================================================
+# TRANSLATE912 — tainted translation table (sanitization bypass)
+# ============================================================
+
+class TestTranslate912:
+    def test_tainted_silent_disabled(self):
+        # TRANSLATE912 disabled: str.translate() is a character-mapping transformation.
+        # The downstream result needs to reach a dangerous sink to be exploitable.
+        code = """
+            table_data = request.GET.get('table')
+            result = user_input.translate(table_data)
+        """
+        assert not fires(code, "TRANSLATE912"), "TRANSLATE912 disabled: translate is not a security sink alone"
+
+    def test_constant_table_safe(self):
+        code = """
+            text = 'hello world'
+            result = text.translate(str.maketrans('abc', 'xyz'))
+        """
+        assert not fires(code, "TRANSLATE912"), "TRANSLATE912 must NOT fire for constant table"
+
+
+# ============================================================
+# RAND810 — tainted seed → predictable PRNG
+# ============================================================
+
+class TestRand810:
+    def test_tainted_seed_fires(self):
+        code = """
+            import random
+            seed = request.GET.get('seed')
+            random.seed(seed)
+        """
+        assert fires(code, "RAND810"), "RAND810 must fire: tainted seed to random.seed()"
+
+    def test_constant_seed_safe(self):
+        code = """
+            import random
+            random.seed(42)
+        """
+        assert not fires(code, "RAND810"), "RAND810 must NOT fire for constant seed"
+
+
+# ============================================================
+# SER522 — tainted object to serializer
+# ============================================================
+
+class TestSer522:
+    def test_tainted_object_fires(self):
+        code = """
+            data = request.POST.get('data')
+            result = serialize('json', data)
+        """
+        assert fires(code, "SER522"), "SER522 must fire: tainted object to serialize()"
+
+    def test_constant_object_safe(self):
+        code = """
+            result = serialize('json', MyModel.objects.all())
+        """
+        assert not fires(code, "SER522"), "SER522 must NOT fire for untainted queryset"
+
+
+# ============================================================
+# Regression — existing rules still fire
+# ============================================================
+
+class TestRegression:
+    def test_getattr828_still_fires(self):
+        code = """
+            attr = request.GET.get('field')
+            getattr(user, attr)
+        """
+        assert fires(code, "GETATTR828"), "GETATTR828 regression"
+
+    def test_py102_still_fires(self):
+        code = """
+            cmd = request.get('command')
+            subprocess.run(cmd)
+        """
+        assert fires(code, "PY102"), "PY102 regression"
+
+    def test_open1149_still_fires(self):
+        code = """
+            path = request.GET.get('file')
+            open(path)
+        """
+        assert fires(code, "OPEN1149"), "OPEN1149 regression"
diff --git a/tests/unit/test_missing_rules.py b/tests/unit/test_missing_rules.py
new file mode 100644
index 00000000..191428eb
--- /dev/null
+++ b/tests/unit/test_missing_rules.py
@@ -0,0 +1,453 @@
+"""
+Tests for the 11 newly added security rules:
+SSTI001, ORM001, ORM002, DESER725, DESER726,
+TLS001, SSH001, JWT001, ZIPSLIP001, XXE001, FLASK001.
+"""
+import os
+import sys
+import tempfile
+import textwrap
+import warnings
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
+
+
+def _wrap(code: str) -> str:
+    indented = "\n".join("    " + l for l in textwrap.dedent(code).splitlines())
+    return f"def _view(request):\n{indented}\n"
+
+
+def run_pyspector(code: str, filename: str = "app.py") -> list[dict]:
+    from pyspector._rust_core import run_scan
+    from pyspector.config import get_default_rules
+    import ast as _ast, json as _json
+    from pyspector.cli import AstEncoder
+
+    wrapped = _wrap(code)
+    rules_toml = get_default_rules()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, filename)
+        Path(path).write_text(wrapped)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            try:
+                tree = _ast.parse(wrapped)
+                ast_json = _json.dumps(tree, cls=AstEncoder)
+            except Exception:
+                ast_json = "{}"
+        files = [{"file_path": filename, "content": wrapped, "ast_json": ast_json}]
+        results = run_scan(tmpdir, rules_toml, {"exclude": []}, files)
+
+    return [{"rule_id": r.rule_id, "line_number": r.line_number} for r in results]
+
+
+def fires(code, rule_id, **kw):
+    return bool([f for f in run_pyspector(code, **kw) if f["rule_id"] == rule_id])
+
+
+def not_fires(code, rule_id, **kw):
+    return not fires(code, rule_id, **kw)
+
+
+# ============================================================
+# SSTI001 — Server-Side Template Injection
+# ============================================================
+
+class TestSSTI001:
+    def test_render_template_string_tainted_fires(self):
+        code = """
+            tmpl = request.GET.get('template')
+            return render_template_string(tmpl)
+        """
+        assert fires(code, "SSTI001"), "SSTI001 must fire: tainted string to render_template_string"
+
+    def test_from_string_silent_removed(self):
+        # SK_SSTI002 (from_string sink) removed — from_string() is too generic.
+        # It fired on TF's DeviceSpec.from_string(), any library with .from_string().
+        # SSTI is still caught via render_template_string (SK_SSTI001) and
+        # the jinja2.Template pattern-based rule.
+        code = """
+            src = request.POST.get('src')
+            result = env.from_string(src).render()
+        """
+        assert not_fires(code, "SSTI001"), "SK_SSTI002 removed: from_string too generic"
+
+    def test_static_template_safe(self):
+        code = """
+            result = render_template_string('<h1>Hello {{ name }}</h1>', name=user)
+        """
+        assert not_fires(code, "SSTI001"), "SSTI001 must NOT fire for static template literal"
+
+
+# ============================================================
+# ORM001 — SQLAlchemy text() injection
+# ============================================================
+
+class TestORM001:
+    def test_fstring_in_text_fires(self):
+        code = """
+            uid = request.GET.get('id')
+            result = session.execute(text(f"SELECT * FROM users WHERE id={uid}"))
+        """
+        assert fires(code, "ORM001"), "ORM001 must fire: f-string inside text()"
+
+    def test_percent_format_in_text_fires(self):
+        code = """
+            result = session.execute(text("SELECT * FROM users WHERE name='%s'" % name))
+        """
+        assert fires(code, "ORM001"), "ORM001 must fire: %-format inside text()"
+
+    def test_safe_parameterized_text_safe(self):
+        code = """
+            result = session.execute(text("SELECT * FROM users WHERE id = :uid"), {"uid": uid})
+        """
+        assert not_fires(code, "ORM001"), "ORM001 must NOT fire for static text() with params"
+
+
+# ============================================================
+# ORM002 — Django ORM injection (raw, order_by, extra)
+# ============================================================
+
+class TestORM002:
+    def test_raw_tainted_sql_fires(self):
+        code = """
+            sql = request.GET.get('q')
+            users = User.objects.raw(sql)
+        """
+        assert fires(code, "ORM002"), "ORM002 must fire: tainted SQL in raw()"
+
+    def test_order_by_tainted_fires(self):
+        code = """
+            sort = request.GET.get('sort')
+            qs = User.objects.order_by(sort)
+        """
+        assert fires(code, "ORM002"), "ORM002 must fire: tainted field in order_by (CVE-2021-35042)"
+
+    def test_order_by_literal_safe(self):
+        code = """
+            qs = User.objects.order_by('username')
+        """
+        assert not_fires(code, "ORM002"), "ORM002 must NOT fire for literal field name in order_by"
+
+
+# ============================================================
+# DESER725 — jsonpickle deserialization
+# ============================================================
+
+class TestDESER725:
+    def test_jsonpickle_decode_fires(self):
+        code = "import jsonpickle; obj = jsonpickle.decode(data)"
+        assert fires(code, "DESER725"), "DESER725 must fire: jsonpickle.decode"
+
+    def test_comment_line_safe(self):
+        code = "# jsonpickle.decode(data)"
+        assert not_fires(code, "DESER725"), "DESER725 must NOT fire in comment"
+
+
+# ============================================================
+# DESER726 — dill deserialization
+# ============================================================
+
+class TestDESER726:
+    def test_dill_loads_fires(self):
+        code = "import dill; obj = dill.loads(payload)"
+        assert fires(code, "DESER726"), "DESER726 must fire: dill.loads"
+
+    def test_comment_line_safe(self):
+        code = "# dill.loads(data)"
+        assert not_fires(code, "DESER726"), "DESER726 must NOT fire in comment"
+
+
+# ============================================================
+# TLS001 — TLS verification disabled
+# ============================================================
+
+class TestTLS001:
+    def test_verify_false_fires(self):
+        code = "resp = requests.get(url, verify=False)"
+        assert fires(code, "TLS001"), "TLS001 must fire: requests verify=False"
+
+    def test_disable_warnings_fires(self):
+        code = "urllib3.disable_warnings(InsecureRequestWarning)"
+        assert fires(code, "TLS001"), "TLS001 must fire: disable_warnings InsecureRequestWarning"
+
+    def test_verify_true_safe(self):
+        code = "resp = requests.get(url, verify=True)"
+        assert not_fires(code, "TLS001"), "TLS001 must NOT fire for verify=True"
+
+    def test_verify_capath_safe(self):
+        code = "resp = requests.get(url, verify='/etc/ssl/certs/ca-bundle.crt')"
+        assert not_fires(code, "TLS001"), "TLS001 must NOT fire for verify=CA path"
+
+
+# ============================================================
+# SSH001 — Paramiko MITM
+# ============================================================
+
+class TestSSH001:
+    def test_auto_add_policy_fires(self):
+        code = "client.set_missing_host_key_policy(paramiko.AutoAddPolicy())"
+        assert fires(code, "SSH001"), "SSH001 must fire: AutoAddPolicy()"
+
+    def test_reject_policy_safe(self):
+        code = "client.set_missing_host_key_policy(paramiko.RejectPolicy())"
+        assert not_fires(code, "SSH001"), "SSH001 must NOT fire for RejectPolicy"
+
+
+# ============================================================
+# JWT001 — JWT signature bypass
+# ============================================================
+
+class TestJWT001:
+    def test_verify_signature_false_fires(self):
+        code = 'payload = jwt.decode(token, options={"verify_signature": False})'
+        assert fires(code, "JWT001"), "JWT001 must fire: verify_signature=False"
+
+    def test_algorithms_none_fires(self):
+        code = "payload = jwt.decode(token, algorithms=['none'])"
+        assert fires(code, "JWT001"), "JWT001 must fire: algorithms=['none']"
+
+    def test_valid_decode_safe(self):
+        code = "payload = jwt.decode(token, secret, algorithms=['HS256'])"
+        assert not_fires(code, "JWT001"), "JWT001 must NOT fire for valid HS256 decode"
+
+
+# ============================================================
+# ZIPSLIP001 — Archive extraction without path validation
+# ============================================================
+
+class TestZIPSLIP001:
+    def test_zipfile_extractall_fires(self):
+        code = "zf.extractall('/var/app/uploads/')"
+        assert fires(code, "ZIPSLIP001"), "ZIPSLIP001 must fire: zipfile extractall"
+
+    def test_tarfile_extractall_fires(self):
+        code = "tf.extractall('/tmp/extract/')"
+        assert fires(code, "ZIPSLIP001"), "ZIPSLIP001 must fire: tarfile extractall"
+
+
+# ============================================================
+# XXE001 — lxml XXE
+# ============================================================
+
+class TestXXE001:
+    def test_etree_parse_fires(self):
+        code = "from lxml import etree; tree = etree.parse(user_file)"
+        assert fires(code, "XXE001"), "XXE001 must fire: etree.parse without safe parser"
+
+    def test_etree_fromstring_fires(self):
+        code = "from lxml import etree; root = etree.fromstring(xml_data)"
+        assert fires(code, "XXE001"), "XXE001 must fire: etree.fromstring"
+
+    def test_defusedxml_safe(self):
+        code = "from defusedxml import etree; root = etree.fromstring(xml_data)"
+        assert not_fires(code, "XXE001"), "XXE001 must NOT fire when defusedxml is used"
+
+    def test_resolve_entities_false_safe(self):
+        code = "p = etree.XMLParser(resolve_entities=False); tree = etree.parse(f, p)"
+        assert not_fires(code, "XXE001"), "XXE001 must NOT fire when resolve_entities=False"
+
+
+# ============================================================
+# FLASK001 — Flask debug mode
+# ============================================================
+
+class TestFLASK001:
+    def test_app_run_debug_fires(self):
+        code = "app.run(host='0.0.0.0', debug=True)"
+        assert fires(code, "FLASK001"), "FLASK001 must fire: app.run(debug=True)"
+
+    def test_app_debug_assignment_fires(self):
+        code = "app.debug = True"
+        assert fires(code, "FLASK001"), "FLASK001 must fire: app.debug = True"
+
+    def test_debug_false_safe(self):
+        code = "app.run(host='0.0.0.0', debug=False)"
+        assert not_fires(code, "FLASK001"), "FLASK001 must NOT fire for debug=False"
+
+
+# ============================================================
+# FILE_WRITE001 — writing user content to files
+# ============================================================
+
+class TestFILE_WRITE001:
+    # FILE_WRITE001 taint sink (SK_FILE_WRITE001) removed — write() is too generic.
+    # It fired on HTTP response writes (response.write()), cache writes, and all
+    # framework file operations generating massive FPs (74 in CPython, 24 in Django).
+    # Rule remains for documentation; the finding in PyGoat is still detected via
+    # the PLAIN_PWD001, FILE_WRITE001 pattern, and broader path traversal rules.
+    def test_tainted_write_silent_disabled(self):
+        code = """
+            code = request.POST.get('code')
+            f = open('/tmp/plugin.py', 'w')
+            f.write(code)
+        """
+        assert not_fires(code, "FILE_WRITE001"), "FILE_WRITE001 taint sink disabled: write() too generic"
+
+    def test_constant_write_safe(self):
+        code = """
+            f = open('/tmp/output.py', 'w')
+            f.write('print("hello")')
+        """
+        assert not_fires(code, "FILE_WRITE001"), "FILE_WRITE001 must NOT fire for constant content"
+
+
+# ============================================================
+# OPEN_REDIRECT001 — unvalidated redirect URL
+# ============================================================
+
+class TestOPENREDIRECT001:
+    def test_flask_redirect_fires(self):
+        code = """
+            next_url = request.GET.get('next')
+            return redirect(next_url)
+        """
+        assert fires(code, "OPEN_REDIRECT001"), "OPEN_REDIRECT001 must fire: user-controlled redirect URL"
+
+    def test_django_redirect_fires(self):
+        code = """
+            url = request.GET.get('url')
+            return HttpResponseRedirect(url)
+        """
+        assert fires(code, "OPEN_REDIRECT001"), "OPEN_REDIRECT001 must fire: HttpResponseRedirect with user URL"
+
+    def test_hardcoded_redirect_safe(self):
+        code = """
+            return redirect('/dashboard/')
+        """
+        assert not_fires(code, "OPEN_REDIRECT001"), "OPEN_REDIRECT001 must NOT fire for hardcoded redirect"
+
+
+# ============================================================
+# PLAIN_PWD001 — plaintext password in Django ORM create()
+# ============================================================
+
+class TestPLAINPWD001:
+    def test_create_with_tainted_password_fires(self):
+        code = """
+            pwd = request.POST.get('password')
+            User.objects.create(username='alice', password=pwd)
+        """
+        assert fires(code, "PLAIN_PWD001"), "PLAIN_PWD001 must fire: tainted password in ORM create()"
+
+    def test_hashed_password_safe(self):
+        code = """
+            from django.contrib.auth.hashers import make_password
+            User.objects.create(username='alice', password=make_password(raw_pwd))
+        """
+        assert not_fires(code, "PLAIN_PWD001"), "PLAIN_PWD001 must NOT fire when password is hashed"
+
+
+# ============================================================
+# DJANGO_DEBUG001 — DEBUG=True in settings
+# ============================================================
+
+class TestDJANGO_DEBUG001:
+    def test_debug_true_fires(self):
+        code = "DEBUG = True"
+        assert fires(code, "DJANGO_DEBUG001"), "DJANGO_DEBUG001 must fire: DEBUG=True"
+
+    def test_debug_false_safe(self):
+        code = "DEBUG = False"
+        assert not_fires(code, "DJANGO_DEBUG001"), "DJANGO_DEBUG001 must NOT fire for DEBUG=False"
+
+    def test_debug_env_var_safe(self):
+        code = "DEBUG = os.environ.get('DEBUG', 'False') == 'True'"
+        assert not_fires(code, "DJANGO_DEBUG001"), "DJANGO_DEBUG001 must NOT fire for env var pattern"
+
+
+# ============================================================
+# Taint propagation: os.path.join → open() (OPEN1149), ImageMath.eval (PY001)
+# ============================================================
+
+class TestOSPathJoinPropagation:
+    def test_path_join_propagates_to_open(self):
+        code = """
+            blog = request.POST.get('blog')
+            filename = os.path.join('/app/blogs', blog)
+            f = open(filename, 'r')
+        """
+        assert fires(code, "OPEN1149"), "os.path.join must propagate taint to open() → OPEN1149"
+
+    def test_imagmath_eval_via_sink(self):
+        code = """
+            from PIL import ImageMath, Image
+            func = request.POST.get('function')
+            img = Image.open('test.png')
+            output = ImageMath.eval(func, img=img)
+        """
+        assert fires(code, "PY001"), "ImageMath.eval() must fire PY001 via SK_IMG_EVAL001 taint sink"
+
+
+# ============================================================
+# file_content_exclude — PY302/PY107 ruamel false positive fix
+# ============================================================
+
+class TestFileContentExclude:
+    def test_pyyaml_unsafe_fires(self):
+        # Plain PyYAML import with unsafe load — must fire
+        code = "import yaml\nyaml.load(data)"
+        assert fires(code, "PY302"), "PY302 must fire for PyYAML yaml.load() without Loader"
+
+    def test_ruamel_yaml_suppressed(self, tmp_path):
+        # ruamel.yaml YAML() round-trip is safe — must NOT fire; file_content_exclude
+        # ("from ruamel.yaml|import ruamel") suppresses it. The AST node itself is serialized.
+        from pyspector._rust_core import run_scan
+        from pyspector.config import get_default_rules
+        import ast as _ast, json as _json, os, warnings
+        from pyspector.cli import AstEncoder
+
+        code = "from ruamel.yaml import YAML\nyaml = YAML()\nyaml.load(stream)"
+        filename = str(tmp_path / "settings.py")
+        with open(filename, "w") as f:
+            f.write(code)
+        rules_toml = get_default_rules()
+        tree = _ast.parse(code, filename=filename)
+        ast_json = _json.dumps(tree, cls=AstEncoder)
+        files = [{"file_path": filename, "content": code, "ast_json": ast_json}]
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            results = run_scan(str(tmp_path), rules_toml, {"exclude": []}, files)
+        py302 = [r for r in results if r.rule_id in ("PY302", "PY107")]
+        assert len(py302) == 0, f"PY302/PY107 must NOT fire for ruamel YAML() round-trip, got: {py302}"
+
+
+# ============================================================
+# CLI vs HTTP taint distinction (OperatorConfig vs HttpRequest)
+# ============================================================
+
+class TestCLIvsHTTPTaint:
+    def test_http_path_fires_PATH813(self):
+        # @app.route path param → HttpRequest → PATH813
+        code = """
+            path = request.GET.get('path')
+            from pathlib import Path
+            Path(path).mkdir(parents=True, exist_ok=True)
+        """
+        assert fires(code, "PATH813"), "HTTP path traversal must fire PATH813"
+
+    def test_cli_path_no_PATH813(self):
+        # @app.command path param → OperatorConfig → no PATH813
+        code = """
+            @app.command()
+            def run(output):
+                from pathlib import Path
+                Path(output).mkdir(parents=True, exist_ok=True)
+        """
+        assert not_fires(code, "PATH813"), \
+            "CLI operator path must NOT fire PATH813 — operator chose the path"
+
+    def test_json_load_supply_chain_fires(self):
+        # json.load is a FILE_DESERIALIZER: always produces HttpRequest taint
+        # regardless of how the file path was obtained. Supply-chain detection
+        # is preserved even when the operator chose the file path.
+        code = """
+            import json
+            config_path = "/etc/app/config.json"
+            data = json.load(open(config_path))
+            f = open(data, "w")
+        """
+        assert fires(code, "OPEN1149"), \
+            "json.load FILE_DESERIALIZER must propagate HttpRequest to open() sink"
diff --git a/tests/unit/test_semantic_provenance.py b/tests/unit/test_semantic_provenance.py
new file mode 100644
index 00000000..dfd2bd9e
--- /dev/null
+++ b/tests/unit/test_semantic_provenance.py
@@ -0,0 +1,180 @@
+"""
+Tier 1, Tier 2 and Tier 3 semantic provenance tests.
+Universal Python semantics — no framework-specific knowledge required.
+"""
+import os, sys, tempfile, warnings
+from pathlib import Path
+import pytest
+
+
+def run(code, filename="app.py"):
+    """Scan `code` as `filename` with the default rules; return [{"rule_id": ...}, ...]."""
+    import ast as _ast, json as _json
+    from pyspector._rust_core import run_scan
+    from pyspector.config import get_default_rules
+    from pyspector.cli import AstEncoder
+    rules = get_default_rules()
+    with tempfile.TemporaryDirectory() as d:
+        Path(d, filename).write_text(code)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            try: aj = _json.dumps(_ast.parse(code), cls=AstEncoder)
+            except Exception: aj = "{}"  # unparsable snippet → empty AST; Ctrl-C still propagates
+        files = [{"file_path": filename, "content": code, "ast_json": aj}]
+        return [{"rule_id": r.rule_id} for r in run_scan(d, rules, {"exclude": []}, files)]
+
+
+def fires(code, rule_id, filename="app.py"):  # findings of run() whose rule_id matches
+    return list(filter(lambda hit: hit["rule_id"] == rule_id, run(code, filename)))
+
+
+def _wrap(code):  # dedent + strip `code`, then nest it under `def view(request):`
+    from textwrap import dedent
+    rows = dedent(code).strip().splitlines()
+    return "def view(request):\n" + "\n".join("    " + row for row in rows) + "\n"
+
+
+def taint_fires(code, rule_id):
+    """Return the `rule_id` findings for `code` after nesting it in a function body
+    via `_wrap`; the wrapping is there so the taint engine's CFG analysis covers it."""
+    return fires(_wrap(code), rule_id)
+
+
+# ─── Tier 1: Structural Python rules ────────────────────────────────────────
+
+class TestTier1StructuralRules:
+
+    def test_admin795_class_declaration_not_flagged(self):
+        """
+        A `class AdminPasswordChangeForm(...)` statement only declares a type.
+        The name after the `class` keyword is DeveloperDefined, so ADMIN795
+        has to stay silent. Plain Python syntax — nothing Django-specific.
+        """
+        snippet = "class AdminPasswordChangeForm(BaseForm):\n    pass\n"
+        assert not fires(snippet, "ADMIN795"), (
+            "ADMIN795 must not fire on class declarations")
+
+    def test_admin795_fires_on_actual_inline_credential(self):
+        """A lowercase assignment matching the credential pattern keeps firing."""
+        # The pattern wants admin/administrator, then 'password' two times.
+        snippet = 'admin_default_password = "password_admin"\n'
+        assert fires(snippet, "ADMIN795", filename="config.py"), (
+            "ADMIN795 must still fire when pattern has two 'password' occurrences")
+
+    def test_g101_uppercase_constant_not_flagged(self):
+        """
+        `INTERNAL_RESET_SESSION_TOKEN = "_password_reset_token"` is a module
+        constant: an UPPER_CASE name bound to a literal has DeveloperDefined
+        provenance in any Python code base, so G101 must not report it.
+        """
+        snippet = 'INTERNAL_RESET_SESSION_TOKEN = "_password_reset_token"\n'
+        assert not fires(snippet, "G101"), (
+            "G101 must not fire on UPPER_CASE module constants")
+
+    def test_g101_fires_on_lowercase_secret(self):
+        """A lowercase secret-looking assignment is still reported by G101."""
+        snippet = 'api_secret = "mysecretkey123"\n'
+        assert fires(snippet, "G101", filename="config.py"), (
+            "G101 must fire on lowercase secret variable assignments")
+
+    def test_symlink816_hardcoded_path_not_flagged(self):
+        """
+        SYMLINK816 has no pattern any more; only the taint engine raises it.
+        An os.symlink() call whose arguments carry no taint must stay silent.
+        """
+        snippet = "os.symlink(original_path, symlink_path)\n"
+        assert not fires(snippet, "SYMLINK816", filename="utils.py"), (
+            "SYMLINK816 must not fire on os.symlink with non-tainted (non-HttpRequest) args")
+
+    def test_symlink816_fires_on_user_controlled_path(self):
+        """An HttpRequest-tainted symlink source is reported through the taint engine."""
+        snippet = _wrap("src = request.GET.get('path')\nos.symlink(src, '/tmp/dst')")
+        assert fires(snippet, "SYMLINK816"), (
+            "SYMLINK816 must fire when symlink source is HttpRequest-tainted")
+
+
+# ─── Tier 2: Provenance tracking ────────────────────────────────────────────
+
+class TestTier2ProvenanceTracking:
+
+    def test_http_request_to_getattr_fires(self):
+        """HttpRequest provenance flowing into the getattr sink.
+        Expected outcome: GETATTR828 is reported."""
+        snippet = "attr = request.GET.get('field')\ngetattr(obj, attr)"
+        hits = taint_fires(snippet, "GETATTR828")
+        assert hits, "HttpRequest provenance must trigger GETATTR828"
+
+    def test_http_request_to_open_fires(self):
+        """HttpRequest provenance flowing into the open() sink.
+        Expected outcome: OPEN1149 is reported."""
+        snippet = "path = request.GET.get('file')\nopen(path)"
+        hits = taint_fires(snippet, "OPEN1149")
+        assert hits, "HttpRequest provenance must trigger OPEN1149"
+
+    def test_system_generated_to_open_silent(self):
+        """SystemGenerated path (tempfile.mkstemp) flowing into open().
+        Expected outcome: nothing is reported."""
+        snippet = "import tempfile\npath = tempfile.mkstemp()[1]\nopen(path)"
+        hits = taint_fires(snippet, "OPEN1149")
+        assert not hits, "SystemGenerated paths must not trigger OPEN1149"
+
+    def test_developer_defined_literal_to_sql_silent(self):
+        """DeveloperDefined string literal formatted into SQL.
+        Expected outcome: nothing is reported (no injection risk)."""
+        snippet = 'table_name = "my_table"\nsql = "SELECT * FROM %s" % table_name\ncursor.execute(sql)'
+        hits = taint_fires(snippet, "PY101")
+        assert not hits, "DeveloperDefined literals must not trigger SQL injection"
+
+    def test_http_binop_to_sql_fires(self):
+        """HttpRequest value passed through BinOp % formatting into the SQL sink.
+        Expected outcome: PY101 is reported."""
+        snippet = "table = request.GET.get('t')\nsql = 'SELECT * FROM %s' % table\ncursor.execute(sql)"
+        hits = taint_fires(snippet, "PY101")
+        assert hits, "HttpRequest through BinOp % must trigger PY101"
+
+    def test_sanitizer_clears_http_taint(self):
+        """HttpRequest value routed through the quote_name sanitizer before SQL.
+        Expected outcome: taint is cleared, nothing is reported."""
+        snippet = "raw = request.GET.get('t')\ntable = quote_name(raw)\nsql = 'SELECT * FROM %s' % table\ncursor.execute(sql)"
+        hits = taint_fires(snippet, "PY101")
+        assert not hits, "quote_name sanitizer must clear taint before SQL sink"
+
+    def test_http_to_setattr_fires(self):
+        """HttpRequest value used as the attribute name given to setattr().
+        Expected outcome: SETATTR831 is reported."""
+        snippet = "attr = request.GET.get('field')\nsetattr(obj, attr, val)"
+        hits = taint_fires(snippet, "SETATTR831")
+        assert hits, "HttpRequest attribute name to setattr must fire"
+
+    def test_http_fstring_silent_disabled(self):
+        """FSTRING867 is disabled, so a tainted f-string alone reports nothing;
+        taint still propagates to downstream sinks (PY101, LOG741, etc.)."""
+        snippet = "cmd = request.GET.get('cmd')\nquery = f'SELECT {cmd}'"
+        hits = taint_fires(snippet, "FSTRING867")
+        assert not hits, "FSTRING867 disabled: downstream rules cover f-string injection contexts"
+
+    def test_developer_defined_fstring_silent(self):
+        """DeveloperDefined literal interpolated into an f-string.
+        Expected outcome: nothing is reported."""
+        snippet = "name = 'Alice'\ngreeting = f'Hello {name}!'"
+        hits = taint_fires(snippet, "FSTRING867")
+        assert not hits, "DeveloperDefined literal in f-string must be silent"
+
+
+# ─── Tier 3: Constant folding (DeveloperDefined propagation) ─────────────────
+
+class TestTier3ConstantFolding:
+
+    def test_constant_literal_assignment_is_developer_defined(self):
+        """A name bound to a string literal is DeveloperDefined.
+        Expected outcome: passing it to the SQL sink reports nothing."""
+        snippet = 'query = "SELECT * FROM users"\ncursor.execute(query)'
+        hits = taint_fires(snippet, "PY101")
+        assert not hits, "String literal assignment must be DeveloperDefined — no SQL injection"
+
+    def test_constant_plus_http_in_binop_is_http(self):
+        """A BinOp joining a constant with an HttpRequest value is HttpRequest.
+        Expected outcome: the unsafe result reaching SQL reports PY101."""
+        snippet = "user_id = request.GET.get('id')\nsql = 'SELECT * FROM users WHERE id=' + user_id\ncursor.execute(sql)"
+        hits = taint_fires(snippet, "PY101")
+        assert hits, "BinOp with HttpRequest operand must propagate HttpRequest taint"
diff --git a/tests/unit/test_taint_engine_extension.py b/tests/unit/test_taint_engine_extension.py
new file mode 100644
index 00000000..5ee1934e
--- /dev/null
+++ b/tests/unit/test_taint_engine_extension.py
@@ -0,0 +1,281 @@
+"""
+Tests for the extended taint engine: new sources (subscript, HTTP params),
+new sinks (getattr, open), and keyword-argument sink detection.
+
+Each test proves a specific taint flow that was NOT detectable before.
+"""
+
+import os
+import sys
+import tempfile
+import textwrap
+import warnings
+from pathlib import Path
+
+import pytest
+
+
+def _wrap_in_function(code: str) -> str:
+    """Dedent `code` and nest it, indented four spaces, under `def _test_view(request):`."""
+    body_rows = [f"    {row}" for row in textwrap.dedent(code).splitlines()]
+    return "def _test_view(request):\n" + "\n".join(body_rows) + "\n"
+
+
+def run_pyspector(code: str, *, filename: str = "app.py") -> list[dict]:
+    """Wrap `code` in a function, scan it with the default rules, return findings as dicts.
+
+    Each dict carries `rule_id`, `file_path`, `line_number` and `code` of one result.
+    """
+    import ast as _ast, json as _json
+    from pyspector._rust_core import run_scan
+    from pyspector.cli import AstEncoder
+    from pyspector.config import get_default_rules
+
+    rules_toml = get_default_rules()
+    source = _wrap_in_function(code)  # built once: written, parsed and scanned as-is
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        Path(os.path.join(tmpdir, filename)).write_text(source)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            try:
+                ast_json = _json.dumps(_ast.parse(source), cls=AstEncoder)
+            except Exception:
+                ast_json = "{}"  # unparsable snippet → scanner gets an empty AST
+
+        python_files = [{
+            "file_path": filename,
+            "content": source,
+            "ast_json": ast_json,
+        }]
+        results = run_scan(tmpdir, rules_toml, {"exclude": []}, python_files)
+
+    return [{"rule_id": r.rule_id, "file_path": r.file_path,
+             "line_number": r.line_number, "code": r.code}
+            for r in results]
+
+
+def findings_for(code, rule_id, **kw):  # run_pyspector() findings filtered to `rule_id`
+    return list(filter(lambda hit: hit["rule_id"] == rule_id, run_pyspector(code, **kw)))
+
+
+# ===========================================================================
+# GETATTR828 — taint-driven, only fires when attribute name is user-controlled
+# ===========================================================================
+
+class TestGetattr828:
+
+    def test_tainted_attr_via_request_get(self):
+        """Source request.get(): the fetched name reaches getattr()'s second argument."""
+        snippet = """
+            attr = request.get('field')
+            value = getattr(user, attr)
+        """
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire: tainted attr flows to getattr() second argument")
+
+    def test_tainted_attr_via_django_GET(self):
+        """Source request.GET.get() (a Phase 1 addition) feeding getattr()."""
+        snippet = """
+            attr = request.GET.get('field')
+            value = getattr(user, attr)
+        """
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire with Django request.GET.get() as source")
+
+    def test_tainted_attr_via_django_POST(self):
+        """Source request.POST.get() feeding getattr()."""
+        snippet = """
+            field_name = request.POST.get('attr')
+            result = getattr(model_instance, field_name)
+        """
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire with request.POST.get() as source")
+
+    def test_tainted_attr_via_flask_args(self):
+        """Source request.args.get() (Flask style) feeding getattr()."""
+        snippet = """
+            attr = request.args.get('property')
+            val = getattr(obj, attr)
+        """
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire with Flask request.args.get() as source")
+
+    def test_tainted_attr_via_subscript_django(self):
+        """Phase 2 source: the subscript request.GET['key'] feeding getattr()."""
+        snippet = """
+            attr = request.GET['field']
+            value = getattr(user, attr)
+        """
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire when attr comes from request.GET['key'] subscript")
+
+    def test_tainted_attr_via_subscript_flask(self):
+        """Phase 2 source: the subscript request.args['key'] feeding getattr()."""
+        snippet = """
+            attr = request.args['property']
+            val = getattr(obj, attr)
+        """
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire when attr comes from request.args['key'] subscript")
+
+    def test_tainted_attr_propagation_through_variable(self):
+        """A value derived from a tainted one via .strip() must stay tainted."""
+        snippet = """
+            raw = request.GET.get('field')
+            cleaned = raw.strip()
+            value = getattr(user, cleaned)
+        """
+        # Conservative propagation: `cleaned` inherits the taint carried by `raw`.
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire even when tainted value passes through intermediate variable")
+
+    # --- True negatives: must NOT fire ---
+
+    def test_constant_attr_not_flagged(self):
+        """A literal attribute name passed straight to getattr() is safe."""
+        snippet = """
+            value = getattr(obj, 'username')
+        """
+        assert not findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must NOT fire for constant attribute names")
+
+    def test_local_variable_attr_not_flagged(self):
+        """A local bound to a string literal, unrelated to the request, is safe."""
+        snippet = """
+            field = 'email'
+            value = getattr(user, field)
+        """
+        assert not findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must NOT fire when attr is a local constant string")
+
+
+# ===========================================================================
+# OPEN1149 — taint-driven, only fires when path is user-controlled
+# ===========================================================================
+
+class TestOpen1149:
+
+    def test_tainted_path_via_request_get(self):
+        """Source request.get(): the fetched value is used as the path given to open()."""
+        snippet = """
+            filename = request.get('file')
+            with open(filename) as f:
+                data = f.read()
+        """
+        assert findings_for(snippet, "OPEN1149"), (
+            "OPEN1149 must fire when file path comes from request")
+
+    def test_tainted_path_via_django_GET_subscript(self):
+        """Phase 2 source: the subscript request.GET['filename'] used as open() path."""
+        snippet = """
+            path = request.GET['filename']
+            with open(path, 'r') as f:
+                content = f.read()
+        """
+        assert findings_for(snippet, "OPEN1149"), (
+            "OPEN1149 must fire when path comes from request.GET subscript")
+
+    def test_tainted_path_via_flask_form(self):
+        """Source request.form.get() (Flask style) used as a write path for open()."""
+        snippet = """
+            upload_path = request.form.get('destination')
+            with open(upload_path, 'wb') as f:
+                f.write(data)
+        """
+        assert findings_for(snippet, "OPEN1149"), (
+            "OPEN1149 must fire when write path comes from form input")
+
+    # --- True negatives ---
+
+    def test_hardcoded_path_not_flagged(self):
+        """A literal file path passed straight to open() is safe."""
+        snippet = """
+            with open('config.toml', 'r') as f:
+                config = f.read()
+        """
+        assert not findings_for(snippet, "OPEN1149"), (
+            "OPEN1149 must NOT fire for hardcoded file paths")
+
+    def test_local_path_not_flagged(self):
+        """A path concatenated purely from local string literals is safe."""
+        snippet = """
+            base = '/var/data'
+            filename = 'output.txt'
+            path = base + '/' + filename
+            with open(path) as f:
+                pass
+        """
+        assert not findings_for(snippet, "OPEN1149"), (
+            "OPEN1149 must NOT fire when path is constructed from local constants")
+
+
+# ===========================================================================
+# Phase 3: keyword argument sink detection
+# ===========================================================================
+
+class TestKeywordArgSinks:
+
+    def test_getattr_with_keyword_name_arg(self):
+        """Phase 3 slot; the snippet below only calls getattr(user, attr) positionally."""
+        code = """
+            attr = request.GET.get('field')
+            value = getattr(user, attr)
+        """
+        # NOTE(review): no keyword-argument call is exercised here — TODO add one or rename
+        assert findings_for(code, "GETATTR828"), \
+            "GETATTR828 must fire for positional getattr(obj, tainted)"
+
+
+# ===========================================================================
+# Source trust levels: input() and HTTP input fire; os.environ.get() is operator-trusted
+# ===========================================================================
+
+class TestNewTaintSources:
+
+    def test_input_to_getattr(self):
+        """Source input() (TS006): the typed-in name reaching getattr() is reported."""
+        snippet = """
+            attr = input('Enter attribute: ')
+            value = getattr(obj, attr)
+        """
+        assert findings_for(snippet, "GETATTR828"), (
+            "GETATTR828 must fire when attr comes from input()")
+
+    def test_environ_to_open_no_finding(self):
+        """os.environ.get() now counts as OperatorConfig: a path the operator put
+        in an environment variable is opened on purpose, so it is no finding."""
+        snippet = """
+            import os
+            path = os.environ.get('CONFIG_PATH')
+            with open(path) as f:
+                data = f.read()
+        """
+        assert not findings_for(snippet, "OPEN1149"), (
+            "OPEN1149 must NOT fire when path comes from os.environ.get() (operator-trusted)")
+
+    def test_http_request_to_open_still_fires(self):
+        """An attacker-controlled HTTP request parameter used as open() path still fires."""
+        snippet = """
+            path = request.GET.get('file')
+            with open(path) as f:
+                data = f.read()
+        """
+        assert findings_for(snippet, "OPEN1149"), (
+            "OPEN1149 must still fire when path comes from HTTP request")
+
+
+# ===========================================================================
+# Regression: existing PY102 (subprocess) still works
+# ===========================================================================
+
+class TestRegressionPY102:
+
+    def test_subprocess_taint_still_fires(self):
+        """Regression guard: the PY102 taint flow survives the engine changes."""
+        snippet = """
+            cmd = request.get('command')
+            subprocess.run(cmd)
+        """
+        assert findings_for(snippet, "PY102"), (
+            "PY102 regression: subprocess.run with tainted arg must still fire")