Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions internal/httplink/httplink.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,14 +375,22 @@ func (l *Linker) discoverRoutes(rootPath string) []RouteHandler {
// C# ASP.NET: check attribute decorators
routes = append(routes, extractASPNetRoutes(f)...)

// Source-based route discovery (Go gin, Express.js, PHP Laravel, Kotlin Ktor)
// Source-based route discovery — each extractor only runs on its own language's files
// to prevent false positives (e.g., Python dict .get() matching Ktor/Go route regex).
if f.FilePath != "" && f.StartLine > 0 && f.EndLine > 0 {
ext := strings.ToLower(filepath.Ext(f.FilePath))
source := readSourceLines(rootPath, f.FilePath, f.StartLine, f.EndLine)
if source != "" {
routes = append(routes, extractGoRoutes(f, source)...)
routes = append(routes, extractExpressRoutes(f, source)...)
routes = append(routes, extractLaravelRoutes(f, source)...)
routes = append(routes, extractKtorRoutes(f, source)...)
switch ext {
case ".go":
routes = append(routes, extractGoRoutes(f, source)...)
case ".js", ".ts", ".mjs", ".mts", ".jsx", ".tsx":
routes = append(routes, extractExpressRoutes(f, source)...)
case ".php":
routes = append(routes, extractLaravelRoutes(f, source)...)
case ".kt", ".kts":
routes = append(routes, extractKtorRoutes(f, source)...)
}
}
}

Expand Down
19 changes: 14 additions & 5 deletions internal/pipeline/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -1328,9 +1328,9 @@ func collectEdgeQNs(results [][]resolvedEdge) (qnSet map[string]struct{}, totalE
return qnSet, totalEdges
}

// createLSPStubNodes creates stub nodes for LSP-resolved targets that don't exist in the graph.
// This happens for stdlib/external methods (e.g., context.Context.Done) that
// the LSP resolver correctly identifies but aren't indexed as nodes.
// createLSPStubNodes creates stub nodes for targets that don't exist in the graph.
// This handles LSP-resolved targets (stdlib/external methods) and DLL-resolved
// targets (dynamic DLL function references from GetProcAddress/dlsym/Resolve).
func (p *Pipeline) createLSPStubNodes(results [][]resolvedEdge, qnToID map[string]int64) {
var stubs []*store.Node
stubQNs := make(map[string]bool)
Expand All @@ -1343,7 +1343,7 @@ func (p *Pipeline) createLSPStubNodes(results [][]resolvedEdge, qnToID map[strin
continue
}
strategy, _ := re.Properties["resolution_strategy"].(string)
if !strings.HasPrefix(strategy, "lsp_") {
if !strings.HasPrefix(strategy, "lsp_") && strategy != "dll_resolve" {
continue
}
stubQNs[re.TargetQN] = true
Expand All @@ -1355,12 +1355,21 @@ func (p *Pipeline) createLSPStubNodes(results [][]resolvedEdge, qnToID map[strin
if strings.Count(re.TargetQN, ".") >= 2 {
label = "Method"
}

props := map[string]any{"stub": true, "source": strategy}
// Carry over DLL metadata for DLL-resolved stubs
if strategy == "dll_resolve" {
if dllName, ok := re.Properties["dll_name"].(string); ok {
props["dll_name"] = dllName
}
}

stubs = append(stubs, &store.Node{
Project: p.ProjectName,
Label: label,
Name: name,
QualifiedName: re.TargetQN,
Properties: map[string]any{"stub": true, "source": "lsp_resolution"},
Properties: props,
})
}
}
Expand Down
267 changes: 267 additions & 0 deletions internal/pipeline/pipeline_cbm.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package pipeline

import (
"fmt"
"log/slog"
"path/filepath"
"regexp"
"strings"

"github.com/DeusData/codebase-memory-mcp/internal/cbm"
Expand All @@ -19,10 +21,29 @@ type cachedExtraction struct {
Language lang.Language
}

// File size ceilings applied before a file is handed to tree-sitter, to prevent
// stack overflows. SQL gets the tighter cap: its grammar deeply recurses on bulk
// INSERTs, and Windows has a 1MB default C stack.
const (
	maxSQLFileSize     = 1024 * 1024     // 1 MB
	maxGeneralFileSize = 4 * 1024 * 1024 // 4 MB
)

// cbmParseFile reads a file, calls cbm.ExtractFile(), and converts the
// result to the same parseResult format used by the batch write infrastructure.
// This replaces parseFileAST() — all AST walking happens in C.
func cbmParseFile(projectName string, f discover.FileInfo) *parseResult {
// Guard: SQL tree-sitter grammar deeply recurses on large files (bulk INSERT
// dumps), exhausting the C stack — especially on Windows (1 MB default).
if f.Language == lang.SQL && f.Size > maxSQLFileSize {
slog.Info("cbm.skip.large_sql", "path", f.RelPath, "size", f.Size)
return &parseResult{File: f, Err: fmt.Errorf("skipped: SQL file too large (%d bytes, max %d)", f.Size, maxSQLFileSize)}
}
// General safety: files > 4 MB are likely generated/vendored and risk OOM or stack issues.
if f.Size > maxGeneralFileSize {
slog.Info("cbm.skip.large_file", "path", f.RelPath, "size", f.Size)
return &parseResult{File: f, Err: fmt.Errorf("skipped: file too large (%d bytes, max %d)", f.Size, maxGeneralFileSize)}
}

source, cleanup, err := mmapFile(f.Path)
if cleanup != nil {
defer cleanup()
Expand Down Expand Up @@ -253,6 +274,12 @@ func (p *Pipeline) resolveFileCallsCBM(relPath string, ext *cachedExtraction) []
}
}

// Python: track FastAPI Depends(func_ref) as CALLS edges
edges = append(edges, p.extractPythonDependsEdges(relPath, ext)...)

// C/C++: track dynamic DLL resolution (GetProcAddress/dlsym/Resolve) as CALLS edges
edges = append(edges, p.extractDLLResolveEdges(relPath, ext)...)

return edges
}

Expand Down Expand Up @@ -603,3 +630,243 @@ func isCheckedException(excName string) bool {
}
return false
}

// --- Python FastAPI Depends() tracking (#27) ---

// pythonDependsRe matches Depends(func_ref) patterns in Python function signatures
// and captures the function reference passed to Depends().
//
// The leading \b keeps unrelated identifiers that merely end in "Depends"
// (e.g. MyDepends(x)) from matching, while qualified calls such as
// fastapi.Depends(x) still match. The optional "dependency=" prefix covers the
// keyword form Depends(dependency=func_ref), so the capture is the reference
// itself rather than the parameter name.
var pythonDependsRe = regexp.MustCompile(`\bDepends\(\s*(?:dependency\s*=\s*)?([\w.]+)`)

// extractPythonDependsEdges links Python functions to the dependencies they
// declare through Depends(func_ref) in their signatures. It emits one CALLS edge
// per (function, reference) pair that the registry can resolve. Without these
// edges, functions referenced only via Depends() appear as dead code
// (in_degree=0).
//
// It returns nil for non-Python files, for files without an extraction result,
// for files with no call to Depends, and when the source cannot be read.
func (p *Pipeline) extractPythonDependsEdges(relPath string, ext *cachedExtraction) []resolvedEdge {
	if ext.Result == nil || ext.Language != lang.Python {
		return nil
	}

	// Cheap pre-filter: only read the file if some call targets Depends.
	dependsCalled := false
	for _, c := range ext.Result.Calls {
		if c.CalleeName == "Depends" || strings.HasSuffix(c.CalleeName, ".Depends") {
			dependsCalled = true
			break
		}
	}
	if !dependsCalled {
		return nil
	}

	raw := readFileSource(p.RepoPath, relPath)
	if len(raw) == 0 {
		return nil
	}
	lines := strings.Split(string(raw), "\n")

	moduleQN := fqn.ModuleQN(p.ProjectName, relPath)
	importMap := p.importMaps[moduleQN]

	// closesSignature reports whether a whitespace-trimmed line looks like the
	// last line of a def header: it ends in "):" / ") :" or carries a ") ->"
	// return annotation.
	closesSignature := func(trimmed string) bool {
		switch {
		case strings.HasSuffix(trimmed, "):"), strings.HasSuffix(trimmed, ") :"):
			return true
		default:
			return strings.Contains(trimmed, ") ->")
		}
	}

	var edges []resolvedEdge
	emitted := make(map[[2]string]bool)

	for _, def := range ext.Result.Definitions {
		isCallable := def.Label == "Function" || def.Label == "Method"
		if !isCallable || def.StartLine <= 0 {
			continue
		}

		// Signatures may span several lines; bound the scan by a fixed
		// look-ahead, the end of the definition, and the end of the file.
		last := def.StartLine + 15
		if def.EndLine > 0 && def.EndLine < last {
			last = def.EndLine
		}
		if last > len(lines) {
			last = len(lines)
		}

		var header strings.Builder
		for n := def.StartLine - 1; n < last; n++ {
			line := lines[n]
			header.WriteString(line)
			header.WriteByte('\n')
			if closesSignature(strings.TrimSpace(line)) {
				break
			}
		}

		for _, match := range pythonDependsRe.FindAllStringSubmatch(header.String(), -1) {
			ref := match[1]
			if ref == "" {
				continue
			}

			pair := [2]string{def.QualifiedName, ref}
			if emitted[pair] {
				continue
			}
			emitted[pair] = true

			target := p.registry.Resolve(ref, moduleQN, importMap)
			if target.QualifiedName == "" {
				// Import-alias fallback: for Depends(_dep_require_admin) where the
				// alias maps to an import path ending in "require_admin", retry
				// with the last segment of that path.
				if importPath, aliased := importMap[ref]; aliased {
					if dot := strings.LastIndex(importPath, "."); dot >= 0 {
						target = p.registry.Resolve(importPath[dot+1:], moduleQN, importMap)
					}
				}
			}
			if target.QualifiedName == "" {
				continue
			}

			edges = append(edges, resolvedEdge{
				CallerQN: def.QualifiedName,
				TargetQN: target.QualifiedName,
				Type:     "CALLS",
				Properties: map[string]any{
					"confidence":          0.95,
					"confidence_band":     "high",
					"resolution_strategy": "fastapi_depends",
				},
			})
		}
	}

	if len(edges) > 0 {
		slog.Info("pass3.fastapi_depends", "file", relPath, "edges", len(edges))
	}
	return edges
}

// --- C/C++ dynamic DLL resolution tracking (#29) ---

// DLL resolution patterns for C/C++ dynamic linking.
//
// The handle argument of GetProcAddress/dlsym may be a bare identifier or a
// simple member/scope access (hLib, this->hLib, loader.handle, ns::g_lib).
// Handle expressions containing calls or casts are not matched.
var (
	// GetProcAddress(handle, "FunctionName") — Win32 API
	dllGetProcAddrRe = regexp.MustCompile(`GetProcAddress\s*\(\s*[-\w.:>]+\s*,\s*["'](\w+)["']`)
	// dlsym(handle, "function_name") — POSIX
	dllDlsymRe = regexp.MustCompile(`dlsym\s*\(\s*[-\w.:>]+\s*,\s*["'](\w+)["']`)
	// obj.Resolve("FunctionName") or obj->Resolve("FunctionName") — custom DLL loaders
	dllResolveRe = regexp.MustCompile(`(?:->|\.)\s*Resolve\s*\(\s*["'](\w+)["']`)
	// LoadLibrary[Ex][A|W]("dll_name.dll") or dlopen("lib.so") — DLL name extraction.
	// Also accepts wide literals (L"...") and TEXT("...") / _T("...") wrappers.
	dllLoadRe = regexp.MustCompile(`(?:LoadLibrary(?:Ex)?[AW]?|dlopen)\s*\(\s*(?:(?:TEXT|_T)\s*\(\s*)?L?["']([^"']+)["']`)
)

// extractDLLResolveEdges scans C/C++ function source for dynamic DLL resolution
// patterns (GetProcAddress, dlsym, Resolve) and creates CALLS edges from the
// enclosing function to a synthetic target, enabling call graph tracking across
// DLL boundaries.
//
// Target qualified names have the form
// <moduleQN>.dll.<dllName>.<funcName>. dllName is taken, best-effort, from the
// first LoadLibrary/dlopen string literal in the file (base name, last
// extension stripped) and defaults to "external". Each edge carries the
// "dll_resolve" resolution strategy plus the dll_name and dll_function
// properties.
//
// It returns nil for non-C/C++ files, for files without an extraction result,
// for files with no call to a known resolver, and when the source cannot be read.
func (p *Pipeline) extractDLLResolveEdges(relPath string, ext *cachedExtraction) []resolvedEdge {
	if ext.Language != lang.CPP && ext.Language != lang.C {
		return nil
	}
	if ext.Result == nil {
		return nil
	}

	// Early bail: check if any call targets a DLL resolution function
	hasDLLCall := false
	for _, call := range ext.Result.Calls {
		name := call.CalleeName
		if name == "GetProcAddress" || name == "GetProcAddressA" || name == "GetProcAddressW" ||
			name == "dlsym" || strings.HasSuffix(name, ".Resolve") || strings.HasSuffix(name, "->Resolve") {
			hasDLLCall = true
			break
		}
	}
	if !hasDLLCall {
		return nil
	}

	// Read full file source
	source := readFileSource(p.RepoPath, relPath)
	if len(source) == 0 {
		return nil
	}
	sourceStr := string(source)
	sourceLines := strings.Split(sourceStr, "\n")

	// Extract DLL name from LoadLibrary/dlopen calls (best-effort).
	dllName := "external"
	if m := dllLoadRe.FindStringSubmatch(sourceStr); m != nil {
		// Library literals are often Windows-style paths ("C:\\dir\\foo.dll").
		// filepath.Base only splits on the host OS separator, so normalize
		// backslashes first to get the same base name on every platform.
		base := filepath.Base(strings.ReplaceAll(m[1], `\`, "/"))
		// Strip extension; keep the default if nothing is left (e.g. ".so").
		if stem := strings.TrimSuffix(base, filepath.Ext(base)); stem != "" {
			dllName = stem
		}
	}

	moduleQN := fqn.ModuleQN(p.ProjectName, relPath)

	// Built once: the same patterns are applied to every function body.
	patterns := []*regexp.Regexp{dllGetProcAddrRe, dllDlsymRe, dllResolveRe}

	var edges []resolvedEdge
	seen := make(map[[2]string]bool)

	for _, def := range ext.Result.Definitions {
		if def.Label != "Function" && def.Label != "Method" {
			continue
		}
		if def.StartLine <= 0 || def.EndLine <= 0 {
			continue
		}

		// Extract function body source (1-based inclusive lines, clamped to the file).
		start := def.StartLine - 1
		end := def.EndLine
		if end > len(sourceLines) {
			end = len(sourceLines)
		}
		if start >= end {
			continue
		}
		bodyStr := strings.Join(sourceLines[start:end], "\n")

		// Match DLL resolution patterns
		for _, re := range patterns {
			for _, m := range re.FindAllStringSubmatch(bodyStr, -1) {
				funcName := m[1]
				if funcName == "" {
					continue
				}

				callerQN := def.QualifiedName
				targetQN := moduleQN + ".dll." + dllName + "." + funcName

				// One edge per (caller, target), even if several patterns
				// or call sites resolve the same symbol.
				key := [2]string{callerQN, targetQN}
				if seen[key] {
					continue
				}
				seen[key] = true

				edges = append(edges, resolvedEdge{
					CallerQN: callerQN,
					TargetQN: targetQN,
					Type:     "CALLS",
					Properties: map[string]any{
						"confidence":          0.85,
						"confidence_band":     "high",
						"resolution_strategy": "dll_resolve",
						"dll_name":            dllName,
						"dll_function":        funcName,
					},
				})
			}
		}
	}

	if len(edges) > 0 {
		slog.Info("pass3.dll_resolve", "file", relPath, "edges", len(edges))
	}

	return edges
}