From db108822069d2f6f937d236fd033b210335c38d3 Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Mon, 9 Feb 2026 23:53:40 +0100 Subject: [PATCH 1/5] Add symbol extraction to get_file_contents tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an optional 'symbol' parameter to get_file_contents that uses tree-sitter to extract a specific named symbol (function, class, type, method, etc.) from a file. Instead of returning the entire file, only the matching symbol's source code is returned. Supports all languages from the structural diff engine: Go, Python, JavaScript, TypeScript, Ruby, Rust, Java, C/C++. For unsupported file types, returns an error suggesting the feature is not available. If the symbol is not found, the error message includes a list of available symbols in the file to help the model self-correct. This pairs well with the structural diff tool — a model can see which symbols changed via compare_file_contents, then fetch specific symbols via get_file_contents to examine them in detail. --- .../__toolsnaps__/get_file_contents.snap | 4 + pkg/github/repositories.go | 33 ++++- pkg/github/symbol_extraction.go | 67 ++++++++++ pkg/github/symbol_extraction_test.go | 116 ++++++++++++++++++ 4 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 pkg/github/symbol_extraction.go create mode 100644 pkg/github/symbol_extraction_test.go diff --git a/pkg/github/__toolsnaps__/get_file_contents.snap b/pkg/github/__toolsnaps__/get_file_contents.snap index 94b7aeeda..bcfb51fc1 100644 --- a/pkg/github/__toolsnaps__/get_file_contents.snap +++ b/pkg/github/__toolsnaps__/get_file_contents.snap @@ -26,6 +26,10 @@ "sha": { "description": "Accepts optional commit SHA. If specified, it will be used instead of ref", "type": "string" + }, + "symbol": { + "description": "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.", + "type": "string" } }, "required": [ diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index f6203f39f..caa772cb5 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -652,6 +652,10 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool Type: "string", Description: "Accepts optional commit SHA. If specified, it will be used instead of ref", }, + "symbol": { + Type: "string", + Description: "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.", + }, }, Required: []string{"owner", "repo"}, }, @@ -684,6 +688,11 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return utils.NewToolResultError(err.Error()), nil, nil } + symbol, err := OptionalParam[string](args, "symbol") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } + client, err := deps.GetClient(ctx) if err != nil { return utils.NewToolResultError("failed to get GitHub client"), nil, nil @@ -769,9 +778,31 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool strings.HasSuffix(contentType, "+xml") if isTextContent { + content := string(body) + + // If a symbol was requested, extract just that symbol + if symbol != "" { + symbolText, symbolKind, extractErr := ExtractSymbol(path, body, symbol) + if extractErr != nil { + return utils.NewToolResultError(extractErr.Error()), nil, nil + } + content = symbolText + successMsg := fmt.Sprintf("extracted %s %q from %s", symbolKind, symbol, path) + if fileSHA != "" { + successMsg += fmt.Sprintf(" (SHA: %s)", fileSHA) + } + successMsg += successNote + result := &mcp.ResourceContents{ + URI: resourceURI, + Text: content, + MIMEType: contentType, + } + return utils.NewToolResultResource(successMsg, result), nil, nil + } + result := &mcp.ResourceContents{ URI: resourceURI, - Text: string(body), + Text: content, MIMEType: contentType, } // Include SHA in the result metadata diff --git a/pkg/github/symbol_extraction.go b/pkg/github/symbol_extraction.go new file mode 100644 index 000000000..2c46364b5 --- /dev/null +++ b/pkg/github/symbol_extraction.go @@ -0,0 +1,67 @@ +package github + +import ( + "fmt" + "strings" +) + +// ExtractSymbol searches source code for a named symbol and returns its text. +// It searches top-level declarations first, then recursively searches nested +// declarations (e.g. methods inside classes). Returns the symbol text and its +// kind, or an error if the symbol is not found or the language is unsupported. +func ExtractSymbol(path string, source []byte, symbolName string) (text string, kind string, err error) { + config := languageForPath(path) + if config == nil { + return "", "", fmt.Errorf("symbol extraction is not supported for this file type") + } + + decls, err := extractDeclarations(config, source) + if err != nil { + return "", "", fmt.Errorf("failed to parse file: %w", err) + } + + // Search top-level declarations + if text, kind, found := findSymbol(decls, symbolName); found { + return text, kind, nil + } + + // Search nested declarations (methods inside classes, etc.) + for _, decl := range decls { + nested := extractChildDeclarationsFromText(config, decl.Text) + if text, kind, found := findSymbol(nested, symbolName); found { + return text, kind, nil + } + } + + // Build list of available symbols for the error message + available := listSymbolNames(config, decls) + return "", "", fmt.Errorf("symbol %q not found. Available symbols: %s", symbolName, strings.Join(available, ", ")) +} + +// findSymbol searches a slice of declarations for a matching name. +func findSymbol(decls []declaration, name string) (string, string, bool) { + for _, d := range decls { + if d.Name == name { + return d.Text, d.Kind, true + } + } + return "", "", false +} + +// listSymbolNames returns all symbol names from top-level and one level of +// nested declarations, for use in error messages. +func listSymbolNames(config *languageConfig, decls []declaration) []string { + var names []string + for _, d := range decls { + if !strings.HasPrefix(d.Name, "_") { + names = append(names, d.Name) + } + nested := extractChildDeclarationsFromText(config, d.Text) + for _, n := range nested { + if !strings.HasPrefix(n.Name, "_") { + names = append(names, n.Name) + } + } + } + return names +} diff --git a/pkg/github/symbol_extraction_test.go b/pkg/github/symbol_extraction_test.go new file mode 100644 index 000000000..95f3b31b8 --- /dev/null +++ b/pkg/github/symbol_extraction_test.go @@ -0,0 +1,116 @@ +package github + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestExtractSymbol(t *testing.T) { + t.Run("Go function", func(t *testing.T) { + source := []byte("package main\n\nfunc hello() {\n\tfmt.Println(\"hello\")\n}\n\nfunc world() {\n\tfmt.Println(\"world\")\n}\n") + text, kind, err := ExtractSymbol("main.go", source, "hello") + require.NoError(t, err) + assert.Equal(t, "function_declaration", kind) + assert.Contains(t, text, "func hello()") + assert.Contains(t, text, "hello") + assert.NotContains(t, text, "world") + }) + + t.Run("Go method with receiver", func(t *testing.T) { + source := []byte("package main\n\ntype Server struct{}\n\nfunc (s *Server) Start() {\n\tlog.Println(\"start\")\n}\n\nfunc (s *Server) Stop() {\n\tlog.Println(\"stop\")\n}\n") + text, kind, err := ExtractSymbol("main.go", source, "(*Server).Start") + require.NoError(t, err) + assert.Equal(t, "method_declaration", kind) + assert.Contains(t, text, "Start") + assert.NotContains(t, text, "Stop") + }) + + t.Run("Go type", func(t *testing.T) { + source := []byte("package main\n\ntype Config struct {\n\tHost string\n\tPort int\n}\n") + text, kind, err := ExtractSymbol("main.go", source, "Config") + require.NoError(t, err) + assert.Equal(t, "type_declaration", kind) + assert.Contains(t, text, "Host string") + }) + + t.Run("Python function", func(t *testing.T) { + source := []byte("def hello():\n print('hello')\n\ndef world():\n print('world')\n") + text, kind, err := ExtractSymbol("app.py", source, "hello") + require.NoError(t, err) + assert.Equal(t, "function_definition", kind) + assert.Contains(t, text, "print('hello')") + assert.NotContains(t, text, "world") + }) + + t.Run("Python class method (nested)", func(t *testing.T) { + source := []byte("class Dog:\n def bark(self):\n return 'woof'\n def fetch(self):\n return 'ball'\n") + text, kind, err := ExtractSymbol("app.py", source, "bark") + require.NoError(t, err) + assert.Equal(t, "function_definition", kind) + assert.Contains(t, text, "woof") + assert.NotContains(t, text, "ball") + }) + + t.Run("TypeScript class", func(t *testing.T) { + source := []byte("class Api {\n get() {\n return fetch('/data');\n }\n}\n\nfunction helper() { return 1; }\n") + text, kind, err := ExtractSymbol("api.ts", source, "Api") + require.NoError(t, err) + assert.Equal(t, "class_declaration", kind) + assert.Contains(t, text, "get()") + assert.NotContains(t, text, "helper") + }) + + t.Run("TypeScript class method (nested)", func(t *testing.T) { + source := []byte("class Api {\n get() {\n return fetch('/data');\n }\n post() {\n return fetch('/post');\n }\n}\n") + text, kind, err := ExtractSymbol("api.ts", source, "get") + require.NoError(t, err) + assert.Equal(t, "method_definition", kind) + assert.Contains(t, text, "/data") + assert.NotContains(t, text, "/post") + }) + + t.Run("symbol not found lists available", func(t *testing.T) { + source := []byte("package main\n\nfunc hello() {}\n\nfunc world() {}\n") + _, _, err := ExtractSymbol("main.go", source, "nonexistent") + require.Error(t, err) + assert.Contains(t, err.Error(), "not found") + assert.Contains(t, err.Error(), "hello") + assert.Contains(t, err.Error(), "world") + }) + + t.Run("unsupported file type", func(t *testing.T) { + source := []byte("some content") + _, _, err := ExtractSymbol("README.md", source, "anything") + require.Error(t, err) + assert.Contains(t, err.Error(), "not supported") + }) + + t.Run("Java class with methods", func(t *testing.T) { + source := []byte("class Calculator {\n int add(int a, int b) {\n return a + b;\n }\n int multiply(int a, int b) {\n return a * b;\n }\n}\n") + text, kind, err := ExtractSymbol("Calculator.java", source, "add") + require.NoError(t, err) + assert.Equal(t, "method_declaration", kind) + assert.Contains(t, text, "a + b") + assert.NotContains(t, text, "a * b") + }) + + t.Run("Rust function", func(t *testing.T) { + source := []byte("fn hello() {\n println!(\"hello\");\n}\n\nfn world() {\n println!(\"world\");\n}\n") + text, kind, err := ExtractSymbol("main.rs", source, "hello") + require.NoError(t, err) + assert.Equal(t, "function_item", kind) + assert.Contains(t, text, "hello") + assert.NotContains(t, text, "world") + }) + + t.Run("Go var declaration", func(t *testing.T) { + source := []byte("package main\n\nvar defaultTimeout = 30\n\nvar maxRetries = 3\n") + text, kind, err := ExtractSymbol("main.go", source, "defaultTimeout") + require.NoError(t, err) + assert.Equal(t, "var_declaration", kind) + assert.Contains(t, text, "30") + assert.NotContains(t, text, "maxRetries") + }) +} From 9f6f67db07ba9e629b3f22e1e46f439967012609 Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Mon, 9 Feb 2026 23:55:17 +0100 Subject: [PATCH 2/5] Update copilot instructions with structural diff and symbol extraction docs Documents the tree-sitter structural diff engine, compare_file_contents tool, symbol extraction via get_file_contents, CGO requirement, and how to add new language support. Also updates build commands to include CGO_ENABLED=1. --- .github/copilot-instructions.md | 62 ++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index f1b4cf9cb..067146ba3 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -46,17 +46,17 @@ If you change any MCP tool definitions or schemas: # Download dependencies (rarely needed - usually cached) go mod download -# Build the server binary -go build -v ./cmd/github-mcp-server +# Build the server binary (CGO required for tree-sitter) +CGO_ENABLED=1 go build -v ./cmd/github-mcp-server # Run the server ./github-mcp-server stdio # Run specific package tests -go test ./pkg/github -v +CGO_ENABLED=1 go test ./pkg/github -v # Run specific test -go test ./pkg/github -run TestGetMe +CGO_ENABLED=1 go test ./pkg/github -run TestGetMe ``` ## Project Structure @@ -94,7 +94,7 @@ go test ./pkg/github -run TestGetMe - **go.mod / go.sum:** Go module dependencies (Go 1.24.0+) - **.golangci.yml:** Linter configuration (v2 format, ~15 linters enabled) -- **Dockerfile:** Multi-stage build (golang:1.25.3-alpine → distroless) +- **Dockerfile:** Multi-stage build (golang:1.25.3-alpine → distroless), CGO_ENABLED=1 for tree-sitter - **server.json:** MCP server metadata for registry - **.goreleaser.yaml:** Release automation config - **.gitignore:** Excludes bin/, dist/, vendor/, *.DS_Store, github-mcp-server binary @@ -183,6 +183,58 @@ All workflows run on push/PR unless noted. Located in `.github/workflows/`: - **Test changes thoroughly** before committing - Export functions (capitalize) if they could be used by other repos as a library +## Structural Diff & Symbol Extraction (Tree-sitter) + +The server includes a tree-sitter-based code analysis engine that powers two features: + +### compare_file_contents Tool (Feature-flagged: `mcp_compare_file_contents`) + +Compares two versions of a file between refs/commits. Produces context-efficient diffs: + +- **Structured data** (JSON, YAML, CSV, TOML): Semantic path-based diffs showing only meaningful changes +- **Code files** (Go, Python, JS, TS, Ruby, Rust, Java, C/C++): Structural diffs showing declaration-level changes with inline detail +- **Other files**: Unified line-based diff as fallback + +Structural diffs are significantly more token-efficient (~74% average reduction) while preserving all meaningful information. They show which symbols changed, with inline line diffs for the specific changes within each symbol. + +Key behaviors: +- **Recursive nesting**: Classes/modules drill down to show which specific method changed +- **Whitespace normalization**: Indentation-only changes in brace languages collapse to "(whitespace/formatting changes only)" +- **Whitespace-significant languages**: Python preserves indentation as meaningful changes +- **Max depth of 5** prevents unbounded recursion into nested declarations + +### Symbol Extraction (get_file_contents `symbol` parameter) + +The `get_file_contents` tool accepts an optional `symbol` parameter that extracts a specific named symbol from a file using tree-sitter. Instead of returning the entire file, only the matching symbol's source code is returned. + +This pairs powerfully with `compare_file_contents`: +1. **Structural diff** shows which symbols changed (acts as a table of contents) +2. **Symbol extraction** fetches just the specific symbol to examine in detail + +If the symbol is not found, the error includes available symbol names to help self-correct. + +### Key Files + +- `pkg/github/structural_diff.go` — Tree-sitter AST extraction, declaration-level diffing, language configs +- `pkg/github/semantic_diff.go` — Core diff engine dispatching to format-specific parsers (JSON, YAML, CSV, TOML) and structural diff +- `pkg/github/symbol_extraction.go` — Symbol lookup by name, reusing tree-sitter configs +- `pkg/github/compare_file_contents.go` — MCP tool definition for compare_file_contents + +### CGO Requirement + +Tree-sitter requires CGO (`CGO_ENABLED=1`) for its C bindings. This affects: +- **Dockerfile**: Uses `gcc` and `musl-dev`, statically links with `-linkmode external -extldflags '-static'` +- **.goreleaser.yaml**: CGO_ENABLED=1, Windows builds excluded (no CGO cross-compilation without extra toolchain) +- **Local development**: `CGO_ENABLED=1` must be set when running tests or building if tree-sitter code is involved + +### Adding a New Language + +1. Add the grammar import in `structural_diff.go` (e.g., `github.com/smacker/go-tree-sitter/newlang`) +2. Create a config function (e.g., `newlangConfig()`) with declaration kinds and name extractor +3. Add the file extension mapping in `languageForPath()` +4. Write tests in `structural_diff_test.go` +5. Run `go mod tidy` to update dependencies + ## Common Development Workflows ### Adding a New MCP Tool From 04e669ae1b686b630f98c362a80bb90d75052cb2 Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Mon, 9 Feb 2026 23:56:19 +0100 Subject: [PATCH 3/5] Return plain text for symbol extraction instead of embedded resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbol text is just code — a ResourceContents wrapper with URI/MIME type adds no value. Use NewToolResultText for a simpler, more natural response. --- pkg/github/repositories.go | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index caa772cb5..80bcad974 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -786,18 +786,7 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool if extractErr != nil { return utils.NewToolResultError(extractErr.Error()), nil, nil } - content = symbolText - successMsg := fmt.Sprintf("extracted %s %q from %s", symbolKind, symbol, path) - if fileSHA != "" { - successMsg += fmt.Sprintf(" (SHA: %s)", fileSHA) - } - successMsg += successNote - result := &mcp.ResourceContents{ - URI: resourceURI, - Text: content, - MIMEType: contentType, - } - return utils.NewToolResultResource(successMsg, result), nil, nil + return utils.NewToolResultText(fmt.Sprintf("%s %s in %s:\n\n%s", symbolKind, symbol, path, symbolText)), nil, nil } result := &mcp.ResourceContents{ From fb8a684d158140dc83fe4db24a2d1c014e890e2d Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Tue, 10 Feb 2026 00:00:22 +0100 Subject: [PATCH 4/5] Add repos toolset server instructions for multi-tool workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds InstructionsFunc to the repos toolset describing how to combine compare_file_contents (structural diff) with get_file_contents symbol extraction for efficient code review. Server instructions focus on multi-tool flows only — single-tool features are already documented in each tool's own description. --- pkg/github/tools.go | 9 +++++---- pkg/github/toolset_instructions.go | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pkg/github/tools.go b/pkg/github/tools.go index d8165b65c..29e235070 100644 --- a/pkg/github/tools.go +++ b/pkg/github/tools.go @@ -35,10 +35,11 @@ var ( InstructionsFunc: generateContextToolsetInstructions, } ToolsetMetadataRepos = inventory.ToolsetMetadata{ - ID: "repos", - Description: "GitHub Repository related tools", - Default: true, - Icon: "repo", + ID: "repos", + Description: "GitHub Repository related tools", + Default: true, + Icon: "repo", + InstructionsFunc: generateReposToolsetInstructions, } ToolsetMetadataGit = inventory.ToolsetMetadata{ ID: "git", diff --git a/pkg/github/toolset_instructions.go b/pkg/github/toolset_instructions.go index bf2388a3d..912a05381 100644 --- a/pkg/github/toolset_instructions.go +++ b/pkg/github/toolset_instructions.go @@ -5,6 +5,12 @@ import "github.com/github/github-mcp-server/pkg/inventory" // Toolset instruction functions - these generate context-aware instructions for each toolset. // They are called during inventory build to generate server instructions. +func generateReposToolsetInstructions(_ *inventory.Inventory) string { + return `## Repos + +File comparison workflow: Use 'compare_file_contents' to see what changed between two refs — it produces structural diffs showing which symbols were added, removed, or modified. Then use 'get_file_contents' with 'symbol' to fetch specific changed symbols for detailed examination.` +} + func generateContextToolsetInstructions(_ *inventory.Inventory) string { return "Always call 'get_me' first to understand current user permissions and context." } From d0290bf74e975a158b9aaf5572b68bef24398381 Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Tue, 10 Feb 2026 00:04:13 +0100 Subject: [PATCH 5/5] Include file SHA in symbol extraction text result --- pkg/github/repositories.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 80bcad974..ad6b35b7d 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -786,7 +786,11 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool if extractErr != nil { return utils.NewToolResultError(extractErr.Error()), nil, nil } - return utils.NewToolResultText(fmt.Sprintf("%s %s in %s:\n\n%s", symbolKind, symbol, path, symbolText)), nil, nil + msg := fmt.Sprintf("%s %s in %s", symbolKind, symbol, path) + if fileSHA != "" { + msg += fmt.Sprintf(" (SHA: %s)", fileSHA) + } + return utils.NewToolResultText(fmt.Sprintf("%s:\n\n%s", msg, symbolText)), nil, nil } result := &mcp.ResourceContents{