Skip to content

Commit 1777da6

Browse files
Add feature-flagged compare_file_contents tool with semantic diffs
Add a new compare_file_contents MCP tool that compares two versions of a file across refs (branches, tags, or SHAs). For structured data formats (JSON, YAML, CSV, TOML), it produces semantic diffs that show only meaningful changes, ignoring formatting differences. For unsupported formats, it falls back to a unified diff.

Key features:
- Semantic diffs for JSON, YAML, CSV, TOML files
- Unified diff fallback for code and other text files
- Handles new files (base not found) and deleted files (head not found)
- 1MB max file size to prevent excessive server-side processing
- Gated behind the 'mcp_compare_file_contents' feature flag

This helps AI models by:
- Reducing token usage (formatting noise eliminated)
- Providing unambiguous before/after semantics
- Enabling self-verification of edits to config/data files

Refs: #1973
1 parent 121d50a commit 1777da6

File tree

6 files changed

+1482
-0
lines changed

6 files changed

+1482
-0
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"annotations": {
3+
"readOnlyHint": true,
4+
"title": "Compare file contents between revisions"
5+
},
6+
"description": "Compare two versions of a file in a GitHub repository.\nFor structured formats (JSON, YAML, CSV, TOML), produces a semantic diff that shows only meaningful changes, ignoring formatting differences.\nFor other file types, produces a standard unified diff.\nThis is useful for understanding what actually changed between two versions of a file, especially for configuration files and data files where reformatting can obscure real changes.",
7+
"inputSchema": {
8+
"properties": {
9+
"base": {
10+
"description": "Base ref to compare from (commit SHA, branch name, or tag name)",
11+
"type": "string"
12+
},
13+
"head": {
14+
"description": "Head ref to compare to (commit SHA, branch name, or tag name)",
15+
"type": "string"
16+
},
17+
"owner": {
18+
"description": "Repository owner (username or organization)",
19+
"type": "string"
20+
},
21+
"path": {
22+
"description": "Path to the file to compare",
23+
"type": "string"
24+
},
25+
"repo": {
26+
"description": "Repository name",
27+
"type": "string"
28+
}
29+
},
30+
"required": [
31+
"owner",
32+
"repo",
33+
"path",
34+
"base",
35+
"head"
36+
],
37+
"type": "object"
38+
},
39+
"name": "compare_file_contents"
40+
}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
package github
2+
3+
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"

	"github.com/github/github-mcp-server/pkg/inventory"
	"github.com/github/github-mcp-server/pkg/scopes"
	"github.com/github/github-mcp-server/pkg/translations"
	"github.com/github/github-mcp-server/pkg/utils"
	"github.com/google/go-github/v79/github"
	"github.com/google/jsonschema-go/jsonschema"
	"github.com/modelcontextprotocol/go-sdk/mcp"
)
18+
19+
// FeatureFlagCompareFileContents is the feature flag name assigned to the compare_file_contents tool's FeatureFlagEnable field.
20+
const FeatureFlagCompareFileContents = "mcp_compare_file_contents"
21+
22+
// CompareFileContents creates a tool to compare two versions of a file in a GitHub repository.
23+
// For supported formats (JSON, YAML, CSV, TOML), it produces semantic diffs showing
24+
// only meaningful changes. For other formats, it falls back to unified diff.
25+
func CompareFileContents(t translations.TranslationHelperFunc) inventory.ServerTool {
26+
tool := NewTool(
27+
ToolsetMetadataRepos,
28+
mcp.Tool{
29+
Name: "compare_file_contents",
30+
Description: t("TOOL_COMPARE_FILE_CONTENTS_DESCRIPTION", `Compare two versions of a file in a GitHub repository.
31+
For structured formats (JSON, YAML, CSV, TOML), produces a semantic diff that shows only meaningful changes, ignoring formatting differences.
32+
For other file types, produces a standard unified diff.
33+
This is useful for understanding what actually changed between two versions of a file, especially for configuration files and data files where reformatting can obscure real changes.`),
34+
Annotations: &mcp.ToolAnnotations{
35+
Title: t("TOOL_COMPARE_FILE_CONTENTS_USER_TITLE", "Compare file contents between revisions"),
36+
ReadOnlyHint: true,
37+
},
38+
InputSchema: &jsonschema.Schema{
39+
Type: "object",
40+
Properties: map[string]*jsonschema.Schema{
41+
"owner": {
42+
Type: "string",
43+
Description: "Repository owner (username or organization)",
44+
},
45+
"repo": {
46+
Type: "string",
47+
Description: "Repository name",
48+
},
49+
"path": {
50+
Type: "string",
51+
Description: "Path to the file to compare",
52+
},
53+
"base": {
54+
Type: "string",
55+
Description: "Base ref to compare from (commit SHA, branch name, or tag name)",
56+
},
57+
"head": {
58+
Type: "string",
59+
Description: "Head ref to compare to (commit SHA, branch name, or tag name)",
60+
},
61+
},
62+
Required: []string{"owner", "repo", "path", "base", "head"},
63+
},
64+
},
65+
[]scopes.Scope{scopes.Repo},
66+
func(ctx context.Context, deps ToolDependencies, _ *mcp.CallToolRequest, args map[string]any) (*mcp.CallToolResult, any, error) {
67+
owner, err := RequiredParam[string](args, "owner")
68+
if err != nil {
69+
return utils.NewToolResultError(err.Error()), nil, nil
70+
}
71+
repo, err := RequiredParam[string](args, "repo")
72+
if err != nil {
73+
return utils.NewToolResultError(err.Error()), nil, nil
74+
}
75+
path, err := RequiredParam[string](args, "path")
76+
if err != nil {
77+
return utils.NewToolResultError(err.Error()), nil, nil
78+
}
79+
base, err := RequiredParam[string](args, "base")
80+
if err != nil {
81+
return utils.NewToolResultError(err.Error()), nil, nil
82+
}
83+
head, err := RequiredParam[string](args, "head")
84+
if err != nil {
85+
return utils.NewToolResultError(err.Error()), nil, nil
86+
}
87+
88+
client, err := deps.GetClient(ctx)
89+
if err != nil {
90+
return nil, nil, fmt.Errorf("failed to get GitHub client: %w", err)
91+
}
92+
93+
baseContent, baseErr := getFileAtRef(ctx, client, owner, repo, path, base)
94+
headContent, headErr := getFileAtRef(ctx, client, owner, repo, path, head)
95+
96+
// If both sides fail, report the errors
97+
if baseErr != nil && headErr != nil {
98+
return utils.NewToolResultError(fmt.Sprintf("failed to get file at both refs: base %q: %s, head %q: %s", base, baseErr, head, headErr)), nil, nil
99+
}
100+
101+
// A nil content with no error won't happen from getFileAtRef,
102+
// but a non-nil error on one side means the file doesn't exist at that ref.
103+
// Pass nil to SemanticDiff to indicate added/deleted file.
104+
if baseErr != nil {
105+
baseContent = nil
106+
}
107+
if headErr != nil {
108+
headContent = nil
109+
}
110+
111+
result := SemanticDiff(path, baseContent, headContent)
112+
113+
output, err := json.Marshal(result)
114+
if err != nil {
115+
return nil, nil, fmt.Errorf("failed to marshal diff result: %w", err)
116+
}
117+
118+
return utils.NewToolResultText(string(output)), nil, nil
119+
},
120+
)
121+
tool.FeatureFlagEnable = FeatureFlagCompareFileContents
122+
return tool
123+
}
124+
125+
// getFileAtRef fetches file content from a GitHub repository at a specific ref.
126+
func getFileAtRef(ctx context.Context, client *github.Client, owner, repo, path, ref string) ([]byte, error) {
127+
opts := &github.RepositoryContentGetOptions{Ref: ref}
128+
fileContent, _, resp, err := client.Repositories.GetContents(ctx, owner, repo, path, opts)
129+
if err != nil {
130+
return nil, err
131+
}
132+
if resp == nil {
133+
return nil, fmt.Errorf("no response received")
134+
}
135+
defer func() { _ = resp.Body.Close() }()
136+
137+
if resp.StatusCode != http.StatusOK {
138+
body, err := io.ReadAll(resp.Body)
139+
if err != nil {
140+
return nil, fmt.Errorf("failed to read response body: %w", err)
141+
}
142+
return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
143+
}
144+
145+
if fileContent == nil {
146+
return nil, fmt.Errorf("path %q is a directory, not a file", path)
147+
}
148+
149+
content, err := fileContent.GetContent()
150+
if err != nil {
151+
return nil, fmt.Errorf("failed to decode file content: %w", err)
152+
}
153+
154+
if len(content) > MaxSemanticDiffFileSize {
155+
return nil, fmt.Errorf("file exceeds maximum size of %d bytes", MaxSemanticDiffFileSize)
156+
}
157+
158+
return []byte(content), nil
159+
}

0 commit comments

Comments
 (0)