-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Apply semantic diffs to get_commit, get_diff, and get_files #1984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sammorrowdrums/symbol-extraction
Are you sure you want to change the base?
Changes from all commits
88a6a37
8473740
ab75920
d4a483f
3dc1535
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,167 @@ | ||
| package github | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "strings" | ||
| ) | ||
|
|
||
| // applySemanticDiffToUnifiedPatch takes a unified diff patch for a single file | ||
| // and attempts to produce a semantic diff. It reconstructs the base and head | ||
| // content from the patch hunks, then runs them through SemanticDiff. | ||
| // Returns the original patch unchanged if the file type doesn't benefit from | ||
| // semantic diffing or if reconstruction fails. | ||
| func applySemanticDiffToUnifiedPatch(filename, patch string) string { | ||
| if patch == "" { | ||
| return patch | ||
| } | ||
|
|
||
| format := DetectDiffFormat(filename) | ||
| if format == DiffFormatUnified { | ||
| // Not a structured data or code file — keep the original patch | ||
| return patch | ||
| } | ||
|
|
||
| base, head, ok := reconstructFromPatch(patch) | ||
| if !ok { | ||
| return patch | ||
| } | ||
|
|
||
| result := SemanticDiff(filename, base, head) | ||
| if result.Format == DiffFormatFallback { | ||
| return patch | ||
| } | ||
|
|
||
| return result.Diff | ||
| } | ||
|
|
||
| // reconstructFromPatch extracts the base and head file content from a unified | ||
| // diff patch. Returns the reconstructed contents and true if successful. | ||
| // This only works well for complete file diffs — partial context diffs will | ||
| // produce incomplete content, which is fine for semantic comparison of | ||
| // structured data where the full structure is usually in the diff. | ||
| func reconstructFromPatch(patch string) (base, head []byte, ok bool) { | ||
| lines := strings.Split(patch, "\n") | ||
|
|
||
| var baseLines, headLines []string | ||
| inHunk := false | ||
|
|
||
| for _, line := range lines { | ||
| if strings.HasPrefix(line, "@@") { | ||
| inHunk = true | ||
| continue | ||
| } | ||
| if !inHunk { | ||
| continue | ||
| } | ||
|
|
||
| switch { | ||
| case strings.HasPrefix(line, "-"): | ||
| baseLines = append(baseLines, line[1:]) | ||
| case strings.HasPrefix(line, "+"): | ||
| headLines = append(headLines, line[1:]) | ||
| case strings.HasPrefix(line, " "): | ||
| baseLines = append(baseLines, line[1:]) | ||
| headLines = append(headLines, line[1:]) | ||
| case line == "": | ||
| // Could be end of patch or an empty context line | ||
| baseLines = append(baseLines, "") | ||
| headLines = append(headLines, "") | ||
| } | ||
| } | ||
|
|
||
| if len(baseLines) == 0 && len(headLines) == 0 { | ||
| return nil, nil, false | ||
| } | ||
|
|
||
| return []byte(strings.Join(baseLines, "\n")), | ||
| []byte(strings.Join(headLines, "\n")), | ||
| true | ||
| } | ||
|
|
||
| // processMultiFileDiff splits a multi-file unified diff into per-file sections | ||
| // and applies semantic diffing to each file where applicable. Returns a | ||
| // combined result with structural diffs for supported formats and original | ||
| // patches for unsupported ones. | ||
| func processMultiFileDiff(rawDiff string) string { | ||
| sections := splitDiffByFile(rawDiff) | ||
| if len(sections) == 0 { | ||
| return rawDiff | ||
| } | ||
|
|
||
| var result strings.Builder | ||
| for i, section := range sections { | ||
| if i > 0 { | ||
| result.WriteString("\n") | ||
| } | ||
|
|
||
| semanticPatch := applySemanticDiffToUnifiedPatch(section.filename, section.patch) | ||
| if semanticPatch != section.patch { | ||
| result.WriteString(fmt.Sprintf("--- %s (semantic diff) ---\n", section.filename)) | ||
| result.WriteString(semanticPatch) | ||
| } else { | ||
| result.WriteString(section.header) | ||
| if section.patch != "" { | ||
| result.WriteString("\n") | ||
| result.WriteString(section.patch) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return result.String() | ||
| } | ||
|
|
||
| type diffSection struct { | ||
| filename string | ||
| header string | ||
| patch string | ||
| } | ||
|
|
||
| // splitDiffByFile splits a raw multi-file unified diff into per-file sections. | ||
| func splitDiffByFile(rawDiff string) []diffSection { | ||
| lines := strings.Split(rawDiff, "\n") | ||
| var sections []diffSection | ||
| var current *diffSection | ||
|
|
||
| for _, line := range lines { | ||
| if strings.HasPrefix(line, "diff --git ") { | ||
| if current != nil { | ||
| sections = append(sections, *current) | ||
| } | ||
| // Extract filename from "diff --git a/path b/path" | ||
| parts := strings.SplitN(line, " b/", 2) | ||
| filename := "" | ||
| if len(parts) == 2 { | ||
| filename = parts[1] | ||
| } | ||
| current = &diffSection{ | ||
| filename: filename, | ||
| header: line, | ||
| } | ||
| continue | ||
| } | ||
|
|
||
| if current == nil { | ||
| continue | ||
| } | ||
|
|
||
| if strings.HasPrefix(line, "---") || strings.HasPrefix(line, "+++") || | ||
| strings.HasPrefix(line, "index ") || strings.HasPrefix(line, "new file") || | ||
| strings.HasPrefix(line, "deleted file") || strings.HasPrefix(line, "old mode") || | ||
| strings.HasPrefix(line, "new mode") || strings.HasPrefix(line, "similarity") || | ||
| strings.HasPrefix(line, "rename ") || strings.HasPrefix(line, "Binary") { | ||
| current.header += "\n" + line | ||
| } else { | ||
| if current.patch != "" { | ||
| current.patch += "\n" + line | ||
| } else { | ||
| current.patch = line | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if current != nil { | ||
| sections = append(sections, *current) | ||
| } | ||
|
|
||
| return sections | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| package github | ||
|
|
||
| import ( | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| func TestApplySemanticDiffToUnifiedPatch(t *testing.T) { | ||
| t.Run("JSON file gets semantic diff", func(t *testing.T) { | ||
| patch := `@@ -1,3 +1,3 @@ | ||
| { | ||
| - "name": "old" | ||
| + "name": "new" | ||
| }` | ||
| result := applySemanticDiffToUnifiedPatch("config.json", patch) | ||
| assert.NotEqual(t, patch, result) | ||
| assert.Contains(t, result, `name: "old" → "new"`) | ||
| }) | ||
|
|
||
| t.Run("Go file gets structural diff", func(t *testing.T) { | ||
| patch := `@@ -1,3 +1,4 @@ | ||
| func hello() { | ||
| + fmt.Println("world") | ||
| }` | ||
| result := applySemanticDiffToUnifiedPatch("main.go", patch) | ||
| assert.NotEqual(t, patch, result) | ||
| assert.Contains(t, result, "function_declaration") | ||
| }) | ||
|
|
||
| t.Run("Markdown file keeps original patch", func(t *testing.T) { | ||
| patch := `@@ -1,3 +1,3 @@ | ||
| # Title | ||
| -old text | ||
| +new text` | ||
| result := applySemanticDiffToUnifiedPatch("README.md", patch) | ||
| assert.Equal(t, patch, result) | ||
| }) | ||
|
|
||
| t.Run("empty patch returns empty", func(t *testing.T) { | ||
| result := applySemanticDiffToUnifiedPatch("config.json", "") | ||
| assert.Equal(t, "", result) | ||
| }) | ||
|
|
||
| t.Run("YAML file gets semantic diff", func(t *testing.T) { | ||
| patch := `@@ -1,2 +1,2 @@ | ||
| -name: old | ||
| +name: new` | ||
| result := applySemanticDiffToUnifiedPatch("config.yaml", patch) | ||
| assert.NotEqual(t, patch, result) | ||
| assert.Contains(t, result, `name: "old" → "new"`) | ||
| }) | ||
| } | ||
|
|
||
| func TestReconstructFromPatch(t *testing.T) { | ||
| t.Run("simple patch", func(t *testing.T) { | ||
| patch := `@@ -1,3 +1,3 @@ | ||
| { | ||
| - "name": "old" | ||
| + "name": "new" | ||
| }` | ||
| base, head, ok := reconstructFromPatch(patch) | ||
| require.True(t, ok) | ||
| assert.Contains(t, string(base), `"name": "old"`) | ||
| assert.Contains(t, string(head), `"name": "new"`) | ||
| }) | ||
|
|
||
| t.Run("addition only", func(t *testing.T) { | ||
| patch := `@@ -0,0 +1,3 @@ | ||
| +{ | ||
| + "new": true | ||
| +}` | ||
| base, head, ok := reconstructFromPatch(patch) | ||
| require.True(t, ok) | ||
| assert.Empty(t, string(base)) | ||
| assert.Contains(t, string(head), `"new": true`) | ||
| }) | ||
|
|
||
| t.Run("empty patch", func(t *testing.T) { | ||
| _, _, ok := reconstructFromPatch("") | ||
| assert.False(t, ok) | ||
| }) | ||
| } | ||
|
|
||
| func TestSplitDiffByFile(t *testing.T) { | ||
| rawDiff := `diff --git a/config.json b/config.json | ||
| index abc..def 100644 | ||
| --- a/config.json | ||
| +++ b/config.json | ||
| @@ -1,3 +1,3 @@ | ||
| { | ||
| - "name": "old" | ||
| + "name": "new" | ||
| } | ||
| diff --git a/main.go b/main.go | ||
| index abc..def 100644 | ||
| --- a/main.go | ||
| +++ b/main.go | ||
| @@ -1,3 +1,4 @@ | ||
| func hello() { | ||
| + fmt.Println("world") | ||
| }` | ||
|
|
||
| sections := splitDiffByFile(rawDiff) | ||
| require.Len(t, sections, 2) | ||
| assert.Equal(t, "config.json", sections[0].filename) | ||
| assert.Equal(t, "main.go", sections[1].filename) | ||
| assert.Contains(t, sections[0].patch, `"name": "old"`) | ||
| assert.Contains(t, sections[1].patch, `fmt.Println`) | ||
| } | ||
|
|
||
| func TestProcessMultiFileDiff(t *testing.T) { | ||
| rawDiff := `diff --git a/config.json b/config.json | ||
| index abc..def 100644 | ||
| --- a/config.json | ||
| +++ b/config.json | ||
| @@ -1,3 +1,3 @@ | ||
| { | ||
| - "name": "old" | ||
| + "name": "new" | ||
| } | ||
| diff --git a/README.md b/README.md | ||
| index abc..def 100644 | ||
| --- a/README.md | ||
| +++ b/README.md | ||
| @@ -1,3 +1,3 @@ | ||
| # Title | ||
| -old text | ||
| +new text` | ||
|
|
||
| result := processMultiFileDiff(rawDiff) | ||
|
|
||
| // JSON file should get semantic diff | ||
| assert.Contains(t, result, "semantic diff") | ||
| assert.Contains(t, result, `name: "old" → "new"`) | ||
|
|
||
| // Markdown should keep original patch format | ||
| assert.Contains(t, result, "README.md") | ||
| assert.Contains(t, result, "-old text") | ||
| assert.Contains(t, result, "+new text") | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,6 +75,7 @@ type MinimalCommitFile struct { | |
| Additions int `json:"additions,omitempty"` | ||
| Deletions int `json:"deletions,omitempty"` | ||
| Changes int `json:"changes,omitempty"` | ||
| Patch string `json:"patch,omitempty"` | ||
| } | ||
|
|
||
| // MinimalCommit is the trimmed output type for commit objects. | ||
|
|
@@ -236,12 +237,17 @@ func convertToMinimalCommit(commit *github.RepositoryCommit, includeDiffs bool) | |
| if len(commit.Files) > 0 { | ||
| minimalCommit.Files = make([]MinimalCommitFile, 0, len(commit.Files)) | ||
| for _, file := range commit.Files { | ||
| patch := file.GetPatch() | ||
| if patch != "" { | ||
| patch = applySemanticDiffToUnifiedPatch(file.GetFilename(), patch) | ||
| } | ||
|
Comment on lines
239
to
+243
|
||
| minimalFile := MinimalCommitFile{ | ||
| Filename: file.GetFilename(), | ||
| Status: file.GetStatus(), | ||
| Additions: file.GetAdditions(), | ||
| Deletions: file.GetDeletions(), | ||
| Changes: file.GetChanges(), | ||
| Patch: patch, | ||
| } | ||
| minimalCommit.Files = append(minimalCommit.Files, minimalFile) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -220,8 +220,8 @@ func GetPullRequestDiff(ctx context.Context, client *github.Client, owner, repo | |
|
|
||
| defer func() { _ = resp.Body.Close() }() | ||
|
|
||
| // Return the raw response | ||
| return utils.NewToolResultText(string(raw)), nil | ||
| // Return the raw response, with semantic diffs applied where beneficial | ||
| return utils.NewToolResultText(processMultiFileDiff(string(raw))), nil | ||
|
Comment on lines
+223
to
+224
|
||
| } | ||
|
|
||
| func GetPullRequestStatus(ctx context.Context, client *github.Client, owner, repo string, pullNumber int) (*mcp.CallToolResult, error) { | ||
|
|
@@ -293,6 +293,14 @@ func GetPullRequestFiles(ctx context.Context, client *github.Client, owner, repo | |
| return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to get pull request files", resp, body), nil | ||
| } | ||
|
|
||
| // Apply semantic diffs to file patches where beneficial | ||
| for _, file := range files { | ||
| if file.Patch != nil && file.Filename != nil { | ||
| semanticPatch := applySemanticDiffToUnifiedPatch(file.GetFilename(), file.GetPatch()) | ||
| file.Patch = &semanticPatch | ||
| } | ||
| } | ||
|
|
||
| r, err := json.Marshal(files) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to marshal response: %w", err) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a per-file patch is converted to a semantic diff, the output drops the original per-file diff header (e.g.,
diff --git,index,new file mode,rename from/to,deleted file mode). This loses important metadata (notably renames and mode changes) and can make the result misleading. Preserve the original header information even when emitting a semantic/structural diff (or explicitly include any rename/mode metadata alongside the semantic diff).