Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/github/__toolsnaps__/get_file_contents.snap
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"type": "string"
},
"symbol": {
"description": "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.",
"description": "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. For methods, use receiver prefix format: (*TypeName).MethodName — bare method names also work when unambiguous. If the symbol is not found, returns available symbols with suggestions.",
"type": "string"
}
},
Expand Down
167 changes: 167 additions & 0 deletions pkg/github/diff_integration.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package github

import (
"fmt"
"strings"
)

// applySemanticDiffToUnifiedPatch takes a unified diff patch for a single file
// and attempts to produce a semantic diff. It reconstructs the base and head
// content from the patch hunks, then runs them through SemanticDiff.
// Returns the original patch unchanged if the file type doesn't benefit from
// semantic diffing or if reconstruction fails.
func applySemanticDiffToUnifiedPatch(filename, patch string) string {
if patch == "" {
return patch
}

format := DetectDiffFormat(filename)
if format == DiffFormatUnified {
// Not a structured data or code file — keep the original patch
return patch
}

base, head, ok := reconstructFromPatch(patch)
if !ok {
return patch
}

result := SemanticDiff(filename, base, head)
if result.Format == DiffFormatFallback {
return patch
}

return result.Diff
}

// reconstructFromPatch extracts the base and head file content from a unified
// diff patch. Returns the reconstructed contents and true if successful.
// This only works well for complete file diffs — partial context diffs will
// produce incomplete content, which is fine for semantic comparison of
// structured data where the full structure is usually in the diff.
func reconstructFromPatch(patch string) (base, head []byte, ok bool) {
lines := strings.Split(patch, "\n")

var baseLines, headLines []string
inHunk := false

for _, line := range lines {
if strings.HasPrefix(line, "@@") {
inHunk = true
continue
}
if !inHunk {
continue
}

switch {
case strings.HasPrefix(line, "-"):
baseLines = append(baseLines, line[1:])
case strings.HasPrefix(line, "+"):
headLines = append(headLines, line[1:])
case strings.HasPrefix(line, " "):
baseLines = append(baseLines, line[1:])
headLines = append(headLines, line[1:])
case line == "":
// Could be end of patch or an empty context line
baseLines = append(baseLines, "")
headLines = append(headLines, "")
}
}

if len(baseLines) == 0 && len(headLines) == 0 {
return nil, nil, false
}

return []byte(strings.Join(baseLines, "\n")),
[]byte(strings.Join(headLines, "\n")),
true
}

// processMultiFileDiff splits a multi-file unified diff into per-file sections
// and applies semantic diffing to each file where applicable. Returns a
// combined result with structural diffs for supported formats and original
// patches for unsupported ones.
func processMultiFileDiff(rawDiff string) string {
sections := splitDiffByFile(rawDiff)
if len(sections) == 0 {
return rawDiff
}

var result strings.Builder
for i, section := range sections {
if i > 0 {
result.WriteString("\n")
}

semanticPatch := applySemanticDiffToUnifiedPatch(section.filename, section.patch)
if semanticPatch != section.patch {
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a per-file patch is converted to a semantic diff, the output drops the original per-file diff header (e.g., diff --git, index, new file mode, rename from/to, deleted file mode). This loses important metadata (notably renames and mode changes) and can make the result misleading. Preserve the original header information even when emitting a semantic/structural diff (or explicitly include any rename/mode metadata alongside the semantic diff).

Suggested change
if semanticPatch != section.patch {
if semanticPatch != section.patch {
// Preserve the original per-file header (including metadata such as renames/mode changes)
result.WriteString(section.header)
if section.header != "" {
result.WriteString("\n")
}

Copilot uses AI. Check for mistakes.
result.WriteString(fmt.Sprintf("--- %s (semantic diff) ---\n", section.filename))
result.WriteString(semanticPatch)
} else {
result.WriteString(section.header)
if section.patch != "" {
result.WriteString("\n")
result.WriteString(section.patch)
}
}
}

return result.String()
}

type diffSection struct {
filename string
header string
patch string
}

// splitDiffByFile splits a raw multi-file unified diff into per-file sections.
func splitDiffByFile(rawDiff string) []diffSection {
lines := strings.Split(rawDiff, "\n")
var sections []diffSection
var current *diffSection

for _, line := range lines {
if strings.HasPrefix(line, "diff --git ") {
if current != nil {
sections = append(sections, *current)
}
// Extract filename from "diff --git a/path b/path"
parts := strings.SplitN(line, " b/", 2)
filename := ""
if len(parts) == 2 {
filename = parts[1]
}
current = &diffSection{
filename: filename,
header: line,
}
continue
}

if current == nil {
continue
}

if strings.HasPrefix(line, "---") || strings.HasPrefix(line, "+++") ||
strings.HasPrefix(line, "index ") || strings.HasPrefix(line, "new file") ||
strings.HasPrefix(line, "deleted file") || strings.HasPrefix(line, "old mode") ||
strings.HasPrefix(line, "new mode") || strings.HasPrefix(line, "similarity") ||
strings.HasPrefix(line, "rename ") || strings.HasPrefix(line, "Binary") {
current.header += "\n" + line
} else {
if current.patch != "" {
current.patch += "\n" + line
} else {
current.patch = line
}
}
}

if current != nil {
sections = append(sections, *current)
}

return sections
}
142 changes: 142 additions & 0 deletions pkg/github/diff_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package github

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestApplySemanticDiffToUnifiedPatch(t *testing.T) {
t.Run("JSON file gets semantic diff", func(t *testing.T) {
patch := `@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}`
result := applySemanticDiffToUnifiedPatch("config.json", patch)
assert.NotEqual(t, patch, result)
assert.Contains(t, result, `name: "old" → "new"`)
})

t.Run("Go file gets structural diff", func(t *testing.T) {
patch := `@@ -1,3 +1,4 @@
func hello() {
+ fmt.Println("world")
}`
result := applySemanticDiffToUnifiedPatch("main.go", patch)
assert.NotEqual(t, patch, result)
assert.Contains(t, result, "function_declaration")
})

t.Run("Markdown file keeps original patch", func(t *testing.T) {
patch := `@@ -1,3 +1,3 @@
# Title
-old text
+new text`
result := applySemanticDiffToUnifiedPatch("README.md", patch)
assert.Equal(t, patch, result)
})

t.Run("empty patch returns empty", func(t *testing.T) {
result := applySemanticDiffToUnifiedPatch("config.json", "")
assert.Equal(t, "", result)
})

t.Run("YAML file gets semantic diff", func(t *testing.T) {
patch := `@@ -1,2 +1,2 @@
-name: old
+name: new`
result := applySemanticDiffToUnifiedPatch("config.yaml", patch)
assert.NotEqual(t, patch, result)
assert.Contains(t, result, `name: "old" → "new"`)
})
}

func TestReconstructFromPatch(t *testing.T) {
t.Run("simple patch", func(t *testing.T) {
patch := `@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}`
base, head, ok := reconstructFromPatch(patch)
require.True(t, ok)
assert.Contains(t, string(base), `"name": "old"`)
assert.Contains(t, string(head), `"name": "new"`)
})

t.Run("addition only", func(t *testing.T) {
patch := `@@ -0,0 +1,3 @@
+{
+ "new": true
+}`
base, head, ok := reconstructFromPatch(patch)
require.True(t, ok)
assert.Empty(t, string(base))
assert.Contains(t, string(head), `"new": true`)
})

t.Run("empty patch", func(t *testing.T) {
_, _, ok := reconstructFromPatch("")
assert.False(t, ok)
})
}

func TestSplitDiffByFile(t *testing.T) {
rawDiff := `diff --git a/config.json b/config.json
index abc..def 100644
--- a/config.json
+++ b/config.json
@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}
diff --git a/main.go b/main.go
index abc..def 100644
--- a/main.go
+++ b/main.go
@@ -1,3 +1,4 @@
func hello() {
+ fmt.Println("world")
}`

sections := splitDiffByFile(rawDiff)
require.Len(t, sections, 2)
assert.Equal(t, "config.json", sections[0].filename)
assert.Equal(t, "main.go", sections[1].filename)
assert.Contains(t, sections[0].patch, `"name": "old"`)
assert.Contains(t, sections[1].patch, `fmt.Println`)
}

func TestProcessMultiFileDiff(t *testing.T) {
rawDiff := `diff --git a/config.json b/config.json
index abc..def 100644
--- a/config.json
+++ b/config.json
@@ -1,3 +1,3 @@
{
- "name": "old"
+ "name": "new"
}
diff --git a/README.md b/README.md
index abc..def 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
# Title
-old text
+new text`

result := processMultiFileDiff(rawDiff)

// JSON file should get semantic diff
assert.Contains(t, result, "semantic diff")
assert.Contains(t, result, `name: "old" → "new"`)

// Markdown should keep original patch format
assert.Contains(t, result, "README.md")
assert.Contains(t, result, "-old text")
assert.Contains(t, result, "+new text")
}
6 changes: 6 additions & 0 deletions pkg/github/minimal_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ type MinimalCommitFile struct {
Additions int `json:"additions,omitempty"`
Deletions int `json:"deletions,omitempty"`
Changes int `json:"changes,omitempty"`
Patch string `json:"patch,omitempty"`
}

// MinimalCommit is the trimmed output type for commit objects.
Expand Down Expand Up @@ -236,12 +237,17 @@ func convertToMinimalCommit(commit *github.RepositoryCommit, includeDiffs bool)
if len(commit.Files) > 0 {
minimalCommit.Files = make([]MinimalCommitFile, 0, len(commit.Files))
for _, file := range commit.Files {
patch := file.GetPatch()
if patch != "" {
patch = applySemanticDiffToUnifiedPatch(file.GetFilename(), patch)
}
Comment on lines 239 to +243
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MinimalCommitFile now includes a patch field and convertToMinimalCommit applies semantic/structural diffing to it. There doesn't appear to be test coverage asserting that get_commit returns the new patch field and that semantic diffing is applied (or that include_diff=false still omits diffs). Adding a focused test will help prevent accidental regressions in commit output.

Copilot generated this review using guidance from repository custom instructions.
minimalFile := MinimalCommitFile{
Filename: file.GetFilename(),
Status: file.GetStatus(),
Additions: file.GetAdditions(),
Deletions: file.GetDeletions(),
Changes: file.GetChanges(),
Patch: patch,
}
minimalCommit.Files = append(minimalCommit.Files, minimalFile)
}
Expand Down
12 changes: 10 additions & 2 deletions pkg/github/pullrequests.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ func GetPullRequestDiff(ctx context.Context, client *github.Client, owner, repo

defer func() { _ = resp.Body.Close() }()

// Return the raw response
return utils.NewToolResultText(string(raw)), nil
// Return the raw response, with semantic diffs applied where beneficial
return utils.NewToolResultText(processMultiFileDiff(string(raw))), nil
Comment on lines +223 to +224
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes get_diff output by post-processing the raw multi-file diff, but existing pull request tests only cover a Markdown diff (which is expected to remain unchanged). Add a test case that includes at least one supported semantic/structural file type (e.g., JSON or Go) and assert that the returned diff is transformed accordingly, so regressions in semantic application/splitting are caught.

This issue also appears on line 296 of the same file.

Copilot generated this review using guidance from repository custom instructions.
}

func GetPullRequestStatus(ctx context.Context, client *github.Client, owner, repo string, pullNumber int) (*mcp.CallToolResult, error) {
Expand Down Expand Up @@ -293,6 +293,14 @@ func GetPullRequestFiles(ctx context.Context, client *github.Client, owner, repo
return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to get pull request files", resp, body), nil
}

// Apply semantic diffs to file patches where beneficial
for _, file := range files {
if file.Patch != nil && file.Filename != nil {
semanticPatch := applySemanticDiffToUnifiedPatch(file.GetFilename(), file.GetPatch())
file.Patch = &semanticPatch
}
}

r, err := json.Marshal(files)
if err != nil {
return nil, fmt.Errorf("failed to marshal response: %w", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/github/repositories.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool
},
"symbol": {
Type: "string",
Description: "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.",
Description: "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. For methods, use receiver prefix format: (*TypeName).MethodName — bare method names also work when unambiguous. If the symbol is not found, returns available symbols with suggestions.",
},
},
Required: []string{"owner", "repo"},
Expand Down
Loading