From 92f0399f7f9178672f04fd2efb5cf87d54661d6b Mon Sep 17 00:00:00 2001 From: Joey L Date: Fri, 27 Feb 2026 04:48:53 +0000 Subject: [PATCH 01/39] Add back logic split from the big gitter PR --- go/cmd/gitter/gitter.go | 132 +++++++++++ go/cmd/gitter/gitter_test.go | 86 +++++++ go/cmd/gitter/repository.go | 175 ++++++++++++++ go/cmd/gitter/repository_test.go | 390 +++++++++++++++++++++++++++++++ 4 files changed, 783 insertions(+) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 7a97df829bb..0ebbbc0b8d2 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -45,6 +45,21 @@ const gitStoreFileName = "git-store" var endpointHandlers = map[string]http.HandlerFunc{ "GET /git": gitHandler, "POST /cache": cacheHandler, + "POST /affected-commits": affectedCommitsHandler, +} + +type EventType string + +const ( + EventTypeIntroduced EventType = "introduced" + EventTypeFixed EventType = "fixed" + EventTypeLastAffected EventType = "last_affected" + EventTypeLimit EventType = "limit" +) + +type Event struct { + Type EventType `json:"eventType"` + Hash string `json:"hash"` } var ( @@ -499,3 +514,120 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) logger.InfoContext(ctx, "Request completed successfully: /cache", slog.Duration("duration", time.Since(start))) } + +func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { + start := time.Now() + // POST requets body processing + var body struct { + URL string `json:"url"` + Events []Event `json:"events"` + DetectCherrypicks bool `json:"detect_cherrypicks"` + ForceUpdate bool `json:"force_update"` + } + err := json.NewDecoder(r.Body).Decode(&body) + if err != nil { + http.Error(w, fmt.Sprintf("Error decoding JSON: %v", err), http.StatusBadRequest) + + return + } + defer r.Body.Close() + + url := body.URL + introduced := []SHA1{} + fixed := []SHA1{} + lastAffected := []SHA1{} + limit := []SHA1{} + cherrypick := body.DetectCherrypicks + + ctx := context.WithValue(r.Context(), urlKey, url) + + for _, event := range body.Events { + hash, err := hex.DecodeString(event.Hash) + if err != nil { + logger.ErrorContext(ctx, "Error parsing hash", slog.String("hash", event.Hash), slog.Any("error", err)) + http.Error(w, "Invalid hash: "+event.Hash, http.StatusBadRequest) + + return + } + + switch event.Type { + case EventTypeIntroduced: + introduced = append(introduced, SHA1(hash)) + case EventTypeFixed: + fixed = append(fixed, SHA1(hash)) + case EventTypeLastAffected: + lastAffected = append(lastAffected, SHA1(hash)) + case EventTypeLimit: + limit = append(limit, SHA1(hash)) + default: + logger.ErrorContext(ctx, "Invalid event type", slog.String("event_type", string(event.Type))) + http.Error(w, fmt.Sprintf("Invalid event type: %s", event.Type), http.StatusBadRequest) + + return + } + } + logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", introduced), slog.Any("fixed", fixed), slog.Any("last_affected", lastAffected), slog.Any("limit", limit), slog.Bool("cherrypick", cherrypick)) + + semaphore <- struct{}{} + defer func() { <-semaphore }() + logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) + + // Limit and fixed/last_affected shouldn't exist in the same request as it doesn't make sense + if (len(fixed) > 0 || len(lastAffected) > 0) && len(limit) > 0 { + http.Error(w, "Limit and fixed/last_affected shouldn't exist in the same request", http.StatusBadRequest) + + return + } + + // Fetch repo if it's not fresh + // I can't change singleflight's interface + if _, err, _ := gFetch.Do(url, func() (any, error) { + return nil, FetchRepo(ctx, url, body.ForceUpdate) + }); err != nil { + logger.ErrorContext(ctx, "Error fetching blob", slog.Any("error", err)) + if isAuthError(err) { + http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusForbidden) + + return + } + http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusInternalServerError) + + return + } + + repoDirName := getRepoDirName(url) + repoPath := filepath.Join(gitStorePath, repoDirName) + + repoLock := GetRepoLock(url) + repoLock.RLock() + defer repoLock.RUnlock() + + // I can't change singleflight's interface + repoAny, err, _ := gLoad.Do(repoPath, func() (any, error) { + return LoadRepository(ctx, repoPath) + }) + if err != nil { + logger.ErrorContext(ctx, "Failed to load repository", slog.Any("error", err)) + http.Error(w, fmt.Sprintf("Failed to load repository: %v", err), http.StatusInternalServerError) + + return + } + repo := repoAny.(*Repository) + + var affectedCommits []*Commit + if len(limit) > 0 { + affectedCommits = repo.Between(introduced, limit) + } else { + affectedCommits = repo.Affected(introduced, fixed, lastAffected, cherrypick) + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + if err := json.NewEncoder(w).Encode(affectedCommits); err != nil { + logger.ErrorContext(ctx, "Error encoding affected commits", slog.Any("error", err)) + http.Error(w, fmt.Sprintf("Error encoding affected commits: %v", err), http.StatusInternalServerError) + + return + } + logger.InfoContext(ctx, "Request completed successfully: /affected-commits", slog.Duration("duration", time.Since(start))) +} diff --git a/go/cmd/gitter/gitter_test.go b/go/cmd/gitter/gitter_test.go index 770ba200865..f0107e77dc2 100644 --- a/go/cmd/gitter/gitter_test.go +++ b/go/cmd/gitter/gitter_test.go @@ -187,3 +187,89 @@ func TestCacheHandler(t *testing.T) { }) } } + +func TestAffectedCommitsHandler(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + setupTest(t) + + tests := []struct { + name string + url string + introduced []string + fixed []string + lastAffected []string + limit []string + invalidType []string + expectedCode int + }{ + { + name: "Valid range in public repo", + url: "https://github.com/google/oss-fuzz-vulns.git", + introduced: []string{"3350c55f9525cb83fc3e0b61bde076433c2da8dc"}, + fixed: []string{"8920ed8e47c660a0c20c28cb1004a600780c5b59"}, + expectedCode: http.StatusOK, + }, + { + name: "Invalid mixed limit and fixed", + url: "https://github.com/google/oss-fuzz-vulns.git", + introduced: []string{"3350c55f9525cb83fc3e0b61bde076433c2da8dc"}, + fixed: []string{"8920ed8e47c660a0c20c28cb1004a600780c5b59"}, + limit: []string{"996962b987c856bf751948e55b9366751e806c64"}, + expectedCode: http.StatusBadRequest, + }, + { + name: "Non-existent repo", + url: "https://github.com/google/this-repo-does-not-exist-12345.git", + introduced: []string{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}, + expectedCode: http.StatusForbidden, + }, + { + name: "Invalid event type", + url: "https://github.com/google/oss-fuzz-vulns.git", + invalidType: []string{"3350c55f9525cb83fc3e0b61bde076433c2da8dc"}, + expectedCode: http.StatusBadRequest, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var events []Event + for _, h := range tt.introduced { + events = append(events, Event{Type: "introduced", Hash: h}) + } + for _, h := range tt.fixed { + events = append(events, Event{Type: "fixed", Hash: h}) + } + for _, h := range tt.lastAffected { + events = append(events, Event{Type: "last_affected", Hash: h}) + } + for _, h := range tt.limit { + events = append(events, Event{Type: "limit", Hash: h}) + } + for _, h := range tt.invalidType { + events = append(events, Event{Type: "invalid_type", Hash: h}) + } + + reqBody := map[string]any{ + "url": tt.url, + "events": events, + } + + body, _ := json.Marshal(reqBody) + req, err := http.NewRequest(http.MethodPost, "/affected-commits", bytes.NewBuffer(body)) + if err != nil { + t.Fatal(err) + } + rr := httptest.NewRecorder() + affectedCommitsHandler(rr, req) + + if status := rr.Code; status != tt.expectedCode { + t.Errorf("handler returned wrong status code: got %v want %v", + status, tt.expectedCode) + } + }) + } +} diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 1c23f768694..ae442603201 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -7,7 +7,9 @@ import ( "errors" "fmt" "log/slog" + "maps" "os" + "slices" "strings" "sync" "time" @@ -395,3 +397,176 @@ func (r *Repository) updatePatchID(commitHash, patchID SHA1) { r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], commitHash) } + +// Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events +func (r *Repository) Affected(introduced, fixed, lastAffected []SHA1, cherrypick bool) []*Commit { + // Expands the introduced and fixed commits to include cherrypick equivalents + // lastAffected should not be expanded because it does not imply a "fix" commit that can be cherrypicked to other branches + if cherrypick { + introduced = r.expandByCherrypick(introduced) + fixed = r.expandByCherrypick(fixed) + } + + safeCommits := r.findSafeCommits(introduced, fixed, lastAffected) + + var affectedCommits []*Commit + + stack := make([]SHA1, 0, len(introduced)) + stack = append(stack, introduced...) + + visited := make(map[SHA1]struct{}) + + for len(stack) > 0 { + curr := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + if _, ok := visited[curr]; ok { + continue + } + visited[curr] = struct{}{} + + // If commit is in safe set, we can stop the traversal + if _, ok := safeCommits[curr]; ok { + continue + } + + // Otherwise, add to affected commits + affectedCommits = append(affectedCommits, r.commitDetails[curr]) + + // Add children to DFS stack + if children, ok := r.commitGraph[curr]; ok { + stack = append(stack, children...) + } + } + + return affectedCommits +} + +// findSafeCommits returns a set of commits that are non-vulnerable +// Traversing from fixed and children of last affected to the next introduced (if exist) +func (r *Repository) findSafeCommits(introduced, fixed, lastAffected []SHA1) map[SHA1]struct{} { + introducedMap := make(map[SHA1]struct{}) + for _, commit := range introduced { + introducedMap[commit] = struct{}{} + } + + safeSet := make(map[SHA1]struct{}) + stack := make([]SHA1, 0, len(fixed)+len(lastAffected)) + stack = append(stack, fixed...) + + // All children of last affected commits are root for traversal + for _, commit := range lastAffected { + if children, ok := r.commitGraph[commit]; ok { + for _, child := range children { + // Except if child is an introduced commit + if _, ok := introducedMap[child]; ok { + continue + } + stack = append(stack, child) + } + } + } + + // DFS until we hit an "introduced" commit + for len(stack) > 0 { + curr := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + if _, ok := safeSet[curr]; ok { + continue + } + safeSet[curr] = struct{}{} + + if children, ok := r.commitGraph[curr]; ok { + for _, child := range children { + // vuln re-introduced at a later commit, subsequent commits are no longer safe + if _, ok := introducedMap[child]; ok { + continue + } + stack = append(stack, child) + } + } + } + + return safeSet +} + +// expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits PLUS any other commits that share the same Patch ID +func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { + unique := make(map[SHA1]struct{}, len(commits)) // avoid duplication + var zeroPatchID SHA1 + + for _, hash := range commits { + // Find patch ID from commit details + details, ok := r.commitDetails[hash] + if !ok || details.PatchID == zeroPatchID { + unique[hash] = struct{}{} + continue + } + + // Add equivalent commits with the same Patch ID (including the current commit) + equivalents := r.patchIDToCommits[details.PatchID] + for _, eq := range equivalents { + unique[eq] = struct{}{} + } + } + + keys := slices.Collect(maps.Keys(unique)) + + return keys +} + +// Between walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) +func (r *Repository) Between(introduced, limit []SHA1) []*Commit { + var affectedCommits []*Commit + + introMap := make(map[SHA1]struct{}, len(introduced)) + for _, commit := range introduced { + introMap[commit] = struct{}{} + } + + // DFS to walk from limit(s) to introduced (follow first parent) + stack := make([]SHA1, 0, len(limit)) + // Start from limits' parents + for _, commit := range limit { + details, ok := r.commitDetails[commit] + if !ok { + continue + } + if len(details.Parents) > 0 { + stack = append(stack, details.Parents[0]) + } + } + + visited := make(map[SHA1]struct{}) + + for len(stack) > 0 { + curr := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + if _, ok := visited[curr]; ok { + continue + } + visited[curr] = struct{}{} + + // Add current node to affected commits + details, ok := r.commitDetails[curr] + if !ok { + continue + } + + affectedCommits = append(affectedCommits, details) + + // If commit is in introduced, we can stop the traversal after adding it to affected + if _, ok := introMap[curr]; ok { + continue + } + + // Add first parent to stack to only walk the linear branch + if len(details.Parents) > 0 { + stack = append(stack, details.Parents[0]) + } + } + + return affectedCommits +} diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 9ec916c30fc..8cbd90c8d02 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -2,10 +2,16 @@ package main import ( "context" + "encoding/hex" + "fmt" "os" "os/exec" "path/filepath" + "sort" + "strings" "testing" + + "github.com/google/go-cmp/cmp" ) // A very simple test repository with 3 commits and 2 tags. @@ -131,3 +137,387 @@ func TestLoadRepository(t *testing.T) { } } } + +// Helper to decode string into SHA1 +func decodeSHA1(s string) SHA1 { + var hash SHA1 + // Pad with zeros because the test strings are shorter than 40 char + padded := fmt.Sprintf("%040s", s) + b, err := hex.DecodeString(padded) + if err != nil { + panic(err) + } + copy(hash[:], b) + + return hash +} + +// Helper to encode SHA1 into string (leading 0's removed) +func encodeSHA1(hash SHA1) string { + // Remove padding zeros for a cleaner results + str := hex.EncodeToString(hash[:]) + + return strings.TrimLeft(str, "0") +} + +func TestExpandByCherrypick(t *testing.T) { + repo := NewRepository("/repo") + + // Commit hashes + h1 := decodeSHA1("aaaa") + h2 := decodeSHA1("bbbb") + h3 := decodeSHA1("cccc") + + // Patch ID + p1 := decodeSHA1("1111") + + // Setup commit details + repo.commitDetails[h1] = &Commit{Hash: h1, PatchID: p1} + repo.commitDetails[h2] = &Commit{Hash: h2} + repo.commitDetails[h3] = &Commit{Hash: h3, PatchID: p1} // h3 has the same patch ID as h1 should be cherry picked + + // Setup patch ID map + repo.patchIDToCommits[p1] = []SHA1{h1, h3} + + tests := []struct { + name string + input []SHA1 + expected []SHA1 + }{ + { + name: "Expand single commit with cherry-pick", + input: []SHA1{h1}, + expected: []SHA1{h1, h3}, + }, + { + name: "No expansion for commit without cherry-pick", + input: []SHA1{h2}, + expected: []SHA1{h2}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := repo.expandByCherrypick(tt.input) + + if diff := cmp.Diff(tt.expected, got); diff != "" { + t.Errorf("expandByCherrypick() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +// Testing cases with introduced and fixed only. +func TestAffected_Introduced_Fixed(t *testing.T) { + repo := NewRepository("/repo") + + // Graph: (Parent -> Child) + // -> F -> G + // / + // A -> B -> C -> D -> E + // \ / + // -> H -> + + hA := decodeSHA1("aaaa") + hB := decodeSHA1("bbbb") + hC := decodeSHA1("cccc") + hD := decodeSHA1("dddd") + hE := decodeSHA1("eeee") + hF := decodeSHA1("ffff") + hG := decodeSHA1("abab") + hH := decodeSHA1("acac") + + // Setup graph (Parent -> Children) + repo.commitGraph[hA] = []SHA1{hB} + repo.commitGraph[hB] = []SHA1{hC, hH} + repo.commitGraph[hC] = []SHA1{hD, hF} + repo.commitGraph[hD] = []SHA1{hE} + repo.commitGraph[hF] = []SHA1{hG} + repo.commitGraph[hH] = []SHA1{hD} + + // Setup details + repo.commitDetails[hA] = &Commit{Hash: hA} + repo.commitDetails[hB] = &Commit{Hash: hB} + repo.commitDetails[hC] = &Commit{Hash: hC} + repo.commitDetails[hD] = &Commit{Hash: hD} + repo.commitDetails[hE] = &Commit{Hash: hE} + repo.commitDetails[hF] = &Commit{Hash: hF} + repo.commitDetails[hG] = &Commit{Hash: hG} + repo.commitDetails[hH] = &Commit{Hash: hH} + + tests := []struct { + name string + introduced []SHA1 + fixed []SHA1 + lastAffected []SHA1 + expected []SHA1 + }{ + { + name: "Linear: A introduced, B fixed", + introduced: []SHA1{hA}, + fixed: []SHA1{hB}, + expected: []SHA1{hA}, + }, + { + name: "Branch propagation: A introduced, D fixed", + introduced: []SHA1{hA}, + fixed: []SHA1{hD}, + expected: []SHA1{hA, hB, hC, hF, hG, hH}, + }, + { + name: "Diverged before introduce: C introduced, E fixed", + introduced: []SHA1{hC}, + fixed: []SHA1{hE}, + expected: []SHA1{hC, hD, hF, hG}, + }, + { + name: "Two sets: (A,C) introduced, (B,D,G) fixed", + introduced: []SHA1{hA, hC}, + fixed: []SHA1{hB, hD, hG}, + expected: []SHA1{hA, hC, hF}, + }, + { + name: "Merge fix: A introduced, H fixed", + introduced: []SHA1{hA}, + fixed: []SHA1{hH}, + expected: []SHA1{hA, hB, hC, hF, hG}, + }, + { + name: "Everything affected if no fix", + introduced: []SHA1{hA}, + expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotCommits := repo.Affected(tt.introduced, tt.fixed, tt.lastAffected, false) + var got []SHA1 + for _, c := range gotCommits { + got = append(got, c.Hash) + } + + // Sort got and expected for comparison + sort.Slice(got, func(i, j int) bool { + return string(got[i][:]) < string(got[j][:]) + }) + sort.Slice(tt.expected, func(i, j int) bool { + return string(tt.expected[i][:]) < string(tt.expected[j][:]) + }) + + if diff := cmp.Diff(tt.expected, got); diff != "" { + // Turn them back into strings so it's easier to read + gotStr := make([]string, len(got)) + for i, c := range got { + gotStr[i] = encodeSHA1(c) + } + expectedStr := make([]string, len(tt.expected)) + for i, c := range tt.expected { + expectedStr[i] = encodeSHA1(c) + } + + t.Errorf("TestAffected_Introduced_Fixed() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + } + }) + } +} + +func TestAffected_Introduced_LastAffected(t *testing.T) { + repo := NewRepository("/repo") + + // Graph: (Parent -> Child) + // A -> B -> C -> D -> E -> F + // \ / + // -> G -> H + + hA := decodeSHA1("aaaa") + hB := decodeSHA1("bbbb") + hC := decodeSHA1("cccc") + hD := decodeSHA1("dddd") + hE := decodeSHA1("eeee") + hF := decodeSHA1("ffff") + hG := decodeSHA1("abab") + hH := decodeSHA1("acac") + + // Setup graph (Parent -> Children) + repo.commitGraph[hA] = []SHA1{hB} + repo.commitGraph[hB] = []SHA1{hC, hG} + repo.commitGraph[hC] = []SHA1{hD} + repo.commitGraph[hD] = []SHA1{hE} + repo.commitGraph[hE] = []SHA1{hF} + repo.commitGraph[hG] = []SHA1{hD, hH} + + // Setup details + repo.commitDetails[hA] = &Commit{Hash: hA} + repo.commitDetails[hB] = &Commit{Hash: hB} + repo.commitDetails[hC] = &Commit{Hash: hC} + repo.commitDetails[hD] = &Commit{Hash: hD} + repo.commitDetails[hE] = &Commit{Hash: hE} + repo.commitDetails[hF] = &Commit{Hash: hF} + repo.commitDetails[hG] = &Commit{Hash: hG} + repo.commitDetails[hH] = &Commit{Hash: hH} + + tests := []struct { + name string + introduced []SHA1 + fixed []SHA1 + lastAffected []SHA1 + expected []SHA1 + }{ + { + name: "Linear: E introduced, F lastAffected", + introduced: []SHA1{hE}, + lastAffected: []SHA1{hF}, + expected: []SHA1{hE, hF}, + }, + { + name: "Branch propagation: A introduced, D lastAffected", + introduced: []SHA1{hA}, + lastAffected: []SHA1{hD}, + expected: []SHA1{hA, hB, hC, hD, hG, hH}, + }, + { + name: "Diverged before introduce: C introduced, E lastAffected", + introduced: []SHA1{hC}, + lastAffected: []SHA1{hE}, + expected: []SHA1{hC, hD, hE}, + }, + { + name: "Two sets: (C,E) introduced, (D,F) lastAffected", + introduced: []SHA1{hC, hE}, + lastAffected: []SHA1{hD, hF}, + expected: []SHA1{hC, hD, hE, hF}, + }, + { + name: "Everything affected if no lastAffected", + introduced: []SHA1{hA}, + expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotCommits := repo.Affected(tt.introduced, tt.fixed, tt.lastAffected, false) + var got []SHA1 + for _, c := range gotCommits { + got = append(got, c.Hash) + } + + // Sort got and expected for comparison + sort.Slice(got, func(i, j int) bool { + return string(got[i][:]) < string(got[j][:]) + }) + sort.Slice(tt.expected, func(i, j int) bool { + return string(tt.expected[i][:]) < string(tt.expected[j][:]) + }) + + if diff := cmp.Diff(tt.expected, got); diff != "" { + // Turn them back into strings so it's easier to read + gotStr := make([]string, len(got)) + for i, c := range got { + gotStr[i] = encodeSHA1(c) + } + expectedStr := make([]string, len(tt.expected)) + for i, c := range tt.expected { + expectedStr[i] = encodeSHA1(c) + } + + t.Errorf("TestAffected_Introduced_LastAffected() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + } + }) + } +} + +func TestBetween(t *testing.T) { + repo := NewRepository("/repo") + + // Graph: (Parent -> Child) + // A -> B -> C -> D -> E + // \ + // -> F -> G -> H + + hA := decodeSHA1("aaaa") + hB := decodeSHA1("bbbb") + hC := decodeSHA1("cccc") + hD := decodeSHA1("dddd") + hE := decodeSHA1("eeee") + hF := decodeSHA1("ffff") + hG := decodeSHA1("abab") + hH := decodeSHA1("acac") + + // Setup graph (Parent -> Children) + repo.commitGraph[hA] = []SHA1{hB} + repo.commitGraph[hB] = []SHA1{hC, hF} + repo.commitGraph[hC] = []SHA1{hD} + repo.commitGraph[hD] = []SHA1{hE} + repo.commitGraph[hF] = []SHA1{hG} + repo.commitGraph[hG] = []SHA1{hH} + + // Setup details + repo.commitDetails[hA] = &Commit{Hash: hA} + repo.commitDetails[hB] = &Commit{Hash: hB, Parents: []SHA1{hA}} + repo.commitDetails[hC] = &Commit{Hash: hC, Parents: []SHA1{hB}} + repo.commitDetails[hD] = &Commit{Hash: hD, Parents: []SHA1{hC}} + repo.commitDetails[hE] = &Commit{Hash: hE, Parents: []SHA1{hD}} + repo.commitDetails[hF] = &Commit{Hash: hF, Parents: []SHA1{hB}} + repo.commitDetails[hG] = &Commit{Hash: hG, Parents: []SHA1{hF}} + repo.commitDetails[hH] = &Commit{Hash: hH, Parents: []SHA1{hG}} + + tests := []struct { + name string + introduced []SHA1 + limit []SHA1 + expected []SHA1 + }{ + { + name: "One branch: A introduced, D limit", + introduced: []SHA1{hA}, + limit: []SHA1{hD}, + expected: []SHA1{hA, hB, hC}, + }, + { + name: "Side branch: A introduced, G limit", + introduced: []SHA1{hA}, + limit: []SHA1{hG}, + expected: []SHA1{hA, hB, hF}, + }, + { + name: "Two branches: A introduced, (D,G) limit", + introduced: []SHA1{hA}, + limit: []SHA1{hD, hG}, + expected: []SHA1{hA, hB, hC, hF}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotCommits := repo.Between(tt.introduced, tt.limit) + var got []SHA1 + for _, c := range gotCommits { + got = append(got, c.Hash) + } + + // Sort got and expected for comparison + sort.Slice(got, func(i, j int) bool { + return string(got[i][:]) < string(got[j][:]) + }) + sort.Slice(tt.expected, func(i, j int) bool { + return string(tt.expected[i][:]) < string(tt.expected[j][:]) + }) + + if diff := cmp.Diff(tt.expected, got); diff != "" { + // Turn them back into strings so it's easier to read + gotStr := make([]string, len(got)) + for i, c := range got { + gotStr[i] = encodeSHA1(c) + } + expectedStr := make([]string, len(tt.expected)) + for i, c := range tt.expected { + expectedStr[i] = encodeSHA1(c) + } + + t.Errorf("TestBetween() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + } + }) + } +} From 3912e923112b043dc7172a27020fde3bbdf19ac6 Mon Sep 17 00:00:00 2001 From: Joey L Date: Fri, 27 Feb 2026 07:15:29 +0000 Subject: [PATCH 02/39] making intro=0 work --- go/cmd/gitter/gitter.go | 28 ++++------ go/cmd/gitter/repository.go | 96 +++++++++++++++++++++++++++++++- go/cmd/gitter/repository_test.go | 68 ++++++++++++++++++---- 3 files changed, 162 insertions(+), 30 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 0ebbbc0b8d2..fe7bdcf1d2d 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -533,32 +533,24 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() url := body.URL - introduced := []SHA1{} - fixed := []SHA1{} - lastAffected := []SHA1{} - limit := []SHA1{} + introduced := []string{} + fixed := []string{} + lastAffected := []string{} + limit := []string{} cherrypick := body.DetectCherrypicks ctx := context.WithValue(r.Context(), urlKey, url) for _, event := range body.Events { - hash, err := hex.DecodeString(event.Hash) - if err != nil { - logger.ErrorContext(ctx, "Error parsing hash", slog.String("hash", event.Hash), slog.Any("error", err)) - http.Error(w, "Invalid hash: "+event.Hash, http.StatusBadRequest) - - return - } - switch event.Type { case EventTypeIntroduced: - introduced = append(introduced, SHA1(hash)) + introduced = append(introduced, event.Hash) case EventTypeFixed: - fixed = append(fixed, SHA1(hash)) + fixed = append(fixed, event.Hash) case EventTypeLastAffected: - lastAffected = append(lastAffected, SHA1(hash)) + lastAffected = append(lastAffected, event.Hash) case EventTypeLimit: - limit = append(limit, SHA1(hash)) + limit = append(limit, event.Hash) default: logger.ErrorContext(ctx, "Invalid event type", slog.String("event_type", string(event.Type))) http.Error(w, fmt.Sprintf("Invalid event type: %s", event.Type), http.StatusBadRequest) @@ -616,9 +608,9 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { var affectedCommits []*Commit if len(limit) > 0 { - affectedCommits = repo.Between(introduced, limit) + affectedCommits = repo.Between(ctx, introduced, limit) } else { - affectedCommits = repo.Affected(introduced, fixed, lastAffected, cherrypick) + affectedCommits = repo.Affected(ctx, introduced, fixed, lastAffected, cherrypick) } w.Header().Set("Content-Type", "application/json") diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index ae442603201..7ad0f725058 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -42,6 +42,9 @@ type Repository struct { tagToCommit map[string]SHA1 // For cherry-pick detection: PatchID -> []commit hash patchIDToCommits map[SHA1][]SHA1 + // Root commits (commits with no parents) + // In a typical repository this is the initial commit + rootCommits []SHA1 } // %H commit hash; %P parent hashes; %D:refs (tab delimited) @@ -200,6 +203,11 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC continue } + // We want to keep the root commit (no parent) easily accessible + if len(parentHashes) == 0 { + r.rootCommits = append(r.rootCommits, childHash) + } + // Add commit to graph (parent -> []child) for _, parentHash := range parentHashes { r.commitGraph[parentHash] = append(r.commitGraph[parentHash], childHash) @@ -398,8 +406,60 @@ func (r *Repository) updatePatchID(commitHash, patchID SHA1) { r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], commitHash) } +func parseHash(hash string) (SHA1, error) { + hashBytes, err := hex.DecodeString(hash) + if err != nil { + return SHA1{}, fmt.Errorf("failed to decode hash: %w", err) + } + return SHA1(hashBytes), nil +} + // Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events -func (r *Repository) Affected(introduced, fixed, lastAffected []SHA1, cherrypick bool) []*Commit { +func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs []string, cherrypick bool) []*Commit { + introduced := []SHA1{} + fixed := []SHA1{} + lastAffected := []SHA1{} + + // Convert string input into SHA1 + // Introduced can be 0 and we'll replace it with the root commit + for _, s := range introStrs { + if s == "0" { + introduced = append(introduced, r.rootCommits...) + continue + } + + sha, err := parseHash(s) + if err != nil { + // Log error and continue if a commit hash is invalid + logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + continue + } + + introduced = append(introduced, sha) + } + + for _, s := range fixedStrs { + sha, err := parseHash(s) + if err != nil { + // Log error and continue if a commit hash is invalid + logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + continue + } + + fixed = append(fixed, sha) + } + + for _, s := range laStrs { + sha, err := parseHash(s) + if err != nil { + // Log error and continue if a commit hash is invalid + logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + continue + } + + lastAffected = append(lastAffected, sha) + } + // Expands the introduced and fixed commits to include cherrypick equivalents // lastAffected should not be expanded because it does not imply a "fix" commit that can be cherrypicked to other branches if cherrypick { @@ -517,7 +577,39 @@ func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { } // Between walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) -func (r *Repository) Between(introduced, limit []SHA1) []*Commit { +func (r *Repository) Between(ctx context.Context, introStrs, limitStrs []string) []*Commit { + introduced := []SHA1{} + limit := []SHA1{} + + // Convert string input into SHA1 + // Introduced can be 0 and we'll replace it with the root commit + for _, s := range introStrs { + if s == "0" { + introduced = append(introduced, r.rootCommits...) + continue + } + + sha, err := parseHash(s) + if err != nil { + // Log error and continue if a commit hash is invalid + logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + continue + } + + introduced = append(introduced, sha) + } + + for _, s := range limitStrs { + sha, err := parseHash(s) + if err != nil { + // Log error and continue if a commit hash is invalid + logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + continue + } + + limit = append(limit, sha) + } + var affectedCommits []*Commit introMap := make(map[SHA1]struct{}, len(introduced)) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 8cbd90c8d02..d3157eddd7a 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -152,8 +152,13 @@ func decodeSHA1(s string) SHA1 { return hash } -// Helper to encode SHA1 into string (leading 0's removed) +// Helper to encode SHA1 into string func encodeSHA1(hash SHA1) string { + return hex.EncodeToString(hash[:]) +} + +// Helper to pretty print SHA1 as string (leading 0's removed) +func printSHA1(hash SHA1) string { // Remove padding zeros for a cleaner results str := hex.EncodeToString(hash[:]) @@ -291,7 +296,22 @@ func TestAffected_Introduced_Fixed(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotCommits := repo.Affected(tt.introduced, tt.fixed, tt.lastAffected, false) + // Convert SHA1 to string for the new API + introStrs := make([]string, len(tt.introduced)) + for i, h := range tt.introduced { + introStrs[i] = encodeSHA1(h) + } + fixedStrs := make([]string, len(tt.fixed)) + for i, h := range tt.fixed { + fixedStrs[i] = encodeSHA1(h) + } + laStrs := make([]string, len(tt.lastAffected)) + for i, h := range tt.lastAffected { + laStrs[i] = encodeSHA1(h) + } + + gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false) + var got []SHA1 for _, c := range gotCommits { got = append(got, c.Hash) @@ -309,11 +329,11 @@ func TestAffected_Introduced_Fixed(t *testing.T) { // Turn them back into strings so it's easier to read gotStr := make([]string, len(got)) for i, c := range got { - gotStr[i] = encodeSHA1(c) + gotStr[i] = printSHA1(c) } expectedStr := make([]string, len(tt.expected)) for i, c := range tt.expected { - expectedStr[i] = encodeSHA1(c) + expectedStr[i] = printSHA1(c) } t.Errorf("TestAffected_Introduced_Fixed() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) @@ -397,7 +417,22 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotCommits := repo.Affected(tt.introduced, tt.fixed, tt.lastAffected, false) + // Convert SHA1 to string for the new API + introStrs := make([]string, len(tt.introduced)) + for i, h := range tt.introduced { + introStrs[i] = encodeSHA1(h) + } + fixedStrs := make([]string, len(tt.fixed)) + for i, h := range tt.fixed { + fixedStrs[i] = encodeSHA1(h) + } + laStrs := make([]string, len(tt.lastAffected)) + for i, h := range tt.lastAffected { + laStrs[i] = encodeSHA1(h) + } + + gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false) + var got []SHA1 for _, c := range gotCommits { got = append(got, c.Hash) @@ -415,11 +450,11 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { // Turn them back into strings so it's easier to read gotStr := make([]string, len(got)) for i, c := range got { - gotStr[i] = encodeSHA1(c) + gotStr[i] = printSHA1(c) } expectedStr := make([]string, len(tt.expected)) for i, c := range tt.expected { - expectedStr[i] = encodeSHA1(c) + expectedStr[i] = printSHA1(c) } t.Errorf("TestAffected_Introduced_LastAffected() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) @@ -428,6 +463,7 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { } } + func TestBetween(t *testing.T) { repo := NewRepository("/repo") @@ -491,7 +527,18 @@ func TestBetween(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotCommits := repo.Between(tt.introduced, tt.limit) + // Convert SHA1 to string for the new API + introStrs := make([]string, len(tt.introduced)) + for i, h := range tt.introduced { + introStrs[i] = encodeSHA1(h) + } + limitStrs := make([]string, len(tt.limit)) + for i, h := range tt.limit { + limitStrs[i] = encodeSHA1(h) + } + + gotCommits := repo.Between(t.Context(), introStrs, limitStrs) + var got []SHA1 for _, c := range gotCommits { got = append(got, c.Hash) @@ -509,11 +556,11 @@ func TestBetween(t *testing.T) { // Turn them back into strings so it's easier to read gotStr := make([]string, len(got)) for i, c := range got { - gotStr[i] = encodeSHA1(c) + gotStr[i] = printSHA1(c) } expectedStr := make([]string, len(tt.expected)) for i, c := range tt.expected { - expectedStr[i] = encodeSHA1(c) + expectedStr[i] = printSHA1(c) } t.Errorf("TestBetween() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) @@ -521,3 +568,4 @@ func TestBetween(t *testing.T) { }) } } + From e8d20bd1ac770a43c8214b13384c2d9be22ae27c Mon Sep 17 00:00:00 2001 From: Joey L Date: Thu, 5 Mar 2026 04:18:58 +0000 Subject: [PATCH 03/39] Add a sort to the cherrypick test because maps --- go/cmd/gitter/repository_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index d3157eddd7a..a19b244c64c 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -205,6 +205,13 @@ func TestExpandByCherrypick(t *testing.T) { t.Run(tt.name, func(t *testing.T) { got := repo.expandByCherrypick(tt.input) + sort.Slice(got, func(i, j int) bool { + return string(got[i][:]) < string(got[j][:]) + }) + sort.Slice(tt.expected, func(i, j int) bool { + return string(tt.expected[i][:]) < string(tt.expected[j][:]) + }) + if diff := cmp.Diff(tt.expected, got); diff != "" { t.Errorf("expandByCherrypick() mismatch (-want +got):\n%s", diff) } From 671bd977bd16353aa307ee3c1f0e68458fe8e088 Mon Sep 17 00:00:00 2001 From: Joey L Date: Sun, 8 Mar 2026 22:40:37 +0000 Subject: [PATCH 04/39] New affected commits logic + individual cherrypick --- go/cmd/gitter/gitter.go | 22 +-- go/cmd/gitter/repository.go | 106 ++++++--------- go/cmd/gitter/repository_test.go | 224 +++++++++++++++++++++++++------ 3 files changed, 239 insertions(+), 113 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index fe7bdcf1d2d..d95ecc0b207 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -43,8 +43,8 @@ const gitStoreFileName = "git-store" // API Endpoints var endpointHandlers = map[string]http.HandlerFunc{ - "GET /git": gitHandler, - "POST /cache": cacheHandler, + "GET /git": gitHandler, + "POST /cache": cacheHandler, "POST /affected-commits": affectedCommitsHandler, } @@ -519,10 +519,11 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { start := time.Now() // POST requets body processing var body struct { - URL string `json:"url"` - Events []Event `json:"events"` - DetectCherrypicks bool `json:"detect_cherrypicks"` - ForceUpdate bool `json:"force_update"` + URL string `json:"url"` + Events []Event `json:"events"` + DetectCherrypicksIntroduced bool `json:"detect_cherrypicks_introduced"` + DetectCherrypicksFixed bool `json:"detect_cherrypicks_fixed"` + ForceUpdate bool `json:"force_update"` } err := json.NewDecoder(r.Body).Decode(&body) if err != nil { @@ -537,7 +538,8 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { fixed := []string{} lastAffected := []string{} limit := []string{} - cherrypick := body.DetectCherrypicks + cherrypickIntro := body.DetectCherrypicksIntroduced + cherrypickFixed := body.DetectCherrypicksFixed ctx := context.WithValue(r.Context(), urlKey, url) @@ -558,7 +560,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { return } } - logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", introduced), slog.Any("fixed", fixed), slog.Any("last_affected", lastAffected), slog.Any("limit", limit), slog.Bool("cherrypick", cherrypick)) + logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", introduced), slog.Any("fixed", fixed), slog.Any("last_affected", lastAffected), slog.Any("limit", limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed)) semaphore <- struct{}{} defer func() { <-semaphore }() @@ -608,9 +610,9 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { var affectedCommits []*Commit if len(limit) > 0 { - affectedCommits = repo.Between(ctx, introduced, limit) + affectedCommits = repo.Limit(ctx, introduced, limit) } else { - affectedCommits = repo.Affected(ctx, introduced, fixed, lastAffected, cherrypick) + affectedCommits = repo.Affected(ctx, introduced, fixed, lastAffected, cherrypickIntro, cherrypickFixed) } w.Header().Set("Content-Type", "application/json") diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 7ad0f725058..7aee8270c1b 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -415,10 +415,10 @@ func parseHash(hash string) (SHA1, error) { } // Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events -func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs []string, cherrypick bool) []*Commit { - introduced := []SHA1{} - fixed := []SHA1{} - lastAffected := []SHA1{} +func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs []string, cherrypickIntro, cherrypickFixed bool) []*Commit { + introduced := make([]SHA1, 0, len(introStrs)) + fixed := make([]SHA1, 0, len(fixedStrs)) + lastAffected := make([]SHA1, 0, len(laStrs)) // Convert string input into SHA1 // Introduced can be 0 and we'll replace it with the root commit @@ -431,7 +431,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs sha, err := parseHash(s) if err != nil { // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + logger.ErrorContext(ctx, "failed to parse commit hash: introduced", slog.String("hash", s), slog.Any("err", err)) continue } @@ -442,7 +442,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs sha, err := parseHash(s) if err != nil { // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + logger.ErrorContext(ctx, "failed to parse commit hash: fixed", slog.String("hash", s), slog.Any("err", err)) continue } @@ -453,7 +453,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs sha, err := parseHash(s) if err != nil { // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) + logger.ErrorContext(ctx, "failed to parse commit hash: last_affected", slog.String("hash", s), slog.Any("err", err)) continue } @@ -462,15 +462,40 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs // Expands the introduced and fixed commits to include cherrypick equivalents // lastAffected should not be expanded because it does not imply a "fix" commit that can be cherrypicked to other branches - if cherrypick { + if cherrypickIntro { introduced = r.expandByCherrypick(introduced) + } + if cherrypickFixed { fixed = r.expandByCherrypick(fixed) } - safeCommits := r.findSafeCommits(introduced, fixed, lastAffected) - var affectedCommits []*Commit + // Fixed commits and children of last affected are both in this set + // For graph traversal sake they are both considered the fix + fixedMap := make(map[SHA1]struct{}, len(fixed)+len(lastAffected)) + + for _, commit := range fixed { + fixedMap[commit] = struct{}{} + } + + for _, commit := range lastAffected { + if _, ok := r.commitGraph[commit]; ok { + for _, child := range r.commitGraph[commit] { + fixedMap[child] = struct{}{} + } + } + } + + // In the case that a commit in fixedMap is also in introduced + // we should remove it from the fixedMap (the commit fixed one but introduced another vuln) + for _, commit := range introduced { + if _, ok := fixedMap[commit]; ok { + delete(fixedMap, commit) + } + } + + // The graph traversal stack := make([]SHA1, 0, len(introduced)) stack = append(stack, introduced...) @@ -485,13 +510,15 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs } visited[curr] = struct{}{} - // If commit is in safe set, we can stop the traversal - if _, ok := safeCommits[curr]; ok { + // Stop traversal if the commit is in the fixed set (fixed or children of last_affected) + if _, ok := fixedMap[curr]; ok { continue } // Otherwise, add to affected commits - affectedCommits = append(affectedCommits, r.commitDetails[curr]) + if details, ok := r.commitDetails[curr]; ok { + affectedCommits = append(affectedCommits, details) + } // Add children to DFS stack if children, ok := r.commitGraph[curr]; ok { @@ -502,56 +529,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs return affectedCommits } -// findSafeCommits returns a set of commits that are non-vulnerable -// Traversing from fixed and children of last affected to the next introduced (if exist) -func (r *Repository) findSafeCommits(introduced, fixed, lastAffected []SHA1) map[SHA1]struct{} { - introducedMap := make(map[SHA1]struct{}) - for _, commit := range introduced { - introducedMap[commit] = struct{}{} - } - - safeSet := make(map[SHA1]struct{}) - stack := make([]SHA1, 0, len(fixed)+len(lastAffected)) - stack = append(stack, fixed...) - - // All children of last affected commits are root for traversal - for _, commit := range lastAffected { - if children, ok := r.commitGraph[commit]; ok { - for _, child := range children { - // Except if child is an introduced commit - if _, ok := introducedMap[child]; ok { - continue - } - stack = append(stack, child) - } - } - } - - // DFS until we hit an "introduced" commit - for len(stack) > 0 { - curr := stack[len(stack)-1] - stack = stack[:len(stack)-1] - - if _, ok := safeSet[curr]; ok { - continue - } - safeSet[curr] = struct{}{} - - if children, ok := r.commitGraph[curr]; ok { - for _, child := range children { - // vuln re-introduced at a later commit, subsequent commits are no longer safe - if _, ok := introducedMap[child]; ok { - continue - } - stack = append(stack, child) - } - } - } - - return safeSet -} - -// expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits PLUS any other commits that share the same Patch ID +// expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits + any other commits that share the same Patch ID func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { unique := make(map[SHA1]struct{}, len(commits)) // avoid duplication var zeroPatchID SHA1 @@ -577,7 +555,7 @@ func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { } // Between walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) -func (r *Repository) Between(ctx context.Context, introStrs, limitStrs []string) []*Commit { +func (r *Repository) Limit(ctx context.Context, introStrs, limitStrs []string) []*Commit { introduced := []SHA1{} limit := []SHA1{} diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index a19b244c64c..413d7935e46 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -271,29 +271,35 @@ func TestAffected_Introduced_Fixed(t *testing.T) { expected: []SHA1{hA}, }, { - name: "Branch propagation: A introduced, D fixed", + name: "Branch propagation: A introduced, C fixed", introduced: []SHA1{hA}, - fixed: []SHA1{hD}, - expected: []SHA1{hA, hB, hC, hF, hG, hH}, + fixed: []SHA1{hC}, + expected: []SHA1{hA, hB, hH, hD, hE}, }, { - name: "Diverged before introduce: C introduced, E fixed", - introduced: []SHA1{hC}, - fixed: []SHA1{hE}, - expected: []SHA1{hC, hD, hF, hG}, - }, - { - name: "Two sets: (A,C) introduced, (B,D,G) fixed", + name: "Re-introduced: (A,C) introduced, (B,D,G) fixed", introduced: []SHA1{hA, hC}, fixed: []SHA1{hB, hD, hG}, expected: []SHA1{hA, hC, hF}, }, { - name: "Merge fix: A introduced, H fixed", + name: "Merge intro: H introduced, E fixed", + introduced: []SHA1{hH}, + fixed: []SHA1{hE}, + expected: []SHA1{hH, hD}, + }, + { + name: "Merge fix (explicit merge commit): A introduced, (H, D) fixed", introduced: []SHA1{hA}, - fixed: []SHA1{hH}, + fixed: []SHA1{hH, hD}, expected: []SHA1{hA, hB, hC, hF, hG}, }, + { + name: "Merge fix (non-explicit): A introduced, H fixed", + introduced: []SHA1{hA}, + fixed: []SHA1{hH}, + expected: []SHA1{hA, hB, hC, hD, hE, hF, hG}, + }, { name: "Everything affected if no fix", introduced: []SHA1{hA}, @@ -317,7 +323,7 @@ func TestAffected_Introduced_Fixed(t *testing.T) { laStrs[i] = encodeSHA1(h) } - gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false) + gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false, false) var got []SHA1 for _, c := range gotCommits { @@ -353,9 +359,11 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { repo := NewRepository("/repo") // Graph: (Parent -> Child) - // A -> B -> C -> D -> E -> F - // \ / - // -> G -> H + // -> F -> G + // / + // A -> B -> C -> D -> E + // \ / + // -> H -> hA := decodeSHA1("aaaa") hB := decodeSHA1("bbbb") @@ -368,11 +376,11 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { // Setup graph (Parent -> Children) repo.commitGraph[hA] = []SHA1{hB} - repo.commitGraph[hB] = []SHA1{hC, hG} - repo.commitGraph[hC] = []SHA1{hD} + repo.commitGraph[hB] = []SHA1{hC, hH} + repo.commitGraph[hC] = []SHA1{hD, hF} repo.commitGraph[hD] = []SHA1{hE} - repo.commitGraph[hE] = []SHA1{hF} - repo.commitGraph[hG] = []SHA1{hD, hH} + repo.commitGraph[hF] = []SHA1{hG} + repo.commitGraph[hH] = []SHA1{hD} // Setup details repo.commitDetails[hA] = &Commit{Hash: hA} @@ -392,28 +400,40 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { expected []SHA1 }{ { - name: "Linear: E introduced, F lastAffected", - introduced: []SHA1{hE}, - lastAffected: []SHA1{hF}, - expected: []SHA1{hE, hF}, + name: "Linear: D introduced, E lastAffected", + introduced: []SHA1{hD}, + lastAffected: []SHA1{hE}, + expected: []SHA1{hD, hE}, }, { - name: "Branch propagation: A introduced, D lastAffected", + name: "Branch propagation (affected): A introduced, D lastAffected", introduced: []SHA1{hA}, lastAffected: []SHA1{hD}, - expected: []SHA1{hA, hB, hC, hD, hG, hH}, + expected: []SHA1{hA, hB, hC, hD, hF, hG, hH}, }, { - name: "Diverged before introduce: C introduced, E lastAffected", - introduced: []SHA1{hC}, - lastAffected: []SHA1{hE}, - expected: []SHA1{hC, hD, hE}, + name: "Branch propagation (unaffected): A introduced, B lastAffected", + introduced: []SHA1{hA}, + lastAffected: []SHA1{hB}, + expected: []SHA1{hA, hB}, }, { - name: "Two sets: (C,E) introduced, (D,F) lastAffected", - introduced: []SHA1{hC, hE}, - lastAffected: []SHA1{hD, hF}, - expected: []SHA1{hC, hD, hE, hF}, + name: "Re-introduced: (A,D) introduced, (B,E) lastAffected", + introduced: []SHA1{hA, hD}, + lastAffected: []SHA1{hB, hE}, + expected: []SHA1{hA, hB, hD, hE}, + }, + { + name: "Merge intro: H introduced, D lastAffected", + introduced: []SHA1{hH}, + lastAffected: []SHA1{hD}, + expected: []SHA1{hH, hD}, + }, + { + name: "Merge lastAffected: A introduced, H lastAffected", // TODO: Discuss!! + introduced: []SHA1{hA}, + lastAffected: []SHA1{hH}, + expected: []SHA1{hA, hB, hC, hF, hG, hH}, }, { name: "Everything affected if no lastAffected", @@ -438,7 +458,7 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { laStrs[i] = encodeSHA1(h) } - gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false) + gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false, false) var got []SHA1 for _, c := range gotCommits { @@ -470,8 +490,135 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { } } +// Testing with both fixed and lastAffected +func TestAffected_Combined(t *testing.T) { + repo := NewRepository("/repo") + + // Graph: (Parent -> Child) + // -> F -> G + // / + // A -> B -> C -> D -> E + // \ / + // -> H -> + + hA := decodeSHA1("aaaa") + hB := decodeSHA1("bbbb") + hC := decodeSHA1("cccc") + hD := decodeSHA1("dddd") + hE := decodeSHA1("eeee") + hF := decodeSHA1("ffff") + hG := decodeSHA1("abab") + hH := decodeSHA1("acac") + + // Setup graph (Parent -> Children) + repo.commitGraph[hA] = []SHA1{hB} + repo.commitGraph[hB] = []SHA1{hC, hH} + repo.commitGraph[hC] = []SHA1{hD, hF} + repo.commitGraph[hD] = []SHA1{hE} + repo.commitGraph[hF] = []SHA1{hG} + repo.commitGraph[hH] = []SHA1{hD} + + // Setup details + repo.commitDetails[hA] = &Commit{Hash: hA} + repo.commitDetails[hB] = &Commit{Hash: hB} + repo.commitDetails[hC] = &Commit{Hash: hC} + repo.commitDetails[hD] = &Commit{Hash: hD} + repo.commitDetails[hE] = &Commit{Hash: hE} + repo.commitDetails[hF] = &Commit{Hash: hF} + repo.commitDetails[hG] = &Commit{Hash: hG} + repo.commitDetails[hH] = &Commit{Hash: hH} + + tests := []struct { + name string + introduced []SHA1 + fixed []SHA1 + lastAffected []SHA1 + expected []SHA1 + }{ + { + name: "Branching out: C introduced, G fixed, D lastAffected", + introduced: []SHA1{hC}, + fixed: []SHA1{hG}, + lastAffected: []SHA1{hD}, + expected: []SHA1{hC, hD, hF}, + }, + { + name: "Redundant Blocking: A introduced, B fixed, E lastAffected", + introduced: []SHA1{hA}, + fixed: []SHA1{hB}, + lastAffected: []SHA1{hE}, + expected: []SHA1{hA}, + }, + { + name: "Conflicting events: Fixed equals Introduced", // TODO will this happen? + introduced: []SHA1{hA, hB}, + fixed: []SHA1{hB}, + expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, + }, + { + name: "Conflicting events: LastAffected equals Introduced", // TODO: Will this happen?? + introduced: []SHA1{hB, hH}, + lastAffected: []SHA1{hC, hH}, + expected: []SHA1{hB, hC, hH, hD, hE}, + }, + { + name: "Conflicting events: Fixed equals LastAffected", // TODO: DISCUSS? + introduced: []SHA1{hA}, + fixed: []SHA1{hB}, + lastAffected: []SHA1{hB}, + expected: []SHA1{hA}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Convert SHA1 to string for the new API + introStrs := make([]string, len(tt.introduced)) + for i, h := range tt.introduced { + introStrs[i] = encodeSHA1(h) + } + fixedStrs := make([]string, len(tt.fixed)) + for i, h := range tt.fixed { + fixedStrs[i] = encodeSHA1(h) + } + laStrs := make([]string, len(tt.lastAffected)) + for i, h := range tt.lastAffected { + laStrs[i] = encodeSHA1(h) + } + + gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false, false) + + var got []SHA1 + for _, c := range gotCommits { + got = append(got, c.Hash) + } + + // Sort got and expected for comparison + sort.Slice(got, func(i, j int) bool { + return string(got[i][:]) < string(got[j][:]) + }) + sort.Slice(tt.expected, func(i, j int) bool { + return string(tt.expected[i][:]) < string(tt.expected[j][:]) + }) -func TestBetween(t *testing.T) { + if diff := cmp.Diff(tt.expected, got); diff != "" { + // Turn them back into strings so it's easier to read + gotStr := make([]string, len(got)) + for i, c := range got { + gotStr[i] = printSHA1(c) + } + expectedStr := make([]string, len(tt.expected)) + for i, c := range tt.expected { + expectedStr[i] = printSHA1(c) + } + + t.Errorf("TestAffected_Introduced_LastAffected() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + } + }) + } +} + +func TestLimit(t *testing.T) { repo := NewRepository("/repo") // Graph: (Parent -> Child) @@ -544,7 +691,7 @@ func TestBetween(t *testing.T) { limitStrs[i] = encodeSHA1(h) } - gotCommits := repo.Between(t.Context(), introStrs, limitStrs) + gotCommits := repo.Limit(t.Context(), introStrs, limitStrs) var got []SHA1 for _, c := range gotCommits { @@ -570,9 +717,8 @@ func TestBetween(t *testing.T) { expectedStr[i] = printSHA1(c) } - t.Errorf("TestBetween() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + t.Errorf("TestLimit() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) } }) } } - From 32d12ef0601de57f10315196838dc946cba4ac0b Mon Sep 17 00:00:00 2001 From: Joey L Date: Sun, 8 Mar 2026 23:43:53 +0000 Subject: [PATCH 05/39] lint --- go/cmd/gitter/repository.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 7aee8270c1b..2bb2a086ca3 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -411,6 +411,7 @@ func parseHash(hash string) (SHA1, error) { if err != nil { return SHA1{}, fmt.Errorf("failed to decode hash: %w", err) } + return SHA1(hashBytes), nil } @@ -490,9 +491,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs // In the case that a commit in fixedMap is also in introduced // we should remove it from the fixedMap (the commit fixed one but introduced another vuln) for _, commit := range introduced { - if _, ok := fixedMap[commit]; ok { - delete(fixedMap, commit) - } + delete(fixedMap, commit) } // The graph traversal From 4360bcc52fb513ca8bd25523513382296566633a Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 03:50:32 +0000 Subject: [PATCH 06/39] new new logic + update tests --- go/cmd/gitter/repository.go | 92 +++++++++++++++++++++++++------- go/cmd/gitter/repository_test.go | 45 ++++------------ 2 files changed, 83 insertions(+), 54 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 2bb2a086ca3..f74c778ac1b 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -488,17 +488,80 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs } } - // In the case that a commit in fixedMap is also in introduced - // we should remove it from the fixedMap (the commit fixed one but introduced another vuln) - for _, commit := range introduced { - delete(fixedMap, commit) + // The graph traversal + // affectedMap deduplicates the affected commits from the graph walk from each introduced commit + affectedMap := make(map[SHA1]struct{}) + + // Walk each introduced commit and find its affected commit + for _, intro := range introduced { + // BFS from intro + queue := []SHA1{intro} + terminateMap := maps.Clone(fixedMap) + affectedFromIntro := make(map[SHA1]struct{}) + visited := make(map[SHA1]struct{}) + + for len(queue) > 0 { + curr := queue[0] + queue = queue[1:] + + if _, ok := visited[curr]; ok { + continue + } + visited[curr] = struct{}{} + + // When we hit a fixed commit (or its descendants), we aggressively map its + // entire downstream tree as terminated to satisfy the "any path blocked" rule. + if _, ok := terminateMap[curr]; ok { + // Inline DFS from current node to make all descendants unaffected / unaffectable + // 1. If a previous path added it to affected list, remove + // 2. Add to terminate set + stack := []SHA1{curr} + for len(stack) > 0 { + unaffected := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + // Remove from affected list if a bypass path added it previously + delete(affectedFromIntro, unaffected) + + if children, ok := r.commitGraph[unaffected]; ok { + for _, child := range children { + // If child is not already in the terminateSet, we want to traverse that path and mark descendants as unaffectable + if _, ok := terminateMap[child]; !ok { + terminateMap[child] = struct{}{} + stack = append(stack, child) + } + } + } + } + continue + } + + // If not in terminateSet, add to the intro-specific affected list and continue + affectedFromIntro[curr] = struct{}{} + if children, ok := r.commitGraph[curr]; ok { + queue = append(queue, children...) + } + } + + // Add the final affected list of this introduced commit to the global set + for commit := range affectedFromIntro { + affectedMap[commit] = struct{}{} + } } - // The graph traversal - stack := make([]SHA1, 0, len(introduced)) - stack = append(stack, introduced...) + // Return the affected commit details + for commit := range affectedMap { + affectedCommits = append(affectedCommits, r.commitDetails[commit]) + } + + return affectedCommits +} +// getTree returns all descendants of a commit (including the root itself) +func (r *Repository) getTree(root SHA1) []SHA1 { + stack := []SHA1{root} visited := make(map[SHA1]struct{}) + var result []SHA1 for len(stack) > 0 { curr := stack[len(stack)-1] @@ -508,24 +571,15 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs continue } visited[curr] = struct{}{} + result = append(result, curr) - // Stop traversal if the commit is in the fixed set (fixed or children of last_affected) - if _, ok := fixedMap[curr]; ok { - continue - } - - // Otherwise, add to affected commits - if details, ok := r.commitDetails[curr]; ok { - affectedCommits = append(affectedCommits, details) - } - - // Add children to DFS stack + // Add children to stack if children, ok := r.commitGraph[curr]; ok { stack = append(stack, children...) } } - return affectedCommits + return result } // expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits + any other commits that share the same Patch ID diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 413d7935e46..e2cb9737eda 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -274,7 +274,7 @@ func TestAffected_Introduced_Fixed(t *testing.T) { name: "Branch propagation: A introduced, C fixed", introduced: []SHA1{hA}, fixed: []SHA1{hC}, - expected: []SHA1{hA, hB, hH, hD, hE}, + expected: []SHA1{hA, hB, hH}, }, { name: "Re-introduced: (A,C) introduced, (B,D,G) fixed", @@ -289,16 +289,16 @@ func TestAffected_Introduced_Fixed(t *testing.T) { expected: []SHA1{hH, hD}, }, { - name: "Merge fix (explicit merge commit): A introduced, (H, D) fixed", + name: "Merge fix: A introduced, H fixed", introduced: []SHA1{hA}, - fixed: []SHA1{hH, hD}, + fixed: []SHA1{hH}, expected: []SHA1{hA, hB, hC, hF, hG}, }, { - name: "Merge fix (non-explicit): A introduced, H fixed", - introduced: []SHA1{hA}, + name: "Merge intro and fix (different branches): C introduced, H fixed", + introduced: []SHA1{hC}, fixed: []SHA1{hH}, - expected: []SHA1{hA, hB, hC, hD, hE, hF, hG}, + expected: []SHA1{hC, hD, hE, hF, hG}, }, { name: "Everything affected if no fix", @@ -406,16 +406,10 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { expected: []SHA1{hD, hE}, }, { - name: "Branch propagation (affected): A introduced, D lastAffected", + name: "Branch propagation: A introduced, C lastAffected", introduced: []SHA1{hA}, - lastAffected: []SHA1{hD}, - expected: []SHA1{hA, hB, hC, hD, hF, hG, hH}, - }, - { - name: "Branch propagation (unaffected): A introduced, B lastAffected", - introduced: []SHA1{hA}, - lastAffected: []SHA1{hB}, - expected: []SHA1{hA, hB}, + lastAffected: []SHA1{hC}, + expected: []SHA1{hA, hB, hC, hH}, }, { name: "Re-introduced: (A,D) introduced, (B,E) lastAffected", @@ -430,7 +424,7 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { expected: []SHA1{hH, hD}, }, { - name: "Merge lastAffected: A introduced, H lastAffected", // TODO: Discuss!! + name: "Merge lastAffected: A introduced, H lastAffected", introduced: []SHA1{hA}, lastAffected: []SHA1{hH}, expected: []SHA1{hA, hB, hC, hF, hG, hH}, @@ -549,25 +543,6 @@ func TestAffected_Combined(t *testing.T) { lastAffected: []SHA1{hE}, expected: []SHA1{hA}, }, - { - name: "Conflicting events: Fixed equals Introduced", // TODO will this happen? - introduced: []SHA1{hA, hB}, - fixed: []SHA1{hB}, - expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, - }, - { - name: "Conflicting events: LastAffected equals Introduced", // TODO: Will this happen?? - introduced: []SHA1{hB, hH}, - lastAffected: []SHA1{hC, hH}, - expected: []SHA1{hB, hC, hH, hD, hE}, - }, - { - name: "Conflicting events: Fixed equals LastAffected", // TODO: DISCUSS? - introduced: []SHA1{hA}, - fixed: []SHA1{hB}, - lastAffected: []SHA1{hB}, - expected: []SHA1{hA}, - }, } for _, tt := range tests { From 9e5da5258d7c5ae6a0d71540fc9d04998a7305f4 Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 04:15:51 +0000 Subject: [PATCH 07/39] =?UTF-8?q?=F0=9F=8D=92=E2=9B=8F=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go/cmd/gitter/repository_test.go | 130 ++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 1 deletion(-) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index e2cb9737eda..f9b332e6807 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -429,6 +429,12 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { lastAffected: []SHA1{hH}, expected: []SHA1{hA, hB, hC, hF, hG, hH}, }, + { + name: "Merge intro and lastAffected (different branches): C introduced, H lastAffected", + introduced: []SHA1{hC}, + lastAffected: []SHA1{hH}, + expected: []SHA1{hC}, + }, { name: "Everything affected if no lastAffected", introduced: []SHA1{hA}, @@ -587,7 +593,129 @@ func TestAffected_Combined(t *testing.T) { expectedStr[i] = printSHA1(c) } - t.Errorf("TestAffected_Introduced_LastAffected() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + t.Errorf("TestAffected_Combined() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + } + }) + } +} + +func TestAffected_Cherrypick(t *testing.T) { + repo := NewRepository("/repo") + + // Graph: (Parent -> Child) + // A -> B -> C -> D + // | | + // | (cherrypick) + // | | + // E -> F -> G -> H + + hA := decodeSHA1("aaaa") + hB := decodeSHA1("bbbb") + hC := decodeSHA1("cccc") + hD := decodeSHA1("dddd") + hE := decodeSHA1("eeee") + hF := decodeSHA1("ffff") + hG := decodeSHA1("abab") + hH := decodeSHA1("acac") + + c1 := decodeSHA1("c1") + c2 := decodeSHA1("c2") + + // Setup graph (Parent -> Children) + repo.commitGraph[hA] = []SHA1{hB} + repo.commitGraph[hB] = []SHA1{hC} + repo.commitGraph[hC] = []SHA1{hD} + repo.commitGraph[hE] = []SHA1{hF} + repo.commitGraph[hF] = []SHA1{hG} + repo.commitGraph[hG] = []SHA1{hH} + + // Setup PatchID map for cherrypicking + repo.patchIDToCommits[c1] = []SHA1{hA, hE} + repo.patchIDToCommits[c2] = []SHA1{hC, hG} + + // Setup details + repo.commitDetails[hA] = &Commit{Hash: hA, PatchID: c1} + repo.commitDetails[hB] = &Commit{Hash: hB, Parents: []SHA1{hA}} + repo.commitDetails[hC] = &Commit{Hash: hC, Parents: []SHA1{hB}, PatchID: c2} + repo.commitDetails[hD] = &Commit{Hash: hD, Parents: []SHA1{hC}} + repo.commitDetails[hE] = &Commit{Hash: hE, PatchID: c1} + repo.commitDetails[hF] = &Commit{Hash: hF, Parents: []SHA1{hB}} + repo.commitDetails[hG] = &Commit{Hash: hG, Parents: []SHA1{hF}, PatchID: c2} + repo.commitDetails[hH] = &Commit{Hash: hH, Parents: []SHA1{hG}} + + tests := []struct { + name string + introduced []SHA1 + fixed []SHA1 + cherrypickIntro bool + cherrypickFixed bool + expected []SHA1 + }{ + { + name: "Cherrypick Introduced Only: A introduced, G fixed", + introduced: []SHA1{hA}, + fixed: []SHA1{hG}, + cherrypickIntro: true, + cherrypickFixed: false, + expected: []SHA1{hA, hB, hC, hD, hE, hF}, + }, + { + name: "Cherrypick Fixed Only: A introduced, G fixed", + introduced: []SHA1{hA}, + fixed: []SHA1{hG}, + cherrypickIntro: false, + cherrypickFixed: true, + expected: []SHA1{hA, hB}, + }, + { + name: "Cherrypick Introduced and Fixed: A introduced, G fixed", + introduced: []SHA1{hA}, + fixed: []SHA1{hG}, + cherrypickIntro: true, + cherrypickFixed: true, + expected: []SHA1{hA, hB, hE, hF}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Convert SHA1 to string for the new API + introStrs := make([]string, len(tt.introduced)) + for i, h := range tt.introduced { + introStrs[i] = encodeSHA1(h) + } + fixedStrs := make([]string, len(tt.fixed)) + for i, h := range tt.fixed { + fixedStrs[i] = encodeSHA1(h) + } + + gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, nil, tt.cherrypickIntro, tt.cherrypickFixed) + + var got []SHA1 + for _, c := range gotCommits { + got = append(got, c.Hash) + } + + // Sort got and expected for comparison + sort.Slice(got, func(i, j int) bool { + return string(got[i][:]) < string(got[j][:]) + }) + sort.Slice(tt.expected, func(i, j int) bool { + return string(tt.expected[i][:]) < string(tt.expected[j][:]) + }) + + if diff := cmp.Diff(tt.expected, got); diff != "" { + // Turn them back into strings so it's easier to read + gotStr := make([]string, len(got)) + for i, c := range got { + gotStr[i] = printSHA1(c) + } + expectedStr := make([]string, len(tt.expected)) + for i, c := range tt.expected { + expectedStr[i] = printSHA1(c) + } + + t.Errorf("TestAffected_Cherrypick() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) } }) } From cb5c0218d95dfa17f3fef5cfce06d04957dc9118 Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 04:18:46 +0000 Subject: [PATCH 08/39] lint --- go/cmd/gitter/repository.go | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index f74c778ac1b..8c9edba4d1a 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -470,7 +470,6 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs fixed = r.expandByCherrypick(fixed) } - var affectedCommits []*Commit // Fixed commits and children of last affected are both in this set // For graph traversal sake they are both considered the fix @@ -533,6 +532,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs } } } + continue } @@ -550,6 +550,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs } // Return the affected commit details + affectedCommits := make([]*Commit, 0, len(affectedMap)) for commit := range affectedMap { affectedCommits = append(affectedCommits, r.commitDetails[commit]) } @@ -557,31 +558,6 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs return affectedCommits } -// getTree returns all descendants of a commit (including the root itself) -func (r *Repository) getTree(root SHA1) []SHA1 { - stack := []SHA1{root} - visited := make(map[SHA1]struct{}) - var result []SHA1 - - for len(stack) > 0 { - curr := stack[len(stack)-1] - stack = stack[:len(stack)-1] - - if _, ok := visited[curr]; ok { - continue - } - visited[curr] = struct{}{} - result = append(result, curr) - - // Add children to stack - if children, ok := r.commitGraph[curr]; ok { - stack = append(stack, children...) - } - } - - return result -} - // expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits + any other commits that share the same Patch ID func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { unique := make(map[SHA1]struct{}, len(commits)) // avoid duplication From be723fe3bfcbf4bbabb0b706a6d1a59ee4ce8c26 Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 04:24:47 +0000 Subject: [PATCH 09/39] lint :0) --- go/cmd/gitter/repository.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 8c9edba4d1a..9a3dec869ff 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -470,7 +470,6 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs fixed = r.expandByCherrypick(fixed) } - // Fixed commits and children of last affected are both in this set // For graph traversal sake they are both considered the fix fixedMap := make(map[SHA1]struct{}, len(fixed)+len(lastAffected)) From ad67fc1b531eb0c2861c9cbfecb923d102ae468c Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 04:45:35 +0000 Subject: [PATCH 10/39] Fix a test --- go/cmd/gitter/repository_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index f9b332e6807..7c4e1eef7d7 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -433,7 +433,7 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { name: "Merge intro and lastAffected (different branches): C introduced, H lastAffected", introduced: []SHA1{hC}, lastAffected: []SHA1{hH}, - expected: []SHA1{hC}, + expected: []SHA1{hC, hF, hG}, }, { name: "Everything affected if no lastAffected", From e53c6f29370e1a31cfb7aa6ffba6d77a1172e3e2 Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 05:21:29 +0000 Subject: [PATCH 11/39] Protobuf for response. Include branches as well. --- go/cmd/gitter/gitter.go | 26 +++- go/cmd/gitter/pb/repository/repository.pb.go | 129 +++++++++++++++++-- go/cmd/gitter/pb/repository/repository.proto | 9 ++ go/cmd/gitter/repository.go | 37 +++--- 4 files changed, 167 insertions(+), 34 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index d95ecc0b207..54e82c6bdbb 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -29,6 +29,9 @@ import ( "github.com/google/osv.dev/go/logger" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "golang.org/x/sync/singleflight" + "google.golang.org/protobuf/proto" + + pb "github.com/google/osv.dev/go/cmd/gitter/pb/repository" ) type contextKey string @@ -615,13 +618,26 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { affectedCommits = repo.Affected(ctx, introduced, fixed, lastAffected, cherrypickIntro, cherrypickFixed) } - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - if err := json.NewEncoder(w).Encode(affectedCommits); err != nil { - logger.ErrorContext(ctx, "Error encoding affected commits", slog.Any("error", err)) - http.Error(w, fmt.Sprintf("Error encoding affected commits: %v", err), http.StatusInternalServerError) + resp := &pb.AffectedCommitsResponse{} + for _, c := range affectedCommits { + resp.Commits = append(resp.Commits, &pb.AffectedCommit{ + Hash: c.Hash[:], + Refs: c.Refs, + }) + } + + out, err := proto.Marshal(resp) + if err != nil { + logger.ErrorContext(ctx, "Error marshaling affected commits", slog.Any("error", err)) + http.Error(w, fmt.Sprintf("Error marshaling affected commits: %v", err), http.StatusInternalServerError) return } + + w.Header().Set("Content-Type", "application/x-protobuf") + w.WriteHeader(http.StatusOK) + if _, err := w.Write(out); err != nil { + logger.ErrorContext(ctx, "Error writing response", slog.Any("error", err)) + } logger.InfoContext(ctx, "Request completed successfully: /affected-commits", slog.Duration("duration", time.Since(start))) } diff --git a/go/cmd/gitter/pb/repository/repository.pb.go b/go/cmd/gitter/pb/repository/repository.pb.go index 8d30547714c..3c1e0bb1928 100644 --- a/go/cmd/gitter/pb/repository/repository.pb.go +++ b/go/cmd/gitter/pb/repository/repository.pb.go @@ -7,12 +7,11 @@ package repository import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" reflect "reflect" sync "sync" unsafe "unsafe" - - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" ) const ( @@ -120,6 +119,102 @@ func (x *RepositoryCache) GetCommits() []*CommitDetail { return nil } +type AffectedCommit struct { + state protoimpl.MessageState `protogen:"open.v1"` + Hash []byte `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"` + Refs []string `protobuf:"bytes,2,rep,name=refs,proto3" json:"refs,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AffectedCommit) Reset() { + *x = AffectedCommit{} + mi := &file_repository_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AffectedCommit) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AffectedCommit) ProtoMessage() {} + +func (x *AffectedCommit) ProtoReflect() protoreflect.Message { + mi := &file_repository_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AffectedCommit.ProtoReflect.Descriptor instead. +func (*AffectedCommit) Descriptor() ([]byte, []int) { + return file_repository_proto_rawDescGZIP(), []int{2} +} + +func (x *AffectedCommit) GetHash() []byte { + if x != nil { + return x.Hash + } + return nil +} + +func (x *AffectedCommit) GetRefs() []string { + if x != nil { + return x.Refs + } + return nil +} + +type AffectedCommitsResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Commits []*AffectedCommit `protobuf:"bytes,1,rep,name=commits,proto3" json:"commits,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AffectedCommitsResponse) Reset() { + *x = AffectedCommitsResponse{} + mi := &file_repository_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AffectedCommitsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AffectedCommitsResponse) ProtoMessage() {} + +func (x *AffectedCommitsResponse) ProtoReflect() protoreflect.Message { + mi := &file_repository_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AffectedCommitsResponse.ProtoReflect.Descriptor instead. +func (*AffectedCommitsResponse) Descriptor() ([]byte, []int) { + return file_repository_proto_rawDescGZIP(), []int{3} +} + +func (x *AffectedCommitsResponse) GetCommits() []*AffectedCommit { + if x != nil { + return x.Commits + } + return nil +} + var File_repository_proto protoreflect.FileDescriptor const file_repository_proto_rawDesc = "" + @@ -129,7 +224,12 @@ const file_repository_proto_rawDesc = "" + "\x04hash\x18\x01 \x01(\fR\x04hash\x12\x19\n" + "\bpatch_id\x18\x02 \x01(\fR\apatchId\"A\n" + "\x0fRepositoryCache\x12.\n" + - "\acommits\x18\x01 \x03(\v2\x14.gitter.CommitDetailR\acommitsB\x0eZ\f./repositoryb\x06proto3" + "\acommits\x18\x01 \x03(\v2\x14.gitter.CommitDetailR\acommits\"8\n" + + "\x0eAffectedCommit\x12\x12\n" + + "\x04hash\x18\x01 \x01(\fR\x04hash\x12\x12\n" + + "\x04refs\x18\x02 \x03(\tR\x04refs\"K\n" + + "\x17AffectedCommitsResponse\x120\n" + + "\acommits\x18\x01 \x03(\v2\x16.gitter.AffectedCommitR\acommitsB\x0eZ\f./repositoryb\x06proto3" var ( file_repository_proto_rawDescOnce sync.Once @@ -143,18 +243,21 @@ func file_repository_proto_rawDescGZIP() []byte { return file_repository_proto_rawDescData } -var file_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 4) var file_repository_proto_goTypes = []any{ - (*CommitDetail)(nil), // 0: gitter.CommitDetail - (*RepositoryCache)(nil), // 1: gitter.RepositoryCache + (*CommitDetail)(nil), // 0: gitter.CommitDetail + (*RepositoryCache)(nil), // 1: gitter.RepositoryCache + (*AffectedCommit)(nil), // 2: gitter.AffectedCommit + (*AffectedCommitsResponse)(nil), // 3: gitter.AffectedCommitsResponse } var file_repository_proto_depIdxs = []int32{ 0, // 0: gitter.RepositoryCache.commits:type_name -> gitter.CommitDetail - 1, // [1:1] is the sub-list for method output_type - 1, // [1:1] is the sub-list for method input_type - 1, // [1:1] is the sub-list for extension type_name - 1, // [1:1] is the sub-list for extension extendee - 0, // [0:1] is the sub-list for field type_name + 2, // 1: gitter.AffectedCommitsResponse.commits:type_name -> gitter.AffectedCommit + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name } func init() { file_repository_proto_init() } @@ -168,7 +271,7 @@ func file_repository_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_repository_proto_rawDesc), len(file_repository_proto_rawDesc)), NumEnums: 0, - NumMessages: 2, + NumMessages: 4, NumExtensions: 0, NumServices: 0, }, diff --git a/go/cmd/gitter/pb/repository/repository.proto b/go/cmd/gitter/pb/repository/repository.proto index 2a128251da6..5f2062f573e 100644 --- a/go/cmd/gitter/pb/repository/repository.proto +++ b/go/cmd/gitter/pb/repository/repository.proto @@ -14,3 +14,12 @@ message CommitDetail { message RepositoryCache { repeated CommitDetail commits = 1; } + +message AffectedCommit { + bytes hash = 1; + repeated string refs = 2; +} + +message AffectedCommitsResponse { + repeated AffectedCommit commits = 1; +} diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 9a3dec869ff..44185e17878 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -22,10 +22,10 @@ import ( type SHA1 [20]byte type Commit struct { - Hash SHA1 `json:"hash"` - PatchID SHA1 `json:"patch_id"` - Parents []SHA1 `json:"parents"` - Tags []string `json:"tags"` + Hash SHA1 + PatchID SHA1 + Parents []SHA1 + Refs []string } // Repository holds the commit graph and other details for a git repository. @@ -163,16 +163,26 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC var childHash SHA1 parentHashes := []SHA1{} - tags := []string{} + refs := []string{} switch len(commitInfo) { case 3: // refs are separated by commas - refs := strings.Split(commitInfo[2], ", ") - for _, ref := range refs { - // Remove prefixes from tags, other refs such as HEAD will be left as is - if strings.Contains(ref, "tag: ") { - tags = append(tags, strings.TrimPrefix(ref, "tag: ")) + rawRefs := strings.Split(commitInfo[2], ", ") + for _, ref := range rawRefs { + // Remove prefixes from tags, other refs such as branches will be left as is + if strings.HasPrefix(ref, "tag: ") { + tag := strings.TrimPrefix(ref, "tag: ") + refs = append(refs, tag) + // Also populate the tag-to-commit map + r.tagToCommit[tag] = childHash + } else { + // clean up HEAD -> branch-name to just keep the branch name + cleanRef := ref + if strings.HasPrefix(cleanRef, "HEAD -> ") { + cleanRef = strings.TrimPrefix(cleanRef, "HEAD -> ") + } + refs = append(refs, cleanRef) } } @@ -215,7 +225,7 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC commit := Commit{ Hash: childHash, - Tags: tags, + Refs: refs, Parents: parentHashes, } @@ -230,11 +240,6 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC } r.commitDetails[childHash] = &commit - - // Also populate the tag-to-commit map - for _, tag := range tags { - r.tagToCommit[tag] = childHash - } } logger.InfoContext(ctx, "Commit graph completed", slog.Int("new_commits", len(newCommits)), slog.Duration("duration", time.Since(start))) From f75101248ee86bc3ea2ba9b5be17347bfcbcfd30 Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 05:40:44 +0000 Subject: [PATCH 12/39] Fix lint --- go/cmd/gitter/repository.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 44185e17878..8b9cd29c623 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -178,11 +178,8 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC r.tagToCommit[tag] = childHash } else { // clean up HEAD -> branch-name to just keep the branch name - cleanRef := ref - if strings.HasPrefix(cleanRef, "HEAD -> ") { - cleanRef = strings.TrimPrefix(cleanRef, "HEAD -> ") - } - refs = append(refs, cleanRef) + ref = strings.TrimPrefix(ref, "HEAD -> ") + refs = append(refs, ref) } } From 1c610ade43562e8b6026e0c2fda25a071f2d851d Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 06:09:37 +0000 Subject: [PATCH 13/39] Address gemini review --- go/cmd/gitter/gitter.go | 20 +++++++++++++++++++- go/cmd/gitter/repository.go | 3 +++ go/cmd/gitter/repository_test.go | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 54e82c6bdbb..871afe347c5 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -466,6 +466,15 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() url := body.URL + // If request came from a local ip, don't do the check + if !isLocalRequest(r) { + // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// + if match, _ := regexp.MatchString("^(https?|git|ssh)://", url); !match { + http.Error(w, "Invalid url parameter", http.StatusBadRequest) + return + } + } + ctx := context.WithValue(r.Context(), urlKey, url) logger.InfoContext(ctx, "Received request: /cache") @@ -537,6 +546,15 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() url := body.URL + // If request came from a local ip, don't do the check + if !isLocalRequest(r) { + // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// + if match, _ := regexp.MatchString("^(https?|git|ssh)://", url); !match { + http.Error(w, "Invalid url parameter", http.StatusBadRequest) + return + } + } + introduced := []string{} fixed := []string{} lastAffected := []string{} @@ -618,7 +636,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { affectedCommits = repo.Affected(ctx, introduced, fixed, lastAffected, cherrypickIntro, cherrypickFixed) } - resp := &pb.AffectedCommitsResponse{} + resp := &pb.AffectedCommitsResponse{Commits: make([]*pb.AffectedCommit, 0, len(affectedCommits))} for _, c := range affectedCommits { resp.Commits = append(resp.Commits, &pb.AffectedCommit{ Hash: c.Hash[:], diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 8b9cd29c623..7d1d220c112 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -410,6 +410,9 @@ func (r *Repository) updatePatchID(commitHash, patchID SHA1) { func parseHash(hash string) (SHA1, error) { hashBytes, err := hex.DecodeString(hash) + if len(hashBytes) != 20 { + return SHA1{}, fmt.Errorf("invalid hash length: %d", len(hashBytes)) + } if err != nil { return SHA1{}, fmt.Errorf("failed to decode hash: %w", err) } diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 7c4e1eef7d7..3889b6475e0 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -639,7 +639,7 @@ func TestAffected_Cherrypick(t *testing.T) { repo.commitDetails[hC] = &Commit{Hash: hC, Parents: []SHA1{hB}, PatchID: c2} repo.commitDetails[hD] = &Commit{Hash: hD, Parents: []SHA1{hC}} repo.commitDetails[hE] = &Commit{Hash: hE, PatchID: c1} - repo.commitDetails[hF] = &Commit{Hash: hF, Parents: []SHA1{hB}} + repo.commitDetails[hF] = &Commit{Hash: hF, Parents: []SHA1{hE}} repo.commitDetails[hG] = &Commit{Hash: hG, Parents: []SHA1{hF}, PatchID: c2} repo.commitDetails[hH] = &Commit{Hash: hH, Parents: []SHA1{hG}} From 228ec5f9de85041737b46de722519851ad1ab0cb Mon Sep 17 00:00:00 2001 From: Joey L Date: Mon, 9 Mar 2026 22:53:41 +0000 Subject: [PATCH 14/39] Change naming tag->ref. Update test. --- go/cmd/gitter/repository.go | 16 +++++++++++----- go/cmd/gitter/repository_test.go | 5 +++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 7d1d220c112..89822db64ac 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -38,8 +38,8 @@ type Repository struct { commitGraph map[SHA1][]SHA1 // Actual commit details commitDetails map[SHA1]*Commit - // Store tags to commit because it's useful for CVE conversion - tagToCommit map[string]SHA1 + // Store refs to commit because it's useful for CVE conversion + refToCommit map[string]SHA1 // For cherry-pick detection: PatchID -> []commit hash patchIDToCommits map[SHA1][]SHA1 // Root commits (commits with no parents) @@ -60,7 +60,7 @@ func NewRepository(repoPath string) *Repository { repoPath: repoPath, commitGraph: make(map[SHA1][]SHA1), commitDetails: make(map[SHA1]*Commit), - tagToCommit: make(map[string]SHA1), + refToCommit: make(map[string]SHA1), patchIDToCommits: make(map[SHA1][]SHA1), } } @@ -170,12 +170,13 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC // refs are separated by commas rawRefs := strings.Split(commitInfo[2], ", ") for _, ref := range rawRefs { + if ref == "" { + continue + } // Remove prefixes from tags, other refs such as branches will be left as is if strings.HasPrefix(ref, "tag: ") { tag := strings.TrimPrefix(ref, "tag: ") refs = append(refs, tag) - // Also populate the tag-to-commit map - r.tagToCommit[tag] = childHash } else { // clean up HEAD -> branch-name to just keep the branch name ref = strings.TrimPrefix(ref, "HEAD -> ") @@ -237,6 +238,11 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC } r.commitDetails[childHash] = &commit + + // Also populate the ref-to-commit map + for _, ref := range refs { + r.refToCommit[ref] = childHash + } } logger.InfoContext(ctx, "Commit graph completed", slog.Int("new_commits", len(newCommits)), slog.Duration("duration", time.Since(start))) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 3889b6475e0..7ac2578a50f 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -79,8 +79,9 @@ func TestBuildCommitGraph(t *testing.T) { t.Errorf("expected 3 commits with details, got %d", len(r.commitDetails)) } - if len(r.tagToCommit) != 2 { - t.Errorf("expected 2 tags, got %d", len(r.tagToCommit)) + // 2 tags + main branch + if len(r.refToCommit) != 3 { + t.Errorf("expected 3 refs, got %d", len(r.refToCommit)) } } From 19951ae1f8329d389a4345341aca950093ffaae0 Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 10 Mar 2026 00:59:56 +0000 Subject: [PATCH 15/39] Don't do maps.Clone everytime --- go/cmd/gitter/repository.go | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 89822db64ac..c69245e86b0 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -505,7 +505,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs for _, intro := range introduced { // BFS from intro queue := []SHA1{intro} - terminateMap := maps.Clone(fixedMap) + unaffectableMap := make(map[SHA1]struct{}) affectedFromIntro := make(map[SHA1]struct{}) visited := make(map[SHA1]struct{}) @@ -518,25 +518,31 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs } visited[curr] = struct{}{} - // When we hit a fixed commit (or its descendants), we aggressively map its - // entire downstream tree as terminated to satisfy the "any path blocked" rule. - if _, ok := terminateMap[curr]; ok { - // Inline DFS from current node to make all descendants unaffected / unaffectable - // 1. If a previous path added it to affected list, remove - // 2. Add to terminate set + // Descendant of a fixed commit + if _, ok := unaffectableMap[curr]; ok { + continue + } + + // If we hit a fixed commit, its entire tree is treated as a unaffectable + // as any downstream commit can go through this fixed commit to become unaffected + if _, ok := fixedMap[curr]; ok { + unaffectableMap[curr] = struct{}{} + // Inline DFS from current (fixed) node to make all descendants as unaffected / unaffectable + // 1. If a previous path added the descendant to affected list, remove it + // 2. Add to the unaffectable set to block future paths stack := []SHA1{curr} for len(stack) > 0 { unaffected := stack[len(stack)-1] stack = stack[:len(stack)-1] - // Remove from affected list if a bypass path added it previously + // Remove from affected list if it was reached via a previous non-fixed path. delete(affectedFromIntro, unaffected) if children, ok := r.commitGraph[unaffected]; ok { for _, child := range children { - // If child is not already in the terminateSet, we want to traverse that path and mark descendants as unaffectable - if _, ok := terminateMap[child]; !ok { - terminateMap[child] = struct{}{} + // Continue down the path if the child isn't already blocked. + if _, ok := unaffectableMap[child]; !ok { + unaffectableMap[child] = struct{}{} stack = append(stack, child) } } @@ -546,7 +552,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs continue } - // If not in terminateSet, add to the intro-specific affected list and continue + // Otherwise, add to the intro-specific affected list and continue affectedFromIntro[curr] = struct{}{} if children, ok := r.commitGraph[curr]; ok { queue = append(queue, children...) From 97b9e25a988b575da8c7a4528bcec5f0370ab3aa Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 10 Mar 2026 02:17:59 +0000 Subject: [PATCH 16/39] Put duplicated hash parsing into cleaner function --- go/cmd/gitter/repository.go | 99 +++++++++---------------------------- 1 file changed, 22 insertions(+), 77 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index c69245e86b0..8d13c1dadcd 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -414,63 +414,37 @@ func (r *Repository) updatePatchID(commitHash, patchID SHA1) { r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], commitHash) } -func parseHash(hash string) (SHA1, error) { - hashBytes, err := hex.DecodeString(hash) - if len(hashBytes) != 20 { - return SHA1{}, fmt.Errorf("invalid hash length: %d", len(hashBytes)) - } - if err != nil { - return SHA1{}, fmt.Errorf("failed to decode hash: %w", err) - } - - return SHA1(hashBytes), nil -} - -// Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events -func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs []string, cherrypickIntro, cherrypickFixed bool) []*Commit { - introduced := make([]SHA1, 0, len(introStrs)) - fixed := make([]SHA1, 0, len(fixedStrs)) - lastAffected := make([]SHA1, 0, len(laStrs)) - - // Convert string input into SHA1 - // Introduced can be 0 and we'll replace it with the root commit - for _, s := range introStrs { - if s == "0" { - introduced = append(introduced, r.rootCommits...) +// parseHashes converts slice of string hashes input into slice of SHA1 +func (r *Repository) parseHashes(ctx context.Context, hashesStr []string, isIntroduced bool) []SHA1 { + hashes := make([]SHA1, 0, len(hashesStr)) + for _, hash := range hashesStr { + if isIntroduced && hash == "0" { + hashes = append(hashes, r.rootCommits...) continue } - sha, err := parseHash(s) + hashBytes, err := hex.DecodeString(hash) + // Log error but continue with the rest of the hashes if a commit hash is invalid if err != nil { - // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash: introduced", slog.String("hash", s), slog.Any("err", err)) + logger.ErrorContext(ctx, "failed to decode commit hash", slog.String("hash", hash), slog.Any("err", err)) continue } - - introduced = append(introduced, sha) - } - - for _, s := range fixedStrs { - sha, err := parseHash(s) - if err != nil { - // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash: fixed", slog.String("hash", s), slog.Any("err", err)) + if len(hashBytes) != 20 { + logger.ErrorContext(ctx, "invalid hash length", slog.String("hash", hash), slog.Int("len", len(hashBytes))) continue } - fixed = append(fixed, sha) + hashes = append(hashes, SHA1(hashBytes)) } - for _, s := range laStrs { - sha, err := parseHash(s) - if err != nil { - // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash: last_affected", slog.String("hash", s), slog.Any("err", err)) - continue - } + return hashes +} - lastAffected = append(lastAffected, sha) - } +// Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events +func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs []string, cherrypickIntro, cherrypickFixed bool) []*Commit { + introduced := r.parseHashes(ctx, introStrs, true) + fixed := r.parseHashes(ctx, fixedStrs, false) + lastAffected := r.parseHashes(ctx, laStrs, false) // Expands the introduced and fixed commits to include cherrypick equivalents // lastAffected should not be expanded because it does not imply a "fix" commit that can be cherrypicked to other branches @@ -523,7 +497,7 @@ func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs continue } - // If we hit a fixed commit, its entire tree is treated as a unaffectable + // If we hit a fixed commit, its entire tree is treated as unaffectable // as any downstream commit can go through this fixed commit to become unaffected if _, ok := fixedMap[curr]; ok { unaffectableMap[curr] = struct{}{} @@ -601,37 +575,8 @@ func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { // Between walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) func (r *Repository) Limit(ctx context.Context, introStrs, limitStrs []string) []*Commit { - introduced := []SHA1{} - limit := []SHA1{} - - // Convert string input into SHA1 - // Introduced can be 0 and we'll replace it with the root commit - for _, s := range introStrs { - if s == "0" { - introduced = append(introduced, r.rootCommits...) - continue - } - - sha, err := parseHash(s) - if err != nil { - // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) - continue - } - - introduced = append(introduced, sha) - } - - for _, s := range limitStrs { - sha, err := parseHash(s) - if err != nil { - // Log error and continue if a commit hash is invalid - logger.ErrorContext(ctx, "failed to parse commit hash", slog.String("hash", s), slog.Any("err", err)) - continue - } - - limit = append(limit, sha) - } + introduced := r.parseHashes(ctx, introStrs, true) + limit := r.parseHashes(ctx, limitStrs, false) var affectedCommits []*Commit From 354351e3add46e4ce4e62d72b898cf4dcaa00d4e Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 10 Mar 2026 03:01:29 +0000 Subject: [PATCH 17/39] Use cmp.Options to simply prettify and sorting --- go/cmd/gitter/repository_test.go | 138 ++++++------------------------- 1 file changed, 27 insertions(+), 111 deletions(-) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 7ac2578a50f..85bc524fb60 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -7,11 +7,11 @@ import ( "os" "os/exec" "path/filepath" - "sort" "strings" "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" ) // A very simple test repository with 3 commits and 2 tags. @@ -104,7 +104,7 @@ func TestCalculatePatchIDs(t *testing.T) { for _, hash := range newCommits { details := r.commitDetails[hash] if details.PatchID == [20]byte{} { - t.Errorf("missing patch ID for commit %x", hash) + t.Errorf("missing patch ID for commit %s", printSHA1(hash)) } } } @@ -134,7 +134,7 @@ func TestLoadRepository(t *testing.T) { // Check that the two sets of Patch IDs are the same for hash, details := range r1.commitDetails { if details.PatchID != r2.commitDetails[hash].PatchID { - t.Errorf("patch ID mismatch for commit %x", hash) + t.Errorf("patch ID mismatch for commit %s", printSHA1(hash)) } } } @@ -166,6 +166,19 @@ func printSHA1(hash SHA1) string { return strings.TrimLeft(str, "0") } +var cmpSHA1Opts = []cmp.Option{ + cmp.Transformer("SHA1s", func(in []SHA1) []string { + out := make([]string, len(in)) + for i, h := range in { + out[i] = printSHA1(h) + } + return out + }), + cmpopts.SortSlices(func(a, b string) bool { + return a < b + }), +} + func TestExpandByCherrypick(t *testing.T) { repo := NewRepository("/repo") @@ -206,14 +219,7 @@ func TestExpandByCherrypick(t *testing.T) { t.Run(tt.name, func(t *testing.T) { got := repo.expandByCherrypick(tt.input) - sort.Slice(got, func(i, j int) bool { - return string(got[i][:]) < string(got[j][:]) - }) - sort.Slice(tt.expected, func(i, j int) bool { - return string(tt.expected[i][:]) < string(tt.expected[j][:]) - }) - - if diff := cmp.Diff(tt.expected, got); diff != "" { + if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { t.Errorf("expandByCherrypick() mismatch (-want +got):\n%s", diff) } }) @@ -331,26 +337,8 @@ func TestAffected_Introduced_Fixed(t *testing.T) { got = append(got, c.Hash) } - // Sort got and expected for comparison - sort.Slice(got, func(i, j int) bool { - return string(got[i][:]) < string(got[j][:]) - }) - sort.Slice(tt.expected, func(i, j int) bool { - return string(tt.expected[i][:]) < string(tt.expected[j][:]) - }) - - if diff := cmp.Diff(tt.expected, got); diff != "" { - // Turn them back into strings so it's easier to read - gotStr := make([]string, len(got)) - for i, c := range got { - gotStr[i] = printSHA1(c) - } - expectedStr := make([]string, len(tt.expected)) - for i, c := range tt.expected { - expectedStr[i] = printSHA1(c) - } - - t.Errorf("TestAffected_Introduced_Fixed() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { + t.Errorf("TestAffected_Introduced_Fixed() mismatch (-want +got):\n%s", diff) } }) } @@ -466,26 +454,8 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { got = append(got, c.Hash) } - // Sort got and expected for comparison - sort.Slice(got, func(i, j int) bool { - return string(got[i][:]) < string(got[j][:]) - }) - sort.Slice(tt.expected, func(i, j int) bool { - return string(tt.expected[i][:]) < string(tt.expected[j][:]) - }) - - if diff := cmp.Diff(tt.expected, got); diff != "" { - // Turn them back into strings so it's easier to read - gotStr := make([]string, len(got)) - for i, c := range got { - gotStr[i] = printSHA1(c) - } - expectedStr := make([]string, len(tt.expected)) - for i, c := range tt.expected { - expectedStr[i] = printSHA1(c) - } - - t.Errorf("TestAffected_Introduced_LastAffected() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { + t.Errorf("TestAffected_Introduced_LastAffected() mismatch (-want +got):\n%s", diff) } }) } @@ -575,26 +545,8 @@ func TestAffected_Combined(t *testing.T) { got = append(got, c.Hash) } - // Sort got and expected for comparison - sort.Slice(got, func(i, j int) bool { - return string(got[i][:]) < string(got[j][:]) - }) - sort.Slice(tt.expected, func(i, j int) bool { - return string(tt.expected[i][:]) < string(tt.expected[j][:]) - }) - - if diff := cmp.Diff(tt.expected, got); diff != "" { - // Turn them back into strings so it's easier to read - gotStr := make([]string, len(got)) - for i, c := range got { - gotStr[i] = printSHA1(c) - } - expectedStr := make([]string, len(tt.expected)) - for i, c := range tt.expected { - expectedStr[i] = printSHA1(c) - } - - t.Errorf("TestAffected_Combined() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { + t.Errorf("TestAffected_Combined() mismatch (-want +got):\n%s", diff) } }) } @@ -697,26 +649,8 @@ func TestAffected_Cherrypick(t *testing.T) { got = append(got, c.Hash) } - // Sort got and expected for comparison - sort.Slice(got, func(i, j int) bool { - return string(got[i][:]) < string(got[j][:]) - }) - sort.Slice(tt.expected, func(i, j int) bool { - return string(tt.expected[i][:]) < string(tt.expected[j][:]) - }) - - if diff := cmp.Diff(tt.expected, got); diff != "" { - // Turn them back into strings so it's easier to read - gotStr := make([]string, len(got)) - for i, c := range got { - gotStr[i] = printSHA1(c) - } - expectedStr := make([]string, len(tt.expected)) - for i, c := range tt.expected { - expectedStr[i] = printSHA1(c) - } - - t.Errorf("TestAffected_Cherrypick() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { + t.Errorf("TestAffected_Cherrypick() mismatch (-want +got):\n%s", diff) } }) } @@ -802,26 +736,8 @@ func TestLimit(t *testing.T) { got = append(got, c.Hash) } - // Sort got and expected for comparison - sort.Slice(got, func(i, j int) bool { - return string(got[i][:]) < string(got[j][:]) - }) - sort.Slice(tt.expected, func(i, j int) bool { - return string(tt.expected[i][:]) < string(tt.expected[j][:]) - }) - - if diff := cmp.Diff(tt.expected, got); diff != "" { - // Turn them back into strings so it's easier to read - gotStr := make([]string, len(got)) - for i, c := range got { - gotStr[i] = printSHA1(c) - } - expectedStr := make([]string, len(tt.expected)) - for i, c := range tt.expected { - expectedStr[i] = printSHA1(c) - } - - t.Errorf("TestLimit() mismatch\nGot: %v\nExpected: %v", gotStr, expectedStr) + if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { + t.Errorf("TestLimit() mismatch (-want +got):\n%s", diff) } }) } From 566a8157977ec7df8dcd85320c01024297e93e11 Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 10 Mar 2026 03:09:49 +0000 Subject: [PATCH 18/39] lint --- go/cmd/gitter/repository_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 85bc524fb60..45f98330d75 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -172,6 +172,7 @@ var cmpSHA1Opts = []cmp.Option{ for i, h := range in { out[i] = printSHA1(h) } + return out }), cmpopts.SortSlices(func(a, b string) bool { From c41131385ae22dca1f9c629beea599378ca0a450 Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 10 Mar 2026 04:58:43 +0000 Subject: [PATCH 19/39] Gemini review changes --- go/cmd/gitter/gitter.go | 8 +++++--- go/cmd/gitter/gitter_test.go | 8 ++++---- go/cmd/gitter/repository.go | 2 +- go/cmd/gitter/repository_test.go | 3 +-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 871afe347c5..69fec9d1571 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -75,6 +75,8 @@ var ( semaphore chan struct{} // Request concurrency control ) +var validURLRegex = regexp.MustCompile(`^(https?|git|ssh)://`) + const shutdownTimeout = 10 * time.Second // repoLocks is a map of per-repository RWMutexes, with url as the key. @@ -384,7 +386,7 @@ func gitHandler(w http.ResponseWriter, r *http.Request) { // If request came from a local ip, don't do the check if !isLocalRequest(r) { // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// - if match, _ := regexp.MatchString("^(https?|git|ssh)://", url); !match { + if !validURLRegex.MatchString(url) { http.Error(w, "Invalid url parameter", http.StatusBadRequest) return } @@ -469,7 +471,7 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { // If request came from a local ip, don't do the check if !isLocalRequest(r) { // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// - if match, _ := regexp.MatchString("^(https?|git|ssh)://", url); !match { + if !validURLRegex.MatchString(url) { http.Error(w, "Invalid url parameter", http.StatusBadRequest) return } @@ -549,7 +551,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { // If request came from a local ip, don't do the check if !isLocalRequest(r) { // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// - if match, _ := regexp.MatchString("^(https?|git|ssh)://", url); !match { + if !validURLRegex.MatchString(url) { http.Error(w, "Invalid url parameter", http.StatusBadRequest) return } diff --git a/go/cmd/gitter/gitter_test.go b/go/cmd/gitter/gitter_test.go index f0107e77dc2..f41949f6ebf 100644 --- a/go/cmd/gitter/gitter_test.go +++ b/go/cmd/gitter/gitter_test.go @@ -238,16 +238,16 @@ func TestAffectedCommitsHandler(t *testing.T) { t.Run(tt.name, func(t *testing.T) { var events []Event for _, h := range tt.introduced { - events = append(events, Event{Type: "introduced", Hash: h}) + events = append(events, Event{Type: EventTypeIntroduced, Hash: h}) } for _, h := range tt.fixed { - events = append(events, Event{Type: "fixed", Hash: h}) + events = append(events, Event{Type: EventTypeFixed, Hash: h}) } for _, h := range tt.lastAffected { - events = append(events, Event{Type: "last_affected", Hash: h}) + events = append(events, Event{Type: EventTypeLastAffected, Hash: h}) } for _, h := range tt.limit { - events = append(events, Event{Type: "limit", Hash: h}) + events = append(events, Event{Type: EventTypeLimit, Hash: h}) } for _, h := range tt.invalidType { events = append(events, Event{Type: "invalid_type", Hash: h}) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 8d13c1dadcd..309404b7afb 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -622,7 +622,7 @@ func (r *Repository) Limit(ctx context.Context, introStrs, limitStrs []string) [ continue } - // Add first parent to stack to only walk the linear branch + // In git merge, first parent is the HEAD commit at the time of merge (on the branch that gets merged into) if len(details.Parents) > 0 { stack = append(stack, details.Parents[0]) } diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 45f98330d75..b0c7fd4d0e6 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -3,7 +3,6 @@ package main import ( "context" "encoding/hex" - "fmt" "os" "os/exec" "path/filepath" @@ -143,7 +142,7 @@ func TestLoadRepository(t *testing.T) { func decodeSHA1(s string) SHA1 { var hash SHA1 // Pad with zeros because the test strings are shorter than 40 char - padded := fmt.Sprintf("%040s", s) + padded := strings.Repeat("0", 40-len(s)) + s b, err := hex.DecodeString(padded) if err != nil { panic(err) From 2aba9ca9cfc2ff73635b9ba5096c5de41cb65412 Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 10 Mar 2026 05:32:33 +0000 Subject: [PATCH 20/39] Fix how we reset the global vars for tests --- go/cmd/gitter/gitter_test.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/go/cmd/gitter/gitter_test.go b/go/cmd/gitter/gitter_test.go index f41949f6ebf..785a88a0f5b 100644 --- a/go/cmd/gitter/gitter_test.go +++ b/go/cmd/gitter/gitter_test.go @@ -79,13 +79,24 @@ func TestGitHandler_InvalidURL(t *testing.T) { } } +func resetSaveTimer() { + lastFetchMu.Lock() + defer lastFetchMu.Unlock() + if saveTimer != nil { + saveTimer.Stop() + saveTimer = nil + } +} + // Override global variables for test // Note: In a real app we might want to dependency inject these, // but for this simple script we modify package globals. func setupTest(t *testing.T) { t.Helper() - tmpDir := t.TempDir() + resetSaveTimer() + + tmpDir := t.TempDir() gitStorePath = tmpDir persistencePath = tmpDir + "/last-fetch.json" // Use simple path join for test fetchTimeout = time.Minute @@ -98,11 +109,7 @@ func setupTest(t *testing.T) { // Initialize semaphore for tests semaphore = make(chan struct{}, 100) - // Stop any existing timer - if saveTimer != nil { - saveTimer.Stop() - saveTimer = nil - } + t.Cleanup(resetSaveTimer) } func TestGitHandler_Integration(t *testing.T) { From dc52dff7efbed35e7827b5cca1138c78b1ed11c2 Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 10 Mar 2026 23:13:33 +0000 Subject: [PATCH 21/39] robot wants better tests, robot gets better tests --- go/cmd/gitter/gitter_test.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/go/cmd/gitter/gitter_test.go b/go/cmd/gitter/gitter_test.go index 785a88a0f5b..710e9b47684 100644 --- a/go/cmd/gitter/gitter_test.go +++ b/go/cmd/gitter/gitter_test.go @@ -2,12 +2,17 @@ package main import ( "bytes" + "encoding/hex" "encoding/json" "errors" "net/http" "net/http/httptest" "testing" "time" + + "github.com/google/go-cmp/cmp" + pb "github.com/google/osv.dev/go/cmd/gitter/pb/repository" + "google.golang.org/protobuf/proto" ) func TestGetRepoDirName(t *testing.T) { @@ -211,6 +216,7 @@ func TestAffectedCommitsHandler(t *testing.T) { limit []string invalidType []string expectedCode int + expectedBody []string }{ { name: "Valid range in public repo", @@ -218,6 +224,7 @@ func TestAffectedCommitsHandler(t *testing.T) { introduced: []string{"3350c55f9525cb83fc3e0b61bde076433c2da8dc"}, fixed: []string{"8920ed8e47c660a0c20c28cb1004a600780c5b59"}, expectedCode: http.StatusOK, + expectedBody: []string{"3350c55f9525cb83fc3e0b61bde076433c2da8dc"}, }, { name: "Invalid mixed limit and fixed", @@ -277,6 +284,27 @@ func TestAffectedCommitsHandler(t *testing.T) { t.Errorf("handler returned wrong status code: got %v want %v", status, tt.expectedCode) } + + if tt.expectedBody == nil { + return + } + + respBody := &pb.AffectedCommitsResponse{} + if err := proto.Unmarshal(rr.Body.Bytes(), respBody); err != nil { + t.Fatalf("Failed to unmarshal response: %v", err) + } + + var gotHashes []string + for _, c := range respBody.GetCommits() { + gotHashes = append(gotHashes, hex.EncodeToString(c.GetHash())) + } + if gotHashes == nil { + gotHashes = []string{} + } + + if diff := cmp.Diff(tt.expectedBody, gotHashes); diff != "" { + t.Errorf("handler returned wrong body (-want +got):\n%s", diff) + } }) } } From 38e23536e465e687aa0368c95c7727c6605f0d20 Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 11 Mar 2026 03:07:07 +0000 Subject: [PATCH 22/39] address comments on comments --- go/cmd/gitter/gitter.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 69fec9d1571..847d7d59345 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -410,7 +410,6 @@ func gitHandler(w http.ResponseWriter, r *http.Request) { // That is highly unlikely in our use case, as importer only queries // the repo once, and always with force update. // This is a tradeoff for simplicity to avoid having to setup locks per repo. - // I can't change singleflight's interface _, err, _ := gFetch.Do(url, func() (any, error) { return nil, FetchRepo(ctx, url, forceUpdate) }) @@ -427,7 +426,6 @@ func gitHandler(w http.ResponseWriter, r *http.Request) { } // Archive repo - // I can't change singleflight's interface fileDataAny, err, _ := gArchive.Do(url, func() (any, error) { return ArchiveRepo(ctx, url) }) @@ -492,7 +490,6 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) // Fetch repo if it's not fresh - // I can't change singleflight's interface if _, err, _ := gFetch.Do(url, func() (any, error) { return nil, FetchRepo(ctx, url, body.ForceUpdate) }); err != nil { @@ -510,7 +507,6 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { repoDirName := getRepoDirName(url) repoPath := filepath.Join(gitStorePath, repoDirName) - // I can't change singleflight's interface _, err, _ = gLoad.Do(repoPath, func() (any, error) { repoLock := GetRepoLock(url) repoLock.RLock() @@ -597,7 +593,6 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { } // Fetch repo if it's not fresh - // I can't change singleflight's interface if _, err, _ := gFetch.Do(url, func() (any, error) { return nil, FetchRepo(ctx, url, body.ForceUpdate) }); err != nil { @@ -619,7 +614,6 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { repoLock.RLock() defer repoLock.RUnlock() - // I can't change singleflight's interface repoAny, err, _ := gLoad.Do(repoPath, func() (any, error) { return LoadRepository(ctx, repoPath) }) From 616046e63e12a28a388ce250e0cd58c449e2e675 Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 11 Mar 2026 05:23:19 +0000 Subject: [PATCH 23/39] Proto-ify events and split it into SeparatedEvents --- go/cmd/gitter/gitter.go | 153 +++++----- go/cmd/gitter/gitter_test.go | 35 ++- go/cmd/gitter/pb/repository/repository.pb.go | 289 ++++++++++++++++++- go/cmd/gitter/pb/repository/repository.proto | 25 ++ go/cmd/gitter/repository.go | 14 +- go/cmd/gitter/repository_test.go | 36 ++- 6 files changed, 440 insertions(+), 112 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 847d7d59345..f7c1b341770 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -6,7 +6,6 @@ import ( "context" "crypto/sha256" "encoding/hex" - "encoding/json" "flag" "fmt" "io" @@ -29,6 +28,7 @@ import ( "github.com/google/osv.dev/go/logger" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "golang.org/x/sync/singleflight" + "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" pb "github.com/google/osv.dev/go/cmd/gitter/pb/repository" @@ -51,20 +51,6 @@ var endpointHandlers = map[string]http.HandlerFunc{ "POST /affected-commits": affectedCommitsHandler, } -type EventType string - -const ( - EventTypeIntroduced EventType = "introduced" - EventTypeFixed EventType = "fixed" - EventTypeLastAffected EventType = "last_affected" - EventTypeLimit EventType = "limit" -) - -type Event struct { - Type EventType `json:"eventType"` - Hash string `json:"hash"` -} - var ( gFetch singleflight.Group gArchive singleflight.Group @@ -79,6 +65,37 @@ var validURLRegex = regexp.MustCompile(`^(https?|git|ssh)://`) const shutdownTimeout = 10 * time.Second +type SeparatedEvents struct { + Introduced []string + Fixed []string + LastAffected []string + Limit []string +} + +func separateEvents(events []*pb.Event) (*SeparatedEvents, error) { + se := &SeparatedEvents{} + for _, event := range events { + switch event.EventType { + case pb.EventType_INTRODUCED: + se.Introduced = append(se.Introduced, event.Hash) + case pb.EventType_FIXED: + se.Fixed = append(se.Fixed, event.Hash) + case pb.EventType_LAST_AFFECTED: + se.LastAffected = append(se.LastAffected, event.Hash) + case pb.EventType_LIMIT: + se.Limit = append(se.Limit, event.Hash) + default: + return nil, fmt.Errorf("invalid event type: %s", event.EventType) + } + } + + if len(se.Limit) > 0 && (len(se.Fixed) > 0 || len(se.LastAffected) > 0) { + return nil, fmt.Errorf("limit and fixed/last_affected shouldn't exist in the same request") + } + + return se, nil +} + // repoLocks is a map of per-repository RWMutexes, with url as the key. // It coordinates access between write operations (FetchRepo) that modify the git directory on disk // and read operations (ArchiveRepo, LoadRepository, etc). @@ -453,19 +470,28 @@ func gitHandler(w http.ResponseWriter, r *http.Request) { func cacheHandler(w http.ResponseWriter, r *http.Request) { start := time.Now() // POST requets body processing - var body struct { - URL string `json:"url"` - ForceUpdate bool `json:"force_update"` - } - err := json.NewDecoder(r.Body).Decode(&body) + data, err := io.ReadAll(r.Body) if err != nil { - http.Error(w, fmt.Sprintf("Error decoding JSON: %v", err), http.StatusBadRequest) - + http.Error(w, fmt.Sprintf("Error reading body: %v", err), http.StatusBadRequest) return } defer r.Body.Close() - url := body.URL + body := &pb.CacheRequest{} + contentType := r.Header.Get("Content-Type") + if contentType == "application/x-protobuf" { + err = proto.Unmarshal(data, body) + } else { + // Default to JSON/protojson + err = protojson.Unmarshal(data, body) + } + + if err != nil { + http.Error(w, fmt.Sprintf("Error unmarshaling request: %v", err), http.StatusBadRequest) + return + } + + url := body.Url // If request came from a local ip, don't do the check if !isLocalRequest(r) { // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// @@ -528,22 +554,28 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { start := time.Now() // POST requets body processing - var body struct { - URL string `json:"url"` - Events []Event `json:"events"` - DetectCherrypicksIntroduced bool `json:"detect_cherrypicks_introduced"` - DetectCherrypicksFixed bool `json:"detect_cherrypicks_fixed"` - ForceUpdate bool `json:"force_update"` - } - err := json.NewDecoder(r.Body).Decode(&body) + data, err := io.ReadAll(r.Body) if err != nil { - http.Error(w, fmt.Sprintf("Error decoding JSON: %v", err), http.StatusBadRequest) - + http.Error(w, fmt.Sprintf("Error reading body: %v", err), http.StatusBadRequest) return } defer r.Body.Close() - url := body.URL + body := &pb.AffectedCommitsRequest{} + contentType := r.Header.Get("Content-Type") + if contentType == "application/x-protobuf" { + err = proto.Unmarshal(data, body) + } else { + // Default to JSON/protojson + err = protojson.Unmarshal(data, body) + } + + if err != nil { + http.Error(w, fmt.Sprintf("Error unmarshaling request: %v", err), http.StatusBadRequest) + return + } + + url := body.Url // If request came from a local ip, don't do the check if !isLocalRequest(r) { // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// @@ -553,45 +585,23 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { } } - introduced := []string{} - fixed := []string{} - lastAffected := []string{} - limit := []string{} + se, err := separateEvents(body.Events) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + cherrypickIntro := body.DetectCherrypicksIntroduced cherrypickFixed := body.DetectCherrypicksFixed ctx := context.WithValue(r.Context(), urlKey, url) - for _, event := range body.Events { - switch event.Type { - case EventTypeIntroduced: - introduced = append(introduced, event.Hash) - case EventTypeFixed: - fixed = append(fixed, event.Hash) - case EventTypeLastAffected: - lastAffected = append(lastAffected, event.Hash) - case EventTypeLimit: - limit = append(limit, event.Hash) - default: - logger.ErrorContext(ctx, "Invalid event type", slog.String("event_type", string(event.Type))) - http.Error(w, fmt.Sprintf("Invalid event type: %s", event.Type), http.StatusBadRequest) - - return - } - } - logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", introduced), slog.Any("fixed", fixed), slog.Any("last_affected", lastAffected), slog.Any("limit", limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed)) + logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", se.Introduced), slog.Any("fixed", se.Fixed), slog.Any("last_affected", se.LastAffected), slog.Any("limit", se.Limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed)) semaphore <- struct{}{} defer func() { <-semaphore }() logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) - // Limit and fixed/last_affected shouldn't exist in the same request as it doesn't make sense - if (len(fixed) > 0 || len(lastAffected) > 0) && len(limit) > 0 { - http.Error(w, "Limit and fixed/last_affected shouldn't exist in the same request", http.StatusBadRequest) - - return - } - // Fetch repo if it's not fresh if _, err, _ := gFetch.Do(url, func() (any, error) { return nil, FetchRepo(ctx, url, body.ForceUpdate) @@ -626,10 +636,10 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { repo := repoAny.(*Repository) var affectedCommits []*Commit - if len(limit) > 0 { - affectedCommits = repo.Limit(ctx, introduced, limit) + if len(se.Limit) > 0 { + affectedCommits = repo.Limit(ctx, se) } else { - affectedCommits = repo.Affected(ctx, introduced, fixed, lastAffected, cherrypickIntro, cherrypickFixed) + affectedCommits = repo.Affected(ctx, se, cherrypickIntro, cherrypickFixed) } resp := &pb.AffectedCommitsResponse{Commits: make([]*pb.AffectedCommit, 0, len(affectedCommits))} @@ -640,7 +650,15 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { }) } - out, err := proto.Marshal(resp) + var out []byte + if contentType == "application/x-protobuf" { + out, err = proto.Marshal(resp) + w.Header().Set("Content-Type", "application/x-protobuf") + } else { + out, err = protojson.Marshal(resp) + w.Header().Set("Content-Type", "application/json") + } + if err != nil { logger.ErrorContext(ctx, "Error marshaling affected commits", slog.Any("error", err)) http.Error(w, fmt.Sprintf("Error marshaling affected commits: %v", err), http.StatusInternalServerError) @@ -648,7 +666,6 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { return } - w.Header().Set("Content-Type", "application/x-protobuf") w.WriteHeader(http.StatusOK) if _, err := w.Write(out); err != nil { logger.ErrorContext(ctx, "Error writing response", slog.Any("error", err)) diff --git a/go/cmd/gitter/gitter_test.go b/go/cmd/gitter/gitter_test.go index 710e9b47684..6d11a038668 100644 --- a/go/cmd/gitter/gitter_test.go +++ b/go/cmd/gitter/gitter_test.go @@ -3,7 +3,6 @@ package main import ( "bytes" "encoding/hex" - "encoding/json" "errors" "net/http" "net/http/httptest" @@ -12,6 +11,7 @@ import ( "github.com/google/go-cmp/cmp" pb "github.com/google/osv.dev/go/cmd/gitter/pb/repository" + "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" ) @@ -184,7 +184,8 @@ func TestCacheHandler(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - body, _ := json.Marshal(map[string]string{"url": tt.url}) + reqProto := &pb.CacheRequest{Url: tt.url} + body, _ := protojson.Marshal(reqProto) req, err := http.NewRequest(http.MethodPost, "/cache", bytes.NewBuffer(body)) if err != nil { t.Fatal(err) @@ -250,29 +251,29 @@ func TestAffectedCommitsHandler(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var events []Event + var events []*pb.Event for _, h := range tt.introduced { - events = append(events, Event{Type: EventTypeIntroduced, Hash: h}) + events = append(events, &pb.Event{EventType: pb.EventType_INTRODUCED, Hash: h}) } for _, h := range tt.fixed { - events = append(events, Event{Type: EventTypeFixed, Hash: h}) + events = append(events, &pb.Event{EventType: pb.EventType_FIXED, Hash: h}) } for _, h := range tt.lastAffected { - events = append(events, Event{Type: EventTypeLastAffected, Hash: h}) + events = append(events, &pb.Event{EventType: pb.EventType_LAST_AFFECTED, Hash: h}) } for _, h := range tt.limit { - events = append(events, Event{Type: EventTypeLimit, Hash: h}) + events = append(events, &pb.Event{EventType: pb.EventType_LIMIT, Hash: h}) } for _, h := range tt.invalidType { - events = append(events, Event{Type: "invalid_type", Hash: h}) + events = append(events, &pb.Event{EventType: 999, Hash: h}) } - reqBody := map[string]any{ - "url": tt.url, - "events": events, + reqProto := &pb.AffectedCommitsRequest{ + Url: tt.url, + Events: events, } - body, _ := json.Marshal(reqBody) + body, _ := protojson.Marshal(reqProto) req, err := http.NewRequest(http.MethodPost, "/affected-commits", bytes.NewBuffer(body)) if err != nil { t.Fatal(err) @@ -290,8 +291,14 @@ func TestAffectedCommitsHandler(t *testing.T) { } respBody := &pb.AffectedCommitsResponse{} - if err := proto.Unmarshal(rr.Body.Bytes(), respBody); err != nil { - t.Fatalf("Failed to unmarshal response: %v", err) + if rr.Header().Get("Content-Type") == "application/json" { + if err := protojson.Unmarshal(rr.Body.Bytes(), respBody); err != nil { + t.Fatalf("Failed to unmarshal JSON response: %v", err) + } + } else { + if err := proto.Unmarshal(rr.Body.Bytes(), respBody); err != nil { + t.Fatalf("Failed to unmarshal proto response: %v", err) + } } var gotHashes []string diff --git a/go/cmd/gitter/pb/repository/repository.pb.go b/go/cmd/gitter/pb/repository/repository.pb.go index 3c1e0bb1928..c3df1435065 100644 --- a/go/cmd/gitter/pb/repository/repository.pb.go +++ b/go/cmd/gitter/pb/repository/repository.pb.go @@ -21,6 +21,58 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) +type EventType int32 + +const ( + EventType_INTRODUCED EventType = 0 + EventType_FIXED EventType = 1 + EventType_LAST_AFFECTED EventType = 2 + EventType_LIMIT EventType = 3 +) + +// Enum value maps for EventType. +var ( + EventType_name = map[int32]string{ + 0: "INTRODUCED", + 1: "FIXED", + 2: "LAST_AFFECTED", + 3: "LIMIT", + } + EventType_value = map[string]int32{ + "INTRODUCED": 0, + "FIXED": 1, + "LAST_AFFECTED": 2, + "LIMIT": 3, + } +) + +func (x EventType) Enum() *EventType { + p := new(EventType) + *p = x + return p +} + +func (x EventType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (EventType) Descriptor() protoreflect.EnumDescriptor { + return file_repository_proto_enumTypes[0].Descriptor() +} + +func (EventType) Type() protoreflect.EnumType { + return &file_repository_proto_enumTypes[0] +} + +func (x EventType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use EventType.Descriptor instead. +func (EventType) EnumDescriptor() ([]byte, []int) { + return file_repository_proto_rawDescGZIP(), []int{0} +} + type CommitDetail struct { state protoimpl.MessageState `protogen:"open.v1"` Hash []byte `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"` @@ -215,6 +267,186 @@ func (x *AffectedCommitsResponse) GetCommits() []*AffectedCommit { return nil } +type Event struct { + state protoimpl.MessageState `protogen:"open.v1"` + EventType EventType `protobuf:"varint,1,opt,name=event_type,json=eventType,proto3,enum=gitter.EventType" json:"event_type,omitempty"` + Hash string `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Event) Reset() { + *x = Event{} + mi := &file_repository_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Event) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Event) ProtoMessage() {} + +func (x *Event) ProtoReflect() protoreflect.Message { + mi := &file_repository_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Event.ProtoReflect.Descriptor instead. +func (*Event) Descriptor() ([]byte, []int) { + return file_repository_proto_rawDescGZIP(), []int{4} +} + +func (x *Event) GetEventType() EventType { + if x != nil { + return x.EventType + } + return EventType_INTRODUCED +} + +func (x *Event) GetHash() string { + if x != nil { + return x.Hash + } + return "" +} + +type CacheRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` + ForceUpdate bool `protobuf:"varint,2,opt,name=force_update,json=forceUpdate,proto3" json:"force_update,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *CacheRequest) Reset() { + *x = CacheRequest{} + mi := &file_repository_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *CacheRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CacheRequest) ProtoMessage() {} + +func (x *CacheRequest) ProtoReflect() protoreflect.Message { + mi := &file_repository_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CacheRequest.ProtoReflect.Descriptor instead. +func (*CacheRequest) Descriptor() ([]byte, []int) { + return file_repository_proto_rawDescGZIP(), []int{5} +} + +func (x *CacheRequest) GetUrl() string { + if x != nil { + return x.Url + } + return "" +} + +func (x *CacheRequest) GetForceUpdate() bool { + if x != nil { + return x.ForceUpdate + } + return false +} + +type AffectedCommitsRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` + Events []*Event `protobuf:"bytes,2,rep,name=events,proto3" json:"events,omitempty"` + DetectCherrypicksIntroduced bool `protobuf:"varint,3,opt,name=detect_cherrypicks_introduced,json=detectCherrypicksIntroduced,proto3" json:"detect_cherrypicks_introduced,omitempty"` + DetectCherrypicksFixed bool `protobuf:"varint,4,opt,name=detect_cherrypicks_fixed,json=detectCherrypicksFixed,proto3" json:"detect_cherrypicks_fixed,omitempty"` + ForceUpdate bool `protobuf:"varint,5,opt,name=force_update,json=forceUpdate,proto3" json:"force_update,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AffectedCommitsRequest) Reset() { + *x = AffectedCommitsRequest{} + mi := &file_repository_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AffectedCommitsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AffectedCommitsRequest) ProtoMessage() {} + +func (x *AffectedCommitsRequest) ProtoReflect() protoreflect.Message { + mi := &file_repository_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AffectedCommitsRequest.ProtoReflect.Descriptor instead. +func (*AffectedCommitsRequest) Descriptor() ([]byte, []int) { + return file_repository_proto_rawDescGZIP(), []int{6} +} + +func (x *AffectedCommitsRequest) GetUrl() string { + if x != nil { + return x.Url + } + return "" +} + +func (x *AffectedCommitsRequest) GetEvents() []*Event { + if x != nil { + return x.Events + } + return nil +} + +func (x *AffectedCommitsRequest) GetDetectCherrypicksIntroduced() bool { + if x != nil { + return x.DetectCherrypicksIntroduced + } + return false +} + +func (x *AffectedCommitsRequest) GetDetectCherrypicksFixed() bool { + if x != nil { + return x.DetectCherrypicksFixed + } + return false +} + +func (x *AffectedCommitsRequest) GetForceUpdate() bool { + if x != nil { + return x.ForceUpdate + } + return false +} + var File_repository_proto protoreflect.FileDescriptor const file_repository_proto_rawDesc = "" + @@ -229,7 +461,26 @@ const file_repository_proto_rawDesc = "" + "\x04hash\x18\x01 \x01(\fR\x04hash\x12\x12\n" + "\x04refs\x18\x02 \x03(\tR\x04refs\"K\n" + "\x17AffectedCommitsResponse\x120\n" + - "\acommits\x18\x01 \x03(\v2\x16.gitter.AffectedCommitR\acommitsB\x0eZ\f./repositoryb\x06proto3" + "\acommits\x18\x01 \x03(\v2\x16.gitter.AffectedCommitR\acommits\"M\n" + + "\x05Event\x120\n" + + "\n" + + "event_type\x18\x01 \x01(\x0e2\x11.gitter.EventTypeR\teventType\x12\x12\n" + + "\x04hash\x18\x02 \x01(\tR\x04hash\"C\n" + + "\fCacheRequest\x12\x10\n" + + "\x03url\x18\x01 \x01(\tR\x03url\x12!\n" + + "\fforce_update\x18\x02 \x01(\bR\vforceUpdate\"\xf2\x01\n" + + "\x16AffectedCommitsRequest\x12\x10\n" + + "\x03url\x18\x01 \x01(\tR\x03url\x12%\n" + + "\x06events\x18\x02 \x03(\v2\r.gitter.EventR\x06events\x12B\n" + + "\x1ddetect_cherrypicks_introduced\x18\x03 \x01(\bR\x1bdetectCherrypicksIntroduced\x128\n" + + "\x18detect_cherrypicks_fixed\x18\x04 \x01(\bR\x16detectCherrypicksFixed\x12!\n" + + "\fforce_update\x18\x05 \x01(\bR\vforceUpdate*D\n" + + "\tEventType\x12\x0e\n" + + "\n" + + "INTRODUCED\x10\x00\x12\t\n" + + "\x05FIXED\x10\x01\x12\x11\n" + + "\rLAST_AFFECTED\x10\x02\x12\t\n" + + "\x05LIMIT\x10\x03B\x0eZ\f./repositoryb\x06proto3" var ( file_repository_proto_rawDescOnce sync.Once @@ -243,21 +494,28 @@ func file_repository_proto_rawDescGZIP() []byte { return file_repository_proto_rawDescData } -var file_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_repository_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 7) var file_repository_proto_goTypes = []any{ - (*CommitDetail)(nil), // 0: gitter.CommitDetail - (*RepositoryCache)(nil), // 1: gitter.RepositoryCache - (*AffectedCommit)(nil), // 2: gitter.AffectedCommit - (*AffectedCommitsResponse)(nil), // 3: gitter.AffectedCommitsResponse + (EventType)(0), // 0: gitter.EventType + (*CommitDetail)(nil), // 1: gitter.CommitDetail + (*RepositoryCache)(nil), // 2: gitter.RepositoryCache + (*AffectedCommit)(nil), // 3: gitter.AffectedCommit + (*AffectedCommitsResponse)(nil), // 4: gitter.AffectedCommitsResponse + (*Event)(nil), // 5: gitter.Event + (*CacheRequest)(nil), // 6: gitter.CacheRequest + (*AffectedCommitsRequest)(nil), // 7: gitter.AffectedCommitsRequest } var file_repository_proto_depIdxs = []int32{ - 0, // 0: gitter.RepositoryCache.commits:type_name -> gitter.CommitDetail - 2, // 1: gitter.AffectedCommitsResponse.commits:type_name -> gitter.AffectedCommit - 2, // [2:2] is the sub-list for method output_type - 2, // [2:2] is the sub-list for method input_type - 2, // [2:2] is the sub-list for extension type_name - 2, // [2:2] is the sub-list for extension extendee - 0, // [0:2] is the sub-list for field type_name + 1, // 0: gitter.RepositoryCache.commits:type_name -> gitter.CommitDetail + 3, // 1: gitter.AffectedCommitsResponse.commits:type_name -> gitter.AffectedCommit + 0, // 2: gitter.Event.event_type:type_name -> gitter.EventType + 5, // 3: gitter.AffectedCommitsRequest.events:type_name -> gitter.Event + 4, // [4:4] is the sub-list for method output_type + 4, // [4:4] is the sub-list for method input_type + 4, // [4:4] is the sub-list for extension type_name + 4, // [4:4] is the sub-list for extension extendee + 0, // [0:4] is the sub-list for field type_name } func init() { file_repository_proto_init() } @@ -270,13 +528,14 @@ func file_repository_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_repository_proto_rawDesc), len(file_repository_proto_rawDesc)), - NumEnums: 0, - NumMessages: 4, + NumEnums: 1, + NumMessages: 7, NumExtensions: 0, NumServices: 0, }, GoTypes: file_repository_proto_goTypes, DependencyIndexes: file_repository_proto_depIdxs, + EnumInfos: file_repository_proto_enumTypes, MessageInfos: file_repository_proto_msgTypes, }.Build() File_repository_proto = out.File diff --git a/go/cmd/gitter/pb/repository/repository.proto b/go/cmd/gitter/pb/repository/repository.proto index 5f2062f573e..13553a1a168 100644 --- a/go/cmd/gitter/pb/repository/repository.proto +++ b/go/cmd/gitter/pb/repository/repository.proto @@ -23,3 +23,28 @@ message AffectedCommit { message AffectedCommitsResponse { repeated AffectedCommit commits = 1; } + +enum EventType { + INTRODUCED = 0; + FIXED = 1; + LAST_AFFECTED = 2; + LIMIT = 3; +} + +message Event { + EventType event_type = 1; + string hash = 2; +} + +message CacheRequest { + string url = 1; + bool force_update = 2; +} + +message AffectedCommitsRequest { + string url = 1; + repeated Event events = 2; + bool detect_cherrypicks_introduced = 3; + bool detect_cherrypicks_fixed = 4; + bool force_update = 5; +} diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 309404b7afb..a22254405f2 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -441,10 +441,10 @@ func (r *Repository) parseHashes(ctx context.Context, hashesStr []string, isIntr } // Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events -func (r *Repository) Affected(ctx context.Context, introStrs, fixedStrs, laStrs []string, cherrypickIntro, cherrypickFixed bool) []*Commit { - introduced := r.parseHashes(ctx, introStrs, true) - fixed := r.parseHashes(ctx, fixedStrs, false) - lastAffected := r.parseHashes(ctx, laStrs, false) +func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypickIntro, cherrypickFixed bool) []*Commit { + introduced := r.parseHashes(ctx, se.Introduced, true) + fixed := r.parseHashes(ctx, se.Fixed, false) + lastAffected := r.parseHashes(ctx, se.LastAffected, false) // Expands the introduced and fixed commits to include cherrypick equivalents // lastAffected should not be expanded because it does not imply a "fix" commit that can be cherrypicked to other branches @@ -574,9 +574,9 @@ func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { } // Between walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) -func (r *Repository) Limit(ctx context.Context, introStrs, limitStrs []string) []*Commit { - introduced := r.parseHashes(ctx, introStrs, true) - limit := r.parseHashes(ctx, limitStrs, false) +func (r *Repository) Limit(ctx context.Context, se *SeparatedEvents) []*Commit { + introduced := r.parseHashes(ctx, se.Introduced, true) + limit := r.parseHashes(ctx, se.Limit, false) var affectedCommits []*Commit diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index b0c7fd4d0e6..b2be695bd59 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -329,8 +329,12 @@ func TestAffected_Introduced_Fixed(t *testing.T) { for i, h := range tt.lastAffected { laStrs[i] = encodeSHA1(h) } - - gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false, false) + se := &SeparatedEvents{ + Introduced: introStrs, + Fixed: fixedStrs, + LastAffected: laStrs, + } + gotCommits := repo.Affected(t.Context(), se, false, false) var got []SHA1 for _, c := range gotCommits { @@ -446,8 +450,12 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { for i, h := range tt.lastAffected { laStrs[i] = encodeSHA1(h) } - - gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false, false) + se := &SeparatedEvents{ + Introduced: introStrs, + Fixed: fixedStrs, + LastAffected: laStrs, + } + gotCommits := repo.Affected(t.Context(), se, false, false) var got []SHA1 for _, c := range gotCommits { @@ -537,8 +545,12 @@ func TestAffected_Combined(t *testing.T) { for i, h := range tt.lastAffected { laStrs[i] = encodeSHA1(h) } - - gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, laStrs, false, false) + se := &SeparatedEvents{ + Introduced: introStrs, + Fixed: fixedStrs, + LastAffected: laStrs, + } + gotCommits := repo.Affected(t.Context(), se, false, false) var got []SHA1 for _, c := range gotCommits { @@ -642,7 +654,11 @@ func TestAffected_Cherrypick(t *testing.T) { fixedStrs[i] = encodeSHA1(h) } - gotCommits := repo.Affected(t.Context(), introStrs, fixedStrs, nil, tt.cherrypickIntro, tt.cherrypickFixed) + se := &SeparatedEvents{ + Introduced: introStrs, + Fixed: fixedStrs, + } + gotCommits := repo.Affected(t.Context(), se, tt.cherrypickIntro, tt.cherrypickFixed) var got []SHA1 for _, c := range gotCommits { @@ -729,7 +745,11 @@ func TestLimit(t *testing.T) { limitStrs[i] = encodeSHA1(h) } - gotCommits := repo.Limit(t.Context(), introStrs, limitStrs) + se := &SeparatedEvents{ + Introduced: introStrs, + Limit: limitStrs, + } + gotCommits := repo.Limit(t.Context(), se) var got []SHA1 for _, c := range gotCommits { From 98c69c99203bfc79009f0af6c28c180ee040ff5b Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 11 Mar 2026 22:25:53 +0000 Subject: [PATCH 24/39] clean up some unnecessary ifs --- go/cmd/gitter/repository.go | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index a22254405f2..32b3ff35f82 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -174,14 +174,9 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC continue } // Remove prefixes from tags, other refs such as branches will be left as is - if strings.HasPrefix(ref, "tag: ") { - tag := strings.TrimPrefix(ref, "tag: ") - refs = append(refs, tag) - } else { - // clean up HEAD -> branch-name to just keep the branch name - ref = strings.TrimPrefix(ref, "HEAD -> ") - refs = append(refs, ref) - } + ref = strings.TrimPrefix(ref, "tag: ") + ref = strings.TrimPrefix(ref, "HEAD -> ") // clean up HEAD -> branch-name to just keep the branch name + refs = append(refs, ref) } fallthrough @@ -464,10 +459,8 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi } for _, commit := range lastAffected { - if _, ok := r.commitGraph[commit]; ok { - for _, child := range r.commitGraph[commit] { - fixedMap[child] = struct{}{} - } + for _, child := range r.commitGraph[commit] { + fixedMap[child] = struct{}{} } } @@ -512,13 +505,11 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi // Remove from affected list if it was reached via a previous non-fixed path. delete(affectedFromIntro, unaffected) - if children, ok := r.commitGraph[unaffected]; ok { - for _, child := range children { - // Continue down the path if the child isn't already blocked. - if _, ok := unaffectableMap[child]; !ok { - unaffectableMap[child] = struct{}{} - stack = append(stack, child) - } + for _, child := range r.commitGraph[unaffected] { + // Continue down the path if the child isn't already blocked. + if _, ok := unaffectableMap[child]; !ok { + unaffectableMap[child] = struct{}{} + stack = append(stack, child) } } } From 271f737403ac33f264b99e3538f9a7d84460df32 Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 11 Mar 2026 23:06:49 +0000 Subject: [PATCH 25/39] add comment on the cmp.Option used in tests --- go/cmd/gitter/repository_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index b2be695bd59..6a1255fdad6 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -165,6 +165,9 @@ func printSHA1(hash SHA1) string { return strings.TrimLeft(str, "0") } +// cmpSHA1Opts are applied to the cmp.Diff function to make the output more readable +// 1. Transform SHA1s to pretty strings +// 2. Sorts slices to ensure deterministic comparisons var cmpSHA1Opts = []cmp.Option{ cmp.Transformer("SHA1s", func(in []SHA1) []string { out := make([]string, len(in)) From 3631dbb1d28ccecf0f8c02a9bbf65229aceb1238 Mon Sep 17 00:00:00 2001 From: Joey L Date: Fri, 13 Mar 2026 00:34:02 +0000 Subject: [PATCH 26/39] =?UTF-8?q?Array=20goes=20zoom=20=F0=9F=92=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go/cmd/gitter/persistence.go | 2 +- go/cmd/gitter/repository.go | 290 ++++++++++++++++++------------- go/cmd/gitter/repository_test.go | 171 ++++++++---------- 3 files changed, 243 insertions(+), 220 deletions(-) diff --git a/go/cmd/gitter/persistence.go b/go/cmd/gitter/persistence.go index 67f06859e67..67e83d31a69 100644 --- a/go/cmd/gitter/persistence.go +++ b/go/cmd/gitter/persistence.go @@ -90,7 +90,7 @@ func saveRepositoryCache(cachePath string, repo *Repository) error { logger.Info("Saving repository cache", slog.String("path", cachePath)) cache := &pb.RepositoryCache{} - for _, commit := range repo.commitDetails { + for _, commit := range repo.commits { cache.Commits = append(cache.Commits, &pb.CommitDetail{ Hash: commit.Hash[:], PatchId: commit.PatchID[:], diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 32b3ff35f82..f2a853c063e 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -24,7 +24,7 @@ type SHA1 [20]byte type Commit struct { Hash SHA1 PatchID SHA1 - Parents []SHA1 + Parents []int Refs []string } @@ -34,17 +34,19 @@ type Repository struct { patchIDMu sync.Mutex // Path to the .git directory within gitter's working dir repoPath string - // Adjacency list: Parent -> []Children - commitGraph map[SHA1][]SHA1 - // Actual commit details - commitDetails map[SHA1]*Commit + // All commits in the repository (the array index is used as the commit index below) + commits []*Commit + // Adjacency list: Parent index -> []Children indexes + commitGraph [][]int + // Map of commit hash to its index in the commits slice + hashToIndex map[SHA1]int // Store refs to commit because it's useful for CVE conversion - refToCommit map[string]SHA1 - // For cherry-pick detection: PatchID -> []commit hash - patchIDToCommits map[SHA1][]SHA1 + refToCommit map[string]int + // For cherry-pick detection: PatchID -> []commit indexes + patchIDToCommits map[SHA1][]int // Root commits (commits with no parents) // In a typical repository this is the initial commit - rootCommits []SHA1 + rootCommits []int } // %H commit hash; %P parent hashes; %D:refs (tab delimited) @@ -58,10 +60,9 @@ var workers = 16 func NewRepository(repoPath string) *Repository { return &Repository{ repoPath: repoPath, - commitGraph: make(map[SHA1][]SHA1), - commitDetails: make(map[SHA1]*Commit), - refToCommit: make(map[string]SHA1), - patchIDToCommits: make(map[SHA1][]SHA1), + hashToIndex: make(map[SHA1]int), + refToCommit: make(map[string]int), + patchIDToCommits: make(map[SHA1][]int), } } @@ -105,10 +106,35 @@ func LoadRepository(ctx context.Context, repoPath string) (*Repository, error) { return repo, nil } +// getOrCreateIndex returns the index for a given commit hash. +// If the hash is new, it creates a new barebone commit and expands the graph structure to accommodate it. +func (r *Repository) getOrCreateIndex(hash SHA1) int { + // Check if we've already assigned an index to this hash + if idx, ok := r.hashToIndex[hash]; ok { + return idx + } + + idx := len(r.commits) + r.commits = append(r.commits, &Commit{Hash: hash}) + r.hashToIndex[hash] = idx + // Expand the commitGraph (adjacency list) to match the commits slice. + r.commitGraph = append(r.commitGraph, nil) + + return idx +} + +// For test setup +func (r *Repository) addEdgeForTest(parent, child SHA1) { + pIdx := r.getOrCreateIndex(parent) + cIdx := r.getOrCreateIndex(child) + r.commitGraph[pIdx] = append(r.commitGraph[pIdx], cIdx) + r.commits[cIdx].Parents = append(r.commits[cIdx].Parents, pIdx) +} + // buildCommitGraph builds the commit graph and associate commit details from scratch -// Returns a list of new commit hashes that don't have cached Patch IDs. +// Returns a list of new commit indexes that don't have cached Patch IDs. // The new commit list is in reverse chronological order based on commit date (the default for git log). -func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryCache) ([]SHA1, error) { +func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryCache) ([]int, error) { logger.InfoContext(ctx, "Starting graph construction") start := time.Now() @@ -124,7 +150,7 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC } } } - var newCommits []SHA1 + var newCommits []int // Temp outFile for git log output tmpFile, err := os.CreateTemp(r.repoPath, "git-log.out") @@ -206,37 +232,36 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC continue } - // We want to keep the root commit (no parent) easily accessible + childIdx := r.getOrCreateIndex(childHash) + commit := r.commits[childIdx] + commit.Refs = refs + + // We want to keep the root commit (no parent) easily accessible for introduced=0 if len(parentHashes) == 0 { - r.rootCommits = append(r.rootCommits, childHash) + r.rootCommits = append(r.rootCommits, childIdx) } // Add commit to graph (parent -> []child) for _, parentHash := range parentHashes { - r.commitGraph[parentHash] = append(r.commitGraph[parentHash], childHash) - } + parentIdx := r.getOrCreateIndex(parentHash) + commit.Parents = append(commit.Parents, parentIdx) - commit := Commit{ - Hash: childHash, - Refs: refs, - Parents: parentHashes, + r.commitGraph[parentIdx] = append(r.commitGraph[parentIdx], childIdx) } if patchID, ok := cachedPatchIDs[childHash]; ok { // Assign saved patch ID to commit details and map if found commit.PatchID = patchID // Also populate patchIDToCommits map - r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], childHash) + r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], childIdx) } else { // Add to slice for patch ID to be generated later - newCommits = append(newCommits, childHash) + newCommits = append(newCommits, childIdx) } - r.commitDetails[childHash] = &commit - // Also populate the ref-to-commit map for _, ref := range refs { - r.refToCommit[ref] = childHash + r.refToCommit[ref] = childIdx } } @@ -247,7 +272,7 @@ func (r *Repository) buildCommitGraph(ctx context.Context, cache *pb.RepositoryC // calculatePatchIDs calculates patch IDs only for the specific commits provided. // Commits should be passed in order if possible. Processing linear commits sequentially improves performance slightly (in the 'git show' commands). -func (r *Repository) calculatePatchIDs(ctx context.Context, commits []SHA1) error { +func (r *Repository) calculatePatchIDs(ctx context.Context, commits []int) error { logger.InfoContext(ctx, "Starting patch ID calculation") start := time.Now() @@ -285,7 +310,7 @@ func (r *Repository) calculatePatchIDs(ctx context.Context, commits []SHA1) erro // calculatePatchIDsWorker calculates patch IDs and update CommitDetail and patchIDToCommits map. // Essentially running `git show | git patch-id --stable` -func (r *Repository) calculatePatchIDsWorker(ctx context.Context, chunk []SHA1) error { +func (r *Repository) calculatePatchIDsWorker(ctx context.Context, chunk []int) error { // Prepare git commands // `git show --stdin --patch --first-parent --no-color`: // --patch to show diffs in a format that can be directly piped into `git patch-id` @@ -332,11 +357,12 @@ func (r *Repository) calculatePatchIDsWorker(ctx context.Context, chunk []SHA1) // Write hashes to git show stdin go func() { defer in.Close() - for _, hash := range chunk { + for _, idx := range chunk { // Handle context cancel if ctx.Err() != nil { return } + hash := r.commits[idx].Hash fmt.Fprintf(in, "%s\n", hex.EncodeToString(hash[:])) } }() @@ -402,16 +428,19 @@ func (r *Repository) updatePatchID(commitHash, patchID SHA1) { r.patchIDMu.Lock() defer r.patchIDMu.Unlock() - commit := r.commitDetails[commitHash] + idx, ok := r.hashToIndex[commitHash] + if !ok { + return + } + commit := r.commits[idx] commit.PatchID = patchID - r.commitDetails[commitHash] = commit - r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], commitHash) + r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], idx) } -// parseHashes converts slice of string hashes input into slice of SHA1 -func (r *Repository) parseHashes(ctx context.Context, hashesStr []string, isIntroduced bool) []SHA1 { - hashes := make([]SHA1, 0, len(hashesStr)) +// parseHashes converts slice of string hashes input into slice of commit indexes +func (r *Repository) parseHashes(ctx context.Context, hashesStr []string, isIntroduced bool) []int { + hashes := make([]int, 0, len(hashesStr)) for _, hash := range hashesStr { if isIntroduced && hash == "0" { hashes = append(hashes, r.rootCommits...) @@ -429,14 +458,47 @@ func (r *Repository) parseHashes(ctx context.Context, hashesStr []string, isIntr continue } - hashes = append(hashes, SHA1(hashBytes)) + h := SHA1(hashBytes) + if idx, ok := r.hashToIndex[h]; ok { + hashes = append(hashes, idx) + } else { + logger.ErrorContext(ctx, "commit hash not found in repository", slog.String("hash", hash)) + } } return hashes } +// expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits + any other commits that share the same Patch ID +func (r *Repository) expandByCherrypick(commits []int) []int { + unique := make(map[int]struct{}, len(commits)) // avoid duplication + var zeroPatchID SHA1 + + for _, idx := range commits { + // Find patch ID from commit details + commit := r.commits[idx] + if commit.PatchID == zeroPatchID { + unique[idx] = struct{}{} + continue + } + + // Add equivalent commits with the same Patch ID (including the current commit) + equivalents := r.patchIDToCommits[commit.PatchID] + for _, eq := range equivalents { + unique[eq] = struct{}{} + } + } + + keys := slices.Collect(maps.Keys(unique)) + + return keys +} + // Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypickIntro, cherrypickFixed bool) []*Commit { + logger.InfoContext(ctx, "Starting affected commit walking") + start := time.Now() + introduced := r.parseHashes(ctx, se.Introduced, true) fixed := r.parseHashes(ctx, se.Fixed, false) lastAffected := r.parseHashes(ctx, se.LastAffected, false) @@ -452,64 +514,76 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi // Fixed commits and children of last affected are both in this set // For graph traversal sake they are both considered the fix - fixedMap := make(map[SHA1]struct{}, len(fixed)+len(lastAffected)) + fixedMap := make([]bool, len(r.commits)) - for _, commit := range fixed { - fixedMap[commit] = struct{}{} + for _, idx := range fixed { + fixedMap[idx] = true } - for _, commit := range lastAffected { - for _, child := range r.commitGraph[commit] { - fixedMap[child] = struct{}{} + for _, idx := range lastAffected { + if idx < len(r.commitGraph) { + for _, childIdx := range r.commitGraph[idx] { + fixedMap[childIdx] = true + } } } // The graph traversal // affectedMap deduplicates the affected commits from the graph walk from each introduced commit - affectedMap := make(map[SHA1]struct{}) + affectedMap := make([]bool, len(r.commits)) + + // Preallocating the big slices, they will be cleared inside the per-intro graph walking + queue := make([]int, 0, len(r.commits)) + affectedFromIntro := make([]bool, len(r.commits)) + updatedIdx := make([]int, len(r.commits)) + unaffectable := make([]bool, len(r.commits)) + visited := make([]bool, len(r.commits)) // Walk each introduced commit and find its affected commit - for _, intro := range introduced { + for _, introIdx := range introduced { // BFS from intro - queue := []SHA1{intro} - unaffectableMap := make(map[SHA1]struct{}) - affectedFromIntro := make(map[SHA1]struct{}) - visited := make(map[SHA1]struct{}) + queue = append(queue, introIdx) + clear(affectedFromIntro) + clear(updatedIdx) + clear(unaffectable) + clear(visited) for len(queue) > 0 { curr := queue[0] queue = queue[1:] - if _, ok := visited[curr]; ok { + if visited[curr] { continue } - visited[curr] = struct{}{} + visited[curr] = true // Descendant of a fixed commit - if _, ok := unaffectableMap[curr]; ok { + if unaffectable[curr] { continue } // If we hit a fixed commit, its entire tree is treated as unaffectable // as any downstream commit can go through this fixed commit to become unaffected - if _, ok := fixedMap[curr]; ok { - unaffectableMap[curr] = struct{}{} + if fixedMap[curr] { + unaffectable[curr] = true // Inline DFS from current (fixed) node to make all descendants as unaffected / unaffectable // 1. If a previous path added the descendant to affected list, remove it // 2. Add to the unaffectable set to block future paths - stack := []SHA1{curr} + stack := []int{curr} for len(stack) > 0 { unaffected := stack[len(stack)-1] stack = stack[:len(stack)-1] // Remove from affected list if it was reached via a previous non-fixed path. - delete(affectedFromIntro, unaffected) - - for _, child := range r.commitGraph[unaffected] { - // Continue down the path if the child isn't already blocked. - if _, ok := unaffectableMap[child]; !ok { - unaffectableMap[child] = struct{}{} - stack = append(stack, child) + affectedFromIntro[unaffected] = false + + if unaffected < len(r.commitGraph) { + for _, childIdx := range r.commitGraph[unaffected] { + // Continue down the path if the child isn't already blocked. + if !unaffectable[childIdx] { + unaffectable[childIdx] = true + stack = append(stack, childIdx) + } } } } @@ -518,104 +592,80 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi } // Otherwise, add to the intro-specific affected list and continue - affectedFromIntro[curr] = struct{}{} - if children, ok := r.commitGraph[curr]; ok { - queue = append(queue, children...) + affectedFromIntro[curr] = true + updatedIdx = append(updatedIdx, curr) + if curr < len(r.commitGraph) { + queue = append(queue, r.commitGraph[curr]...) } } // Add the final affected list of this introduced commit to the global set - for commit := range affectedFromIntro { - affectedMap[commit] = struct{}{} + // We only look at the index are are updated in this loop + for _, commitIdx := range updatedIdx { + if affectedFromIntro[commitIdx] { + affectedMap[commitIdx] = true + } } } // Return the affected commit details - affectedCommits := make([]*Commit, 0, len(affectedMap)) - for commit := range affectedMap { - affectedCommits = append(affectedCommits, r.commitDetails[commit]) - } - - return affectedCommits -} - -// expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits + any other commits that share the same Patch ID -func (r *Repository) expandByCherrypick(commits []SHA1) []SHA1 { - unique := make(map[SHA1]struct{}, len(commits)) // avoid duplication - var zeroPatchID SHA1 - - for _, hash := range commits { - // Find patch ID from commit details - details, ok := r.commitDetails[hash] - if !ok || details.PatchID == zeroPatchID { - unique[hash] = struct{}{} - continue - } - - // Add equivalent commits with the same Patch ID (including the current commit) - equivalents := r.patchIDToCommits[details.PatchID] - for _, eq := range equivalents { - unique[eq] = struct{}{} + affectedCommits := make([]*Commit, 0) + for idx, affected := range affectedMap { + if affected { + affectedCommits = append(affectedCommits, r.commits[idx]) } } - keys := slices.Collect(maps.Keys(unique)) + logger.InfoContext(ctx, "Affected commit walking completed", slog.Duration("duration", time.Since(start))) - return keys + return affectedCommits } -// Between walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) +// Limit walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) func (r *Repository) Limit(ctx context.Context, se *SeparatedEvents) []*Commit { introduced := r.parseHashes(ctx, se.Introduced, true) limit := r.parseHashes(ctx, se.Limit, false) var affectedCommits []*Commit - introMap := make(map[SHA1]struct{}, len(introduced)) - for _, commit := range introduced { - introMap[commit] = struct{}{} + introMap := make([]bool, len(r.commits)) + for _, idx := range introduced { + introMap[idx] = true } // DFS to walk from limit(s) to introduced (follow first parent) - stack := make([]SHA1, 0, len(limit)) + stack := make([]int, 0, len(limit)) // Start from limits' parents - for _, commit := range limit { - details, ok := r.commitDetails[commit] - if !ok { - continue - } - if len(details.Parents) > 0 { - stack = append(stack, details.Parents[0]) + for _, idx := range limit { + commit := r.commits[idx] + if len(commit.Parents) > 0 { + stack = append(stack, commit.Parents[0]) } } - visited := make(map[SHA1]struct{}) + visited := make([]bool, len(r.commits)) for len(stack) > 0 { curr := stack[len(stack)-1] stack = stack[:len(stack)-1] - if _, ok := visited[curr]; ok { + if visited[curr] { continue } - visited[curr] = struct{}{} + visited[curr] = true // Add current node to affected commits - details, ok := r.commitDetails[curr] - if !ok { - continue - } - - affectedCommits = append(affectedCommits, details) + commit := r.commits[curr] + affectedCommits = append(affectedCommits, commit) // If commit is in introduced, we can stop the traversal after adding it to affected - if _, ok := introMap[curr]; ok { + if introMap[curr] { continue } // In git merge, first parent is the HEAD commit at the time of merge (on the branch that gets merged into) - if len(details.Parents) > 0 { - stack = append(stack, details.Parents[0]) + if len(commit.Parents) > 0 { + stack = append(stack, commit.Parents[0]) } } diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 6a1255fdad6..1f06faf9663 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -74,8 +74,8 @@ func TestBuildCommitGraph(t *testing.T) { t.Errorf("expected 3 new commits, got %d", len(newCommits)) } - if len(r.commitDetails) != 3 { - t.Errorf("expected 3 commits with details, got %d", len(r.commitDetails)) + if len(r.commits) != 3 { + t.Errorf("expected 3 commits, got %d", len(r.commits)) } // 2 tags + main branch @@ -100,10 +100,10 @@ func TestCalculatePatchIDs(t *testing.T) { } // Verify all commits have patch IDs - for _, hash := range newCommits { - details := r.commitDetails[hash] - if details.PatchID == [20]byte{} { - t.Errorf("missing patch ID for commit %s", printSHA1(hash)) + for _, idx := range newCommits { + commit := r.commits[idx] + if commit.PatchID == [20]byte{} { + t.Errorf("missing patch ID for commit %s", printSHA1(commit.Hash)) } } } @@ -131,9 +131,9 @@ func TestLoadRepository(t *testing.T) { } // Check that the two sets of Patch IDs are the same - for hash, details := range r1.commitDetails { - if details.PatchID != r2.commitDetails[hash].PatchID { - t.Errorf("patch ID mismatch for commit %s", printSHA1(hash)) + for idx, commit := range r1.commits { + if commit.PatchID != r2.commits[idx].PatchID { + t.Errorf("patch ID mismatch for commit %s", printSHA1(commit.Hash)) } } } @@ -194,33 +194,40 @@ func TestExpandByCherrypick(t *testing.T) { p1 := decodeSHA1("1111") // Setup commit details - repo.commitDetails[h1] = &Commit{Hash: h1, PatchID: p1} - repo.commitDetails[h2] = &Commit{Hash: h2} - repo.commitDetails[h3] = &Commit{Hash: h3, PatchID: p1} // h3 has the same patch ID as h1 should be cherry picked + idx1 := repo.getOrCreateIndex(h1) + idx2 := repo.getOrCreateIndex(h2) + idx3 := repo.getOrCreateIndex(h3) + + repo.commits[idx1].PatchID = p1 + repo.commits[idx3].PatchID = p1 // h3 has the same patch ID as h1 should be cherry picked // Setup patch ID map - repo.patchIDToCommits[p1] = []SHA1{h1, h3} + repo.patchIDToCommits[p1] = []int{idx1, idx3} tests := []struct { name string - input []SHA1 + input []int expected []SHA1 }{ { name: "Expand single commit with cherry-pick", - input: []SHA1{h1}, + input: []int{idx1}, expected: []SHA1{h1, h3}, }, { name: "No expansion for commit without cherry-pick", - input: []SHA1{h2}, + input: []int{idx2}, expected: []SHA1{h2}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := repo.expandByCherrypick(tt.input) + gotIdxs := repo.expandByCherrypick(tt.input) + var got []SHA1 + for _, idx := range gotIdxs { + got = append(got, repo.commits[idx].Hash) + } if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { t.Errorf("expandByCherrypick() mismatch (-want +got):\n%s", diff) @@ -250,22 +257,14 @@ func TestAffected_Introduced_Fixed(t *testing.T) { hH := decodeSHA1("acac") // Setup graph (Parent -> Children) - repo.commitGraph[hA] = []SHA1{hB} - repo.commitGraph[hB] = []SHA1{hC, hH} - repo.commitGraph[hC] = []SHA1{hD, hF} - repo.commitGraph[hD] = []SHA1{hE} - repo.commitGraph[hF] = []SHA1{hG} - repo.commitGraph[hH] = []SHA1{hD} - - // Setup details - repo.commitDetails[hA] = &Commit{Hash: hA} - repo.commitDetails[hB] = &Commit{Hash: hB} - repo.commitDetails[hC] = &Commit{Hash: hC} - repo.commitDetails[hD] = &Commit{Hash: hD} - repo.commitDetails[hE] = &Commit{Hash: hE} - repo.commitDetails[hF] = &Commit{Hash: hF} - repo.commitDetails[hG] = &Commit{Hash: hG} - repo.commitDetails[hH] = &Commit{Hash: hH} + repo.addEdgeForTest(hA, hB) + repo.addEdgeForTest(hB, hC) + repo.addEdgeForTest(hB, hH) + repo.addEdgeForTest(hC, hD) + repo.addEdgeForTest(hC, hF) + repo.addEdgeForTest(hD, hE) + repo.addEdgeForTest(hF, hG) + repo.addEdgeForTest(hH, hD) tests := []struct { name string @@ -371,22 +370,14 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { hH := decodeSHA1("acac") // Setup graph (Parent -> Children) - repo.commitGraph[hA] = []SHA1{hB} - repo.commitGraph[hB] = []SHA1{hC, hH} - repo.commitGraph[hC] = []SHA1{hD, hF} - repo.commitGraph[hD] = []SHA1{hE} - repo.commitGraph[hF] = []SHA1{hG} - repo.commitGraph[hH] = []SHA1{hD} - - // Setup details - repo.commitDetails[hA] = &Commit{Hash: hA} - repo.commitDetails[hB] = &Commit{Hash: hB} - repo.commitDetails[hC] = &Commit{Hash: hC} - repo.commitDetails[hD] = &Commit{Hash: hD} - repo.commitDetails[hE] = &Commit{Hash: hE} - repo.commitDetails[hF] = &Commit{Hash: hF} - repo.commitDetails[hG] = &Commit{Hash: hG} - repo.commitDetails[hH] = &Commit{Hash: hH} + repo.addEdgeForTest(hA, hB) + repo.addEdgeForTest(hB, hC) + repo.addEdgeForTest(hB, hH) + repo.addEdgeForTest(hC, hD) + repo.addEdgeForTest(hC, hF) + repo.addEdgeForTest(hD, hE) + repo.addEdgeForTest(hF, hG) + repo.addEdgeForTest(hH, hD) tests := []struct { name string @@ -493,22 +484,14 @@ func TestAffected_Combined(t *testing.T) { hH := decodeSHA1("acac") // Setup graph (Parent -> Children) - repo.commitGraph[hA] = []SHA1{hB} - repo.commitGraph[hB] = []SHA1{hC, hH} - repo.commitGraph[hC] = []SHA1{hD, hF} - repo.commitGraph[hD] = []SHA1{hE} - repo.commitGraph[hF] = []SHA1{hG} - repo.commitGraph[hH] = []SHA1{hD} - - // Setup details - repo.commitDetails[hA] = &Commit{Hash: hA} - repo.commitDetails[hB] = &Commit{Hash: hB} - repo.commitDetails[hC] = &Commit{Hash: hC} - repo.commitDetails[hD] = &Commit{Hash: hD} - repo.commitDetails[hE] = &Commit{Hash: hE} - repo.commitDetails[hF] = &Commit{Hash: hF} - repo.commitDetails[hG] = &Commit{Hash: hG} - repo.commitDetails[hH] = &Commit{Hash: hH} + repo.addEdgeForTest(hA, hB) + repo.addEdgeForTest(hB, hC) + repo.addEdgeForTest(hB, hH) + repo.addEdgeForTest(hC, hD) + repo.addEdgeForTest(hC, hF) + repo.addEdgeForTest(hD, hE) + repo.addEdgeForTest(hF, hG) + repo.addEdgeForTest(hH, hD) tests := []struct { name string @@ -590,26 +573,25 @@ func TestAffected_Cherrypick(t *testing.T) { c2 := decodeSHA1("c2") // Setup graph (Parent -> Children) - repo.commitGraph[hA] = []SHA1{hB} - repo.commitGraph[hB] = []SHA1{hC} - repo.commitGraph[hC] = []SHA1{hD} - repo.commitGraph[hE] = []SHA1{hF} - repo.commitGraph[hF] = []SHA1{hG} - repo.commitGraph[hG] = []SHA1{hH} + repo.addEdgeForTest(hA, hB) + repo.addEdgeForTest(hB, hC) + repo.addEdgeForTest(hC, hD) + repo.addEdgeForTest(hE, hF) + repo.addEdgeForTest(hF, hG) + repo.addEdgeForTest(hG, hH) // Setup PatchID map for cherrypicking - repo.patchIDToCommits[c1] = []SHA1{hA, hE} - repo.patchIDToCommits[c2] = []SHA1{hC, hG} - - // Setup details - repo.commitDetails[hA] = &Commit{Hash: hA, PatchID: c1} - repo.commitDetails[hB] = &Commit{Hash: hB, Parents: []SHA1{hA}} - repo.commitDetails[hC] = &Commit{Hash: hC, Parents: []SHA1{hB}, PatchID: c2} - repo.commitDetails[hD] = &Commit{Hash: hD, Parents: []SHA1{hC}} - repo.commitDetails[hE] = &Commit{Hash: hE, PatchID: c1} - repo.commitDetails[hF] = &Commit{Hash: hF, Parents: []SHA1{hE}} - repo.commitDetails[hG] = &Commit{Hash: hG, Parents: []SHA1{hF}, PatchID: c2} - repo.commitDetails[hH] = &Commit{Hash: hH, Parents: []SHA1{hG}} + idxA := repo.getOrCreateIndex(hA) + idxE := repo.getOrCreateIndex(hE) + repo.patchIDToCommits[c1] = []int{idxA, idxE} + idxC := repo.getOrCreateIndex(hC) + idxG := repo.getOrCreateIndex(hG) + repo.patchIDToCommits[c2] = []int{idxC, idxG} + + repo.commits[idxA].PatchID = c1 + repo.commits[idxE].PatchID = c1 + repo.commits[idxC].PatchID = c2 + repo.commits[idxG].PatchID = c2 tests := []struct { name string @@ -693,22 +675,13 @@ func TestLimit(t *testing.T) { hH := decodeSHA1("acac") // Setup graph (Parent -> Children) - repo.commitGraph[hA] = []SHA1{hB} - repo.commitGraph[hB] = []SHA1{hC, hF} - repo.commitGraph[hC] = []SHA1{hD} - repo.commitGraph[hD] = []SHA1{hE} - repo.commitGraph[hF] = []SHA1{hG} - repo.commitGraph[hG] = []SHA1{hH} - - // Setup details - repo.commitDetails[hA] = &Commit{Hash: hA} - repo.commitDetails[hB] = &Commit{Hash: hB, Parents: []SHA1{hA}} - repo.commitDetails[hC] = &Commit{Hash: hC, Parents: []SHA1{hB}} - repo.commitDetails[hD] = &Commit{Hash: hD, Parents: []SHA1{hC}} - repo.commitDetails[hE] = &Commit{Hash: hE, Parents: []SHA1{hD}} - repo.commitDetails[hF] = &Commit{Hash: hF, Parents: []SHA1{hB}} - repo.commitDetails[hG] = &Commit{Hash: hG, Parents: []SHA1{hF}} - repo.commitDetails[hH] = &Commit{Hash: hH, Parents: []SHA1{hG}} + repo.addEdgeForTest(hA, hB) + repo.addEdgeForTest(hB, hC) + repo.addEdgeForTest(hB, hF) + repo.addEdgeForTest(hC, hD) + repo.addEdgeForTest(hD, hE) + repo.addEdgeForTest(hF, hG) + repo.addEdgeForTest(hG, hH) tests := []struct { name string From 7b58e4268f9374dd8a5317371939a1dfbac40b16 Mon Sep 17 00:00:00 2001 From: Joey L Date: Fri, 13 Mar 2026 01:01:05 +0000 Subject: [PATCH 27/39] Don't add intro=0 multiple times, removed arg Removed isIntroduced arg from parseHashes since it's unnecessary. --- go/cmd/gitter/repository.go | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index f2a853c063e..e30cfc8c8ee 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -438,12 +438,15 @@ func (r *Repository) updatePatchID(commitHash, patchID SHA1) { r.patchIDToCommits[patchID] = append(r.patchIDToCommits[patchID], idx) } -// parseHashes converts slice of string hashes input into slice of commit indexes -func (r *Repository) parseHashes(ctx context.Context, hashesStr []string, isIntroduced bool) []int { - hashes := make([]int, 0, len(hashesStr)) +// parseHashes converts a slice of string hashes into a slice of commit indexes. +func (r *Repository) parseHashes(ctx context.Context, hashesStr []string) []int { + indices := make([]int, 0, len(hashesStr)) + addedRoot := false // Only add root commits once if multiple intro=0 are provided + for _, hash := range hashesStr { - if isIntroduced && hash == "0" { - hashes = append(hashes, r.rootCommits...) + if hash == "0" && !addedRoot { + indices = append(indices, r.rootCommits...) + addedRoot = true continue } @@ -460,13 +463,13 @@ func (r *Repository) parseHashes(ctx context.Context, hashesStr []string, isIntr h := SHA1(hashBytes) if idx, ok := r.hashToIndex[h]; ok { - hashes = append(hashes, idx) + indices = append(indices, idx) } else { logger.ErrorContext(ctx, "commit hash not found in repository", slog.String("hash", hash)) } } - return hashes + return indices } // expandByCherrypick expands a slice of commits by adding commits that have the same Patch ID (cherrypicked commits) returns a new list containing the original commits + any other commits that share the same Patch ID @@ -499,9 +502,9 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi logger.InfoContext(ctx, "Starting affected commit walking") start := time.Now() - introduced := r.parseHashes(ctx, se.Introduced, true) - fixed := r.parseHashes(ctx, se.Fixed, false) - lastAffected := r.parseHashes(ctx, se.LastAffected, false) + introduced := r.parseHashes(ctx, se.Introduced) + fixed := r.parseHashes(ctx, se.Fixed) + lastAffected := r.parseHashes(ctx, se.LastAffected) // Expands the introduced and fixed commits to include cherrypick equivalents // lastAffected should not be expanded because it does not imply a "fix" commit that can be cherrypicked to other branches @@ -623,8 +626,8 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi // Limit walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) func (r *Repository) Limit(ctx context.Context, se *SeparatedEvents) []*Commit { - introduced := r.parseHashes(ctx, se.Introduced, true) - limit := r.parseHashes(ctx, se.Limit, false) + introduced := r.parseHashes(ctx, se.Introduced) + limit := r.parseHashes(ctx, se.Limit) var affectedCommits []*Commit From d31c4d94106e532bbd06376051f998b487e00c5b Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 17 Mar 2026 05:18:30 +0000 Subject: [PATCH 28/39] Add repo LRU cache --- go/cmd/gitter/gitter.go | 345 ++++++++++++++++++----------------- go/cmd/gitter/gitter_test.go | 2 + go/go.mod | 2 + go/go.sum | 4 + 4 files changed, 183 insertions(+), 170 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index f7c1b341770..69536c94342 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -25,6 +25,7 @@ import ( _ "net/http/pprof" //nolint:gosec // This is a internal only service not public to the internet + "github.com/dgraph-io/ristretto/v2" "github.com/google/osv.dev/go/logger" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "golang.org/x/sync/singleflight" @@ -59,6 +60,9 @@ var ( gitStorePath = filepath.Join(defaultGitterWorkDir, gitStoreFileName) fetchTimeout time.Duration semaphore chan struct{} // Request concurrency control + // LRU cache for recently loaded repositories (key: repo URL) + repoCache *ristretto.Cache[string, *Repository] + repoTTL time.Duration = 5 * time.Minute ) var validURLRegex = regexp.MustCompile(`^(https?|git|ssh)://`) @@ -109,10 +113,53 @@ func GetRepoLock(url string) *sync.RWMutex { return lock.(*sync.RWMutex) } -// runCmd executes a command with context cancellation handled by sending SIGINT. -// It logs cancellation errors separately as requested. -func runCmd(ctx context.Context, dir string, env []string, name string, args ...string) error { - logger.DebugContext(ctx, "Running command", slog.String("cmd", name), slog.Any("args", args)) +// repoCost is the cost funtion for a repository in the LRU cache. +// The memory cost of a repository is approximated from the num of commits and a base overhead. +func repoCost(repo *Repository) int64 { + // Mutex (8 bytes), string for repo path (say 128 bytes), root commit (assume 1 root only, 32 bytes) + repoOverhead := 168 + // Assuming per commit adds: + // - Commit struct (Hash, PatchID, Parent []int of size 1, Refs []string) + // = 20 + 20 + 24 + 8 + 24 + ~= 128 bytes + // - 1 pointer into []*Commit + // = 8 bytes + // - 1 entry in commitGraph ([][]int, assuming linear history) + // = 24 + 8 = 32 bytes + // - 1 entry to hashToIndex (map[SHA1]int) + // = 20 + 8 ~= 32 bytes + // - 1 entry to patchIDToCommits (map[SHA1][]int, assuming all commits are unique) + // = 20 + 24 + 8 ~= 64 bytes + // TOTAL: 264 bytes -> We round up to 300 for some buffer + costPerCommit := 300 + return int64(repoOverhead + len(repo.commits)*costPerCommit) +} + +// InitRepoCache initializes the LRU cache for repositories. +func InitRepoCache() { + var err error + repoCache, err = ristretto.NewCache(&ristretto.Config[string, *Repository]{ + // General guidance is to make NumCounters 10x the cache capacity (in terms of items) + NumCounters: 1e5, + MaxCost: 100 << 30, // 100 GB + BufferItems: 64, + Cost: repoCost, + // Check for TTL expiry every 60 seconds + TtlTickerDurationInSec: 60, + }) + if err != nil { + logger.FatalContext(context.Background(), "Failed to initialize repository cache", slog.Any("err", err)) + } +} + +// CloseRepoCache closes the LRU cache. +func CloseRepoCache() { + if repoCache != nil { + repoCache.Close() + } +} + +// prepareCmd prepares the command with context cancellation handled by sending SIGINT. +func prepareCmd(ctx context.Context, dir string, env []string, name string, args ...string) *exec.Cmd { cmd := exec.CommandContext(ctx, name, args...) if dir != "" { cmd.Dir = dir @@ -128,6 +175,14 @@ func runCmd(ctx context.Context, dir string, env []string, name string, args ... // Ensure it eventually dies if it ignores SIGINT cmd.WaitDelay = shutdownTimeout / 2 + return cmd +} + +// runCmd executes a command with context cancellation handled by sending SIGINT. +// It logs cancellation errors separately as requested. +func runCmd(ctx context.Context, dir string, env []string, name string, args ...string) error { + logger.DebugContext(ctx, "Running command", slog.String("cmd", name), slog.Any("args", args)) + cmd := prepareCmd(ctx, dir, env, name, args...) out, err := cmd.CombinedOutput() if err != nil { if ctx.Err() != nil { @@ -143,26 +198,6 @@ func runCmd(ctx context.Context, dir string, env []string, name string, args ... return nil } -// prepareCmd prepares the command with context cancellation handled by sending SIGINT. -func prepareCmd(ctx context.Context, dir string, env []string, name string, args ...string) *exec.Cmd { - cmd := exec.CommandContext(ctx, name, args...) - if dir != "" { - cmd.Dir = dir - } - if len(env) > 0 { - cmd.Env = append(os.Environ(), env...) - } - // Use SIGINT instead of SIGKILL for graceful shutdown of subprocesses - cmd.Cancel = func() error { - logger.DebugContext(ctx, "SIGINT sent to command", slog.String("cmd", name), slog.Any("args", args)) - return cmd.Process.Signal(syscall.SIGINT) - } - // Ensure it eventually dies if it ignores SIGINT - cmd.WaitDelay = shutdownTimeout / 2 - - return cmd -} - func isLocalRequest(r *http.Request) bool { host, _, err := net.SplitHostPort(r.RemoteAddr) if err != nil { @@ -180,6 +215,20 @@ func isLocalRequest(r *http.Request) bool { return ip.IsLoopback() } +func validateURL(r *http.Request, url string) error { + if url == "" { + return fmt.Errorf("missing url parameter") + } + // If request came from a local ip, don't do the check + if !isLocalRequest(r) { + // Check if url starts with protocols: http(s)://, git://, ssh:// + if !validURLRegex.MatchString(url) { + return fmt.Errorf("invalid url parameter") + } + } + return nil +} + func getRepoDirName(url string) string { base := path.Base(url) base = filepath.Base(base) @@ -205,6 +254,84 @@ func isIndexLockError(err error) bool { return strings.Contains(errString, "index.lock") && strings.Contains(errString, "File exists") } +// Helper function to unmarshal request body based on Content-Type (protobuf or JSON) +func unmarshalRequest(r *http.Request, body proto.Message) error { + data, err := io.ReadAll(r.Body) + if err != nil { + return err + } + defer r.Body.Close() + + contentType := r.Header.Get("Content-Type") + if contentType == "application/json" { + return protojson.Unmarshal(data, body) + } + // Default to protobuf + return proto.Unmarshal(data, body) +} + +// Helper function to marshal response body based on Content-Type (protobuf or JSON) +func marshalResponse(r *http.Request, m proto.Message) ([]byte, error) { + contentType := r.Header.Get("Content-Type") + if contentType == "application/json" { + return protojson.Marshal(m) + } + // Default to protobuf + return proto.Marshal(m) +} + +func doFetch(ctx context.Context, w http.ResponseWriter, url string, forceUpdate bool) error { + _, err, _ := gFetch.Do(url, func() (any, error) { + return nil, FetchRepo(ctx, url, forceUpdate) + }) + if err != nil { + logger.ErrorContext(ctx, "Error fetching blob", slog.Any("error", err)) + if isAuthError(err) { + http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusForbidden) + } else { + http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusInternalServerError) + } + return err + } + return nil +} + +// getFreshRepo handles fetching and loading of a repository +// If forceUpdate is true, it will always refetch and rebuild the repository (commit graph, patch ID, etc) +// Otherwise, it will use a cache if available +func getFreshRepo(ctx context.Context, w http.ResponseWriter, url string, forceUpdate bool) (*Repository, error) { + repoDirName := getRepoDirName(url) + repoPath := filepath.Join(gitStorePath, repoDirName) + + if !forceUpdate { + if repo, ok := repoCache.Get(url); ok { + // repoCache.Get() will not return expired items, so we can safely return the repo + logger.InfoContext(ctx, "Repository already in cache, skipping fetch and load") + return repo, nil + } + } + + if err := doFetch(ctx, w, url, forceUpdate); err != nil { + return nil, err + } + + repoAny, err, _ := gLoad.Do(repoPath, func() (any, error) { + repoLock := GetRepoLock(url) + repoLock.RLock() + defer repoLock.RUnlock() + + return LoadRepository(ctx, repoPath) + }) + if err != nil { + logger.ErrorContext(ctx, "Failed to load repository", slog.Any("error", err)) + http.Error(w, fmt.Sprintf("Failed to load repository: %v", err), http.StatusInternalServerError) + return nil, err + } + repo := repoAny.(*Repository) + repoCache.SetWithTTL(url, repo, 0, repoTTL) + return repo, nil +} + func FetchRepo(ctx context.Context, url string, forceUpdate bool) error { logger.InfoContext(ctx, "Starting fetch repo") start := time.Now() @@ -333,6 +460,8 @@ func main() { } loadLastFetchMap() + InitRepoCache() + defer CloseRepoCache() // Create a context that listens for the interrupt signal from the OS. ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) @@ -391,23 +520,14 @@ func main() { func gitHandler(w http.ResponseWriter, r *http.Request) { url := r.URL.Query().Get("url") - if url == "" { - http.Error(w, "Missing url parameter", http.StatusBadRequest) + if err := validateURL(r, url); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) return } forceUpdate := r.URL.Query().Get("force-update") == "true" ctx := context.WithValue(r.Context(), urlKey, url) - logger.InfoContext(ctx, "Received request: /git", slog.Bool("forceUpdate", forceUpdate), slog.String("remoteAddr", r.RemoteAddr)) - // If request came from a local ip, don't do the check - if !isLocalRequest(r) { - // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// - if !validURLRegex.MatchString(url) { - http.Error(w, "Invalid url parameter", http.StatusBadRequest) - return - } - } select { case semaphore <- struct{}{}: @@ -421,24 +541,7 @@ func gitHandler(w http.ResponseWriter, r *http.Request) { logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) // Fetch repo first - // Keep the key as the url regardless of forceUpdate. - // Occasionally this could be problematic if an existing unforce updated - // query is already inplace, no force update will happen. - // That is highly unlikely in our use case, as importer only queries - // the repo once, and always with force update. - // This is a tradeoff for simplicity to avoid having to setup locks per repo. - _, err, _ := gFetch.Do(url, func() (any, error) { - return nil, FetchRepo(ctx, url, forceUpdate) - }) - if err != nil { - logger.ErrorContext(ctx, "Error fetching blob", slog.Any("error", err)) - if isAuthError(err) { - http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusForbidden) - - return - } - http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusInternalServerError) - + if err := doFetch(ctx, w, url, forceUpdate); err != nil { return } @@ -469,36 +572,16 @@ func gitHandler(w http.ResponseWriter, r *http.Request) { func cacheHandler(w http.ResponseWriter, r *http.Request) { start := time.Now() - // POST requets body processing - data, err := io.ReadAll(r.Body) - if err != nil { - http.Error(w, fmt.Sprintf("Error reading body: %v", err), http.StatusBadRequest) - return - } - defer r.Body.Close() - body := &pb.CacheRequest{} - contentType := r.Header.Get("Content-Type") - if contentType == "application/x-protobuf" { - err = proto.Unmarshal(data, body) - } else { - // Default to JSON/protojson - err = protojson.Unmarshal(data, body) - } - - if err != nil { + if err := unmarshalRequest(r, body); err != nil { http.Error(w, fmt.Sprintf("Error unmarshaling request: %v", err), http.StatusBadRequest) return } url := body.Url - // If request came from a local ip, don't do the check - if !isLocalRequest(r) { - // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// - if !validURLRegex.MatchString(url) { - http.Error(w, "Invalid url parameter", http.StatusBadRequest) - return - } + if err := validateURL(r, url); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return } ctx := context.WithValue(r.Context(), urlKey, url) @@ -515,35 +598,7 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { } logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) - // Fetch repo if it's not fresh - if _, err, _ := gFetch.Do(url, func() (any, error) { - return nil, FetchRepo(ctx, url, body.ForceUpdate) - }); err != nil { - logger.ErrorContext(ctx, "Error fetching blob", slog.Any("error", err)) - if isAuthError(err) { - http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusForbidden) - - return - } - http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusInternalServerError) - - return - } - - repoDirName := getRepoDirName(url) - repoPath := filepath.Join(gitStorePath, repoDirName) - - _, err, _ = gLoad.Do(repoPath, func() (any, error) { - repoLock := GetRepoLock(url) - repoLock.RLock() - defer repoLock.RUnlock() - - return LoadRepository(ctx, repoPath) - }) - if err != nil { - logger.ErrorContext(ctx, "Failed to load repository", slog.Any("error", err)) - http.Error(w, fmt.Sprintf("Failed to load repository: %v", err), http.StatusInternalServerError) - + if _, err := getFreshRepo(ctx, w, url, body.ForceUpdate); err != nil { return } @@ -553,36 +608,16 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { start := time.Now() - // POST requets body processing - data, err := io.ReadAll(r.Body) - if err != nil { - http.Error(w, fmt.Sprintf("Error reading body: %v", err), http.StatusBadRequest) - return - } - defer r.Body.Close() - body := &pb.AffectedCommitsRequest{} - contentType := r.Header.Get("Content-Type") - if contentType == "application/x-protobuf" { - err = proto.Unmarshal(data, body) - } else { - // Default to JSON/protojson - err = protojson.Unmarshal(data, body) - } - - if err != nil { + if err := unmarshalRequest(r, body); err != nil { http.Error(w, fmt.Sprintf("Error unmarshaling request: %v", err), http.StatusBadRequest) return } url := body.Url - // If request came from a local ip, don't do the check - if !isLocalRequest(r) { - // Check if url starts with protocols: http(s)://, git://, ssh://, (s)ftp:// - if !validURLRegex.MatchString(url) { - http.Error(w, "Invalid url parameter", http.StatusBadRequest) - return - } + if err := validateURL(r, url); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return } se, err := separateEvents(body.Events) @@ -595,45 +630,23 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { cherrypickFixed := body.DetectCherrypicksFixed ctx := context.WithValue(r.Context(), urlKey, url) - logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", se.Introduced), slog.Any("fixed", se.Fixed), slog.Any("last_affected", se.LastAffected), slog.Any("limit", se.Limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed)) - semaphore <- struct{}{} - defer func() { <-semaphore }() - logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) - - // Fetch repo if it's not fresh - if _, err, _ := gFetch.Do(url, func() (any, error) { - return nil, FetchRepo(ctx, url, body.ForceUpdate) - }); err != nil { - logger.ErrorContext(ctx, "Error fetching blob", slog.Any("error", err)) - if isAuthError(err) { - http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusForbidden) - - return - } - http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusInternalServerError) + select { + case semaphore <- struct{}{}: + defer func() { <-semaphore }() + case <-ctx.Done(): + logger.WarnContext(ctx, "Request cancelled while waiting for semaphore") + http.Error(w, "Server context cancelled", http.StatusServiceUnavailable) return } + logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) - repoDirName := getRepoDirName(url) - repoPath := filepath.Join(gitStorePath, repoDirName) - - repoLock := GetRepoLock(url) - repoLock.RLock() - defer repoLock.RUnlock() - - repoAny, err, _ := gLoad.Do(repoPath, func() (any, error) { - return LoadRepository(ctx, repoPath) - }) + repo, err := getFreshRepo(ctx, w, url, body.ForceUpdate) if err != nil { - logger.ErrorContext(ctx, "Failed to load repository", slog.Any("error", err)) - http.Error(w, fmt.Sprintf("Failed to load repository: %v", err), http.StatusInternalServerError) - return } - repo := repoAny.(*Repository) var affectedCommits []*Commit if len(se.Limit) > 0 { @@ -650,22 +663,14 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { }) } - var out []byte - if contentType == "application/x-protobuf" { - out, err = proto.Marshal(resp) - w.Header().Set("Content-Type", "application/x-protobuf") - } else { - out, err = protojson.Marshal(resp) - w.Header().Set("Content-Type", "application/json") - } - + out, err := marshalResponse(r, resp) if err != nil { logger.ErrorContext(ctx, "Error marshaling affected commits", slog.Any("error", err)) http.Error(w, fmt.Sprintf("Error marshaling affected commits: %v", err), http.StatusInternalServerError) - return } + w.Header().Set("Content-Type", r.Header.Get("Content-Type")) w.WriteHeader(http.StatusOK) if _, err := w.Write(out); err != nil { logger.ErrorContext(ctx, "Error writing response", slog.Any("error", err)) diff --git a/go/cmd/gitter/gitter_test.go b/go/cmd/gitter/gitter_test.go index 6d11a038668..3eb915f602e 100644 --- a/go/cmd/gitter/gitter_test.go +++ b/go/cmd/gitter/gitter_test.go @@ -187,6 +187,7 @@ func TestCacheHandler(t *testing.T) { reqProto := &pb.CacheRequest{Url: tt.url} body, _ := protojson.Marshal(reqProto) req, err := http.NewRequest(http.MethodPost, "/cache", bytes.NewBuffer(body)) + req.Header.Set("Content-Type", "application/json") if err != nil { t.Fatal(err) } @@ -275,6 +276,7 @@ func TestAffectedCommitsHandler(t *testing.T) { body, _ := protojson.Marshal(reqProto) req, err := http.NewRequest(http.MethodPost, "/affected-commits", bytes.NewBuffer(body)) + req.Header.Set("Content-Type", "application/json") if err != nil { t.Fatal(err) } diff --git a/go/go.mod b/go/go.mod index 8c0b626dcba..573040084b9 100644 --- a/go/go.mod +++ b/go/go.mod @@ -33,6 +33,8 @@ require ( github.com/charmbracelet/x/windows v0.2.2 // indirect github.com/clipperhouse/displaywidth v0.11.0 // indirect github.com/clipperhouse/uax29/v2 v2.7.0 // indirect + github.com/dgraph-io/ristretto/v2 v2.4.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/muesli/cancelreader v0.2.2 // indirect ) diff --git a/go/go.sum b/go/go.sum index b66c77f9d24..f462cf4a4e3 100644 --- a/go/go.sum +++ b/go/go.sum @@ -77,6 +77,10 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/ristretto/v2 v2.4.0 h1:I/w09yLjhdcVD2QV192UJcq8dPBaAJb9pOuMyNy0XlU= +github.com/dgraph-io/ristretto/v2 v2.4.0/go.mod h1:0KsrXtXvnv0EqnzyowllbVJB8yBonswa2lTCK2gGo9E= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= From cb86421be8b1286f9930fb176659e382871ca83f Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 17 Mar 2026 05:19:49 +0000 Subject: [PATCH 29/39] intro=0 should not reach decodeString at all --- go/cmd/gitter/repository.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index e30cfc8c8ee..babb1c8937a 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -444,9 +444,11 @@ func (r *Repository) parseHashes(ctx context.Context, hashesStr []string) []int addedRoot := false // Only add root commits once if multiple intro=0 are provided for _, hash := range hashesStr { - if hash == "0" && !addedRoot { - indices = append(indices, r.rootCommits...) - addedRoot = true + if hash == "0" { + if !addedRoot { + indices = append(indices, r.rootCommits...) + addedRoot = true + } continue } From e92be8d4f80c875d5493758e1457b90ca0e380ee Mon Sep 17 00:00:00 2001 From: Joey L Date: Tue, 17 Mar 2026 23:21:39 +0000 Subject: [PATCH 30/39] Make repo cache configurable --- deployment/clouddeploy/gke-workers/base/gitter.yaml | 2 ++ .../environments/oss-vdb-test/gitter.yaml | 2 ++ .../gke-workers/environments/oss-vdb/gitter.yaml | 2 ++ go/cmd/gitter/gitter.go | 12 ++++++++---- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/deployment/clouddeploy/gke-workers/base/gitter.yaml b/deployment/clouddeploy/gke-workers/base/gitter.yaml index 8a46fd0537d..6d46d8222a1 100644 --- a/deployment/clouddeploy/gke-workers/base/gitter.yaml +++ b/deployment/clouddeploy/gke-workers/base/gitter.yaml @@ -29,6 +29,8 @@ spec: - "--port=8888" - "--work-dir=/work/gitter" - "--fetch-timeout=1h" + - "--repo-cache-ttl=1h" + - "--repo-cache-max-cost=107374182400" # 100GB env: - name: GOMEMLIMIT value: "100GiB" diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml index 25c098d7110..f8257a42226 100644 --- a/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml @@ -11,6 +11,8 @@ spec: - "--port=8888" - "--work-dir=/work/gitter" - "--fetch-timeout=1h" + - "--repo-cache-ttl=1h" + - "--repo-cache-max-cost=107374182400" # 100GB env: - name: GOOGLE_CLOUD_PROJECT value: oss-vdb-test diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml index fc1558ba8b4..658303d0316 100644 --- a/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml @@ -11,6 +11,8 @@ spec: - "--port=8888" - "--work-dir=/work/gitter" - "--fetch-timeout=1h" + - "--repo-cache-ttl=1h" + - "--repo-cache-max-cost=107374182400" # 100GB env: - name: GOOGLE_CLOUD_PROJECT value: oss-vdb diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 69536c94342..59c7c391c86 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -61,8 +61,9 @@ var ( fetchTimeout time.Duration semaphore chan struct{} // Request concurrency control // LRU cache for recently loaded repositories (key: repo URL) - repoCache *ristretto.Cache[string, *Repository] - repoTTL time.Duration = 5 * time.Minute + repoCache *ristretto.Cache[string, *Repository] + repoTTL time.Duration + repoCacheMaxCost int64 ) var validURLRegex = regexp.MustCompile(`^(https?|git|ssh)://`) @@ -136,11 +137,12 @@ func repoCost(repo *Repository) int64 { // InitRepoCache initializes the LRU cache for repositories. func InitRepoCache() { + numCounters := repoCacheMaxCost / (300 * 10000) var err error repoCache, err = ristretto.NewCache(&ristretto.Config[string, *Repository]{ // General guidance is to make NumCounters 10x the cache capacity (in terms of items) - NumCounters: 1e5, - MaxCost: 100 << 30, // 100 GB + NumCounters: numCounters, + MaxCost: repoCacheMaxCost, BufferItems: 64, Cost: repoCost, // Check for TTL expiry every 60 seconds @@ -449,6 +451,8 @@ func main() { workDir := flag.String("work-dir", defaultGitterWorkDir, "Work directory") flag.DurationVar(&fetchTimeout, "fetch-timeout", time.Hour, "Fetch timeout duration") concurrentLimit := flag.Int("concurrent-limit", 100, "Concurrent limit for unique requests") + flag.DurationVar(&repoTTL, "repo-cache-ttl", time.Hour, "Repository LRU cache time-to-live duration") + flag.Int64Var(&repoCacheMaxCost, "repo-cache-max-cost", 1<<30, "Repository LRU cache max cost (in bytes)") flag.Parse() semaphore = make(chan struct{}, *concurrentLimit) From abea20e541fb647ae4f8c82e21d7f956c0a945cb Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 18 Mar 2026 00:30:53 +0000 Subject: [PATCH 31/39] Cleanup repo_test. Add intro=0 and more edge cases --- go/cmd/gitter/repository.go | 8 - go/cmd/gitter/repository_test.go | 407 ++++++++++++++++--------------- 2 files changed, 216 insertions(+), 199 deletions(-) diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index babb1c8937a..e9f30c48cfc 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -123,14 +123,6 @@ func (r *Repository) getOrCreateIndex(hash SHA1) int { return idx } -// For test setup -func (r *Repository) addEdgeForTest(parent, child SHA1) { - pIdx := r.getOrCreateIndex(parent) - cIdx := r.getOrCreateIndex(child) - r.commitGraph[pIdx] = append(r.commitGraph[pIdx], cIdx) - r.commits[cIdx].Parents = append(r.commits[cIdx].Parents, pIdx) -} - // buildCommitGraph builds the commit graph and associate commit details from scratch // Returns a list of new commit indexes that don't have cached Patch IDs. // The new commit list is in reverse chronological order based on commit date (the default for git log). diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 1f06faf9663..9eb9a5f8db8 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -138,6 +138,14 @@ func TestLoadRepository(t *testing.T) { } } +// For test setup +func (r *Repository) addEdgeForTest(parent, child SHA1) { + pIdx := r.getOrCreateIndex(parent) + cIdx := r.getOrCreateIndex(child) + r.commitGraph[pIdx] = append(r.commitGraph[pIdx], cIdx) + r.commits[cIdx].Parents = append(r.commits[cIdx].Parents, pIdx) +} + // Helper to decode string into SHA1 func decodeSHA1(s string) SHA1 { var hash SHA1 @@ -265,78 +273,81 @@ func TestAffected_Introduced_Fixed(t *testing.T) { repo.addEdgeForTest(hD, hE) repo.addEdgeForTest(hF, hG) repo.addEdgeForTest(hH, hD) + repo.rootCommits = []int{0} // Root commit is A tests := []struct { name string - introduced []SHA1 - fixed []SHA1 - lastAffected []SHA1 + se *SeparatedEvents expected []SHA1 }{ { - name: "Linear: A introduced, B fixed", - introduced: []SHA1{hA}, - fixed: []SHA1{hB}, - expected: []SHA1{hA}, + name: "Linear: A introduced, B fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hB)}, + }, + expected: []SHA1{hA}, + }, + { + name: "Branch propagation: A introduced, C fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hC)}, + }, + expected: []SHA1{hA, hB, hH}, }, { - name: "Branch propagation: A introduced, C fixed", - introduced: []SHA1{hA}, - fixed: []SHA1{hC}, - expected: []SHA1{hA, hB, hH}, + name: "Re-introduced: (A,C) introduced, (B,D,G) fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA), encodeSHA1(hC)}, + Fixed: []string{encodeSHA1(hB), encodeSHA1(hD), encodeSHA1(hG)}, + }, + expected: []SHA1{hA, hC, hF}, }, { - name: "Re-introduced: (A,C) introduced, (B,D,G) fixed", - introduced: []SHA1{hA, hC}, - fixed: []SHA1{hB, hD, hG}, - expected: []SHA1{hA, hC, hF}, + name: "Merge intro: H introduced, E fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hH)}, + Fixed: []string{encodeSHA1(hE)}, + }, + expected: []SHA1{hH, hD}, }, { - name: "Merge intro: H introduced, E fixed", - introduced: []SHA1{hH}, - fixed: []SHA1{hE}, - expected: []SHA1{hH, hD}, + name: "Merge fix: A introduced, H fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hH)}, + }, + expected: []SHA1{hA, hB, hC, hF, hG}, }, { - name: "Merge fix: A introduced, H fixed", - introduced: []SHA1{hA}, - fixed: []SHA1{hH}, - expected: []SHA1{hA, hB, hC, hF, hG}, + name: "Merge intro and fix (different branches): C introduced, H fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hC)}, + Fixed: []string{encodeSHA1(hH)}, + }, + expected: []SHA1{hC, hD, hE, hF, hG}, }, { - name: "Merge intro and fix (different branches): C introduced, H fixed", - introduced: []SHA1{hC}, - fixed: []SHA1{hH}, - expected: []SHA1{hC, hD, hE, hF, hG}, + name: "Introduced = 0: C fixed", + se: &SeparatedEvents{ + Introduced: []string{"0"}, + Fixed: []string{encodeSHA1(hC)}, + }, + expected: []SHA1{hA, hB, hH}, }, { - name: "Everything affected if no fix", - introduced: []SHA1{hA}, - expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, + name: "Introduced = 0: no fix", + se: &SeparatedEvents{ + Introduced: []string{"0"}, + }, + expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Convert SHA1 to string for the new API - introStrs := make([]string, len(tt.introduced)) - for i, h := range tt.introduced { - introStrs[i] = encodeSHA1(h) - } - fixedStrs := make([]string, len(tt.fixed)) - for i, h := range tt.fixed { - fixedStrs[i] = encodeSHA1(h) - } - laStrs := make([]string, len(tt.lastAffected)) - for i, h := range tt.lastAffected { - laStrs[i] = encodeSHA1(h) - } - se := &SeparatedEvents{ - Introduced: introStrs, - Fixed: fixedStrs, - LastAffected: laStrs, - } - gotCommits := repo.Affected(t.Context(), se, false, false) + gotCommits := repo.Affected(t.Context(), tt.se, false, false) var got []SHA1 for _, c := range gotCommits { @@ -378,78 +389,81 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { repo.addEdgeForTest(hD, hE) repo.addEdgeForTest(hF, hG) repo.addEdgeForTest(hH, hD) + repo.rootCommits = []int{0} // Root commit is A tests := []struct { - name string - introduced []SHA1 - fixed []SHA1 - lastAffected []SHA1 - expected []SHA1 + name string + se *SeparatedEvents + expected []SHA1 }{ { - name: "Linear: D introduced, E lastAffected", - introduced: []SHA1{hD}, - lastAffected: []SHA1{hE}, - expected: []SHA1{hD, hE}, + name: "Linear: D introduced, E lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hD)}, + LastAffected: []string{encodeSHA1(hE)}, + }, + expected: []SHA1{hD, hE}, }, { - name: "Branch propagation: A introduced, C lastAffected", - introduced: []SHA1{hA}, - lastAffected: []SHA1{hC}, - expected: []SHA1{hA, hB, hC, hH}, + name: "Branch propagation: A introduced, C lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + LastAffected: []string{encodeSHA1(hC)}, + }, + expected: []SHA1{hA, hB, hC, hH}, }, { - name: "Re-introduced: (A,D) introduced, (B,E) lastAffected", - introduced: []SHA1{hA, hD}, - lastAffected: []SHA1{hB, hE}, - expected: []SHA1{hA, hB, hD, hE}, + name: "Re-introduced: (A,D) introduced, (B,E) lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA), encodeSHA1(hD)}, + LastAffected: []string{encodeSHA1(hB), encodeSHA1(hE)}, + }, + expected: []SHA1{hA, hB, hD, hE}, }, { - name: "Merge intro: H introduced, D lastAffected", - introduced: []SHA1{hH}, - lastAffected: []SHA1{hD}, - expected: []SHA1{hH, hD}, + name: "Merge intro: H introduced, D lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hH)}, + LastAffected: []string{encodeSHA1(hD)}, + }, + expected: []SHA1{hH, hD}, }, { - name: "Merge lastAffected: A introduced, H lastAffected", - introduced: []SHA1{hA}, - lastAffected: []SHA1{hH}, - expected: []SHA1{hA, hB, hC, hF, hG, hH}, + name: "Merge lastAffected: A introduced, H lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + LastAffected: []string{encodeSHA1(hH)}, + }, + expected: []SHA1{hA, hB, hC, hF, hG, hH}, }, { - name: "Merge intro and lastAffected (different branches): C introduced, H lastAffected", - introduced: []SHA1{hC}, - lastAffected: []SHA1{hH}, - expected: []SHA1{hC, hF, hG}, + name: "Merge intro and lastAffected (different branches): C introduced, H lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hC)}, + LastAffected: []string{encodeSHA1(hH)}, + }, + expected: []SHA1{hC, hF, hG}, }, { - name: "Everything affected if no lastAffected", - introduced: []SHA1{hA}, - expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, + name: "Introduced = 0: C lastAffected", + se: &SeparatedEvents{ + Introduced: []string{"0"}, + LastAffected: []string{encodeSHA1(hC)}, + }, + expected: []SHA1{hA, hB, hC, hH}, + }, + { + name: "Introduced = 0: no fix", + se: &SeparatedEvents{ + Introduced: []string{"0"}, + }, + expected: []SHA1{hA, hB, hC, hD, hE, hF, hG, hH}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Convert SHA1 to string for the new API - introStrs := make([]string, len(tt.introduced)) - for i, h := range tt.introduced { - introStrs[i] = encodeSHA1(h) - } - fixedStrs := make([]string, len(tt.fixed)) - for i, h := range tt.fixed { - fixedStrs[i] = encodeSHA1(h) - } - laStrs := make([]string, len(tt.lastAffected)) - for i, h := range tt.lastAffected { - laStrs[i] = encodeSHA1(h) - } - se := &SeparatedEvents{ - Introduced: introStrs, - Fixed: fixedStrs, - LastAffected: laStrs, - } - gotCommits := repo.Affected(t.Context(), se, false, false) + gotCommits := repo.Affected(t.Context(), tt.se, false, false) var got []SHA1 for _, c := range gotCommits { @@ -494,49 +508,58 @@ func TestAffected_Combined(t *testing.T) { repo.addEdgeForTest(hH, hD) tests := []struct { - name string - introduced []SHA1 - fixed []SHA1 - lastAffected []SHA1 - expected []SHA1 + name string + se *SeparatedEvents + expected []SHA1 }{ { - name: "Branching out: C introduced, G fixed, D lastAffected", - introduced: []SHA1{hC}, - fixed: []SHA1{hG}, - lastAffected: []SHA1{hD}, - expected: []SHA1{hC, hD, hF}, + name: "Branching out: C introduced, G fixed, D lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hC)}, + Fixed: []string{encodeSHA1(hG)}, + LastAffected: []string{encodeSHA1(hD)}, + }, + expected: []SHA1{hC, hD, hF}, }, { - name: "Redundant Blocking: A introduced, B fixed, E lastAffected", - introduced: []SHA1{hA}, - fixed: []SHA1{hB}, - lastAffected: []SHA1{hE}, - expected: []SHA1{hA}, + name: "Redundant Blocking: A introduced, B fixed, E lastAffected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hB)}, + LastAffected: []string{encodeSHA1(hE)}, + }, + expected: []SHA1{hA}, + }, + { + name: "Introduced=Fixed: No affected commit", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hB)}, + Fixed: []string{encodeSHA1(hB)}, + }, + expected: []SHA1{}, + }, + { + name: "Introduced=lastAffected: Only 1 commit affected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hB)}, + LastAffected: []string{encodeSHA1(hB)}, + }, + expected: []SHA1{hB}, + }, + { + name: "Fixed=lastAffected: Stop at fix, lastAffected no effect", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hB)}, + LastAffected: []string{encodeSHA1(hB)}, + }, + expected: []SHA1{hA}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Convert SHA1 to string for the new API - introStrs := make([]string, len(tt.introduced)) - for i, h := range tt.introduced { - introStrs[i] = encodeSHA1(h) - } - fixedStrs := make([]string, len(tt.fixed)) - for i, h := range tt.fixed { - fixedStrs[i] = encodeSHA1(h) - } - laStrs := make([]string, len(tt.lastAffected)) - for i, h := range tt.lastAffected { - laStrs[i] = encodeSHA1(h) - } - se := &SeparatedEvents{ - Introduced: introStrs, - Fixed: fixedStrs, - LastAffected: laStrs, - } - gotCommits := repo.Affected(t.Context(), se, false, false) + gotCommits := repo.Affected(t.Context(), tt.se, false, false) var got []SHA1 for _, c := range gotCommits { @@ -579,6 +602,7 @@ func TestAffected_Cherrypick(t *testing.T) { repo.addEdgeForTest(hE, hF) repo.addEdgeForTest(hF, hG) repo.addEdgeForTest(hG, hH) + repo.rootCommits = []int{0} // Setup PatchID map for cherrypicking idxA := repo.getOrCreateIndex(hA) @@ -595,32 +619,47 @@ func TestAffected_Cherrypick(t *testing.T) { tests := []struct { name string - introduced []SHA1 - fixed []SHA1 + se *SeparatedEvents cherrypickIntro bool cherrypickFixed bool expected []SHA1 }{ { - name: "Cherrypick Introduced Only: A introduced, G fixed", - introduced: []SHA1{hA}, - fixed: []SHA1{hG}, + name: "Cherrypick Introduced Only: A introduced, G fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hG)}, + }, cherrypickIntro: true, cherrypickFixed: false, expected: []SHA1{hA, hB, hC, hD, hE, hF}, }, { - name: "Cherrypick Fixed Only: A introduced, G fixed", - introduced: []SHA1{hA}, - fixed: []SHA1{hG}, + name: "Cherrypick Fixed Only: A introduced, G fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hG)}, + }, cherrypickIntro: false, cherrypickFixed: true, expected: []SHA1{hA, hB}, }, { - name: "Cherrypick Introduced and Fixed: A introduced, G fixed", - introduced: []SHA1{hA}, - fixed: []SHA1{hG}, + name: "Cherrypick Introduced and Fixed: A introduced, G fixed", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Fixed: []string{encodeSHA1(hG)}, + }, + cherrypickIntro: true, + cherrypickFixed: true, + expected: []SHA1{hA, hB, hE, hF}, + }, + { + name: "Cherrypick Introduced=0: G fixed", + se: &SeparatedEvents{ + Introduced: []string{"0"}, + Fixed: []string{encodeSHA1(hG)}, + }, cherrypickIntro: true, cherrypickFixed: true, expected: []SHA1{hA, hB, hE, hF}, @@ -629,21 +668,7 @@ func TestAffected_Cherrypick(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Convert SHA1 to string for the new API - introStrs := make([]string, len(tt.introduced)) - for i, h := range tt.introduced { - introStrs[i] = encodeSHA1(h) - } - fixedStrs := make([]string, len(tt.fixed)) - for i, h := range tt.fixed { - fixedStrs[i] = encodeSHA1(h) - } - - se := &SeparatedEvents{ - Introduced: introStrs, - Fixed: fixedStrs, - } - gotCommits := repo.Affected(t.Context(), se, tt.cherrypickIntro, tt.cherrypickFixed) + gotCommits := repo.Affected(t.Context(), tt.se, tt.cherrypickIntro, tt.cherrypickFixed) var got []SHA1 for _, c := range gotCommits { @@ -682,50 +707,50 @@ func TestLimit(t *testing.T) { repo.addEdgeForTest(hD, hE) repo.addEdgeForTest(hF, hG) repo.addEdgeForTest(hG, hH) + repo.rootCommits = []int{0} // A is root commit tests := []struct { - name string - introduced []SHA1 - limit []SHA1 - expected []SHA1 + name string + se *SeparatedEvents + expected []SHA1 }{ { - name: "One branch: A introduced, D limit", - introduced: []SHA1{hA}, - limit: []SHA1{hD}, - expected: []SHA1{hA, hB, hC}, + name: "One branch: A introduced, D limit", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Limit: []string{encodeSHA1(hD)}, + }, + expected: []SHA1{hA, hB, hC}, }, { - name: "Side branch: A introduced, G limit", - introduced: []SHA1{hA}, - limit: []SHA1{hG}, - expected: []SHA1{hA, hB, hF}, + name: "Side branch: A introduced, G limit", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Limit: []string{encodeSHA1(hG)}, + }, + expected: []SHA1{hA, hB, hF}, }, { - name: "Two branches: A introduced, (D,G) limit", - introduced: []SHA1{hA}, - limit: []SHA1{hD, hG}, - expected: []SHA1{hA, hB, hC, hF}, + name: "Two branches: A introduced, (D,G) limit", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Limit: []string{encodeSHA1(hD), encodeSHA1(hG)}, + }, + expected: []SHA1{hA, hB, hC, hF}, + }, + { + name: "Introduced=0, G limit", + se: &SeparatedEvents{ + Introduced: []string{"0"}, + Limit: []string{encodeSHA1(hG)}, + }, + expected: []SHA1{hA, hB, hF}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Convert SHA1 to string for the new API - introStrs := make([]string, len(tt.introduced)) - for i, h := range tt.introduced { - introStrs[i] = encodeSHA1(h) - } - limitStrs := make([]string, len(tt.limit)) - for i, h := range tt.limit { - limitStrs[i] = encodeSHA1(h) - } - - se := &SeparatedEvents{ - Introduced: introStrs, - Limit: limitStrs, - } - gotCommits := repo.Limit(t.Context(), se) + gotCommits := repo.Limit(t.Context(), tt.se) var got []SHA1 for _, c := range gotCommits { From 134d705f34980ded4bb29d72c1bec0dd0e88e587 Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 18 Mar 2026 02:57:27 +0000 Subject: [PATCH 32/39] Lint --- go/cmd/gitter/gitter.go | 35 +++++++++++++++++++------------- go/cmd/gitter/repository.go | 7 +++++-- go/cmd/gitter/repository_test.go | 14 ++++++------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 59c7c391c86..babe4e1cfd4 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -80,17 +80,17 @@ type SeparatedEvents struct { func separateEvents(events []*pb.Event) (*SeparatedEvents, error) { se := &SeparatedEvents{} for _, event := range events { - switch event.EventType { + switch event.GetEventType() { case pb.EventType_INTRODUCED: - se.Introduced = append(se.Introduced, event.Hash) + se.Introduced = append(se.Introduced, event.GetHash()) case pb.EventType_FIXED: - se.Fixed = append(se.Fixed, event.Hash) + se.Fixed = append(se.Fixed, event.GetHash()) case pb.EventType_LAST_AFFECTED: - se.LastAffected = append(se.LastAffected, event.Hash) + se.LastAffected = append(se.LastAffected, event.GetHash()) case pb.EventType_LIMIT: - se.Limit = append(se.Limit, event.Hash) + se.Limit = append(se.Limit, event.GetHash()) default: - return nil, fmt.Errorf("invalid event type: %s", event.EventType) + return nil, fmt.Errorf("invalid event type: %s", event.GetEventType()) } } @@ -114,7 +114,7 @@ func GetRepoLock(url string) *sync.RWMutex { return lock.(*sync.RWMutex) } -// repoCost is the cost funtion for a repository in the LRU cache. +// repoCost is the cost function for a repository in the LRU cache. // The memory cost of a repository is approximated from the num of commits and a base overhead. func repoCost(repo *Repository) int64 { // Mutex (8 bytes), string for repo path (say 128 bytes), root commit (assume 1 root only, 32 bytes) @@ -132,6 +132,7 @@ func repoCost(repo *Repository) int64 { // = 20 + 24 + 8 ~= 64 bytes // TOTAL: 264 bytes -> We round up to 300 for some buffer costPerCommit := 300 + return int64(repoOverhead + len(repo.commits)*costPerCommit) } @@ -228,6 +229,7 @@ func validateURL(r *http.Request, url string) error { return fmt.Errorf("invalid url parameter") } } + return nil } @@ -293,8 +295,10 @@ func doFetch(ctx context.Context, w http.ResponseWriter, url string, forceUpdate } else { http.Error(w, fmt.Sprintf("Error fetching blob: %v", err), http.StatusInternalServerError) } + return err } + return nil } @@ -327,10 +331,12 @@ func getFreshRepo(ctx context.Context, w http.ResponseWriter, url string, forceU if err != nil { logger.ErrorContext(ctx, "Failed to load repository", slog.Any("error", err)) http.Error(w, fmt.Sprintf("Failed to load repository: %v", err), http.StatusInternalServerError) + return nil, err } repo := repoAny.(*Repository) repoCache.SetWithTTL(url, repo, 0, repoTTL) + return repo, nil } @@ -582,7 +588,7 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { return } - url := body.Url + url := body.GetUrl() if err := validateURL(r, url); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return @@ -602,7 +608,7 @@ func cacheHandler(w http.ResponseWriter, r *http.Request) { } logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) - if _, err := getFreshRepo(ctx, w, url, body.ForceUpdate); err != nil { + if _, err := getFreshRepo(ctx, w, url, body.GetForceUpdate()); err != nil { return } @@ -618,20 +624,20 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { return } - url := body.Url + url := body.GetUrl() if err := validateURL(r, url); err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } - se, err := separateEvents(body.Events) + se, err := separateEvents(body.GetEvents()) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } - cherrypickIntro := body.DetectCherrypicksIntroduced - cherrypickFixed := body.DetectCherrypicksFixed + cherrypickIntro := body.GetDetectCherrypicksIntroduced() + cherrypickFixed := body.GetDetectCherrypicksFixed() ctx := context.WithValue(r.Context(), urlKey, url) logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", se.Introduced), slog.Any("fixed", se.Fixed), slog.Any("last_affected", se.LastAffected), slog.Any("limit", se.Limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed)) @@ -647,7 +653,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { } logger.DebugContext(ctx, "Concurrent requests", slog.Int("count", len(semaphore))) - repo, err := getFreshRepo(ctx, w, url, body.ForceUpdate) + repo, err := getFreshRepo(ctx, w, url, body.GetForceUpdate()) if err != nil { return } @@ -671,6 +677,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { if err != nil { logger.ErrorContext(ctx, "Error marshaling affected commits", slog.Any("error", err)) http.Error(w, fmt.Sprintf("Error marshaling affected commits: %v", err), http.StatusInternalServerError) + return } diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index e9f30c48cfc..c581edcbb3f 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -441,6 +441,7 @@ func (r *Repository) parseHashes(ctx context.Context, hashesStr []string) []int indices = append(indices, r.rootCommits...) addedRoot = true } + continue } @@ -492,6 +493,8 @@ func (r *Repository) expandByCherrypick(commits []int) []int { } // Affected returns a list of commits that are affected by the given introduced, fixed and last_affected events +// A commit is affected when: from at least one introduced that is an ancestor of the commit, there is no path between them that passes through a fix. +// A fix can either be a fixed commit, or the children of a lastAffected commit. func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypickIntro, cherrypickFixed bool) []*Commit { logger.InfoContext(ctx, "Starting affected commit walking") start := time.Now() @@ -532,7 +535,7 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi // Preallocating the big slices, they will be cleared inside the per-intro graph walking queue := make([]int, 0, len(r.commits)) affectedFromIntro := make([]bool, len(r.commits)) - updatedIdx := make([]int, len(r.commits)) + updatedIdx := make([]int, 0, len(r.commits)) unaffectable := make([]bool, len(r.commits)) visited := make([]bool, len(r.commits)) @@ -597,7 +600,7 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi } // Add the final affected list of this introduced commit to the global set - // We only look at the index are are updated in this loop + // We only look at the index that are updated in this loop for _, commitIdx := range updatedIdx { if affectedFromIntro[commitIdx] { affectedMap[commitIdx] = true diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 9eb9a5f8db8..94df505595c 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -276,9 +276,9 @@ func TestAffected_Introduced_Fixed(t *testing.T) { repo.rootCommits = []int{0} // Root commit is A tests := []struct { - name string - se *SeparatedEvents - expected []SHA1 + name string + se *SeparatedEvents + expected []SHA1 }{ { name: "Linear: A introduced, B fixed", @@ -332,7 +332,7 @@ func TestAffected_Introduced_Fixed(t *testing.T) { name: "Introduced = 0: C fixed", se: &SeparatedEvents{ Introduced: []string{"0"}, - Fixed: []string{encodeSHA1(hC)}, + Fixed: []string{encodeSHA1(hC)}, }, expected: []SHA1{hA, hB, hH}, }, @@ -447,7 +447,7 @@ func TestAffected_Introduced_LastAffected(t *testing.T) { { name: "Introduced = 0: C lastAffected", se: &SeparatedEvents{ - Introduced: []string{"0"}, + Introduced: []string{"0"}, LastAffected: []string{encodeSHA1(hC)}, }, expected: []SHA1{hA, hB, hC, hH}, @@ -533,8 +533,8 @@ func TestAffected_Combined(t *testing.T) { { name: "Introduced=Fixed: No affected commit", se: &SeparatedEvents{ - Introduced: []string{encodeSHA1(hB)}, - Fixed: []string{encodeSHA1(hB)}, + Introduced: []string{encodeSHA1(hB)}, + Fixed: []string{encodeSHA1(hB)}, }, expected: []SHA1{}, }, From dceedbc507cc0dd694a28bc1b5259b25c340648c Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 18 Mar 2026 03:05:08 +0000 Subject: [PATCH 33/39] It's never just one lint commit --- go/cmd/gitter/gitter.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index babe4e1cfd4..d72d302a265 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -6,6 +6,7 @@ import ( "context" "crypto/sha256" "encoding/hex" + "errors" "flag" "fmt" "io" @@ -95,7 +96,7 @@ func separateEvents(events []*pb.Event) (*SeparatedEvents, error) { } if len(se.Limit) > 0 && (len(se.Fixed) > 0 || len(se.LastAffected) > 0) { - return nil, fmt.Errorf("limit and fixed/last_affected shouldn't exist in the same request") + return nil, errors.New("limit and fixed/last_affected shouldn't exist in the same request") } return se, nil @@ -220,13 +221,13 @@ func isLocalRequest(r *http.Request) bool { func validateURL(r *http.Request, url string) error { if url == "" { - return fmt.Errorf("missing url parameter") + return errors.New("missing url parameter") } // If request came from a local ip, don't do the check if !isLocalRequest(r) { // Check if url starts with protocols: http(s)://, git://, ssh:// if !validURLRegex.MatchString(url) { - return fmt.Errorf("invalid url parameter") + return errors.New("invalid url parameter") } } From ef8f46031adec23f51fd0a2bc2de07a9944253c8 Mon Sep 17 00:00:00 2001 From: Joey L Date: Wed, 18 Mar 2026 03:53:42 +0000 Subject: [PATCH 34/39] Bring refs to top-level response --- go/cmd/gitter/gitter.go | 12 +- go/cmd/gitter/pb/repository/repository.pb.go | 197 ++++++++++++------- go/cmd/gitter/pb/repository/repository.proto | 7 +- 3 files changed, 143 insertions(+), 73 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index d72d302a265..e417bc369e4 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -666,12 +666,20 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { affectedCommits = repo.Affected(ctx, se, cherrypickIntro, cherrypickFixed) } - resp := &pb.AffectedCommitsResponse{Commits: make([]*pb.AffectedCommit, 0, len(affectedCommits))} + resp := &pb.AffectedCommitsResponse{ + Commits: make([]*pb.AffectedCommit, 0, len(affectedCommits)), + Refs: make([]*pb.AffectedRefs, 0), + } for _, c := range affectedCommits { resp.Commits = append(resp.Commits, &pb.AffectedCommit{ Hash: c.Hash[:], - Refs: c.Refs, }) + for _, ref := range c.Refs { + resp.Refs = append(resp.Refs, &pb.AffectedRefs{ + Ref: ref, + Hash: c.Hash[:], + }) + } } out, err := marshalResponse(r, resp) diff --git a/go/cmd/gitter/pb/repository/repository.pb.go b/go/cmd/gitter/pb/repository/repository.pb.go index c3df1435065..9ffa342515c 100644 --- a/go/cmd/gitter/pb/repository/repository.pb.go +++ b/go/cmd/gitter/pb/repository/repository.pb.go @@ -2,7 +2,7 @@ // versions: // protoc-gen-go v1.36.11 // protoc v3.21.12 -// source: repository.proto +// source: pb/repository/repository.proto package repository @@ -57,11 +57,11 @@ func (x EventType) String() string { } func (EventType) Descriptor() protoreflect.EnumDescriptor { - return file_repository_proto_enumTypes[0].Descriptor() + return file_pb_repository_repository_proto_enumTypes[0].Descriptor() } func (EventType) Type() protoreflect.EnumType { - return &file_repository_proto_enumTypes[0] + return &file_pb_repository_repository_proto_enumTypes[0] } func (x EventType) Number() protoreflect.EnumNumber { @@ -70,7 +70,7 @@ func (x EventType) Number() protoreflect.EnumNumber { // Deprecated: Use EventType.Descriptor instead. func (EventType) EnumDescriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{0} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{0} } type CommitDetail struct { @@ -83,7 +83,7 @@ type CommitDetail struct { func (x *CommitDetail) Reset() { *x = CommitDetail{} - mi := &file_repository_proto_msgTypes[0] + mi := &file_pb_repository_repository_proto_msgTypes[0] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -95,7 +95,7 @@ func (x *CommitDetail) String() string { func (*CommitDetail) ProtoMessage() {} func (x *CommitDetail) ProtoReflect() protoreflect.Message { - mi := &file_repository_proto_msgTypes[0] + mi := &file_pb_repository_repository_proto_msgTypes[0] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -108,7 +108,7 @@ func (x *CommitDetail) ProtoReflect() protoreflect.Message { // Deprecated: Use CommitDetail.ProtoReflect.Descriptor instead. func (*CommitDetail) Descriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{0} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{0} } func (x *CommitDetail) GetHash() []byte { @@ -136,7 +136,7 @@ type RepositoryCache struct { func (x *RepositoryCache) Reset() { *x = RepositoryCache{} - mi := &file_repository_proto_msgTypes[1] + mi := &file_pb_repository_repository_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -148,7 +148,7 @@ func (x *RepositoryCache) String() string { func (*RepositoryCache) ProtoMessage() {} func (x *RepositoryCache) ProtoReflect() protoreflect.Message { - mi := &file_repository_proto_msgTypes[1] + mi := &file_pb_repository_repository_proto_msgTypes[1] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -161,7 +161,7 @@ func (x *RepositoryCache) ProtoReflect() protoreflect.Message { // Deprecated: Use RepositoryCache.ProtoReflect.Descriptor instead. func (*RepositoryCache) Descriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{1} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{1} } func (x *RepositoryCache) GetCommits() []*CommitDetail { @@ -174,14 +174,13 @@ func (x *RepositoryCache) GetCommits() []*CommitDetail { type AffectedCommit struct { state protoimpl.MessageState `protogen:"open.v1"` Hash []byte `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"` - Refs []string `protobuf:"bytes,2,rep,name=refs,proto3" json:"refs,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *AffectedCommit) Reset() { *x = AffectedCommit{} - mi := &file_repository_proto_msgTypes[2] + mi := &file_pb_repository_repository_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -193,7 +192,7 @@ func (x *AffectedCommit) String() string { func (*AffectedCommit) ProtoMessage() {} func (x *AffectedCommit) ProtoReflect() protoreflect.Message { - mi := &file_repository_proto_msgTypes[2] + mi := &file_pb_repository_repository_proto_msgTypes[2] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -206,7 +205,7 @@ func (x *AffectedCommit) ProtoReflect() protoreflect.Message { // Deprecated: Use AffectedCommit.ProtoReflect.Descriptor instead. func (*AffectedCommit) Descriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{2} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{2} } func (x *AffectedCommit) GetHash() []byte { @@ -216,9 +215,54 @@ func (x *AffectedCommit) GetHash() []byte { return nil } -func (x *AffectedCommit) GetRefs() []string { +type AffectedRefs struct { + state protoimpl.MessageState `protogen:"open.v1"` + Ref string `protobuf:"bytes,1,opt,name=ref,proto3" json:"ref,omitempty"` + Hash []byte `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AffectedRefs) Reset() { + *x = AffectedRefs{} + mi := &file_pb_repository_repository_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AffectedRefs) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AffectedRefs) ProtoMessage() {} + +func (x *AffectedRefs) ProtoReflect() protoreflect.Message { + mi := &file_pb_repository_repository_proto_msgTypes[3] if x != nil { - return x.Refs + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AffectedRefs.ProtoReflect.Descriptor instead. +func (*AffectedRefs) Descriptor() ([]byte, []int) { + return file_pb_repository_repository_proto_rawDescGZIP(), []int{3} +} + +func (x *AffectedRefs) GetRef() string { + if x != nil { + return x.Ref + } + return "" +} + +func (x *AffectedRefs) GetHash() []byte { + if x != nil { + return x.Hash } return nil } @@ -226,13 +270,14 @@ func (x *AffectedCommit) GetRefs() []string { type AffectedCommitsResponse struct { state protoimpl.MessageState `protogen:"open.v1"` Commits []*AffectedCommit `protobuf:"bytes,1,rep,name=commits,proto3" json:"commits,omitempty"` + Refs []*AffectedRefs `protobuf:"bytes,2,rep,name=refs,proto3" json:"refs,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *AffectedCommitsResponse) Reset() { *x = AffectedCommitsResponse{} - mi := &file_repository_proto_msgTypes[3] + mi := &file_pb_repository_repository_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -244,7 +289,7 @@ func (x *AffectedCommitsResponse) String() string { func (*AffectedCommitsResponse) ProtoMessage() {} func (x *AffectedCommitsResponse) ProtoReflect() protoreflect.Message { - mi := &file_repository_proto_msgTypes[3] + mi := &file_pb_repository_repository_proto_msgTypes[4] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -257,7 +302,7 @@ func (x *AffectedCommitsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use AffectedCommitsResponse.ProtoReflect.Descriptor instead. func (*AffectedCommitsResponse) Descriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{3} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{4} } func (x *AffectedCommitsResponse) GetCommits() []*AffectedCommit { @@ -267,6 +312,13 @@ func (x *AffectedCommitsResponse) GetCommits() []*AffectedCommit { return nil } +func (x *AffectedCommitsResponse) GetRefs() []*AffectedRefs { + if x != nil { + return x.Refs + } + return nil +} + type Event struct { state protoimpl.MessageState `protogen:"open.v1"` EventType EventType `protobuf:"varint,1,opt,name=event_type,json=eventType,proto3,enum=gitter.EventType" json:"event_type,omitempty"` @@ -277,7 +329,7 @@ type Event struct { func (x *Event) Reset() { *x = Event{} - mi := &file_repository_proto_msgTypes[4] + mi := &file_pb_repository_repository_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -289,7 +341,7 @@ func (x *Event) String() string { func (*Event) ProtoMessage() {} func (x *Event) ProtoReflect() protoreflect.Message { - mi := &file_repository_proto_msgTypes[4] + mi := &file_pb_repository_repository_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -302,7 +354,7 @@ func (x *Event) ProtoReflect() protoreflect.Message { // Deprecated: Use Event.ProtoReflect.Descriptor instead. func (*Event) Descriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{4} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{5} } func (x *Event) GetEventType() EventType { @@ -329,7 +381,7 @@ type CacheRequest struct { func (x *CacheRequest) Reset() { *x = CacheRequest{} - mi := &file_repository_proto_msgTypes[5] + mi := &file_pb_repository_repository_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -341,7 +393,7 @@ func (x *CacheRequest) String() string { func (*CacheRequest) ProtoMessage() {} func (x *CacheRequest) ProtoReflect() protoreflect.Message { - mi := &file_repository_proto_msgTypes[5] + mi := &file_pb_repository_repository_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -354,7 +406,7 @@ func (x *CacheRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CacheRequest.ProtoReflect.Descriptor instead. func (*CacheRequest) Descriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{5} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{6} } func (x *CacheRequest) GetUrl() string { @@ -384,7 +436,7 @@ type AffectedCommitsRequest struct { func (x *AffectedCommitsRequest) Reset() { *x = AffectedCommitsRequest{} - mi := &file_repository_proto_msgTypes[6] + mi := &file_pb_repository_repository_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -396,7 +448,7 @@ func (x *AffectedCommitsRequest) String() string { func (*AffectedCommitsRequest) ProtoMessage() {} func (x *AffectedCommitsRequest) ProtoReflect() protoreflect.Message { - mi := &file_repository_proto_msgTypes[6] + mi := &file_pb_repository_repository_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -409,7 +461,7 @@ func (x *AffectedCommitsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use AffectedCommitsRequest.ProtoReflect.Descriptor instead. func (*AffectedCommitsRequest) Descriptor() ([]byte, []int) { - return file_repository_proto_rawDescGZIP(), []int{6} + return file_pb_repository_repository_proto_rawDescGZIP(), []int{7} } func (x *AffectedCommitsRequest) GetUrl() string { @@ -447,21 +499,24 @@ func (x *AffectedCommitsRequest) GetForceUpdate() bool { return false } -var File_repository_proto protoreflect.FileDescriptor +var File_pb_repository_repository_proto protoreflect.FileDescriptor -const file_repository_proto_rawDesc = "" + +const file_pb_repository_repository_proto_rawDesc = "" + "\n" + - "\x10repository.proto\x12\x06gitter\"=\n" + + "\x1epb/repository/repository.proto\x12\x06gitter\"=\n" + "\fCommitDetail\x12\x12\n" + "\x04hash\x18\x01 \x01(\fR\x04hash\x12\x19\n" + "\bpatch_id\x18\x02 \x01(\fR\apatchId\"A\n" + "\x0fRepositoryCache\x12.\n" + - "\acommits\x18\x01 \x03(\v2\x14.gitter.CommitDetailR\acommits\"8\n" + + "\acommits\x18\x01 \x03(\v2\x14.gitter.CommitDetailR\acommits\"$\n" + "\x0eAffectedCommit\x12\x12\n" + - "\x04hash\x18\x01 \x01(\fR\x04hash\x12\x12\n" + - "\x04refs\x18\x02 \x03(\tR\x04refs\"K\n" + + "\x04hash\x18\x01 \x01(\fR\x04hash\"4\n" + + "\fAffectedRefs\x12\x10\n" + + "\x03ref\x18\x01 \x01(\tR\x03ref\x12\x12\n" + + "\x04hash\x18\x02 \x01(\fR\x04hash\"u\n" + "\x17AffectedCommitsResponse\x120\n" + - "\acommits\x18\x01 \x03(\v2\x16.gitter.AffectedCommitR\acommits\"M\n" + + "\acommits\x18\x01 \x03(\v2\x16.gitter.AffectedCommitR\acommits\x12(\n" + + "\x04refs\x18\x02 \x03(\v2\x14.gitter.AffectedRefsR\x04refs\"M\n" + "\x05Event\x120\n" + "\n" + "event_type\x18\x01 \x01(\x0e2\x11.gitter.EventTypeR\teventType\x12\x12\n" + @@ -483,62 +538,64 @@ const file_repository_proto_rawDesc = "" + "\x05LIMIT\x10\x03B\x0eZ\f./repositoryb\x06proto3" var ( - file_repository_proto_rawDescOnce sync.Once - file_repository_proto_rawDescData []byte + file_pb_repository_repository_proto_rawDescOnce sync.Once + file_pb_repository_repository_proto_rawDescData []byte ) -func file_repository_proto_rawDescGZIP() []byte { - file_repository_proto_rawDescOnce.Do(func() { - file_repository_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_repository_proto_rawDesc), len(file_repository_proto_rawDesc))) +func file_pb_repository_repository_proto_rawDescGZIP() []byte { + file_pb_repository_repository_proto_rawDescOnce.Do(func() { + file_pb_repository_repository_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_pb_repository_repository_proto_rawDesc), len(file_pb_repository_repository_proto_rawDesc))) }) - return file_repository_proto_rawDescData + return file_pb_repository_repository_proto_rawDescData } -var file_repository_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 7) -var file_repository_proto_goTypes = []any{ +var file_pb_repository_repository_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_pb_repository_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 8) +var file_pb_repository_repository_proto_goTypes = []any{ (EventType)(0), // 0: gitter.EventType (*CommitDetail)(nil), // 1: gitter.CommitDetail (*RepositoryCache)(nil), // 2: gitter.RepositoryCache (*AffectedCommit)(nil), // 3: gitter.AffectedCommit - (*AffectedCommitsResponse)(nil), // 4: gitter.AffectedCommitsResponse - (*Event)(nil), // 5: gitter.Event - (*CacheRequest)(nil), // 6: gitter.CacheRequest - (*AffectedCommitsRequest)(nil), // 7: gitter.AffectedCommitsRequest + (*AffectedRefs)(nil), // 4: gitter.AffectedRefs + (*AffectedCommitsResponse)(nil), // 5: gitter.AffectedCommitsResponse + (*Event)(nil), // 6: gitter.Event + (*CacheRequest)(nil), // 7: gitter.CacheRequest + (*AffectedCommitsRequest)(nil), // 8: gitter.AffectedCommitsRequest } -var file_repository_proto_depIdxs = []int32{ +var file_pb_repository_repository_proto_depIdxs = []int32{ 1, // 0: gitter.RepositoryCache.commits:type_name -> gitter.CommitDetail 3, // 1: gitter.AffectedCommitsResponse.commits:type_name -> gitter.AffectedCommit - 0, // 2: gitter.Event.event_type:type_name -> gitter.EventType - 5, // 3: gitter.AffectedCommitsRequest.events:type_name -> gitter.Event - 4, // [4:4] is the sub-list for method output_type - 4, // [4:4] is the sub-list for method input_type - 4, // [4:4] is the sub-list for extension type_name - 4, // [4:4] is the sub-list for extension extendee - 0, // [0:4] is the sub-list for field type_name -} - -func init() { file_repository_proto_init() } -func file_repository_proto_init() { - if File_repository_proto != nil { + 4, // 2: gitter.AffectedCommitsResponse.refs:type_name -> gitter.AffectedRefs + 0, // 3: gitter.Event.event_type:type_name -> gitter.EventType + 6, // 4: gitter.AffectedCommitsRequest.events:type_name -> gitter.Event + 5, // [5:5] is the sub-list for method output_type + 5, // [5:5] is the sub-list for method input_type + 5, // [5:5] is the sub-list for extension type_name + 5, // [5:5] is the sub-list for extension extendee + 0, // [0:5] is the sub-list for field type_name +} + +func init() { file_pb_repository_repository_proto_init() } +func file_pb_repository_repository_proto_init() { + if File_pb_repository_repository_proto != nil { return } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: unsafe.Slice(unsafe.StringData(file_repository_proto_rawDesc), len(file_repository_proto_rawDesc)), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_pb_repository_repository_proto_rawDesc), len(file_pb_repository_repository_proto_rawDesc)), NumEnums: 1, - NumMessages: 7, + NumMessages: 8, NumExtensions: 0, NumServices: 0, }, - GoTypes: file_repository_proto_goTypes, - DependencyIndexes: file_repository_proto_depIdxs, - EnumInfos: file_repository_proto_enumTypes, - MessageInfos: file_repository_proto_msgTypes, + GoTypes: file_pb_repository_repository_proto_goTypes, + DependencyIndexes: file_pb_repository_repository_proto_depIdxs, + EnumInfos: file_pb_repository_repository_proto_enumTypes, + MessageInfos: file_pb_repository_repository_proto_msgTypes, }.Build() - File_repository_proto = out.File - file_repository_proto_goTypes = nil - file_repository_proto_depIdxs = nil + File_pb_repository_repository_proto = out.File + file_pb_repository_repository_proto_goTypes = nil + file_pb_repository_repository_proto_depIdxs = nil } diff --git a/go/cmd/gitter/pb/repository/repository.proto b/go/cmd/gitter/pb/repository/repository.proto index 13553a1a168..e413f6ea29d 100644 --- a/go/cmd/gitter/pb/repository/repository.proto +++ b/go/cmd/gitter/pb/repository/repository.proto @@ -17,11 +17,16 @@ message RepositoryCache { message AffectedCommit { bytes hash = 1; - repeated string refs = 2; +} + +message AffectedRefs { + string ref = 1; + bytes hash = 2; } message AffectedCommitsResponse { repeated AffectedCommit commits = 1; + repeated AffectedRefs refs = 2; } enum EventType { From d415c399eb5a812744731d6dad24dda222fdea39 Mon Sep 17 00:00:00 2001 From: Joey L Date: Thu, 19 Mar 2026 03:45:57 +0000 Subject: [PATCH 35/39] Address more comments --- .../clouddeploy/gke-workers/base/gitter.yaml | 2 +- .../environments/oss-vdb-test/gitter.yaml | 2 +- .../environments/oss-vdb/gitter.yaml | 2 +- go/cmd/gitter/gitter.go | 19 ++++++++++++------- go/cmd/gitter/repository.go | 2 ++ 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/deployment/clouddeploy/gke-workers/base/gitter.yaml b/deployment/clouddeploy/gke-workers/base/gitter.yaml index 6d46d8222a1..9b659954c76 100644 --- a/deployment/clouddeploy/gke-workers/base/gitter.yaml +++ b/deployment/clouddeploy/gke-workers/base/gitter.yaml @@ -30,7 +30,7 @@ spec: - "--work-dir=/work/gitter" - "--fetch-timeout=1h" - "--repo-cache-ttl=1h" - - "--repo-cache-max-cost=107374182400" # 100GB + - "--repo-cache-max-cost=100GiB" env: - name: GOMEMLIMIT value: "100GiB" diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml index f8257a42226..576a83cfe6f 100644 --- a/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb-test/gitter.yaml @@ -12,7 +12,7 @@ spec: - "--work-dir=/work/gitter" - "--fetch-timeout=1h" - "--repo-cache-ttl=1h" - - "--repo-cache-max-cost=107374182400" # 100GB + - "--repo-cache-max-cost=100GiB" env: - name: GOOGLE_CLOUD_PROJECT value: oss-vdb-test diff --git a/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml b/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml index 658303d0316..24554f18405 100644 --- a/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml +++ b/deployment/clouddeploy/gke-workers/environments/oss-vdb/gitter.yaml @@ -12,7 +12,7 @@ spec: - "--work-dir=/work/gitter" - "--fetch-timeout=1h" - "--repo-cache-ttl=1h" - - "--repo-cache-max-cost=107374182400" # 100GB + - "--repo-cache-max-cost=100GiB" env: - name: GOOGLE_CLOUD_PROJECT value: oss-vdb diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index e417bc369e4..97c3b53008d 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -27,6 +27,7 @@ import ( _ "net/http/pprof" //nolint:gosec // This is a internal only service not public to the internet "github.com/dgraph-io/ristretto/v2" + "github.com/dustin/go-humanize" "github.com/google/osv.dev/go/logger" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "golang.org/x/sync/singleflight" @@ -64,7 +65,7 @@ var ( // LRU cache for recently loaded repositories (key: repo URL) repoCache *ristretto.Cache[string, *Repository] repoTTL time.Duration - repoCacheMaxCost int64 + repoCacheMaxCost int64 // max cost in bytes ) var validURLRegex = regexp.MustCompile(`^(https?|git|ssh)://`) @@ -115,9 +116,9 @@ func GetRepoLock(url string) *sync.RWMutex { return lock.(*sync.RWMutex) } -// repoCost is the cost function for a repository in the LRU cache. +// repoCostBytes is the cost function for a repository in the LRU cache. // The memory cost of a repository is approximated from the num of commits and a base overhead. -func repoCost(repo *Repository) int64 { +func repoCostBytes(repo *Repository) int64 { // Mutex (8 bytes), string for repo path (say 128 bytes), root commit (assume 1 root only, 32 bytes) repoOverhead := 168 // Assuming per commit adds: @@ -139,14 +140,14 @@ func repoCost(repo *Repository) int64 { // InitRepoCache initializes the LRU cache for repositories. func InitRepoCache() { + // General guidance is to make NumCounters 10x the cache capacity (in terms of items) numCounters := repoCacheMaxCost / (300 * 10000) var err error repoCache, err = ristretto.NewCache(&ristretto.Config[string, *Repository]{ - // General guidance is to make NumCounters 10x the cache capacity (in terms of items) NumCounters: numCounters, MaxCost: repoCacheMaxCost, BufferItems: 64, - Cost: repoCost, + Cost: repoCostBytes, // Check for TTL expiry every 60 seconds TtlTickerDurationInSec: 60, }) @@ -459,16 +460,20 @@ func main() { flag.DurationVar(&fetchTimeout, "fetch-timeout", time.Hour, "Fetch timeout duration") concurrentLimit := flag.Int("concurrent-limit", 100, "Concurrent limit for unique requests") flag.DurationVar(&repoTTL, "repo-cache-ttl", time.Hour, "Repository LRU cache time-to-live duration") - flag.Int64Var(&repoCacheMaxCost, "repo-cache-max-cost", 1<<30, "Repository LRU cache max cost (in bytes)") + repoMaxCostStr := flag.String("repo-cache-max-cost", "1GiB", "Repository LRU cache max cost (in bytes)") flag.Parse() semaphore = make(chan struct{}, *concurrentLimit) persistencePath = filepath.Join(*workDir, persistenceFileName) gitStorePath = filepath.Join(*workDir, gitStoreFileName) - if err := os.MkdirAll(gitStorePath, 0755); err != nil { logger.Fatal("Failed to create git store path", slog.String("path", gitStorePath), slog.Any("error", err)) } + repoMaxCostUint, err := humanize.ParseBytes(*repoMaxCostStr) + if err != nil { + logger.Fatal("Failed to parse repo cache max cost", slog.String("maxCost", *repoMaxCostStr), slog.Any("error", err)) + } + repoCacheMaxCost = int64(repoMaxCostUint) loadLastFetchMap() InitRepoCache() diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index c581edcbb3f..007a4cd5d8f 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -416,12 +416,14 @@ func (r *Repository) calculatePatchIDsWorker(ctx context.Context, chunk []int) e return nil } +// updatePatchID updates the PatchID for a given commit and adds it to the patchIDToCommits map. func (r *Repository) updatePatchID(commitHash, patchID SHA1) { r.patchIDMu.Lock() defer r.patchIDMu.Unlock() idx, ok := r.hashToIndex[commitHash] if !ok { + // This should never happen because we only call git patch-id on commits we see when building commit graph. return } commit := r.commits[idx] From c44217c8bbd797c3ca67856a593ea3aaf9492c5f Mon Sep 17 00:00:00 2001 From: Joey L Date: Thu, 19 Mar 2026 03:55:59 +0000 Subject: [PATCH 36/39] =?UTF-8?q?=F0=9F=94=B4=E2=9C=8B=F0=9F=8D=92?= =?UTF-8?q?=E2=9B=8F=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go/cmd/gitter/gitter.go | 9 +- go/cmd/gitter/pb/repository/repository.pb.go | 121 +++++++++--------- go/cmd/gitter/pb/repository/repository.proto | 3 +- go/cmd/gitter/repository.go | 9 +- go/cmd/gitter/repository_test.go | 123 ++++++++++++++++++- 5 files changed, 202 insertions(+), 63 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 97c3b53008d..b8a2b6226fe 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -138,10 +138,12 @@ func repoCostBytes(repo *Repository) int64 { return int64(repoOverhead + len(repo.commits)*costPerCommit) } +// General guidance is to make NumCounters 10x the cache capacity (in terms of items) +// We're assuming the cache will hold 5000 repositories +const numCounters = int64(10 * 5000) + // InitRepoCache initializes the LRU cache for repositories. func InitRepoCache() { - // General guidance is to make NumCounters 10x the cache capacity (in terms of items) - numCounters := repoCacheMaxCost / (300 * 10000) var err error repoCache, err = ristretto.NewCache(&ristretto.Config[string, *Repository]{ NumCounters: numCounters, @@ -644,6 +646,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { cherrypickIntro := body.GetDetectCherrypicksIntroduced() cherrypickFixed := body.GetDetectCherrypicksFixed() + cherrypickLimit := body.GetDetectCherrypicksLimit() ctx := context.WithValue(r.Context(), urlKey, url) logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", se.Introduced), slog.Any("fixed", se.Fixed), slog.Any("last_affected", se.LastAffected), slog.Any("limit", se.Limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed)) @@ -666,7 +669,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { var affectedCommits []*Commit if len(se.Limit) > 0 { - affectedCommits = repo.Limit(ctx, se) + affectedCommits = repo.Limit(ctx, se, cherrypickIntro, cherrypickLimit) } else { affectedCommits = repo.Affected(ctx, se, cherrypickIntro, cherrypickFixed) } diff --git a/go/cmd/gitter/pb/repository/repository.pb.go b/go/cmd/gitter/pb/repository/repository.pb.go index 9ffa342515c..d8e9ad5de98 100644 --- a/go/cmd/gitter/pb/repository/repository.pb.go +++ b/go/cmd/gitter/pb/repository/repository.pb.go @@ -2,7 +2,7 @@ // versions: // protoc-gen-go v1.36.11 // protoc v3.21.12 -// source: pb/repository/repository.proto +// source: repository.proto package repository @@ -57,11 +57,11 @@ func (x EventType) String() string { } func (EventType) Descriptor() protoreflect.EnumDescriptor { - return file_pb_repository_repository_proto_enumTypes[0].Descriptor() + return file_repository_proto_enumTypes[0].Descriptor() } func (EventType) Type() protoreflect.EnumType { - return &file_pb_repository_repository_proto_enumTypes[0] + return &file_repository_proto_enumTypes[0] } func (x EventType) Number() protoreflect.EnumNumber { @@ -70,7 +70,7 @@ func (x EventType) Number() protoreflect.EnumNumber { // Deprecated: Use EventType.Descriptor instead. func (EventType) EnumDescriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{0} + return file_repository_proto_rawDescGZIP(), []int{0} } type CommitDetail struct { @@ -83,7 +83,7 @@ type CommitDetail struct { func (x *CommitDetail) Reset() { *x = CommitDetail{} - mi := &file_pb_repository_repository_proto_msgTypes[0] + mi := &file_repository_proto_msgTypes[0] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -95,7 +95,7 @@ func (x *CommitDetail) String() string { func (*CommitDetail) ProtoMessage() {} func (x *CommitDetail) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[0] + mi := &file_repository_proto_msgTypes[0] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -108,7 +108,7 @@ func (x *CommitDetail) ProtoReflect() protoreflect.Message { // Deprecated: Use CommitDetail.ProtoReflect.Descriptor instead. func (*CommitDetail) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{0} + return file_repository_proto_rawDescGZIP(), []int{0} } func (x *CommitDetail) GetHash() []byte { @@ -136,7 +136,7 @@ type RepositoryCache struct { func (x *RepositoryCache) Reset() { *x = RepositoryCache{} - mi := &file_pb_repository_repository_proto_msgTypes[1] + mi := &file_repository_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -148,7 +148,7 @@ func (x *RepositoryCache) String() string { func (*RepositoryCache) ProtoMessage() {} func (x *RepositoryCache) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[1] + mi := &file_repository_proto_msgTypes[1] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -161,7 +161,7 @@ func (x *RepositoryCache) ProtoReflect() protoreflect.Message { // Deprecated: Use RepositoryCache.ProtoReflect.Descriptor instead. func (*RepositoryCache) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{1} + return file_repository_proto_rawDescGZIP(), []int{1} } func (x *RepositoryCache) GetCommits() []*CommitDetail { @@ -180,7 +180,7 @@ type AffectedCommit struct { func (x *AffectedCommit) Reset() { *x = AffectedCommit{} - mi := &file_pb_repository_repository_proto_msgTypes[2] + mi := &file_repository_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -192,7 +192,7 @@ func (x *AffectedCommit) String() string { func (*AffectedCommit) ProtoMessage() {} func (x *AffectedCommit) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[2] + mi := &file_repository_proto_msgTypes[2] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -205,7 +205,7 @@ func (x *AffectedCommit) ProtoReflect() protoreflect.Message { // Deprecated: Use AffectedCommit.ProtoReflect.Descriptor instead. func (*AffectedCommit) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{2} + return file_repository_proto_rawDescGZIP(), []int{2} } func (x *AffectedCommit) GetHash() []byte { @@ -225,7 +225,7 @@ type AffectedRefs struct { func (x *AffectedRefs) Reset() { *x = AffectedRefs{} - mi := &file_pb_repository_repository_proto_msgTypes[3] + mi := &file_repository_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -237,7 +237,7 @@ func (x *AffectedRefs) String() string { func (*AffectedRefs) ProtoMessage() {} func (x *AffectedRefs) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[3] + mi := &file_repository_proto_msgTypes[3] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -250,7 +250,7 @@ func (x *AffectedRefs) ProtoReflect() protoreflect.Message { // Deprecated: Use AffectedRefs.ProtoReflect.Descriptor instead. func (*AffectedRefs) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{3} + return file_repository_proto_rawDescGZIP(), []int{3} } func (x *AffectedRefs) GetRef() string { @@ -277,7 +277,7 @@ type AffectedCommitsResponse struct { func (x *AffectedCommitsResponse) Reset() { *x = AffectedCommitsResponse{} - mi := &file_pb_repository_repository_proto_msgTypes[4] + mi := &file_repository_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -289,7 +289,7 @@ func (x *AffectedCommitsResponse) String() string { func (*AffectedCommitsResponse) ProtoMessage() {} func (x *AffectedCommitsResponse) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[4] + mi := &file_repository_proto_msgTypes[4] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -302,7 +302,7 @@ func (x *AffectedCommitsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use AffectedCommitsResponse.ProtoReflect.Descriptor instead. func (*AffectedCommitsResponse) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{4} + return file_repository_proto_rawDescGZIP(), []int{4} } func (x *AffectedCommitsResponse) GetCommits() []*AffectedCommit { @@ -329,7 +329,7 @@ type Event struct { func (x *Event) Reset() { *x = Event{} - mi := &file_pb_repository_repository_proto_msgTypes[5] + mi := &file_repository_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -341,7 +341,7 @@ func (x *Event) String() string { func (*Event) ProtoMessage() {} func (x *Event) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[5] + mi := &file_repository_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -354,7 +354,7 @@ func (x *Event) ProtoReflect() protoreflect.Message { // Deprecated: Use Event.ProtoReflect.Descriptor instead. func (*Event) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{5} + return file_repository_proto_rawDescGZIP(), []int{5} } func (x *Event) GetEventType() EventType { @@ -381,7 +381,7 @@ type CacheRequest struct { func (x *CacheRequest) Reset() { *x = CacheRequest{} - mi := &file_pb_repository_repository_proto_msgTypes[6] + mi := &file_repository_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -393,7 +393,7 @@ func (x *CacheRequest) String() string { func (*CacheRequest) ProtoMessage() {} func (x *CacheRequest) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[6] + mi := &file_repository_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -406,7 +406,7 @@ func (x *CacheRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CacheRequest.ProtoReflect.Descriptor instead. func (*CacheRequest) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{6} + return file_repository_proto_rawDescGZIP(), []int{6} } func (x *CacheRequest) GetUrl() string { @@ -429,14 +429,15 @@ type AffectedCommitsRequest struct { Events []*Event `protobuf:"bytes,2,rep,name=events,proto3" json:"events,omitempty"` DetectCherrypicksIntroduced bool `protobuf:"varint,3,opt,name=detect_cherrypicks_introduced,json=detectCherrypicksIntroduced,proto3" json:"detect_cherrypicks_introduced,omitempty"` DetectCherrypicksFixed bool `protobuf:"varint,4,opt,name=detect_cherrypicks_fixed,json=detectCherrypicksFixed,proto3" json:"detect_cherrypicks_fixed,omitempty"` - ForceUpdate bool `protobuf:"varint,5,opt,name=force_update,json=forceUpdate,proto3" json:"force_update,omitempty"` + DetectCherrypicksLimit bool `protobuf:"varint,5,opt,name=detect_cherrypicks_limit,json=detectCherrypicksLimit,proto3" json:"detect_cherrypicks_limit,omitempty"` + ForceUpdate bool `protobuf:"varint,6,opt,name=force_update,json=forceUpdate,proto3" json:"force_update,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *AffectedCommitsRequest) Reset() { *x = AffectedCommitsRequest{} - mi := &file_pb_repository_repository_proto_msgTypes[7] + mi := &file_repository_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -448,7 +449,7 @@ func (x *AffectedCommitsRequest) String() string { func (*AffectedCommitsRequest) ProtoMessage() {} func (x *AffectedCommitsRequest) ProtoReflect() protoreflect.Message { - mi := &file_pb_repository_repository_proto_msgTypes[7] + mi := &file_repository_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -461,7 +462,7 @@ func (x *AffectedCommitsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use AffectedCommitsRequest.ProtoReflect.Descriptor instead. func (*AffectedCommitsRequest) Descriptor() ([]byte, []int) { - return file_pb_repository_repository_proto_rawDescGZIP(), []int{7} + return file_repository_proto_rawDescGZIP(), []int{7} } func (x *AffectedCommitsRequest) GetUrl() string { @@ -492,6 +493,13 @@ func (x *AffectedCommitsRequest) GetDetectCherrypicksFixed() bool { return false } +func (x *AffectedCommitsRequest) GetDetectCherrypicksLimit() bool { + if x != nil { + return x.DetectCherrypicksLimit + } + return false +} + func (x *AffectedCommitsRequest) GetForceUpdate() bool { if x != nil { return x.ForceUpdate @@ -499,11 +507,11 @@ func (x *AffectedCommitsRequest) GetForceUpdate() bool { return false } -var File_pb_repository_repository_proto protoreflect.FileDescriptor +var File_repository_proto protoreflect.FileDescriptor -const file_pb_repository_repository_proto_rawDesc = "" + +const file_repository_proto_rawDesc = "" + "\n" + - "\x1epb/repository/repository.proto\x12\x06gitter\"=\n" + + "\x10repository.proto\x12\x06gitter\"=\n" + "\fCommitDetail\x12\x12\n" + "\x04hash\x18\x01 \x01(\fR\x04hash\x12\x19\n" + "\bpatch_id\x18\x02 \x01(\fR\apatchId\"A\n" + @@ -523,13 +531,14 @@ const file_pb_repository_repository_proto_rawDesc = "" + "\x04hash\x18\x02 \x01(\tR\x04hash\"C\n" + "\fCacheRequest\x12\x10\n" + "\x03url\x18\x01 \x01(\tR\x03url\x12!\n" + - "\fforce_update\x18\x02 \x01(\bR\vforceUpdate\"\xf2\x01\n" + + "\fforce_update\x18\x02 \x01(\bR\vforceUpdate\"\xac\x02\n" + "\x16AffectedCommitsRequest\x12\x10\n" + "\x03url\x18\x01 \x01(\tR\x03url\x12%\n" + "\x06events\x18\x02 \x03(\v2\r.gitter.EventR\x06events\x12B\n" + "\x1ddetect_cherrypicks_introduced\x18\x03 \x01(\bR\x1bdetectCherrypicksIntroduced\x128\n" + - "\x18detect_cherrypicks_fixed\x18\x04 \x01(\bR\x16detectCherrypicksFixed\x12!\n" + - "\fforce_update\x18\x05 \x01(\bR\vforceUpdate*D\n" + + "\x18detect_cherrypicks_fixed\x18\x04 \x01(\bR\x16detectCherrypicksFixed\x128\n" + + "\x18detect_cherrypicks_limit\x18\x05 \x01(\bR\x16detectCherrypicksLimit\x12!\n" + + "\fforce_update\x18\x06 \x01(\bR\vforceUpdate*D\n" + "\tEventType\x12\x0e\n" + "\n" + "INTRODUCED\x10\x00\x12\t\n" + @@ -538,20 +547,20 @@ const file_pb_repository_repository_proto_rawDesc = "" + "\x05LIMIT\x10\x03B\x0eZ\f./repositoryb\x06proto3" var ( - file_pb_repository_repository_proto_rawDescOnce sync.Once - file_pb_repository_repository_proto_rawDescData []byte + file_repository_proto_rawDescOnce sync.Once + file_repository_proto_rawDescData []byte ) -func file_pb_repository_repository_proto_rawDescGZIP() []byte { - file_pb_repository_repository_proto_rawDescOnce.Do(func() { - file_pb_repository_repository_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_pb_repository_repository_proto_rawDesc), len(file_pb_repository_repository_proto_rawDesc))) +func file_repository_proto_rawDescGZIP() []byte { + file_repository_proto_rawDescOnce.Do(func() { + file_repository_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_repository_proto_rawDesc), len(file_repository_proto_rawDesc))) }) - return file_pb_repository_repository_proto_rawDescData + return file_repository_proto_rawDescData } -var file_pb_repository_repository_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_pb_repository_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 8) -var file_pb_repository_repository_proto_goTypes = []any{ +var file_repository_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_repository_proto_msgTypes = make([]protoimpl.MessageInfo, 8) +var file_repository_proto_goTypes = []any{ (EventType)(0), // 0: gitter.EventType (*CommitDetail)(nil), // 1: gitter.CommitDetail (*RepositoryCache)(nil), // 2: gitter.RepositoryCache @@ -562,7 +571,7 @@ var file_pb_repository_repository_proto_goTypes = []any{ (*CacheRequest)(nil), // 7: gitter.CacheRequest (*AffectedCommitsRequest)(nil), // 8: gitter.AffectedCommitsRequest } -var file_pb_repository_repository_proto_depIdxs = []int32{ +var file_repository_proto_depIdxs = []int32{ 1, // 0: gitter.RepositoryCache.commits:type_name -> gitter.CommitDetail 3, // 1: gitter.AffectedCommitsResponse.commits:type_name -> gitter.AffectedCommit 4, // 2: gitter.AffectedCommitsResponse.refs:type_name -> gitter.AffectedRefs @@ -575,27 +584,27 @@ var file_pb_repository_repository_proto_depIdxs = []int32{ 0, // [0:5] is the sub-list for field type_name } -func init() { file_pb_repository_repository_proto_init() } -func file_pb_repository_repository_proto_init() { - if File_pb_repository_repository_proto != nil { +func init() { file_repository_proto_init() } +func file_repository_proto_init() { + if File_repository_proto != nil { return } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: unsafe.Slice(unsafe.StringData(file_pb_repository_repository_proto_rawDesc), len(file_pb_repository_repository_proto_rawDesc)), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_repository_proto_rawDesc), len(file_repository_proto_rawDesc)), NumEnums: 1, NumMessages: 8, NumExtensions: 0, NumServices: 0, }, - GoTypes: file_pb_repository_repository_proto_goTypes, - DependencyIndexes: file_pb_repository_repository_proto_depIdxs, - EnumInfos: file_pb_repository_repository_proto_enumTypes, - MessageInfos: file_pb_repository_repository_proto_msgTypes, + GoTypes: file_repository_proto_goTypes, + DependencyIndexes: file_repository_proto_depIdxs, + EnumInfos: file_repository_proto_enumTypes, + MessageInfos: file_repository_proto_msgTypes, }.Build() - File_pb_repository_repository_proto = out.File - file_pb_repository_repository_proto_goTypes = nil - file_pb_repository_repository_proto_depIdxs = nil + File_repository_proto = out.File + file_repository_proto_goTypes = nil + file_repository_proto_depIdxs = nil } diff --git a/go/cmd/gitter/pb/repository/repository.proto b/go/cmd/gitter/pb/repository/repository.proto index e413f6ea29d..26ea0d29657 100644 --- a/go/cmd/gitter/pb/repository/repository.proto +++ b/go/cmd/gitter/pb/repository/repository.proto @@ -51,5 +51,6 @@ message AffectedCommitsRequest { repeated Event events = 2; bool detect_cherrypicks_introduced = 3; bool detect_cherrypicks_fixed = 4; - bool force_update = 5; + bool detect_cherrypicks_limit = 5; + bool force_update = 6; } diff --git a/go/cmd/gitter/repository.go b/go/cmd/gitter/repository.go index 007a4cd5d8f..0b26dde611f 100644 --- a/go/cmd/gitter/repository.go +++ b/go/cmd/gitter/repository.go @@ -624,10 +624,17 @@ func (r *Repository) Affected(ctx context.Context, se *SeparatedEvents, cherrypi } // Limit walks and returns the commits that are strictly between introduced (inclusive) and limit (exclusive) -func (r *Repository) Limit(ctx context.Context, se *SeparatedEvents) []*Commit { +func (r *Repository) Limit(ctx context.Context, se *SeparatedEvents, cherrypickIntro, cherrypickLimit bool) []*Commit { introduced := r.parseHashes(ctx, se.Introduced) limit := r.parseHashes(ctx, se.Limit) + if cherrypickIntro { + introduced = r.expandByCherrypick(introduced) + } + if cherrypickLimit { + limit = r.expandByCherrypick(limit) + } + var affectedCommits []*Commit introMap := make([]bool, len(r.commits)) diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 94df505595c..0dc96edb6a8 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -539,7 +539,7 @@ func TestAffected_Combined(t *testing.T) { expected: []SHA1{}, }, { - name: "Introduced=lastAffected: Only 1 commit affected", + name: "Introduced=lastAffected: Only current commit affected", se: &SeparatedEvents{ Introduced: []string{encodeSHA1(hB)}, LastAffected: []string{encodeSHA1(hB)}, @@ -555,6 +555,15 @@ func TestAffected_Combined(t *testing.T) { }, expected: []SHA1{hA}, }, + { + // This is the current behaviour as we treat child of lastAffected commit as a fixed commit + name: "Intro=lastAffected+1: commit not affected", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA), encodeSHA1(hC)}, // C is the child of B + LastAffected: []string{encodeSHA1(hB)}, + }, + expected: []SHA1{hA, hB}, + }, } for _, tt := range tests { @@ -750,7 +759,7 @@ func TestLimit(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotCommits := repo.Limit(t.Context(), tt.se) + gotCommits := repo.Limit(t.Context(), tt.se, false, false) var got []SHA1 for _, c := range gotCommits { @@ -763,3 +772,113 @@ func TestLimit(t *testing.T) { }) } } + +func TestLimit_Cherrypick(t *testing.T) { + repo := NewRepository("/repo") + + // Graph: (Parent -> Child) + // A -> B -> C -> D + // | | + // (cherrypick) + // | | + // E -> F -> G -> H + + + hA := decodeSHA1("aaaa") + hB := decodeSHA1("bbbb") + hC := decodeSHA1("cccc") + hD := decodeSHA1("dddd") + hE := decodeSHA1("eeee") + hF := decodeSHA1("ffff") + hG := decodeSHA1("abab") + hH := decodeSHA1("acac") + + c1 := decodeSHA1("c1") + c2 := decodeSHA1("c2") + + // Setup graph (Parent -> Children) + repo.addEdgeForTest(hA, hB) + repo.addEdgeForTest(hB, hC) + repo.addEdgeForTest(hC, hD) + repo.addEdgeForTest(hE, hF) + repo.addEdgeForTest(hF, hG) + repo.addEdgeForTest(hG, hH) + repo.rootCommits = []int{0} + + // Setup PatchID map for cherrypicking + idxB := repo.getOrCreateIndex(hB) + idxF := repo.getOrCreateIndex(hF) + repo.patchIDToCommits[c1] = []int{idxB, idxF} + idxC := repo.getOrCreateIndex(hC) + idxG := repo.getOrCreateIndex(hG) + repo.patchIDToCommits[c2] = []int{idxC, idxG} + + repo.commits[idxB].PatchID = c1 + repo.commits[idxF].PatchID = c1 + repo.commits[idxC].PatchID = c2 + repo.commits[idxG].PatchID = c2 + + tests := []struct { + name string + se *SeparatedEvents + cherrypickIntro bool + cherrypickLimit bool + expected []SHA1 + }{ + { + name: "Cherrypick Introduced Only: B introduced, G limit", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hB)}, + Limit: []string{encodeSHA1(hG)}, + }, + cherrypickIntro: true, + cherrypickLimit: false, + expected: []SHA1{hF}, + }, + { + name: "Cherrypick Limit Only: B introduced, G limit", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hB)}, + Limit: []string{encodeSHA1(hG)}, + }, + cherrypickIntro: false, + cherrypickLimit: true, + expected: []SHA1{hB, hE, hF}, + }, + { + name: "Cherrypick Introduced and Limit: A introduced, G limit", + se: &SeparatedEvents{ + Introduced: []string{encodeSHA1(hA)}, + Limit: []string{encodeSHA1(hG)}, + }, + cherrypickIntro: true, + cherrypickLimit: true, + expected: []SHA1{hA, hB, hE, hF}, + }, + { + name: "Cherrypick Introduced=0: G limit", + se: &SeparatedEvents{ + Introduced: []string{"0"}, + Limit: []string{encodeSHA1(hG)}, + }, + cherrypickIntro: true, + cherrypickLimit: true, + expected: []SHA1{hA, hB, hE, hF}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotCommits := repo.Limit(t.Context(), tt.se, tt.cherrypickIntro, tt.cherrypickLimit) + + var got []SHA1 + for _, c := range gotCommits { + got = append(got, c.Hash) + } + + if diff := cmp.Diff(tt.expected, got, cmpSHA1Opts...); diff != "" { + t.Errorf("TestLimit_Cherrypick() mismatch (-want +got):\n%s", diff) + } + }) + } +} From 2c06274891ac8681694694062de3ca04144eadb8 Mon Sep 17 00:00:00 2001 From: Joey L Date: Thu, 19 Mar 2026 04:31:32 +0000 Subject: [PATCH 37/39] lint --- go/cmd/gitter/gitter.go | 6 ++++++ go/cmd/gitter/repository_test.go | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index b8a2b6226fe..47cea6a0c75 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "log/slog" + "math" "net" "net/http" "os" @@ -471,10 +472,15 @@ func main() { if err := os.MkdirAll(gitStorePath, 0755); err != nil { logger.Fatal("Failed to create git store path", slog.String("path", gitStorePath), slog.Any("error", err)) } + repoMaxCostUint, err := humanize.ParseBytes(*repoMaxCostStr) if err != nil { logger.Fatal("Failed to parse repo cache max cost", slog.String("maxCost", *repoMaxCostStr), slog.Any("error", err)) } + if repoMaxCostUint > math.MaxInt64 { + logger.Fatal("Repo cache max cost too large", slog.Uint64("maxCost", repoMaxCostUint)) + } + //nolint:gosec // humanize parses the string into uint64 but ristretto needs int64. Integer overflow is handled above. repoCacheMaxCost = int64(repoMaxCostUint) loadLastFetchMap() diff --git a/go/cmd/gitter/repository_test.go b/go/cmd/gitter/repository_test.go index 0dc96edb6a8..adbf702ebe2 100644 --- a/go/cmd/gitter/repository_test.go +++ b/go/cmd/gitter/repository_test.go @@ -783,7 +783,6 @@ func TestLimit_Cherrypick(t *testing.T) { // | | // E -> F -> G -> H - hA := decodeSHA1("aaaa") hB := decodeSHA1("bbbb") hC := decodeSHA1("cccc") From 367eee3aa1b515920f17c85953fb1e346f9e2790 Mon Sep 17 00:00:00 2001 From: Joey L Date: Thu, 19 Mar 2026 04:42:10 +0000 Subject: [PATCH 38/39] lint ^2 and remove ssh --- go/cmd/gitter/gitter.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index 47cea6a0c75..b65c03f2ce4 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -69,7 +69,7 @@ var ( repoCacheMaxCost int64 // max cost in bytes ) -var validURLRegex = regexp.MustCompile(`^(https?|git|ssh)://`) +var validURLRegex = regexp.MustCompile(`^(https?|git)://`) const shutdownTimeout = 10 * time.Second @@ -229,7 +229,7 @@ func validateURL(r *http.Request, url string) error { } // If request came from a local ip, don't do the check if !isLocalRequest(r) { - // Check if url starts with protocols: http(s)://, git://, ssh:// + // Check if url starts with protocols: http(s)://, git:// if !validURLRegex.MatchString(url) { return errors.New("invalid url parameter") } @@ -480,7 +480,6 @@ func main() { if repoMaxCostUint > math.MaxInt64 { logger.Fatal("Repo cache max cost too large", slog.Uint64("maxCost", repoMaxCostUint)) } - //nolint:gosec // humanize parses the string into uint64 but ristretto needs int64. Integer overflow is handled above. repoCacheMaxCost = int64(repoMaxCostUint) loadLastFetchMap() From 6a3d09a0eedc40b7e5091208379508477b05653e Mon Sep 17 00:00:00 2001 From: Joey L Date: Thu, 19 Mar 2026 23:17:03 +0000 Subject: [PATCH 39/39] rename vars --- go/cmd/gitter/gitter.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/go/cmd/gitter/gitter.go b/go/cmd/gitter/gitter.go index b65c03f2ce4..7ea59794875 100644 --- a/go/cmd/gitter/gitter.go +++ b/go/cmd/gitter/gitter.go @@ -64,9 +64,9 @@ var ( fetchTimeout time.Duration semaphore chan struct{} // Request concurrency control // LRU cache for recently loaded repositories (key: repo URL) - repoCache *ristretto.Cache[string, *Repository] - repoTTL time.Duration - repoCacheMaxCost int64 // max cost in bytes + repoCache *ristretto.Cache[string, *Repository] + repoTTL time.Duration + repoCacheMaxCostBytes int64 ) var validURLRegex = regexp.MustCompile(`^(https?|git)://`) @@ -148,7 +148,7 @@ func InitRepoCache() { var err error repoCache, err = ristretto.NewCache(&ristretto.Config[string, *Repository]{ NumCounters: numCounters, - MaxCost: repoCacheMaxCost, + MaxCost: repoCacheMaxCostBytes, BufferItems: 64, Cost: repoCostBytes, // Check for TTL expiry every 60 seconds @@ -317,7 +317,7 @@ func getFreshRepo(ctx context.Context, w http.ResponseWriter, url string, forceU if !forceUpdate { if repo, ok := repoCache.Get(url); ok { // repoCache.Get() will not return expired items, so we can safely return the repo - logger.InfoContext(ctx, "Repository already in cache, skipping fetch and load") + logger.DebugContext(ctx, "Repository already in cache, skipping fetch and load") return repo, nil } } @@ -365,6 +365,7 @@ func FetchRepo(ctx context.Context, url string, forceUpdate bool) error { logger.InfoContext(ctx, "Fetching git blob", slog.Duration("sinceAccessTime", time.Since(accessTime))) if _, err := os.Stat(filepath.Join(repoPath, ".git")); os.IsNotExist(err) { // Clone + logger.InfoContext(ctx, "Cloning git repository", slog.Duration("sinceAccessTime", time.Since(accessTime))) err := runCmd(ctx, "", []string{"GIT_TERMINAL_PROMPT=0"}, "git", "clone", "--", url, repoPath) if err != nil { return fmt.Errorf("git clone failed: %w", err) @@ -373,6 +374,7 @@ func FetchRepo(ctx context.Context, url string, forceUpdate bool) error { // Fetch/Pull - implementing simple git pull for now, might need reset --hard if we want exact mirrors // For a generic "get latest", pull is usually sufficient if we treat it as read-only. // Ideally safely: git fetch origin && git reset --hard origin/HEAD + logger.InfoContext(ctx, "Fetching git repository", slog.Duration("sinceAccessTime", time.Since(accessTime))) err := runCmd(ctx, repoPath, nil, "git", "fetch", "origin") if err != nil { return fmt.Errorf("git fetch failed: %w", err) @@ -480,7 +482,7 @@ func main() { if repoMaxCostUint > math.MaxInt64 { logger.Fatal("Repo cache max cost too large", slog.Uint64("maxCost", repoMaxCostUint)) } - repoCacheMaxCost = int64(repoMaxCostUint) + repoCacheMaxCostBytes = int64(repoMaxCostUint) loadLastFetchMap() InitRepoCache() @@ -654,7 +656,7 @@ func affectedCommitsHandler(w http.ResponseWriter, r *http.Request) { cherrypickLimit := body.GetDetectCherrypicksLimit() ctx := context.WithValue(r.Context(), urlKey, url) - logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", se.Introduced), slog.Any("fixed", se.Fixed), slog.Any("last_affected", se.LastAffected), slog.Any("limit", se.Limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed)) + logger.InfoContext(ctx, "Received request: /affected-commits", slog.Any("introduced", se.Introduced), slog.Any("fixed", se.Fixed), slog.Any("last_affected", se.LastAffected), slog.Any("limit", se.Limit), slog.Bool("cherrypickIntro", cherrypickIntro), slog.Bool("cherrypickFixed", cherrypickFixed), slog.Bool("cherrypickLimit", cherrypickLimit)) select { case semaphore <- struct{}{}: