diff --git a/vulnfeeds/cmd/combine-to-osv/DESIGN.md b/vulnfeeds/cmd/combine-to-osv/DESIGN.md new file mode 100644 index 00000000000..067a937ef27 --- /dev/null +++ b/vulnfeeds/cmd/combine-to-osv/DESIGN.md @@ -0,0 +1,71 @@ +# Combine-to-OSV Range Selection & Merging Design + +This document summarizes the design decisions and merging strategies implemented in the `combine-to-osv` tool to combine converted OSV records from NVD and CVE5 into a single enriched, schema-compliant OSV record. + +--- + +## 1. Structural Decisions + +### Unified Affected Package Grouping +Rather than outputting a separate `Affected` package object for each repository range, all repository-based Git ranges are grouped under a **single** `Affected` struct inside the final combined OSV record. Pure package-level entries (which contain only a package name without any ranges) are preserved as separate `Affected` objects. + +--- + +## 2. Range Selection & Merging Strategy (`pickBestRange`) + +When both NVD and CVE5 converted records contain Git ranges for the same repository, `pickBestRange` is used to determine the best combined range. 
+ +```mermaid +graph TD + A[Start pickBestRange] --> B{One range is nil?} + B -- Yes --> C[Return non-nil range with last_affected cleanup] + B -- No --> D{One range is REFERENCES source only?} + D -- Yes --> E[Merge events & database_specifics] + D -- No --> F{Both simple ranges <= 2 events?} + F -- Yes --> G[Merge boundary versions & database_specifics] + F -- No --> H{Fixed event presence differs?} + H -- Yes --> I[Prefer range with Fixed event] + H -- No --> J{Introduced '0' presence differs?} + J -- Yes --> K[Prefer range with non-zero Introduced] + J -- No --> L{CPE_RANGE source presence differs?} + L -- Yes --> M[Prefer range with CPE_RANGE source] + L -- No --> N{extracted_events differ?} + N -- Yes --> O[Prioritize preferred source CVE5] + N -- No --> P[Fallback: Choose range with more events] + E --> Q[Remove last_affected if Fixed exists] + G --> Q + I --> Q + K --> Q + M --> Q + O --> Q + P --> Q + Q --> R[Return merged/chosen range] +``` + +### 1. References-Only Merging +If one range's metadata source is **only** `"REFERENCES"` (meaning its commits were directly parsed from fix references), its events are appended and merged into the other CVE range instead of choosing one range wholesale. This preserves precise fix commits extracted from advisory links. + +### 2. Boundary Version Merging +For simple version ranges (with two or fewer events), boundary versions are merged to combine the most complete and constrained information: +* We prefer more constrained introduced boundaries (e.g., a non-zero introduced version over a `"0"` version). +* We prefer defined fixed version boundaries over undefined ones. + +### 3. Preference Rules (Wholesale Fallbacks) +If ranges are not simple enough to merge boundaries, we select the best range using the following hierarchy: +1. **Fixed Priority**: A range with bounded `fixed` version or commit information is prioritized over a range with open-ended `last_affected` information. +2. 
**Constrained Range Priority**: We prefer ranges that define a specific non-zero `introduced` bound over those that start at `"0"`. +3. **CPE_RANGE Source Priority**: We prefer ranges whose metadata source is `"CPE_RANGE"` because they are extracted from explicit config nodes rather than inferred from text. +4. **Preferred Source**: If both ranges carry non-empty `extracted_events` that differ, we prefer the range from the default preferred source (`CVE5` CNA-provided data). +5. **Completeness**: Otherwise, choose the range that has the larger number of events; on a tie, the `CVE5` range is kept. + +--- + +## 3. Metadata & Cleanup Rules + +### Database Specific Merging +Whenever ranges are merged (either via boundary version merging or references-only merging), their `database_specific` metadata fields are combined: +* String `source` tags are merged into a unified `ListValue` list (e.g., `"AFFECTED_FIELD"` and `"REFERENCES"` are merged into `["AFFECTED_FIELD", "REFERENCES"]`). +* Duplicate entries inside `extracted_events` are removed. + +### Last-Affected Cleanup +At the end of the selection or merging process, if the final range contains at least one explicit `fixed` commit or version event, any `last_affected` events are automatically removed from the range to maintain clean, bounded schema compliance. 
diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 06a6ada272d..67e236e5bd2 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -18,10 +18,12 @@ import ( "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/api/iterator" "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" ) const ( @@ -100,7 +102,7 @@ func main() { func extractCVEName(filename string, prefix string) string { cleaned := strings.TrimPrefix(filename, prefix) cleaned = strings.TrimSuffix(cleaned, ".json") - pre := strings.SplitAfter(cleaned, "-") + pre := strings.Split(cleaned, "-") if pre[0] != "CVE" { return "" } @@ -128,7 +130,7 @@ func listBucketObjects(bucketName string, prefix string) ([]string, error) { if err != nil { return nil, fmt.Errorf("bucket.Objects: %w", err) } - filenames = append(filenames, attrs.Name, prefix) + filenames = append(filenames, attrs.Name) } return filenames, nil @@ -269,104 +271,520 @@ func combineTwoOSVRecords(cve5 *osvschema.Vulnerability, nvd *osvschema.Vulnerab // It returns a new slice and does not modify cve5Affected in place. func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []*osvschema.Affected) []*osvschema.Affected { if len(nvdAffected) == 0 { + for _, aff := range cve5Affected { + for _, r := range aff.GetRanges() { + cleanLastAffectedIfFixedExists(r) + } + } + return cve5Affected } - // If NVD has more affected packages, prefer it entirely. 
- if len(cve5Affected) == 0 || len(nvdAffected) > len(cve5Affected) { + + if len(cve5Affected) == 0 { + for _, aff := range nvdAffected { + for _, r := range aff.GetRanges() { + cleanLastAffectedIfFixedExists(r) + } + } + return nvdAffected } - nvdRepoMap := make(map[string][]*osvschema.Range) - for _, affected := range nvdAffected { - for _, r := range affected.GetRanges() { + // Group all ranges by repository URL + cve5Ranges := make(map[string]*osvschema.Range) + for _, aff := range cve5Affected { + for _, r := range aff.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) - nvdRepoMap[repo] = append(nvdRepoMap[repo], r) + cve5Ranges[repo] = r } } } - cve5RepoMap := make(map[string][]*osvschema.Range) - for _, affected := range cve5Affected { - for _, r := range affected.GetRanges() { + nvdRanges := make(map[string]*osvschema.Range) + for _, aff := range nvdAffected { + for _, r := range aff.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) - cve5RepoMap[repo] = append(cve5RepoMap[repo], r) + nvdRanges[repo] = r } } } - newRepoAffectedMap := make(map[string]*osvschema.Affected) - - // Finds ranges with the same repo and merges them into one affected set. - for repo, cveRanges := range cve5RepoMap { - if nvdRanges, ok := nvdRepoMap[repo]; ok { - var newAffectedRanges []*osvschema.Range + // Collect all unique repo URLs + reposMap := make(map[string]bool) + for repo := range cve5Ranges { + reposMap[repo] = true + } + for repo := range nvdRanges { + reposMap[repo] = true + } - // Found a match. If NVD has more ranges, use its ranges. 
- if len(nvdRanges) > len(cveRanges) { - // just use the nvd ranges - newAffectedRanges = nvdRanges - } else if len(cveRanges) == 1 && len(nvdRanges) == 1 { - c5Intro, c5Fixed := getRangeBoundaryVersions(cveRanges[0].GetEvents()) - nvdIntro, nvdFixed := getRangeBoundaryVersions(nvdRanges[0].GetEvents()) + var finalRanges []*osvschema.Range + for repo := range reposMap { + bestRange := pickBestRange(cve5Ranges[repo], nvdRanges[repo]) + if bestRange != nil { + finalRanges = append(finalRanges, bestRange) + } + } - // Prefer cve5 data, but use nvd data if cve5 data is missing. - if c5Intro == "" { - c5Intro = nvdIntro + // Separate output Affected list: + // 1. Those that have ranges are grouped under a single Affected struct. + // 2. Those that do not have ranges are kept as separate Affected structs. + var combinedAffected []*osvschema.Affected + + if len(finalRanges) > 0 { + // Sort final ranges by repo for stability + slices.SortFunc(finalRanges, func(a, b *osvschema.Range) int { + return cmp.Compare(strings.ToLower(a.GetRepo()), strings.ToLower(b.GetRepo())) + }) + + // Find Package and EcosystemSpecific if any were present in the input ranges + var pkg *osvschema.Package + var ecosystemSpecific *structpb.Struct + for _, aff := range cve5Affected { + if len(aff.GetRanges()) > 0 { + if aff.GetPackage() != nil { + pkg = aff.GetPackage() } - if c5Fixed == "" { - c5Fixed = nvdFixed + if aff.GetEcosystemSpecific() != nil { + ecosystemSpecific = aff.GetEcosystemSpecific() } - - if c5Intro != "" || c5Fixed != "" { - newRange := conversion.BuildGitVersionRange(c5Intro, "", c5Fixed, repo) - newAffectedRanges = append(newAffectedRanges, newRange) - } else { - newAffectedRanges = cveRanges + } + } + if pkg == nil || ecosystemSpecific == nil { + for _, aff := range nvdAffected { + if len(aff.GetRanges()) > 0 { + if pkg == nil && aff.GetPackage() != nil { + pkg = aff.GetPackage() + } + if ecosystemSpecific == nil && aff.GetEcosystemSpecific() != nil { + ecosystemSpecific = 
aff.GetEcosystemSpecific() + } } - } else { - newAffectedRanges = cveRanges } + } - // Remove from map so we know which NVD packages are left. - delete(nvdRepoMap, repo) - newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: newAffectedRanges, + combinedAffected = append(combinedAffected, &osvschema.Affected{ + Ranges: finalRanges, + Package: pkg, + EcosystemSpecific: ecosystemSpecific, + }) + } + + // Copy over affected objects from cve5 and nvd that have NO ranges (e.g. pure package entries), + // deduplicating them by package name. + seenPackages := make(map[string]bool) + for _, aff := range cve5Affected { + if len(aff.GetRanges()) == 0 && aff.GetPackage() != nil { + pkgName := strings.ToLower(aff.GetPackage().GetName()) + if !seenPackages[pkgName] { + combinedAffected = append(combinedAffected, aff) + seenPackages[pkgName] = true } - } else { - newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: cveRanges, + } + } + for _, aff := range nvdAffected { + if len(aff.GetRanges()) == 0 && aff.GetPackage() != nil { + pkgName := strings.ToLower(aff.GetPackage().GetName()) + if !seenPackages[pkgName] { + combinedAffected = append(combinedAffected, aff) + seenPackages[pkgName] = true } } } - // Add remaining NVD packages that were not in cve5. - for repo, nvdRange := range nvdRepoMap { - newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: nvdRange, + // Sort the combinedAffected array: first entries with ranges, then by package name if present. 
+ slices.SortFunc(combinedAffected, func(a, b *osvschema.Affected) int { + hasRangeA := len(a.GetRanges()) > 0 + hasRangeB := len(b.GetRanges()) > 0 + if hasRangeA != hasRangeB { + if hasRangeA { + return -1 + } + + return 1 + } + var pkgA, pkgB string + if a.GetPackage() != nil { + pkgA = a.GetPackage().GetName() + } + if b.GetPackage() != nil { + pkgB = b.GetPackage().GetName() + } + + return cmp.Compare(strings.ToLower(pkgA), strings.ToLower(pkgB)) + }) + + return combinedAffected +} + +type ExtractedEvent struct { + Introduced string + Fixed string + LastAffected string + Limit string +} + +func getExtractedEvents(r *osvschema.Range) []*structpb.Value { + if r.GetDatabaseSpecific() == nil { + return nil + } + fields := r.GetDatabaseSpecific().GetFields() + if fields == nil { + return nil + } + val, ok := fields["extracted_events"] + if !ok || val.GetListValue() == nil { + return nil + } + + return val.GetListValue().GetValues() +} + +func parseExtractedEvent(v *structpb.Value) ExtractedEvent { + s := v.GetStructValue() + if s == nil { + return ExtractedEvent{} + } + fields := s.GetFields() + var ev ExtractedEvent + if intro, ok := fields["introduced"]; ok { + ev.Introduced = intro.GetStringValue() + } + if fixed, ok := fields["fixed"]; ok { + ev.Fixed = fixed.GetStringValue() + } + if la, ok := fields["last_affected"]; ok { + ev.LastAffected = la.GetStringValue() + } + if lim, ok := fields["limit"]; ok { + ev.Limit = lim.GetStringValue() + } + + return ev +} + +func parseExtractedEvents(r *osvschema.Range) []ExtractedEvent { + rawValues := getExtractedEvents(r) + if len(rawValues) == 0 { + return nil + } + events := make([]ExtractedEvent, 0, len(rawValues)) + for _, val := range rawValues { + events = append(events, parseExtractedEvent(val)) + } + + return events +} + +// sameVersionRanges checks if two ranges have the same extracted events. 
+func sameVersionRanges(evs1, evs2 []ExtractedEvent) bool { + if len(evs1) != len(evs2) { + return false + } + for i := range evs1 { + if evs1[i] != evs2[i] { + return false + } + } + + return true +} + +// hasFixedEvent checks if any event in the range has a fixed field. +func hasFixedEvent(r *osvschema.Range) bool { + for _, e := range r.GetEvents() { + if e.GetFixed() != "" { + return true + } + } + + return false +} + +// hasIntroducedZero checks if any event in the range has an introduced field with "0". +func hasIntroducedZero(r *osvschema.Range) bool { + for _, e := range r.GetEvents() { + if e.GetIntroduced() == "0" { + return true } } - var combinedAffected []*osvschema.Affected //nolint:prealloc - for _, aff := range newRepoAffectedMap { - combinedAffected = append(combinedAffected, aff) + return false +} + +// isCPERange checks if the range is a CPE range. +func isCPERange(r *osvschema.Range) bool { + if r.GetDatabaseSpecific() == nil { + return false + } + fields := r.GetDatabaseSpecific().GetFields() + if fields == nil { + return false + } + val, ok := fields["source"] + if !ok { + return false + } + if val.GetStringValue() == "CPE_RANGE" { + return true + } + if listVal := val.GetListValue(); listVal != nil { + for _, item := range listVal.GetValues() { + if item.GetStringValue() == "CPE_RANGE" { + return true + } + } } - // sort by repo - slices.SortFunc(combinedAffected, func(a, b *osvschema.Affected) int { - var repoA, repoB string - if len(a.GetRanges()) > 0 { - repoA = a.GetRanges()[0].GetRepo() + return false +} + +// cleanLastAffectedIfFixedExists removes the last_affected field from all +// events in the range if any event has a fixed field. This happens in place. 
+func cleanLastAffectedIfFixedExists(r *osvschema.Range) { + if r == nil { + return + } + hasFixed := false + for _, e := range r.GetEvents() { + if e.GetFixed() != "" { + hasFixed = true + break + } + } + if !hasFixed { + return + } + var cleanEvents []*osvschema.Event + for _, e := range r.GetEvents() { + if e.GetLastAffected() == "" { + cleanEvents = append(cleanEvents, e) + } + } + r.Events = cleanEvents +} + +// isReferencesOnly checks if the range 'source' field is only "REFERENCES" +// or ["REFERENCES"]. +func isReferencesOnly(r *osvschema.Range) bool { + if r.GetDatabaseSpecific() == nil { + return false + } + fields := r.GetDatabaseSpecific().GetFields() + if fields == nil { + return false + } + val, ok := fields["source"] + if !ok { + return false + } + if val.GetStringValue() == "REFERENCES" { + return true + } + if listVal := val.GetListValue(); listVal != nil { + values := listVal.GetValues() + if len(values) == 1 && values[0].GetStringValue() == "REFERENCES" { + return true + } + } + + return false +} + +func mergeDatabaseSpecifics(ds1, ds2 *structpb.Struct) *structpb.Struct { + if ds1 == nil { + return ds2 + } + if ds2 == nil { + return ds1 + } + + mergedMap := make(map[string]any) + for k, v := range ds1.GetFields() { + mergedMap[k] = v.AsInterface() + } + + for k, v := range ds2.GetFields() { + val2 := v.AsInterface() + if existing, ok := mergedMap[k]; ok { + mergedVal, err := conversion.MergeDatabaseSpecificValues(existing, val2) + if err == nil { + mergedMap[k] = mergedVal + } + } else { + mergedMap[k] = val2 + } + } + + if ds, err := utility.NewStructpbFromMap(mergedMap); err == nil { + return ds + } + + return ds1 +} + +// mergeRanges merges two ranges into one. It prefers base over other if +// both ranges have the same type and repo. 
+func mergeRanges(base, other *osvschema.Range) *osvschema.Range { + merged := &osvschema.Range{ + Type: base.GetType(), + Repo: base.GetRepo(), + Events: append([]*osvschema.Event{}, base.GetEvents()...), + DatabaseSpecific: mergeDatabaseSpecifics(base.GetDatabaseSpecific(), other.GetDatabaseSpecific()), + } + for _, e := range other.GetEvents() { + found := false + for _, existing := range merged.GetEvents() { + if e.GetIntroduced() != "" && e.GetIntroduced() == existing.GetIntroduced() { + found = true + break + } + if e.GetFixed() != "" && e.GetFixed() == existing.GetFixed() { + found = true + break + } + if e.GetLastAffected() != "" && e.GetLastAffected() == existing.GetLastAffected() { + found = true + break + } } - if len(b.GetRanges()) > 0 { - repoB = b.GetRanges()[0].GetRepo() + if !found { + if e.GetIntroduced() != "" { + merged.Events = append([]*osvschema.Event{e}, merged.GetEvents()...) + } else { + merged.Events = append(merged.Events, e) + } + } + } + slices.SortStableFunc(merged.GetEvents(), func(a, b *osvschema.Event) int { + if a.GetIntroduced() != "" && b.GetIntroduced() == "" { + return -1 + } + if a.GetIntroduced() == "" && b.GetIntroduced() != "" { + return 1 } - return cmp.Compare(repoA, repoB) + return 0 }) - return combinedAffected + return merged +} + +// pickBestRange picks the best range between two ranges. +// It prefers cve5Range over nvdRange if both ranges have fixed information. +// If one range is references-only, it merges them instead of choosing one. +// More information can be found in the DESIGN.md file in this folder +func pickBestRange(cve5Range *osvschema.Range, nvdRange *osvschema.Range) *osvschema.Range { + if cve5Range == nil { + cleanLastAffectedIfFixedExists(nvdRange) + return nvdRange + } + if nvdRange == nil { + cleanLastAffectedIfFixedExists(cve5Range) + return cve5Range + } + + // 1. 
If one of the ranges is references-only, merge them instead of choosing one + if isReferencesOnly(nvdRange) { + merged := mergeRanges(cve5Range, nvdRange) + cleanLastAffectedIfFixedExists(merged) + + return merged + } + if isReferencesOnly(cve5Range) { + merged := mergeRanges(nvdRange, cve5Range) + cleanLastAffectedIfFixedExists(merged) + + return merged + } + + // 2. Try to merge boundary versions first for simple 1-event/2-event ranges. + var merged *osvschema.Range + if len(cve5Range.GetEvents()) <= 2 && len(nvdRange.GetEvents()) <= 2 { + c5Intro, c5Fixed := getRangeBoundaryVersions(cve5Range.GetEvents()) + nvdIntro, nvdFixed := getRangeBoundaryVersions(nvdRange.GetEvents()) + + // Prefer cve5 bounds, but use nvd if cve5 is missing them + if c5Intro == "" { + c5Intro = nvdIntro + } + if c5Fixed == "" { + c5Fixed = nvdFixed + } + + if c5Intro != "" || c5Fixed != "" { + merged = conversion.BuildGitVersionRange(c5Intro, "", c5Fixed, cve5Range.GetRepo()) + merged.DatabaseSpecific = mergeDatabaseSpecifics(cve5Range.GetDatabaseSpecific(), nvdRange.GetDatabaseSpecific()) + } + } + + if merged == nil { + // 2. Prioritize range with fixed information over last_affected / open-ended ranges + c5HasFixed := hasFixedEvent(cve5Range) + nvdHasFixed := hasFixedEvent(nvdRange) + + if c5HasFixed != nvdHasFixed { + if c5HasFixed { + merged = cve5Range + } else { + merged = nvdRange + } + } + } + + if merged == nil { + // 3. Prefer constrained ranges (no introduced "0") + c5HasIntroZero := hasIntroducedZero(cve5Range) + nvdHasIntroZero := hasIntroducedZero(nvdRange) + + if c5HasIntroZero != nvdHasIntroZero { + if !c5HasIntroZero { + merged = cve5Range + } else { + merged = nvdRange + } + } + } + + if merged == nil { + // 4. 
Prefer CPE_RANGE if it exists, otherwise fall back to preferred source (CVE5) + c5IsCPERange := isCPERange(cve5Range) + nvdIsCPERange := isCPERange(nvdRange) + + if c5IsCPERange != nvdIsCPERange { + if c5IsCPERange { + merged = cve5Range + } else { + merged = nvdRange + } + } + } + + if merged == nil { + cve5Evs := parseExtractedEvents(cve5Range) + nvdEvs := parseExtractedEvents(nvdRange) + + if !sameVersionRanges(cve5Evs, nvdEvs) && len(cve5Evs) > 0 && len(nvdEvs) > 0 { + // Different version ranges defined, prioritize preferred source (CVE5) + merged = cve5Range + } + } + + if merged == nil { + // Fallback: choose the one with more complete Git commits (more events) + if len(nvdRange.GetEvents()) > len(cve5Range.GetEvents()) { + merged = nvdRange + } else { + merged = cve5Range + } + } + + // 5. Remove last_affected events if a fixed commit exists + cleanLastAffectedIfFixedExists(merged) + + return merged } func hasRanges(affected []*osvschema.Affected) bool { diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index 51504d9b34b..be2c998f1e2 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -11,6 +11,7 @@ import ( "github.com/google/osv/vulnfeeds/models" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/structpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -177,9 +178,20 @@ func TestPickAffectedInformation(t *testing.T) { nvdAffected: append(append([]*osvschema.Affected(nil), nvdBase...), &osvschema.Affected{ Package: &osvschema.Package{Name: "another"}, }), - wantAffected: append(append([]*osvschema.Affected(nil), nvdBase...), &osvschema.Affected{ - Package: &osvschema.Package{Name: "another"}, - }), + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: 
cve5Base[0].GetRanges()[0].GetEvents(), + }, + }, + }, + { + Package: &osvschema.Package{Name: "another"}, + }, + }, }, { name: "Same repo, same number of ranges, cve5 data is preferred", @@ -316,9 +328,9 @@ func TestPickAffectedInformation(t *testing.T) { }, }, wantAffected: []*osvschema.Affected{ - cve5Base[0], // From cve5 { Ranges: []*osvschema.Range{ + cve5Base[0].GetRanges()[0], { Type: osvschema.Range_GIT, Repo: repoB, @@ -331,6 +343,356 @@ func TestPickAffectedInformation(t *testing.T) { }, }, }, + { + name: "Fixed overrides LastAffected (CVE5 has Fixed)", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {LastAffected: "1.0.2"}, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + }, + { + name: "Fixed overrides LastAffected (NVD has Fixed)", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {LastAffected: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.2"}, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.2"}, + }, + }, + }, + }, + }, + }, + { + name: "Prefer constrained range (non-zero introduced)", + cve5Affected: 
[]*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0.9.0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0.9.0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + }, + { + name: "Prefer CPE_RANGE source over CVE5", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("CPE_RANGE"), + }, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("CPE_RANGE"), + }, + }, + }, + }, + }, + }, + }, + { + name: "Prefer CPE_RANGE source over CVE5 when source is array", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: 
osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("CPE_RANGE"), + structpb.NewStringValue("REFERENCES"), + }, + }), + }, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("CPE_RANGE"), + structpb.NewStringValue("REFERENCES"), + }, + }), + }, + }, + }, + }, + }, + }, + }, + { + name: "Cleanup last_affected if fixed exists", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Fixed: "1.0.1"}, + {LastAffected: "1.0.0"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{}, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + }, + { + name: "Merge references-only range with CVE range", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "1.0.0"}, + {LastAffected: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("AFFECTED_FIELD"), + }, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + 
{Fixed: "2c1762b85acb"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("REFERENCES"), + }, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Introduced: "1.0.0"}, + {Fixed: "2c1762b85acb"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("AFFECTED_FIELD"), + structpb.NewStringValue("REFERENCES"), + }, + }), + }, + }, + }, + }, + }, + }, + }, } // Sorter for comparing slices of Affected, ignoring order. diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index ed73618d886..6c99603ede0 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -654,7 +654,7 @@ func AddFieldToDatabaseSpecific(ds *structpb.Struct, field string, value any) er } // ProcessRanges attempts to resolve the given ranges to commits and updates the metrics accordingly. -func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *models.ConversionMetrics, cache git.RepoTagsCache, source models.VersionSource) ([]models.RangeWithMetadata, []models.RangeWithMetadata, []string) { +func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *models.ConversionMetrics, cache git.RepoTagsCache) ([]models.RangeWithMetadata, []models.RangeWithMetadata, []string) { if len(ranges) == 0 { return nil, nil, nil } @@ -672,7 +672,17 @@ func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *m } } - metrics.VersionSources = append(metrics.VersionSources, source) + // Dynamically record the precise sources from each processed range's metadata. 
+ // This ensures that granular version sources (such as CPE_RANGE or CPE_STRING) are tracked in + // the final conversion metrics instead of a single generic fallback source. + for _, ra := range ranges { + if ra.Metadata.Source == "" { + continue + } + if !slices.Contains(metrics.VersionSources, ra.Metadata.Source) { + metrics.VersionSources = append(metrics.VersionSources, ra.Metadata.Source) + } + } return r, un, sR } diff --git a/vulnfeeds/conversion/cve5/default_extractor.go b/vulnfeeds/conversion/cve5/default_extractor.go index ef391c26731..c93a6afe0f6 100644 --- a/vulnfeeds/conversion/cve5/default_extractor.go +++ b/vulnfeeds/conversion/cve5/default_extractor.go @@ -42,7 +42,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln var unresolvedRanges []models.RangeWithMetadata processRanges := func(nr []models.RangeWithMetadata) bool { - r, un, sR := c.ProcessRanges(nr, repos, metrics, repoTagsCache, models.VersionSourceAffected) + r, un, sR := c.ProcessRanges(nr, repos, metrics, repoTagsCache) resolvedRanges = append(resolvedRanges, r...) unresolvedRanges = append(unresolvedRanges, un...) 
for _, s := range sR { diff --git a/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap b/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap index 2587dd4d1b7..88462702cb5 100755 --- a/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap +++ b/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap @@ -1767,7 +1767,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_RANGE", "REFERENCES" ] }, @@ -1853,7 +1853,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_RANGE", "REFERENCES" ] }, @@ -1997,7 +1997,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_RANGE", "REFERENCES" ] }, @@ -2066,7 +2066,7 @@ "fixed": "7.61.1" } ], - "source": "CPE_FIELD" + "source": "CPE_RANGE" }, "events": [ { @@ -2105,7 +2105,7 @@ "last_affected": "18.04" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "canonical:ubuntu_linux" }, { @@ -2117,7 +2117,7 @@ "last_affected": "9.0" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "debian:debian_linux" }, { @@ -2145,7 +2145,7 @@ "last_affected": "7.6" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "redhat:enterprise_linux" } ] @@ -2230,7 +2230,7 @@ "last_affected": "38" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "fedoraproject:fedora" }, { @@ -2254,7 +2254,7 @@ "last_affected": "12.1" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "redhat:directory_server" } ] @@ -2388,7 +2388,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_STRING", "REFERENCES" ] }, @@ -2424,7 +2424,7 @@ "last_affected": "36" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "fedoraproject:fedora" } ] @@ -2810,7 +2810,7 @@ "last_affected": "2.8.4" } ], - "source": "CPE_FIELD" + "source": "CPE_STRING" }, "events": [ { @@ -3077,7 +3077,7 @@ "last_affected": "12.04" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "canonical:ubuntu_linux" }, { @@ -3089,7 +3089,7 @@ "last_affected": "42.1" } ], - 
"source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "opensuse:leap" } ] @@ -3243,7 +3243,7 @@ "fixed": "0.9.2" } ], - "source": "CPE_FIELD" + "source": "CPE_RANGE" }, "events": [ { @@ -3270,7 +3270,7 @@ "last_affected": "40" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "fedoraproject:fedora" }, { @@ -3286,7 +3286,7 @@ "last_affected": "8.0" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "redhat:enterprise_linux" } ] @@ -3329,26 +3329,6 @@ ], "database_specific": { "unresolved_ranges": [ - { - "cpes": [ - "cpe:2.3:o:fedoraproject:fedora:38:*:*:*:*:*:*:*", - "cpe:2.3:o:fedoraproject:fedora:39:*:*:*:*:*:*:*", - "cpe:2.3:o:fedoraproject:fedora:40:*:*:*:*:*:*:*" - ], - "extracted_events": [ - { - "last_affected": "38" - }, - { - "last_affected": "39" - }, - { - "last_affected": "40" - } - ], - "source": "CPE_FIELD", - "vendor_product": "fedoraproject:fedora" - }, { "cpes": [ "cpe:2.3:a:filezilla-project:filezilla_client:*:*:*:*:*:*:*:*" @@ -3358,7 +3338,7 @@ "fixed": "3.67.0" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "filezilla-project:filezilla_client" }, { @@ -3373,7 +3353,7 @@ "fixed": "0.81" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "putty:putty" }, { @@ -3385,7 +3365,7 @@ "fixed": "1.14.6" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "tigris:tortoisesvn" }, { @@ -3397,7 +3377,7 @@ "fixed": "2.15.0.1" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "tortoisegit:tortoisegit" }, { @@ -3409,9 +3389,29 @@ "fixed": "6.3.3" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "winscp:winscp" }, + { + "cpes": [ + "cpe:2.3:o:fedoraproject:fedora:38:*:*:*:*:*:*:*", + "cpe:2.3:o:fedoraproject:fedora:39:*:*:*:*:*:*:*", + "cpe:2.3:o:fedoraproject:fedora:40:*:*:*:*:*:*:*" + ], + "extracted_events": [ + { + "last_affected": "38" + }, + { + "last_affected": "39" + }, + { + 
"last_affected": "40" + } + ], + "source": "CPE_STRING", + "vendor_product": "fedoraproject:fedora" + }, { "extracted_events": [ { diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 549e3bd1ad9..1e3047e6456 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -82,7 +82,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, vpRepoCache *c.VPRepoCache, cac } // If we have ranges, try to resolve them - r, un, sR := c.ProcessRanges(cpeRanges, repos, metrics, cache, models.VersionSourceCPE) + r, un, sR := c.ProcessRanges(cpeRanges, repos, metrics, cache) if metrics.Outcome == models.Error { return nil, metrics, models.Error } @@ -116,7 +116,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, vpRepoCache *c.VPRepoCache, cac if len(textRanges) > 0 { metrics.AddNote("Extracted versions from description: %v", textRanges) } - r, un, sR := c.ProcessRanges(textRanges, repos, metrics, cache, models.VersionSourceDescription) + r, un, sR := c.ProcessRanges(textRanges, repos, metrics, cache) if metrics.Outcome == models.Error { return nil, metrics, models.Error } diff --git a/vulnfeeds/conversion/versions.go b/vulnfeeds/conversion/versions.go index 5916cc8be51..b6548e236db 100644 --- a/vulnfeeds/conversion/versions.go +++ b/vulnfeeds/conversion/versions.go @@ -736,6 +736,8 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, vpRepoCa introduced := "" fixed := "" lastaffected := "" + source := models.VersionSourceCPERange + if match.VersionStartIncluding != nil { introduced = cleanVersion(*match.VersionStartIncluding) } else if match.VersionStartExcluding != nil { @@ -778,6 +780,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, vpRepoCa if CPE.Update != "ANY" { lastaffected += "-" + CPE.Update } + source = models.VersionSourceCPEString } if introduced == "" { @@ -816,7 +819,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions 
[]string, vpRepoCa Range: vr, Metadata: models.Metadata{ CPE: match.Criteria, - Source: models.VersionSourceCPE, + Source: source, }, }, ) @@ -828,7 +831,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, vpRepoCa Range: vr, Metadata: models.Metadata{ CPE: match.Criteria, - Source: models.VersionSourceCPE, + Source: source, }, }, ) diff --git a/vulnfeeds/models/metrics.go b/vulnfeeds/models/metrics.go index f0f56c8f9ca..d1334f20bcd 100644 --- a/vulnfeeds/models/metrics.go +++ b/vulnfeeds/models/metrics.go @@ -110,6 +110,8 @@ const ( VersionSourceAffected VersionSource = "AFFECTED_FIELD" VersionSourceGit VersionSource = "AFFECTED_FIELD_GIT" VersionSourceCPE VersionSource = "CPE_FIELD" + VersionSourceCPERange VersionSource = "CPE_RANGE" + VersionSourceCPEString VersionSource = "CPE_STRING" VersionSourceDescription VersionSource = "DESCRIPTION" VersionSourceText VersionSource = "TEXT_EXTRACTION" VersionSourceRefs VersionSource = "REFERENCES"