Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion go/extractor/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
load("@rules_go//go:def.bzl", "go_library")
load("@rules_go//go:def.bzl", "go_library", "go_test")
load("@rules_java//java:defs.bzl", "java_library")
load("@rules_pkg//pkg:mappings.bzl", "pkg_files")

Expand Down Expand Up @@ -60,3 +60,10 @@ pkg_files(
},
visibility = ["//go:__pkg__"],
)

# Test target for the extractor package, built from extractor_test.go.
# NOTE(review): `embed` is expected to compile the test source together with the
# sources of the :extractor library (see the rules_go go_test docs) -- confirm.
go_test(
name = "extractor_test",
srcs = ["extractor_test.go"],
embed = [":extractor"],
deps = ["@org_golang_x_tools//go/packages"],
)
87 changes: 65 additions & 22 deletions go/extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,63 @@ func init() {
}
}

// isExactTestPackage reports whether pkg's ID has exactly the form
// "<PkgPath> [<PkgPath>.test]", i.e. the bracketed test name is derived from the
// package's own path.
//
// For example, the ID "github.com/foo/bar [github.com/foo/bar.test]" matches,
// whereas "github.com/foo/bar [github.com/foo/bar/nested.test]" (a nested test
// dependency) does not.
func isExactTestPackage(pkg *packages.Package) bool {
	path := pkg.PkgPath
	// Assemble the one ID that counts as an exact match and compare against it.
	wantID := path + " [" + path + ".test]"
	return wantID == pkg.ID
}

// isBetterPackage reports whether pkg should replace current as the variant
// chosen for extraction. The criteria are applied in order, and the first one on
// which the two packages differ decides the result:
//
//  1. An exact test package (see isExactTestPackage) beats one that is not.
//  2. The package with more Syntax entries (more files to extract) wins.
//  3. The package with the longer ID string wins.
//
// If the two packages are equal on every criterion the result is false, so the
// current choice is kept.
func isBetterPackage(pkg, current *packages.Package) bool {
	candidateExact, incumbentExact := isExactTestPackage(pkg), isExactTestPackage(current)
	candidateFiles, incumbentFiles := len(pkg.Syntax), len(current.Syntax)

	switch {
	case candidateExact != incumbentExact:
		// Exactly one of the two is an exact test package; it wins.
		return candidateExact
	case candidateFiles != incumbentFiles:
		return candidateFiles > incumbentFiles
	default:
		// Last resort: compare ID lengths.
		return len(pkg.ID) > len(current.ID)
	}
}

// selectBestPackages returns, for every package path reachable from pkgs, the
// single package variant that should be extracted.
//
// In the context of a `go test -c` compilation the same package path can show up
// more than once, with IDs such as "abc.com/pkgname [abc.com/pkgname.test]"
// distinguishing the variant that contains and is used by test code. Variants
// sharing a path are ranked by isBetterPackage:
//
//  1. an exact test package (e.g. "pkg [pkg.test]") over nested test dependencies,
//  2. then the variant with the most Syntax nodes,
//  3. then the variant with the longest ID string.
//
// When two variants tie on all criteria, the one visited first is kept.
func selectBestPackages(pkgs []*packages.Package) map[string]*packages.Package {
	chosen := make(map[string]*packages.Package)

	consider := func(candidate *packages.Package) {
		incumbent, seen := chosen[candidate.PkgPath]
		if seen && !isBetterPackage(candidate, incumbent) {
			// An at-least-as-good variant is already recorded for this path.
			return
		}
		chosen[candidate.PkgPath] = candidate
	}

	packages.Visit(pkgs, nil, consider)
	return chosen
}

// ExtractWithFlags extracts the packages specified by the given patterns and build flags
func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, sourceRoot string) error {
startTime := time.Now()
Expand Down Expand Up @@ -153,22 +210,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,

pkgsNotFound := make([]string, 0, len(pkgs))

// Build a map from package paths to their longest IDs--
// in the context of a `go test -c` compilation, we will see the same package more than
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
// that contains and is used by test code.
// For our purposes it is simplest to just ignore the non-test version, since the test
// version seems to be a superset of it.
longestPackageIds := make(map[string]string)
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
if longestIDSoFar, present := longestPackageIds[pkg.PkgPath]; present {
if len(pkg.ID) > len(longestIDSoFar) {
longestPackageIds[pkg.PkgPath] = pkg.ID
}
} else {
longestPackageIds[pkg.PkgPath] = pkg.ID
}
})
// Build a map from each package path to the best package variant for that path
bestPackageIds := selectBestPackages(pkgs)

// Do a post-order traversal and extract the package scope of each package
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
Expand Down Expand Up @@ -257,15 +300,15 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
// extract AST information for all packages
packages.Visit(pkgs, nil, func(pkg *packages.Package) {

// If this is a variant of a package that also occurs with a longer ID, skip it;
// If this is a variant of a package that also occurs with a better ID, skip it;
// otherwise we would extract the same file more than once including extracting the
// body of methods twice, causing database inconsistencies.
//
// We prefer the version with the longest ID because that is (so far as I know) always
// the version that defines more entities -- the only case I'm aware of being a test
// variant of a package, which includes test-only functions in addition to the complete
// contents of the main variant.
if pkg.ID != longestPackageIds[pkg.PkgPath] {
// We prefer the version with the most complete test coverage, prioritizing:
// 1. Exact test packages (e.g., "pkg [pkg.test]") over nested test dependencies
// 2. Packages with more Syntax nodes (more files to extract)
// 3. Longer ID strings as a tiebreaker
if pkg.ID != bestPackageIds[pkg.PkgPath].ID {
return
}

Expand Down
Loading
Loading