Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 57 additions & 43 deletions cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,13 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent {
}
val seedForFilepaths =
runBlocking(Dispatchers.IO) { createSeedForFilepaths(seedFilepaths) }
val seedForBzlFiles = createSeedForBzlFiles(allTargets, modifiedFilepaths)
// Only mix the bzl-files seed when at least one main-repo `.bzl` was actually hashed.
// This keeps the seed (and therefore every target's hash) byte-for-byte stable for
// workspaces that don't load any tracked .bzl files, preserving cached hashes from
// before this change.
val combinedSeed =
if (seedForBzlFiles != null) {
sha256 {
safePutBytes(seedForFilepaths)
safePutBytes(seedForBzlFiles)
}
} else {
seedForFilepaths
}
// Attribute each BUILD file's loaded `.bzl` digests to the package that loads them, so a
// `.bzl` edit only re-hashes targets in packages that actually `load()` it -- not every
// target in the workspace (issue #365). A package that loads no tracked `.bzl` gets nothing
// mixed in, keeping its targets' hashes byte-for-byte stable.
val packageBzlSeeds = createPackageBzlSeeds(allTargets, modifiedFilepaths)
return hashAllTargets(
combinedSeed, sourceDigests, allTargets, ignoredAttrs, modifiedFilepaths)
seedForFilepaths, packageBzlSeeds, sourceDigests, allTargets, ignoredAttrs, modifiedFilepaths)
}

private fun hashSourcefiles(
Expand Down Expand Up @@ -111,6 +102,7 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent {

private fun hashAllTargets(
seedHash: ByteArray,
packageBzlSeeds: Map<String, ByteArray>,
sourceDigests: ConcurrentMap<String, ByteArray>,
allTargets: List<BazelTarget>,
ignoredAttrs: Set<String>,
Expand All @@ -130,6 +122,7 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent {
sourceDigests,
ruleHashes,
seedHash,
packageBzlSeeds,
ignoredAttrs,
modifiedFilepaths)
Pair(
Expand Down Expand Up @@ -187,8 +180,9 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent {
}

/**
* Builds a seed-hash contribution from the contents of every `.bzl` (and `.scl`) file the
* workspace's BUILD files load.
* Builds a per-package seed-hash contribution from the contents of every `.bzl` (and `.scl`)
* file that package's BUILD file loads, keyed by package label (e.g. `//pkg`, `//` for the
* root package).
*
* Background: Bazel pre-7 populated [Build.Rule.skylark_environment_hash_code] in the query
* proto, so any change to a `.bzl` file loaded by a rule's BUILD file naturally bubbled into
Expand All @@ -197,41 +191,61 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent {
* [#227](https://github.com/Tinder/bazel-diff/issues/227): editing a macro body (e.g. adding
* `print()`) no longer invalidated any caller because the emitted rule attrs were identical.
*
* Fix: walk every queried SourceFile target's `subincludeList` (the `Build.SourceFile.subinclude`
* proto field, which already lists every `.bzl` the BUILD file loaded), softDigest each main-repo
* `.bzl`, and roll the union of digests into the workspace-wide seed. This is intentionally
* conservative -- a single `.bzl` edit re-hashes every target -- because `.bzl` edits are rare
* and missing them silently is the worse failure mode. Per-package precision would require
* mapping each rule to its package's BUILD file, which isn't a direct dep relationship in the
* query proto.
* Fix: each package's BUILD `SourceFile` target carries a `subincludeList` (the
* `Build.SourceFile.subinclude` proto field) listing every `.bzl` that BUILD loaded. We
* softDigest each main-repo `.bzl` and roll the digests for a given package into a seed
* attributed to that package. Callers then mix only their own package's seed into each
* target's hash, so editing a `.bzl` re-hashes exactly the targets in packages that
* `load()` it -- not every target in the workspace
* ([#365](https://github.com/Tinder/bazel-diff/issues/365)). A package that loads no tracked
* `.bzl` has no entry here, so nothing is mixed in and its targets stay byte-for-byte stable.
*
* External-repo `.bzl` files (`@repo//...`, `@@canonical//...`) are skipped because
* [SourceFileHasher.softDigest] returns null for non-main-repo labels, which keeps the seed
* stable across BCR fetches that don't actually change repo contents.
*/
private fun createSeedForBzlFiles(
private fun createPackageBzlSeeds(
allTargets: List<BazelTarget>,
modifiedFilepaths: Set<Path>
): ByteArray? {
val subincludes =
allTargets
.asSequence()
.filterIsInstance<BazelTarget.SourceFile>()
.flatMap { it.subincludeList.asSequence() }
.toSortedSet()
val tracked = mutableListOf<Pair<String, ByteArray>>()
for (label in subincludes) {
val digest =
sourceFileHasher.softDigest(
BazelSourceFileTarget(label, ByteArray(0)), modifiedFilepaths)
if (digest != null) tracked += label to digest
): Map<String, ByteArray> {
// Union the loaded `.bzl` labels per package. Every source file in a package shares the same
// BUILD, but the `subinclude` field is populated on the BUILD's SourceFile target; group by
// package so we don't depend on which source file carries it.
val packageToLabels = sortedMapOf<String, java.util.SortedSet<String>>()
for (target in allTargets) {
if (target !is BazelTarget.SourceFile || target.subincludeList.isEmpty()) continue
packageToLabels
.getOrPut(labelToPackage(target.sourceFileName)) { sortedSetOf() }
.addAll(target.subincludeList)
}
if (tracked.isEmpty()) return null
return sha256 {
for ((label, digest) in tracked) {
safePutBytes(label.toByteArray())
safePutBytes(digest)

val result = mutableMapOf<String, ByteArray>()
for ((pkg, labels) in packageToLabels) {
val tracked = mutableListOf<Pair<String, ByteArray>>()
for (label in labels) {
val digest =
sourceFileHasher.softDigest(
BazelSourceFileTarget(label, ByteArray(0)), modifiedFilepaths)
if (digest != null) tracked += label to digest
}
if (tracked.isEmpty()) continue
result[pkg] = sha256 {
for ((label, digest) in tracked) {
safePutBytes(label.toByteArray())
safePutBytes(digest)
}
}
}
return result
}
}

/**
* Returns the package portion of a Bazel label, i.e. everything before the target separator `:`.
* `//pkg:a` -> `//pkg`, `//:logo` -> `//`, `@@repo//pkg:a` -> `@@repo//pkg`. Labels without a `:`
* (not expected from `bazel query` output) are returned unchanged.
*/
internal fun labelToPackage(label: String): String {
val colon = label.lastIndexOf(':')
return if (colon >= 0) label.substring(0, colon) else label
}
7 changes: 7 additions & 0 deletions cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class RuleHasher(
ruleHashes: ConcurrentMap<String, TargetDigest>,
sourceDigests: ConcurrentMap<String, ByteArray>,
seedHash: ByteArray?,
packageBzlSeeds: Map<String, ByteArray>,
depPath: LinkedHashSet<String>?,
ignoredAttrs: Set<String>,
modifiedFilepaths: Set<Path>
Expand All @@ -55,6 +56,11 @@ class RuleHasher(
targetSha256(trackDepLabels) {
putDirectBytes(rule.digest(ignoredAttrs))
putDirectBytes(seedHash)
// Mix in the `.bzl` seed for this rule's own package only. Each rule always looks up
// its own package (not the caller's), so this stays consistent under the memoized,
// depth-first recursion below and a macro edit re-hashes only the packages that
// `load()` it (issue #365).
putDirectBytes(packageBzlSeeds[labelToPackage(rule.name)])

for (ruleInput in rule.ruleInputList(useCquery, fineGrainedHashExternalRepos)) {
// Under --useCquery, `ruleInput` may carry an embedded configurationChecksum (see
Expand All @@ -78,6 +84,7 @@ class RuleHasher(
ruleHashes,
sourceDigests,
seedHash,
packageBzlSeeds,
depPathClone,
ignoredAttrs,
modifiedFilepaths)
Expand Down
6 changes: 6 additions & 0 deletions cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class TargetHasher : KoinComponent {
sourceDigests: ConcurrentMap<String, ByteArray>,
ruleHashes: ConcurrentMap<String, TargetDigest>,
seedHash: ByteArray?,
packageBzlSeeds: Map<String, ByteArray>,
ignoredAttrs: Set<String>,
modifiedFilepaths: Set<Path>
): TargetDigest {
Expand All @@ -37,6 +38,7 @@ class TargetHasher : KoinComponent {
ruleHashes,
sourceDigests,
seedHash,
packageBzlSeeds,
depPath = null,
ignoredAttrs,
modifiedFilepaths)
Expand All @@ -53,14 +55,18 @@ class TargetHasher : KoinComponent {
ruleHashes,
sourceDigests,
seedHash,
packageBzlSeeds,
depPath = null,
ignoredAttrs,
modifiedFilepaths)
}
is BazelTarget.SourceFile -> {
// Mix in the `.bzl` seed for this source file's own package only, so a macro edit in
// another package leaves this file's hash untouched (issue #365).
val digest = sha256 {
safePutBytes(sourceDigests[target.sourceFileName])
safePutBytes(seedHash)
safePutBytes(packageBzlSeeds[labelToPackage(target.sourceFileName)])
}
TargetDigest(digest, digest)
}
Expand Down
44 changes: 13 additions & 31 deletions cli/src/test/kotlin/com/bazel_diff/e2e/E2ETest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -1876,19 +1876,22 @@ class E2ETest {
}

// ------------------------------------------------------------------------
// Reproducer for https://github.com/Tinder/bazel-diff/issues/365
// Regression coverage for https://github.com/Tinder/bazel-diff/issues/365
// ------------------------------------------------------------------------
// The user reported that after upgrading bazel-diff (12.0.0 -> 25.0.0), ADDING (or editing) a
// `.bzl` macro and `load()`ing it -- even in an otherwise-empty BUILD file -- caused EVERY
// target in the repository to be reported as impacted, not just the targets that load the macro.
//
// Root cause: the #259/#227 fix in BuildGraphHasher.createSeedForBzlFiles rolls the union of
// EVERY main-repo `.bzl` file's digest into a single workspace-wide seed that is then mixed into
// every target's hash. Its own doc comment acknowledges the tradeoff: "a single `.bzl` edit
// re-hashes every target". So introducing a brand-new, unrelated `.bzl` flips the seed (here from
// "no tracked .bzl" to "one tracked .bzl"), which changes the hash of completely unrelated
// Root cause was that the #259/#227 fix rolled the union of EVERY main-repo `.bzl` file's digest
// into a single workspace-wide seed that was then mixed into every target's hash, so introducing
// a brand-new, unrelated `.bzl` flipped the seed and changed the hash of completely unrelated
// targets -- e.g. native genrules in another package that load nothing.
//
// Fix: BuildGraphHasher.createPackageBzlSeeds attributes each `.bzl` digest to the package whose
// BUILD `load()`s it, and each target mixes in only its own package's seed. A macro added in a
// new package no longer perturbs targets that never load it, while #259/#227 stays fixed because
// a `.bzl` edit still re-hashes every target in the packages that load it.
//
// Fixture `bzl_seed_overtrigger` has a single package `//pkg` with two native genrules
// (`//pkg:a`, `//pkg:b`) that load no `.bzl`. The scenario adds a NEW package `//macros` whose
// BUILD only does `load("//macros:defs.bzl", "my_macro")` (an empty load -- exactly the shape
Expand Down Expand Up @@ -1954,16 +1957,11 @@ class E2ETest {
return impactedTargetsOutput.readLines().filter { it.isNotBlank() }.toSet()
}

// Desired post-fix invariant: adding an unrelated macro must NOT impact //pkg:a or //pkg:b.
// @Ignore'd because it fails today (the global .bzl seed re-hashes every target). Flipping this
// @Ignore off should be the second-to-last step of the fix PR; at that point the companion
// *_currentBehaviourBuggy_issue365 test below must be deleted or updated.
// Post-fix invariant for #365: adding an unrelated macro must NOT impact //pkg:a or //pkg:b.
// BuildGraphHasher.createPackageBzlSeeds now attributes each `.bzl` digest to the package that
// `load()`s it, so the new `//macros` package's macro does not perturb the `//pkg` targets that
// load nothing.
@Test
@org.junit.Ignore(
"Reproducer for #365 -- expected to pass once the .bzl seed is attributed per-package " +
"instead of as a single workspace-wide seed. Today BuildGraphHasher.createSeedForBzlFiles " +
"mixes the union of all .bzl digests into every target's hash, so adding an unrelated " +
"macro impacts //pkg:a and //pkg:b and this assertion fails.")
fun testAddingUnrelatedMacroDoesNotImpactExistingTargets_reproducerForIssue365() {
val impacted = runIssue365MacroAddScenario()
val unrelatedImpacted =
Expand All @@ -1978,22 +1976,6 @@ class E2ETest {
.isEqualTo(true)
}

// Pins the current (buggy) behaviour so CI stays green and the reproducer stays visible in
// source. When the fix lands, this test should be deleted/updated and the @Ignore above flipped.
@Test
fun testAddingUnrelatedMacroImpactsExistingTargets_currentBehaviourBuggy_issue365() {
val impacted = runIssue365MacroAddScenario()
val aImpacted = impacted.any { it == "//pkg:a" || it == "@@//pkg:a" }
val bImpacted = impacted.any { it == "//pkg:b" || it == "@@//pkg:b" }
assertThat(aImpacted && bImpacted)
.transform(
"Documents the #365 over-triggering bug: adding an unrelated macro currently impacts " +
"//pkg:a and //pkg:b. Got: $impacted") {
it
}
.isEqualTo(true)
}

private fun copyTestWorkspace(path: String): File {
val testProject = temp.newFolder()

Expand Down
Loading