From cf165587c0a1bcc3822d79b64e93a45130b1a3ac Mon Sep 17 00:00:00 2001 From: Maxwell Elliott <56700854+tinder-maxwellelliott@users.noreply.github.com> Date: Mon, 8 Jun 2026 12:15:34 -0400 Subject: [PATCH] Fix #365: attribute .bzl seed per-package instead of workspace-wide The #259/#227 fix (createSeedForBzlFiles) rolled the union of every main-repo `.bzl` digest into a single workspace-wide seed that was mixed into every target's hash. As its own doc comment acknowledged ("a single `.bzl` edit re-hashes every target"), adding or editing any `.bzl` -- even in an otherwise-empty BUILD that nothing else loads -- flipped the seed and marked every target in the repo impacted, not just the targets that load it. Replace the single seed with `createPackageBzlSeeds`, which groups each BUILD file's loaded `.bzl` digests by the package that loads them (via the `subinclude` proto field on each package's BUILD SourceFile target). Each rule and source file then mixes in only its own package's seed, looked up by `labelToPackage(name)`. Because every target looks up its own package (never the caller's), this stays consistent under RuleHasher's memoized, depth-first recursion. Effects: - A macro added/edited in package X re-hashes only targets in packages that `load()` it, fixing the #365 over-triggering. - #259/#227 stays fixed: a `.bzl` edit still re-hashes every target in the packages that load it (e.g. macro_invalidation's `//:logo_miniature`). - Workspaces that load no tracked `.bzl` are byte-for-byte stable: the per-package map is empty, so nothing is mixed in (golden-hash unit tests unchanged). Flips the @Ignore'd post-fix invariant test on and removes the companion test that pinned the buggy behaviour. Co-Authored-By: Claude Opus 4.8 --- .../com/bazel_diff/hash/BuildGraphHasher.kt | 100 ++++++++++-------- .../kotlin/com/bazel_diff/hash/RuleHasher.kt | 7 ++ .../com/bazel_diff/hash/TargetHasher.kt | 6 ++ .../test/kotlin/com/bazel_diff/e2e/E2ETest.kt | 44 +++----- 4 files changed, 83 insertions(+), 74 deletions(-) diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt index cf7289b7..5253e1c1 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/BuildGraphHasher.kt @@ -55,22 +55,13 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { } val seedForFilepaths = runBlocking(Dispatchers.IO) { createSeedForFilepaths(seedFilepaths) } - val seedForBzlFiles = createSeedForBzlFiles(allTargets, modifiedFilepaths) - // Only mix the bzl-files seed when at least one main-repo `.bzl` was actually hashed. - // This keeps the seed (and therefore every target's hash) byte-for-byte stable for - // workspaces that don't load any tracked .bzl files, preserving cached hashes from - // before this change. - val combinedSeed = - if (seedForBzlFiles != null) { - sha256 { - safePutBytes(seedForFilepaths) - safePutBytes(seedForBzlFiles) - } - } else { - seedForFilepaths - } + // Attribute each BUILD file's loaded `.bzl` digests to the package that loads them, so a + // `.bzl` edit only re-hashes targets in packages that actually `load()` it -- not every + // target in the workspace (issue #365). A package that loads no tracked `.bzl` gets nothing + // mixed in, keeping its targets' hashes byte-for-byte stable. + val packageBzlSeeds = createPackageBzlSeeds(allTargets, modifiedFilepaths) return hashAllTargets( - combinedSeed, sourceDigests, allTargets, ignoredAttrs, modifiedFilepaths) + seedForFilepaths, packageBzlSeeds, sourceDigests, allTargets, ignoredAttrs, modifiedFilepaths) } private fun hashSourcefiles( @@ -111,6 +102,7 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { private fun hashAllTargets( seedHash: ByteArray, + packageBzlSeeds: Map, sourceDigests: ConcurrentMap, allTargets: List, ignoredAttrs: Set, @@ -130,6 +122,7 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { sourceDigests, ruleHashes, seedHash, + packageBzlSeeds, ignoredAttrs, modifiedFilepaths) Pair( @@ -187,8 +180,9 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { } /** - * Builds a seed-hash contribution from the contents of every `.bzl` (and `.scl`) file the - * workspace's BUILD files load. + * Builds a per-package seed-hash contribution from the contents of every `.bzl` (and `.scl`) + * file that package's BUILD file loads, keyed by package label (e.g. `//pkg`, `//` for the + * root package). * * Background: Bazel pre-7 populated [Build.Rule.skylark_environment_hash_code] in the query * proto, so any change to a `.bzl` file loaded by a rule's BUILD file naturally bubbled into @@ -197,41 +191,61 @@ class BuildGraphHasher(private val bazelClient: BazelClient) : KoinComponent { * [#227](https://github.com/Tinder/bazel-diff/issues/227): editing a macro body (e.g. adding * `print()`) no longer invalidated any caller because the emitted rule attrs were identical. * - * Fix: walk every queried SourceFile target's `subincludeList` (the `Build.SourceFile.subinclude` - * proto field, which already lists every `.bzl` the BUILD file loaded), softDigest each main-repo - * `.bzl`, and roll the union of digests into the workspace-wide seed. This is intentionally - * conservative -- a single `.bzl` edit re-hashes every target -- because `.bzl` edits are rare - * and missing them silently is the worse failure mode. Per-package precision would require - * mapping each rule to its package's BUILD file, which isn't a direct dep relationship in the - * query proto. + * Fix: each package's BUILD `SourceFile` target carries a `subincludeList` (the + * `Build.SourceFile.subinclude` proto field) listing every `.bzl` that BUILD loaded. We + * softDigest each main-repo `.bzl` and roll the digests for a given package into a seed + * attributed to that package. Callers then mix only their own package's seed into each + * target's hash, so editing a `.bzl` re-hashes exactly the targets in packages that + * `load()` it -- not every target in the workspace + * ([#365](https://github.com/Tinder/bazel-diff/issues/365)). A package that loads no tracked + * `.bzl` has no entry here, so nothing is mixed in and its targets stay byte-for-byte stable. * * External-repo `.bzl` files (`@repo//...`, `@@canonical//...`) are skipped because * [SourceFileHasher.softDigest] returns null for non-main-repo labels, which keeps the seed * stable across BCR fetches that don't actually change repo contents. */ - private fun createSeedForBzlFiles( + private fun createPackageBzlSeeds( allTargets: List, modifiedFilepaths: Set - ): ByteArray? { - val subincludes = - allTargets - .asSequence() - .filterIsInstance() - .flatMap { it.subincludeList.asSequence() } - .toSortedSet() - val tracked = mutableListOf>() - for (label in subincludes) { - val digest = - sourceFileHasher.softDigest( - BazelSourceFileTarget(label, ByteArray(0)), modifiedFilepaths) - if (digest != null) tracked += label to digest + ): Map { + // Union the loaded `.bzl` labels per package. Every source file in a package shares the same + // BUILD, but the `subinclude` field is populated on the BUILD's SourceFile target; group by + // package so we don't depend on which source file carries it. + val packageToLabels = sortedMapOf>() + for (target in allTargets) { + if (target !is BazelTarget.SourceFile || target.subincludeList.isEmpty()) continue + packageToLabels + .getOrPut(labelToPackage(target.sourceFileName)) { sortedSetOf() } + .addAll(target.subincludeList) } - if (tracked.isEmpty()) return null - return sha256 { - for ((label, digest) in tracked) { - safePutBytes(label.toByteArray()) - safePutBytes(digest) + + val result = mutableMapOf() + for ((pkg, labels) in packageToLabels) { + val tracked = mutableListOf>() + for (label in labels) { + val digest = + sourceFileHasher.softDigest( + BazelSourceFileTarget(label, ByteArray(0)), modifiedFilepaths) + if (digest != null) tracked += label to digest + } + if (tracked.isEmpty()) continue + result[pkg] = sha256 { + for ((label, digest) in tracked) { + safePutBytes(label.toByteArray()) + safePutBytes(digest) + } } } + return result } } + +/** + * Returns the package portion of a Bazel label, i.e. everything before the target separator `:`. + * `//pkg:a` -> `//pkg`, `//:logo` -> `//`, `@@repo//pkg:a` -> `@@repo//pkg`. Labels without a `:` + * (not expected from `bazel query` output) are returned unchanged. + */ +internal fun labelToPackage(label: String): String { + val colon = label.lastIndexOf(':') + return if (colon >= 0) label.substring(0, colon) else label +} diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt index faa917b6..43c39513 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt @@ -38,6 +38,7 @@ class RuleHasher( ruleHashes: ConcurrentMap, sourceDigests: ConcurrentMap, seedHash: ByteArray?, + packageBzlSeeds: Map, depPath: LinkedHashSet?, ignoredAttrs: Set, modifiedFilepaths: Set @@ -55,6 +56,11 @@ class RuleHasher( targetSha256(trackDepLabels) { putDirectBytes(rule.digest(ignoredAttrs)) putDirectBytes(seedHash) + // Mix in the `.bzl` seed for this rule's own package only. Each rule always looks up + // its own package (not the caller's), so this stays consistent under the memoized, + // depth-first recursion below and a macro edit re-hashes only the packages that + // `load()` it (issue #365). + putDirectBytes(packageBzlSeeds[labelToPackage(rule.name)]) for (ruleInput in rule.ruleInputList(useCquery, fineGrainedHashExternalRepos)) { // Under --useCquery, `ruleInput` may carry an embedded configurationChecksum (see @@ -78,6 +84,7 @@ class RuleHasher( ruleHashes, sourceDigests, seedHash, + packageBzlSeeds, depPathClone, ignoredAttrs, modifiedFilepaths) diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt index 907f3466..6cce4c47 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/TargetHasher.kt @@ -16,6 +16,7 @@ class TargetHasher : KoinComponent { sourceDigests: ConcurrentMap, ruleHashes: ConcurrentMap, seedHash: ByteArray?, + packageBzlSeeds: Map, ignoredAttrs: Set, modifiedFilepaths: Set ): TargetDigest { @@ -37,6 +38,7 @@ class TargetHasher : KoinComponent { ruleHashes, sourceDigests, seedHash, + packageBzlSeeds, depPath = null, ignoredAttrs, modifiedFilepaths) @@ -53,14 +55,18 @@ class TargetHasher : KoinComponent { ruleHashes, sourceDigests, seedHash, + packageBzlSeeds, depPath = null, ignoredAttrs, modifiedFilepaths) } is BazelTarget.SourceFile -> { + // Mix in the `.bzl` seed for this source file's own package only, so a macro edit in + // another package leaves this file's hash untouched (issue #365). val digest = sha256 { safePutBytes(sourceDigests[target.sourceFileName]) safePutBytes(seedHash) + safePutBytes(packageBzlSeeds[labelToPackage(target.sourceFileName)]) } TargetDigest(digest, digest) } diff --git a/cli/src/test/kotlin/com/bazel_diff/e2e/E2ETest.kt b/cli/src/test/kotlin/com/bazel_diff/e2e/E2ETest.kt index 30d08230..daec6d7c 100644 --- a/cli/src/test/kotlin/com/bazel_diff/e2e/E2ETest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/e2e/E2ETest.kt @@ -1876,19 +1876,22 @@ class E2ETest { } // ------------------------------------------------------------------------ - // Reproducer for https://github.com/Tinder/bazel-diff/issues/365 + // Regression coverage for https://github.com/Tinder/bazel-diff/issues/365 // ------------------------------------------------------------------------ // The user reported that after upgrading bazel-diff (12.0.0 -> 25.0.0), ADDING (or editing) a // `.bzl` macro and `load()`ing it -- even in an otherwise-empty BUILD file -- caused EVERY // target in the repository to be reported as impacted, not just the targets that load the macro. // - // Root cause: the #259/#227 fix in BuildGraphHasher.createSeedForBzlFiles rolls the union of - // EVERY main-repo `.bzl` file's digest into a single workspace-wide seed that is then mixed into - // every target's hash. Its own doc comment acknowledges the tradeoff: "a single `.bzl` edit - // re-hashes every target". So introducing a brand-new, unrelated `.bzl` flips the seed (here from - // "no tracked .bzl" to "one tracked .bzl"), which changes the hash of completely unrelated + // Root cause was that the #259/#227 fix rolled the union of EVERY main-repo `.bzl` file's digest + // into a single workspace-wide seed that was then mixed into every target's hash, so introducing + // a brand-new, unrelated `.bzl` flipped the seed and changed the hash of completely unrelated // targets -- e.g. native genrules in another package that load nothing. // + // Fix: BuildGraphHasher.createPackageBzlSeeds attributes each `.bzl` digest to the package whose + // BUILD `load()`s it, and each target mixes in only its own package's seed. A macro added in a + // new package no longer perturbs targets that never load it, while #259/#227 stays fixed because + // a `.bzl` edit still re-hashes every target in the packages that load it. + // // Fixture `bzl_seed_overtrigger` has a single package `//pkg` with two native genrules // (`//pkg:a`, `//pkg:b`) that load no `.bzl`. The scenario adds a NEW package `//macros` whose // BUILD only does `load("//macros:defs.bzl", "my_macro")` (an empty load -- exactly the shape @@ -1954,16 +1957,11 @@ class E2ETest { return impactedTargetsOutput.readLines().filter { it.isNotBlank() }.toSet() } - // Desired post-fix invariant: adding an unrelated macro must NOT impact //pkg:a or //pkg:b. - // @Ignore'd because it fails today (the global .bzl seed re-hashes every target). Flipping this - // @Ignore off should be the second-to-last step of the fix PR; at that point the companion - // *_currentBehaviourBuggy_issue365 test below must be deleted or updated. + // Post-fix invariant for #365: adding an unrelated macro must NOT impact //pkg:a or //pkg:b. + // BuildGraphHasher.createPackageBzlSeeds now attributes each `.bzl` digest to the package that + // `load()`s it, so the new `//macros` package's macro does not perturb the `//pkg` targets that + // load nothing. @Test - @org.junit.Ignore( - "Reproducer for #365 -- expected to pass once the .bzl seed is attributed per-package " + - "instead of as a single workspace-wide seed. Today BuildGraphHasher.createSeedForBzlFiles " + - "mixes the union of all .bzl digests into every target's hash, so adding an unrelated " + - "macro impacts //pkg:a and //pkg:b and this assertion fails.") fun testAddingUnrelatedMacroDoesNotImpactExistingTargets_reproducerForIssue365() { val impacted = runIssue365MacroAddScenario() val unrelatedImpacted = @@ -1978,22 +1976,6 @@ class E2ETest { .isEqualTo(true) } - // Pins the current (buggy) behaviour so CI stays green and the reproducer stays visible in - // source. When the fix lands, this test should be deleted/updated and the @Ignore above flipped. - @Test - fun testAddingUnrelatedMacroImpactsExistingTargets_currentBehaviourBuggy_issue365() { - val impacted = runIssue365MacroAddScenario() - val aImpacted = impacted.any { it == "//pkg:a" || it == "@@//pkg:a" } - val bImpacted = impacted.any { it == "//pkg:b" || it == "@@//pkg:b" } - assertThat(aImpacted && bImpacted) - .transform( - "Documents the #365 over-triggering bug: adding an unrelated macro currently impacts " + - "//pkg:a and //pkg:b. Got: $impacted") { - it - } - .isEqualTo(true) - } - private fun copyTestWorkspace(path: String): File { val testProject = temp.newFolder()