From faf6ba4baaeac9a7874af6b9592170776aebf2dc Mon Sep 17 00:00:00 2001
From: Andrew Halberstadt <ahal@mozilla.com>
Date: Wed, 8 Oct 2025 13:51:09 -0400
Subject: [PATCH 1/2] refactor!: rename base_rev -> base in
 vcs.get_changed_files()

This function works with anything committish, update the argument name
to reflect that (while we're breaking backwards compat anyway).
---
 src/taskgraph/util/vcs.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/taskgraph/util/vcs.py b/src/taskgraph/util/vcs.py
index 80f08e4da..e2d887771 100644
--- a/src/taskgraph/util/vcs.py
+++ b/src/taskgraph/util/vcs.py
@@ -140,12 +140,12 @@ def get_changed_files(
         diff_filter: Optional[str],
         mode: Optional[str],
         rev: Optional[str],
-        base_rev: Optional[str],
+        base: Optional[str],
     ) -> List[str]:
         """Return a list of files that are changed in:
          * either this repository's working copy,
          * or at a given revision (``rev``)
-         * or between 2 revisions (``base_rev`` and ``rev``)
+         * or between 2 revisions (``base`` and ``rev``)
 
         ``diff_filter`` controls which kinds of modifications are returned.
         It is a string which may only contain the following characters:
@@ -162,9 +162,9 @@ def get_changed_files(
         ``rev`` is a specifier for which changesets to consider for
         changes. The exact meaning depends on the vcs system being used.
 
-        ``base_rev`` specifies the range of changesets. This parameter cannot
+        ``base`` specifies the range of changesets. This parameter cannot
         be used without ``rev``. The range includes ``rev`` but excludes
-        ``base_rev``.
+        ``base``.
         """
 
     @abstractmethod
@@ -293,17 +293,17 @@ def get_tracked_files(self, *paths, rev=None):
         rev = rev or "."
         return self.run("files", "-r", rev, *paths).splitlines()
 
-    def get_changed_files(self, diff_filter=None, mode=None, rev=None, base_rev=None):
+    def get_changed_files(self, diff_filter=None, mode=None, rev=None, base=None):
         diff_filter = diff_filter or "ADM"
         if rev is None:
-            if base_rev is not None:
-                raise ValueError("Cannot specify `base_rev` without `rev`")
+            if base is not None:
+                raise ValueError("Cannot specify `base` without `rev`")
             # Use --no-status to print just the filename.
             df = self._format_diff_filter(diff_filter, for_status=True)
             return self.run("status", "--no-status", f"-{df}").splitlines()
         else:
             template = self._files_template(diff_filter)
-            revision_argument = rev if base_rev is None else f"{rev} % {base_rev}"
+            revision_argument = rev if base is None else f"{rev} % {base}"
             return self.run("log", "-r", revision_argument, "-T", template).splitlines()
 
     def get_outgoing_files(self, diff_filter="ADM", upstream=None):
@@ -479,23 +479,21 @@ def get_tracked_files(self, *paths, rev=None):
         rev = rev or "HEAD"
         return self.run("ls-tree", "-r", "--name-only", rev, *paths).splitlines()
 
-    def get_changed_files(self, diff_filter=None, mode=None, rev=None, base_rev=None):
+    def get_changed_files(self, diff_filter=None, mode=None, rev=None, base=None):
         diff_filter = diff_filter or "ADM"
         mode = mode or "unstaged"
         assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
 
         if rev is None:
-            if base_rev is not None:
-                raise ValueError("Cannot specify `base_rev` without `rev`")
+            if base is not None:
+                raise ValueError("Cannot specify `base` without `rev`")
             cmd = ["diff"]
             if mode == "staged":
                 cmd.append("--cached")
             elif mode == "all":
                 cmd.append("HEAD")
         else:
-            revision_argument = (
-                f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}"
-            )
+            revision_argument = f"{rev}~1..{rev}" if base is None else f"{base}..{rev}"
             cmd = ["log", "--format=format:", revision_argument]
 
         cmd.append("--name-only")

From 90a856b0bbd7b33313872a413eee6535fdc94b26 Mon Sep 17 00:00:00 2001
From: Andrew Halberstadt <ahal@mozilla.com>
Date: Wed, 8 Oct 2025 13:51:09 -0400
Subject: [PATCH 2/2] fix!: remove post processing of base_ref and base_rev
 parameters

BREAKING CHANGE: the base_ref and base_rev parameters will now match
what was passed into the Decision task exactly. This means `base_ref`
will not get reset to the repo's default branch, and `base_rev` will no
longer be reset to the merge base.

### Why we were previously post-processing these parameters

Github push and pull_request events almost never contain the base ref,
and often don't contain the base rev. For base ref, it only ever seems
to be present when pushing a new branch. For base rev it is missing in
the following cases:

1. All pull_request events. The value we pass in is actually the
   revision that the base ref happens to be pointing at when the event
   is fired.
2. Force pushes. The value we pass in is actually the old unreachable
   head rev that used to be on the branch

Really, we only ever get a proper base rev when doing normal
fast-forward pushes.

The reason we need a base rev in the first place, is to compute the
files changed. So let's say there's a PR with the following graph:

    A -> B -> C -> D (main)
         \
          E -> F (PR)


Based on the Github event, `D` is being passed in as `base_rev` and `F`
is being passed in as `head_rev`. The post-processing was essentially
running `git merge-base` to find `B`, so that we could then use `git
log` to find all files touched between `B..F`.

### Why the post-processing isn't actually necessary

It turns out that `git log D..F` already does the right thing! It means,
show me all commits reachable from `F`, but not reachable from `D`,
which is exactly what we want. Only files changed in `E` and `F` will be
included. So there's no benefit of using `merge-base` for the purposes
of finding changed files.

As far as the `base_ref` post-processing goes, all we were doing was
setting it to `repo.default_branch` if it doesn't exist (which is almost
always for pushes). While this likely works in most cases, we don't
actually have any idea what the real `base_ref` is, so it's not correct
to be doing this.

Since we don't even need `base_ref` to determine files changed, I think
it's fine to leave it as `None` in the event it wasn't passed in.

### Risks with this patch

While removing this should be fine for the purposes of determining
changed files, there might be other use cases that projects are using
the `merge-base` for. Such projects may need to determine the merge-base
on their own, therefore this patch is backwards incompatible.

### Future improvements

I think we should consider removing or renaming the `base_ref` and
`base_rev` parameters. As outlined, they are misleading (at least with
Github based repos) and don't actually represent the "base revision".

But for now, I'll save that particular change for another day.
---
 src/taskgraph/decision.py   | 81 +--------------------------------
 src/taskgraph/parameters.py |  2 +-
 test/test_decision.py       | 91 +------------------------------------
 3 files changed, 5 insertions(+), 169 deletions(-)

diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py
index e75bc3014..a0c5381a8 100644
--- a/src/taskgraph/decision.py
+++ b/src/taskgraph/decision.py
@@ -21,7 +21,7 @@
 from taskgraph.util import json
 from taskgraph.util.python_path import find_object
 from taskgraph.util.schema import Schema, validate_schema
-from taskgraph.util.vcs import Repository, get_repository
+from taskgraph.util.vcs import get_repository
 from taskgraph.util.yaml import load_yaml
 
 logger = logging.getLogger(__name__)
@@ -192,25 +192,10 @@ def get_decision_parameters(graph_config, options):
     except UnicodeDecodeError:
         commit_message = ""
 
-    parameters["base_ref"] = _determine_more_accurate_base_ref(
-        repo,
-        candidate_base_ref=options.get("base_ref"),
-        head_ref=options.get("head_ref"),
-        base_rev=options.get("base_rev"),
-    )
-
-    parameters["base_rev"] = _determine_more_accurate_base_rev(
-        repo,
-        base_ref=parameters["base_ref"],
-        candidate_base_rev=options.get("base_rev"),
-        head_rev=options.get("head_rev"),
-        env_prefix=_get_env_prefix(graph_config),
-    )
-
     # Define default filter list, as most configurations shouldn't need
     # custom filters.
     parameters["files_changed"] = repo.get_changed_files(
-        rev=parameters["head_rev"], base_rev=parameters["base_rev"]
+        rev=parameters["head_rev"], base=parameters["base_rev"]
     )
     parameters["filters"] = [
         "target_tasks_method",
@@ -284,68 +269,6 @@ def get_decision_parameters(graph_config, options):
     return result
 
 
-def _determine_more_accurate_base_ref(repo, candidate_base_ref, head_ref, base_rev):
-    base_ref = candidate_base_ref
-
-    if not candidate_base_ref:
-        base_ref = repo.default_branch
-    elif candidate_base_ref == head_ref and base_rev == Repository.NULL_REVISION:
-        logger.info(
-            "base_ref and head_ref are identical but base_rev equals the null revision. "
-            "This is a new branch but Github didn't identify its actual base."
-        )
-        base_ref = repo.default_branch
-
-    if base_ref != candidate_base_ref:
-        logger.info(
-            f'base_ref has been reset from "{candidate_base_ref}" to "{base_ref}".'
-        )
-
-    return base_ref
-
-
-def _determine_more_accurate_base_rev(
-    repo, base_ref, candidate_base_rev, head_rev, env_prefix
-):
-    if not candidate_base_rev:
-        logger.info("base_rev is not set.")
-        base_ref_or_rev = base_ref
-    elif candidate_base_rev == Repository.NULL_REVISION:
-        logger.info("base_rev equals the null revision. This branch is a new one.")
-        base_ref_or_rev = base_ref
-    elif not repo.does_revision_exist_locally(candidate_base_rev):
-        logger.warning(
-            "base_rev does not exist locally. It is likely because the branch was force-pushed. "
-            "taskgraph is not able to assess how many commits were changed and assumes it is only "
-            f"the last one. Please set the {env_prefix.upper()}_BASE_REV environment variable "
-            "in the decision task and provide `--base-rev` to taskgraph."
-        )
-        base_ref_or_rev = base_ref
-    else:
-        base_ref_or_rev = candidate_base_rev
-
-    if base_ref_or_rev == base_ref:
-        logger.info(
-            f'Using base_ref "{base_ref}" to determine latest common revision...'
-        )
-
-    base_rev = repo.find_latest_common_revision(base_ref_or_rev, head_rev)
-    if base_rev != candidate_base_rev:
-        if base_ref_or_rev == candidate_base_rev:
-            logger.info("base_rev is not an ancestor of head_rev.")
-
-        logger.info(
-            f'base_rev has been reset from "{candidate_base_rev}" to "{base_rev}".'
-        )
-
-    return base_rev
-
-
-def _get_env_prefix(graph_config):
-    repo_keys = list(graph_config["taskgraph"].get("repositories", {}).keys())
-    return repo_keys[0] if repo_keys else ""
-
-
 def set_try_config(parameters, task_config_file):
     if os.path.isfile(task_config_file):
         logger.info(f"using try tasks from {task_config_file}")
diff --git a/src/taskgraph/parameters.py b/src/taskgraph/parameters.py
index d88a155b7..12f470f15 100644
--- a/src/taskgraph/parameters.py
+++ b/src/taskgraph/parameters.py
@@ -33,7 +33,7 @@ class ParameterMismatch(Exception):
 base_schema = Schema(
     {
         Required("base_repository"): str,
-        Required("base_ref"): str,
+        Optional("base_ref"): str,
         Required("base_rev"): str,
         Required("build_date"): int,
         Required("build_number"): int,
diff --git a/test/test_decision.py b/test/test_decision.py
index 2b2e4f34e..f3071a938 100644
--- a/test/test_decision.py
+++ b/test/test_decision.py
@@ -10,8 +10,6 @@
 import unittest
 from pathlib import Path
 
-import pytest
-
 from taskgraph import decision
 from taskgraph.util.vcs import GitRepository, HgRepository
 from taskgraph.util.yaml import load_yaml
@@ -54,6 +52,7 @@ class TestGetDecisionParameters(unittest.TestCase):
     def setUp(self):
         self.options = {
             "base_repository": "https://hg.mozilla.org/mozilla-unified",
+            "base_rev": "aaaa",
             "head_repository": "https://hg.mozilla.org/mozilla-central",
             "head_rev": "bbbb",
             "head_ref": "default",
@@ -67,27 +66,11 @@ def setUp(self):
             "tasks_for": "hg-push",
             "level": "3",
         }
-        self.old_determine_more_accurate_base_rev = (
-            decision._determine_more_accurate_base_rev
-        )
-        decision._determine_more_accurate_base_rev = lambda *_, **__: "aaaa"
-        self.old_determine_more_accurate_base_ref = (
-            decision._determine_more_accurate_base_ref
-        )
-        decision._determine_more_accurate_base_ref = lambda *_, **__: "aaaa"
-
-    def tearDown(self):
-        decision._determine_more_accurate_base_rev = (
-            self.old_determine_more_accurate_base_rev
-        )
-        decision._determine_more_accurate_base_ref = (
-            self.old_determine_more_accurate_base_ref
-        )
 
     def test_simple_options(self, mock_files_changed):
         mock_files_changed.return_value = ["foo.txt"]
         params = decision.get_decision_parameters(FAKE_GRAPH_CONFIG, self.options)
-        mock_files_changed.assert_called_once_with(rev="bbbb", base_rev="aaaa")
+        mock_files_changed.assert_called_once_with(rev="bbbb", base="aaaa")
         self.assertEqual(params["build_date"], 1503691511)
         self.assertEqual(params["head_tag"], "v0.0.1")
         self.assertEqual(params["pushlog_id"], "143")
@@ -154,73 +137,3 @@ def test_dontbuild_commit_message_yields_default_target_tasks_method(
         self.options["tasks_for"] = "hg-push"
         params = decision.get_decision_parameters(FAKE_GRAPH_CONFIG, self.options)
         self.assertEqual(params["target_tasks_method"], "nothing")
-
-
-@pytest.mark.parametrize(
-    "candidate_base_ref, base_rev, expected_base_ref",
-    (
-        ("", "base-rev", "default-branch"),
-        ("head-ref", "base-rev", "head-ref"),
-        ("head-ref", "0000000000000000000000000000000000000000", "default-branch"),
-    ),
-)
-def test_determine_more_accurate_base_ref(
-    candidate_base_ref, base_rev, expected_base_ref
-):
-    repo_mock = unittest.mock.MagicMock()
-    repo_mock.default_branch = "default-branch"
-
-    assert (
-        decision._determine_more_accurate_base_ref(
-            repo_mock, candidate_base_ref, "head-ref", base_rev
-        )
-        == expected_base_ref
-    )
-
-
-@pytest.mark.parametrize(
-    "common_rev, candidate_base_rev, expected_base_ref_or_rev, expected_base_rev",
-    (
-        ("found-rev", "", "base-ref", "found-rev"),
-        (
-            "found-rev",
-            "0000000000000000000000000000000000000000",
-            "base-ref",
-            "found-rev",
-        ),
-        ("found-rev", "non-existing-rev", "base-ref", "found-rev"),
-        ("existing-rev", "existing-rev", "existing-rev", "existing-rev"),
-    ),
-)
-def test_determine_more_accurate_base_rev(
-    common_rev, candidate_base_rev, expected_base_ref_or_rev, expected_base_rev
-):
-    repo_mock = unittest.mock.MagicMock()
-    repo_mock.find_latest_common_revision.return_value = common_rev
-    repo_mock.does_revision_exist_locally = lambda rev: rev == "existing-rev"
-
-    assert (
-        decision._determine_more_accurate_base_rev(
-            repo_mock, "base-ref", candidate_base_rev, "head-rev", env_prefix="PREFIX"
-        )
-        == expected_base_rev
-    )
-    repo_mock.find_latest_common_revision.assert_called_once_with(
-        expected_base_ref_or_rev, "head-rev"
-    )
-
-
-@pytest.mark.parametrize(
-    "graph_config, expected_value",
-    (
-        ({"taskgraph": {}}, ""),
-        ({"taskgraph": {"repositories": {}}}, ""),
-        ({"taskgraph": {"repositories": {"mobile": {}}}}, "mobile"),
-        (
-            {"taskgraph": {"repositories": {"mobile": {}, "some-other-repo": {}}}},
-            "mobile",
-        ),
-    ),
-)
-def test_get_env_prefix(graph_config, expected_value):
-    assert decision._get_env_prefix(graph_config) == expected_value