From f9117e028ce5be29efa154ee9a6122a52180c547 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Tue, 17 Jun 2025 11:37:07 +0200 Subject: [PATCH] Be a lot more conservative in _github_submodule_required This function is used to decide whether we can use github directly to download the code without cloning the repository. That optimization only works if the repository doesn't have submodules. Before this commit, if github returned any status code that wasn't a 2xx or 3xx, we would consider that the repository didn't have any submodules. This is fairly obviously wrong, as a transient server error on the github side, for example, would mean that we'd assume that the repository doesn't have any submodules. We now check for HTTPErrors and consider that a repository doesn't have any submodules if and only if the status is 404; otherwise, something went wrong, so we log it and fall back to the non-optimized path. I also kept the catch-all exception handler but added some logging to it, just in case. --- src/taskgraph/run-task/fetch-content | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/taskgraph/run-task/fetch-content b/src/taskgraph/run-task/fetch-content index d1374cdbd..01560ad92 100755 --- a/src/taskgraph/run-task/fetch-content +++ b/src/taskgraph/run-task/fetch-content @@ -679,8 +679,15 @@ def _github_submodule_required(repo: str, commit: str): try: status_code = urllib.request.urlopen(url).getcode() return status_code == 200 - except: - return False + except urllib.error.HTTPError as e: + if e.status == 404: + return False + # If we get a non 2xx status code that isn't a 404, something has gone horribly wrong on the github side, log it and return True + log("Got {} from github while checking for submodules in {} which was unexpected. Cannot check whether the repo has submodules or not".format(e.status, repo)) + return True + except Exception as e: + log("Got an unexpected `{}` exception while checking for submodules in {}. 
Cannot check whether the repo has submodules or not".format(e, repo)) + return True def git_checkout_archive(