diff --git a/pyproject.toml b/pyproject.toml
index 23c710f2..ef8980b3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,6 @@ classifiers = [
 ]
 dependencies = [
     "aiohttp>=3",
-    "aiomemoizettl",
     "arrow>=1.0",
     "cryptography>=2.6.1",
     "dictdiffer",
diff --git a/src/scriptworker/github.py b/src/scriptworker/github.py
index 2d002371..08db1e0c 100644
--- a/src/scriptworker/github.py
+++ b/src/scriptworker/github.py
@@ -3,7 +3,6 @@
 import logging
 import re
 
-from aiomemoizettl import memoize_ttl
 from github3 import GitHub
 from github3.exceptions import GitHubException
 
@@ -35,6 +34,9 @@ def __init__(self, owner, repo_name, token=""):
             dict: a representation of the repo definition
 
         """
+        self._owner = owner
+        self._repo_name = repo_name
+        self._token = token
         github = retry_sync(GitHub, kwargs={"token": token}, sleeptime_kwargs=_GITHUB_LIBRARY_SLEEP_TIME_KWARGS)
         self._github_repository = retry_sync(github.repository, args=(owner, repo_name), sleeptime_kwargs=_GITHUB_LIBRARY_SLEEP_TIME_KWARGS)
 
@@ -110,6 +112,11 @@ async def get_tag_hash(self, tag_name):
     async def has_commit_landed_on_repository(self, context, revision):
         """Tell if a commit was landed on the repository or if it just comes from a pull request.
 
+        This method uses the official GitHub REST API to check if a commit has been merged
+        into the repository. It queries the /repos/{owner}/{repo}/commits/{sha}/pulls endpoint
+        to determine if the commit is associated with any merged pull requests, or if it was
+        directly committed to the repository.
+
         Args:
             context (scriptworker.context.Context): the scriptworker context.
             revision (str): the commit hash or the tag name.
@@ -118,42 +125,58 @@ async def has_commit_landed_on_repository(self, context, revision):
             bool: True if the commit is present in one of the branches of the main repository
 
         """
-        if any(vcs_rule.get("require_secret") for vcs_rule in context.config["trusted_vcs_rules"]):
-            # This check uses unofficial API on github, which we can't easily
-            # check for private repos, assume its true in the private case.
-            log.info("has_commit_landed_on_repository() not implemented for private" "repositories, assume True")
-            return True
-
-        # Revision may be a tag name. `branch_commits` doesn't work on tags
+        # Revision may be a tag name. Convert tags to commit hashes
         if not _is_git_full_hash(revision):
            revision = await self.get_tag_hash(tag_name=revision)
 
-        html_text = await _fetch_github_branch_commits_data(context, self._github_repository.html_url, revision)
-
-        # https://github.com/{repo_owner}/{repo_name}/branch_commits/{revision} just returns some \n
-        # when the commit hasn't landed on the origin repo. Otherwise, some HTML data is returned - it
-        # represents the branches on which the given revision is present.
-        return html_text != ""
-
-
-# TODO Use memoize_ttl() as decorator once https://github.com/michalc/aiomemoizettl/issues/2 is done
-async def _fetch_github_branch_commits_data_helper(context, repo_html_url, revision):
-    url = "/".join((repo_html_url.rstrip("/"), "branch_commits", revision))
-    log.info('Cache does not exist for URL "{}" (in this context), fetching it...'.format(url))
-    html_text = await retry_request(context, url)
-    return html_text.strip()
-
-
-# XXX memoize_ttl() uses all function parameters to create a key that stores its cache.
-# This means new contexts cannot use the memoized value, even though they're calling the same
-# repo and revision. jlorenzo tried to take the context out of the memoize_ttl() call, but
-# whenever the cache is invalidated, request() doesn't work anymore because the session carried
-# by the context has been long closed.
-# Therefore, the defined TTL has 2 purposes:
-# a. it memoizes calls for the time of a single cot_verify() run
-# b. it clears the cache automatically, so we don't have to manually invalidate it.
-_BRANCH_COMMITS_CACHE_TTL_IN_SECONDS = 10 * 60  # 10 minutes
-_fetch_github_branch_commits_data = memoize_ttl(_fetch_github_branch_commits_data_helper, get_ttl=lambda _: _BRANCH_COMMITS_CACHE_TTL_IN_SECONDS)
+        # Use the official GitHub REST API to check if commit has landed
+        # GET /repos/{owner}/{repo}/commits/{sha}/pulls
+        api_url = f"https://api.github.com/repos/{self._owner}/{self._repo_name}/commits/{revision}/pulls"
+
+        headers = {
+            "Accept": "application/vnd.github+json",
+            "X-GitHub-Api-Version": "2022-11-28",
+        }
+
+        # Add authentication if token is available
+        if self._token:
+            headers["Authorization"] = f"Bearer {self._token}"
+
+        log.info(f"Checking if commit {revision} has landed on {self._owner}/{self._repo_name}")
+
+        try:
+            # Query the GitHub API for pull requests associated with this commit
+            pull_requests = await retry_request(
+                context, api_url, return_type="json", headers=headers, good=(200, 404)  # 404 means commit doesn't exist in repo at all
+            )
+
+            # If we got a 404, the commit doesn't exist in this repository
+            # This is handled by checking the type of response
+            if isinstance(pull_requests, dict) and pull_requests.get("message") == "Not Found":
+                log.info(f"Commit {revision} not found in repository")
+                return False
+
+            # If no pull requests are associated with this commit, it was directly committed
+            # to the repository (not from a PR), so it has landed
+            if not pull_requests:
+                log.info(f"Commit {revision} was directly committed (no associated PRs)")
+                return True
+
+            # If there are pull requests, check if any of them are merged
+            # A merged PR means the commit has landed on the main repository
+            has_merged_pr = any(pr.get("merged_at") is not None for pr in pull_requests)
+
+            if has_merged_pr:
+                log.info(f"Commit {revision} has landed via merged PR")
+            else:
+                log.info(f"Commit {revision} only exists in unmerged PRs")
+
+            return has_merged_pr
+
+        except Exception as e:
+            log.error(f"Error checking if commit {revision} has landed: {e}")
+            # In case of errors, assume the commit hasn't landed to be safe
+            return False
 
 
 def is_github_url(url):
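Note on the scriptworker/github.py change above: the new check boils down to one call to the documented commits/{sha}/pulls endpoint plus a scan of each returned pull request's merged_at field. The sketch below restates that decision logic as a standalone coroutine so it can be tried outside scriptworker; it uses plain aiohttp instead of scriptworker's retry_request, and the owner, repo and token values are placeholders rather than anything taken from this patch.

    # Standalone sketch of the merged-PR check (plain aiohttp, not scriptworker's retry_request).
    import aiohttp

    async def commit_has_landed(owner: str, repo: str, sha: str, token: str = "") -> bool:
        url = f"https://api.github.com/repos/{owner}/{repo}/commits/{sha}/pulls"
        headers = {"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"}
        if token:
            headers["Authorization"] = f"Bearer {token}"

        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                if response.status == 404:
                    # Unknown commit: it cannot have landed on the repository.
                    return False
                response.raise_for_status()
                pull_requests = await response.json()

        # No associated PRs means the commit was pushed directly, so it has landed.
        # Otherwise it has landed only if at least one associated PR was merged.
        return not pull_requests or any(pr.get("merged_at") is not None for pr in pull_requests)

    # Example call (placeholder repository and commit):
    # asyncio.run(commit_has_landed("some-user", "some-repo", "0129abcdef012345643456789abcdef012345678"))
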
diff --git a/tests/test_github.py b/tests/test_github.py
index 3e3e8948..0df84a8c 100644
--- a/tests/test_github.py
+++ b/tests/test_github.py
@@ -1,5 +1,3 @@
-import asyncio
-from copy import copy
 from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
@@ -8,20 +6,7 @@
 from scriptworker import github
 from scriptworker.exceptions import ConfigError
 
-_CACHE = {}
-
-
-@pytest.fixture(scope="session", autouse=True)
-async def mock_memoized_func():
-    # Memoize_ttl causes an issue with pytest-asyncio, so we copy the behavior to an in-memory cache
-    async def fetch(*args, **kwargs):
-        key = (args, tuple(kwargs.items()))
-        if key not in _CACHE:
-            _CACHE[key] = await github._fetch_github_branch_commits_data_helper(*args, **kwargs)
-        return _CACHE[key]
-
-    with patch("scriptworker.github._fetch_github_branch_commits_data", fetch):
-        yield
+# No longer need caching/memoization mocks since we're using the official GitHub API
 
 
 @pytest.fixture(scope="function")
@@ -147,66 +132,57 @@ async def test_get_tag_hash(github_repository, tags, raises, expected):
 @pytest.mark.parametrize(
-    "commitish, expected_url, html_text, raises, expected",
+    "commitish, expected_url, api_response, raises, expected",
     (
+        # Commit with only unmerged PRs - should return False
         (
             "0129abcdef012345643456789abcdef012345678",
-            "https://github.com/some-user/some-repo/branch_commits/0129abcdef012345643456789abcdef012345678",
-            "\r\n\r\n",
+            "https://api.github.com/repos/some-user/some-repo/commits/0129abcdef012345643456789abcdef012345678/pulls",
+            [{"id": 123, "merged_at": None}],  # Unmerged PR
             False,
             False,
         ),
+        # Commit with no associated PRs - direct commit, should return True
         (
             "0f0123456789abcdef012123456789abcde34565",
-            "https://github.com/some-user/some-repo/branch_commits/0f0123456789abcdef012123456789abcde34565",
-            "\n",
-            False,
+            "https://api.github.com/repos/some-user/some-repo/commits/0f0123456789abcdef012123456789abcde34565/pulls",
+            [],  # No PRs = direct commit
             False,
+            True,
         ),
+        # Commit that doesn't exist (404) - should return False
         (
             "0123456789abcdef0123456789abcdef01234568",
-            "https://github.com/some-user/some-repo/branch_commits/0123456789abcdef0123456789abcdef01234568",
-            "",
+            "https://api.github.com/repos/some-user/some-repo/commits/0123456789abcdef0123456789abcdef01234568/pulls",
+            {"message": "Not Found"},  # 404 response
             False,
             False,
         ),
+        # Commit with merged PR - should return True
         (
             "06789abcdf0123ef01123456789abcde45234569",
-            "https://github.com/some-user/some-repo/branch_commits/06789abcdf0123ef01123456789abcde45234569",
-            """
-
-
-
-
-            """,
+            "https://api.github.com/repos/some-user/some-repo/commits/06789abcdf0123ef01123456789abcde45234569/pulls",
+            [{"id": 1234, "merged_at": "2021-01-01T00:00:00Z"}],  # Merged PR
             False,
             True,
         ),
+        # Tag that resolves to a commit with merged PR - should return True
         (
             "v1.0.0",
-            "https://github.com/some-user/some-repo/branch_commits/hashforv100",
-            """
-
-
-
-
-            """,
+            "https://api.github.com/repos/some-user/some-repo/commits/hashforv100/pulls",
+            [{"id": 1234, "merged_at": "2021-01-01T00:00:00Z"}],  # Merged PR
             False,
             True,
         ),
-        ("non-existing-tag", None, "", True, None),
+        # Non-existing tag - should raise ValueError
+        ("non-existing-tag", None, None, True, None),
     ),
 )
-async def test_has_commit_landed_on_repository(context, github_repository, commitish, expected_url, html_text, raises, expected):
-    async def retry_request(_, url):
-        assert url == expected_url
-        return html_text
+async def test_has_commit_landed_on_repository(context, github_repository, commitish, expected_url, api_response, raises, expected):
+    async def retry_request(_, url, **kwargs):
+        if expected_url:
+            assert url == expected_url
+        return api_response
 
     with patch("scriptworker.github.retry_request", retry_request):
         if raises:
@@ -216,54 +192,6 @@ async def retry_request(_, url):
             assert await github_repository.has_commit_landed_on_repository(context, commitish) == expected
 
 
-@pytest.mark.asyncio
-async def test_has_commit_landed_on_repository_private(vpn_context, github_repository):
-    """For private repos we don't actually query against github.
-
-    The API used is not formally available and needs authorization on private repos, but isn't clearly useful
-    to try to use for this case.
-    """
-    commitish = "06789abcdf0123ef01123456789abcde45234569"
-
-    async def retry_request(_, url):
-        assert False, "We should never have made a request"
-
-    assert await github_repository.has_commit_landed_on_repository(vpn_context, commitish) == True
-
-
-@pytest.mark.asyncio
-async def test_has_commit_landed_on_repository_cache(context, vpn_context, github_repository):
-    global retry_request_call_count
-    retry_request_call_count = 0
-
-    async def _counter(*args, **kwargs):
-        global retry_request_call_count
-        retry_request_call_count += 1
-        return ""
-
-    with patch("scriptworker.github.retry_request", _counter):
-        await asyncio.gather(
-            *[
-                github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
-                github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
-                github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
-                github_repository.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"),
-            ]
-        )
-        # Even though all calls were fired at once, just a single call was made
-        assert retry_request_call_count == 1
-
-        await github_repository.has_commit_landed_on_repository(context, "456789abcdef0123456780129abcdef012345643")
-        # New commit means new request
-        assert retry_request_call_count == 2
-
-        different_context = copy(context)
-        different_context.task = {"taskGroupId": "someOtherTaskId"}
-        await github_repository.has_commit_landed_on_repository(different_context, "456789abcdef0123456780129abcdef012345643")
-        # New context means new request too
-        assert retry_request_call_count == 3
-
-
 @pytest.mark.parametrize(
     "url, expected",
     (
diff --git a/tests/test_production.py b/tests/test_production.py
index a32cc8bc..e658194c 100644
--- a/tests/test_production.py
+++ b/tests/test_production.py
@@ -6,7 +6,6 @@
 import os
 import re
 import tempfile
-from unittest.mock import patch
 
 import pytest
 from asyncio_extras.contextmanager import async_contextmanager
@@ -14,7 +13,6 @@
 import scriptworker.log as swlog
 import scriptworker.utils as utils
-from scriptworker import github
 from scriptworker.config import apply_product_config, get_unfrozen_copy, read_worker_creds
 from scriptworker.constants import DEFAULT_CONFIG
 from scriptworker.context import Context
@@ -24,20 +22,6 @@
 # constants helpers and fixtures {{{1
 pytestmark = [pytest.mark.skipif(os.environ.get("NO_TESTS_OVER_WIRE"), reason="NO_TESTS_OVER_WIRE: skipping production CoT verification test")]
 
-_CACHE = {}
-
-
-@pytest.fixture(scope="session", autouse=True)
-async def mock_memoized_func():
-    # Memoize_ttl causes an issue with pytest-asyncio, so we copy the behavior to an in-memory cache
-    async def fetch(*args, **kwargs):
-        key = (args, tuple(kwargs.items()))
-        if key not in _CACHE:
-            _CACHE[key] = await github._fetch_github_branch_commits_data_helper(*args, **kwargs)
-        return _CACHE[key]
-
-    with patch("scriptworker.github._fetch_github_branch_commits_data", fetch):
-        yield
 
 
 def read_integration_creds():
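For completeness, a rough usage sketch of the changed method. The class name GitHubRepository, the empty config, and the way the context's aiohttp session is wired up are assumptions based on the surrounding scriptworker code, not something this patch defines:

    # Rough usage sketch; assumes the class in src/scriptworker/github.py is
    # scriptworker.github.GitHubRepository and that retry_request() uses the
    # aiohttp session carried by the context.
    import asyncio

    import aiohttp

    from scriptworker.context import Context
    from scriptworker.github import GitHubRepository

    async def main():
        context = Context()
        context.config = {}  # assumption: no config keys are needed on this code path anymore
        async with aiohttp.ClientSession() as session:
            context.session = session
            repo = GitHubRepository("some-user", "some-repo", token="")  # placeholder repository
            print(await repo.has_commit_landed_on_repository(context, "0129abcdef012345643456789abcdef012345678"))

    asyncio.run(main())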