diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 51d2ed24..650a6660 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -51,3 +51,23 @@ jobs: - name: Show out.csv run: cat out.csv + + token-usage-unit_test: + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Run test + run: python3 test_token_usage.py --tt1 ${{ secrets.TEST_TOKEN_GITHUB }} --tt2 ${{ secrets.SECOND_TEST_TOKEN_GITHUB }} --repo moevm/github_repo_commitment_calc --out out.csv diff --git a/ForgejoRepoAPI.py b/ForgejoRepoAPI.py index dc05da6e..3a386d60 100644 --- a/ForgejoRepoAPI.py +++ b/ForgejoRepoAPI.py @@ -4,9 +4,20 @@ import isodate from pyforgejo import PyforgejoApi -from interface_wrapper import (Branch, Comment, Commit, Contributor, Invite, - IRepositoryAPI, Issue, PullRequest, Repository, - User, WikiPage, logging) +from interface_wrapper import ( + Branch, + Comment, + Commit, + Contributor, + Invite, + IRepositoryAPI, + Issue, + PullRequest, + Repository, + User, + WikiPage, + logging, +) class ForgejoRepoAPI(IRepositoryAPI): diff --git a/GitHubRepoAPI.py b/GitHubRepoAPI.py index ca7bd250..a74a5efa 100644 --- a/GitHubRepoAPI.py +++ b/GitHubRepoAPI.py @@ -1,12 +1,37 @@ -from interface_wrapper import (Branch, Comment, Commit, Contributor, Invite, - IRepositoryAPI, Issue, PullRequest, Repository, - User, WikiPage, logging) +from interface_wrapper import ( + Branch, + Comment, + Commit, + Contributor, + Invite, + IRepositoryAPI, + Issue, + PullRequest, + Repository, + User, + WikiPage, + logging, +) + +from github import Github, GithubException class GitHubRepoAPI(IRepositoryAPI): + def __init__(self, client: Github): + self.client = self._client_validation(client) - 
def __init__(self, client): - self.client = client + @staticmethod + def _client_validation(client: Github) -> Github: + try: + client.get_user().login + except GithubException as err: + logging.error(f'Github: Connect: error {err.data}') + logging.error( + 'Github: Connect: user could not be authenticated please try again.' + ) + exit(1) + else: + return client def get_user_data(self, user) -> User: return User( diff --git a/commits_parser.py b/commits_parser.py index baa8de84..e4e248f3 100644 --- a/commits_parser.py +++ b/commits_parser.py @@ -1,5 +1,7 @@ from dataclasses import asdict, dataclass from time import sleep +from typing import Generator +from datetime import datetime import pytz @@ -62,12 +64,17 @@ def log_repository_commits( def log_commits( - client: IRepositoryAPI, working_repos, csv_name, start, finish, branch, fork_flag + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], + csv_name: str, + start: datetime, + finish: datetime, + branch: str, + fork_flag: bool, ): info = asdict(CommitData()) logger.log_to_csv(csv_name, list(info.keys())) - for repo, token in working_repos: + for client, repo, token in binded_repos: try: logger.log_title(repo.name) log_repository_commits(client, repo, csv_name, start, finish, branch) diff --git a/contributors_parser.py b/contributors_parser.py index d07e654e..53509ec7 100644 --- a/contributors_parser.py +++ b/contributors_parser.py @@ -80,12 +80,14 @@ def get_contributors_stats(client: IRepositoryAPI, repository: Repository) -> di def log_contributors( - client: IRepositoryAPI, working_repos: Generator, csv_name: str, fork_flag: bool + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], + csv_name: str, + fork_flag: bool, ): info = asdict(ContributorData()) logger.log_to_csv(csv_name, list(info.keys())) - for repo, token in working_repos: + for client, repo, token in binded_repos: try: logger.log_title(repo.name) log_repository_contributors(client, repo, csv_name) 
diff --git a/git_logger.py b/git_logger.py index ec7836b2..8e2066b5 100644 --- a/git_logger.py +++ b/git_logger.py @@ -16,22 +16,40 @@ def get_tokens_from_file(tokens_path: str) -> list[str]: return tokens -class GitClients: +def get_repos_from_file(repos_path: str) -> list[str]: + with open(repos_path, 'r') as file: + list_repos = [x for x in file.read().split('\n') if x] + + return list_repos + + +class Clients: def __init__(self, source: str, tokens: list[str], base_url: str | None = None): - self.clients = self._init_clients(source, tokens, base_url) - self.cur_client = None + # Возможно это можно переписать покрасивее + if source == 'github': + self.clients = self._init_clients(source, tokens, base_url) + elif source == 'forgejo': + self.client = RepositoryFactory.create_api(source, tokens[0], base_url) + self.token = tokens[0] + else: + print(f"Unavailable source {source}, use [ 'github' | 'forgejo' ] instead") + + self.source = source def _init_clients( self, source: str, tokens: list[str], base_url: str | None ) -> list[dict]: clients = [ - {"client": login(source, token, base_url), "token": token} + { + "client": RepositoryFactory.create_api(source, token, base_url), + "token": token, + } for token in tokens ] return clients - def get_next_client(self) -> IRepositoryAPI: + def _get_next_git_client(self) -> tuple[IRepositoryAPI, str]: client = None max_remaining_limit = -1 @@ -51,24 +69,29 @@ def get_next_client(self) -> IRepositoryAPI: if client is None: raise Exception("No git clients available") - self.cur_client = client - return client + return client['client'], client['token'] + def _get_next_forgejo_client(self) -> tuple[IRepositoryAPI, str]: + return self.client, self.token -def get_next_repo(clients: GitClients, repositories): - with open(repositories, 'r') as file: - list_repos = [x for x in file.read().split('\n') if x] - print(list_repos) - for repo_name in list_repos: + def get_next_client(self) -> tuple[IRepositoryAPI, str]: + if self.source 
== 'github': + return self._get_next_git_client() + elif self.source == 'forgejo': + return self._get_next_forgejo_client() + + +def get_next_binded_repo(clients: Clients, repositories: list[str]): + for repo_name in repositories: try: - cur_client = clients.get_next_client() - repo = cur_client['client'].get_repository(repo_name) + client, token = clients.get_next_client() + repo = client.get_repository(repo_name) except Exception as err: print(f'get_next_repo(): error {err}') print(f'get_next_repo(): failed to load repository "{repo_name}"') exit(1) else: - yield repo, cur_client['token'] + yield client, repo, token def get_assignee_story(git_object): diff --git a/interface_wrapper.py b/interface_wrapper.py index 2afa5036..c3df4342 100644 --- a/interface_wrapper.py +++ b/interface_wrapper.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from datetime import datetime -from github import Github +from github import Github, Auth from pyforgejo import PyforgejoApi # Настройка логирования @@ -182,7 +182,7 @@ def create_api( from GitHubRepoAPI import GitHubRepoAPI if source == 'github': - return GitHubRepoAPI(Github(token)) + return GitHubRepoAPI(Github(auth=Auth.Token(token))) elif source == 'forgejo': if not isinstance(base_url, str): raise ValueError( diff --git a/invites_parser.py b/invites_parser.py index 167095ec..33848d8f 100644 --- a/invites_parser.py +++ b/invites_parser.py @@ -1,5 +1,6 @@ from dataclasses import asdict, dataclass from time import sleep +from typing import Generator from constants import TIMEDELTA from interface_wrapper import IRepositoryAPI, Repository @@ -31,11 +32,14 @@ def log_repository_invitations( sleep(TIMEDELTA) -def log_invitations(client: IRepositoryAPI, working_repos, csv_name: str): +def log_invitations( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], + csv_name: str, +): info = asdict(InviteData()) logger.log_to_csv(csv_name, list(info.keys())) - for repo, token in working_repos: + for client, 
repo, token in binded_repos: logger.log_title(repo.name) try: log_repository_invitations(client, repo, csv_name) diff --git a/issues_parser.py b/issues_parser.py index bd1d72c0..c10cea59 100644 --- a/issues_parser.py +++ b/issues_parser.py @@ -1,6 +1,8 @@ import json from dataclasses import asdict, dataclass from time import sleep +from typing import Generator +from datetime import datetime import pytz import requests @@ -187,12 +189,16 @@ def log_issue_and_comments(csv_name, issue_data: IssueData, comments): def log_issues( - client: IRepositoryAPI, working_repo, csv_name, token, start, finish, fork_flag + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], + csv_name: str, + start: datetime, + finish: datetime, + fork_flag: bool, ): info = asdict(IssueDataWithComment()) logger.log_to_csv(csv_name, list(info.keys())) - for repo, token in working_repo: + for client, repo, token in binded_repos: try: logger.log_title(repo.name) log_repository_issues(client, repo, csv_name, token, start, finish) diff --git a/main.py b/main.py index f44fbffd..dc72a0bf 100644 --- a/main.py +++ b/main.py @@ -1,19 +1,16 @@ import argparse import traceback -from datetime import datetime - -import pytz +import git_logger +import export_sheets import commits_parser import contributors_parser -import export_sheets -import git_logger +import pull_requests_parser import invites_parser import issues_parser -import pull_requests_parser import wikipars -from constants import TIMEZONE -from interface_wrapper import RepositoryFactory + +from utils import parse_time def parse_args(): @@ -120,22 +117,40 @@ def parse_args(): return parser.parse_args() -def parse_time(datetime_str): - start = ( - datetime_str[0].split('/') + datetime_str[1].split(':') - if len(datetime_str) == 2 - else datetime_str[0].split('/') + ['00', '00', '00'] - ) - start = [int(i) for i in start] - start_datetime = datetime( - year=start[0], - month=start[1], - day=start[2], - hour=start[3], - 
minute=start[4], - second=start[5], - ) - return start_datetime.astimezone(pytz.timezone(TIMEZONE)) +def run(args, binded_repos, repos_for_wiki=None): + start = parse_time(args.start.split('-')) + finish = parse_time(args.finish.split('-')) + + if args.commits: + commits_parser.log_commits( + binded_repos, args.out, start, finish, args.branch, args.forks_include + ) + if args.pull_requests: + pull_requests_parser.log_pull_requests( + binded_repos, + args.out, + start, + finish, + args.forks_include, + args.pr_comments, + ) + if args.issues: + issues_parser.log_issues( + binded_repos, args.out, start, finish, args.forks_include + ) + if args.invites: + invites_parser.log_invitations( + binded_repos, + args.out, + ) + if args.contributors: + contributors_parser.log_contributors(binded_repos, args.out, args.forks_include) + if args.wikis: + wikipars.wikiparser(repos_for_wiki, args.download_repos, args.out) + if args.export_google_sheets: + export_sheets.write_data_to_table( + args.out, args.google_token, args.table_id, args.sheet_id + ) def main(): @@ -146,57 +161,18 @@ def main(): else: tokens = git_logger.get_tokens_from_file(args.tokens) - repositories = args.list - csv_name = args.out - path_drepo = args.download_repos - fork_flag = args.forks_include - log_pr_comments = args.pr_comments + repositories = git_logger.get_repos_from_file(args.list) + + print(repositories) try: - clients = git_logger.GitClients("github", tokens) + clients = git_logger.Clients("github", tokens) + binded_repos = git_logger.get_next_binded_repo(clients, repositories) except Exception as e: print(e) print(traceback.print_exc()) else: - client = RepositoryFactory.create_api("github", tokens[0]) - working_repos = git_logger.get_next_repo(clients, repositories) - start = parse_time(args.start.split('-')) - finish = parse_time(args.finish.split('-')) - - if args.commits: - commits_parser.log_commits( - client, working_repos, csv_name, start, finish, args.branch, fork_flag - ) - if 
args.pull_requests: - pull_requests_parser.log_pull_requests( - client, - working_repos, - csv_name, - start, - finish, - fork_flag, - log_pr_comments, - ) - if args.issues: - issues_parser.log_issues( - client, working_repos, csv_name, tokens[0], start, finish, fork_flag - ) - if args.invites: - invites_parser.log_invitations( - client, - working_repos, - csv_name, - ) - if args.wikis: - wikipars.wikiparser(clients, repositories, path_drepo, csv_name) - if args.contributors: - contributors_parser.log_contributors( - client, working_repos, csv_name, fork_flag - ) - if args.export_google_sheets: - export_sheets.write_data_to_table( - csv_name, args.google_token, args.table_id, args.sheet_id - ) + run(args, binded_repos, repositories) if __name__ == '__main__': diff --git a/pull_requests_parser.py b/pull_requests_parser.py index 9ab2bba4..fb9f59c6 100644 --- a/pull_requests_parser.py +++ b/pull_requests_parser.py @@ -1,6 +1,8 @@ import json from dataclasses import asdict, dataclass from time import sleep +from typing import Generator +from datetime import datetime import pytz import requests @@ -187,18 +189,17 @@ def get_info(obj, attr): def log_pull_requests( - client: IRepositoryAPI, - working_repos, - csv_name, - start, - finish, - fork_flag, + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], + csv_name: str, + start: datetime, + finish: datetime, + fork_flag: bool, log_comments=False, ): info = asdict(PullRequestDataWithComment()) logger.log_to_csv(csv_name, list(info.keys())) - for repo, token in working_repos: + for client, repo, token in binded_repos: try: logger.log_title(repo.name) log_repositories_pr( diff --git a/test_token_usage.py b/test_token_usage.py new file mode 100644 index 00000000..ac6d516f --- /dev/null +++ b/test_token_usage.py @@ -0,0 +1,149 @@ +import unittest +import argparse +import sys + +from main import run + +import git_logger + + +def parse_args(args): + parser = argparse.ArgumentParser() + 
parser.add_argument('--tt1', type=str, required=True, help='first test token') + parser.add_argument('--tt2', type=str, required=True, help='second test token') + + parser.add_argument( + '-r', + '--repo', + type=str, + required=True, + help=('test repo'), + ) + + parser.add_argument('-o', '--out', type=str, required=True, help='output filename') + + return parser.parse_args(args) + + +class TestTokenUsage(unittest.TestCase): + def setUp(self): + test_args = parse_args(sys.argv[1:]) + self.tokens = [test_args.tt1, test_args.tt2] + self.repo = test_args.repo + self.output_csv = test_args.out + + self.args = argparse.Namespace( + commits=False, + issues=False, + pull_requests=False, + wikis=False, + contributors=False, + invites=False, + start="2000/01/01-00:00:00", + finish="2400/01/01-00:00:00", + branch="default", + forks_include=False, + pr_comments=False, + export_google_sheets=False, + out=test_args.out, + ) + + @staticmethod + def _get_rate_limit(clients: git_logger.Clients): + return [c['client'].get_rate_limiting()[0] for c in clients.clients] + + @staticmethod + def _is_only_one_token_used(limit_start, limit_finish): + return bool(limit_start[0] - limit_finish[0]) != bool( + limit_start[1] - limit_finish[1] + ) + + @staticmethod + def _is_max_token_used(limit_start, limit_finish): + if limit_start[0] - limit_finish[0]: + return limit_start[0] == max(limit_start) + else: + return limit_start[1] == max(limit_start) + + @staticmethod + def _change_tokens_order(tokens, key): + key %= len(tokens) + return tokens[key:] + tokens[:key] + + def _get_usage(self, binded_repos, clients): + limit_start = self._get_rate_limit(clients) + + run(self.args, binded_repos) + + limit_finish = self._get_rate_limit(clients) + + return limit_start, limit_finish + + def test_commits_parser(self): + self.args.commits = True + for i in range(2): + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) + binded_repos = 
git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + def test_contributors_parser(self): + self.args.contributors = True + for i in range(2): + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + def test_issues_parser(self): + self.args.issues = True + for i in range(2): + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + def test_invites_parser(self): + self.args.invites = True + for i in range(2): + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + def test_pull_requests_parser(self): + self.args.pull_requests = True + for i in range(2): + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + 
self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + +if __name__ == '__main__': + unittest.main(argv=[sys.argv[0]]) diff --git a/utils.py b/utils.py index 1952a1bc..9088b574 100644 --- a/utils.py +++ b/utils.py @@ -1,9 +1,12 @@ import csv +from datetime import datetime +import pytz -from constants import MIN_SIDE_PADDING, SIDE_WHITE_SPACES, TITLE_LEN +from constants import MIN_SIDE_PADDING, SIDE_WHITE_SPACES, TITLE_LEN, TIMEZONE class logger: + # TODO: отключение вывода в stdout @staticmethod def log_title(title: str, title_len: int = TITLE_LEN): final_len = max( @@ -31,6 +34,10 @@ def log_to_csv(csv_name: str, field_names: tuple[str], row: dict | None = None): def log_to_stdout(info: dict): print(info) + @staticmethod + def log_sep(): + print("-" * TITLE_LEN) + @staticmethod def log_error(error: str): # или использовать logging, как в interface_wrapper @@ -39,3 +46,21 @@ def log_error(error: str): @staticmethod def log_warning(warning: str): pass + + +def parse_time(datetime_str) -> datetime: + start = ( + datetime_str[0].split('/') + datetime_str[1].split(':') + if len(datetime_str) == 2 + else datetime_str[0].split('/') + ['00', '00', '00'] + ) + start = [int(i) for i in start] + start_datetime = datetime( + year=start[0], + month=start[1], + day=start[2], + hour=start[3], + minute=start[4], + second=start[5], + ) + return start_datetime.astimezone(pytz.timezone(TIMEZONE)) diff --git a/wikipars.py b/wikipars.py index 9d828516..fbb86425 100644 --- a/wikipars.py +++ b/wikipars.py @@ -1,4 +1,3 @@ -import csv import os import time @@ -6,25 +5,16 @@ from constants import WIKI_FIELDNAMES +from utils import logger -def log_wiki_to_csv(info, csv_name): - with open(csv_name, 'a', newline='') as file: - writer = csv.DictWriter(file, fieldnames=WIKI_FIELDNAMES) - writer.writerow(info) +def wikiparser(repositories: list[str], path_drepo: str, csv_name: str): + 
logger.log_to_csv(csv_name, WIKI_FIELDNAMES) -def wikiparser(client, repositories, path_drepo, csv_name): - with open(csv_name, 'w', newline='') as file: - writer = csv.writer(file) - writer.writerow(WIKI_FIELDNAMES) - - # Создаем список репозиториев из файла - with open(repositories, 'r') as file: - list_repos = [x for x in file.read().split('\n') if x] error_repos = [] data_changes = [] - for name_rep in list_repos: + for name_rep in repositories: # Проверяем, есть ли репозиторий в папке dir_path = path_drepo + "/" + name_rep if os.path.exists(dir_path): @@ -48,7 +38,7 @@ def wikiparser(client, repositories, path_drepo, csv_name): error_repos.append(name_rep) continue - print("=" * 20, name_rep, "=" * 20) + logger.log_title(name_rep) # Вывод изменений # Хэш пустого дерева для сравнения с первым коммитом. Способ был найден здесь: # https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython @@ -91,16 +81,19 @@ def wikiparser(client, repositories, path_drepo, csv_name): data_commit["revision id"] = commit data_commit["added lines"] = commit.stats.total["insertions"] data_commit["deleted lines"] = commit.stats.total["deletions"] + for fieldname in data_commit: print(fieldname, data_commit[fieldname], sep=': ') - print("-" * 40) - log_wiki_to_csv(data_commit, csv_name) + + logger.log_sep() + logger.log_to_csv(csv_name, data_commit) + data_changes.append(data_commit) # Вывод репозиториев, с которыми возникли ошибки if error_repos: - print("!=====Проблемные репозитории=====!") + logger.log_title("! Проблемные репозитории !") for rep in error_repos: - print(rep) + logger.log_to_stdout(rep) return data_changes