From cb7ffb5e42001d772b175df8828ed3bf6a8e66e5 Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Fri, 28 Mar 2025 14:28:00 +0300 Subject: [PATCH 01/11] test --- ForgejoRepoAPI.py | 97 +++++++++---- GitHubRepoAPI.py | 19 ++- git_logger.py | 66 ++++++--- interface_wrapper.py | 8 +- main.py | 70 +++------ repo_parser/commits_parser.py | 91 ++++++++++++ repo_parser/contributors_parser.py | 100 +++++++++++++ repo_parser/invites_parser.py | 53 +++++++ repo_parser/issues_parser.py | 210 +++++++++++++++++++++++++++ repo_parser/pull_requests_parser.py | 211 ++++++++++++++++++++++++++++ repo_parser/wiki_parser.py | 108 ++++++++++++++ test_token_usage.py | 80 +++++++++++ utils.py | 25 ++++ 13 files changed, 1037 insertions(+), 101 deletions(-) create mode 100644 repo_parser/commits_parser.py create mode 100644 repo_parser/contributors_parser.py create mode 100644 repo_parser/invites_parser.py create mode 100644 repo_parser/issues_parser.py create mode 100644 repo_parser/pull_requests_parser.py create mode 100644 repo_parser/wiki_parser.py create mode 100644 test_token_usage.py diff --git a/ForgejoRepoAPI.py b/ForgejoRepoAPI.py index b4ecc4c7..920b8712 100644 --- a/ForgejoRepoAPI.py +++ b/ForgejoRepoAPI.py @@ -10,7 +10,7 @@ PullRequest, WikiPage, Comment, - Invite + Invite, ) import base64 import sys @@ -32,12 +32,14 @@ def get_user_data(self, user) -> User: type=getattr(user, 'type', ""), bio=getattr(user, 'bio', ""), site_admin=user.site_admin if hasattr(user, 'site_admin') else False, - _id=user.id + _id=user.id, ) def get_repository(self, id: str) -> Repository | None: try: - repo = self.client.repository.repo_get(owner=id.split('/')[0], repo=id.split('/')[1]) + repo = self.client.repository.repo_get( + owner=id.split('/')[0], repo=id.split('/')[1] + ) if not repo: logging.error(f"Failed to get repository {id} from Forgejo.") @@ -57,30 +59,35 @@ def get_repository(self, id: str) -> Repository | None: def get_collaborator_permission(self, repo: Repository, user: User) -> str: try: permission = self.client.repository.repo_get_repo_permissions( - owner=repo.owner.login, - repo=repo.name, - collaborator=user.login + owner=repo.owner.login, repo=repo.name, collaborator=user.login ) return permission.permission except Exception as e: if ("401" in str(e)) or ("403" in str(e)): logging.error( - f"Permission error: Only admins or repo admins can view permissions for others in {repo.name}.") + f"Permission error: Only admins or repo admins can view permissions for others in {repo.name}." + ) return f"Permission error: Only admins or repo admins can view permissions for others in {repo.name}." - logging.error(f"Failed to get collaborator permission for {user.login} in {repo.name}: {e}") + logging.error( + f"Failed to get collaborator permission for {user.login} in {repo.name}: {e}" + ) return "Error" def get_commits(self, repo: Repository, files: bool = True) -> list[Commit]: try: - commits = self.client.repository.repo_get_all_commits(repo.owner.login, repo.name) + commits = self.client.repository.repo_get_all_commits( + repo.owner.login, repo.name + ) return [ Commit( _id=c.sha, message=c.commit.message, author=self.get_user_data(c.author), date=isodate.parse_datetime(c.commit.author.date), - files=[f.filename for f in getattr(c, "files", [])] if files else None + files=( + [f.filename for f in getattr(c, "files", [])] if files else None + ), ) for c in commits ] @@ -92,11 +99,17 @@ def get_commits(self, repo: Repository, files: bool = True) -> list[Commit]: def get_contributors(self, repo: Repository) -> list[Contributor]: try: - commits = self.client.repository.repo_get_all_commits(repo.owner.login, repo.name) - contributors = {c.author.login: c.author.email or "" for c in commits if c.author} + commits = self.client.repository.repo_get_all_commits( + repo.owner.login, repo.name + ) + contributors = { + c.author.login: c.author.email or "" for c in commits if c.author + } return [Contributor(login, email) for login, email in contributors.items()] except Exception as e: - logging.error(f"Failed to get contributors from Forgejo for repo {repo.name}: {e}") + logging.error( + f"Failed to get contributors from Forgejo for repo {repo.name}: {e}" + ) return [] def get_issues(self, repo: Repository) -> list[Issue]: @@ -109,7 +122,11 @@ def get_issues(self, repo: Repository) -> list[Issue]: state=i.state, created_at=i.created_at, closed_at=i.closed_at if i.state == 'closed' else None, - closed_by=self.get_user_data(i.closed_by) if hasattr(i, 'closed_by') and i.closed_by else None, + closed_by=( + self.get_user_data(i.closed_by) + if hasattr(i, 'closed_by') and i.closed_by + else None + ), body=i.body, user=self.get_user_data(i.user), labels=[label.name for label in i.labels], @@ -118,12 +135,16 @@ def get_issues(self, repo: Repository) -> list[Issue]: for i in issues ] except Exception as e: - logging.error(f"Failed to get issues from Forgejo for repo {repo.name}: {e}") + logging.error( + f"Failed to get issues from Forgejo for repo {repo.name}: {e}" + ) return [] def get_pull_requests(self, repo: Repository) -> list[PullRequest]: try: - pulls = self.client.repository.repo_list_pull_requests(repo.owner.login, repo.name) + pulls = self.client.repository.repo_list_pull_requests( + repo.owner.login, repo.name + ) return [ PullRequest( _id=p.number, @@ -144,12 +165,16 @@ def get_pull_requests(self, repo: Repository) -> list[PullRequest]: for p in pulls ] except Exception as e: - logging.error(f"Failed to get pull requests from Forgejo for repo {repo.name}: {e}") + logging.error( + f"Failed to get pull requests from Forgejo for repo {repo.name}: {e}" + ) return [] def get_branches(self, repo: Repository) -> list[Branch]: try: - branches = self.client.repository.repo_list_branches(repo.owner.login, repo.name) + branches = self.client.repository.repo_list_branches( + repo.owner.login, repo.name + ) result = [] for branch in branches: @@ -161,7 +186,9 @@ def get_branches(self, repo: Repository) -> list[Branch]: email=author.email if author and author.email else "", ) - commit_details = self.client.repository.repo_get_single_commit(repo.owner.login, repo.name, commit.id) + commit_details = self.client.repository.repo_get_single_commit( + repo.owner.login, repo.name, commit.id + ) files = [file.filename for file in getattr(commit_details, "files", [])] commit_obj = Commit( @@ -177,27 +204,37 @@ def get_branches(self, repo: Repository) -> list[Branch]: return result except Exception as e: - logging.error(f"Failed to get branches from Forgejo for repo {repo.name}: {e}") + logging.error( + f"Failed to get branches from Forgejo for repo {repo.name}: {e}" + ) return [] def get_wiki_pages(self, repo: Repository) -> list[WikiPage]: try: - pages = self.client.repository.repo_get_wiki_pages(repo.owner.login, repo.name) + pages = self.client.repository.repo_get_wiki_pages( + repo.owner.login, repo.name + ) result = [] for page in pages: - page_details = self.client.repository.repo_get_wiki_page(repo.owner.login, repo.name, page.title) + page_details = self.client.repository.repo_get_wiki_page( + repo.owner.login, repo.name, page.title + ) wiki_page = WikiPage( title=page_details.title, - content=base64.b64decode(page_details.content_base_64).decode('utf-8') + content=base64.b64decode(page_details.content_base_64).decode( + 'utf-8' + ), ) result.append(wiki_page) return result except Exception as e: - logging.error(f"Failed to get wiki pages from Forgejo for repo {repo.name}: {e}") + logging.error( + f"Failed to get wiki pages from Forgejo for repo {repo.name}: {e}" + ) return [] def get_forks(self, repo: Repository) -> list[Repository]: @@ -215,8 +252,7 @@ def get_forks(self, repo: Repository) -> list[Repository]: name=fork.name, url=fork.html_url, default_branch=default_branch, - owner=owner - + owner=owner, ) ) return result @@ -229,7 +265,9 @@ def get_comments(self, repo, obj) -> list[Comment]: result = [] try: if isinstance(obj, Issue): - comments = self.client.issue.get_repo_comments(repo.owner.login, repo.name) + comments = self.client.issue.get_repo_comments( + repo.owner.login, repo.name + ) result = [ Comment( body=c.body, @@ -240,8 +278,9 @@ def get_comments(self, repo, obj) -> list[Comment]: ] elif isinstance(obj, PullRequest): - comments = self.client.repository.repo_get_pull_review_comments(repo.owner.login, repo.name, obj._id, - 100000) # нет id комментария + comments = self.client.repository.repo_get_pull_review_comments( + repo.owner.login, repo.name, obj._id, 100000 + ) # нет id комментария result = [ Comment( body=c.body, diff --git a/GitHubRepoAPI.py b/GitHubRepoAPI.py index 239bdfe9..0e961299 100644 --- a/GitHubRepoAPI.py +++ b/GitHubRepoAPI.py @@ -13,11 +13,25 @@ Invite, ) +from github import Github, GithubException + class GitHubRepoAPI(IRepositoryAPI): + def __init__(self, client: Github): + self.client = self._client_validation(client) - def __init__(self, client): - self.client = client + @staticmethod + def _client_validation(client: Github) -> Github: + try: + client.get_user().login + except GithubException as err: + logging.error(f'Github: Connect: error {err.data}') + logging.error( + 'Github: Connect: user could not be authenticated please try again.' + ) + exit(1) + else: + return client def get_user_data(self, user) -> User: return User( @@ -103,6 +117,7 @@ def get_issues(self, repo: Repository) -> list[Issue]: def get_pull_requests(self, repo: Repository) -> list[PullRequest]: try: pulls = self.client.get_repo(repo._id).get_pulls(state='all') + print(dir(pulls[0].merged_by)) return [ PullRequest( _id=p.number, diff --git a/git_logger.py b/git_logger.py index ed1e57ad..88afcf40 100644 --- a/git_logger.py +++ b/git_logger.py @@ -1,7 +1,4 @@ -from interface_wrapper import ( - RepositoryFactory, - IRepositoryAPI -) +from interface_wrapper import RepositoryFactory, IRepositoryAPI from time import sleep TIMEDELTA = 0.05 @@ -20,17 +17,40 @@ def get_tokens_from_file(tokens_path: str) -> list[str]: return tokens -class GitClients: +def get_repos_from_file(repos_path: str) -> list[str]: + with open(repos_path, 'r') as file: + list_repos = [x for x in file.read().split('\n') if x] + + return list_repos + + +class Clients: def __init__(self, source: str, tokens: list[str], base_url: str | None = None): - self.clients = self._init_clients(source, tokens, base_url) - self.cur_client = None + # Возможно это можно переписать покрасивее + if source == 'github': + self.clients = self._init_clients(source, tokens, base_url) + elif base_url == 'forgejo': + self.client = RepositoryFactory.create_api(source, tokens[0], base_url) + self.token = tokens[0] + else: + print(f"Unavailable source {source}, use [ 'github' | 'forgejo' ] instead") - def _init_clients(self, source: str, tokens: list[str], base_url: str | None) -> list[dict]: - clients = [{"client": login(source, token, base_url), "token": token} for token in tokens] + self.source = source + + def _init_clients( + self, source: str, tokens: list[str], base_url: str | None + ) -> list[dict]: + clients = [ + { + "client": RepositoryFactory.create_api(source, token, base_url), + "token": token, + } + for token in tokens + ] return clients - def get_next_client(self) -> IRepositoryAPI: + def _get_next_git_client(self) -> tuple[IRepositoryAPI, str]: client = None max_remaining_limit = -1 @@ -50,25 +70,29 @@ def get_next_client(self) -> IRepositoryAPI: if client is None: raise Exception("No git clients available") - self.cur_client = client - return client + return client['client'], client['token'] + def _get_next_forgejo_client(self) -> tuple[IRepositoryAPI, str]: + return self.client, self.token -def get_next_repo(clients: GitClients, repositories): - with open(repositories, 'r') as file: - list_repos = [x for x in file.read().split('\n') if x] - print(list_repos) - for repo_name in list_repos: + def get_next_client(self) -> tuple[IRepositoryAPI, str]: + if self.source == 'github': + return self._get_next_git_client() + elif self.source == 'forgejo': + return self._get_next_forgejo_client + + +def get_next_binded_repo(clients: Clients, repositories: list[str]): + for repo_name in repositories: try: - cur_client = clients.get_next_client() - repo = cur_client['client'].get_repository(repo_name) + client, token = clients.get_next_client() + repo = client.get_repository(repo_name) except Exception as err: print(f'get_next_repo(): error {err}') print(f'get_next_repo(): failed to load repository "{repo_name}"') exit(1) else: - print(cur_client['token']) - yield repo, cur_client['token'] + yield client, repo, token def get_assignee_story(git_object): diff --git a/interface_wrapper.py b/interface_wrapper.py index 50c80408..85e49bb7 100644 --- a/interface_wrapper.py +++ b/interface_wrapper.py @@ -175,7 +175,9 @@ def get_rate_limiting(self) -> tuple[int, int]: # Фабрика для создания API class RepositoryFactory: @staticmethod - def create_api(source: str, token: str, base_url: str | None = None) -> IRepositoryAPI: + def create_api( + source: str, token: str, base_url: str | None = None + ) -> IRepositoryAPI: from GitHubRepoAPI import GitHubRepoAPI from ForgejoRepoAPI import ForgejoRepoAPI @@ -183,7 +185,9 @@ def create_api(source: str, token: str, base_url: str | None = None) -> IReposit return GitHubRepoAPI(Github(token)) elif source == 'forgejo': if not isinstance(base_url, str): - raise ValueError(f"base_url for PyforgejoApi should be str, got {type(base_url)}") + raise ValueError( + f"base_url for PyforgejoApi should be str, got {type(base_url)}" + ) return ForgejoRepoAPI(PyforgejoApi(api_key=token, base_url=base_url)) else: raise ValueError(f"Unsupported source: {source}") diff --git a/main.py b/main.py index 7c7585b2..b7d26f08 100644 --- a/main.py +++ b/main.py @@ -1,18 +1,17 @@ import argparse -from datetime import datetime -import pytz import traceback - import git_logger import export_sheets -import commits_parser -import pull_requests_parser -import issues_parser -import invites_parser -import wikipars -import contributors_parser -from interface_wrapper import RepositoryFactory +from repo_parser import ( + commits_parser, + contributors_parser, + pull_requests_parser, + invites_parser, + issues_parser, + wiki_parser, +) +from utils import parse_time def parse_args(): @@ -119,24 +118,6 @@ def parse_args(): return parser.parse_args() -def parse_time(datetime_str): - start = ( - datetime_str[0].split('/') + datetime_str[1].split(':') - if len(datetime_str) == 2 - else datetime_str[0].split('/') + ['00', '00', '00'] - ) - start = [int(i) for i in start] - start_datetime = datetime( - year=start[0], - month=start[1], - day=start[2], - hour=start[3], - minute=start[4], - second=start[5], - ) - return start_datetime.astimezone(pytz.timezone(git_logger.TIMEZONE)) - - def main(): args = parse_args() @@ -145,31 +126,31 @@ def main(): else: tokens = git_logger.get_tokens_from_file(args.tokens) - repositories = args.list + repositories = git_logger.get_repos_from_file(args.list) + csv_name = args.out path_drepo = args.download_repos fork_flag = args.forks_include log_pr_comments = args.pr_comments + print(repositories) + try: - clients = git_logger.GitClients("github", tokens) + clients = git_logger.Clients("github", tokens) + binded_repos = git_logger.get_next_binded_repo(clients, repositories) + start = parse_time(args.start.split('-')) + finish = parse_time(args.finish.split('-')) except Exception as e: print(e) print(traceback.print_exc()) else: - client = RepositoryFactory.create_api("github", tokens[0]) - working_repos = git_logger.get_next_repo(clients, repositories) - start = parse_time(args.start.split('-')) - finish = parse_time(args.finish.split('-')) - if args.commits: commits_parser.log_commits( - client, working_repos, csv_name, start, finish, args.branch, fork_flag + binded_repos, csv_name, start, finish, args.branch, fork_flag ) if args.pull_requests: pull_requests_parser.log_pull_requests( - client, - working_repos, + binded_repos, csv_name, start, finish, @@ -177,21 +158,16 @@ def main(): log_pr_comments, ) if args.issues: - issues_parser.log_issues( - client, working_repos, csv_name, tokens[0], start, finish, fork_flag - ) + issues_parser.log_issues(binded_repos, csv_name, start, finish, fork_flag) if args.invites: invites_parser.log_invitations( - client, - working_repos, + binded_repos, csv_name, ) if args.wikis: - wikipars.wikiparser(clients, repositories, path_drepo, csv_name) + wiki_parser.wiki_parser(repositories, path_drepo, csv_name) if args.contributors: - contributors_parser.log_contributors( - client, working_repos, csv_name, fork_flag - ) + contributors_parser.log_contributors(binded_repos, csv_name, fork_flag) if args.export_google_sheets: export_sheets.write_data_to_table( csv_name, args.google_token, args.table_id, args.sheet_id diff --git a/repo_parser/commits_parser.py b/repo_parser/commits_parser.py new file mode 100644 index 00000000..60ab8f96 --- /dev/null +++ b/repo_parser/commits_parser.py @@ -0,0 +1,91 @@ +from datetime import datetime +from typing import Generator +from utils import logger +import pytz +from time import sleep + +from interface_wrapper import IRepositoryAPI, Repository + +EMPTY_FIELD = 'Empty field' +TIMEDELTA = 0.05 +TIMEZONE = 'Europe/Moscow' +FIELDNAMES = ( + 'repository name', + 'author name', + 'author login', + 'author email', + 'date and time', + 'changed files', + 'commit id', + 'branch', +) +GOOGLE_MAX_CELL_LEN = 50000 + + +def log_repository_commits( + client: IRepositoryAPI, repository: Repository, csv_name, start, finish, branch +): + branches = [] + match branch: + case 'all': + for branch in repository.get_branches(): + branches.append(branch.name) + case None: + branches.append(repository.default_branch) + case _: + branches.append(branch) + + for branch in branches: + logger.log_to_stdout(f'Processing branch {branch}') + + commits = client.get_commits(repository) + + for commit in commits: + if ( + commit.date.astimezone(pytz.timezone(TIMEZONE)) < start + or commit.date.astimezone(pytz.timezone(TIMEZONE)) > finish + ): + continue + + changed_files = '; '.join([file for file in commit.files]) + info = { + 'repository name': repository.name, + 'author name': commit.author.username, + 'author login': commit.author.login, + 'author email': commit.author.email or EMPTY_FIELD, + 'date and time': commit.date, + 'changed files': changed_files[:GOOGLE_MAX_CELL_LEN], + 'commit id': commit._id, + 'branch': branch, + } + + logger.log_to_csv(csv_name, FIELDNAMES, info) + logger.log_to_stdout(info) + + sleep(TIMEDELTA) + + +def log_commits( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + csv_name: str, + start: datetime, + finish: datetime, + branch: str, + fork_flag: bool, +): + logger.log_to_csv(csv_name, FIELDNAMES) + + for client, repo, token in binded_repos: + try: + logger.log_title(repo.name) + log_repository_commits(client, repo, csv_name, start, finish, branch) + if fork_flag: + for forked_repo in client.get_forks(repo): + logger.log_title("FORKED:", forked_repo.full_name) + log_repository_commits( + client, forked_repo, csv_name, start, finish, branch + ) + sleep(TIMEDELTA) + sleep(TIMEDELTA) + except Exception as e: + print(e) diff --git a/repo_parser/contributors_parser.py b/repo_parser/contributors_parser.py new file mode 100644 index 00000000..5580f230 --- /dev/null +++ b/repo_parser/contributors_parser.py @@ -0,0 +1,100 @@ +from utils import logger +from time import sleep +from typing import Generator +from interface_wrapper import IRepositoryAPI, Repository + +EMPTY_FIELD = 'Empty field' +TIMEDELTA = 0.05 +TIMEZONE = 'Europe/Moscow' +FIELDNAMES = ( + 'repository name', + 'login', + 'name', + 'email', + 'url', + 'permissions', + 'total_commits', + 'id', + 'node_id', + 'type', + 'bio', + 'site_admin', +) + + +def log_repository_contributors( + client: IRepositoryAPI, repository: Repository, csv_name: str +): + contributors_stats = get_contributors_stats(client, repository) + + def nvl(val): + return val or EMPTY_FIELD + + for contributor_stat in contributors_stats.values(): + contributor = contributor_stat["contributor_object"] + contributor_permissions = client.get_collaborator_permission( + repository, contributor + ) + + info_tmp = { + 'repository name': repository.name, + 'login': contributor.login, + 'name': nvl(contributor.username), + 'email': nvl(contributor_stat['email']), + 'url': contributor.html_url, + 'permissions': nvl(contributor_permissions), + 'total_commits': contributor_stat['total_commits'], + 'node_id': contributor.node_id, + 'type': contributor.type, + 'bio': nvl(contributor.bio), + 'site_admin': contributor.site_admin, + } + + logger.log_to_csv(csv_name, FIELDNAMES, info_tmp) + logger.log_to_stdout(info_tmp) + + sleep(TIMEDELTA) + + +def get_contributors_stats(client: IRepositoryAPI, repository: Repository) -> dict: + contributors_stats = dict() + commits = client.get_commits(repository, False) + + for commit in commits: + contributor = commit.author + + if contributor.login not in contributors_stats: + contributors_stats[contributor.login] = { + 'total_commits': 0, + 'email': contributor.email, + 'contributor_object': contributor, + } + + contributors_stats[contributor.login]['total_commits'] += 1 + + sleep(TIMEDELTA) + + return contributors_stats + + +def log_contributors( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + csv_name: str, + fork_flag: bool, +): + logger.log_to_csv(csv_name, FIELDNAMES) + + for client, repo, token in binded_repos: + try: + logger.log_title(repo.name) + log_repository_contributors(client, repo, csv_name) + + if fork_flag: + for forked_repo in client.get_forks(repo): + logger.log_title("FORKED:", forked_repo.name) + log_repository_contributors(client, forked_repo, csv_name) + sleep(TIMEDELTA) + + except Exception as e: + print(e) + exit(1) diff --git a/repo_parser/invites_parser.py b/repo_parser/invites_parser.py new file mode 100644 index 00000000..6d972f62 --- /dev/null +++ b/repo_parser/invites_parser.py @@ -0,0 +1,53 @@ +from typing import Generator +from utils import logger +from time import sleep +from interface_wrapper import IRepositoryAPI, Repository + +FIELDNAMES = ( + 'repository name', + 'invited login', + 'invite creation date', + 'invitation url', +) +TIMEDELTA = 0.05 + + +def log_inviter(repo, invite, writer): + invite_info = [ + repo.full_name, + invite.invitee.login, + invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), + invite.html_url, + ] + writer.writerow(invite_info) + print(invite_info) + + +def log_repository_invitations( + client: IRepositoryAPI, repository: Repository, csv_name +): + invitations = client.get_invites(repository) + for invite in invitations: + invite_info = { + 'repository name': repository.name, + 'invited login': invite.invitee.login, + 'invite creation date': invite.created_at.strftime("%d/%m/%Y, %H:%M:%S"), + 'invitation url': invite.html_url, + } + logger.log_to_csv(csv_name, FIELDNAMES, invite_info) + logger.log_to_stdout(invite_info) + sleep(TIMEDELTA) + + +def log_invitations( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + csv_name: str, +): + logger.log_to_csv(csv_name, FIELDNAMES) + + for client, repo, token in binded_repos: + logger.log_title(repo.name) + try: + log_repository_invitations(client, repo, csv_name) + except Exception as e: + print(e) diff --git a/repo_parser/issues_parser.py b/repo_parser/issues_parser.py new file mode 100644 index 00000000..374e166b --- /dev/null +++ b/repo_parser/issues_parser.py @@ -0,0 +1,210 @@ +from datetime import datetime +from typing import Generator +from utils import logger +import pytz +import requests +import json +from time import sleep +from git_logger import get_assignee_story +from interface_wrapper import IRepositoryAPI, Repository + +EMPTY_FIELD = 'Empty field' +TIMEDELTA = 0.05 +TIMEZONE = 'Europe/Moscow' +FIELDNAMES = ( + 'repository name', + 'number', + 'title', + 'state', + 'task', + 'created at', + 'creator name', + 'creator login', + 'creator email', + 'closer name', + 'closer login', + 'closer email', + 'closed at', + 'comment body', + 'comment created at', + 'comment author name', + 'comment author login', + 'comment author email', + 'assignee story', + 'connected pull requests', + 'labels', + 'milestone', +) + + +def get_connected_pulls(issue_number, repo_owner, repo_name, token): + # TODO как-то заменить + return + access_token = token + repo_owner = repo_owner.login + # Формирование запроса GraphQL + query = """ + { + repository(owner: "%s", name: "%s") { + issue(number: %d) { + timelineItems(first: 50, itemTypes:[CONNECTED_EVENT,CROSS_REFERENCED_EVENT]) { + filteredCount + nodes { + ... on ConnectedEvent { + ConnectedEvent: subject { + ... on PullRequest { + number + title + url + } + } + } + ... on CrossReferencedEvent { + CrossReferencedEvent: source { + ... on PullRequest { + number + title + url + } + } + } + } + } + } + } + }""" % ( + repo_owner, + repo_name, + issue_number, + ) + + # Формирование заголовков запроса + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json", + } + + # Отправка запроса GraphQL + response = requests.post( + "https://api.github.com/graphql", + headers=headers, + data=json.dumps({"query": query}), + ) + response_data = response.json() + # Обработка полученных данных + pull_request_data = response_data["data"]["repository"]["issue"] + list_url = [] + if pull_request_data is not None: + issues_data = pull_request_data["timelineItems"]["nodes"] + for pulls in issues_data: + if ( + pulls.get("CrossReferencedEvent") is not None + and pulls.get("CrossReferencedEvent").get("url") not in list_url + ): + list_url.append(pulls.get("CrossReferencedEvent").get("url")) + if ( + pulls.get("ConnectedEvent") is not None + and pulls.get("ConnectedEvent").get("url") not in list_url + ): + list_url.append(pulls.get("ConnectedEvent").get("url")) + if list_url == []: + return 'Empty field' + else: + return ';'.join(list_url) + return 'Empty field' + + +def log_repository_issues( + client: IRepositoryAPI, repository: Repository, csv_name, token, start, finish +): + def nvl(val): + return val or EMPTY_FIELD + + def get_info(obj, attr): + return EMPTY_FIELD if obj is None else getattr(obj, attr) + + issues = client.get_issues(repository) + for issue in issues: + if ( + issue.created_at.astimezone(pytz.timezone(TIMEZONE)) < start + or issue.created_at.astimezone(pytz.timezone(TIMEZONE)) > finish + ): + continue + + info_tmp = { + 'repository name': repository.name, + 'number': issue._id, + 'title': issue.title, + 'state': issue.state, + 'task': issue.body, + 'created at': issue.created_at, + 'creator name': issue.user.username, + 'creator login': issue.user.login, + 'creator email': issue.user.email, + 'closed at': nvl(issue.closed_at), + 'closer name': issue.closed_by.username if issue.closed_by else None, + 'closer login': issue.closed_by.login if issue.closed_by else None, + 'closer email': issue.closed_by.email if issue.closed_by else None, + 'comment body': EMPTY_FIELD, + 'comment created at': EMPTY_FIELD, + 'comment author name': EMPTY_FIELD, + 'comment author login': EMPTY_FIELD, + 'comment author email': EMPTY_FIELD, + 'assignee story': get_assignee_story(issue), + 'connected pull requests': ( + EMPTY_FIELD + if issue._id is None + else get_connected_pulls( + issue._id, repository.owner, repository.name, token + ) + ), + 'labels': ( + EMPTY_FIELD + if issue.labels is None + else ';'.join([label for label in issue.labels]) + ), + 'milestone': get_info(issue.milestone, 'title'), + } + comments = client.get_comments(repository, issue) + if len(comments) > 0: + for comment in comments: + info = info_tmp + info['comment body'] = comment.body + info['comment created at'] = comment.created_at + info['comment author name'] = comment.author.username + info['comment author login'] = comment.author.login + info['comment author email'] = comment.author.email + + logger.log_to_csv(csv_name, FIELDNAMES, info) + logger.log_to_stdout(info) + else: + logger.log_to_csv(csv_name, FIELDNAMES, info_tmp) + logger.log_to_stdout(info_tmp) + + sleep(TIMEDELTA) + + +def log_issues( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + csv_name: str, + start: datetime, + finish: datetime, + fork_flag: bool, +): + logger.log_to_csv(csv_name, FIELDNAMES) + + for client, repo, token in binded_repos: + try: + logger.log_title(repo.name) + log_repository_issues(client, repo, csv_name, token, start, finish) + if fork_flag: + forked_repos = client.get_forks(repo) + for forked_repo in forked_repos: + logger.log_title("FORKED:", forked_repo.name) + log_repository_issues( + client, forked_repo, csv_name, token, start, finish + ) + sleep(TIMEDELTA) + sleep(TIMEDELTA) + except Exception as e: + print("log_issues exception:", e) diff --git a/repo_parser/pull_requests_parser.py b/repo_parser/pull_requests_parser.py new file mode 100644 index 00000000..e45c3382 --- /dev/null +++ b/repo_parser/pull_requests_parser.py @@ -0,0 +1,211 @@ +from datetime import datetime +from typing import Generator +from utils import logger +import pytz +import requests +import json +from time import sleep +from git_logger import get_assignee_story +from interface_wrapper import IRepositoryAPI, Repository + +EMPTY_FIELD = 'Empty field' +TIMEDELTA = 0.05 +TIMEZONE = 'Europe/Moscow' +FIELDNAMES = ( + 'repository name', + 'title', + 'id', + 'state', + 'commit into', + 'commit from', + 'created at', + 'creator name', + 'creator login', + 'creator email', + 'changed files', + 'comment body', + 'comment created at', + 'comment author name', + 'comment author login', + 'comment author email', + 'merger name', + 'merger login', + 'merger email', + 'source branch', + 'target branch', + 'assignee story', + 'related issues', + 'labels', + 'milestone', +) + + +def get_related_issues(pull_request_number, repo_owner, repo_name, token): + # TODO как-то заменить + return + access_token = token + repo_owner = repo_owner.login + + # Формирование запроса GraphQL + query = """ + { + repository(owner: "%s", name: "%s") { + pullRequest(number: %d) { + id + closingIssuesReferences(first: 50) { + edges { + node { + id + body + number + title + url + } + } + } + } + } + } + """ % ( + repo_owner, + repo_name, + pull_request_number, + ) + + # Формирование заголовков запроса + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json", + } + + # Отправка запроса GraphQL + response = requests.post( + "https://api.github.com/graphql", + headers=headers, + data=json.dumps({"query": query}), + ) + response_data = response.json() + # Обработка полученных данных + pull_request_data = response_data["data"]["repository"]["pullRequest"] + issues_data = pull_request_data["closingIssuesReferences"]["edges"] + list_issues_url = [] + # сохранение информации об issues + for issue in issues_data: + issue_node = issue["node"] + list_issues_url.append(issue_node["url"]) + return ';'.join(list_issues_url) + + +def log_repositories_pr( + client: IRepositoryAPI, + repository: Repository, + csv_name, + token, + start, + finish, + log_comments=False, +): + def nvl(val): + return val or EMPTY_FIELD + + def get_info(obj, attr): + return EMPTY_FIELD if obj is None else getattr(obj, attr) + + pulls = client.get_pull_requests(repository) + for pull in pulls: + if ( + pull.created_at.astimezone(pytz.timezone(TIMEZONE)) < start + or pull.created_at.astimezone(pytz.timezone(TIMEZONE)) > finish + ): + continue + + info_tmp = { + 'repository name': repository.name, + 'title': pull.title, + 'id': pull._id, + 'state': pull.state, + 'commit into': pull.base_label, + 'commit from': pull.head_label, + 'created at': pull.created_at, + 'creator name': nvl(pull.author.username), + 'creator login': pull.author.login, + 'creator email': pull.author.email, + 'changed files': '; '.join([file for file in pull.files]), + 'comment body': EMPTY_FIELD, + 'comment created at': EMPTY_FIELD, + 'comment author name': EMPTY_FIELD, + 'comment author login': EMPTY_FIELD, + 'comment author email': EMPTY_FIELD, + 'merger name': pull.merged_by.username if pull.merged_by else None, + 'merger login': pull.merged_by.login if pull.merged_by else None, + 'merger email': pull.merged_by.email if pull.merged_by else None, + 'source branch': pull.head_ref, + 'target branch': pull.base_ref, + 'assignee story': get_assignee_story(pull), + 'related issues': ( + EMPTY_FIELD + if pull.issue_url is None + else get_related_issues( + pull._id, repository.owner, repository.name, token + ) + ), + 'labels': ( + EMPTY_FIELD + if pull.labels is None + else ';'.join([label for label in pull.labels]) + ), + 'milestone': get_info(pull.milestone, 'title'), + } + + if log_comments: + comments = client.get_comments(repository, pull) + if len(comments) > 0: + for comment in comments: + info = info_tmp + info['comment body'] = comment.body + info['comment created at'] = comment.created_at + info['comment author name'] = comment.author.name + info['comment author login'] = comment.author.login + info['comment author email'] = nvl(comment.author.email) + + logger.log_to_csv(csv_name, FIELDNAMES, info) + logger.log_to_stdout(info) + else: + logger.log_to_csv(csv_name, FIELDNAMES, info_tmp) + logger.log_to_stdout(info_tmp) + sleep(TIMEDELTA) + + +def log_pull_requests( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + csv_name: str, + start: datetime, + finish: datetime, + fork_flag: bool, + log_comments=False, +): + logger.log_to_csv(csv_name, FIELDNAMES) + + for client, repo, token in binded_repos: + try: + logger.log_title(repo.name) + log_repositories_pr( + client, repo, csv_name, token, start, finish, log_comments + ) + if fork_flag: + forked_repos = client.get_repo(repo._id).get_forks() + for forked_repo in forked_repos: + logger.log_title("FORKED:", forked_repo.full_name) + log_repositories_pr( + client, + forked_repo, + csv_name, + token, + start, + finish, + log_comments, + ) + sleep(TIMEDELTA) + sleep(TIMEDELTA) + except Exception as e: + print(e) diff --git a/repo_parser/wiki_parser.py b/repo_parser/wiki_parser.py new file mode 100644 index 00000000..d3b53cc3 --- /dev/null +++ b/repo_parser/wiki_parser.py @@ -0,0 +1,108 @@ +from git import Repo, exc +import os +import time +from utils import logger + +WIKI_FIELDNAMES = ( + 'repository name', + 'author name', + 'author login', + 'datetime', + 'page', + 'action', + 'revision id', + 'added lines', + 'deleted lines', +) + + +def wiki_parser(repositories: list[str], path_drepo: str, csv_name: str): + logger.log_to_csv(csv_name, WIKI_FIELDNAMES) + + error_repos = [] + data_changes = [] + for name_rep in repositories: + # Проверяем, есть ли репозиторий в папке + dir_path = path_drepo + "/" + name_rep + if os.path.exists(dir_path): + # Обновляем репозиторий + if len(os.listdir(dir_path)) > 0: + repo = Repo(dir_path) + repo.remotes.origin.pull() + else: + os.rmdir(dir_path) + error_repos.append(name_rep) + continue + else: + # Клонируем репозиторий в папку + dir_path = path_drepo + "/" + name_rep + os.makedirs(dir_path, exist_ok=True) + repo_url = f"git@github.com:{name_rep}.wiki.git" + try: + repo = Repo.clone_from(repo_url, dir_path) + except exc.GitCommandError: + os.rmdir(dir_path) + error_repos.append(name_rep) + continue + + logger.log_title(name_rep) + + # Вывод изменений + # Хэш пустого дерева для сравнения с первым коммитом. Способ был найден здесь: + # https://stackoverflow.com/questions/33916648/get-the-diff-details-of-first-commit-in-gitpython + EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + wiki_commits = repo.iter_commits(all=True) + activity = { + "A": "Страница добавлена", + "M": "Страница изменена", + "D": "Страница удалена", + "R": "Страница переименована", + } + # eng_activity = {"A" : "Page added", "M" : "Page modified", "D" : "Page deleted", "R": "Page renamed"} + for commit in wiki_commits: + data_commit = dict() + parent = commit.parents + data_commit["repository name"] = name_rep + data_commit["author name"] = commit.author + if commit.author.email and len(commit.author.email.split('+')) > 1: + data_commit["author login"] = commit.author.email.split('+')[1].split( + '@users' + )[0] + else: + data_commit["author login"] = "empty login" + data_commit["datetime"] = time.strftime( + "%Y-%m-%d %H:%M:%S%z", time.gmtime(commit.committed_date) + ) + if parent: + data_commit["page"] = ';'.join( + [diff.b_path for diff in parent[0].diff(commit)] + ) + data_commit["action"] = ';'.join( + [activity[diff.change_type] for diff in parent[0].diff(commit)] + ) + else: + # Первый коммит + data_commit["page"] = ';'.join( + [diff.b_path for diff in commit.diff(EMPTY_TREE_SHA)] + ) + data_commit["action"] = ';'.join([activity["A"]]) + data_commit["revision id"] = commit + data_commit["added lines"] = commit.stats.total["insertions"] + data_commit["deleted lines"] = commit.stats.total["deletions"] + + for fieldname in data_commit: + print(fieldname, data_commit[fieldname], sep=': ') + + logger.log_sep() + + logger.log_to_csv(csv_name, data_commit) + + data_changes.append(data_commit) + + # Вывод репозиториев, с которыми возникли ошибки + if error_repos: + logger.log_title("! Проблемные репозитории !") + for rep in error_repos: + logger.log_to_stdout(rep) + + return data_changes diff --git a/test_token_usage.py b/test_token_usage.py new file mode 100644 index 00000000..5cf69388 --- /dev/null +++ b/test_token_usage.py @@ -0,0 +1,80 @@ +import unittest +import argparse +import sys +from utils import parse_time +from datetime import datetime +from interface_wrapper import RepositoryFactory, IRepositoryAPI + +import git_logger + +from repo_parser import ( + commits_parser, + contributors_parser, + pull_requests_parser, + invites_parser, + issues_parser, + wiki_parser, +) + + +def fix_rate_limit(clients: git_logger.Clients): + return [c['client'].get_rate_limiting() for c in clients.clients] + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--tt1', type=str, required=True, help='first test token') + parser.add_argument('--tt2', type=str, required=True, help='second test token') + + parser.add_argument( + '-r', + '--repo', + type=str, + required=True, + help=('test repo'), + ) + + parser.add_argument('-o', '--out', type=str, required=True, help='output filename') + + return parser.parse_args() + + +class TestCommitsParser(unittest.TestCase): + def setUp(self): + args = parse_args() + print(args) + + self.token1 = args.tt1 + self.token2 = args.tt2 + self.repo = args.test_repo + self.output_csv = args.out + + self.start = parse_time('2000/01/01-00:00:00') + self.finish = parse_time('2400/01/01-00:00:00') + self.branch = 'default' + self.fork_flag = False + + def test_commits_parser(self): + clients1 = git_logger.Clients("github", [self.token1, self.token2]) + binded_repos1 = git_logger.get_next_binded_repo(clients1, [self.test_repo]) + + rate_limit_start = fix_rate_limit(clients1) + + commits_parser.log_commits( + binded_repos1, + self.output_csv, + self.start, + self.finish, + self.branch, + self.fork_flag, + ) + + rate_limit_finish = fix_rate_limit(clients1) + + print(rate_limit_start, rate_limit_finish) + + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/utils.py b/utils.py index 0b8c8420..4e8f1f93 100644 --- a/utils.py +++ b/utils.py @@ -1,4 +1,7 @@ import csv +from datetime import datetime +import pytz +import git_logger TITLE_LEN = 80 MIN_SIDE_PADDING = 4 @@ -33,6 +36,10 @@ def log_to_csv(csv_name: str, field_names: tuple[str], row: dict | None = None): def log_to_stdout(info: dict): print(info) + @staticmethod + def log_sep(): + print("-" * TITLE_LEN) + @staticmethod def log_error(error: str): # или использовать logging, как в interface_wrapper @@ -41,3 +48,21 @@ def log_error(error: str): @staticmethod def log_warning(warning: str): pass + + +def parse_time(datetime_str) -> datetime: + start = ( + datetime_str[0].split('/') + datetime_str[1].split(':') + if len(datetime_str) == 2 + else datetime_str[0].split('/') + ['00', '00', '00'] + ) + start = [int(i) for i in start] + start_datetime = datetime( + year=start[0], + month=start[1], + day=start[2], + hour=start[3], + minute=start[4], + second=start[5], + ) + return start_datetime.astimezone(pytz.timezone(git_logger.TIMEZONE)) From 42926f045ca7f90fbace3c96a78e7309be08137a Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 15:22:16 +0300 Subject: [PATCH 02/11] unit test for token usage --- git_logger.py | 2 - interface_wrapper.py | 4 +- main.py | 73 +++++++++++---------- test_token_usage.py | 153 +++++++++++++++++++++++++++++-------------- utils.py | 6 +- 5 files changed, 147 insertions(+), 91 deletions(-) diff --git a/git_logger.py b/git_logger.py index 88afcf40..c3089196 100644 --- a/git_logger.py +++ b/git_logger.py @@ -2,8 +2,6 @@ from time import sleep TIMEDELTA = 0.05 -TIMEZONE = 'Europe/Moscow' - def login(source, token, base_url): client = RepositoryFactory.create_api(source, token, base_url) diff --git a/interface_wrapper.py b/interface_wrapper.py index 85e49bb7..66195edb 100644 --- a/interface_wrapper.py +++ b/interface_wrapper.py @@ -3,7 +3,7 @@ from dataclasses import dataclass import logging -from github import Github +from github import Github, Auth from pyforgejo import PyforgejoApi # Настройка логирования @@ -182,7 +182,7 @@ def create_api( from ForgejoRepoAPI import ForgejoRepoAPI if source == 'github': - return GitHubRepoAPI(Github(token)) + return GitHubRepoAPI(Github(auth=Auth.Token(token))) elif source == 'forgejo': if not isinstance(base_url, str): raise ValueError( diff --git a/main.py b/main.py index b7d26f08..55b51037 100644 --- a/main.py +++ b/main.py @@ -118,6 +118,42 @@ def parse_args(): return parser.parse_args() +def run(args, binded_repos, repos_for_wiki=None): + start = parse_time(args.start.split('-')) + finish = parse_time(args.finish.split('-')) + + if args.commits: + commits_parser.log_commits( + binded_repos, args.out, start, finish, args.branch, args.forks_include + ) + if args.pull_requests: + pull_requests_parser.log_pull_requests( + binded_repos, + args.out, + start, + finish, + args.forks_include, + args.pr_comments, + ) + if args.issues: + issues_parser.log_issues( + binded_repos, args.out, start, finish, args.forks_include + ) + if args.invites: + invites_parser.log_invitations( + binded_repos, + args.out, + ) + if args.contributors: + contributors_parser.log_contributors(binded_repos, args.out, args.forks_include) + if args.wikis: + wiki_parser.wiki_parser(repos_for_wiki, args.download_repos, args.out) + if args.export_google_sheets: + export_sheets.write_data_to_table( + args.out, args.google_token, args.table_id, args.sheet_id + ) + + def main(): args = parse_args() @@ -128,51 +164,16 @@ def main(): repositories = git_logger.get_repos_from_file(args.list) - csv_name = args.out - path_drepo = args.download_repos - fork_flag = args.forks_include - log_pr_comments = args.pr_comments - print(repositories) try: clients = git_logger.Clients("github", tokens) binded_repos = git_logger.get_next_binded_repo(clients, repositories) - start = parse_time(args.start.split('-')) - finish = parse_time(args.finish.split('-')) except Exception as e: print(e) print(traceback.print_exc()) else: - if args.commits: - commits_parser.log_commits( - binded_repos, csv_name, start, finish, args.branch, fork_flag - ) - if args.pull_requests: - pull_requests_parser.log_pull_requests( - binded_repos, - csv_name, - start, - finish, - fork_flag, - log_pr_comments, - ) - if args.issues: - issues_parser.log_issues(binded_repos, csv_name, start, finish, fork_flag) - if args.invites: - invites_parser.log_invitations( - binded_repos, - csv_name, - ) - if args.wikis: - wiki_parser.wiki_parser(repositories, path_drepo, csv_name) - if args.contributors: - contributors_parser.log_contributors(binded_repos, csv_name, fork_flag) - if args.export_google_sheets: - export_sheets.write_data_to_table( - csv_name, args.google_token, args.table_id, args.sheet_id - ) - + run(args, binded_repos) if __name__ == '__main__': main() diff --git a/test_token_usage.py b/test_token_usage.py index 5cf69388..fefdfc69 100644 --- a/test_token_usage.py +++ b/test_token_usage.py @@ -1,27 +1,12 @@ import unittest import argparse import sys -from utils import parse_time -from datetime import datetime -from interface_wrapper import RepositoryFactory, IRepositoryAPI - -import git_logger - -from repo_parser import ( - commits_parser, - contributors_parser, - pull_requests_parser, - invites_parser, - issues_parser, - wiki_parser, -) +from main import run -def fix_rate_limit(clients: git_logger.Clients): - return [c['client'].get_rate_limiting() for c in clients.clients] - +import git_logger -def parse_args(): +def parse_args(args): parser = argparse.ArgumentParser() parser.add_argument('--tt1', type=str, required=True, help='first test token') parser.add_argument('--tt2', type=str, required=True, help='second test token') @@ -36,45 +21,117 @@ def parse_args(): parser.add_argument('-o', '--out', type=str, required=True, help='output filename') - return parser.parse_args() - + return parser.parse_args(args) -class TestCommitsParser(unittest.TestCase): +class TestTokenUsage(unittest.TestCase): def setUp(self): - args = parse_args() - print(args) + test_args = parse_args(sys.argv[1:]) + + self.tokens = [test_args.tt1, test_args.tt2] + self.repo = test_args.repo + self.output_csv = test_args.out + + self.args = argparse.Namespace( + commits=False, + issues=False, + pull_requests=False, + wikis=False, + contributors=False, + invites=False, + start="2000/01/01-00:00:00", + finish="2400/01/01-00:00:00", + branch="default", + forks_include=False, + pr_comments=False, + export_google_sheets=False, + out=test_args.out, + ) + + @staticmethod + def _get_rate_limit(clients: git_logger.Clients): + return [c['client'].get_rate_limiting()[0] for c in clients.clients] + + @staticmethod + def _is_only_one_token_used(limit_start, limit_finish): + return (bool(limit_start[0] - limit_finish[0]) + != bool(limit_start[1] - limit_finish[1])) + + @staticmethod + def _is_max_token_used(limit_start, limit_finish): + if limit_start[0] - limit_finish[0]: + return limit_start[0] == max(limit_start) + else: + return limit_start[1] == max(limit_start) + + @staticmethod + def _change_tokens_order(tokens, key): + key %= len(tokens) + return tokens[key:] + tokens[:key] + + def _get_usage(self, binded_repos, clients): + limit_start = self._get_rate_limit(clients) + + run(self.args, binded_repos) - self.token1 = args.tt1 - self.token2 = args.tt2 - self.repo = args.test_repo - self.output_csv = args.out + limit_finish = self._get_rate_limit(clients) - self.start = parse_time('2000/01/01-00:00:00') - self.finish = parse_time('2400/01/01-00:00:00') - self.branch = 'default' - self.fork_flag = False + return limit_start, limit_finish def test_commits_parser(self): - clients1 = git_logger.Clients("github", [self.token1, self.token2]) - binded_repos1 = git_logger.get_next_binded_repo(clients1, [self.test_repo]) - - rate_limit_start = fix_rate_limit(clients1) - - commits_parser.log_commits( - binded_repos1, - self.output_csv, - self.start, - self.finish, - self.branch, - self.fork_flag, - ) + self.args.commits = True + self.assertTrue(False) + for i in range(2): + clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + def test_contributors_parser(self): + self.args.contributors = True + for i in range(2): + clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + def test_issues_parser(self): + self.args.issues = True + for i in range(2): + clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) + + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + + def test_invites_parser(self): + self.args.invites = True + for i in range(2): + clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) + + limit_start, limit_finish = self._get_usage(binded_repos, clients) - rate_limit_finish = fix_rate_limit(clients1) + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) - print(rate_limit_start, rate_limit_finish) + def test_pull_requests_parser(self): + self.args.pull_requests = True + for i in range(2): + clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) - pass + limit_start, limit_finish = self._get_usage(binded_repos, clients) + self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) + self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) if __name__ == '__main__': - unittest.main() + unittest.main(argv=[sys.argv[0]]) diff --git a/utils.py b/utils.py index 4e8f1f93..caf9dcbf 100644 --- a/utils.py +++ b/utils.py @@ -1,14 +1,13 @@ import csv from datetime import datetime import pytz -import git_logger TITLE_LEN = 80 MIN_SIDE_PADDING = 4 SIDE_WHITE_SPACES = 1 - class logger: + #TODO: отключение вывода в stdout @staticmethod def log_title(title: str, title_len: int = TITLE_LEN): final_len = max( @@ -49,6 +48,7 @@ def log_error(error: str): def log_warning(warning: str): pass +TIMEZONE = 'Europe/Moscow' def parse_time(datetime_str) -> datetime: start = ( @@ -65,4 +65,4 @@ def parse_time(datetime_str) -> datetime: minute=start[4], second=start[5], ) - return start_datetime.astimezone(pytz.timezone(git_logger.TIMEZONE)) + return start_datetime.astimezone(pytz.timezone(TIMEZONE)) From cc27ae851b04efcfe2556c84a7a869ece39f005e Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 15:54:26 +0300 Subject: [PATCH 03/11] unit test for token usage --- test_token_usage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_token_usage.py b/test_token_usage.py index fefdfc69..329adee0 100644 --- a/test_token_usage.py +++ b/test_token_usage.py @@ -79,7 +79,6 @@ def _get_usage(self, binded_repos, clients): def test_commits_parser(self): self.args.commits = True - self.assertTrue(False) for i in range(2): clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) From 45e6c5a8b20e84f4f3bc79b1b46f87ff74eda840 Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 16:49:45 +0300 Subject: [PATCH 04/11] unit test for token usage --- ForgejoRepoAPI.py | 17 ++++++++++++++--- GitHubRepoAPI.py | 17 ++++++++++++++--- contributors_parser.py | 1 - git_logger.py | 2 ++ main.py | 4 ++++ pull_requests_parser.py | 1 + test_token_usage.py | 32 +++++++++++++++++++++++--------- utils.py | 4 +++- wikipars.py | 4 ++-- 9 files changed, 63 insertions(+), 19 deletions(-) diff --git a/ForgejoRepoAPI.py b/ForgejoRepoAPI.py index dc05da6e..3a386d60 100644 --- a/ForgejoRepoAPI.py +++ b/ForgejoRepoAPI.py @@ -4,9 +4,20 @@ import isodate from pyforgejo import PyforgejoApi -from interface_wrapper import (Branch, Comment, Commit, Contributor, Invite, - IRepositoryAPI, Issue, PullRequest, Repository, - User, WikiPage, logging) +from interface_wrapper import ( + Branch, + Comment, + Commit, + Contributor, + Invite, + IRepositoryAPI, + Issue, + PullRequest, + Repository, + User, + WikiPage, + logging, +) class ForgejoRepoAPI(IRepositoryAPI): diff --git a/GitHubRepoAPI.py b/GitHubRepoAPI.py index 3b6aa2ce..a74a5efa 100644 --- a/GitHubRepoAPI.py +++ b/GitHubRepoAPI.py @@ -1,6 +1,17 @@ -from interface_wrapper import (Branch, Comment, Commit, Contributor, Invite, - IRepositoryAPI, Issue, PullRequest, Repository, - User, WikiPage, logging) +from interface_wrapper import ( + Branch, + Comment, + Commit, + Contributor, + Invite, + IRepositoryAPI, + Issue, + PullRequest, + Repository, + User, + WikiPage, + logging, +) from github import Github, GithubException diff --git a/contributors_parser.py b/contributors_parser.py index 2c2b3218..ce28e302 100644 --- a/contributors_parser.py +++ b/contributors_parser.py @@ -1,7 +1,6 @@ from dataclasses import asdict, dataclass from time import sleep from typing import Generator -from datetime import datetime from constants import EMPTY_FIELD, TIMEDELTA from interface_wrapper import IRepositoryAPI, Repository diff --git a/git_logger.py b/git_logger.py index 91f4b8bb..8e2066b5 100644 --- a/git_logger.py +++ b/git_logger.py @@ -3,6 +3,7 @@ from constants import TIMEDELTA from interface_wrapper import IRepositoryAPI, RepositoryFactory + def login(source, token, base_url): client = RepositoryFactory.create_api(source, token, base_url) return client @@ -21,6 +22,7 @@ def get_repos_from_file(repos_path: str) -> list[str]: return list_repos + class Clients: def __init__(self, source: str, tokens: list[str], base_url: str | None = None): # Возможно это можно переписать покрасивее diff --git a/main.py b/main.py index df87e860..64d144ae 100644 --- a/main.py +++ b/main.py @@ -12,6 +12,7 @@ from utils import parse_time + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--invites", help="print pending invites", action="store_true") @@ -115,6 +116,7 @@ def parse_args(): action.required = True return parser.parse_args() + def run(args, binded_repos, repos_for_wiki=None): start = parse_time(args.start.split('-')) finish = parse_time(args.finish.split('-')) @@ -150,6 +152,7 @@ def run(args, binded_repos, repos_for_wiki=None): args.out, args.google_token, args.table_id, args.sheet_id ) + def main(): args = parse_args() @@ -171,5 +174,6 @@ def main(): else: run(args, binded_repos) + if __name__ == '__main__': main() diff --git a/pull_requests_parser.py b/pull_requests_parser.py index e6eafaeb..3866d286 100644 --- a/pull_requests_parser.py +++ b/pull_requests_parser.py @@ -12,6 +12,7 @@ from interface_wrapper import IRepositoryAPI, Repository from utils import logger + @dataclass(kw_only=True, frozen=True) class PullRequestData: repository_name: str = '' diff --git a/test_token_usage.py b/test_token_usage.py index 329adee0..28207207 100644 --- a/test_token_usage.py +++ b/test_token_usage.py @@ -6,6 +6,7 @@ import git_logger + def parse_args(args): parser = argparse.ArgumentParser() parser.add_argument('--tt1', type=str, required=True, help='first test token') @@ -23,6 +24,7 @@ def parse_args(args): return parser.parse_args(args) + class TestTokenUsage(unittest.TestCase): def setUp(self): test_args = parse_args(sys.argv[1:]) @@ -53,16 +55,17 @@ def _get_rate_limit(clients: git_logger.Clients): @staticmethod def _is_only_one_token_used(limit_start, limit_finish): - return (bool(limit_start[0] - limit_finish[0]) - != bool(limit_start[1] - limit_finish[1])) - + return bool(limit_start[0] - limit_finish[0]) != bool( + limit_start[1] - limit_finish[1] + ) + @staticmethod def _is_max_token_used(limit_start, limit_finish): if limit_start[0] - limit_finish[0]: return limit_start[0] == max(limit_start) else: return limit_start[1] == max(limit_start) - + @staticmethod def _change_tokens_order(tokens, key): key %= len(tokens) @@ -80,7 +83,9 @@ def _get_usage(self, binded_repos, clients): def test_commits_parser(self): self.args.commits = True for i in range(2): - clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) limit_start, limit_finish = self._get_usage(binded_repos, clients) @@ -91,7 +96,9 @@ def test_commits_parser(self): def test_contributors_parser(self): self.args.contributors = True for i in range(2): - clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) limit_start, limit_finish = self._get_usage(binded_repos, clients) @@ -102,7 +109,9 @@ def test_contributors_parser(self): def test_issues_parser(self): self.args.issues = True for i in range(2): - clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) limit_start, limit_finish = self._get_usage(binded_repos, clients) @@ -113,7 +122,9 @@ def test_issues_parser(self): def test_invites_parser(self): self.args.invites = True for i in range(2): - clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) limit_start, limit_finish = self._get_usage(binded_repos, clients) @@ -124,7 +135,9 @@ def test_invites_parser(self): def test_pull_requests_parser(self): self.args.pull_requests = True for i in range(2): - clients = git_logger.Clients("github", self._change_tokens_order(self.tokens, i)) + clients = git_logger.Clients( + "github", self._change_tokens_order(self.tokens, i) + ) binded_repos = git_logger.get_next_binded_repo(clients, [self.repo]) limit_start, limit_finish = self._get_usage(binded_repos, clients) @@ -132,5 +145,6 @@ def test_pull_requests_parser(self): self.assertTrue(self._is_only_one_token_used(limit_start, limit_finish)) self.assertTrue(self._is_max_token_used(limit_start, limit_finish)) + if __name__ == '__main__': unittest.main(argv=[sys.argv[0]]) diff --git a/utils.py b/utils.py index 40377d6d..9088b574 100644 --- a/utils.py +++ b/utils.py @@ -4,8 +4,9 @@ from constants import MIN_SIDE_PADDING, SIDE_WHITE_SPACES, TITLE_LEN, TIMEZONE + class logger: - #TODO: отключение вывода в stdout + # TODO: отключение вывода в stdout @staticmethod def log_title(title: str, title_len: int = TITLE_LEN): final_len = max( @@ -46,6 +47,7 @@ def log_error(error: str): def log_warning(warning: str): pass + def parse_time(datetime_str) -> datetime: start = ( datetime_str[0].split('/') + datetime_str[1].split(':') diff --git a/wikipars.py b/wikipars.py index ade37dee..03e31ace 100644 --- a/wikipars.py +++ b/wikipars.py @@ -1,4 +1,3 @@ -import csv import os import time @@ -8,6 +7,7 @@ from utils import logger + def wikiparser(repositories: list[str], path_drepo: str, csv_name: str): logger.log_to_csv(csv_name, WIKI_FIELDNAMES) @@ -87,7 +87,7 @@ def wikiparser(repositories: list[str], path_drepo: str, csv_name: str): for fieldname in data_commit: print(fieldname, data_commit[fieldname], sep=': ') - + logger.log_sep() logger.log_to_csv(csv_name, data_commit) From 881d78fbadfa6eed1b8b89c0ed94c74e344bd6dc Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 16:52:42 +0300 Subject: [PATCH 05/11] unit test for token usage --- .github/workflows/tests.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 45cb8215..03552c7d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -52,22 +52,22 @@ jobs: - name: Show out.csv run: cat out.csv -token-usage-unit_test: + token-usage-unit_test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 - - name: Cache pip - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- - - name: Install dependencies - run: pip install -r requirements.txt + - name: Install dependencies + run: pip install -r requirements.txt - - name: Run test - run: python3 test_token_usage.py --tt1 ${{ secrets.TEST_TOKEN_GITHUB }} -tt2 ${{ secrets.SECOND_TEST_TOKEN_GITHUB }} --repo moevm/github_repo_commitment_calc --out out.csv \ No newline at end of file + - name: Run test + run: python3 test_token_usage.py --tt1 ${{ secrets.TEST_TOKEN_GITHUB }} -tt2 ${{ secrets.SECOND_TEST_TOKEN_GITHUB }} --repo moevm/github_repo_commitment_calc --out out.csv \ No newline at end of file From 1c858aeccbe8ba6bcd99c44a503cfab5c36e5160 Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 16:57:11 +0300 Subject: [PATCH 06/11] unit test for token usage --- commits_parser.py | 2 +- contributors_parser.py | 2 +- invites_parser.py | 2 +- issues_parser.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/commits_parser.py b/commits_parser.py index 29f28cdf..e4e248f3 100644 --- a/commits_parser.py +++ b/commits_parser.py @@ -64,7 +64,7 @@ def log_repository_commits( def log_commits( - binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], csv_name: str, start: datetime, finish: datetime, diff --git a/contributors_parser.py b/contributors_parser.py index ce28e302..53509ec7 100644 --- a/contributors_parser.py +++ b/contributors_parser.py @@ -80,7 +80,7 @@ def get_contributors_stats(client: IRepositoryAPI, repository: Repository) -> di def log_contributors( - binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], csv_name: str, fork_flag: bool, ): diff --git a/invites_parser.py b/invites_parser.py index c49e58bd..33848d8f 100644 --- a/invites_parser.py +++ b/invites_parser.py @@ -33,7 +33,7 @@ def log_repository_invitations( def log_invitations( - binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], csv_name: str, ): info = asdict(InviteData()) diff --git a/issues_parser.py b/issues_parser.py index 0fe1a3ec..c10cea59 100644 --- a/issues_parser.py +++ b/issues_parser.py @@ -189,7 +189,7 @@ def log_issue_and_comments(csv_name, issue_data: IssueData, comments): def log_issues( - binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], csv_name: str, start: datetime, finish: datetime, From 7ced6cefb4413223f5ea838f3e436d8b8a2dd65f Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 16:58:53 +0300 Subject: [PATCH 07/11] unit test for token usage --- .github/workflows/tests.yml | 2 +- pull_requests_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 03552c7d..73c2b3d7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -70,4 +70,4 @@ jobs: run: pip install -r requirements.txt - name: Run test - run: python3 test_token_usage.py --tt1 ${{ secrets.TEST_TOKEN_GITHUB }} -tt2 ${{ secrets.SECOND_TEST_TOKEN_GITHUB }} --repo moevm/github_repo_commitment_calc --out out.csv \ No newline at end of file + run: python3 test_token_usage.py --tt1 ${{ secrets.TEST_TOKEN_GITHUB }} -tt2 ${{ secrets.SECOND_TEST_TOKEN_GITHUB }} --repo moevm/github_repo_commitment_calc --out out.csv diff --git a/pull_requests_parser.py b/pull_requests_parser.py index 3866d286..fb9f59c6 100644 --- a/pull_requests_parser.py +++ b/pull_requests_parser.py @@ -189,7 +189,7 @@ def get_info(obj, attr): def log_pull_requests( - binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None], + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], csv_name: str, start: datetime, finish: datetime, From 45337f04b8d15268f446b51ccdbcb3d7b6a2eb2b Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 17:00:32 +0300 Subject: [PATCH 08/11] unit test for token usage --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 64d144ae..e040f53c 100644 --- a/main.py +++ b/main.py @@ -146,7 +146,7 @@ def run(args, binded_repos, repos_for_wiki=None): if args.contributors: contributors_parser.log_contributors(binded_repos, args.out, args.forks_include) if args.wikis: - wikipars.wiki_parser(repos_for_wiki, args.download_repos, args.out) + wikipars.wikiparser(repos_for_wiki, args.download_repos, args.out) if args.export_google_sheets: export_sheets.write_data_to_table( args.out, args.google_token, args.table_id, args.sheet_id From 3545aafbafe5939277fb4f5916a70ea26e45285d Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 17:04:21 +0300 Subject: [PATCH 09/11] unit test for token usage --- main.py | 2 +- wikipars.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index e040f53c..dc72a0bf 100644 --- a/main.py +++ b/main.py @@ -172,7 +172,7 @@ def main(): print(e) print(traceback.print_exc()) else: - run(args, binded_repos) + run(args, binded_repos, repositories) if __name__ == '__main__': diff --git a/wikipars.py b/wikipars.py index 03e31ace..fbb86425 100644 --- a/wikipars.py +++ b/wikipars.py @@ -11,13 +11,10 @@ def wikiparser(repositories: list[str], path_drepo: str, csv_name: str): logger.log_to_csv(csv_name, WIKI_FIELDNAMES) - # Создаем список репозиториев из файла - with open(repositories, 'r') as file: - list_repos = [x for x in file.read().split('\n') if x] error_repos = [] data_changes = [] - for name_rep in list_repos: + for name_rep in repositories: # Проверяем, есть ли репозиторий в папке dir_path = path_drepo + "/" + name_rep if os.path.exists(dir_path): From 03a895c3e607e3ab09d99253279ffc5fa2364180 Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 17:10:44 +0300 Subject: [PATCH 10/11] unit test for token usage --- test_token_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_token_usage.py b/test_token_usage.py index 28207207..52f2f913 100644 --- a/test_token_usage.py +++ b/test_token_usage.py @@ -27,8 +27,8 @@ def parse_args(args): class TestTokenUsage(unittest.TestCase): def setUp(self): + print(len(sys.argv[1:])) test_args = parse_args(sys.argv[1:]) - self.tokens = [test_args.tt1, test_args.tt2] self.repo = test_args.repo self.output_csv = test_args.out From 36c3cfe2d29892ef86c82961333ef8f6d65d8232 Mon Sep 17 00:00:00 2001 From: PeeachPie Date: Sat, 29 Mar 2025 17:12:27 +0300 Subject: [PATCH 11/11] unit test for token usage --- .github/workflows/tests.yml | 2 +- test_token_usage.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 73c2b3d7..650a6660 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -70,4 +70,4 @@ jobs: run: pip install -r requirements.txt - name: Run test - run: python3 test_token_usage.py --tt1 ${{ secrets.TEST_TOKEN_GITHUB }} -tt2 ${{ secrets.SECOND_TEST_TOKEN_GITHUB }} --repo moevm/github_repo_commitment_calc --out out.csv + run: python3 test_token_usage.py --tt1 ${{ secrets.TEST_TOKEN_GITHUB }} --tt2 ${{ secrets.SECOND_TEST_TOKEN_GITHUB }} --repo moevm/github_repo_commitment_calc --out out.csv diff --git a/test_token_usage.py b/test_token_usage.py index 52f2f913..ac6d516f 100644 --- a/test_token_usage.py +++ b/test_token_usage.py @@ -27,7 +27,6 @@ def parse_args(args): class TestTokenUsage(unittest.TestCase): def setUp(self): - print(len(sys.argv[1:])) test_args = parse_args(sys.argv[1:]) self.tokens = [test_args.tt1, test_args.tt2] self.repo = test_args.repo