diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f0e87b48..12aa5a05 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,28 +1,44 @@
-name: Tests
+name: Smoke Tests
 on:
   push:
-    branches: [ "master" ]
-  pull_request:
-    branches: [ "master" ]
 jobs:
   smoke-test:
+    strategy:
+      fail-fast: false
+      matrix:
+        args:
+          - "--invites"
+          - "--commits"
+          - "--pull_requests"
+          - "--issues"
+          - "--wikis"
+          # - "--contributors"
+          # - "--workflow_runs"
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - name: Create list.txt
-        run: echo "OSLL/github_repo_commitment_calc" > list.txt
+      - name: Cache pip
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
       - name: Install dependencies
         run: pip install -r requirements.txt
+      - name: Create list.txt
+        run: echo "thehighestmath/SummerPractice" > list.txt
+
       - name: Run test
-        run: python3 main.py --commits --token ${{ secrets.TEST_TOKEN_GITHUB }} --list list.txt --out out.csv --branch master
+        run: |
+          python3 main.py ${{ matrix.args }} --token ${{ secrets.TEST_TOKEN_GITHUB }} --list list.txt --out out.csv --branch master
       - name: Check if out.csv exists
         run: ls out.csv
@@ -31,8 +47,46 @@ jobs:
         if: failure()
         run: exit 1
 
-      - name: Upload test results
-        uses: actions/upload-artifact@v4
-        with:
-          name: test-results
-          path: out.csv
+      - name: Show out.csv
+        run: cat out.csv
+
+      - name: Check header in first line
+        run: |
+          case "${{ matrix.args }}" in
+            --invites)
+              HEADER="repository name,invited login,invite creation date,invitation url"
+              ;;
+            --commits)
+              HEADER="repository name,author name,author login,author email,date and time,changed files,commit id,branch"
+              ;;
+            --pull_requests)
+              HEADER="repository name,title,id,state,commit into,commit from,created at,creator name,creator login,creator email,changed files,comment body,comment created at,comment author name,comment author login,comment author email,merger name,merger login,merger email,source branch,target branch,assignee story,related issues,labels,milestone"
+              ;;
+            --issues)
+              HEADER="repository name,number,title,state,task,created at,creator name,creator login,creator email,closer name,closer login,closer email,closed at,comment body,comment created at,comment author name,comment author login,comment author email,assignee story,connected pull requests,labels,milestone"
+              ;;
+            --wikis)
+              HEADER="repository name,author name,author login,datetime,page,action,revision id,added lines,deleted lines"
+              ;;
+            --contributors)
+              HEADER="repository name,contributor login,number of commits,additions,deletions,branch"
+              ;;
+            --workflow_runs)
+              HEADER="repository name,workflow name,run id,status,conclusion,author login,author email,date and time,url,branch"
+              ;;
+            *)
+              echo "Unknown ARG: '${{ matrix.args }}'"
+              exit 1
+              ;;
+          esac
+
+          FIRST_LINE=$(head -n 1 out.csv)
+
+          if [[ "$FIRST_LINE" == "$HEADER"* ]]; then
+            echo "Header is valid for ${{ matrix.args }}"
+          else
+            echo "::error::Header is invalid for ${{ matrix.args }}"
+            echo "Expected: $HEADER"
+            echo "Actual: $FIRST_LINE"
+            exit 1
+          fi
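Note: the matrix-driven header check above runs as bash inside the workflow. For local debugging, a minimal Python sketch of the same validation (EXPECTED_HEADERS and header_ok are hypothetical names; only two of the seven branches are shown):

```python
# Local sketch of the workflow's header check (hypothetical helper).
# EXPECTED_HEADERS mirrors two of the case branches above; extend as needed.
EXPECTED_HEADERS = {
    "--commits": "repository name,author name,author login,author email,"
                 "date and time,changed files,commit id,branch",
    "--wikis": "repository name,author name,author login,datetime,page,"
               "action,revision id,added lines,deleted lines",
}


def header_ok(csv_path: str, arg: str) -> bool:
    # The workflow accepts any first line that *starts with* the expected
    # header, matching the "$HEADER"* glob in the bash check.
    with open(csv_path, newline="") as f:
        first_line = f.readline().rstrip("\r\n")
    return first_line.startswith(EXPECTED_HEADERS[arg])
```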
diff --git a/README.md b/README.md
index 53fe1413..d2a9e468 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ python3 main.py [-i, --issues] [-t, --token] token (GitHub token in place of token)
 ```
 3. Logging pull requests
 ```commandline
-python3 main.py [-p, --pull_requests] [-t, --token] token (GitHub token in place of token) [-l, --list] list (list - path to the txt file with the list of repositories) [-o, --out] out (out - name of the csv file that will hold all the logs)
+python3 main.py [-p, --pull_requests] [-t, --token] token (GitHub token in place of token) [-l, --list] list (list - path to the txt file with the list of repositories) [-o, --out] out (out - name of the csv file that will hold all the logs) [--pr_comments] (if set, PR comments are exported as well)
 ```
 4. Logging unaccepted repository invitations
 ```commandline
diff --git a/commits_parser.py b/commits_parser.py
index 23f41fb5..e72e23d7 100644
--- a/commits_parser.py
+++ b/commits_parser.py
@@ -7,6 +7,8 @@ TIMEDELTA = 0.05
 TIMEZONE = 'Europe/Moscow'
 FIELDNAMES = ('repository name', 'author name', 'author login', 'author email', 'date and time', 'changed files', 'commit id', 'branch')
 
+GOOGLE_MAX_CELL_LEN = 50000
+
 
 def log_commit_to_csv(info, csv_name):
     with open(csv_name, 'a', newline='') as file:
@@ -39,8 +41,11 @@ def log_repository_commits(repository: Repository, csv_name, start, finish, branch):
             continue
         if commit.commit is not None:
             nvl = lambda val: val or EMPTY_FIELD
+            changed_files = '; '.join([file.filename for file in commit.files])
+            if len(changed_files) > GOOGLE_MAX_CELL_LEN:
+                changed_files = changed_files[:GOOGLE_MAX_CELL_LEN]
             commit_data = [repository.full_name, commit.commit.author.name, nvl(commit.author.login), nvl(commit.commit.author.email),
-                           commit.commit.author.date, '; '.join([file.filename for file in commit.files]), commit.commit.sha, branch]
+                           commit.commit.author.date, changed_files, commit.commit.sha, branch]
             info = dict(zip(FIELDNAMES, commit_data))
             log_commit_to_csv(info, csv_name)
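Note: GOOGLE_MAX_CELL_LEN exists because Google Sheets rejects cell values longer than 50,000 characters, and the semicolon-joined file list of a large commit can exceed that. A minimal sketch of the clamp, with clamp_cell as a hypothetical name:

```python
GOOGLE_MAX_CELL_LEN = 50000  # Google Sheets' per-cell character limit


def clamp_cell(value: str, limit: int = GOOGLE_MAX_CELL_LEN) -> str:
    # Truncate silently, exactly as commits_parser.py does above.
    return value if len(value) <= limit else value[:limit]


assert len(clamp_cell("a" * 60_000)) == GOOGLE_MAX_CELL_LEN
```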
diff --git a/main.py b/main.py
index 268dc1cf..3626774f 100644
--- a/main.py
+++ b/main.py
@@ -23,6 +23,7 @@ def parse_args():
     parser.add_argument('-l', '--list', type=str, required=True, help='Path to the file containing the list of repositories. Repositories should be separated by a line break. Names should be in the format <owner>/<repo>')
     parser.add_argument("--download_repos", type=str, help="path to downloaded repositories", default='./')
     parser.add_argument('-o', '--out', type=str, required=True, help='output filename')
+    parser.add_argument("--pr_comments", help="log comments for PR", action="store_true")
     parser.add_argument('-s', '--start', type=str, required=False, help='start time', default='2000/01/01-00:00:00')
     parser.add_argument('-f', '--finish', type=str, required=False, help='finish time', default='2400/01/01-00:00:00')
     parser.add_argument('-b', '--branch', type=str, required=False, help='branch to select commits, by default use "default" repository branch, use "all" to get all commits from all branches', default=None)
@@ -60,6 +61,8 @@ def main():
     csv_name = args.out
     path_drepo = args.download_repos
     fork_flag = args.forks_include
+    log_pr_comments = args.pr_comments
+
     start, finish = None, None
     try:
         client = git_logger.login(token=token)
@@ -74,7 +77,7 @@ def main():
         if args.commits:
             commits_parser.log_commits(client, working_repos, csv_name, start, finish, args.branch, fork_flag)
         if args.pull_requests:
-            pull_requests_parser.log_pull_requests(client, working_repos, csv_name, token, start, finish, fork_flag)
+            pull_requests_parser.log_pull_requests(client, working_repos, csv_name, token, start, finish, fork_flag, log_pr_comments)
         if args.issues:
             issues_parser.log_issues(client, working_repos, csv_name, token, start, finish, fork_flag)
         if args.invites:
diff --git a/pull_requests_parser.py b/pull_requests_parser.py
index 5cb85bc2..979d3eb9 100644
--- a/pull_requests_parser.py
+++ b/pull_requests_parser.py
@@ -71,7 +71,7 @@ def get_related_issues(pull_request_number, repo_owner, repo_name, token):
     return ';'.join(list_issues_url)
 
 
-def log_repositories_pr(repository: Repository, csv_name, token, start, finish):
+def log_repositories_pr(repository: Repository, csv_name, token, start, finish, log_comments=False):
     for pull in repository.get_pulls(state='all'):
         if pull.created_at.astimezone(pytz.timezone(TIMEZONE)) < start or pull.created_at.astimezone(
                 pytz.timezone(TIMEZONE)) > finish:
@@ -106,23 +106,25 @@ def log_repositories_pr(repository: Repository, csv_name, token, start, finish):
             'milestone': get_info(pull.milestone, 'title')
         }
 
-        if pull.get_comments().totalCount > 0:
-            for comment in pull.get_comments():
-                info = info_tmp
-                info['comment body'] = comment.body
-                info['comment created at'] = comment.created_at
-                info['comment author name'] = comment.user.name
-                info['comment author login'] = comment.user.login
-                info['comment author email'] = nvl(comment.user.email)
-                log_pr_to_csv(info, csv_name)
-                log_pr_to_stdout(info)
+        if log_comments:
+            comments = pull.get_comments()
+            if comments.totalCount > 0:
+                for comment in comments:
+                    info = info_tmp
+                    info['comment body'] = comment.body
+                    info['comment created at'] = comment.created_at
+                    info['comment author name'] = comment.user.name
+                    info['comment author login'] = comment.user.login
+                    info['comment author email'] = nvl(comment.user.email)
+                    log_pr_to_csv(info, csv_name)
+                    log_pr_to_stdout(info)
         else:
             log_pr_to_csv(info_tmp, csv_name)
             log_pr_to_stdout(info_tmp)
         sleep(TIMEDELTA)
 
 
-def log_pull_requests(client: Github, working_repos, csv_name, token, start, finish, fork_flag):
+def log_pull_requests(client: Github, working_repos, csv_name, token, start, finish, fork_flag, log_comments=False):
     with open(csv_name, 'w', newline='') as file:
         writer = csv.writer(file)
         writer.writerow(FIELDNAMES)
@@ -134,7 +136,7 @@ def log_pull_requests(client: Github, working_repos, csv_name, token, start, finish, fork_flag):
             if fork_flag:
                 for forked_repo in repo.get_forks():
                     print('=' * 20, "FORKED:", forked_repo.full_name, '=' * 20)
-                    log_repositories_pr(forked_repo, csv_name, token, start, finish)
+                    log_repositories_pr(forked_repo, csv_name, token, start, finish, log_comments)
                 sleep(TIMEDELTA)
             sleep(TIMEDELTA)
         except Exception as e:
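Note: the --pr_comments flag is threaded from parse_args through main into log_pull_requests and log_repositories_pr, so PR comments are only fetched when explicitly requested. A usage sketch (the token and repository list are placeholders):

```commandline
python3 main.py --pull_requests --pr_comments --token <token> --list list.txt --out out.csv
```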
diff --git a/utilities/upload_commit_stats/Dockerfile b/utilities/upload_commit_stats/Dockerfile
new file mode 100644
index 00000000..3dc430f7
--- /dev/null
+++ b/utilities/upload_commit_stats/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.10-slim-bullseye
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY upload_commit_stats.py .
+
+CMD ["python", "./upload_commit_stats.py"]
\ No newline at end of file
diff --git a/utilities/upload_commit_stats/README.md b/utilities/upload_commit_stats/README.md
new file mode 100644
index 00000000..87ef6f79
--- /dev/null
+++ b/utilities/upload_commit_stats/README.md
@@ -0,0 +1,20 @@
+## Docker build
+```
+docker build -t upload_commit_stats .
+```
+
+## Docker run
+```
+REPO_PATH=""
+GOOGLE_SECRET=""
+TABLE_ID=""
+
+docker run \
+    -v $REPO_PATH:/repos \
+    -v $GOOGLE_SECRET:/secret.json \
+    upload_commit_stats \
+    python3 upload_commit_stats.py \
+    --data-dir /repos \
+    --table-id $TABLE_ID \
+    --oauth-file /secret.json
+```
diff --git a/utilities/upload_commit_stats/requirements.txt b/utilities/upload_commit_stats/requirements.txt
new file mode 100644
index 00000000..2f32ca0a
--- /dev/null
+++ b/utilities/upload_commit_stats/requirements.txt
@@ -0,0 +1,3 @@
+numpy==1.23.5
+pandas==1.5.3
+pygsheets==2.0.6
\ No newline at end of file
diff --git a/utilities/upload_commit_stats/upload_commit_stats.py b/utilities/upload_commit_stats/upload_commit_stats.py
new file mode 100644
index 00000000..43878ac6
--- /dev/null
+++ b/utilities/upload_commit_stats/upload_commit_stats.py
@@ -0,0 +1,63 @@
+import argparse
+import os
+import json
+from pathlib import Path
+import pandas as pd
+import pygsheets
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(description="Uploads data from JSON files to a Google Sheet")
+    parser.add_argument('--data-dir', type=str, required=True, help="Directory containing the repositories")
+    parser.add_argument('--table-id', type=str, required=True, help="Google Sheets table ID")
+    parser.add_argument('--oauth-file', type=str, required=True, help="Path to the client_secret.json file")
+    return parser.parse_args()
+
+
+def authorize_google_sheets(oauth_file):
+    return pygsheets.authorize(service_file=oauth_file)
+
+
+def open_spreadsheet(gc, table_id):
+    return gc.open_by_key(table_id)
+
+
+def read_and_normalize_json_file(json_path):
+    with open(json_path, 'r') as f:
+        data = [json.loads(line) for line in f]
+    return pd.json_normalize(data)
+
+
+def update_sheet(spreadsheet, worksheet_name, dataframe):
+    try:
+        spreadsheet.worksheets('title', worksheet_name)
+    except pygsheets.WorksheetNotFound:
+        spreadsheet.add_worksheet(worksheet_name)
+
+    wks = spreadsheet.worksheet_by_title(worksheet_name)
+    wks.clear()
+    wks.set_dataframe(dataframe, start=(1, 1), copy_index=False, copy_head=True, fit=True)
+
+
+def process_repositories(data_dir, spreadsheet):
+    for repo_dir in os.listdir(data_dir):
+        repo_path = Path(data_dir) / repo_dir
+        json_file_path = repo_path / 'commits.json'
+        if json_file_path.exists():
+            print(f"Parse commits from {repo_dir}")
+            df = read_and_normalize_json_file(json_file_path)
+            df = df[sorted(df.columns.to_list())]
+            worksheet_name = repo_dir
+            update_sheet(spreadsheet, worksheet_name, df)
+
+
+def main():
+    args = parse_arguments()
+    gc = authorize_google_sheets(args.oauth_file)
+    spreadsheet = open_spreadsheet(gc, args.table_id)
+    process_repositories(args.data_dir, spreadsheet)
+    print("Finished!")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
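Note: the uploader can also be run outside Docker. A hypothetical invocation, assuming each subdirectory of --data-dir contains a commits.json where every line is a standalone JSON object (the format read_and_normalize_json_file expects):

```commandline
python3 upload_commit_stats.py --data-dir ./repos --table-id <table_id> --oauth-file ./secret.json
```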