From 2a92a8d4eeca42d5477f0c366464ec691357abda Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:41:38 -0400 Subject: [PATCH 1/3] Add 'PRs in Release' table to report --- .../ci_run_report.html.jinja | 9 + .../create_workflow_report.py | 311 +++++++++++++++--- 2 files changed, 279 insertions(+), 41 deletions(-) diff --git a/.github/actions/create_workflow_report/ci_run_report.html.jinja b/.github/actions/create_workflow_report/ci_run_report.html.jinja index 4c94465a16c6..a6e0df17c49f 100644 --- a/.github/actions/create_workflow_report/ci_run_report.html.jinja +++ b/.github/actions/create_workflow_report/ci_run_report.html.jinja @@ -168,6 +168,7 @@ {% endif %}

Table of Contents

+ {%- if pr_number == 0 -%} +

PRs in Release

+ {% if prs_in_release_missing_verification %} +

Some PRs are missing verification.

+ {% endif %} + {{ prs_in_release_html }} + {%- endif %} + {%- if pr_number != 0 -%}

New Fails in PR

Compared with base sha {{ base_sha }}

diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index adb682be2dcd..dda22bbcf4dd 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -6,10 +6,12 @@ from itertools import combinations import json from datetime import datetime +from datetime import timezone from functools import lru_cache from glob import glob import urllib.parse import re +import subprocess import pandas as pd from jinja2 import Environment, FileSystemLoader @@ -169,6 +171,196 @@ def get_run_details(run_id: str) -> dict: return response.json() +def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: + if len(df) == 0: + return pd.DataFrame(columns=["pr_number", "pr_name", "labels"]), False + if not GITHUB_TOKEN: + raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") + headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", + } + rows = [] + missing_verification = False + for pr_number in df["pr_number"].tolist(): + response = requests.get( + f"https://api.github.com/repos/{repo}/pulls/{pr_number}", + headers=headers, + ) + if response.status_code != 200: + raise Exception( + f"Failed to fetch pull request info: {response.status_code} {response.text}" + ) + pr = response.json() + label_names = [l["name"] for l in pr.get("labels", [])] + if any(ln.lower() == "cicd" for ln in label_names): + continue + lowered = {ln.lower() for ln in label_names} + if "verified" not in lowered and "verified-with-issue" not in lowered: + missing_verification = True + rows.append( + { + "pr_number": pr_number, + "pr_name": pr.get("title", ""), + "labels": ", ".join(sorted(label_names)), + } + ) + return pd.DataFrame(rows), missing_verification + + +def _git_rev_parse(ref: str, cwd: str | None) -> str | None: + p = subprocess.run( + ["git", "rev-parse", "--verify", ref], + cwd=cwd, + capture_output=True, + text=True, + ) + if p.returncode != 0: + return None + return p.stdout.strip() + + +def _git_is_ancestor(ancestor: str, descendant: str, cwd: str | None) -> bool: + p = subprocess.run( + ["git", "merge-base", "--is-ancestor", ancestor, descendant], + cwd=cwd, + capture_output=True, + ) + return p.returncode == 0 + + +def _git_log_merge_prs( + baseline: str, branch_ref: str, cwd: str | None, repo: str +) -> pd.DataFrame: + p = subprocess.run( + [ + "git", + "-c", + "core.quotepath=false", + "log", + f"{baseline}..{branch_ref}", + "--merges", + "--format=%H%x09%s", + ], + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + rows = [] + for line in p.stdout.splitlines(): + if not line.strip(): + continue + sha, subject = line.split("\t", 1) + m = re.match( + r"Merge pull request #(\d+) from ([^/\s]+)/", subject, re.IGNORECASE + ) + if not m: + continue + pr_number, head_owner = int(m.group(1)), m.group(2) + if head_owner.lower() != repo.split("/")[0].lower(): + continue + rows.append( + { + "pr_number": pr_number, + "merge_commit_sha": sha, + "merge_subject": subject, + } + ) + if not rows: + return pd.DataFrame(columns=["pr_number", "merge_commit_sha", "merge_subject"]) + df = pd.DataFrame(rows) + df = df.drop_duplicates(subset=["pr_number"], keep="first") + return df + + +def _find_release_baseline( + branch_ref: str, repo: str, cwd: str | None +) -> tuple[str | None, str | None]: + if not GITHUB_TOKEN: + return None, None + headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get( + f"https://api.github.com/repos/{repo}/releases?per_page=100", + headers=headers, + ) + if response.status_code != 200: + raise Exception( + f"GitHub API request failed: {response.status_code} {response.text}" + ) + for rel in response.json(): + if rel.get("draft"): + continue + tag_name = rel.get("tag_name") + if not tag_name: + continue + tag_sha = _git_rev_parse(tag_name, cwd) + if not tag_sha: + continue + if not _git_is_ancestor(tag_sha, branch_ref, cwd): + continue + return tag_name, tag_sha + return None, None + + +def _find_rebase_baseline(branch_ref: str, cwd: str | None) -> str | None: + p = subprocess.run( + [ + "git", + "log", + branch_ref, + "--reverse", + "--merges", + "-i", + "--grep=rebase-cicd", + "--grep=rebase/", + "--format=%H", + ], + cwd=cwd, + capture_output=True, + text=True, + ) + if p.returncode != 0: + return None + lines = [ln for ln in p.stdout.splitlines() if ln.strip()] + if not lines: + return None + return lines[0] + + +def get_prs_in_release_dataframe( + branch_ref: str = "HEAD", + *, + repo: str = GITHUB_REPO, + cwd: str | None = None, +) -> tuple[pd.DataFrame, bool]: + """ + PRs merged into branch_ref that belong in the next release notes: after the latest GitHub + Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. + Only merge commits whose subject has from / (e.g. from Altinity/) are included. + Columns: pr_number, pr_name, labels. Omits PRs labeled cicd. + Second value is True if any listed PR lacks verified or verified-with-issue. + """ + branch_sha = _git_rev_parse(branch_ref, cwd) + if not branch_sha: + raise Exception(f"Cannot resolve branch ref: {branch_ref!r}") + + baseline_ref, baseline_sha = _find_release_baseline(branch_ref, repo, cwd) + if not baseline_sha: + rebase_sha = _find_rebase_baseline(branch_ref, cwd) + if not rebase_sha: + raise Exception( + "No release tag on this branch and no rebase bootstrap merge found" + ) + baseline_sha = rebase_sha + + df = _git_log_merge_prs(baseline_sha, branch_ref, cwd, repo) + return _enrich_prs_in_release_merge_prs(df, repo) + + def _checks_latest_test_status_cte(commit_sha: str, branch_name: str) -> str: """ Shared filtering for gh-data.checks: anchor time excludes stateless teardown checks @@ -622,25 +814,40 @@ def format_test_status(text: str) -> str: def format_results_as_html_table(results) -> str: + if not isinstance(results, pd.DataFrame): + return results + if len(results) == 0: return "

Nothing to report

" - results.columns = [col.replace("_", " ").title() for col in results.columns] + + results = results.copy() + + def format_col_name(col_name: str) -> str: + return col_name.replace("_", " ").title().replace("Pr ", "PR ") + + results.columns = [format_col_name(col) for col in results.columns] + + formatters = { + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + "Test Status": format_test_status, + "Job Status": format_test_status, + "Status": format_test_status, + "Message": lambda m: m.replace("\n", " "), + "Identifier": lambda i: url_to_html_link( + "https://nvd.nist.gov/vuln/detail/" + i + ), + "Severity": lambda s: ( + f'{s}' + ), + "PR Number": lambda n: url_to_html_link( + f"https://github.com/{GITHUB_REPO}/pull/{n}" + ), + } + html = results.to_html( index=False, - formatters={ - "Results Link": url_to_html_link, - "Test Name": format_test_name_for_linewrap, - "Test Status": format_test_status, - "Job Status": format_test_status, - "Status": format_test_status, - "Message": lambda m: m.replace("\n", " "), - "Identifier": lambda i: url_to_html_link( - "https://nvd.nist.gov/vuln/detail/" + i - ), - "Severity": lambda s: ( - f'{s}' - ), - }, + formatters=formatters, escape=False, border=0, classes=["test-results-table"], @@ -816,7 +1023,9 @@ def create_workflow_report( settings={"use_numpy": True}, ) - fail_results = { + prs_in_release_missing_verification = False + results_dfs = { + "prs_in_release": [], "job_statuses": get_commit_statuses(commit_sha), "checks_fails": get_checks_fails(db_client, commit_sha, branch_name), "checks_known_fails": [], @@ -826,8 +1035,17 @@ def create_workflow_report( "docker_images_cves": [], } + if pr_number == 0 and not mark_preview: + try: + prs_df, prs_in_release_missing_verification = get_prs_in_release_dataframe( + branch_name, cwd=os.getcwd() + ) + results_dfs["prs_in_release"] = prs_df + except Exception as e: + print(f"Error in get_prs_in_release_dataframe: {e}") + try: - fail_results["docker_images_cves"] = ( + results_dfs["docker_images_cves"] = ( [] if not check_cves else get_cves(pr_number, commit_sha, branch_name) ) except Exception as e: @@ -835,7 +1053,7 @@ def create_workflow_report( # get_cves returns ... in the case where no Grype result files were found. # This might occur when run in preview mode. - cves_not_checked = not check_cves or fail_results["docker_images_cves"] is ... + cves_not_checked = not check_cves or results_dfs["docker_images_cves"] is ... if known_fails_file_path: if not os.path.exists(known_fails_file_path): @@ -843,7 +1061,7 @@ def create_workflow_report( else: known_fails = get_broken_tests_rules(known_fails_file_path) - fail_results["checks_known_fails"] = get_checks_known_fails( + results_dfs["checks_known_fails"] = get_checks_known_fails( db_client, commit_sha, branch_name, known_fails ) @@ -855,24 +1073,24 @@ def create_workflow_report( pr_info_html = f""" #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")} """ - fail_results["pr_new_fails"] = get_new_fails_this_pr( + results_dfs["pr_new_fails"] = get_new_fails_this_pr( db_client, pr_info, - fail_results["checks_fails"], - fail_results["regression_fails"], + results_dfs["checks_fails"], + results_dfs["regression_fails"], ) except Exception as e: pr_info_html = e pr_info = {} - fail_results["job_statuses"] = backfill_skipped_statuses( - fail_results["job_statuses"], pr_number, branch_name, commit_sha + results_dfs["job_statuses"] = backfill_skipped_statuses( + results_dfs["job_statuses"], pr_number, branch_name, commit_sha ) high_cve_count = 0 - if not cves_not_checked and len(fail_results["docker_images_cves"]) > 0: + if not cves_not_checked and len(results_dfs["docker_images_cves"]) > 0: high_cve_count = ( - fail_results["docker_images_cves"]["severity"] + results_dfs["docker_images_cves"]["severity"] .str.lower() .isin(("high", "critical")) .sum() @@ -893,43 +1111,54 @@ def create_workflow_report( "workflow_id": run_id, "commit_sha": commit_sha, "base_sha": "" if pr_number == 0 else pr_info.get("base", {}).get("sha"), - "date": f"{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC", + "date": f"{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC", "is_preview": mark_preview, + "prs_in_release_missing_verification": prs_in_release_missing_verification, "counts": { - "jobs_status": f"{sum(fail_results['job_statuses']['job_status'].value_counts().get(x, 0) for x in ('failure', 'error'))} fail/error", - "checks_errors": len(fail_results["checks_errors"]), - "checks_new_fails": len(fail_results["checks_fails"]), - "regression_new_fails": len(fail_results["regression_fails"]), + "jobs_status": f"{sum(results_dfs['job_statuses']['job_status'].value_counts().get(x, 0) for x in ('failure', 'error'))} fail/error", + "checks_errors": len(results_dfs["checks_errors"]), + "checks_new_fails": len(results_dfs["checks_fails"]), + "regression_new_fails": len(results_dfs["regression_fails"]), "cves": "N/A" if cves_not_checked else f"{high_cve_count} high/critical", "checks_known_fails": ( - "N/A" if not known_fails else len(fail_results["checks_known_fails"]) + "N/A" if not known_fails else len(results_dfs["checks_known_fails"]) + ), + "pr_new_fails": len(results_dfs["pr_new_fails"]), + "prs_in_release": ( + "N/A" + if mark_preview or pr_number != 0 + else len(results_dfs["prs_in_release"]) ), - "pr_new_fails": len(fail_results["pr_new_fails"]), }, "build_report_links": get_build_report_links( - fail_results["job_statuses"], pr_number, branch_name, commit_sha + results_dfs["job_statuses"], pr_number, branch_name, commit_sha + ), + "prs_in_release_html": ( + "

PR details are not loaded during preview.

" + if mark_preview or pr_number != 0 + else format_results_as_html_table(results_dfs["prs_in_release"]) ), "ci_jobs_status_html": format_results_as_html_table( - fail_results["job_statuses"] + results_dfs["job_statuses"] ), "checks_errors_html": format_results_as_html_table( - fail_results["checks_errors"] + results_dfs["checks_errors"] ), - "checks_fails_html": format_results_as_html_table(fail_results["checks_fails"]), + "checks_fails_html": format_results_as_html_table(results_dfs["checks_fails"]), "regression_fails_html": format_results_as_html_table( - fail_results["regression_fails"] + results_dfs["regression_fails"] ), "docker_images_cves_html": ( "

Not Checked

" if cves_not_checked - else format_results_as_html_table(fail_results["docker_images_cves"]) + else format_results_as_html_table(results_dfs["docker_images_cves"]) ), "checks_known_fails_html": ( "

Not Checked

" if not known_fails - else format_results_as_html_table(fail_results["checks_known_fails"]) + else format_results_as_html_table(results_dfs["checks_known_fails"]) ), - "new_fails_html": format_results_as_html_table(fail_results["pr_new_fails"]), + "new_fails_html": format_results_as_html_table(results_dfs["pr_new_fails"]), } # Render the template with the context From a5d87f90ab8d0d063e2657e99582568c972ebd44 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:37:14 -0400 Subject: [PATCH 2/3] highlight labels for unverified PRs --- .../create_workflow_report.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index dda22bbcf4dd..f5fee14e6159 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -32,6 +32,8 @@ CVE_SEVERITY_ORDER = {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5} +PR_LABELS_VERIFIED = {"verified", "verified-with-issue", "no-verification-needed"} + def _is_clickhouse_memory_limit_error(exc: BaseException) -> bool: if isinstance(exc, ServerException) and getattr(exc, "code", None) == 241: return True @@ -173,7 +175,7 @@ def get_run_details(run_id: str) -> dict: def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: if len(df) == 0: - return pd.DataFrame(columns=["pr_number", "pr_name", "labels"]), False + return pd.DataFrame(columns=["pr_number", "pr_name", "pr_labels"]), False if not GITHUB_TOKEN: raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") headers = { @@ -193,16 +195,15 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da ) pr = response.json() label_names = [l["name"] for l in pr.get("labels", [])] - if any(ln.lower() == "cicd" for ln in label_names): + if "cicd" in label_names: continue - lowered = {ln.lower() for ln in label_names} - if "verified" not in lowered and "verified-with-issue" not in lowered: + if not PR_LABELS_VERIFIED.intersection(label_names): missing_verification = True rows.append( { "pr_number": pr_number, "pr_name": pr.get("title", ""), - "labels": ", ".join(sorted(label_names)), + "pr_labels": ", ".join(sorted(label_names)), } ) return pd.DataFrame(rows), missing_verification @@ -337,12 +338,12 @@ def get_prs_in_release_dataframe( repo: str = GITHUB_REPO, cwd: str | None = None, ) -> tuple[pd.DataFrame, bool]: - """ + f""" PRs merged into branch_ref that belong in the next release notes: after the latest GitHub Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. Only merge commits whose subject has from / (e.g. from Altinity/) are included. Columns: pr_number, pr_name, labels. Omits PRs labeled cicd. - Second value is True if any listed PR lacks verified or verified-with-issue. + Second value is True if any listed PR lacks verified labels. """ branch_sha = _git_rev_parse(branch_ref, cwd) if not branch_sha: @@ -794,6 +795,15 @@ def url_to_html_link(url: str) -> str: return f'{text}' +def format_pr_labels_with_verification(labels: str) -> str: + """Format the PR labels with verification.""" + labels_list = labels.split(", ") + if PR_LABELS_VERIFIED.intersection(labels_list): + return labels + else: + return f'{labels} (missing verification)' + + def format_test_name_for_linewrap(text: str) -> str: """Tweak the test name to improve line wrapping.""" return f'{text}' @@ -843,6 +853,7 @@ def format_col_name(col_name: str) -> str: "PR Number": lambda n: url_to_html_link( f"https://github.com/{GITHUB_REPO}/pull/{n}" ), + "PR Labels": format_pr_labels_with_verification, } html = results.to_html( From 7cab525dbab8dc03d3c7d03e547d0ee7a4a9e498 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Tue, 31 Mar 2026 11:43:21 -0400 Subject: [PATCH 3/3] report: fix _find_rebase_baseline --- .../create_workflow_report/create_workflow_report.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index f5fee14e6159..5294791ac5c7 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -314,10 +314,11 @@ def _find_rebase_baseline(branch_ref: str, cwd: str | None) -> str | None: "log", branch_ref, "--reverse", - "--merges", "-i", - "--grep=rebase-cicd", - "--grep=rebase/", + "-E", + "--grep=^Rebase CICD", + "--grep=Merge pull request .*rebase-cicd", + "--grep=Merge pull request .*from Altinity/rebase/", "--format=%H", ], cwd=cwd,