From 696010c7d0e1a8646767e7685be98361803f4df0 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:59:02 +0100 Subject: [PATCH 1/3] Fix bug in VCF data sorting Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/masterfiles/analyze.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cfbs/masterfiles/analyze.py b/cfbs/masterfiles/analyze.py index 86867d98..48d329fa 100644 --- a/cfbs/masterfiles/analyze.py +++ b/cfbs/masterfiles/analyze.py @@ -87,8 +87,8 @@ def finalize_vcf(versions_dict, checksums_dict, files_dict): # sort version numbers, in decreasing order versions_dict["versions"] = OrderedDict( sorted( - versions_dict["versions"].items(), - key=lambda p: (version_as_comparable_list(p[0]), p[1]), + working_dict.items(), + key=lambda p: version_as_comparable_list(p[0]), reverse=True, ) ) From 951ceb26464d50b69150bdb591c18e7d9f5cdc48 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Wed, 27 Nov 2024 17:07:31 +0100 Subject: [PATCH 2/3] Output download and git differences to a JSON file, and do not exit on the first difference Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- .../masterfiles/check_download_matches_git.py | 87 +++++++++++++++---- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/cfbs/masterfiles/check_download_matches_git.py b/cfbs/masterfiles/check_download_matches_git.py index a90cc87c..bd6f2c5e 100644 --- a/cfbs/masterfiles/check_download_matches_git.py +++ b/cfbs/masterfiles/check_download_matches_git.py @@ -1,6 +1,7 @@ -import os +from collections import OrderedDict -from cfbs.utils import dict_diff, read_json, user_error +from cfbs.masterfiles.analyze import version_as_comparable_list +from cfbs.utils import dict_diff, read_json, user_error, write_json def check_download_matches_git(versions): @@ -14,7 +15,11 @@ def check_download_matches_git(versions): download_versions_dict = read_json("versions.json") git_versions_dict = read_json("versions-git.json") - os.makedirs("differences", exist_ok=True) + diffs_dict = {"differences": {}} + + nonmatching_versions = [] + extraneous_count = 0 + differing_count = 0 for version in versions: download_version_dict = download_versions_dict["versions"][version]["files"] @@ -29,18 +34,64 @@ def check_download_matches_git(versions): new_download_dict[key] = value download_version_dict = new_download_dict - with open("differences/difference-" + version + ".txt", "w") as f: - only_dl, only_git, value_diff = dict_diff( - download_version_dict, git_version_dict - ) - - print("Files only in the downloaded version:", only_dl, file=f) - print("Files only in the git version:", only_git, file=f) - print("Files with different contents:", value_diff, file=f) - - if len(only_dl) > 0 or len(value_diff) > 0: - user_error( - "Downloadable files of version " - + version - + " do not match git files" - ) + version_diffs_dict = {} + version_diffs_dict["files_only_in_downloads"] = [] + version_diffs_dict["files_only_in_git"] = [] + version_diffs_dict["files_with_different_content"] = [] + + only_dl, only_git, value_diff = dict_diff( + download_version_dict, git_version_dict + ) + + for filepath in only_dl: + version_diffs_dict["files_only_in_downloads"].append(filepath) + for filepath in only_git: + version_diffs_dict["files_only_in_git"].append(filepath) + for filepath, _, _ in value_diff: + version_diffs_dict["files_with_different_content"].append(filepath) + + diffs_dict["differences"][version] = version_diffs_dict + + if len(only_dl) > 0 or len(value_diff) > 0: + nonmatching_versions.append(version) + extraneous_count += len(only_dl) + differing_count += len(value_diff) + + nonmatching_versions.sort(key=lambda v: version_as_comparable_list(v), reverse=True) + + # fully sort differences.json: + working_dict = diffs_dict["differences"] + # sort filepaths of each version, alphabetically + for k in working_dict.keys(): + working_dict[k]["files_only_in_downloads"].sort() + working_dict[k]["files_only_in_git"].sort() + working_dict[k]["files_with_different_content"].sort() + # sort version numbers, in decreasing order + diffs_dict["differences"] = OrderedDict( + sorted( + working_dict.items(), + key=lambda p: version_as_comparable_list(p[0]), + reverse=True, + ) + ) + + write_json("differences.json", diffs_dict) + + if len(nonmatching_versions) > 0: + user_error( + "The masterfiles downloaded from github.com and cfengine.com do not match - found " + + str(extraneous_count) + + " extraneous file" + + ("" if extraneous_count == 1 else "s") + + " and " + + str(differing_count) + + " differing file" + + ("" if differing_count == 1 else "s") + + " across " + + str(len(nonmatching_versions)) + + " version" + + ("" if len(nonmatching_versions) == 1 else "s") + + " (" + + ", ".join(nonmatching_versions) + + "). See ./differences.json" + ) From 7a4d2264c261ce4dac11bd134471e95f7576fa55 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Fri, 29 Nov 2024 17:17:03 +0100 Subject: [PATCH 3/3] Improve the release-information JSON file format Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/masterfiles/analyze.py | 67 +++++++++---------- .../masterfiles/check_download_matches_git.py | 10 +-- 2 files changed, 36 insertions(+), 41 deletions(-) diff --git a/cfbs/masterfiles/analyze.py b/cfbs/masterfiles/analyze.py index 48d329fa..81978eec 100644 --- a/cfbs/masterfiles/analyze.py +++ b/cfbs/masterfiles/analyze.py @@ -23,27 +23,19 @@ def versions_checksums_files( if version not in versions_dict["versions"]: versions_dict["versions"][version] = {} - if "files" not in versions_dict["versions"][version]: - versions_dict["versions"][version]["files"] = {} - versions_dict["versions"][version]["files"][tarball_relpath] = file_checksum + versions_dict["versions"][version][tarball_relpath] = file_checksum if not file_checksum in checksums_dict["checksums"]: - checksums_dict["checksums"][file_checksum] = [] - checksums_dict["checksums"][file_checksum].append( - { - "file": tarball_relpath, - "version": version, - } - ) + checksums_dict["checksums"][file_checksum] = {} + if not tarball_relpath in checksums_dict["checksums"][file_checksum]: + checksums_dict["checksums"][file_checksum][tarball_relpath] = [] + checksums_dict["checksums"][file_checksum][tarball_relpath].append(version) if not tarball_relpath in files_dict["files"]: - files_dict["files"][tarball_relpath] = [] - files_dict["files"][tarball_relpath].append( - { - "checksum": file_checksum, - "version": version, - } - ) + files_dict["files"][tarball_relpath] = {} + if not file_checksum in files_dict["files"][tarball_relpath]: + files_dict["files"][tarball_relpath][file_checksum] = [] + files_dict["files"][tarball_relpath][file_checksum].append(version) return versions_dict, checksums_dict, files_dict @@ -53,37 +45,40 @@ def finalize_vcf(versions_dict, checksums_dict, files_dict): # checksums.json: working_dict = checksums_dict["checksums"] - # sort each list, first by version descending, then by filepath alphabetically - for k in working_dict.keys(): - working_dict[k] = sorted( - working_dict[k], - key=lambda d: ( - version_as_comparable_list_negated(d["version"]), - d["file"], - ), - ) + for c in working_dict.keys(): + for f in working_dict[c].keys(): + # sort each version list, descending + working_dict[c][f] = sorted( + working_dict[c][f], + key=lambda v: version_as_comparable_list(v), + reverse=True, + ) + # sort filepaths, alphabetically + working_dict[c] = dict_sorted_by_key(working_dict[c]) # sort checksums checksums_dict["checksums"] = dict_sorted_by_key(working_dict) # files.json: working_dict = files_dict["files"] # sort each list, first by version descending, then by checksum - for k in working_dict.keys(): - working_dict[k] = sorted( - working_dict[k], - key=lambda d: ( - version_as_comparable_list_negated(d["version"]), - d["checksum"], - ), - ) + for f in working_dict.keys(): + for c in working_dict[f].keys(): + # sort each version list, descending + working_dict[f][c] = sorted( + working_dict[f][c], + key=lambda v: version_as_comparable_list(v), + reverse=True, + ) + # sort checksums + working_dict[f] = dict_sorted_by_key(working_dict[f]) # sort files, alphabetically files_dict["files"] = dict_sorted_by_key(working_dict) # versions.json: working_dict = versions_dict["versions"] # sort files of each version - for k in working_dict.keys(): - working_dict[k]["files"] = dict_sorted_by_key(working_dict[k]["files"]) + for v in working_dict.keys(): + working_dict[v] = dict_sorted_by_key(working_dict[v]) # sort version numbers, in decreasing order versions_dict["versions"] = OrderedDict( sorted( diff --git a/cfbs/masterfiles/check_download_matches_git.py b/cfbs/masterfiles/check_download_matches_git.py index bd6f2c5e..af103709 100644 --- a/cfbs/masterfiles/check_download_matches_git.py +++ b/cfbs/masterfiles/check_download_matches_git.py @@ -22,17 +22,17 @@ def check_download_matches_git(versions): differing_count = 0 for version in versions: - download_version_dict = download_versions_dict["versions"][version]["files"] - git_version_dict = git_versions_dict["versions"][version]["files"] + dl_version_files_dict = download_versions_dict["versions"][version] + git_version_files_dict = git_versions_dict["versions"][version] # normalize downloaded version dictionary filepaths # necessary because the downloaded version and git version dictionaries have filepaths of different forms new_download_dict = {} - for key, value in download_version_dict.items(): + for key, value in dl_version_files_dict.items(): if key.startswith("masterfiles/"): key = key[12:] new_download_dict[key] = value - download_version_dict = new_download_dict + dl_version_files_dict = new_download_dict version_diffs_dict = {} version_diffs_dict["files_only_in_downloads"] = [] @@ -40,7 +40,7 @@ def check_download_matches_git(versions): version_diffs_dict["files_with_different_content"] = [] only_dl, only_git, value_diff = dict_diff( - download_version_dict, git_version_dict + dl_version_files_dict, git_version_files_dict ) for filepath in only_dl: