From 48274306a5b1a130b851f9325c0920670ad696ca Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Thu, 7 Aug 2025 21:37:55 +0200 Subject: [PATCH 1/3] Fixed a Mutable Default Argument bug Hiding the static dictionary or list used as a default function argument behind a `None` is not sufficient, as dictionary and list assignment in Python only assigns the reference, rather than cloning the contents. Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/analyze.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cfbs/analyze.py b/cfbs/analyze.py index 4dc3813d..da5a1fb1 100644 --- a/cfbs/analyze.py +++ b/cfbs/analyze.py @@ -1,6 +1,7 @@ from collections import OrderedDict import os from typing import Tuple, Union +import copy from cfbs.internal_file_management import fetch_archive from cfbs.masterfiles.analyze import ( @@ -75,9 +76,9 @@ def checksums_files( ignored_path_components=[], ): if checksums_dict is None: - checksums_dict = DEFAULT_CHECKSUMS_DICT + checksums_dict = copy.deepcopy(DEFAULT_CHECKSUMS_DICT) if files_dict is None: - files_dict = DEFAULT_FILES_DICT + files_dict = copy.deepcopy(DEFAULT_FILES_DICT) for root, _, files in os.walk(files_dir_path): for name in files: @@ -520,7 +521,7 @@ def analyze_policyset( e.g. a backslash), and should not end with a `/` if it represents a file. """ if ignored_path_components is None: - ignored_path_components = DEFAULT_IGNORED_PATH_COMPONENTS + ignored_path_components = copy.deepcopy(DEFAULT_IGNORED_PATH_COMPONENTS) checksums_dict, files_dict = checksums_files( path, ignored_path_components=ignored_path_components From 4809e0b7b30ee01fc4cd3663190e5b39ef18417b Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Thu, 7 Aug 2025 23:05:35 +0200 Subject: [PATCH 2/3] Fixed bug due to attempting to MPF-normalize a path not inside masterfiles; changed the error message to suggest real paths instead of (incorrectly) MPF-normalized paths Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/analyze.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/cfbs/analyze.py b/cfbs/analyze.py index da5a1fb1..15ba8698 100644 --- a/cfbs/analyze.py +++ b/cfbs/analyze.py @@ -224,8 +224,11 @@ def filepaths_display_moved(filepaths): print("└──", filepaths[-1][0], "<-", list_or_single(filepaths[-1][1])) -def mpf_normalized_path(path, is_parentpath, masterfiles_dir): - """Returns a filepath converted from `path` to an MPF-comparable form.""" +def mpf_normalized_path(path, is_parentpath: bool, masterfiles_dir): + """Returns a filepath converted from `path` to an MPF-comparable form. + + `path` should be a path inside the masterfiles directory (or inside the parent directory, if `is_parentpath` is `True`). + """ # downloaded MPF release information filepaths always have forward slashes norm_path = path.replace(os.sep, "/") @@ -597,20 +600,18 @@ def analyze_policyset( # try to detect whether the user provided a wrong policy set path # gather all possible policy set paths, by detecting promises.cf or update.cf possible_policyset_relpaths = [] - mpfnorm_path = mpf_normalized_path(path, is_parentpath, masterfiles_dir) + for filepath in files_dict: file_name = name(filepath) if file_name in ("promises.cf", "update.cf"): - relpath = os.path.relpath(filepath, mpfnorm_path) - relpath = relpath.replace(os.sep, "/") + actual_filepath = mpf_denormalized_path( + filepath, is_parentpath, masterfiles_dir + ) + # `checksums_files` outputs paths relative to its `files_dir_path` argument, + # therefore `actual_filepath` is now relative to the user-provided path already - if "/" not in relpath: - # `"."`, not `path`, as the list collects relative paths - possible_policyset_relpaths.append(".") - if relpath.endswith("/update.cf"): - possible_policyset_relpaths.append(relpath[:-10]) - if relpath.endswith("/promises.cf"): - possible_policyset_relpaths.append(relpath[:-12]) + filepath_dir = os.path.dirname(actual_filepath) + possible_policyset_relpaths.append(filepath_dir) # for drive root, the path's parent is the path itself, so only check the parent path if this is not the case if os.path.realpath(path) != os.path.realpath(os.path.join(path, "..")): @@ -622,8 +623,9 @@ def analyze_policyset( possible_policyset_relpaths = deduplicate_list(possible_policyset_relpaths) # check whether the policy set contains update.cf or promises.cf directly in masterfiles + # `os.path.dirname` results in `''` rather than `'.'` for current directory if not ( - (masterfiles_dir if is_parentpath else ".") in possible_policyset_relpaths + (masterfiles_dir if is_parentpath else "") in possible_policyset_relpaths ): extra_error_text = "" if len(possible_policyset_relpaths) > 0: From c4b8277b3bea0dbe2301b8f1a79d4aa56a947c6d Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Thu, 7 Aug 2025 23:17:22 +0200 Subject: [PATCH 3/3] Moved policy-set path finding to a separate function and added tests Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/analyze.py | 56 +++++++----- .../analyze/parent_dir/mfiles/promises.cf | 0 .../analyze/parent_dir/mfiles/subdir/.gitkeep | 0 tests/test_analyze.py | 86 +++++++++++++++++++ 4 files changed, 120 insertions(+), 22 deletions(-) create mode 100644 tests/sample/analyze/parent_dir/mfiles/promises.cf create mode 100644 tests/sample/analyze/parent_dir/mfiles/subdir/.gitkeep create mode 100644 tests/test_analyze.py diff --git a/cfbs/analyze.py b/cfbs/analyze.py index 15ba8698..1e18bbac 100644 --- a/cfbs/analyze.py +++ b/cfbs/analyze.py @@ -505,6 +505,37 @@ def to_json_dict(self): ] +def possible_policyset_paths(path, masterfiles_dir, is_parentpath, files_dict): + """Returns a list of possible policy-set paths inside an analyzed `path`. + + The returned paths are in the form of relative paths to `path`. + """ + possible_policyset_relpaths = [] + + for filepath in files_dict: + file_name = name(filepath) + if file_name in ("promises.cf", "update.cf"): + actual_filepath = mpf_denormalized_path( + filepath, is_parentpath, masterfiles_dir + ) + # `checksums_files` output paths relative to its `files_dir_path` argument, + # therefore `actual_filepath` is now relative to the user-provided path already + + filepath_dir = os.path.dirname(actual_filepath) + possible_policyset_relpaths.append(filepath_dir) + + # for drive root, the path's parent is the path itself, so only check the parent path if this is not the case + if os.path.realpath(path) != os.path.realpath(os.path.join(path, "..")): + if os.path.exists(os.path.join(path, "..", "update.cf")) or os.path.exists( + os.path.join(path, "..", "promises.cf") + ): + possible_policyset_relpaths.append("..") + + possible_policyset_relpaths = deduplicate_list(possible_policyset_relpaths) + + return possible_policyset_relpaths + + def analyze_policyset( path, is_parentpath=False, @@ -599,28 +630,9 @@ def analyze_policyset( if reference_version is None: # try to detect whether the user provided a wrong policy set path # gather all possible policy set paths, by detecting promises.cf or update.cf - possible_policyset_relpaths = [] - - for filepath in files_dict: - file_name = name(filepath) - if file_name in ("promises.cf", "update.cf"): - actual_filepath = mpf_denormalized_path( - filepath, is_parentpath, masterfiles_dir - ) - # `checksums_files` outputs paths relative to its `files_dir_path` argument, - # therefore `actual_filepath` is now relative to the user-provided path already - - filepath_dir = os.path.dirname(actual_filepath) - possible_policyset_relpaths.append(filepath_dir) - - # for drive root, the path's parent is the path itself, so only check the parent path if this is not the case - if os.path.realpath(path) != os.path.realpath(os.path.join(path, "..")): - if os.path.exists(os.path.join(path, "..", "update.cf")) or os.path.exists( - os.path.join(path, "..", "promises.cf") - ): - possible_policyset_relpaths.append("..") - - possible_policyset_relpaths = deduplicate_list(possible_policyset_relpaths) + possible_policyset_relpaths = possible_policyset_paths( + path, masterfiles_dir, is_parentpath, files_dict + ) # check whether the policy set contains update.cf or promises.cf directly in masterfiles # `os.path.dirname` results in `''` rather than `'.'` for current directory diff --git a/tests/sample/analyze/parent_dir/mfiles/promises.cf b/tests/sample/analyze/parent_dir/mfiles/promises.cf new file mode 100644 index 00000000..e69de29b diff --git a/tests/sample/analyze/parent_dir/mfiles/subdir/.gitkeep b/tests/sample/analyze/parent_dir/mfiles/subdir/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_analyze.py b/tests/test_analyze.py new file mode 100644 index 00000000..90a03549 --- /dev/null +++ b/tests/test_analyze.py @@ -0,0 +1,86 @@ +import os + +from cfbs.analyze import checksums_files, mpf_normalized_path, possible_policyset_paths + + +# Executing the functions in particular working directories is necessary for testing relative paths. +class cwd: + def __init__(self, new_wd): + self.new_wd = os.path.expanduser(new_wd) + + def __enter__(self): + self.old_wd = os.getcwd() + os.chdir(self.new_wd) + + def __exit__(self, exc_type, exc_value, traceback): + os.chdir(self.old_wd) + + +def ppp_scaffolded(path, masterfiles_dir="masterfiles"): + """Testable variant of `cfbs.analyze.possible_policyset_paths` including prerequisite code.""" + is_parentpath = os.path.isdir(os.path.join(path, masterfiles_dir)) + + _, files_dict = checksums_files(path, ignored_path_components=[".gitkeep"]) + files_dict = files_dict["files"] + files_dict = { + mpf_normalized_path(file, is_parentpath, masterfiles_dir): checksums + for file, checksums in files_dict.items() + } + + return possible_policyset_paths(path, masterfiles_dir, is_parentpath, files_dict) + + +def test_possible_policyset_paths(): + with cwd("tests/sample/analyze"): + path = "." + assert ppp_scaffolded(path) == ["parent_dir/mfiles"] + assert ppp_scaffolded(path, "mfiles") == ["parent_dir/mfiles"] + assert ppp_scaffolded(path, "wrong_dirname") == ["parent_dir/mfiles"] + + path = "parent_dir" + assert ppp_scaffolded(path) == ["mfiles"] + assert ppp_scaffolded(path, "mfiles") == ["mfiles"] + assert ppp_scaffolded(path, "wrong_dirname") == ["mfiles"] + + path = "parent_dir/mfiles" + assert ppp_scaffolded(path) == [""] + assert ppp_scaffolded(path, "mfiles") == [""] + assert ppp_scaffolded(path, "wrong_dirname") == [""] + + path = "parent_dir/mfiles/subdir" + assert ppp_scaffolded(path) == [".."] + assert ppp_scaffolded(path, "mfiles") == [".."] + assert ppp_scaffolded(path, "wrong_dirname") == [".."] + + with cwd("tests/sample/analyze/parent_dir"): + path = "." + assert ppp_scaffolded(path) == ["mfiles"] + assert ppp_scaffolded(path, "mfiles") == ["mfiles"] + assert ppp_scaffolded(path, "wrong_dirname") == ["mfiles"] + + path = "mfiles" + assert ppp_scaffolded(path) == [""] + assert ppp_scaffolded(path, "mfiles") == [""] + assert ppp_scaffolded(path, "wrong_dirname") == [""] + + path = "mfiles/subdir" + assert ppp_scaffolded(path) == [".."] + assert ppp_scaffolded(path, "mfiles") == [".."] + assert ppp_scaffolded(path, "wrong_dirname") == [".."] + + with cwd("tests/sample/analyze/parent_dir/mfiles"): + path = "." + assert ppp_scaffolded(path) == [""] + assert ppp_scaffolded(path, "mfiles") == [""] + assert ppp_scaffolded(path, "wrong_dirname") == [""] + + path = "subdir" + assert ppp_scaffolded(path) == [".."] + assert ppp_scaffolded(path, "mfiles") == [".."] + assert ppp_scaffolded(path, "wrong_dirname") == [".."] + + with cwd("tests/sample/analyze/parent_dir/mfiles/subdir"): + path = "." + assert ppp_scaffolded(path) == [".."] + assert ppp_scaffolded(path, "mfiles") == [".."] + assert ppp_scaffolded(path, "wrong_dirname") == [".."]