From 83ae0cf4ca15ef8bdaf9908ddbc0ddeb75b4e17f Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Wed, 24 Sep 2025 19:28:34 +0200 Subject: [PATCH 1/9] Removed unused variable Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/commands.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cfbs/commands.py b/cfbs/commands.py index 5a9cc164..bb6321c8 100644 --- a/cfbs/commands.py +++ b/cfbs/commands.py @@ -802,9 +802,7 @@ def validate_command(paths=None, index_arg=None): return validate_config(config) -def _download_dependencies( - config, prefer_offline=False, redownload=False, ignore_versions=False -): +def _download_dependencies(config, redownload=False, ignore_versions=False): # TODO: This function should be split in 2: # 1. Code for downloading things into ~/.cfengine # 2. Code for copying things into ./out @@ -913,7 +911,7 @@ def build_command(ignore_versions=False): # We want the cfbs build command to be as backwards compatible as possible, # so we try building anyway and don't return error(s) init_out_folder() - _download_dependencies(config, prefer_offline=True, ignore_versions=ignore_versions) + _download_dependencies(config, ignore_versions=ignore_versions) r = perform_build(config) return r From 52a6fd5f4b4b30897d4d1423eefe5b85743894be Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Thu, 25 Sep 2025 15:28:24 +0200 Subject: [PATCH 2/9] Added type hints Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/build.py | 7 ++++--- cfbs/commands.py | 7 +++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cfbs/build.py b/cfbs/build.py index a77ca2d9..72ade13e 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -13,6 +13,7 @@ import os import logging as log import shutil +from cfbs.cfbs_config import CFBSConfig from cfbs.utils import ( canonify, cp, @@ -306,12 +307,12 @@ def _perform_build_step(module, i, step, max_length): _perform_replace_step(n, to_replace, version, filename) -def perform_build(config) -> int: +def perform_build(config: CFBSConfig) -> int: if not config.get("build"): raise CFBSExitError("No 'build' key found in the configuration") # mini-validation - for module in config.get("build", []): + for module in config["build"]: for step in module["steps"]: operation, args = split_build_step(step) @@ -341,7 +342,7 @@ def perform_build(config) -> int: print("\nSteps:") module_name_length = config.longest_module_key_length("name") - for module in config.get("build", []): + for module in config["build"]: for i, step in enumerate(module["steps"]): _perform_build_step(module, i, step, module_name_length) assert os.path.isdir("./out/masterfiles/") diff --git a/cfbs/commands.py b/cfbs/commands.py index bb6321c8..cede436d 100644 --- a/cfbs/commands.py +++ b/cfbs/commands.py @@ -802,14 +802,17 @@ def validate_command(paths=None, index_arg=None): return validate_config(config) -def _download_dependencies(config, redownload=False, ignore_versions=False): +def _download_dependencies(config: CFBSConfig, redownload=False, ignore_versions=False): # TODO: This function should be split in 2: # 1. Code for downloading things into ~/.cfengine # 2. 
Code for copying things into ./out print("\nModules:") counter = 1 max_length = config.longest_module_key_length("name") - for module in config.get("build", []): + build = config.get("build") + if build is None: + return + for module in build: name = module["name"] if name.startswith("./"): local_module_copy(module, counter, max_length) From 13ca501562e48536737113c81e72e323bf4e2c3a Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Thu, 2 Oct 2025 19:57:54 +0200 Subject: [PATCH 3/9] Renamed a function related to `replace` build step This is to free the `_perform_replace_step` name for the `if operation == replace` code. --- cfbs/build.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cfbs/build.py b/cfbs/build.py index 72ade13e..ada320c3 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -80,7 +80,7 @@ def _generate_augment(module_name, input_data): return augment -def _perform_replace_step(n, a, b, filename): +def _perform_replacement(n, a, b, filename): assert n and a and b and filename assert a not in b @@ -294,7 +294,7 @@ def _perform_build_step(module, i, step, max_length): validate_build_step(name, module, i, operation, args, strict=True) n, a, b, file = args file = os.path.join(destination, file) - _perform_replace_step(n, a, b, file) + _perform_replacement(n, a, b, file) elif operation == "replace_version": assert len(args) == 3 # New build step so let's be a bit strict about validating it: @@ -304,7 +304,7 @@ def _perform_build_step(module, i, step, max_length): to_replace = args[1] filename = os.path.join(destination, args[2]) version = module["version"] - _perform_replace_step(n, to_replace, version, filename) + _perform_replacement(n, to_replace, version, filename) def perform_build(config: CFBSConfig) -> int: From 3dade252d46ffd67622cc8963f3a2d4d455bd4f5 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Thu, 2 Oct 2025 20:34:00 +0200 Subject: [PATCH 4/9] Refactored `_perform_build_step` into separate functions The `_perform_build_step` mega-function has been getting long and is problematic in cases where individual build steps take unique arguments or return different things. 
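For illustration, a simplified sketch of the dispatch shape this refactor produces (the handler names are the ones introduced in the diff below; their bodies are omitted here, so this outline is not runnable on its own): each build step gets its own small handler, and the loop in perform_build only routes to it, so a step that needs extra arguments or a return value no longer has to fit one shared mega-function signature.

    # Illustrative outline only -- the real handlers are defined in the diff below.
    for module in config["build"]:
        for i, step in enumerate(module["steps"]):
            operation, args = split_build_step(step)
            source, destination = module["_directory"], "out/masterfiles"
            prefix = "%03d %s :" % (module["_counter"], module["name"])  # simplified
            if operation == "copy":
                _perform_copy_step(args, source, destination, prefix)
            elif operation == "run":
                _perform_run_step(args, source, prefix)
            # ...one branch per remaining build step (delete, json, append, ...)
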
--- cfbs/build.py | 405 ++++++++++++++++++++++++++++---------------------- 1 file changed, 225 insertions(+), 180 deletions(-) diff --git a/cfbs/build.py b/cfbs/build.py index ada320c3..13c34bec 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -123,188 +123,199 @@ def _perform_replacement(n, a, b, filename): raise CFBSExitError("Failed to write to '%s'" % (filename,)) -def _perform_build_step(module, i, step, max_length): - operation, args = split_build_step(step) - name = module["name"] - source = module["_directory"] - counter = module["_counter"] - destination = "out/masterfiles" - - prefix = "%03d %s :" % (counter, pad_right(name, max_length)) - - assert operation in AVAILABLE_BUILD_STEPS # Should already be validated - if operation == "copy": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s copy '%s' 'masterfiles/%s'" % (prefix, src, dst)) - src, dst = os.path.join(source, src), os.path.join(destination, dst) - cp(src, dst) - elif operation == "run": - shell_command = " ".join(args) - print("%s run '%s'" % (prefix, shell_command)) - sh(shell_command, source) - elif operation == "delete": - files = [args] if type(args) is str else args - assert len(files) > 0 - as_string = " ".join(["'%s'" % f for f in files]) - print("%s delete %s" % (prefix, as_string)) - for file in files: - if not rm(os.path.join(source, file), True): - print( - "Warning: tried to delete '%s' but path did not exist." - % os.path.join(source, file) - ) - elif operation == "json": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s json '%s' 'masterfiles/%s'" % (prefix, src, dst)) - if not os.path.isfile(os.path.join(source, src)): - raise CFBSExitError("'%s' is not a file" % src) - src, dst = os.path.join(source, src), os.path.join(destination, dst) - extras, original = read_json(src), read_json(dst) - if not extras: - print("Warning: '%s' looks empty, adding nothing" % os.path.basename(src)) - if original: - merged = merge_json(original, extras) - if os.path.basename(dst) == "def.json": - merged = deduplicate_def_json(merged) - else: - merged = extras - write_json(dst, merged) - elif operation == "append": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s append '%s' 'masterfiles/%s'" % (prefix, src, dst)) - src, dst = os.path.join(source, src), os.path.join(destination, dst) - if not os.path.exists(dst): - touch(dst) - assert os.path.isfile(dst) - sh("cat '%s' >> '%s'" % (src, dst)) - elif operation == "directory": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("{} directory '{}' 'masterfiles/{}'".format(prefix, src, dst)) - dstarg = dst # save this for adding .cf files to inputs - src, dst = os.path.join(source, src), os.path.join(destination, dst) - defjson = os.path.join(destination, "def.json") - merged = read_json(defjson) - if not merged: - merged = {} - for root, _, files in os.walk(src): - for f in files: - if f == "def.json": - extra = read_json(os.path.join(root, f)) - if extra: - merged = merge_json(merged, extra) - merged = deduplicate_def_json(merged) - else: - s = os.path.join(root, f) - d = os.path.join(destination, dstarg, root[len(src) :], f) - log.debug("Copying '%s' to '%s'" % (s, d)) - cp(s, d) - write_json(defjson, merged) - elif operation == "input": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s input '%s' 'masterfiles/%s'" % (prefix, src, dst)) - if src.startswith(name + "/"): - log.warning( - "Deprecated 'input' build step behavior - it should be: 'input ./input.json def.json'" - ) - # We'll translate it to 
what it should be - # TODO: Consider removing this behavior for cfbs 4? - src = "." + src[len(name) :] - src = os.path.join(name, src) - dst = os.path.join(destination, dst) - if not os.path.isfile(os.path.join(src)): - log.warning( - "Input data '%s' does not exist: Skipping build step." - % os.path.basename(src) - ) - return - extras, original = read_json(src), read_json(dst) - extras = _generate_augment(name, extras) - log.debug("Generated augment: %s", pretty(extras)) - if not extras: - raise CFBSExitError( - "Input data '%s' is incomplete: Skipping build step." - % os.path.basename(src) +def _perform_copy_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("%s copy '%s' 'masterfiles/%s'" % (prefix, src, dst)) + src, dst = os.path.join(source, src), os.path.join(destination, dst) + cp(src, dst) + + +def _perform_run_step(args, source, prefix): + shell_command = " ".join(args) + print("%s run '%s'" % (prefix, shell_command)) + sh(shell_command, source) + + +def _perform_delete_step(args, source, prefix): + files = [args] if type(args) is str else args + assert len(files) > 0 + as_string = " ".join(["'%s'" % f for f in files]) + print("%s delete %s" % (prefix, as_string)) + for file in files: + if not rm(os.path.join(source, file), True): + print( + "Warning: tried to delete '%s' but path did not exist." + % os.path.join(source, file) ) - if original: - log.debug("Original def.json: %s", pretty(original)) - merged = merge_json(original, extras) + + +def _perform_json_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("%s json '%s' 'masterfiles/%s'" % (prefix, src, dst)) + if not os.path.isfile(os.path.join(source, src)): + raise CFBSExitError("'%s' is not a file" % src) + src, dst = os.path.join(source, src), os.path.join(destination, dst) + extras, original = read_json(src), read_json(dst) + if not extras: + print("Warning: '%s' looks empty, adding nothing" % os.path.basename(src)) + if original: + merged = merge_json(original, extras) + if os.path.basename(dst) == "def.json": merged = deduplicate_def_json(merged) - else: - merged = extras - log.debug("Merged def.json: %s", pretty(merged)) - write_json(dst, merged) - elif operation == "policy_files": - files = [] - for file in args: - if file.startswith("./"): - file = file[2:] - if file.endswith(".cf"): - files.append(file) - elif file.endswith("/"): - cf_files = find("out/masterfiles/" + file, extension=".cf") - files += (strip_left(f, "out/masterfiles/") for f in cf_files) + else: + merged = extras + write_json(dst, merged) + + +def _perform_append_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("%s append '%s' 'masterfiles/%s'" % (prefix, src, dst)) + src, dst = os.path.join(source, src), os.path.join(destination, dst) + if not os.path.exists(dst): + touch(dst) + assert os.path.isfile(dst) + sh("cat '%s' >> '%s'" % (src, dst)) + + +def _perform_directory_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("{} directory '{}' 'masterfiles/{}'".format(prefix, src, dst)) + dstarg = dst # save this for adding .cf files to inputs + src, dst = os.path.join(source, src), os.path.join(destination, dst) + defjson = os.path.join(destination, "def.json") + merged = read_json(defjson) + if not merged: + merged = {} + for root, _, files in os.walk(src): + for f in files: + if f == "def.json": + extra = read_json(os.path.join(root, f)) + if 
extra: + merged = merge_json(merged, extra) + merged = deduplicate_def_json(merged) else: - raise CFBSExitError( - "Unsupported filetype '%s' for build step '%s': " - % (file, operation) - + "Expected directory (*/) of policy file (*.cf)" - ) - print("%s policy_files '%s'" % (prefix, "' '".join(files) if files else "")) - augment = {"inputs": files} - log.debug("Generated augment: %s" % pretty(augment)) - path = os.path.join(destination, "def.json") - original = read_json(path) - log.debug("Original def.json: %s" % pretty(original)) - if original: - merged = merge_json(original, augment) - merged = deduplicate_def_json(merged) - else: - merged = augment - log.debug("Merged def.json: %s", pretty(merged)) - write_json(path, merged) - elif operation == "bundles": - bundles = args - print("%s bundles '%s'" % (prefix, "' '".join(bundles) if bundles else "")) - augment = {"vars": {"control_common_bundlesequence_end": bundles}} - log.debug("Generated augment: %s" % pretty(augment)) - path = os.path.join(destination, "def.json") - original = read_json(path) - log.debug("Original def.json: %s" % pretty(original)) - if original: - merged = merge_json(original, augment) - merged = deduplicate_def_json(merged) + s = os.path.join(root, f) + d = os.path.join(destination, dstarg, root[len(src) :], f) + log.debug("Copying '%s' to '%s'" % (s, d)) + cp(s, d) + write_json(defjson, merged) + + +def _perform_input_step(args, name, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("%s input '%s' 'masterfiles/%s'" % (prefix, src, dst)) + if src.startswith(name + "/"): + log.warning( + "Deprecated 'input' build step behavior - it should be: 'input ./input.json def.json'" + ) + # We'll translate it to what it should be + # TODO: Consider removing this behavior for cfbs 4? + src = "." + src[len(name) :] + src = os.path.join(name, src) + dst = os.path.join(destination, dst) + if not os.path.isfile(os.path.join(src)): + log.warning( + "Input data '%s' does not exist: Skipping build step." + % os.path.basename(src) + ) + return + extras, original = read_json(src), read_json(dst) + extras = _generate_augment(name, extras) + log.debug("Generated augment: %s", pretty(extras)) + if not extras: + raise CFBSExitError( + "Input data '%s' is incomplete: Skipping build step." 
+ % os.path.basename(src) + ) + if original: + log.debug("Original def.json: %s", pretty(original)) + merged = merge_json(original, extras) + merged = deduplicate_def_json(merged) + else: + merged = extras + log.debug("Merged def.json: %s", pretty(merged)) + write_json(dst, merged) + + +def _perform_policy_files_step(operation, args, destination, prefix): + files = [] + for file in args: + if file.startswith("./"): + file = file[2:] + if file.endswith(".cf"): + files.append(file) + elif file.endswith("/"): + cf_files = find("out/masterfiles/" + file, extension=".cf") + files += (strip_left(f, "out/masterfiles/") for f in cf_files) else: - merged = augment - log.debug("Merged def.json: %s", pretty(merged)) - write_json(path, merged) - elif operation == "replace": - assert len(args) == 4 - print("%s replace '%s'" % (prefix, "' '".join(args))) - # New build step so let's be a bit strict about validating it: - validate_build_step(name, module, i, operation, args, strict=True) - n, a, b, file = args - file = os.path.join(destination, file) - _perform_replacement(n, a, b, file) - elif operation == "replace_version": - assert len(args) == 3 - # New build step so let's be a bit strict about validating it: - validate_build_step(name, module, i, operation, args, strict=True) - print("%s replace_version '%s'" % (prefix, "' '".join(args))) - n = args[0] - to_replace = args[1] - filename = os.path.join(destination, args[2]) - version = module["version"] - _perform_replacement(n, to_replace, version, filename) + raise CFBSExitError( + "Unsupported filetype '%s' for build step '%s': " % (file, operation) + + "Expected directory (*/) of policy file (*.cf)" + ) + print("%s policy_files '%s'" % (prefix, "' '".join(files) if files else "")) + augment = {"inputs": files} + log.debug("Generated augment: %s" % pretty(augment)) + path = os.path.join(destination, "def.json") + original = read_json(path) + log.debug("Original def.json: %s" % pretty(original)) + if original: + merged = merge_json(original, augment) + merged = deduplicate_def_json(merged) + else: + merged = augment + log.debug("Merged def.json: %s", pretty(merged)) + write_json(path, merged) + + +def _perform_bundles_step(args, prefix, destination): + bundles = args + print("%s bundles '%s'" % (prefix, "' '".join(bundles) if bundles else "")) + augment = {"vars": {"control_common_bundlesequence_end": bundles}} + log.debug("Generated augment: %s" % pretty(augment)) + path = os.path.join(destination, "def.json") + original = read_json(path) + log.debug("Original def.json: %s" % pretty(original)) + if original: + merged = merge_json(original, augment) + merged = deduplicate_def_json(merged) + else: + merged = augment + log.debug("Merged def.json: %s", pretty(merged)) + write_json(path, merged) + + +def _perform_replace_step(module, i, operation, args, name, destination, prefix): + assert len(args) == 4 + print("%s replace '%s'" % (prefix, "' '".join(args))) + # New build step so let's be a bit strict about validating it: + validate_build_step(name, module, i, operation, args, strict=True) + n, a, b, file = args + file = os.path.join(destination, file) + _perform_replacement(n, a, b, file) + + +def _perform_replace_version_step( + module, i, operation, args, name, destination, prefix +): + assert len(args) == 3 + # New build step so let's be a bit strict about validating it: + validate_build_step(name, module, i, operation, args, strict=True) + print("%s replace_version '%s'" % (prefix, "' '".join(args))) + n = args[0] + to_replace = args[1] + filename 
= os.path.join(destination, args[2]) + version = module["version"] + _perform_replacement(n, to_replace, version, filename) def perform_build(config: CFBSConfig) -> int: @@ -341,10 +352,44 @@ def perform_build(config: CFBSConfig) -> int: ) print("\nSteps:") - module_name_length = config.longest_module_key_length("name") + max_length = config.longest_module_key_length("name") for module in config["build"]: for i, step in enumerate(module["steps"]): - _perform_build_step(module, i, step, module_name_length) + operation, args = split_build_step(step) + name = module["name"] + source = module["_directory"] + counter = module["_counter"] + destination = "out/masterfiles" + + prefix = "%03d %s :" % (counter, pad_right(name, max_length)) + + if operation == "copy": + _perform_copy_step(args, source, destination, prefix) + elif operation == "run": + _perform_run_step(args, source, prefix) + elif operation == "delete": + _perform_delete_step(args, source, prefix) + elif operation == "json": + _perform_json_step(args, source, destination, prefix) + elif operation == "append": + _perform_append_step(args, source, destination, prefix) + elif operation == "directory": + _perform_directory_step(args, source, destination, prefix) + elif operation == "input": + _perform_input_step(args, name, destination, prefix) + elif operation == "policy_files": + _perform_policy_files_step(operation, args, destination, prefix) + elif operation == "bundles": + _perform_bundles_step(args, prefix, destination) + elif operation == "replace": + _perform_replace_step( + module, i, operation, args, name, destination, prefix + ) + elif operation == "replace_version": + _perform_replace_version_step( + module, i, operation, args, name, destination, prefix + ) + assert os.path.isdir("./out/masterfiles/") shutil.copyfile("./cfbs.json", "./out/masterfiles/cfbs.json") if os.path.isfile("out/masterfiles/def.json"): From d520eef7c7c269e3fc43a136d0658e5d6ccdb6f8 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Thu, 2 Oct 2025 20:48:21 +0200 Subject: [PATCH 5/9] Removed unnecessary function arguments Only one value for them was valid. 
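Concretely (illustrative; both lines are taken from the replace handler in the diff below): the removed `operation` argument could only ever hold the literal name of the step a given handler serves, so each handler now passes that literal itself.

    # before: the caller threaded `operation` through, though it was always "replace" here
    validate_build_step(name, module, i, operation, args, strict=True)
    # after: the handler hardcodes the only valid value
    validate_build_step(name, module, i, "replace", args, strict=True)
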
--- cfbs/build.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/cfbs/build.py b/cfbs/build.py index 13c34bec..357712b1 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -247,7 +247,7 @@ def _perform_input_step(args, name, destination, prefix): write_json(dst, merged) -def _perform_policy_files_step(operation, args, destination, prefix): +def _perform_policy_files_step(args, destination, prefix): files = [] for file in args: if file.startswith("./"): @@ -259,7 +259,7 @@ def _perform_policy_files_step(operation, args, destination, prefix): files += (strip_left(f, "out/masterfiles/") for f in cf_files) else: raise CFBSExitError( - "Unsupported filetype '%s' for build step '%s': " % (file, operation) + "Unsupported filetype '%s' for build step 'policy_files': " % file + "Expected directory (*/) of policy file (*.cf)" ) print("%s policy_files '%s'" % (prefix, "' '".join(files) if files else "")) @@ -294,22 +294,20 @@ def _perform_bundles_step(args, prefix, destination): write_json(path, merged) -def _perform_replace_step(module, i, operation, args, name, destination, prefix): +def _perform_replace_step(module, i, args, name, destination, prefix): assert len(args) == 4 print("%s replace '%s'" % (prefix, "' '".join(args))) # New build step so let's be a bit strict about validating it: - validate_build_step(name, module, i, operation, args, strict=True) + validate_build_step(name, module, i, "replace", args, strict=True) n, a, b, file = args file = os.path.join(destination, file) _perform_replacement(n, a, b, file) -def _perform_replace_version_step( - module, i, operation, args, name, destination, prefix -): +def _perform_replace_version_step(module, i, args, name, destination, prefix): assert len(args) == 3 # New build step so let's be a bit strict about validating it: - validate_build_step(name, module, i, operation, args, strict=True) + validate_build_step(name, module, i, "replace_version", args, strict=True) print("%s replace_version '%s'" % (prefix, "' '".join(args))) n = args[0] to_replace = args[1] @@ -358,9 +356,9 @@ def perform_build(config: CFBSConfig) -> int: operation, args = split_build_step(step) name = module["name"] source = module["_directory"] - counter = module["_counter"] destination = "out/masterfiles" + counter = module["_counter"] prefix = "%03d %s :" % (counter, pad_right(name, max_length)) if operation == "copy": @@ -378,16 +376,14 @@ def perform_build(config: CFBSConfig) -> int: elif operation == "input": _perform_input_step(args, name, destination, prefix) elif operation == "policy_files": - _perform_policy_files_step(operation, args, destination, prefix) + _perform_policy_files_step(args, destination, prefix) elif operation == "bundles": _perform_bundles_step(args, prefix, destination) elif operation == "replace": - _perform_replace_step( - module, i, operation, args, name, destination, prefix - ) + _perform_replace_step(module, i, args, name, destination, prefix) elif operation == "replace_version": _perform_replace_version_step( - module, i, operation, args, name, destination, prefix + module, i, args, name, destination, prefix ) assert os.path.isdir("./out/masterfiles/") From 4095eb47bde3764ec2eb94e33c501f2d9d765591 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Fri, 3 Oct 2025 18:50:13 +0200 Subject: [PATCH 6/9] Implemented an option to write file overwrite diffs during a copy build step to a file, and warnings on identical file overwrites Ticket: ENT-13116 Ticket: 
ENT-13117 Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/args.py | 8 +++++ cfbs/build.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++-- cfbs/cfbs.1 | 8 +++-- cfbs/commands.py | 4 +-- cfbs/main.py | 9 +++++- cfbs/utils.py | 56 +++++++++++++++++++++++++++++++++- 6 files changed, 156 insertions(+), 8 deletions(-) diff --git a/cfbs/args.py b/cfbs/args.py index f9f53139..d46d6277 100644 --- a/cfbs/args.py +++ b/cfbs/args.py @@ -27,6 +27,7 @@ class ArgsTypesNamespace(argparse.Namespace): git_user_email = None # type: Optional[str] git_commit_message = None # type: Optional[str] ignore_versions_json = False # type: bool + diffs = None # type: Optional[str] omit_download = False # type: bool check_against_git = False # type: bool minimum_version = None # type: Optional[str] @@ -161,6 +162,13 @@ def get_arg_parser(whitespace_for_manual=False): help="Ignore versions.json. Necessary in case of a custom index or testing changes to the default index.", action="store_true", ) + parser.add_argument( + "--diffs", + help="Write diffs of files overwritten with a copy build step during 'cfbs build' to the specified file", + nargs="?", + const="diffs.txt", + default=None, + ) parser.add_argument( "--omit-download", help="Use existing masterfiles instead of downloading in 'cfbs generate-release-information'", diff --git a/cfbs/build.py b/cfbs/build.py index 357712b1..cebc7d78 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -17,13 +17,16 @@ from cfbs.utils import ( canonify, cp, + cp_dry_itemize, deduplicate_def_json, + file_diff_text, find, merge_json, mkdir, pad_right, read_json, rm, + save_file, sh, strip_left, touch, @@ -129,8 +132,68 @@ def _perform_copy_step(args, source, destination, prefix): dst = "" print("%s copy '%s' 'masterfiles/%s'" % (prefix, src, dst)) src, dst = os.path.join(source, src), os.path.join(destination, dst) + + step_diffs_data = "" + + itemization = cp_dry_itemize(src, dst) + noop_overwrites_relpaths = [] + actual_overwrites_relpaths = [] + for item_string, file_relpath in itemization: + if item_string[1] != "f": + # only consider regular files + continue + if item_string[0] == "." 
or len(item_string) < 3: + # the first character being a dot means that it's a no-op overwrite (possibly except file attributes) + # explanation for the `< 3` comparison: + # if all attributes are unchanged, the rsync item string will use spaces instead of dots and they will have been parsed away earlier + noop_overwrites_relpaths.append(file_relpath) + continue + if item_string[2] == "+": + # the copied file is new + continue + if item_string[2] == "c": + # the copied file has a different checksum + actual_overwrites_relpaths.append(file_relpath) + continue + elif item_string[2] == ".": + # the copied regular file doesn't have a different checksum + noop_overwrites_relpaths.append(file_relpath) + log.debug("Novel item string: %s %s" % (item_string, file_relpath)) + for file_relpath in actual_overwrites_relpaths: + if os.path.isfile(src): + fileA = src + else: + fileA = os.path.join(src, file_relpath) + if os.path.isfile(dst): + fileB = dst + else: + fileB = os.path.join(dst, file_relpath) + file_diff_data = file_diff_text(fileA, fileB) + step_diffs_data += file_diff_data + if len(noop_overwrites_relpaths) > 0: + warning_message = ( + "Identical file overwrites occurred during copy.\n" + + " Check your modules and their build steps to ascertain whether this is intentional.\n" + + " In most cases, the cause is a file from a later module already being provided by an earlier module (commonly stock masterfiles).\n" + + " In that case, the file is best deleted from the later module(s).\n" + + " Identical overwrites count: %s\n" % len(noop_overwrites_relpaths) + ) + # display affected files, without flooding the output + if len(noop_overwrites_relpaths) < 20: + for overwrite_noop in noop_overwrites_relpaths: + warning_message += " " + overwrite_noop + "\n" + else: + for overwrite_noop in noop_overwrites_relpaths[:9]: + warning_message += " " + overwrite_noop + "\n" + warning_message += " ...\n" + for overwrite_noop in noop_overwrites_relpaths[-9:]: + warning_message += " " + overwrite_noop + "\n" + # display all the messages as one warning + log.warning(warning_message) cp(src, dst) + return step_diffs_data + def _perform_run_step(args, source, prefix): shell_command = " ".join(args) print("%s run '%s'" % (prefix, shell_command)) sh(shell_command, source) @@ -316,7 +379,7 @@ def _perform_replace_version_step(module, i, args, name, destination, prefix): _perform_replacement(n, to_replace, version, filename) -def perform_build(config: CFBSConfig) -> int: +def perform_build(config: CFBSConfig, diffs_filename=None) -> int: if not config.get("build"): raise CFBSExitError("No 'build' key found in the configuration") @@ -349,6 +412,8 @@ def perform_build(config: CFBSConfig) -> int: % (step, expected, actual) ) + diffs_data = "" + print("\nSteps:") max_length = config.longest_module_key_length("name") for module in config["build"]: @@ -362,7 +427,8 @@ def perform_build(config: CFBSConfig) -> int: prefix = "%03d %s :" % (counter, pad_right(name, max_length)) if operation == "copy": - _perform_copy_step(args, source, destination, prefix) + step_diffs_data = _perform_copy_step(args, source, destination, prefix) + diffs_data += step_diffs_data elif operation == "run": _perform_run_step(args, source, prefix) elif operation == "delete": @@ -386,6 +452,15 @@ def perform_build(config: CFBSConfig) -> int: module, i, args, name, destination, prefix ) + if diffs_filename is not None: + try: + save_file(diffs_filename, diffs_data) + except IsADirectoryError: + print("") + log.warning( + "An existing directory was provided as the '--diffs' file path - writing the diffs file for the 
build failed - continuing build..." + ) + assert os.path.isdir("./out/masterfiles/") shutil.copyfile("./cfbs.json", "./out/masterfiles/cfbs.json") if os.path.isfile("out/masterfiles/def.json"): diff --git a/cfbs/cfbs.1 b/cfbs/cfbs.1 index 28441e99..baac7aca 100644 --- a/cfbs/cfbs.1 +++ b/cfbs/cfbs.1 @@ -1,9 +1,9 @@ -.TH CFBS "1" "2025\-08\-14" "cfbs" "CFEngine Build System manual" +.TH CFBS "1" "2025\-10\-03" "cfbs" "CFEngine Build System manual" .SH NAME cfbs \- combines multiple modules into 1 policy set to deploy on your infrastructure. Modules can be custom promise types, JSON files which enable certain functionality, or reusable CFEngine policy. The modules you use can be written by the CFEngine team, others in the community, your colleagues, or yourself. .SH SYNOPSIS .B cfbs -[-h] [--loglevel LOGLEVEL] [-M] [--version] [--force] [--non-interactive] [--index INDEX] [--check] [--checksum CHECKSUM] [--keep-order] [--git {yes,no}] [--git-user-name GIT_USER_NAME] [--git-user-email GIT_USER_EMAIL] [--git-commit-message GIT_COMMIT_MESSAGE] [--ignore-versions-json] [--omit-download] [--check-against-git] [--from MINIMUM_VERSION] [--to-json [TO_JSON]] [--reference-version REFERENCE_VERSION] [--masterfiles-dir MASTERFILES_DIR] [--ignored-path-components [IGNORED_PATH_COMPONENTS ...]] [--offline] [--masterfiles MASTERFILES] [cmd] [args ...] +[-h] [--loglevel LOGLEVEL] [-M] [--version] [--force] [--non-interactive] [--index INDEX] [--check] [--checksum CHECKSUM] [--keep-order] [--git {yes,no}] [--git-user-name GIT_USER_NAME] [--git-user-email GIT_USER_EMAIL] [--git-commit-message GIT_COMMIT_MESSAGE] [--ignore-versions-json] [--diffs [DIFFS]] [--omit-download] [--check-against-git] [--from MINIMUM_VERSION] [--to-json [TO_JSON]] [--reference-version REFERENCE_VERSION] [--masterfiles-dir MASTERFILES_DIR] [--ignored-path-components [IGNORED_PATH_COMPONENTS ...]] [--offline] [--masterfiles MASTERFILES] [cmd] [args ...] .SH DESCRIPTION CFEngine Build System. @@ -72,6 +72,10 @@ Specify git commit message \fB\-\-ignore\-versions\-json\fR Ignore versions.json. Necessary in case of a custom index or testing changes to the default index. 
+.TP +\fB\-\-diffs\fR \fI\,[DIFFS]\/\fR +Write diffs of files overwritten with a copy build step during 'cfbs build' to the specified file + .TP \fB\-\-omit\-download\fR Use existing masterfiles instead of downloading in 'cfbs generate\-release\-information' diff --git a/cfbs/commands.py b/cfbs/commands.py index cede436d..8dab3250 100644 --- a/cfbs/commands.py +++ b/cfbs/commands.py @@ -902,7 +902,7 @@ def download_command(force, ignore_versions=False): @cfbs_command("build") -def build_command(ignore_versions=False): +def build_command(ignore_versions=False, diffs_filename=None): config = CFBSConfig.get_instance() r = validate_config(config) if r != 0: @@ -915,7 +915,7 @@ def build_command(ignore_versions=False): # so we try building anyway and don't return error(s) init_out_folder() _download_dependencies(config, ignore_versions=ignore_versions) - r = perform_build(config) + r = perform_build(config, diffs_filename) return r diff --git a/cfbs/main.py b/cfbs/main.py index b5b6b82c..f2f1b35b 100644 --- a/cfbs/main.py +++ b/cfbs/main.py @@ -138,6 +138,11 @@ def _main() -> int: % args.command ) + if args.diffs and args.command != "build": + raise CFBSUserError( + "The option --diffs is only for 'cfbs build', not 'cfbs %s'" % args.command + ) + if args.non_interactive and args.command not in ( "init", "add", @@ -213,7 +218,9 @@ def _main() -> int: if args.command == "download": return commands.download_command(args.force) if args.command == "build": - return commands.build_command(ignore_versions=args.ignore_versions_json) + return commands.build_command( + ignore_versions=args.ignore_versions_json, diffs_filename=args.diffs + ) if args.command == "install": return commands.install_command(args.args) if args.command == "update": diff --git a/cfbs/utils.py b/cfbs/utils.py index 1f8f7c74..d408ce82 100644 --- a/cfbs/utils.py +++ b/cfbs/utils.py @@ -1,3 +1,5 @@ +import difflib +import logging as log import os import re import sys @@ -10,7 +12,7 @@ import urllib.error from collections import OrderedDict from shutil import rmtree -from typing import Iterable, Union +from typing import Iterable, List, Tuple, Union from cfbs.pretty import pretty @@ -111,6 +113,24 @@ def display_diff(path_A, path_B): raise +def file_diff(filepath_A, filepath_B): + with open(filepath_A) as f: + lines_A = f.readlines() + with open(filepath_B) as f: + lines_B = f.readlines() + + u_diff = difflib.unified_diff(lines_A, lines_B, filepath_A, filepath_B) + + return u_diff + + +def file_diff_text(filepath_A, filepath_B): + u_diff = file_diff(filepath_A, filepath_B) + diff_text = "".join(u_diff) + + return diff_text + + def mkdir(path: str, exist_ok=True): os.makedirs(path, exist_ok=exist_ok) @@ -147,6 +167,40 @@ def cp(src, dst): sh("rsync -r %s/ %s" % (src, dst)) +def cp_dry_itemize(src: str, dst: str) -> List[Tuple[str, str]]: + above = os.path.dirname(dst) + if not os.path.exists(above): + mkdir(above) + if dst.endswith("/") and not os.path.exists(dst): + mkdir(dst) + if os.path.isfile(src): + result = subprocess.run( + "rsync -rniic %s %s" % (src, dst), + shell=True, + check=True, + stdout=subprocess.PIPE, + ) + else: + result = subprocess.run( + "rsync -rniic %s/ %s" % (src, dst), + shell=True, + check=True, + stdout=subprocess.PIPE, + ) + lines = result.stdout.decode("utf-8").split("\n") + itemization = [] + for line in lines: + terms = line.split() + if len(terms) == 2: + item_string = terms[0] + file_relpath = terms[1] + itemization.append((item_string, file_relpath)) + elif len(terms) > 0: + log.debug("Abnormal rsync 
output line: '%s'" % terms) + + return itemization + + def pad_left(s, n): return s.rjust(n) From 45633eeb17b4992b783ad511626ccedc17b097e9 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Mon, 6 Oct 2025 13:22:53 +0200 Subject: [PATCH 7/9] Helper shell command run functions now return the result; fixed `CalledProcessError` exception in `cp_dry_itemize` not being handled Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/utils.py | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/cfbs/utils.py b/cfbs/utils.py index d408ce82..d725dbf5 100644 --- a/cfbs/utils.py +++ b/cfbs/utils.py @@ -86,7 +86,7 @@ def __init__(self, name_or_message, message=None) -> None: def _sh(cmd: str): # print(cmd) try: - subprocess.run( + return subprocess.run( cmd, shell=True, check=True, @@ -99,9 +99,8 @@ def _sh(cmd: str): def sh(cmd: str, directory=None): if directory: - _sh("cd %s && %s" % (directory, cmd)) - return - _sh("%s" % cmd) + return _sh("cd %s && %s" % (directory, cmd)) + return _sh(cmd) def display_diff(path_A, path_B): @@ -162,9 +161,8 @@ def cp(src, dst): if dst.endswith("/") and not os.path.exists(dst): mkdir(dst) if os.path.isfile(src): - sh("rsync -r %s %s" % (src, dst)) - return - sh("rsync -r %s/ %s" % (src, dst)) + return sh("rsync -r %s %s" % (src, dst)) + return sh("rsync -r %s/ %s" % (src, dst)) def cp_dry_itemize(src: str, dst: str) -> List[Tuple[str, str]]: @@ -174,19 +172,9 @@ def cp_dry_itemize(src: str, dst: str) -> List[Tuple[str, str]]: if dst.endswith("/") and not os.path.exists(dst): mkdir(dst) if os.path.isfile(src): - result = subprocess.run( - "rsync -rniic %s %s" % (src, dst), - shell=True, - check=True, - stdout=subprocess.PIPE, - ) + result = sh("rsync -rniic %s %s" % (src, dst)) else: - result = subprocess.run( - "rsync -rniic %s/ %s" % (src, dst), - shell=True, - check=True, - stdout=subprocess.PIPE, - ) + result = sh("rsync -rniic %s/ %s" % (src, dst)) lines = result.stdout.decode("utf-8").split("\n") itemization = [] for line in lines: From 33c6dc0cfd9ae9c37ae7d5771562681ac98e7576 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:40:12 +0200 Subject: [PATCH 8/9] Print the diffs filename during build Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/build.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cfbs/build.py b/cfbs/build.py index cebc7d78..6f0e3167 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -454,9 +454,12 @@ def perform_build(config: CFBSConfig, diffs_filename=None) -> int: if diffs_filename is not None: try: + print( + "\nWriting diffs of non-identical file overwrites to '%s'..." + % diffs_filename + ) save_file(diffs_filename, diffs_data) except IsADirectoryError: - print("") log.warning( "An existing directory was provided as the '--diffs' file path - writing the diffs file for the build failed - continuing build..." 
) From 5b25f4493d22127a7398cca5013fe6813f9528a0 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:31:01 +0200 Subject: [PATCH 9/9] Improved code quality of copy build step dry run functions * Documented precisely what the functions do and return in the docstrings * Split out some appropriate code to a separate function * Renamed a variable to a more accurate name Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- cfbs/build.py | 31 +++++-------------------------- cfbs/utils.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/cfbs/build.py b/cfbs/build.py index 6f0e3167..03276f30 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -17,7 +17,7 @@ from cfbs.utils import ( canonify, cp, - cp_dry_itemize, + cp_dry_overwrites, deduplicate_def_json, file_diff_text, find, @@ -135,31 +135,10 @@ def _perform_copy_step(args, source, destination, prefix): step_diffs_data = "" - itemization = cp_dry_itemize(src, dst) - noop_overwrites_relpaths = [] - actual_overwrites_relpaths = [] - for item_string, file_relpath in itemization: - if item_string[1] != "f": - # only consider regular files - continue - if item_string[0] == "." or len(item_string) < 3: - # the first character being a dot means that it's a no-op overwrite (possibly except file attributes) - # explanation for the `< 3` comparison: - # if all attributes are unchanged, the rsync item string will use spaces instead of dots and they will have been parsed away earlier - noop_overwrites_relpaths.append(file_relpath) - continue - if item_string[2] == "+": - # the copied file is new - continue - if item_string[2] == "c": - # the copied file has a different checksum - actual_overwrites_relpaths.append(file_relpath) - continue - elif item_string[2] == ".": - # the copied regular file doesn't have a different checksum - noop_overwrites_relpaths.append(file_relpath) - log.debug("Novel item string: %s %s" % (item_string, file_relpath)) - for file_relpath in actual_overwrites_relpaths: + noop_overwrites_relpaths, modifying_overwrites_relpaths = cp_dry_overwrites( + src, dst + ) + for file_relpath in modifying_overwrites_relpaths: if os.path.isfile(src): fileA = src else: diff --git a/cfbs/utils.py b/cfbs/utils.py index d725dbf5..281fbde9 100644 --- a/cfbs/utils.py +++ b/cfbs/utils.py @@ -166,6 +166,10 @@ def cp(src, dst): def cp_dry_itemize(src: str, dst: str) -> List[Tuple[str, str]]: + """Emulates a dry run of `cfbs.utils.cp` and itemizes files to be copied. + + Returns a list of `rsync --itemize` strings (described in the rsync manual) with their corresponding paths. + """ above = os.path.dirname(dst) if not os.path.exists(above): mkdir(above) @@ -189,6 +193,38 @@ def cp_dry_itemize(src: str, dst: str) -> List[Tuple[str, str]]: return itemization +def cp_dry_overwrites(src: str, dst: str) -> Tuple[List[str], List[str]]: + """Returns paths of files that would be overwritten by `cfbs.utils.cp`, grouping identical and non-identical file overwrites separately.""" + noop_overwrites_relpaths = [] + modifying_overwrites_relpaths = [] + + itemization = cp_dry_itemize(src, dst) + + for item_string, file_relpath in itemization: + if item_string[1] != "f": + # only consider regular files + continue + if item_string[0] == "." 
or len(item_string) < 3: + # the first character being a dot means that it's a no-op overwrite (possibly except file attributes) + # explanation for the `< 3` comparison: + # if all attributes are unchanged, the rsync item string will use spaces instead of dots and they will have been parsed away earlier + noop_overwrites_relpaths.append(file_relpath) + continue + if item_string[2] == "+": + # the copied file is new + continue + if item_string[2] == "c": + # the copied file has a different checksum + modifying_overwrites_relpaths.append(file_relpath) + continue + elif item_string[2] == ".": + # the copied regular file doesn't have a different checksum + noop_overwrites_relpaths.append(file_relpath) + log.debug("Novel item string: %s %s" % (item_string, file_relpath)) + + return noop_overwrites_relpaths, modifying_overwrites_relpaths + + def pad_left(s, n): return s.rjust(n)
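
To summarize how the pieces added in this series fit together (a hedged sketch; the source directory below is hypothetical, and the exact item strings depend on rsync's itemize-changes output): cp_dry_overwrites buckets the dry-run itemization into no-op and content-changing overwrites of regular files, _perform_copy_step turns the first bucket into a warning and the second into unified-diff text, and perform_build writes the collected text when a diffs file was requested. With these patches applied, 'cfbs build --diffs' would write the diffs to diffs.txt (the argparse const) and 'cfbs build --diffs some-file.txt' to the given path; main.py rejects --diffs for any other command.

    # Hedged usage sketch -- "some_module/" is a hypothetical source directory.
    from cfbs.utils import cp_dry_overwrites

    noop, modified = cp_dry_overwrites("some_module/", "out/masterfiles/")
    # noop: regular files rsync reports as unchanged (item string starting with "."
    #       or shorter than 3 characters once whitespace is stripped, or "." at index 2)
    # modified: regular files whose checksum differs ("c" at index 2)
    # newly created files ("+" at index 2) land in neither list
    for relpath in modified:
        print("would be overwritten with different content:", relpath)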