diff --git a/cfbs/args.py b/cfbs/args.py index f9f53139..d46d6277 100644 --- a/cfbs/args.py +++ b/cfbs/args.py @@ -27,6 +27,7 @@ class ArgsTypesNamespace(argparse.Namespace): git_user_email = None # type: Optional[str] git_commit_message = None # type: Optional[str] ignore_versions_json = False # type: bool + diffs = None # type: Optional[str] omit_download = False # type: bool check_against_git = False # type: bool minimum_version = None # type: Optional[str] @@ -161,6 +162,13 @@ def get_arg_parser(whitespace_for_manual=False): help="Ignore versions.json. Necessary in case of a custom index or testing changes to the default index.", action="store_true", ) + parser.add_argument( + "--diffs", + help="Write diffs of files overwritten with a copy build step during 'cfbs build' to the specified file", + nargs="?", + const="diffs.txt", + default=None, + ) parser.add_argument( "--omit-download", help="Use existing masterfiles instead of downloading in 'cfbs generate-release-information'", diff --git a/cfbs/build.py b/cfbs/build.py index a77ca2d9..03276f30 100644 --- a/cfbs/build.py +++ b/cfbs/build.py @@ -13,16 +13,20 @@ import os import logging as log import shutil +from cfbs.cfbs_config import CFBSConfig from cfbs.utils import ( canonify, cp, + cp_dry_overwrites, deduplicate_def_json, + file_diff_text, find, merge_json, mkdir, pad_right, read_json, rm, + save_file, sh, strip_left, touch, @@ -79,7 +83,7 @@ def _generate_augment(module_name, input_data): return augment -def _perform_replace_step(n, a, b, filename): +def _perform_replacement(n, a, b, filename): assert n and a and b and filename assert a not in b @@ -122,196 +126,244 @@ def _perform_replace_step(n, a, b, filename): raise CFBSExitError("Failed to write to '%s'" % (filename,)) -def _perform_build_step(module, i, step, max_length): - operation, args = split_build_step(step) - name = module["name"] - source = module["_directory"] - counter = module["_counter"] - destination = "out/masterfiles" - - prefix = 
"%03d %s :" % (counter, pad_right(name, max_length)) - - assert operation in AVAILABLE_BUILD_STEPS # Should already be validated - if operation == "copy": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s copy '%s' 'masterfiles/%s'" % (prefix, src, dst)) - src, dst = os.path.join(source, src), os.path.join(destination, dst) - cp(src, dst) - elif operation == "run": - shell_command = " ".join(args) - print("%s run '%s'" % (prefix, shell_command)) - sh(shell_command, source) - elif operation == "delete": - files = [args] if type(args) is str else args - assert len(files) > 0 - as_string = " ".join(["'%s'" % f for f in files]) - print("%s delete %s" % (prefix, as_string)) - for file in files: - if not rm(os.path.join(source, file), True): - print( - "Warning: tried to delete '%s' but path did not exist." - % os.path.join(source, file) - ) - elif operation == "json": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s json '%s' 'masterfiles/%s'" % (prefix, src, dst)) - if not os.path.isfile(os.path.join(source, src)): - raise CFBSExitError("'%s' is not a file" % src) - src, dst = os.path.join(source, src), os.path.join(destination, dst) - extras, original = read_json(src), read_json(dst) - if not extras: - print("Warning: '%s' looks empty, adding nothing" % os.path.basename(src)) - if original: - merged = merge_json(original, extras) - if os.path.basename(dst) == "def.json": - merged = deduplicate_def_json(merged) +def _perform_copy_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("%s copy '%s' 'masterfiles/%s'" % (prefix, src, dst)) + src, dst = os.path.join(source, src), os.path.join(destination, dst) + + step_diffs_data = "" + + noop_overwrites_relpaths, modifying_overwrites_relpaths = cp_dry_overwrites( + src, dst + ) + for file_relpath in modifying_overwrites_relpaths: + if os.path.isfile(src): + fileA = src else: - merged = extras - write_json(dst, merged) - elif operation == 
"append": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s append '%s' 'masterfiles/%s'" % (prefix, src, dst)) - src, dst = os.path.join(source, src), os.path.join(destination, dst) - if not os.path.exists(dst): - touch(dst) - assert os.path.isfile(dst) - sh("cat '%s' >> '%s'" % (src, dst)) - elif operation == "directory": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("{} directory '{}' 'masterfiles/{}'".format(prefix, src, dst)) - dstarg = dst # save this for adding .cf files to inputs - src, dst = os.path.join(source, src), os.path.join(destination, dst) - defjson = os.path.join(destination, "def.json") - merged = read_json(defjson) - if not merged: - merged = {} - for root, _, files in os.walk(src): - for f in files: - if f == "def.json": - extra = read_json(os.path.join(root, f)) - if extra: - merged = merge_json(merged, extra) - merged = deduplicate_def_json(merged) - else: - s = os.path.join(root, f) - d = os.path.join(destination, dstarg, root[len(src) :], f) - log.debug("Copying '%s' to '%s'" % (s, d)) - cp(s, d) - write_json(defjson, merged) - elif operation == "input": - src, dst = args - if dst in [".", "./"]: - dst = "" - print("%s input '%s' 'masterfiles/%s'" % (prefix, src, dst)) - if src.startswith(name + "/"): - log.warning( - "Deprecated 'input' build step behavior - it should be: 'input ./input.json def.json'" - ) - # We'll translate it to what it should be - # TODO: Consider removing this behavior for cfbs 4? - src = "." + src[len(name) :] - src = os.path.join(name, src) - dst = os.path.join(destination, dst) - if not os.path.isfile(os.path.join(src)): - log.warning( - "Input data '%s' does not exist: Skipping build step." - % os.path.basename(src) - ) - return - extras, original = read_json(src), read_json(dst) - extras = _generate_augment(name, extras) - log.debug("Generated augment: %s", pretty(extras)) - if not extras: - raise CFBSExitError( - "Input data '%s' is incomplete: Skipping build step." 
-                % os.path.basename(src)
-            )
-        if original:
-            log.debug("Original def.json: %s", pretty(original))
-            merged = merge_json(original, extras)
-            merged = deduplicate_def_json(merged)
+            fileA = os.path.join(src, file_relpath)
+        if os.path.isfile(dst):
+            fileB = dst
         else:
-            merged = extras
-        log.debug("Merged def.json: %s", pretty(merged))
-        write_json(dst, merged)
-    elif operation == "policy_files":
-        files = []
-        for file in args:
-            if file.startswith("./"):
-                file = file[2:]
-            if file.endswith(".cf"):
-                files.append(file)
-            elif file.endswith("/"):
-                cf_files = find("out/masterfiles/" + file, extension=".cf")
-                files += (strip_left(f, "out/masterfiles/") for f in cf_files)
-            else:
-                raise CFBSExitError(
-                    "Unsupported filetype '%s' for build step '%s': "
-                    % (file, operation)
-                    + "Expected directory (*/) of policy file (*.cf)"
-                )
-        print("%s policy_files '%s'" % (prefix, "' '".join(files) if files else ""))
-        augment = {"inputs": files}
-        log.debug("Generated augment: %s" % pretty(augment))
-        path = os.path.join(destination, "def.json")
-        original = read_json(path)
-        log.debug("Original def.json: %s" % pretty(original))
-        if original:
-            merged = merge_json(original, augment)
-            merged = deduplicate_def_json(merged)
+            fileB = os.path.join(dst, file_relpath)
+        file_diff_data = file_diff_text(fileA, fileB)
+        step_diffs_data += file_diff_data
+    if len(noop_overwrites_relpaths) > 0:
+        warning_message = (
+            "Identical file overwrites occurred during copy.\n"
+            + "  Check your modules and their build steps to ascertain whether this is intentional.\n"
+            + "  In most cases, the cause is a file from a latter module already being provided by an earlier module (commonly stock masterfiles).\n"
+            + "  In that case, the file is best deleted from the latter module(s).\n"
+            + "  Identical overwrites count: %s\n" % len(noop_overwrites_relpaths)
+        )
+        # display affected files, without flooding the output
+        if len(noop_overwrites_relpaths) < 20:
+            for overwrite_noop in 
noop_overwrites_relpaths: + warning_message += " " + overwrite_noop + "\n" else: - merged = augment - log.debug("Merged def.json: %s", pretty(merged)) - write_json(path, merged) - elif operation == "bundles": - bundles = args - print("%s bundles '%s'" % (prefix, "' '".join(bundles) if bundles else "")) - augment = {"vars": {"control_common_bundlesequence_end": bundles}} - log.debug("Generated augment: %s" % pretty(augment)) - path = os.path.join(destination, "def.json") - original = read_json(path) - log.debug("Original def.json: %s" % pretty(original)) - if original: - merged = merge_json(original, augment) + for overwrite_noop in noop_overwrites_relpaths[:9]: + warning_message += " " + overwrite_noop + "\n" + warning_message += " ...\n" + for overwrite_noop in noop_overwrites_relpaths[-9:]: + warning_message += " " + overwrite_noop + "\n" + # display all the messages as one warning + log.warning(warning_message) + cp(src, dst) + + return step_diffs_data + + +def _perform_run_step(args, source, prefix): + shell_command = " ".join(args) + print("%s run '%s'" % (prefix, shell_command)) + sh(shell_command, source) + + +def _perform_delete_step(args, source, prefix): + files = [args] if type(args) is str else args + assert len(files) > 0 + as_string = " ".join(["'%s'" % f for f in files]) + print("%s delete %s" % (prefix, as_string)) + for file in files: + if not rm(os.path.join(source, file), True): + print( + "Warning: tried to delete '%s' but path did not exist." 
+ % os.path.join(source, file) + ) + + +def _perform_json_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("%s json '%s' 'masterfiles/%s'" % (prefix, src, dst)) + if not os.path.isfile(os.path.join(source, src)): + raise CFBSExitError("'%s' is not a file" % src) + src, dst = os.path.join(source, src), os.path.join(destination, dst) + extras, original = read_json(src), read_json(dst) + if not extras: + print("Warning: '%s' looks empty, adding nothing" % os.path.basename(src)) + if original: + merged = merge_json(original, extras) + if os.path.basename(dst) == "def.json": merged = deduplicate_def_json(merged) + else: + merged = extras + write_json(dst, merged) + + +def _perform_append_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("%s append '%s' 'masterfiles/%s'" % (prefix, src, dst)) + src, dst = os.path.join(source, src), os.path.join(destination, dst) + if not os.path.exists(dst): + touch(dst) + assert os.path.isfile(dst) + sh("cat '%s' >> '%s'" % (src, dst)) + + +def _perform_directory_step(args, source, destination, prefix): + src, dst = args + if dst in [".", "./"]: + dst = "" + print("{} directory '{}' 'masterfiles/{}'".format(prefix, src, dst)) + dstarg = dst # save this for adding .cf files to inputs + src, dst = os.path.join(source, src), os.path.join(destination, dst) + defjson = os.path.join(destination, "def.json") + merged = read_json(defjson) + if not merged: + merged = {} + for root, _, files in os.walk(src): + for f in files: + if f == "def.json": + extra = read_json(os.path.join(root, f)) + if extra: + merged = merge_json(merged, extra) + merged = deduplicate_def_json(merged) + else: + s = os.path.join(root, f) + d = os.path.join(destination, dstarg, root[len(src) :], f) + log.debug("Copying '%s' to '%s'" % (s, d)) + cp(s, d) + write_json(defjson, merged) + + +def _perform_input_step(args, name, destination, prefix): + src, dst = args + if 
dst in [".", "./"]: + dst = "" + print("%s input '%s' 'masterfiles/%s'" % (prefix, src, dst)) + if src.startswith(name + "/"): + log.warning( + "Deprecated 'input' build step behavior - it should be: 'input ./input.json def.json'" + ) + # We'll translate it to what it should be + # TODO: Consider removing this behavior for cfbs 4? + src = "." + src[len(name) :] + src = os.path.join(name, src) + dst = os.path.join(destination, dst) + if not os.path.isfile(os.path.join(src)): + log.warning( + "Input data '%s' does not exist: Skipping build step." + % os.path.basename(src) + ) + return + extras, original = read_json(src), read_json(dst) + extras = _generate_augment(name, extras) + log.debug("Generated augment: %s", pretty(extras)) + if not extras: + raise CFBSExitError( + "Input data '%s' is incomplete: Skipping build step." + % os.path.basename(src) + ) + if original: + log.debug("Original def.json: %s", pretty(original)) + merged = merge_json(original, extras) + merged = deduplicate_def_json(merged) + else: + merged = extras + log.debug("Merged def.json: %s", pretty(merged)) + write_json(dst, merged) + + +def _perform_policy_files_step(args, destination, prefix): + files = [] + for file in args: + if file.startswith("./"): + file = file[2:] + if file.endswith(".cf"): + files.append(file) + elif file.endswith("/"): + cf_files = find("out/masterfiles/" + file, extension=".cf") + files += (strip_left(f, "out/masterfiles/") for f in cf_files) else: - merged = augment - log.debug("Merged def.json: %s", pretty(merged)) - write_json(path, merged) - elif operation == "replace": - assert len(args) == 4 - print("%s replace '%s'" % (prefix, "' '".join(args))) - # New build step so let's be a bit strict about validating it: - validate_build_step(name, module, i, operation, args, strict=True) - n, a, b, file = args - file = os.path.join(destination, file) - _perform_replace_step(n, a, b, file) - elif operation == "replace_version": - assert len(args) == 3 - # New build step so 
let's be a bit strict about validating it: - validate_build_step(name, module, i, operation, args, strict=True) - print("%s replace_version '%s'" % (prefix, "' '".join(args))) - n = args[0] - to_replace = args[1] - filename = os.path.join(destination, args[2]) - version = module["version"] - _perform_replace_step(n, to_replace, version, filename) - - -def perform_build(config) -> int: + raise CFBSExitError( + "Unsupported filetype '%s' for build step 'policy_files': " % file + + "Expected directory (*/) of policy file (*.cf)" + ) + print("%s policy_files '%s'" % (prefix, "' '".join(files) if files else "")) + augment = {"inputs": files} + log.debug("Generated augment: %s" % pretty(augment)) + path = os.path.join(destination, "def.json") + original = read_json(path) + log.debug("Original def.json: %s" % pretty(original)) + if original: + merged = merge_json(original, augment) + merged = deduplicate_def_json(merged) + else: + merged = augment + log.debug("Merged def.json: %s", pretty(merged)) + write_json(path, merged) + + +def _perform_bundles_step(args, prefix, destination): + bundles = args + print("%s bundles '%s'" % (prefix, "' '".join(bundles) if bundles else "")) + augment = {"vars": {"control_common_bundlesequence_end": bundles}} + log.debug("Generated augment: %s" % pretty(augment)) + path = os.path.join(destination, "def.json") + original = read_json(path) + log.debug("Original def.json: %s" % pretty(original)) + if original: + merged = merge_json(original, augment) + merged = deduplicate_def_json(merged) + else: + merged = augment + log.debug("Merged def.json: %s", pretty(merged)) + write_json(path, merged) + + +def _perform_replace_step(module, i, args, name, destination, prefix): + assert len(args) == 4 + print("%s replace '%s'" % (prefix, "' '".join(args))) + # New build step so let's be a bit strict about validating it: + validate_build_step(name, module, i, "replace", args, strict=True) + n, a, b, file = args + file = os.path.join(destination, file) + 
_perform_replacement(n, a, b, file) + + +def _perform_replace_version_step(module, i, args, name, destination, prefix): + assert len(args) == 3 + # New build step so let's be a bit strict about validating it: + validate_build_step(name, module, i, "replace_version", args, strict=True) + print("%s replace_version '%s'" % (prefix, "' '".join(args))) + n = args[0] + to_replace = args[1] + filename = os.path.join(destination, args[2]) + version = module["version"] + _perform_replacement(n, to_replace, version, filename) + + +def perform_build(config: CFBSConfig, diffs_filename=None) -> int: if not config.get("build"): raise CFBSExitError("No 'build' key found in the configuration") # mini-validation - for module in config.get("build", []): + for module in config["build"]: for step in module["steps"]: operation, args = split_build_step(step) @@ -339,11 +391,58 @@ def perform_build(config) -> int: % (step, expected, actual) ) + diffs_data = "" + print("\nSteps:") - module_name_length = config.longest_module_key_length("name") - for module in config.get("build", []): + max_length = config.longest_module_key_length("name") + for module in config["build"]: for i, step in enumerate(module["steps"]): - _perform_build_step(module, i, step, module_name_length) + operation, args = split_build_step(step) + name = module["name"] + source = module["_directory"] + destination = "out/masterfiles" + + counter = module["_counter"] + prefix = "%03d %s :" % (counter, pad_right(name, max_length)) + + if operation == "copy": + step_diffs_data = _perform_copy_step(args, source, destination, prefix) + diffs_data += step_diffs_data + elif operation == "run": + _perform_run_step(args, source, prefix) + elif operation == "delete": + _perform_delete_step(args, source, prefix) + elif operation == "json": + _perform_json_step(args, source, destination, prefix) + elif operation == "append": + _perform_append_step(args, source, destination, prefix) + elif operation == "directory": + 
_perform_directory_step(args, source, destination, prefix) + elif operation == "input": + _perform_input_step(args, name, destination, prefix) + elif operation == "policy_files": + _perform_policy_files_step(args, destination, prefix) + elif operation == "bundles": + _perform_bundles_step(args, prefix, destination) + elif operation == "replace": + _perform_replace_step(module, i, args, name, destination, prefix) + elif operation == "replace_version": + _perform_replace_version_step( + module, i, args, name, destination, prefix + ) + + if diffs_filename is not None: + try: + print( + "\nWriting diffs of non-identical file overwrites to '%s'..." + % diffs_filename + ) + save_file(diffs_filename, diffs_data) + except IsADirectoryError: + log.warning( + "An existing directory was provided as the '--diffs' file path - writing the diffs file for the build failed - continuing build..." + ) + assert os.path.isdir("./out/masterfiles/") shutil.copyfile("./cfbs.json", "./out/masterfiles/cfbs.json") if os.path.isfile("out/masterfiles/def.json"): diff --git a/cfbs/cfbs.1 b/cfbs/cfbs.1 index 28441e99..baac7aca 100644 --- a/cfbs/cfbs.1 +++ b/cfbs/cfbs.1 @@ -1,9 +1,9 @@ -.TH CFBS "1" "2025\-08\-14" "cfbs" "CFEngine Build System manual" +.TH CFBS "1" "2025\-10\-03" "cfbs" "CFEngine Build System manual" .SH NAME cfbs \- combines multiple modules into 1 policy set to deploy on your infrastructure. Modules can be custom promise types, JSON files which enable certain functionality, or reusable CFEngine policy. The modules you use can be written by the CFEngine team, others in the community, your colleagues, or yourself. 
.SH SYNOPSIS .B cfbs -[-h] [--loglevel LOGLEVEL] [-M] [--version] [--force] [--non-interactive] [--index INDEX] [--check] [--checksum CHECKSUM] [--keep-order] [--git {yes,no}] [--git-user-name GIT_USER_NAME] [--git-user-email GIT_USER_EMAIL] [--git-commit-message GIT_COMMIT_MESSAGE] [--ignore-versions-json] [--omit-download] [--check-against-git] [--from MINIMUM_VERSION] [--to-json [TO_JSON]] [--reference-version REFERENCE_VERSION] [--masterfiles-dir MASTERFILES_DIR] [--ignored-path-components [IGNORED_PATH_COMPONENTS ...]] [--offline] [--masterfiles MASTERFILES] [cmd] [args ...] +[-h] [--loglevel LOGLEVEL] [-M] [--version] [--force] [--non-interactive] [--index INDEX] [--check] [--checksum CHECKSUM] [--keep-order] [--git {yes,no}] [--git-user-name GIT_USER_NAME] [--git-user-email GIT_USER_EMAIL] [--git-commit-message GIT_COMMIT_MESSAGE] [--ignore-versions-json] [--diffs [DIFFS]] [--omit-download] [--check-against-git] [--from MINIMUM_VERSION] [--to-json [TO_JSON]] [--reference-version REFERENCE_VERSION] [--masterfiles-dir MASTERFILES_DIR] [--ignored-path-components [IGNORED_PATH_COMPONENTS ...]] [--offline] [--masterfiles MASTERFILES] [cmd] [args ...] .SH DESCRIPTION CFEngine Build System. @@ -72,6 +72,10 @@ Specify git commit message \fB\-\-ignore\-versions\-json\fR Ignore versions.json. Necessary in case of a custom index or testing changes to the default index. 
+.TP +\fB\-\-diffs\fR \fI\,[DIFFS]\/\fR +Write diffs of files overwritten with a copy build step during 'cfbs build' to the specified file + .TP \fB\-\-omit\-download\fR Use existing masterfiles instead of downloading in 'cfbs generate\-release\-information' diff --git a/cfbs/commands.py b/cfbs/commands.py index 5a9cc164..8dab3250 100644 --- a/cfbs/commands.py +++ b/cfbs/commands.py @@ -802,16 +802,17 @@ def validate_command(paths=None, index_arg=None): return validate_config(config) -def _download_dependencies( - config, prefer_offline=False, redownload=False, ignore_versions=False -): +def _download_dependencies(config: CFBSConfig, redownload=False, ignore_versions=False): # TODO: This function should be split in 2: # 1. Code for downloading things into ~/.cfengine # 2. Code for copying things into ./out print("\nModules:") counter = 1 max_length = config.longest_module_key_length("name") - for module in config.get("build", []): + build = config.get("build") + if build is None: + return + for module in build: name = module["name"] if name.startswith("./"): local_module_copy(module, counter, max_length) @@ -901,7 +902,7 @@ def download_command(force, ignore_versions=False): @cfbs_command("build") -def build_command(ignore_versions=False): +def build_command(ignore_versions=False, diffs_filename=None): config = CFBSConfig.get_instance() r = validate_config(config) if r != 0: @@ -913,8 +914,8 @@ def build_command(ignore_versions=False): # We want the cfbs build command to be as backwards compatible as possible, # so we try building anyway and don't return error(s) init_out_folder() - _download_dependencies(config, prefer_offline=True, ignore_versions=ignore_versions) - r = perform_build(config) + _download_dependencies(config, ignore_versions=ignore_versions) + r = perform_build(config, diffs_filename) return r diff --git a/cfbs/main.py b/cfbs/main.py index b5b6b82c..f2f1b35b 100644 --- a/cfbs/main.py +++ b/cfbs/main.py @@ -138,6 +138,11 @@ def _main() -> int: % 
args.command ) + if args.diffs and args.command != "build": + raise CFBSUserError( + "The option --diffs is only for 'cfbs build', not 'cfbs %s'" % args.command + ) + if args.non_interactive and args.command not in ( "init", "add", @@ -213,7 +218,9 @@ def _main() -> int: if args.command == "download": return commands.download_command(args.force) if args.command == "build": - return commands.build_command(ignore_versions=args.ignore_versions_json) + return commands.build_command( + ignore_versions=args.ignore_versions_json, diffs_filename=args.diffs + ) if args.command == "install": return commands.install_command(args.args) if args.command == "update": diff --git a/cfbs/utils.py b/cfbs/utils.py index 1f8f7c74..281fbde9 100644 --- a/cfbs/utils.py +++ b/cfbs/utils.py @@ -1,3 +1,5 @@ +import difflib +import logging as log import os import re import sys @@ -10,7 +12,7 @@ import urllib.error from collections import OrderedDict from shutil import rmtree -from typing import Iterable, Union +from typing import Iterable, List, Tuple, Union from cfbs.pretty import pretty @@ -84,7 +86,7 @@ def __init__(self, name_or_message, message=None) -> None: def _sh(cmd: str): # print(cmd) try: - subprocess.run( + return subprocess.run( cmd, shell=True, check=True, @@ -97,9 +99,8 @@ def _sh(cmd: str): def sh(cmd: str, directory=None): if directory: - _sh("cd %s && %s" % (directory, cmd)) - return - _sh("%s" % cmd) + return _sh("cd %s && %s" % (directory, cmd)) + return _sh(cmd) def display_diff(path_A, path_B): @@ -111,6 +112,24 @@ def display_diff(path_A, path_B): raise +def file_diff(filepath_A, filepath_B): + with open(filepath_A) as f: + lines_A = f.readlines() + with open(filepath_B) as f: + lines_B = f.readlines() + + u_diff = difflib.unified_diff(lines_A, lines_B, filepath_A, filepath_B) + + return u_diff + + +def file_diff_text(filepath_A, filepath_B): + u_diff = file_diff(filepath_A, filepath_B) + diff_text = "".join(u_diff) + + return diff_text + + def mkdir(path: str, 
exist_ok=True): os.makedirs(path, exist_ok=exist_ok) @@ -142,9 +161,68 @@ def cp(src, dst): if dst.endswith("/") and not os.path.exists(dst): mkdir(dst) if os.path.isfile(src): - sh("rsync -r %s %s" % (src, dst)) - return - sh("rsync -r %s/ %s" % (src, dst)) + return sh("rsync -r %s %s" % (src, dst)) + return sh("rsync -r %s/ %s" % (src, dst)) + + +def cp_dry_itemize(src: str, dst: str) -> List[Tuple[str, str]]: + """Emulates a dry run of `cfbs.utils.cp` and itemizes files to be copied. + + Returns a list of `rsync --itemize` strings (described in the rsync manual) with their corresponding paths. + """ + above = os.path.dirname(dst) + if not os.path.exists(above): + mkdir(above) + if dst.endswith("/") and not os.path.exists(dst): + mkdir(dst) + if os.path.isfile(src): + result = sh("rsync -rniic %s %s" % (src, dst)) + else: + result = sh("rsync -rniic %s/ %s" % (src, dst)) + lines = result.stdout.decode("utf-8").split("\n") + itemization = [] + for line in lines: + terms = line.split() + if len(terms) == 2: + item_string = terms[0] + file_relpath = terms[1] + itemization.append((item_string, file_relpath)) + elif len(terms) > 0: + log.debug("Abnormal rsync output line: '%s'" % terms) + + return itemization + + +def cp_dry_overwrites(src: str, dst: str) -> Tuple[List[str], List[str]]: + """Returns paths of files that would be overwritten by `cfbs.utils.cp`, grouping identical and non-identical file overwrites separately.""" + noop_overwrites_relpaths = [] + modifying_overwrites_relpaths = [] + + itemization = cp_dry_itemize(src, dst) + + for item_string, file_relpath in itemization: + if item_string[1] != "f": + # only consider regular files + continue + if item_string[0] == "." 
or len(item_string) < 3: + # the first character being a dot means that it's a no-op overwrite (possibly except file attributes) + # explanation for the `< 3` comparison: + # if all attributes are unchanged, the rsync item string will use spaces instead of dots and they will have been parsed away earlier + noop_overwrites_relpaths.append(file_relpath) + continue + if item_string[2] == "+": + # the copied file is new + continue + if item_string[2] == "c": + # the copied file has a different checksum + modifying_overwrites_relpaths.append(file_relpath) + continue + elif item_string[2] == ".": + # the copied regular file doesn't have a different checksum + noop_overwrites_relpaths.append(file_relpath) + log.debug("Novel item string: %s %s" % (item_string, file_relpath)) + + return noop_overwrites_relpaths, modifying_overwrites_relpaths def pad_left(s, n):