From d87ae7aaafeeda1da17741ef9178fe1fcfd661ce Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Tue, 17 Mar 2026 14:04:25 +0800 Subject: [PATCH 1/2] feat(cli): polite deep merge for settings.json with json5 and safe atomic write --- CHANGELOG.md | 1 + pyproject.toml | 1 + src/specify_cli/__init__.py | 130 +++++++++++++++++++----- tests/test_merge.py | 190 ++++++++++++++++++++++++++++++++++++ 4 files changed, 297 insertions(+), 25 deletions(-) create mode 100644 tests/test_merge.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b1b6a47a0..a479928172 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- feat(cli): polite deep merge for VSCode settings.json with JSONC support via `json5` and zero-data-loss fallbacks - feat(presets): Pluggable preset system with preset catalog and template resolver - Preset manifest (`preset.yml`) with validation for artifact, command, and script types - `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py` diff --git a/pyproject.toml b/pyproject.toml index cdbad2e013..322abe3a57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "pyyaml>=6.0", "packaging>=23.0", "pathspec>=0.12.0", + "json5>=0.13.0", ] [project.scripts] diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index a45535aee4..6e11afdd80 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -7,6 +7,7 @@ # "platformdirs", # "readchar", # "httpx", +# "json5", # ] # /// """ @@ -32,6 +33,8 @@ import shutil import shlex import json +import json5 +import stat import yaml from pathlib import Path from typing import Any, Optional, Tuple @@ -654,37 +657,78 @@ def init_git_repo(project_path: Path, quiet: bool = False) -> Tuple[bool, Option os.chdir(original_cwd) def handle_vscode_settings(sub_item, dest_file, rel_path, verbose=False, tracker=None) -> None: - """Handle merging or copying of .vscode/settings.json files.""" + """Handle merging or copying of .vscode/settings.json files. + + Note: when merge produces changes, rewritten output is normalized JSON and + existing JSONC comments/trailing commas are not preserved. + """ def log(message, color="green"): if verbose and not tracker: console.print(f"[{color}]{message}[/] {rel_path}") + def atomic_write_json(target_file: Path, payload: dict[str, Any]) -> None: + """Atomically write JSON while preserving existing mode bits when possible.""" + fd, temp_path = tempfile.mkstemp( + dir=target_file.parent, + prefix=f"{target_file.name}.", + suffix=".tmp", + ) + try: + with os.fdopen(fd, 'w', encoding='utf-8') as f: + json.dump(payload, f, indent=4) + f.write('\n') + + if target_file.exists(): + try: + existing_stat = target_file.stat() + os.chmod(temp_path, stat.S_IMODE(existing_stat.st_mode)) + if hasattr(os, "chown"): + try: + os.chown(temp_path, existing_stat.st_uid, existing_stat.st_gid) + except PermissionError: + # Best-effort owner/group preservation without requiring elevated privileges. + pass + except OSError: + # Best-effort metadata preservation; data safety is prioritized. + pass + + os.replace(temp_path, target_file) + except Exception: + if os.path.exists(temp_path): + os.unlink(temp_path) + raise + try: with open(sub_item, 'r', encoding='utf-8') as f: - new_settings = json.load(f) + # json5 natively supports comments and trailing commas (JSONC) + new_settings = json5.load(f) if dest_file.exists(): merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker) - with open(dest_file, 'w', encoding='utf-8') as f: - json.dump(merged, f, indent=4) - f.write('\n') - log("Merged:", "green") + if merged is not None: + atomic_write_json(dest_file, merged) + log("Merged:", "green") + log("Note: comments/trailing commas are normalized when rewritten", "yellow") + else: + log("Skipped merge (preserved existing settings)", "yellow") else: shutil.copy2(sub_item, dest_file) log("Copied (no existing settings.json):", "blue") except Exception as e: - log(f"Warning: Could not merge, copying instead: {e}", "yellow") - shutil.copy2(sub_item, dest_file) + log(f"Warning: Could not merge settings: {e}", "yellow") + if not dest_file.exists(): + shutil.copy2(sub_item, dest_file) + -def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict: +def merge_json_files(existing_path: Path, new_content: Any, verbose: bool = False) -> Optional[dict[str, Any]]: """Merge new JSON content into existing JSON file. - Performs a deep merge where: + Performs a polite deep merge where: - New keys are added - - Existing keys are preserved unless overwritten by new content + - Existing keys are PRESERVED (not overwritten) unless they are dictionaries - Nested dictionaries are merged recursively - - Lists and other values are replaced (not merged) + - Lists and other values are preserved from base if they exist Args: existing_path: Path to existing JSON file @@ -692,28 +736,64 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal verbose: Whether to print merge details Returns: - Merged JSON content as dict + Merged JSON content as dict, or None if the existing file should be left untouched. """ - try: - with open(existing_path, 'r', encoding='utf-8') as f: - existing_content = json.load(f) - except (FileNotFoundError, json.JSONDecodeError): - # If file doesn't exist or is invalid, just use new content + # Load existing content first to have a safe fallback + existing_content = None + exists = existing_path.exists() + + if exists: + try: + with open(existing_path, 'r', encoding='utf-8') as f: + # Handle comments (JSONC) natively with json5 + # Note: json5 handles BOM automatically + existing_content = json5.load(f) + except FileNotFoundError: + # Handle race condition where file is deleted after exists() check + exists = False + except Exception as e: + if verbose: + console.print(f"[yellow]Warning: Could not read or parse existing JSON in {existing_path.name} ({e}).[/yellow]") + # Skip merge to preserve existing file if unparseable or inaccessible (e.g. PermissionError) + return None + + # Validate template content + if not isinstance(new_content, dict): + if verbose: + console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]") + return None + + if not exists: return new_content - def deep_merge(base: dict, update: dict) -> dict: - """Recursively merge update dict into base dict.""" + # If existing content parsed but is not a dict, skip merge to avoid data loss + if not isinstance(existing_content, dict): + if verbose: + console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]") + return None + + def deep_merge_polite(base: dict[str, Any], update: dict[str, Any]) -> dict[str, Any]: + """Recursively merge update dict into base dict, preserving base values.""" result = base.copy() for key, value in update.items(): - if key in result and isinstance(result[key], dict) and isinstance(value, dict): + if key not in result: + # Add new key + result[key] = value + elif isinstance(result[key], dict) and isinstance(value, dict): # Recursively merge nested dictionaries - result[key] = deep_merge(result[key], value) + result[key] = deep_merge_polite(result[key], value) else: - # Add new key or replace existing value - result[key] = value + # Key already exists and is not a dict, PRESERVE existing value + # This ensures user settings aren't overwritten by template defaults + pass return result - merged = deep_merge(existing_content, new_content) + merged = deep_merge_polite(existing_content, new_content) + + # Detect if anything actually changed. If not, return None so the caller + # can skip rewriting the file (preserving user's comments/formatting). + if merged == existing_content: + return None if verbose: console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}") diff --git a/tests/test_merge.py b/tests/test_merge.py new file mode 100644 index 0000000000..69f946ccc5 --- /dev/null +++ b/tests/test_merge.py @@ -0,0 +1,190 @@ +import stat + +from specify_cli import merge_json_files +from specify_cli import handle_vscode_settings + +# --- Dimension 2: Polite Deep Merge Strategy --- + +def test_merge_json_files_type_mismatch_preservation(tmp_path): + """If user has a string but template wants a dict, PRESERVE user's string.""" + existing_file = tmp_path / "settings.json" + # User might have overridden a setting with a simple string or different type + existing_file.write_text('{"chat.editor.fontFamily": "CustomFont"}') + + # Template might expect a dict for the same key (hypothetically) + new_settings = { + "chat.editor.fontFamily": {"font": "TemplateFont"} + } + + merged = merge_json_files(existing_file, new_settings) + # Result is None because user settings were preserved and nothing else changed + assert merged is None + +def test_merge_json_files_deep_nesting(tmp_path): + """Verify deep recursive merging of new keys.""" + existing_file = tmp_path / "settings.json" + existing_file.write_text(""" + { + "a": { + "b": { + "c": 1 + } + } + } + """) + + new_settings = { + "a": { + "b": { + "d": 2 # New nested key + }, + "e": 3 # New mid-level key + } + } + + merged = merge_json_files(existing_file, new_settings) + assert merged["a"]["b"]["c"] == 1 + assert merged["a"]["b"]["d"] == 2 + assert merged["a"]["e"] == 3 + +def test_merge_json_files_empty_existing(tmp_path): + """Merging into an empty/new file.""" + existing_file = tmp_path / "empty.json" + existing_file.write_text("{}") + + new_settings = {"a": 1} + merged = merge_json_files(existing_file, new_settings) + assert merged == {"a": 1} + +# --- Dimension 3: Real-world Simulation --- + +def test_merge_vscode_realistic_scenario(tmp_path): + """A realistic VSCode settings.json with many existing preferences, comments, and trailing commas.""" + existing_file = tmp_path / "vscode_settings.json" + existing_file.write_text(""" + { + "editor.fontSize": 12, + "editor.formatOnSave": true, /* block comment */ + "files.exclude": { + "**/.git": true, + "**/node_modules": true, + }, + "chat.promptFilesRecommendations": { + "existing.tool": true, + } // User comment + } + """) + + template_settings = { + "chat.promptFilesRecommendations": { + "speckit.specify": True, + "speckit.plan": True + }, + "chat.tools.terminal.autoApprove": { + ".specify/scripts/bash/": True + } + } + + merged = merge_json_files(existing_file, template_settings) + + # Check preservation + assert merged["editor.fontSize"] == 12 + assert merged["files.exclude"]["**/.git"] is True + assert merged["chat.promptFilesRecommendations"]["existing.tool"] is True + + # Check additions + assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True + assert merged["chat.tools.terminal.autoApprove"][".specify/scripts/bash/"] is True + +# --- Dimension 4: Error Handling & Robustness --- + +def test_merge_json_files_with_bom(tmp_path): + """Test files with UTF-8 BOM (sometimes created on Windows).""" + existing_file = tmp_path / "bom.json" + content = '{"a": 1}' + # Prepend UTF-8 BOM + existing_file.write_bytes(b'\xef\xbb\xbf' + content.encode('utf-8')) + + new_settings = {"b": 2} + merged = merge_json_files(existing_file, new_settings) + assert merged == {"a": 1, "b": 2} + +def test_merge_json_files_not_a_dictionary_template(tmp_path): + """If for some reason new_content is not a dict, PRESERVE existing settings by returning None.""" + existing_file = tmp_path / "ok.json" + existing_file.write_text('{"a": 1}') + + # Secure fallback: return None to skip writing and avoid clobbering + assert merge_json_files(existing_file, ["not", "a", "dict"]) is None + +def test_merge_json_files_unparseable_existing(tmp_path): + """If the existing file is unparseable JSON, return None to avoid overwriting it.""" + bad_file = tmp_path / "bad.json" + bad_file.write_text('{"a": 1, missing_value}') # Invalid JSON + + assert merge_json_files(bad_file, {"b": 2}) is None + + +def test_merge_json_files_list_preservation(tmp_path): + """Verify that existing list values are preserved and NOT merged or overwritten.""" + existing_file = tmp_path / "list.json" + existing_file.write_text('{"my.list": ["user_item"]}') + + template_settings = { + "my.list": ["template_item"] + } + + merged = merge_json_files(existing_file, template_settings) + # The polite merge policy says: keep existing values if they exist and aren't both dicts. + # Since nothing changed, it returns None. + assert merged is None + +def test_merge_json_files_no_changes(tmp_path): + """If the merge doesn't introduce any new keys or changes, return None to skip rewrite.""" + existing_file = tmp_path / "no_change.json" + existing_file.write_text('{"a": 1, "b": {"c": 2}}') + + template_settings = { + "a": 1, # Already exists + "b": {"c": 2} # Already exists nested + } + + # Should return None because result == existing + assert merge_json_files(existing_file, template_settings) is None + +def test_merge_json_files_type_mismatch_no_op(tmp_path): + """If a key exists with different type and we preserve it, it might still result in no change.""" + existing_file = tmp_path / "mismatch_no_op.json" + existing_file.write_text('{"a": "user_string"}') + + template_settings = { + "a": {"key": "template_dict"} # Mismatch, will be ignored + } + + # Should return None because we preserved the user's string and nothing else changed + assert merge_json_files(existing_file, template_settings) is None + + +def test_handle_vscode_settings_preserves_mode_on_atomic_write(tmp_path): + """Atomic rewrite should preserve existing file mode bits.""" + vscode_dir = tmp_path / ".vscode" + vscode_dir.mkdir() + dest_file = vscode_dir / "settings.json" + template_file = tmp_path / "template_settings.json" + + dest_file.write_text('{"a": 1}\n', encoding="utf-8") + dest_file.chmod(0o640) + before_mode = stat.S_IMODE(dest_file.stat().st_mode) + + template_file.write_text('{"b": 2}\n', encoding="utf-8") + + handle_vscode_settings( + template_file, + dest_file, + "settings.json", + verbose=False, + tracker=None, + ) + + after_mode = stat.S_IMODE(dest_file.stat().st_mode) + assert after_mode == before_mode From 2f8b035627d7c5671c21b7cb2d9e30b03cec50b2 Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Tue, 17 Mar 2026 21:25:05 +0800 Subject: [PATCH 2/2] fix(cli): prevent temp fd leak and align merge-policy docs --- src/specify_cli/__init__.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 6e11afdd80..c3e1685a98 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -668,13 +668,17 @@ def log(message, color="green"): def atomic_write_json(target_file: Path, payload: dict[str, Any]) -> None: """Atomically write JSON while preserving existing mode bits when possible.""" - fd, temp_path = tempfile.mkstemp( - dir=target_file.parent, - prefix=f"{target_file.name}.", - suffix=".tmp", - ) + temp_path: Optional[Path] = None try: - with os.fdopen(fd, 'w', encoding='utf-8') as f: + with tempfile.NamedTemporaryFile( + mode='w', + encoding='utf-8', + dir=target_file.parent, + prefix=f"{target_file.name}.", + suffix=".tmp", + delete=False, + ) as f: + temp_path = Path(f.name) json.dump(payload, f, indent=4) f.write('\n') @@ -694,8 +698,8 @@ def atomic_write_json(target_file: Path, payload: dict[str, Any]) -> None: os.replace(temp_path, target_file) except Exception: - if os.path.exists(temp_path): - os.unlink(temp_path) + if temp_path and temp_path.exists(): + temp_path.unlink() raise try: @@ -726,8 +730,8 @@ def merge_json_files(existing_path: Path, new_content: Any, verbose: bool = Fals Performs a polite deep merge where: - New keys are added - - Existing keys are PRESERVED (not overwritten) unless they are dictionaries - - Nested dictionaries are merged recursively + - Existing keys are preserved (not overwritten) unless both values are dictionaries + - Nested dictionaries are merged recursively only when both sides are dictionaries - Lists and other values are preserved from base if they exist Args: @@ -783,8 +787,8 @@ def deep_merge_polite(base: dict[str, Any], update: dict[str, Any]) -> dict[str, # Recursively merge nested dictionaries result[key] = deep_merge_polite(result[key], value) else: - # Key already exists and is not a dict, PRESERVE existing value - # This ensures user settings aren't overwritten by template defaults + # Key already exists and values are not both dicts; preserve existing value. + # This ensures user settings aren't overwritten by template defaults. pass return result