From 984c9e7b2c2808c291d269130a13c8b93e32fe8d Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Feb 2026 11:55:15 +0000 Subject: [PATCH] Add include/exclude override system for config-driven path re-inclusion Implement config-driven include overrides and profile-scoped include/exclude extensions so users can re-include paths normally excluded by ignore_patterns, .gitignore, or default excludes without editing .gitignore or weakening global excludes. Changes: - config.py: Add include_patterns to DEFAULT_CONFIG, validate new fields (include_patterns, profile additional_excludes/additional_includes) - core.py: Add included_patterns support to DumpSession with _is_force_included helper that checks both pathspec matching and directory ancestor traversal - engine.py: Merge top-level and profile include/exclude lists in _initialize_session, pass effective includes to DumpSession - Add 45 comprehensive tests covering validation, behavior, engine integration, regression, and force-include helper logic https://claude.ai/code/session_01U9Lzb7fYHLCXcBuWao1px1 --- .dump_config.json | 1 + src/dumpcode/config.py | 32 +- src/dumpcode/core.py | 83 ++- src/dumpcode/engine.py | 24 +- tests/test_include_exclude_overrides.py | 900 ++++++++++++++++++++++++ 5 files changed, 1021 insertions(+), 19 deletions(-) create mode 100644 tests/test_include_exclude_overrides.py diff --git a/.dump_config.json b/.dump_config.json index dd8156f..486707a 100644 --- a/.dump_config.json +++ b/.dump_config.json @@ -20,6 +20,7 @@ ".ruff_cache", "ai_response.md" ], + "include_patterns": [], "profiles": { "readme": { "description": "Generate a professional, architect-level README.md for the current project", diff --git a/src/dumpcode/config.py b/src/dumpcode/config.py index 9c761a6..dceb4c3 100644 --- a/src/dumpcode/config.py +++ b/src/dumpcode/config.py @@ -17,6 +17,7 @@ ".mypy_cache", ".ruff_cache", "ai_response.md", ], + "include_patterns": [], "profiles": DEFAULT_PROFILES, "use_xml": True } @@ -24,10 +25,10 @@ def validate_config(config: Dict) -> bool: """Basic structural check for configuration. - + Args: config: Configuration dictionary to validate - + Returns: True if config has valid structure, False otherwise """ @@ -36,20 +37,35 @@ def validate_config(config: Dict) -> bool: if not isinstance(config["version"], int): return False - + + if "include_patterns" in config: + if not isinstance(config["include_patterns"], list): + return False + profiles = config.get("profiles") if not isinstance(profiles, dict): return False - + for name, body in profiles.items(): if not isinstance(body, dict): return False - - valid_keys = {"description", "pre", "post", "run_commands"} - + + valid_keys = { + "description", "pre", "post", "run_commands", + "additional_excludes", "additional_includes", + } + if not any(key in body for key in valid_keys): return False - + + if "additional_excludes" in body: + if not isinstance(body["additional_excludes"], list): + return False + + if "additional_includes" in body: + if not isinstance(body["additional_includes"], list): + return False + return True diff --git a/src/dumpcode/core.py b/src/dumpcode/core.py index 231a82c..a24547d 100644 --- a/src/dumpcode/core.py +++ b/src/dumpcode/core.py @@ -1,5 +1,6 @@ """Core dumping logic, file system traversal, and session management.""" +import fnmatch import os from dataclasses import dataclass from pathlib import Path @@ -132,6 +133,7 @@ def __init__( max_depth: Optional[int], dir_only: bool, git_changed_only: bool = False, + included_patterns: Optional[List[str]] = None, ) -> None: """Initialize the session with scanning constraints. @@ -141,12 +143,14 @@ def __init__( max_depth: Depth limit for directory traversal. dir_only: If True, skip file contents. git_changed_only: If True, only include files modified in git. + included_patterns: Patterns that override exclusions (force-include). """ self.root_path = root_path self.excluded_patterns = excluded_patterns self.max_depth = max_depth self.dir_only = dir_only self.git_changed_only = git_changed_only + self.included_patterns = list(included_patterns) if included_patterns else [] self.dir_count = 0 self.file_count = 0 @@ -155,6 +159,7 @@ def __init__( self.skipped_files: List[Dict[str, str]] = [] self.visited_paths: Set[Path] = set() self.matcher = self._create_combined_matcher(root_path, excluded_patterns) + self.include_matcher = self._create_include_matcher(self.included_patterns) def _load_gitignore_lines(self, root_path: Path) -> List[str]: """Load .gitignore file lines. @@ -201,6 +206,63 @@ def _create_combined_matcher(self, root_path: Path, excluded_patterns: Set[str]) except ImportError: return None + def _create_include_matcher(self, included_patterns: List[str]) -> Any: + """Create a pathspec matcher for include override patterns. + + Args: + included_patterns: List of glob patterns to force-include. + + Returns: + A pathspec.PathSpec instance if pathspec is available, otherwise None. + """ + if not included_patterns: + return None + + try: + import pathspec + return pathspec.PathSpec.from_lines('gitignore', included_patterns) + except ImportError: + return None + + def _is_force_included(self, rel_path: str, is_dir: bool = False) -> bool: + """Check if a path should be force-included despite matching exclusion patterns. + + Checks direct pathspec matching for files and directories. For directories, + additionally checks whether the path is an ancestor of any include pattern + (to allow traversal into excluded directories that contain force-included files). + + Args: + rel_path: The relative path (POSIX format) to check. + is_dir: Whether the path is a directory. + + Returns: + True if the path matches any include override pattern. + """ + if self.include_matcher and self.include_matcher.match_file(rel_path): + return True + + if is_dir and self.included_patterns: + rel_parts = rel_path.split("/") + for pattern in self.included_patterns: + pattern_parts = pattern.split("/") + if len(pattern_parts) <= len(rel_parts) and "**" not in pattern_parts: + continue + match = True + for i, rel_part in enumerate(rel_parts): + if i >= len(pattern_parts): + match = False + break + pat_part = pattern_parts[i] + if pat_part == "**": + break + if not fnmatch.fnmatch(rel_part, pat_part): + match = False + break + if match: + return True + + return False + def log_skip(self, path: Path, reason: str) -> None: """Log a file that was skipped during processing. @@ -211,23 +273,30 @@ def log_skip(self, path: Path, reason: str) -> None: self.skipped_files.append({"path": str(path), "reason": reason}) def is_excluded(self, item_path: Path) -> bool: - """Check if a path should be ignored based on patterns and gitignore. + """Check if a path should be ignored based on patterns, gitignore, and includes. + + Evaluates exclusion rules first (built-in excludes, ignore_patterns, gitignore, + profile additional_excludes), then checks include overrides. Include patterns + are last-wins: if a path is excluded but matches an include pattern, it is + included. Args: item_path: The path to check for exclusion. Returns: - True if the path matches exclusion patterns, False otherwise. + True if the path matches exclusion patterns and is not force-included. """ if item_path.name == CONFIG_FILENAME: return True rel_path = item_path.relative_to(self.root_path).as_posix() - - if self.matcher: - return self.matcher.match_file(rel_path) - - return False + + excluded = self.matcher.match_file(rel_path) if self.matcher else False + + if excluded and self._is_force_included(rel_path, is_dir=item_path.is_dir()): + return False + + return excluded def generate_tree( self, diff --git a/src/dumpcode/engine.py b/src/dumpcode/engine.py index 9c2bd63..cbe239b 100644 --- a/src/dumpcode/engine.py +++ b/src/dumpcode/engine.py @@ -137,22 +137,38 @@ def _handle_ai_mode(self, output_file: Path, total_chars: int) -> None: def _initialize_session(self, output_file: Path) -> DumpSession: """Initialize the DumpSession with exclusion patterns and filesystem metadata. - + + Merges top-level ignore/include patterns with active profile's additional + excludes/includes to build effective lists passed to the session. + Args: output_file: Path to the target output file - + Returns: A configured DumpSession instance """ excluded = set(self.config.get("ignore_patterns", [])) self._exclude_output_file(output_file, excluded) - + + profile = self._get_active_profile() + if profile: + additional_excludes = profile.get("additional_excludes", []) + if isinstance(additional_excludes, list): + excluded.update(additional_excludes) + + effective_includes = list(self.config.get("include_patterns", [])) + if profile: + additional_includes = profile.get("additional_includes", []) + if isinstance(additional_includes, list): + effective_includes.extend(additional_includes) + return self.session_cls( self.settings.start_path, excluded, self.settings.max_depth, self.settings.dir_only, - self.settings.git_changed_only + self.settings.git_changed_only, + included_patterns=effective_includes, ) def _write_instructions_block(self, writer: DumpWriter, profile: Optional[Dict[str, Any]]) -> None: diff --git a/tests/test_include_exclude_overrides.py b/tests/test_include_exclude_overrides.py new file mode 100644 index 0000000..da99643 --- /dev/null +++ b/tests/test_include_exclude_overrides.py @@ -0,0 +1,900 @@ +"""Tests for the include/exclude override system. + +Covers: +- Config validation for new fields (include_patterns, additional_excludes, additional_includes) +- Behavior: include overrides re-include excluded paths +- Profile-scoped additional excludes/includes +- Directory traversal into excluded dirs containing included files +- Regression: existing behavior unchanged when new fields are absent/empty +""" + +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + +from dumpcode.config import validate_config, load_or_create_config +from dumpcode.constants import CONFIG_FILENAME +from dumpcode.core import DumpSession, DumpSettings +from dumpcode.engine import DumpEngine + + +# --------------------------------------------------------------------------- +# 1. Config Validation Tests +# --------------------------------------------------------------------------- + +class TestIncludeExcludeValidation: + """Validate new config fields.""" + + def test_old_config_without_new_keys_is_valid(self): + """Existing configs (no include_patterns, no additional_*) remain valid.""" + config = { + "version": 1, + "profiles": {"readme": {"description": "Generate README"}} + } + assert validate_config(config) is True + + def test_include_patterns_list_is_valid(self): + config = { + "version": 1, + "include_patterns": ["results/**", "*.jsonl"], + "profiles": {"readme": {"description": "x"}} + } + assert validate_config(config) is True + + def test_include_patterns_empty_list_is_valid(self): + config = { + "version": 1, + "include_patterns": [], + "profiles": {"readme": {"description": "x"}} + } + assert validate_config(config) is True + + def test_include_patterns_non_list_is_invalid(self): + config = { + "version": 1, + "include_patterns": "results/**", + "profiles": {"readme": {"description": "x"}} + } + assert validate_config(config) is False + + def test_profile_additional_excludes_list_is_valid(self): + config = { + "version": 1, + "profiles": { + "debug": { + "description": "Debug", + "additional_excludes": ["logs/**"] + } + } + } + assert validate_config(config) is True + + def test_profile_additional_excludes_non_list_is_invalid(self): + config = { + "version": 1, + "profiles": { + "debug": { + "description": "Debug", + "additional_excludes": "logs/**" + } + } + } + assert validate_config(config) is False + + def test_profile_additional_includes_list_is_valid(self): + config = { + "version": 1, + "profiles": { + "debug": { + "description": "Debug", + "additional_includes": ["results/**"] + } + } + } + assert validate_config(config) is True + + def test_profile_additional_includes_non_list_is_invalid(self): + config = { + "version": 1, + "profiles": { + "debug": { + "description": "Debug", + "additional_includes": "results/**" + } + } + } + assert validate_config(config) is False + + def test_profile_with_only_additional_includes_is_valid(self): + """A profile having only additional_includes (no description/pre/post) is valid.""" + config = { + "version": 1, + "profiles": { + "include-only": { + "additional_includes": ["results/**"] + } + } + } + assert validate_config(config) is True + + def test_profile_with_only_additional_excludes_is_valid(self): + config = { + "version": 1, + "profiles": { + "exclude-only": { + "additional_excludes": ["logs/**"] + } + } + } + assert validate_config(config) is True + + def test_profile_empty_additional_lists_are_valid(self): + config = { + "version": 1, + "profiles": { + "empty": { + "description": "Empty overrides", + "additional_excludes": [], + "additional_includes": [] + } + } + } + assert validate_config(config) is True + + def test_full_config_with_all_new_fields(self): + """Full config matching the spec example is valid.""" + config = { + "version": 28, + "ignore_patterns": [".git", "__pycache__"], + "include_patterns": ["results/**", "results/**/*.jsonl"], + "profiles": { + "readme": { + "description": "Generate README", + "pre": ["..."], + "post": "...", + }, + "debug-results": { + "description": "Include run artifacts", + "additional_excludes": ["results/**/runner_logs/**"], + "additional_includes": [ + "results/**/run_report_*.md", + "results/**/orchestrator.log" + ] + } + }, + } + assert validate_config(config) is True + + +class TestIncludeExcludeConfigLoad: + """Test loading configs with new fields.""" + + def test_load_config_with_include_patterns(self, tmp_path): + config_data = { + "version": 5, + "ignore_patterns": ["*.pyc"], + "include_patterns": ["results/**"], + "profiles": {"test": {"description": "Test"}} + } + (tmp_path / CONFIG_FILENAME).write_text(json.dumps(config_data)) + + config = load_or_create_config(tmp_path) + assert config["include_patterns"] == ["results/**"] + + def test_load_config_missing_include_patterns_uses_default(self, tmp_path): + """Backward compat: missing include_patterns yields empty list from DEFAULT_CONFIG.""" + config_data = { + "version": 3, + "profiles": {"test": {"description": "Test"}} + } + (tmp_path / CONFIG_FILENAME).write_text(json.dumps(config_data)) + + config = load_or_create_config(tmp_path) + assert config.get("include_patterns") == [] + + +# --------------------------------------------------------------------------- +# 2. DumpSession Behavior Tests +# --------------------------------------------------------------------------- + +class TestDumpSessionIncludeOverride: + """Test that include patterns override exclusion rules.""" + + def test_include_overrides_ignore_patterns(self, tmp_path): + """Path excluded by ignore_patterns but in include_patterns is included.""" + results_dir = tmp_path / "results" + results_dir.mkdir() + report = results_dir / "report.md" + report.write_text("report content") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + included_patterns=["results/**"], + ) + + assert not session.is_excluded(results_dir) + assert not session.is_excluded(report) + + def test_include_overrides_gitignore(self, tmp_path): + """Path excluded by .gitignore but in include_patterns is included.""" + # Set up .gitignore + (tmp_path / ".gitignore").write_text("results/\n") + + results_dir = tmp_path / "results" + results_dir.mkdir() + data_file = results_dir / "data.jsonl" + data_file.write_text('{"key": "value"}') + + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["results/**"], + ) + + assert not session.is_excluded(results_dir) + assert not session.is_excluded(data_file) + + def test_include_overrides_combined_excludes(self, tmp_path): + """Path excluded by both ignore_patterns and .gitignore is included by include_patterns.""" + (tmp_path / ".gitignore").write_text("dist/\n") + + dist_dir = tmp_path / "dist" + dist_dir.mkdir() + bundle = dist_dir / "bundle.js" + bundle.write_text("console.log('hi')") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"dist"}, + max_depth=None, + dir_only=False, + included_patterns=["dist/bundle.js"], + ) + + assert not session.is_excluded(dist_dir) + assert not session.is_excluded(bundle) + + def test_config_filename_always_excluded(self, tmp_path): + """CONFIG_FILENAME is always excluded, even if matched by include patterns.""" + config_file = tmp_path / CONFIG_FILENAME + config_file.write_text("{}") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=[CONFIG_FILENAME], + ) + + assert session.is_excluded(config_file) + + def test_no_include_patterns_behaves_as_before(self, tmp_path): + """Without include patterns, behavior is unchanged.""" + results_dir = tmp_path / "results" + results_dir.mkdir() + (results_dir / "data.jsonl").write_text("{}") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + ) + + assert session.is_excluded(results_dir) + + def test_empty_include_patterns_behaves_as_before(self, tmp_path): + """Empty include patterns list has no effect.""" + results_dir = tmp_path / "results" + results_dir.mkdir() + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + included_patterns=[], + ) + + assert session.is_excluded(results_dir) + + def test_include_does_not_affect_non_excluded_paths(self, tmp_path): + """Include patterns do not change the status of paths that are not excluded.""" + src_dir = tmp_path / "src" + src_dir.mkdir() + main_py = src_dir / "main.py" + main_py.write_text("pass") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["src/**"], + ) + + assert not session.is_excluded(main_py) + + def test_include_specific_file_in_excluded_dir(self, tmp_path): + """Include a specific file inside an excluded directory.""" + logs_dir = tmp_path / "logs" + logs_dir.mkdir() + important = logs_dir / "important.log" + important.write_text("critical error") + debug = logs_dir / "debug.log" + debug.write_text("debug info") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"logs"}, + max_depth=None, + dir_only=False, + included_patterns=["logs/important.log"], + ) + + # The directory should be traversable (ancestor of included file) + assert not session.is_excluded(logs_dir) + # The specific file should be included + assert not session.is_excluded(important) + # Other files in the directory should remain excluded + assert session.is_excluded(debug) + + def test_include_with_glob_pattern(self, tmp_path): + """Include via glob pattern (e.g., *.jsonl) inside excluded directory.""" + results_dir = tmp_path / "results" + results_dir.mkdir() + jsonl = results_dir / "output.jsonl" + jsonl.write_text("{}") + txt = results_dir / "notes.txt" + txt.write_text("notes") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + included_patterns=["results/*.jsonl"], + ) + + assert not session.is_excluded(results_dir) + assert not session.is_excluded(jsonl) + assert session.is_excluded(txt) + + def test_include_with_double_star_glob(self, tmp_path): + """Include with ** glob traverses nested directories.""" + results = tmp_path / "results" + run1 = results / "run1" + run1.mkdir(parents=True) + report = run1 / "report.md" + report.write_text("# Report") + log = run1 / "debug.log" + log.write_text("debug") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + included_patterns=["results/**/*.md"], + ) + + # Directories should be traversable + assert not session.is_excluded(results) + assert not session.is_excluded(run1) + # .md file should be included + assert not session.is_excluded(report) + # .log file should remain excluded + assert session.is_excluded(log) + + +class TestDumpSessionTreeWithIncludes: + """Test that generate_tree correctly includes force-included files.""" + + def test_tree_includes_force_included_files(self, tmp_path): + """Files in excluded dirs matched by include patterns appear in tree and files_to_dump.""" + results = tmp_path / "results" + results.mkdir() + data = results / "data.jsonl" + data.write_text('{"result": 1}') + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + included_patterns=["results/**"], + ) + + session.generate_tree(tmp_path) + + dumped_names = [f.name for f in session.files_to_dump] + assert "data.jsonl" in dumped_names + assert session.file_count >= 1 + + def test_tree_excludes_non_included_files_in_included_dir(self, tmp_path): + """Only files matching include patterns are included, others remain excluded.""" + results = tmp_path / "results" + results.mkdir() + included_file = results / "report.md" + included_file.write_text("# Report") + excluded_file = results / "debug.log" + excluded_file.write_text("debug") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + included_patterns=["results/*.md"], + ) + + session.generate_tree(tmp_path) + + dumped_names = [f.name for f in session.files_to_dump] + assert "report.md" in dumped_names + assert "debug.log" not in dumped_names + + def test_tree_without_include_patterns_unchanged(self, tmp_path): + """Regression: no include patterns means behavior is unchanged.""" + src = tmp_path / "src" + src.mkdir() + (src / "main.py").write_text("pass") + + results = tmp_path / "results" + results.mkdir() + (results / "data.jsonl").write_text("{}") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + ) + + session.generate_tree(tmp_path) + + dumped_names = [f.name for f in session.files_to_dump] + assert "main.py" in dumped_names + assert "data.jsonl" not in dumped_names + + def test_tree_nested_include_traversal(self, tmp_path): + """Include patterns with nested paths ensure all ancestor dirs are traversed.""" + deep = tmp_path / "output" / "run1" / "logs" + deep.mkdir(parents=True) + target = deep / "orchestrator.log" + target.write_text("log line") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"output"}, + max_depth=None, + dir_only=False, + included_patterns=["output/run1/logs/orchestrator.log"], + ) + + session.generate_tree(tmp_path) + + dumped_names = [f.name for f in session.files_to_dump] + assert "orchestrator.log" in dumped_names + + +# --------------------------------------------------------------------------- +# 3. Engine Integration Tests (profile additional_excludes / additional_includes) +# --------------------------------------------------------------------------- + +class TestEngineIncludeExcludeOverrides: + """Test engine-level integration of include/exclude overrides.""" + + def test_profile_additional_excludes(self, tmp_path): + """Profile additional_excludes adds to ignore_patterns.""" + src = tmp_path / "src" + src.mkdir() + (src / "main.py").write_text("print('main')") + (src / "debug.py").write_text("print('debug')") + + out_file = tmp_path / "dump.txt" + profile = { + "description": "Test", + "additional_excludes": ["src/debug.py"], + } + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=profile, + no_copy=True, + ) + + config = { + "ignore_patterns": [], + "profiles": {"test-excl": profile}, + } + engine = DumpEngine(config=config, settings=settings) + engine.run() + + content = out_file.read_text() + assert "main.py" in content + assert "debug.py" not in content + + def test_profile_additional_includes(self, tmp_path): + """Profile additional_includes re-includes excluded paths.""" + results = tmp_path / "results" + results.mkdir() + report = results / "report.md" + report.write_text("# Results") + + out_file = tmp_path / "dump.txt" + profile = { + "description": "Debug results", + "additional_includes": ["results/**"], + } + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=profile, + no_copy=True, + ) + + config = { + "ignore_patterns": ["results"], + "profiles": {"debug-results": profile}, + } + engine = DumpEngine(config=config, settings=settings) + engine.run() + + content = out_file.read_text() + assert "report.md" in content + assert "# Results" in content + + def test_top_level_include_patterns(self, tmp_path): + """Top-level include_patterns in config re-includes excluded paths.""" + results = tmp_path / "results" + results.mkdir() + data = results / "data.jsonl" + data.write_text('{"x": 1}') + + out_file = tmp_path / "dump.txt" + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=None, + no_copy=True, + ) + + engine = DumpEngine( + config={ + "ignore_patterns": ["results"], + "include_patterns": ["results/**"], + }, + settings=settings, + ) + engine.run() + + content = out_file.read_text() + assert "data.jsonl" in content + + def test_combined_top_level_and_profile_includes(self, tmp_path): + """Top-level + profile includes are merged.""" + results = tmp_path / "results" + results.mkdir() + (results / "data.jsonl").write_text("{}") + logs = tmp_path / "logs" + logs.mkdir() + (logs / "app.log").write_text("log") + + out_file = tmp_path / "dump.txt" + profile = { + "description": "Full debug", + "additional_includes": ["logs/**"], + } + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=profile, + no_copy=True, + ) + + config = { + "ignore_patterns": ["results", "logs"], + "include_patterns": ["results/**"], + "profiles": {"full-debug": profile}, + } + engine = DumpEngine(config=config, settings=settings) + engine.run() + + content = out_file.read_text() + assert "data.jsonl" in content + assert "app.log" in content + + def test_inactive_profile_rules_have_no_effect(self, tmp_path): + """Rules from a profile that is NOT active have no effect.""" + results = tmp_path / "results" + results.mkdir() + (results / "data.jsonl").write_text("{}") + + out_file = tmp_path / "dump.txt" + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=None, # No active profile + no_copy=True, + ) + + # The config has a profile with additional_includes, but it's not active + engine = DumpEngine( + config={ + "ignore_patterns": ["results"], + "include_patterns": [], + "profiles": { + "debug": { + "description": "Debug", + "additional_includes": ["results/**"], + } + } + }, + settings=settings, + ) + engine.run() + + content = out_file.read_text() + assert "data.jsonl" not in content + + def test_profile_additional_excludes_plus_includes(self, tmp_path): + """Profile can both add excludes and re-include specific paths.""" + results = tmp_path / "results" + results.mkdir() + report = results / "run_report.md" + report.write_text("# Report") + runner_logs = results / "runner_logs" + runner_logs.mkdir() + (runner_logs / "verbose.log").write_text("verbose") + + out_file = tmp_path / "dump.txt" + profile = { + "description": "Debug results", + "additional_excludes": ["results/runner_logs"], + "additional_includes": ["results/run_report.md"], + } + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=profile, + no_copy=True, + ) + + config = { + "ignore_patterns": ["results"], + "profiles": {"debug-results": profile}, + } + engine = DumpEngine(config=config, settings=settings) + engine.run() + + content = out_file.read_text() + assert "run_report.md" in content + assert "verbose.log" not in content + + +# --------------------------------------------------------------------------- +# 4. Regression Tests +# --------------------------------------------------------------------------- + +class TestIncludeExcludeRegression: + """Ensure existing behavior is unaffected when new fields are absent/empty.""" + + def test_no_new_fields_output_matches_current(self, tmp_path): + """Config without new fields produces identical behavior.""" + src = tmp_path / "src" + src.mkdir() + (src / "hello.py").write_text("print('hi')") + + out_file = tmp_path / "dump.txt" + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=None, + no_copy=True, + ) + + engine = DumpEngine( + config={"ignore_patterns": ["*.pyc"]}, + settings=settings, + ) + engine.run() + + content = out_file.read_text() + assert "hello.py" in content + assert "print('hi')" in content + + def test_empty_include_patterns_no_change(self, tmp_path): + """Empty include_patterns has no effect on output.""" + src = tmp_path / "src" + src.mkdir() + (src / "main.py").write_text("pass") + venv = tmp_path / "venv" + venv.mkdir() + (venv / "pyvenv.cfg").write_text("home = /usr") + + out_file = tmp_path / "dump.txt" + + settings = DumpSettings( + start_path=tmp_path, + output_file=out_file, + use_xml=True, + active_profile=None, + no_copy=True, + ) + + engine = DumpEngine( + config={ + "ignore_patterns": ["venv"], + "include_patterns": [], + }, + settings=settings, + ) + engine.run() + + content = out_file.read_text() + assert "main.py" in content + assert "pyvenv.cfg" not in content + + def test_tree_and_files_consistent(self, tmp_path): + """Tree entries and files_to_dump agree on the same include/exclude decisions.""" + results = tmp_path / "results" + results.mkdir() + included = results / "report.md" + included.write_text("report") + excluded = results / "debug.log" + excluded.write_text("debug") + + session = DumpSession( + root_path=tmp_path, + excluded_patterns={"results"}, + max_depth=None, + dir_only=False, + included_patterns=["results/*.md"], + ) + + session.generate_tree(tmp_path) + + # Check files_to_dump + dumped_names = {f.name for f in session.files_to_dump} + assert "report.md" in dumped_names + assert "debug.log" not in dumped_names + + # Check tree entries match + tree_file_names = {e.path.name for e in session.tree_entries if not e.is_dir} + assert "report.md" in tree_file_names + assert "debug.log" not in tree_file_names + + +# --------------------------------------------------------------------------- +# 5. Force-include helper method tests +# --------------------------------------------------------------------------- + +class TestForceIncludedHelper: + """Direct tests for _is_force_included logic.""" + + def test_direct_pathspec_match(self, tmp_path): + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["results/data.jsonl"], + ) + assert session._is_force_included("results/data.jsonl") is True + assert session._is_force_included("results/other.txt") is False + + def test_glob_pathspec_match(self, tmp_path): + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["results/**/*.jsonl"], + ) + assert session._is_force_included("results/run1/output.jsonl") is True + # A .txt file doesn't match *.jsonl via pathspec + assert session._is_force_included("results/run1/output.txt", is_dir=False) is False + # But as a directory, the ancestor check still applies (results/run1 could contain .jsonl) + assert session._is_force_included("results/run1", is_dir=True) is True + + def test_ancestor_directory_match(self, tmp_path): + """Directories that are ancestors of include patterns match.""" + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["results/run1/report.md"], + ) + assert session._is_force_included("results", is_dir=True) is True + assert session._is_force_included("results/run1", is_dir=True) is True + assert session._is_force_included("src", is_dir=True) is False + + def test_ancestor_with_double_star(self, tmp_path): + """Directories match when pattern has ** component.""" + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["results/**/*.md"], + ) + assert session._is_force_included("results", is_dir=True) is True + assert session._is_force_included("results/run1", is_dir=True) is True + assert session._is_force_included("results/run1/subdir", is_dir=True) is True + + def test_ancestor_with_wildcard_component(self, tmp_path): + """Directories match when pattern has * in intermediate components.""" + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["results/*/output.jsonl"], + ) + assert session._is_force_included("results", is_dir=True) is True + assert session._is_force_included("results/run1", is_dir=True) is True + assert session._is_force_included("results/run2", is_dir=True) is True + assert session._is_force_included("other", is_dir=True) is False + + def test_ancestor_check_does_not_apply_to_files(self, tmp_path): + """Files are not force-included by ancestor matching, only by pathspec.""" + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["results/**/*.md"], + ) + # A .log file should NOT be force-included even though its path + # is "under" the pattern directory + assert session._is_force_included("results/run1/debug.log", is_dir=False) is False + # But a .md file should match via pathspec + assert session._is_force_included("results/run1/report.md", is_dir=False) is True + + def test_no_include_patterns(self, tmp_path): + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=[], + ) + assert session._is_force_included("anything") is False + + def test_basename_only_pattern(self, tmp_path): + """Basename-only patterns don't match as directory ancestors.""" + session = DumpSession( + root_path=tmp_path, + excluded_patterns=set(), + max_depth=None, + dir_only=False, + included_patterns=["*.jsonl"], + ) + # *.jsonl has no directory components, so no ancestor matching + assert session._is_force_included("results") is False + # But file matching works via pathspec + assert session._is_force_included("data.jsonl") is True