Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dump_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
".ruff_cache",
"ai_response.md"
],
"include_patterns": [],
"profiles": {
"readme": {
"description": "Generate a professional, architect-level README.md for the current project",
Expand Down
32 changes: 24 additions & 8 deletions src/dumpcode/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@
".mypy_cache", ".ruff_cache",
"ai_response.md",
],
"include_patterns": [],
"profiles": DEFAULT_PROFILES,
"use_xml": True
}


def validate_config(config: Dict) -> bool:
"""Basic structural check for configuration.

Args:
config: Configuration dictionary to validate

Returns:
True if config has valid structure, False otherwise
"""
Expand All @@ -36,20 +37,35 @@ def validate_config(config: Dict) -> bool:

if not isinstance(config["version"], int):
return False


if "include_patterns" in config:
if not isinstance(config["include_patterns"], list):
return False

profiles = config.get("profiles")
if not isinstance(profiles, dict):
return False

for name, body in profiles.items():
if not isinstance(body, dict):
return False

valid_keys = {"description", "pre", "post", "run_commands"}


valid_keys = {
"description", "pre", "post", "run_commands",
"additional_excludes", "additional_includes",
}

if not any(key in body for key in valid_keys):
return False


if "additional_excludes" in body:
if not isinstance(body["additional_excludes"], list):
return False

if "additional_includes" in body:
if not isinstance(body["additional_includes"], list):
return False
Comment on lines +61 to +67

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The validation logic for additional_excludes and additional_includes is duplicated. You can refactor this into a loop to make the code more concise and easier to maintain if more list-based keys are added in the future.

Suggested change
if "additional_excludes" in body:
if not isinstance(body["additional_excludes"], list):
return False
if "additional_includes" in body:
if not isinstance(body["additional_includes"], list):
return False
for key in ("additional_excludes", "additional_includes"):
if key in body and not isinstance(body[key], list):
return False


return True


Expand Down
83 changes: 76 additions & 7 deletions src/dumpcode/core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Core dumping logic, file system traversal, and session management."""

import fnmatch
import os
from dataclasses import dataclass
from pathlib import Path
Expand Down Expand Up @@ -132,6 +133,7 @@ def __init__(
max_depth: Optional[int],
dir_only: bool,
git_changed_only: bool = False,
included_patterns: Optional[List[str]] = None,
) -> None:
"""Initialize the session with scanning constraints.

Expand All @@ -141,12 +143,14 @@ def __init__(
max_depth: Depth limit for directory traversal.
dir_only: If True, skip file contents.
git_changed_only: If True, only include files modified in git.
included_patterns: Patterns that override exclusions (force-include).
"""
self.root_path = root_path
self.excluded_patterns = excluded_patterns
self.max_depth = max_depth
self.dir_only = dir_only
self.git_changed_only = git_changed_only
self.included_patterns = list(included_patterns) if included_patterns else []

self.dir_count = 0
self.file_count = 0
Expand All @@ -155,6 +159,7 @@ def __init__(
self.skipped_files: List[Dict[str, str]] = []
self.visited_paths: Set[Path] = set()
self.matcher = self._create_combined_matcher(root_path, excluded_patterns)
self.include_matcher = self._create_include_matcher(self.included_patterns)

def _load_gitignore_lines(self, root_path: Path) -> List[str]:
"""Load .gitignore file lines.
Expand Down Expand Up @@ -201,6 +206,63 @@ def _create_combined_matcher(self, root_path: Path, excluded_patterns: Set[str])
except ImportError:
return None

def _create_include_matcher(self, included_patterns: List[str]) -> Any:
"""Create a pathspec matcher for include override patterns.

Args:
included_patterns: List of glob patterns to force-include.

Returns:
A pathspec.PathSpec instance if pathspec is available, otherwise None.
"""
if not included_patterns:
return None

try:
import pathspec
return pathspec.PathSpec.from_lines('gitignore', included_patterns)
except ImportError:
return None

def _is_force_included(self, rel_path: str, is_dir: bool = False) -> bool:
"""Check if a path should be force-included despite matching exclusion patterns.

Checks direct pathspec matching for files and directories. For directories,
additionally checks whether the path is an ancestor of any include pattern
(to allow traversal into excluded directories that contain force-included files).

Args:
rel_path: The relative path (POSIX format) to check.
is_dir: Whether the path is a directory.

Returns:
True if the path matches any include override pattern.
"""
if self.include_matcher and self.include_matcher.match_file(rel_path):
return True

if is_dir and self.included_patterns:
rel_parts = rel_path.split("/")
for pattern in self.included_patterns:
pattern_parts = pattern.split("/")
if len(pattern_parts) <= len(rel_parts) and "**" not in pattern_parts:
continue
match = True
for i, rel_part in enumerate(rel_parts):
if i >= len(pattern_parts):
match = False
break
pat_part = pattern_parts[i]
if pat_part == "**":
break
if not fnmatch.fnmatch(rel_part, pat_part):
match = False
break
if match:
return True

return False

def log_skip(self, path: Path, reason: str) -> None:
"""Log a file that was skipped during processing.

Expand All @@ -211,23 +273,30 @@ def log_skip(self, path: Path, reason: str) -> None:
self.skipped_files.append({"path": str(path), "reason": reason})

def is_excluded(self, item_path: Path) -> bool:
"""Check if a path should be ignored based on patterns and gitignore.
"""Check if a path should be ignored based on patterns, gitignore, and includes.

Evaluates exclusion rules first (built-in excludes, ignore_patterns, gitignore,
profile additional_excludes), then checks include overrides. Include patterns
are last-wins: if a path is excluded but matches an include pattern, it is
included.

Args:
item_path: The path to check for exclusion.

Returns:
True if the path matches exclusion patterns, False otherwise.
True if the path matches exclusion patterns and is not force-included.
"""
if item_path.name == CONFIG_FILENAME:
return True

rel_path = item_path.relative_to(self.root_path).as_posix()

if self.matcher:
return self.matcher.match_file(rel_path)

return False

excluded = self.matcher.match_file(rel_path) if self.matcher else False

if excluded and self._is_force_included(rel_path, is_dir=item_path.is_dir()):
return False

return excluded

def generate_tree(
self,
Expand Down
24 changes: 20 additions & 4 deletions src/dumpcode/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,22 +137,38 @@ def _handle_ai_mode(self, output_file: Path, total_chars: int) -> None:

def _initialize_session(self, output_file: Path) -> DumpSession:
"""Initialize the DumpSession with exclusion patterns and filesystem metadata.


Merges top-level ignore/include patterns with active profile's additional
excludes/includes to build effective lists passed to the session.

Args:
output_file: Path to the target output file

Returns:
A configured DumpSession instance
"""
excluded = set(self.config.get("ignore_patterns", []))
self._exclude_output_file(output_file, excluded)


profile = self._get_active_profile()
if profile:
additional_excludes = profile.get("additional_excludes", [])
if isinstance(additional_excludes, list):
excluded.update(additional_excludes)

effective_includes = list(self.config.get("include_patterns", []))
if profile:
additional_includes = profile.get("additional_includes", [])
if isinstance(additional_includes, list):
effective_includes.extend(additional_includes)

return self.session_cls(
self.settings.start_path,
excluded,
self.settings.max_depth,
self.settings.dir_only,
self.settings.git_changed_only
self.settings.git_changed_only,
included_patterns=effective_includes,
)

def _write_instructions_block(self, writer: DumpWriter, profile: Optional[Dict[str, Any]]) -> None:
Expand Down
Loading