From d25c74d0ab993d503169ae0d7752fba93ac14fea Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Mar 2026 20:18:05 +0000 Subject: [PATCH 1/2] Initial plan From 75c458c64383167e84c371e2f5c22f869d627bcd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Mar 2026 20:23:47 +0000 Subject: [PATCH 2/2] daily-file-diet: replace expensive find | wc -l with git ls-tree Co-authored-by: dsyme <7204669+dsyme@users.noreply.github.com> --- docs/daily-file-diet.md | 4 ++-- workflows/daily-file-diet.md | 28 ++++++++++------------------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/docs/daily-file-diet.md b/docs/daily-file-diet.md index 265c5a5..2218691 100644 --- a/docs/daily-file-diet.md +++ b/docs/daily-file-diet.md @@ -22,7 +22,7 @@ gh aw compile The Daily File Diet workflow runs on weekdays and: -1. **Scans Source Files** - Finds all non-test source files in your repository, excluding generated directories like `node_modules`, `vendor`, `dist`, and `target` +1. **Scans Source Files** - Finds all tracked non-test source files in your repository using `git ls-tree`, which lists only files committed at `HEAD`, so untracked or git-ignored generated directories like `node_modules`, `vendor`, `dist`, and `target` are skipped automatically (any such directory that is committed to the repository is still scanned) 2. **Identifies Oversized Files** - Detects files exceeding 500 lines (the healthy size threshold) 3. **Analyzes Structure** - Examines what the file contains: functions, classes, modules, and their relationships 4. 
**Creates Refactoring Issues** - Proposes concrete split strategies with specific file names, responsibilities, and implementation guidance @@ -80,7 +80,7 @@ gh aw edit daily-file-diet Common customizations: - **Adjust the threshold** - Change the 500-line limit to suit your team's preferences -- **Focus on specific languages** - Restrict `find` commands to your repository's primary language +- **Focus on specific languages** - Restrict the `grep` pattern in the `git ls-tree` pipeline to your repository's primary language - **Add labels** - Apply team-specific labels to generated issues - **Change the schedule** - Run less frequently if your codebase changes slowly diff --git a/workflows/daily-file-diet.md b/workflows/daily-file-diet.md index 7c94132..208d43d 100644 --- a/workflows/daily-file-diet.md +++ b/workflows/daily-file-diet.md @@ -25,19 +25,13 @@ tools: github: toolsets: [default] bash: - - "find . -type f -not -path '*/.git/*' -not -path '*/node_modules/*' -not -path '*/vendor/*' -not -path '*/dist/*' -not -path '*/build/*' -not -path '*/.next/*' -not -path '*/target/*' -not -path '*/__pycache__/*' -not -path '*/coverage/*' -not -path '*/venv/*' -not -path '*/.tox/*' -not -path '*/.mypy_cache/*' -name '*' -exec wc -l {} \\; 2>/dev/null" + - "git ls-tree -r --name-only HEAD" + - "git ls-tree -r -l --full-name HEAD" + - "git ls-tree -r --name-only HEAD | grep -E * | grep -vE * | xargs wc -l 2>/dev/null" + - "git ls-tree -r --name-only HEAD | grep -E * | xargs wc -l 2>/dev/null" - "wc -l *" - "head -n * *" - "grep -n * *" - - "find . -type f -name '*.go' -not -path '*_test.go' -not -path '*/vendor/*'" - - "find . -type f -name '*.py' -not -path '*/__pycache__/*' -not -path '*/venv/*'" - - "find . -type f -name '*.ts' -not -path '*/node_modules/*' -not -path '*/dist/*'" - - "find . -type f -name '*.js' -not -path '*/node_modules/*' -not -path '*/dist/*'" - - "find . -type f -name '*.rb' -not -path '*/vendor/*'" - - "find . 
-type f -name '*.java' -not -path '*/target/*'" - - "find . -type f -name '*.rs' -not -path '*/target/*'" - - "find . -type f -name '*.cs'" - - "find . -type f \\( -name '*.go' -o -name '*.py' -o -name '*.ts' -o -name '*.js' -o -name '*.rb' -o -name '*.java' -o -name '*.rs' -o -name '*.cs' -o -name '*.cpp' -o -name '*.c' \\) -not -path '*/node_modules/*' -not -path '*/vendor/*' -not -path '*/dist/*' -not -path '*/build/*' -not -path '*/target/*' -not -path '*/__pycache__/*' -exec wc -l {} \\; 2>/dev/null" - "sort *" - "cat *" @@ -67,14 +61,12 @@ First, determine the primary programming language(s) used in this repository. Th **For polyglot or unknown repos:** ```bash -find . -type f \( -name "*.go" -o -name "*.py" -o -name "*.ts" -o -name "*.js" -o -name "*.rb" -o -name "*.java" -o -name "*.rs" -o -name "*.cs" -o -name "*.cpp" -o -name "*.c" \) \ - -not -path "*/node_modules/*" \ - -not -path "*/vendor/*" \ - -not -path "*/dist/*" \ - -not -path "*/build/*" \ - -not -path "*/target/*" \ - -not -path "*/__pycache__/*" \ - -exec wc -l {} \; 2>/dev/null | sort -rn | head -20 +git ls-tree -r --name-only HEAD \ + | grep -E '\.(go|py|ts|tsx|js|jsx|rb|java|rs|cs|cpp|c|h|hpp)$' \ + | grep -vE '(_test\.go|\.test\.(ts|js)|\.spec\.(ts|js)|test_[^/]*\.py|[^/]*_test\.py)$' \ + | xargs wc -l 2>/dev/null \ + | sort -rn \ + | head -20 ``` Also skip test files (files ending in `_test.go`, `.test.ts`, `.spec.ts`, `.test.js`, `.spec.js`, `_test.py`, `test_*.py`, etc.) — focus on non-test production code.