# Patch summary (free-text preamble; everything before the first "diff --git"
# header is commentary and not part of any hunk).
#
# Purpose: make git-backed file discovery and change detection work for files
# that live under non-ASCII (CJK) directory names (issue #541).
#
# src/extraction/index.ts
#   * collectGitFiles: both `git ls-files` calls (tracked: `-c
#     --recurse-submodules`; untracked: `-o --exclude-standard`) now pass `-z`,
#     and their output is split on '\0' instead of '\n'. Locals are renamed
#     line -> entry and trimmed -> filePath; the embedded-repo recursion on
#     trailing-slash entries is otherwise unchanged.
#   * getGitChangedFiles: `git status --porcelain --no-renames` now passes `-z`
#     and entries are split on '\0'. Each entry is still parsed as a
#     two-character status code followed by the path starting at index 3.
#   * Rationale (from the git documentation, not visible in this patch): `-z`
#     emits paths verbatim and NUL-terminated, whereas the default output may
#     quote/escape non-ASCII bytes in path names.
#
# __tests__/extraction.test.ts
#   * New test builds a throwaway git repo under tempDir with
#     src/english/Foo.cs and src/中文目录/Baz.cs, commits both, and expects
#     scanDirectory(root) to contain both forward-slash relative paths.
#
# __tests__/sync.test.ts
#   * New test: an untracked src/中文目录/new.ts must be reported by cg.sync()
#     (filesAdded === 1, path present in changedFilePaths, symbol searchable).
#   * New test: a committed src/中文目录/tracked.ts that is rewritten on disk
#     must appear in getChangedFiles().modified and be re-indexed by cg.sync().
#
# NOTE(review): entries are still passed through .trim() after splitting on
#   NUL. A path that genuinely begins or ends with whitespace would be altered
#   by that call -- confirm whether trimming is still wanted with `-z`.
# NOTE(review): the modified-file test asserts that searchNodes('cjkTracked')
#   still returns results after the function was rewritten as
#   renamedCjkTracked. Presumably this relies on searchNodes matching
#   substrings or being case-insensitive -- confirm that is the intended check
#   rather than asserting the old symbol is gone.
# NOTE(review): the inline comment `// Minimum: "XY file"` sits next to a
#   `length < 4` guard, so the shortest accepted entry is actually
#   "XY " plus a one-character path.
#
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index b497af6a9..733512bed 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3393,6 +3393,37 @@ describe('Directory Exclusion', () => { expect(files[0]).toBe('src/components/Button.tsx'); expect(files[0]).not.toContain('\\'); }); + + it('should index tracked files under CJK directories in git repos (issue #541)', async () => { + const { execFileSync } = await import('child_process'); + const git = (cwd: string, ...args: string[]) => + execFileSync('git', args, { cwd, stdio: 'pipe' }); + + const root = path.join(tempDir, 'cjk-root'); + fs.mkdirSync(path.join(root, 'src', 'english'), { recursive: true }); + fs.mkdirSync(path.join(root, 'src', '中文目录'), { recursive: true }); + + git(root, 'init', '-q'); + git(root, 'config', 'user.email', 'test@test.com'); + git(root, 'config', 'user.name', 'Test'); + + fs.writeFileSync( + path.join(root, 'src', 'english', 'Foo.cs'), + 'namespace Demo;\npublic class Foo { public void Bar() {} }\n' + ); + fs.writeFileSync( + path.join(root, 'src', '中文目录', 'Baz.cs'), + 'namespace Demo;\npublic class Baz { public void Qux() {} }\n' + ); + + git(root, 'add', '-A'); + git(root, 'commit', '-q', '-m', 'cjk paths'); + + const files = scanDirectory(root); + + expect(files).toContain('src/english/Foo.cs'); + expect(files).toContain('src/中文目录/Baz.cs'); + }); }); describe('Git Submodules', () => { diff --git a/__tests__/sync.test.ts b/__tests__/sync.test.ts index 708a92a42..71c9d4611 100644 --- a/__tests__/sync.test.ts +++ b/__tests__/sync.test.ts @@ -225,6 +225,41 @@ describe('Sync Module', () => { expect(nodes.length).toBeGreaterThan(0); }); + it('should detect new untracked files under CJK directories via git', async () => { + fs.mkdirSync(path.join(testDir, 'src', '中文目录'), { recursive: true }); + fs.writeFileSync( + path.join(testDir, 'src', '中文目录', 'new.ts'), + `export function cjkFunc() { return 42; }` + ); + + const result = await 
cg.sync(); + + expect(result.filesAdded).toBe(1); + expect(result.changedFilePaths).toContain('src/中文目录/new.ts'); + expect(cg.searchNodes('cjkFunc').length).toBeGreaterThan(0); + }); + + it('should detect modified tracked files under CJK directories via git', async () => { + const cjkDir = path.join(testDir, 'src', '中文目录'); + const filePath = path.join(cjkDir, 'tracked.ts'); + fs.mkdirSync(cjkDir, { recursive: true }); + fs.writeFileSync(filePath, `export function cjkTracked() { return 1; }`); + git('add', '-A'); + git('commit', '-m', 'add cjk tracked file'); + await cg.sync(); + + fs.writeFileSync(filePath, `export function renamedCjkTracked() { return 7; }`); + + const changes = cg.getChangedFiles(); + expect(changes.modified).toContain('src/中文目录/tracked.ts'); + + const result = await cg.sync(); + expect(result.filesModified).toBe(1); + expect(result.changedFilePaths).toContain('src/中文目录/tracked.ts'); + expect(cg.searchNodes('renamedCjkTracked').length).toBeGreaterThan(0); + expect(cg.searchNodes('cjkTracked').length).toBeGreaterThan(0); + }); + it('should stop reporting untracked files once they are indexed (issue #206)', async () => { // Untracked files stay `??` in git status even after codegraph indexes // them. Change detection must compare them against the DB by hash, not diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 42037d7f6..30cc7252c 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -198,32 +198,32 @@ function collectGitFiles(repoDir: string, prefix: string, files: Set): v // Without this, monorepos using submodules index 0 files. (See issue #147.) // Note: --recurse-submodules only supports -c/--cached and --stage modes — it // can't be combined with -o, so untracked files are gathered separately below. 
- const tracked = execFileSync('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts); - for (const line of tracked.split('\n')) { - const trimmed = line.trim(); - if (trimmed) { - files.add(normalizePath(prefix + trimmed)); + const tracked = execFileSync('git', ['ls-files', '-z', '-c', '--recurse-submodules'], gitOpts); + for (const entry of tracked.split('\0')) { + const filePath = entry.trim(); + if (filePath) { + files.add(normalizePath(prefix + filePath)); } } // Untracked files (submodules manage their own untracked state). Embedded git // repos surface here as a single "subdir/" entry that git refuses to descend // into — recurse into those as their own repos so their source gets indexed. - const untracked = execFileSync('git', ['ls-files', '-o', '--exclude-standard'], gitOpts); - for (const line of untracked.split('\n')) { - const trimmed = line.trim(); - if (!trimmed) continue; - if (trimmed.endsWith('/')) { + const untracked = execFileSync('git', ['ls-files', '-z', '-o', '--exclude-standard'], gitOpts); + for (const entry of untracked.split('\0')) { + const filePath = entry.trim(); + if (!filePath) continue; + if (filePath.endsWith('/')) { // git only emits a trailing-slash directory entry for an embedded repo. // Guard with a .git check anyway, and skip anything else exactly as git // itself skips it (we never descend into a non-repo opaque dir). 
- const childDir = path.join(repoDir, trimmed); + const childDir = path.join(repoDir, filePath); if (fs.existsSync(path.join(childDir, '.git'))) { - collectGitFiles(childDir, prefix + trimmed, files); + collectGitFiles(childDir, prefix + filePath, files); } continue; } - files.add(normalizePath(prefix + trimmed)); + files.add(normalizePath(prefix + filePath)); } } @@ -290,7 +290,7 @@ function getGitChangedFiles(rootDir: string): GitChanges | null { try { const output = execFileSync( 'git', - ['status', '--porcelain', '--no-renames'], + ['status', '--porcelain', '-z', '--no-renames'], { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true } ); @@ -298,11 +298,11 @@ function getGitChangedFiles(rootDir: string): GitChanges | null { const added: string[] = []; const deleted: string[] = []; - for (const line of output.split('\n')) { - if (line.length < 4) continue; // Minimum: "XY file" + for (const entry of output.split('\0')) { + if (entry.length < 4) continue; // Minimum: "XY file" - const statusCode = line.substring(0, 2); - const filePath = normalizePath(line.substring(3)); + const statusCode = entry.substring(0, 2); + const filePath = normalizePath(entry.substring(3)); // Skip non-source files (git status already omits .gitignored paths). if (!isSourceFile(filePath)) continue;