Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions __tests__/extraction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3393,6 +3393,37 @@ describe('Directory Exclusion', () => {
expect(files[0]).toBe('src/components/Button.tsx');
expect(files[0]).not.toContain('\\');
});

// Regression test for issue #541: builds a throwaway git repo containing one
// ASCII-named and one CJK-named source directory, commits a file in each, and
// checks that scanDirectory() reports both with forward-slash relative paths.
it('should index tracked files under CJK directories in git repos (issue #541)', async () => {
  const childProcess = await import('child_process');
  const repoRoot = path.join(tempDir, 'cjk-root');
  const srcDir = path.join(repoRoot, 'src');

  // Runs a single git subcommand inside the fixture repo, capturing its output.
  function runGit(...gitArgs: string[]): void {
    childProcess.execFileSync('git', gitArgs, { cwd: repoRoot, stdio: 'pipe' });
  }

  // One entry per fixture file: where it lives, what it contains, and the
  // path the scanner is expected to report for it.
  const fixtures = [
    {
      dir: 'english',
      file: 'Foo.cs',
      body: 'namespace Demo;\npublic class Foo { public void Bar() {} }\n',
      indexedAs: 'src/english/Foo.cs',
    },
    {
      dir: '中文目录',
      file: 'Baz.cs',
      body: 'namespace Demo;\npublic class Baz { public void Qux() {} }\n',
      indexedAs: 'src/中文目录/Baz.cs',
    },
  ];

  // Directories are created before the repo is initialised.
  for (const fixture of fixtures) {
    fs.mkdirSync(path.join(srcDir, fixture.dir), { recursive: true });
  }

  runGit('init', '-q');
  runGit('config', 'user.email', 'test@test.com');
  runGit('config', 'user.name', 'Test');

  for (const fixture of fixtures) {
    fs.writeFileSync(path.join(srcDir, fixture.dir, fixture.file), fixture.body);
  }

  // Commit everything so both files are tracked when the scan runs.
  runGit('add', '-A');
  runGit('commit', '-q', '-m', 'cjk paths');

  const indexed = scanDirectory(repoRoot);

  for (const fixture of fixtures) {
    expect(indexed).toContain(fixture.indexedAs);
  }
});
});

describe('Git Submodules', () => {
Expand Down
35 changes: 35 additions & 0 deletions __tests__/sync.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,41 @@ describe('Sync Module', () => {
expect(nodes.length).toBeGreaterThan(0);
});

// A file created under a CJK-named directory and never staged must be
// reported by sync() as exactly one added file and become searchable.
it('should detect new untracked files under CJK directories via git', async () => {
  const cjkDir = path.join(testDir, 'src', '中文目录');
  const newFile = path.join(cjkDir, 'new.ts');

  fs.mkdirSync(cjkDir, { recursive: true });
  fs.writeFileSync(newFile, `export function cjkFunc() { return 42; }`);

  const syncResult = await cg.sync();

  // The reported path is repo-relative and uses forward slashes.
  expect(syncResult.filesAdded).toBe(1);
  expect(syncResult.changedFilePaths).toContain('src/中文目录/new.ts');

  const matches = cg.searchNodes('cjkFunc');
  expect(matches.length).toBeGreaterThan(0);
});

// Verifies that editing an already-committed file under a CJK-named directory
// is reported as a modification by both getChangedFiles() and sync().
it('should detect modified tracked files under CJK directories via git', async () => {
const cjkDir = path.join(testDir, 'src', '中文目录');
const filePath = path.join(cjkDir, 'tracked.ts');
fs.mkdirSync(cjkDir, { recursive: true });
fs.writeFileSync(filePath, `export function cjkTracked() { return 1; }`);
// `git` is a helper defined outside this block — presumably it runs git in
// testDir; verify against the suite setup.
git('add', '-A');
git('commit', '-m', 'add cjk tracked file');
// First sync runs while the committed content is still unchanged on disk,
// presumably to give the index a baseline before the edit below.
await cg.sync();

// Rewrite the tracked file with a differently named export.
fs.writeFileSync(filePath, `export function renamedCjkTracked() { return 7; }`);

// The change must be visible before syncing, with a forward-slash relative path.
const changes = cg.getChangedFiles();
expect(changes.modified).toContain('src/中文目录/tracked.ts');

const result = await cg.sync();
expect(result.filesModified).toBe(1);
expect(result.changedFilePaths).toContain('src/中文目录/tracked.ts');
expect(cg.searchNodes('renamedCjkTracked').length).toBeGreaterThan(0);
// NOTE(review): `cjkTracked` no longer exists as its own symbol after the
// rewrite above. This assertion presumably passes only if searchNodes matches
// substrings or case-insensitively (e.g. against `renamedCjkTracked`) —
// confirm whether the intent was to assert the old symbol is gone.
expect(cg.searchNodes('cjkTracked').length).toBeGreaterThan(0);
});

it('should stop reporting untracked files once they are indexed (issue #206)', async () => {
// Untracked files stay `??` in git status even after codegraph indexes
// them. Change detection must compare them against the DB by hash, not
Expand Down
36 changes: 18 additions & 18 deletions src/extraction/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,32 +198,32 @@ function collectGitFiles(repoDir: string, prefix: string, files: Set<string>): v
// Without this, monorepos using submodules index 0 files. (See issue #147.)
// Note: --recurse-submodules only supports -c/--cached and --stage modes — it
// can't be combined with -o, so untracked files are gathered separately below.
const tracked = execFileSync('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts);
for (const line of tracked.split('\n')) {
const trimmed = line.trim();
if (trimmed) {
files.add(normalizePath(prefix + trimmed));
const tracked = execFileSync('git', ['ls-files', '-z', '-c', '--recurse-submodules'], gitOpts);
for (const entry of tracked.split('\0')) {
const filePath = entry.trim();
if (filePath) {
files.add(normalizePath(prefix + filePath));
}
}

// Untracked files (submodules manage their own untracked state). Embedded git
// repos surface here as a single "subdir/" entry that git refuses to descend
// into — recurse into those as their own repos so their source gets indexed.
const untracked = execFileSync('git', ['ls-files', '-o', '--exclude-standard'], gitOpts);
for (const line of untracked.split('\n')) {
const trimmed = line.trim();
if (!trimmed) continue;
if (trimmed.endsWith('/')) {
const untracked = execFileSync('git', ['ls-files', '-z', '-o', '--exclude-standard'], gitOpts);
for (const entry of untracked.split('\0')) {
const filePath = entry.trim();
if (!filePath) continue;
if (filePath.endsWith('/')) {
// git only emits a trailing-slash directory entry for an embedded repo.
// Guard with a .git check anyway, and skip anything else exactly as git
// itself skips it (we never descend into a non-repo opaque dir).
const childDir = path.join(repoDir, trimmed);
const childDir = path.join(repoDir, filePath);
if (fs.existsSync(path.join(childDir, '.git'))) {
collectGitFiles(childDir, prefix + trimmed, files);
collectGitFiles(childDir, prefix + filePath, files);
}
continue;
}
files.add(normalizePath(prefix + trimmed));
files.add(normalizePath(prefix + filePath));
}
}

Expand Down Expand Up @@ -290,19 +290,19 @@ function getGitChangedFiles(rootDir: string): GitChanges | null {
try {
const output = execFileSync(
'git',
['status', '--porcelain', '--no-renames'],
['status', '--porcelain', '-z', '--no-renames'],
{ cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true }
);

const modified: string[] = [];
const added: string[] = [];
const deleted: string[] = [];

for (const line of output.split('\n')) {
if (line.length < 4) continue; // Minimum: "XY file"
for (const entry of output.split('\0')) {
if (entry.length < 4) continue; // Minimum: "XY file"

const statusCode = line.substring(0, 2);
const filePath = normalizePath(line.substring(3));
const statusCode = entry.substring(0, 2);
const filePath = normalizePath(entry.substring(3));

// Skip non-source files (git status already omits .gitignored paths).
if (!isSourceFile(filePath)) continue;
Expand Down