Skip to content

Commit e98409d

Browse files
committed
Better fuzzy matching for '@' file search
1 parent 1bc1490 commit e98409d

File tree

2 files changed

+172
-109
lines changed

2 files changed

+172
-109
lines changed

cli/src/hooks/use-suggestion-engine.ts

Lines changed: 151 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { promises as fs } from 'fs'
22

33
import {
4-
getAllFilePaths,
4+
getAllPathsWithDirectories,
55
getProjectFileTree,
6+
type PathInfo,
67
} from '@codebuff/common/project-file-tree'
78
import { useDeferredValue, useEffect, useMemo, useRef, useState } from 'react'
89

@@ -269,11 +270,13 @@ export type MatchedAgentInfo = Prettify<
269270

270271
export type MatchedFileInfo = Prettify<{
271272
filePath: string
273+
isDirectory: boolean
272274
pathHighlightIndices?: number[] | null
275+
matchScore?: number
273276
}>
274277

275-
const flattenFileTree = (nodes: FileTreeNode[]): string[] =>
276-
getAllFilePaths(nodes)
278+
/**
 * Flattens the file tree into a single list of path entries by delegating to
 * `getAllPathsWithDirectories`.
 */
const flattenFileTree = (nodes: FileTreeNode[]): PathInfo[] => {
  return getAllPathsWithDirectories(nodes)
}
277280

278281
const getFileName = (filePath: string): string => {
279282
const lastSlash = filePath.lastIndexOf('/')
@@ -297,8 +300,72 @@ const createPushUnique = <T, K>(
297300
}
298301
}
299302

303+
/**
 * Fuzzy-matches `query` against `text`: every query character must occur in
 * `text` in order (case-insensitively), with gaps allowed between matches.
 * Matching is greedy: each query character binds to the first remaining
 * occurrence in `text`.
 *
 * @param text - The candidate string (for example a file path) to search in.
 * @param query - The characters to look for, in order.
 * @returns `null` when some query character cannot be found. Otherwise the
 *   matched positions as indices into the original `text`, plus a score where
 *   lower is better. An empty query matches with no indices and a score of 0.
 */
const fuzzyMatch = (
  text: string,
  query: string,
): { indices: number[]; score: number } | null => {
  const textLower = text.toLowerCase()
  const queryLower = query.toLowerCase()

  // Lowercasing can lengthen a string (e.g. '\u0130' becomes two code units),
  // which shifts every later position. In that case build a table that maps
  // each position in `textLower` back to the code unit of `text` it came from,
  // so highlight indices and the boundary check below address `text` itself.
  let originalIdxByLowerIdx: number[] | null = null
  if (textLower.length !== text.length) {
    const table: number[] = []
    let offset = 0
    for (const codePoint of text) {
      const loweredLength = codePoint.toLowerCase().length
      for (let k = 0; k < loweredLength; k++) {
        table.push(offset + Math.min(k, codePoint.length - 1))
      }
      offset += codePoint.length
    }
    // Only trust the table when it lines up with the lowercased string.
    if (table.length === textLower.length) {
      originalIdxByLowerIdx = table
    }
  }

  const indices: number[] = []
  let textIdx = 0
  let lastMatchIdx = -1
  let gaps = 0
  let consecutiveMatches = 0
  let maxConsecutive = 0

  for (let queryIdx = 0; queryIdx < queryLower.length; queryIdx++) {
    const char = queryLower[queryIdx]
    let found = false

    while (textIdx < textLower.length) {
      if (textLower[textIdx] === char) {
        const matchIdx = originalIdxByLowerIdx?.[textIdx] ?? textIdx

        // Two lowercased units can stem from the same original code unit;
        // record that position only once.
        if (matchIdx !== lastMatchIdx) {
          if (lastMatchIdx >= 0 && matchIdx > lastMatchIdx + 1) {
            // Skipped characters since the previous match: count them as a
            // gap and start a new consecutive run.
            gaps += matchIdx - lastMatchIdx - 1
            consecutiveMatches = 1
          } else {
            consecutiveMatches++
            maxConsecutive = Math.max(maxConsecutive, consecutiveMatches)
          }
          indices.push(matchIdx)
          lastMatchIdx = matchIdx
        }

        textIdx++
        found = true
        break
      }
      textIdx++
    }

    if (!found) return null
  }

  // Capture final consecutive run
  maxConsecutive = Math.max(maxConsecutive, consecutiveMatches)

  // Score: lower is better
  // - Fewer gaps = better
  // - Longer consecutive matches = better
  // - Matches at word boundaries (start of text or right after /) = better
  const boundaryBonus = indices.filter(
    (idx) => idx === 0 || text[idx - 1] === '/',
  ).length

  const score =
    gaps * 10 -
    maxConsecutive * 5 -
    boundaryBonus * 15 +
    (indices[0] ?? 0) // Prefer matches that start earlier

  return { indices, score }
}
366+
300367
const filterFileMatches = (
301-
filePaths: string[],
368+
pathInfos: PathInfo[],
302369
query: string,
303370
): MatchedFileInfo[] => {
304371
if (!query) {
@@ -318,138 +385,110 @@ const filterFileMatches = (
318385
const querySegments = normalized.split('/')
319386
const hasSlashes = querySegments.length > 1
320387

321-
// Helper to calculate the longest contiguous match length in the file path
322-
const calculateContiguousMatchLength = (filePath: string): number => {
323-
const pathLower = filePath.toLowerCase()
324-
let maxContiguousLength = 0
325-
326-
// Try to find the longest contiguous substring that matches the query pattern
327-
for (let i = 0; i < pathLower.length; i++) {
328-
let matchLength = 0
329-
let queryIdx = 0
330-
let pathIdx = i
331-
332-
// Try to match as many characters as possible from this position
333-
while (pathIdx < pathLower.length && queryIdx < normalized.length) {
334-
if (pathLower[pathIdx] === normalized[queryIdx]) {
335-
matchLength++
336-
queryIdx++
337-
pathIdx++
338-
} else {
339-
break
340-
}
341-
}
342-
343-
maxContiguousLength = Math.max(maxContiguousLength, matchLength)
344-
}
345-
346-
return maxContiguousLength
347-
}
348-
349-
// Helper to match path segments
350-
const matchPathSegments = (filePath: string): number[] | null => {
388+
// Helper to match path segments (for queries with /)
389+
const matchPathSegments = (filePath: string): { indices: number[]; score: number } | null => {
351390
const pathLower = filePath.toLowerCase()
352391
const highlightIndices: number[] = []
353392
let searchStart = 0
393+
let totalGaps = 0
354394

355395
for (const segment of querySegments) {
356396
if (!segment) continue
357-
397+
358398
const segmentIndex = pathLower.indexOf(segment, searchStart)
359399
if (segmentIndex === -1) {
360400
return null
361401
}
362402

363-
// Add highlight indices for this segment
403+
// Count gaps between segments
404+
if (searchStart > 0) {
405+
totalGaps += segmentIndex - searchStart
406+
}
407+
364408
for (let i = 0; i < segment.length; i++) {
365409
highlightIndices.push(segmentIndex + i)
366410
}
367411

368412
searchStart = segmentIndex + segment.length
369413
}
370414

371-
return highlightIndices
415+
const score = totalGaps * 5 + filePath.length
416+
return { indices: highlightIndices, score }
372417
}
373418

374-
if (hasSlashes) {
375-
// Slash-separated path matching
376-
for (const filePath of filePaths) {
377-
const highlightIndices = matchPathSegments(filePath)
378-
if (highlightIndices) {
379-
pushUnique(matches, {
380-
filePath,
381-
pathHighlightIndices: highlightIndices,
382-
})
383-
}
419+
for (const { path: filePath, isDirectory } of pathInfos) {
420+
if (seen.has(filePath)) continue
421+
422+
const fileName = getFileName(filePath)
423+
const fileNameLower = fileName.toLowerCase()
424+
const pathLower = filePath.toLowerCase()
425+
426+
let matchResult: { indices: number[]; score: number } | null = null
427+
428+
if (hasSlashes) {
429+
// Try path segment matching first
430+
matchResult = matchPathSegments(filePath)
384431
}
385432

386-
// Sort by contiguous match length (longest first)
387-
matches.sort((a, b) => {
388-
const aLength = calculateContiguousMatchLength(a.filePath)
389-
const bLength = calculateContiguousMatchLength(b.filePath)
390-
return bLength - aLength
391-
})
392-
} else {
393-
// Original logic for non-slash queries
394-
395-
// Prefix of file name
396-
for (const filePath of filePaths) {
397-
const fileName = getFileName(filePath)
398-
const fileNameLower = fileName.toLowerCase()
399-
400-
if (fileNameLower.startsWith(normalized)) {
401-
pushUnique(matches, {
402-
filePath,
403-
pathHighlightIndices: createHighlightIndices(
404-
filePath.lastIndexOf(fileName),
405-
filePath.lastIndexOf(fileName) + normalized.length,
406-
),
407-
})
408-
continue
433+
if (!matchResult) {
434+
// Try exact prefix of full path (highest priority)
435+
if (pathLower.startsWith(normalized)) {
436+
matchResult = {
437+
indices: createHighlightIndices(0, normalized.length),
438+
score: -1000 + filePath.length, // Very high priority
439+
}
409440
}
410-
411-
const path = filePath.toLowerCase()
412-
if (path.startsWith(normalized)) {
413-
pushUnique(matches, {
414-
filePath,
415-
pathHighlightIndices: createHighlightIndices(0, normalized.length),
416-
})
441+
// Try prefix of filename
442+
else if (fileNameLower.startsWith(normalized)) {
443+
const fileNameStart = filePath.lastIndexOf(fileName)
444+
matchResult = {
445+
indices: createHighlightIndices(fileNameStart, fileNameStart + normalized.length),
446+
score: -500 + filePath.length, // High priority
447+
}
448+
}
449+
// Try substring match in path
450+
else if (pathLower.includes(normalized)) {
451+
const idx = pathLower.indexOf(normalized)
452+
matchResult = {
453+
indices: createHighlightIndices(idx, idx + normalized.length),
454+
score: -100 + idx + filePath.length,
455+
}
456+
}
457+
// Try fuzzy match as fallback
458+
else {
459+
matchResult = fuzzyMatch(filePath, normalized)
417460
}
418461
}
419462

420-
// Substring of file name or path
421-
for (const filePath of filePaths) {
422-
if (seen.has(filePath)) continue
423-
const path = filePath.toLowerCase()
424-
const fileName = getFileName(filePath)
425-
const fileNameLower = fileName.toLowerCase()
426-
427-
const fileNameIndex = fileNameLower.indexOf(normalized)
428-
if (fileNameIndex !== -1) {
429-
const actualFileNameStart = filePath.lastIndexOf(fileName)
430-
pushUnique(matches, {
431-
filePath,
432-
pathHighlightIndices: createHighlightIndices(
433-
actualFileNameStart + fileNameIndex,
434-
actualFileNameStart + fileNameIndex + normalized.length,
435-
),
436-
})
437-
continue
438-
}
463+
if (matchResult) {
464+
// Adjust score: prefer shorter paths
465+
const lengthPenalty = filePath.length * 2
466+
467+
// Give bonus for exact directory matches (query matches the full path)
468+
// e.g. "cli" should prioritize "cli/" directory over "cli/package.json"
469+
const isExactMatch = pathLower === normalized
470+
const isExactDirMatch = isDirectory && isExactMatch
471+
const exactMatchBonus = isExactDirMatch ? -500 : 0
472+
473+
// Only penalize directories when they're not an exact or prefix match
474+
// This ensures "cli/" appears before "cli/src/file.ts" when searching "cli"
475+
const isPrefixMatch = pathLower.startsWith(normalized)
476+
const dirPenalty = isDirectory && !isPrefixMatch ? 50 : 0
477+
478+
const finalScore = matchResult.score + lengthPenalty + dirPenalty + exactMatchBonus
439479

440-
const pathIndex = path.indexOf(normalized)
441-
if (pathIndex !== -1) {
442-
pushUnique(matches, {
443-
filePath,
444-
pathHighlightIndices: createHighlightIndices(
445-
pathIndex,
446-
pathIndex + normalized.length,
447-
),
448-
})
449-
}
480+
pushUnique(matches, {
481+
filePath,
482+
isDirectory,
483+
pathHighlightIndices: matchResult.indices,
484+
matchScore: finalScore,
485+
})
450486
}
451487
}
452488

489+
// Sort by score (lower is better)
490+
matches.sort((a, b) => (a.matchScore ?? 0) - (b.matchScore ?? 0))
491+
453492
return matches
454493
}
455494

@@ -564,7 +603,7 @@ export const useSuggestionEngine = ({
564603
new Map<string, MatchedFileInfo[]>(),
565604
)
566605
const fileRefreshIdRef = useRef(0)
567-
const [filePaths, setFilePaths] = useState<string[]>(() =>
606+
const [filePaths, setFilePaths] = useState<PathInfo[]>(() =>
568607
flattenFileTree(fileTree),
569608
)
570609

@@ -712,17 +751,20 @@ export const useSuggestionEngine = ({
712751
return fileMatches.map((file) => {
713752
const fileName = getFileName(file.filePath)
714753
const isRootLevel = !file.filePath.includes('/')
754+
// Show directories with trailing / in the label
755+
const displayLabel = file.isDirectory ? `${fileName}/` : fileName
756+
const displayPath = file.isDirectory ? `${file.filePath}/` : file.filePath
715757

716758
return {
717759
id: file.filePath,
718-
label: fileName,
760+
label: displayLabel,
719761
labelHighlightIndices: file.pathHighlightIndices
720762
? file.pathHighlightIndices.map((idx) => {
721763
const fileNameStart = file.filePath.lastIndexOf(fileName)
722764
return idx >= fileNameStart ? idx - fileNameStart : -1
723765
}).filter((idx) => idx >= 0)
724766
: null,
725-
description: isRootLevel ? '.' : file.filePath,
767+
description: isRootLevel ? '.' : displayPath,
726768
descriptionHighlightIndices: isRootLevel ? null : file.pathHighlightIndices,
727769
}
728770
})

common/src/project-file-tree.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,27 @@ export function getAllFilePaths(
243243
})
244244
}
245245

246+
/**
 * One entry of a flattened file tree, as produced by
 * `getAllPathsWithDirectories`.
 */
export interface PathInfo {
  /** Path of the entry: its ancestors' names and its own name joined together. */
  path: string
  /** `true` for directory entries, `false` for file entries. */
  isDirectory: boolean
}
250+
251+
/**
 * Flattens a file tree into a list of every path it contains, directories
 * included. Entries come out in pre-order: a directory is listed immediately
 * before the entries found beneath it.
 *
 * @param nodes - Tree nodes to walk at the current level.
 * @param basePath - Path prefix joined in front of each node name; empty for
 *   the top level, in which case the bare node name is used.
 * @returns One `PathInfo` per file and per directory.
 */
export function getAllPathsWithDirectories(
  nodes: FileTreeNode[],
  basePath: string = '',
): PathInfo[] {
  const collected: PathInfo[] = []

  for (const entry of nodes) {
    const fullPath = basePath ? path.join(basePath, entry.name) : entry.name

    if (entry.type === 'file') {
      collected.push({ path: fullPath, isDirectory: false })
      continue
    }

    // Anything that is not a file is treated as a directory: list it, then
    // append whatever the recursive walk finds beneath it.
    collected.push({ path: fullPath, isDirectory: true })
    const nested = getAllPathsWithDirectories(entry.children || [], fullPath)
    for (const descendant of nested) {
      collected.push(descendant)
    }
  }

  return collected
}
265+
}
266+
246267
export function flattenTree(nodes: FileTreeNode[]): FileTreeNode[] {
247268
return nodes.flatMap((node) => {
248269
if (node.type === 'file') {

0 commit comments

Comments
 (0)