Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion src/codegen/extensions/tools/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
This performs either a regex pattern match or simple text search across all files in the codebase.
Each matching line will be returned with its line number.
Results are paginated with a default of 10 files per page.
The search also includes filenames that match the query.
"""

import os
Expand Down Expand Up @@ -45,6 +46,10 @@ class SearchFileResult(Observation):
matches: list[SearchMatch] = Field(
description="List of matches found in this file",
)
filename_match: bool = Field(
default=False,
description="Whether the filename itself matched the search query",
)

str_template: ClassVar[str] = "{filepath}: {match_count} matches"

Expand All @@ -53,6 +58,8 @@ def render(self) -> str:
lines = [
f"📄 {self.filepath}",
]
if self.filename_match:
lines.append(" (filename matches search query)")
for match in self.matches:
lines.append(match.render())
return "\n".join(lines)
Expand Down Expand Up @@ -158,6 +165,7 @@ def _search_with_ripgrep(

# Parse the output
all_results: dict[str, list[SearchMatch]] = {}
filename_matches: set[str] = set()

# ripgrep returns non-zero exit code when no matches are found
if result.returncode != 0 and result.returncode != 1:
Expand Down Expand Up @@ -215,6 +223,40 @@ def _search_with_ripgrep(
# Skip lines with invalid line numbers
continue

# Now search for filename matches.
# The command is passed to subprocess as an argv list (no shell), so every
# `find` token must be its own list element and glob patterns must NOT carry
# shell-style quotes: a quoted '"*.py"' would reach `find` with the quote
# characters as literal parts of the pattern and never match a real file.
# NOTE(review): `query` is used as a glob fragment here, so glob
# metacharacters in it (*, ?, [) are interpreted by `find` — confirm that is
# the intended behavior for regex queries.
filename_cmd = ["find", search_path, "-type", "f", "-name", f"*{query}*"]
if file_extensions:
    # Build: ( -name *.ext1 -o -name *.ext2 ... ), implicitly AND-ed with
    # the filename test above.
    extension_tests: list[str] = []
    for ext in file_extensions:
        if extension_tests:
            extension_tests.append("-o")
        extension_tests.extend(["-name", f"*.{ext.lstrip('.')}"])
    filename_cmd.extend(["(", *extension_tests, ")"])

try:
    filename_result = subprocess.run(
        filename_cmd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        check=False,
    )

    if filename_result.returncode == 0 and filename_result.stdout:
        for filepath in filename_result.stdout.splitlines():
            if not filepath:
                continue
            rel_path = os.path.relpath(filepath, codebase.repo_path)
            filename_matches.add(rel_path)
            # If this file doesn't already have content matches, add it with empty matches
            if rel_path not in all_results:
                all_results[rel_path] = []
except (OSError, ValueError):
    # Filename search is best-effort: if `find` is unavailable (OSError), its
    # output cannot be decoded as UTF-8, or a path cannot be made relative
    # (both ValueError), continue with the content matches only.
    pass

# Convert to SearchFileResult objects
file_results = []
for filepath, matches in all_results.items():
Expand All @@ -223,6 +265,7 @@ def _search_with_ripgrep(
status="success",
filepath=filepath,
matches=sorted(matches, key=lambda x: x.line_number),
filename_match=filepath in filename_matches,
)
)

Expand Down Expand Up @@ -294,6 +337,20 @@ def _search_with_python(
extensions = file_extensions if file_extensions is not None else "*"

all_results = []
filename_matches = set()

# First, record the path of every file whose base name matches the query:
# a regex search when use_regex is set, otherwise a case-insensitive
# substring test.
lowered_query = None if use_regex else query.lower()
for candidate in codebase.files(extensions=extensions):
    base_name = os.path.basename(candidate.filepath)
    if use_regex:
        name_hit = pattern.search(base_name) is not None
    else:
        name_hit = lowered_query in base_name.lower()
    if name_hit:
        filename_matches.add(candidate.filepath)

# Then search file contents
for file in codebase.files(extensions=extensions):
# Skip binary files
try:
Expand All @@ -318,12 +375,14 @@ def _search_with_python(
)
)

if file_matches:
# Add file to results if it has content matches or if the filename matched
if file_matches or file.filepath in filename_matches:
all_results.append(
SearchFileResult(
status="success",
filepath=file.filepath,
matches=sorted(file_matches, key=lambda x: x.line_number),
filename_match=file.filepath in filename_matches,
)
)

Expand Down Expand Up @@ -365,6 +424,7 @@ def search(
Otherwise, performs a case-insensitive text search.
Returns matching lines with their line numbers, grouped by file.
Results are paginated by files, with a default of 10 files per page.
Also includes files whose names match the search query.

Args:
codebase: The codebase to operate on
Expand Down
Loading