From 81d727cd7d1cfb00535ce8b67af52a3baeaae60e Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 18 Mar 2025 11:28:35 -0700 Subject: [PATCH 1/6] fix: update searchbyfilename tool to paginate --- src/codegen/extensions/langchain/tools.py | 6 +- .../extensions/tools/search_files_by_name.py | 55 +++++++++++++++++-- 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py index 9b7acc2e4..2f82e1899 100644 --- a/src/codegen/extensions/langchain/tools.py +++ b/src/codegen/extensions/langchain/tools.py @@ -1097,6 +1097,8 @@ class SearchFilesByNameTool(BaseTool): """Tool for searching files by filename across a codebase.""" name: ClassVar[str] = "search_files_by_name" + page: int = 1 + files_per_page: int = 10 description: ClassVar[str] = """ Search for files and directories by glob pattern across the active codebase. This is useful when you need to: - Find specific file types (e.g., '*.py', '*.tsx') @@ -1106,9 +1108,11 @@ class SearchFilesByNameTool(BaseTool): args_schema: ClassVar[type[BaseModel]] = SearchFilesByNameInput codebase: Codebase = Field(exclude=True) + + def __init__(self, codebase: Codebase): super().__init__(codebase=codebase) def _run(self, pattern: str) -> str: """Execute the glob pattern search using fd.""" - return search_files_by_name(self.codebase, pattern).render() + return search_files_by_name(self.codebase, pattern, page=self.page, files_per_page=self.files_per_page).render() diff --git a/src/codegen/extensions/tools/search_files_by_name.py b/src/codegen/extensions/tools/search_files_by_name.py index bc595a25c..5795b639e 100644 --- a/src/codegen/extensions/tools/search_files_by_name.py +++ b/src/codegen/extensions/tools/search_files_by_name.py @@ -20,25 +20,47 @@ class SearchFilesByNameResultObservation(Observation): files: list[str] = Field( description="List of matching file paths", ) + page: int = Field( + description="Current page number (1-based)", + ) + total_pages: int = Field( + description="Total number of pages available", + ) + total_files: int = Field( + description="Total number of files with matches", + ) + files_per_page: int = Field( + description="Number of files shown per page", + ) - str_template: ClassVar[str] = "Found {total} files matching pattern: {pattern}" + str_template: ClassVar[str] = "Found {total_files} files matching pattern: {pattern} (page {page}/{total_pages})" @property def total(self) -> int: - return len(self.files) + return self.total_files def search_files_by_name( codebase: Codebase, pattern: str, + page: int = 1, + files_per_page: int = 10, ) -> SearchFilesByNameResultObservation: """Search for files by name pattern in the codebase. Args: codebase: The codebase to search in pattern: Glob pattern to search for (e.g. "*.py", "test_*.py") + page: Page number to return (1-based, default: 1) + files_per_page: Number of files to return per page (default: 10) """ try: + # Validate pagination parameters + if page < 1: + page = 1 + if files_per_page < 1: + files_per_page = 10 + if shutil.which("fd") is None: logger.warning("fd is not installed, falling back to find") results = subprocess.check_output( @@ -46,7 +68,7 @@ def search_files_by_name( cwd=codebase.repo_path, timeout=30, ) - files = [path.removeprefix("./") for path in results.decode("utf-8").strip().split("\n")] if results.strip() else [] + all_files = [path.removeprefix("./") for path in results.decode("utf-8").strip().split("\n")] if results.strip() else [] else: logger.info(f"Searching for files with pattern: {pattern}") @@ -55,12 +77,31 @@ def search_files_by_name( cwd=codebase.repo_path, timeout=30, ) - files = results.decode("utf-8").strip().split("\n") if results.strip() else [] + all_files = results.decode("utf-8").strip().split("\n") if results.strip() else [] + + # Sort files for consistent pagination + all_files.sort() + + # Calculate pagination + total_files = len(all_files) + total_pages = (total_files + files_per_page - 1) // files_per_page if total_files > 0 else 1 + + # Ensure page is within valid range + page = min(page, total_pages) + + # Get paginated results + start_idx = (page - 1) * files_per_page + end_idx = start_idx + files_per_page + paginated_files = all_files[start_idx:end_idx] return SearchFilesByNameResultObservation( status="success", pattern=pattern, - files=files, + files=paginated_files, + page=page, + total_pages=total_pages, + total_files=total_files, + files_per_page=files_per_page, ) except Exception as e: @@ -69,4 +110,8 @@ def search_files_by_name( error=f"Error searching files: {e!s}", pattern=pattern, files=[], + page=page, + total_pages=0, + total_files=0, + files_per_page=files_per_page, ) From dd83a61f3f0261bd83390016d849909564e4810e Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 18 Mar 2025 11:58:10 -0700 Subject: [PATCH 2/6] fix: enabled infinite page size for global edit --- src/codegen/extensions/tools/global_replacement_edit.py | 3 ++- src/codegen/extensions/tools/search_files_by_name.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/codegen/extensions/tools/global_replacement_edit.py b/src/codegen/extensions/tools/global_replacement_edit.py index 4717b2f58..7773aa885 100644 --- a/src/codegen/extensions/tools/global_replacement_edit.py +++ b/src/codegen/extensions/tools/global_replacement_edit.py @@ -2,6 +2,7 @@ import difflib import logging +import math import re from typing import ClassVar @@ -103,7 +104,7 @@ def replacement_edit_global( ) diffs = [] - for file in search_files_by_name(codebase, file_pattern).files: + for file in search_files_by_name(codebase, file_pattern, page=1, files_per_page=math.inf).files: if count is not None and count <= 0: break try: diff --git a/src/codegen/extensions/tools/search_files_by_name.py b/src/codegen/extensions/tools/search_files_by_name.py index 5795b639e..98310250e 100644 --- a/src/codegen/extensions/tools/search_files_by_name.py +++ b/src/codegen/extensions/tools/search_files_by_name.py @@ -1,6 +1,7 @@ +import math import shutil import subprocess -from typing import ClassVar +from typing import ClassVar, Optional from pydantic import Field @@ -44,7 +45,7 @@ def search_files_by_name( codebase: Codebase, pattern: str, page: int = 1, - files_per_page: int = 10, + files_per_page: int | float = 10, ) -> SearchFilesByNameResultObservation: """Search for files by name pattern in the codebase. @@ -58,8 +59,8 @@ def search_files_by_name( # Validate pagination parameters if page < 1: page = 1 - if files_per_page < 1: - files_per_page = 10 + if files_per_page is not None and files_per_page < 1: + files_per_page = 20 if shutil.which("fd") is None: logger.warning("fd is not installed, falling back to find") From 704b8eb203265e5897faca70cda7a5fc948be5d0 Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 18 Mar 2025 12:05:38 -0700 Subject: [PATCH 3/6] fix: data type --- src/codegen/extensions/tools/search_files_by_name.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/extensions/tools/search_files_by_name.py b/src/codegen/extensions/tools/search_files_by_name.py index 98310250e..1e0022d0b 100644 --- a/src/codegen/extensions/tools/search_files_by_name.py +++ b/src/codegen/extensions/tools/search_files_by_name.py @@ -30,7 +30,7 @@ class SearchFilesByNameResultObservation(Observation): total_files: int = Field( description="Total number of files with matches", ) - files_per_page: int = Field( + files_per_page: int | float = Field( description="Number of files shown per page", ) From 89be1c3123b91b622f2e13dce4374ceb361b652a Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 18 Mar 2025 12:20:57 -0700 Subject: [PATCH 4/6] fix: no page size limit --- src/codegen/extensions/tools/search_files_by_name.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/codegen/extensions/tools/search_files_by_name.py b/src/codegen/extensions/tools/search_files_by_name.py index 1e0022d0b..d9abe81a7 100644 --- a/src/codegen/extensions/tools/search_files_by_name.py +++ b/src/codegen/extensions/tools/search_files_by_name.py @@ -85,6 +85,8 @@ def search_files_by_name( # Calculate pagination total_files = len(all_files) + if files_per_page == math.inf: + files_per_page = total_files total_pages = (total_files + files_per_page - 1) // files_per_page if total_files > 0 else 1 # Ensure page is within valid range From a62384ad48faaaa17d5c9678e72f67f358a2e3fe Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 18 Mar 2025 12:29:03 -0700 Subject: [PATCH 5/6] fix: no page size limit --- src/codegen/extensions/tools/search_files_by_name.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/codegen/extensions/tools/search_files_by_name.py b/src/codegen/extensions/tools/search_files_by_name.py index d9abe81a7..b44f6da85 100644 --- a/src/codegen/extensions/tools/search_files_by_name.py +++ b/src/codegen/extensions/tools/search_files_by_name.py @@ -87,7 +87,10 @@ def search_files_by_name( total_files = len(all_files) if files_per_page == math.inf: files_per_page = total_files - total_pages = (total_files + files_per_page - 1) // files_per_page if total_files > 0 else 1 + total_pages = 1 + else: + total_pages = (total_files + files_per_page - 1) // files_per_page if total_files > 0 else 1 + # Ensure page is within valid range page = min(page, total_pages) From 89185e675f567695de19d4e32e19cf9f50679f47 Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Tue, 18 Mar 2025 12:46:01 -0700 Subject: [PATCH 6/6] fix: update tool input schema --- src/codegen/extensions/langchain/tools.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py index 2f82e1899..f9bd50934 100644 --- a/src/codegen/extensions/langchain/tools.py +++ b/src/codegen/extensions/langchain/tools.py @@ -1091,16 +1091,15 @@ class SearchFilesByNameInput(BaseModel): """Input for searching files by name pattern.""" pattern: str = Field(..., description="`fd`-compatible glob pattern to search for (e.g. '*.py', 'test_*.py')") - + page: int = Field(default=1, description="Page number to return (1-based)") + files_per_page: int | float = Field(default=10, description="Number of files per page to return, use math.inf to return all files") class SearchFilesByNameTool(BaseTool): """Tool for searching files by filename across a codebase.""" name: ClassVar[str] = "search_files_by_name" - page: int = 1 - files_per_page: int = 10 description: ClassVar[str] = """ -Search for files and directories by glob pattern across the active codebase. This is useful when you need to: +Search for files and directories by glob pattern (with pagination) across the active codebase. This is useful when you need to: - Find specific file types (e.g., '*.py', '*.tsx') - Locate configuration files (e.g., 'package.json', 'requirements.txt') - Find files with specific names (e.g., 'README.md', 'Dockerfile')