Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/codegen/extensions/langchain/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
SearchTool,
# SemanticEditTool,
ViewFileTool,
WebPageViewTool,
WebSearchTool,
)

from .graph import create_react_agent
Expand Down Expand Up @@ -76,6 +78,8 @@ def create_codebase_agent(
ReplacementEditTool(codebase),
RelaceEditTool(codebase),
ReflectionTool(codebase),
WebSearchTool(codebase),
WebPageViewTool(codebase),
# SemanticSearchTool(codebase),
# =====[ Github Integration ]=====
# Enable Github integration
Expand Down Expand Up @@ -134,6 +138,8 @@ def create_chat_agent(
MoveSymbolTool(codebase),
RevealSymbolTool(codebase),
RelaceEditTool(codebase),
WebSearchTool(codebase),
WebPageViewTool(codebase),
]

if additional_tools:
Expand Down
59 changes: 57 additions & 2 deletions src/codegen/extensions/langchain/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from codegen.extensions.tools.search import search
from codegen.extensions.tools.semantic_edit import semantic_edit
from codegen.extensions.tools.semantic_search import semantic_search
from codegen.extensions.tools.web_page_view import web_page_view
from codegen.extensions.tools.web_search import web_search
from codegen.sdk.core.codebase import Codebase

from ..tools import (
Expand Down Expand Up @@ -823,6 +825,59 @@ def _run(self, content: str) -> str:
return "✅ Message sent successfully"


########################################################################################################################
# WEB SEARCH
########################################################################################################################


class WebSearchInput(BaseModel):
    """Argument schema for ``WebSearchTool``.

    The ``description`` strings double as the tool-argument documentation
    exposed through ``args_schema``, so keep them accurate and concise.
    """

    # Free-text query forwarded verbatim to the search backend.
    query: str = Field(..., description="The search query")
    # Upper bound on how many organic results are returned.
    num_results: int = Field(default=5, description="Number of results to return (default: 5)")
    # Backend engine identifier (passed through to the search API as-is).
    search_engine: str = Field(default="google", description="Search engine to use (default: 'google')")


class WebSearchTool(BaseTool):
    """LangChain tool that runs a web search and renders the results as text."""

    name: ClassVar[str] = "web_search"
    description: ClassVar[str] = "Search the web for information and return relevant results"
    args_schema: ClassVar[type[BaseModel]] = WebSearchInput
    codebase: Codebase = Field(exclude=True)

    def __init__(self, codebase: Codebase) -> None:
        super().__init__(codebase=codebase)

    def _run(self, query: str, num_results: int = 5, search_engine: str = "google") -> str:
        # Delegate to the shared web_search helper, then render its Result
        # into the plain-text form the agent consumes.
        outcome = web_search(
            self.codebase,
            query,
            num_results=num_results,
            search_engine=search_engine,
        )
        return outcome.render()


class WebPageViewInput(BaseModel):
    """Argument schema for ``WebPageViewTool``.

    The ``description`` strings double as the tool-argument documentation
    exposed through ``args_schema``, so keep them accurate and concise.
    """

    # Absolute URL of the page to fetch.
    url: str = Field(..., description="URL of the web page to view")
    # When given, only elements matching this CSS selector are extracted.
    selector: Optional[str] = Field(None, description="Optional CSS selector to extract specific content")
    # Extracted text beyond this length is truncated before returning.
    max_length: int = Field(default=10000, description="Maximum length of content to return (default: 10000)")


class WebPageViewTool(BaseTool):
    """LangChain tool that fetches a web page and renders its content as text."""

    name: ClassVar[str] = "web_page_view"
    description: ClassVar[str] = "Extract and view content from a web page"
    args_schema: ClassVar[type[BaseModel]] = WebPageViewInput
    codebase: Codebase = Field(exclude=True)

    def __init__(self, codebase: Codebase) -> None:
        super().__init__(codebase=codebase)

    def _run(self, url: str, selector: Optional[str] = None, max_length: int = 10000) -> str:
        # Delegate to the shared web_page_view helper, then render its Result
        # into the plain-text form the agent consumes.
        outcome = web_page_view(
            self.codebase,
            url,
            selector=selector,
            max_length=max_length,
        )
        return outcome.render()


########################################################################################################################
# EXPORT
########################################################################################################################
Expand Down Expand Up @@ -850,11 +905,11 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]:
RevealSymbolTool(codebase),
RunBashCommandTool(), # Note: This tool doesn't need the codebase
SearchTool(codebase),
# SemanticEditTool(codebase),
# SemanticSearchTool(codebase),
ViewFileTool(codebase),
RelaceEditTool(codebase),
ReflectionTool(codebase),
WebSearchTool(codebase),
WebPageViewTool(codebase),
# Github
GithubCreatePRTool(codebase),
GithubCreatePRCommentTool(codebase),
Expand Down
92 changes: 92 additions & 0 deletions src/codegen/extensions/tools/web_page_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""Web page viewing tool for the code agent."""

from typing import Optional

import requests
from bs4 import BeautifulSoup

from codegen.sdk.core.codebase import Codebase
from codegen.sdk.core.result import Result


def web_page_view(
    codebase: Codebase,
    url: str,
    selector: Optional[str] = None,
    max_length: int = 10000,
) -> Result:
    """Extract readable text content from a web page.

    Args:
        codebase: The codebase (not used, but kept for a consistent tool signature)
        url: URL of the web page to view
        selector: Optional CSS selector to extract specific content
        max_length: Maximum length of content to return (default: 10000)

    Returns:
        Result whose ``data`` holds the url, page title, extracted content,
        and content length on success; ``success=False`` with an explanatory
        message when the fetch fails, parsing fails, or the selector matches
        nothing.
    """
    try:
        # Browser-like user agent to avoid being blocked by naive bot filters.
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}

        # Bounded timeout so a stalled server cannot hang the agent.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Drop script/style elements so get_text() yields only readable text.
        for script in soup(["script", "style"]):
            script.extract()

        if selector:
            content_elements = soup.select(selector)
            if not content_elements:
                return Result(
                    success=False,
                    message=f"No elements found matching selector '{selector}'",
                    data=None,
                )
            content = "\n".join([elem.get_text(strip=True) for elem in content_elements])
        else:
            # Prefer common "main content" containers before falling back to
            # the text of the entire page.
            main_content = soup.find("main") or soup.find("article") or soup.find("div", {"id": "content"}) or soup.find("div", {"class": "content"})

            if main_content:
                content = main_content.get_text(strip=True)
            else:
                content = soup.get_text(strip=True)

        # Truncate overly long pages so the agent's context stays bounded.
        if len(content) > max_length:
            content = content[:max_length] + "... [content truncated]"

        # Bug fix: soup.title.string is None for an empty <title/> tag, so the
        # original could report title=None instead of the intended fallback.
        title = soup.title.string if soup.title and soup.title.string else "Unknown Title"

        return Result(
            success=True,
            message=f"Successfully extracted content from {url}",
            data={
                "url": url,
                "title": title,
                "content": content,
                "content_length": len(content),
            },
        )
    except requests.RequestException as e:
        return Result(
            success=False,
            message=f"Error fetching web page: {e!s}",
            data=None,
        )
    except Exception as e:
        return Result(
            success=False,
            message=f"Error processing web page: {e!s}",
            data=None,
        )
82 changes: 82 additions & 0 deletions src/codegen/extensions/tools/web_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Web search tool for the code agent."""

import json
import os

import requests

from codegen.sdk.core.codebase import Codebase
from codegen.sdk.core.result import Result


def web_search(
    codebase: Codebase,
    query: str,
    num_results: int = 5,
    search_engine: str = "google",
) -> Result:
    """Search the web via the SerpAPI HTTP API.

    Args:
        codebase: The codebase (not used, but kept for a consistent tool signature)
        query: The search query
        num_results: Number of organic results to return (default: 5)
        search_engine: SerpAPI engine name (default: "google")

    Returns:
        Result whose ``data`` contains the query plus a list of
        {title, link, snippet} dicts on success; ``success=False`` with an
        explanatory message on missing API key, network failure, or an
        unparseable response.
    """
    # The key is read from the environment so it never lands in source control.
    api_key = os.environ.get("SERP_API_KEY")
    if not api_key:
        return Result(
            success=False,
            message="SERP_API_KEY environment variable not set. Please set it to use the web search tool.",
            data=None,
        )

    base_url = "https://serpapi.com/search"
    params = {
        "q": query,
        "api_key": api_key,
        "engine": search_engine,
    }

    try:
        # Bug fix: the original call had no timeout — requests waits forever by
        # default, so a stalled SerpAPI connection would hang the agent.
        # 10s matches the timeout used by web_page_view.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        # Keep only the first num_results organic hits, reduced to the fields
        # the agent needs.
        results = [
            {
                "title": item.get("title", ""),
                "link": item.get("link", ""),
                "snippet": item.get("snippet", ""),
            }
            for item in data.get("organic_results", [])[:num_results]
        ]

        return Result(
            success=True,
            message=f"Found {len(results)} results for query: {query}",
            data={
                "query": query,
                "results": results,
            },
        )
    except requests.RequestException as e:
        return Result(
            success=False,
            message=f"Error performing web search: {e!s}",
            data=None,
        )
    except json.JSONDecodeError:
        return Result(
            success=False,
            message="Error parsing search results",
            data=None,
        )
Loading