Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions src/codegen/extensions/langchain/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
from codegen.extensions.tools.search import search
from codegen.extensions.tools.semantic_edit import semantic_edit
from codegen.extensions.tools.semantic_search import semantic_search
from codegen.extensions.web.web import (
web_browse_page_tool,
web_extract_images_tool,
web_search_tool,
)
from codegen.extensions.web.web_client import WebClient
from codegen.sdk.core.codebase import Codebase

from ..tools import (
Expand All @@ -43,6 +49,7 @@
)
from ..tools.relace_edit_prompts import RELACE_EDIT_PROMPT
from ..tools.semantic_edit_prompts import FILE_EDIT_PROMPT
from .web_tools import WebBrowsePageTool, WebExtractImagesTool, WebSearchTool


class ViewFileInput(BaseModel):
Expand Down Expand Up @@ -823,6 +830,59 @@ def _run(self, content: str) -> str:
return "✅ Message sent successfully"


########################################################################################################################
# WEB
########################################################################################################################


class WebBrowsePageTool(BaseTool):
    """LangChain tool that browses a single web page through a ``WebClient``.

    The argument schema is borrowed from the ``WebBrowsePageTool`` name that is
    already bound in this module when the class body executes.
    """

    name: ClassVar[str] = "web_browse_page"
    description: ClassVar[str] = "Browse a web page and extract relevant information"
    # While this class body runs, ``WebBrowsePageTool`` still refers to the
    # previously bound object of that name; this class is only bound afterwards.
    args_schema: ClassVar[type[BaseModel]] = WebBrowsePageTool.args_schema
    # Client handed to the browse helper; excluded from serialized output.
    client: WebClient = Field(exclude=True)

    def __init__(self, client: WebClient) -> None:
        """Create the tool around ``client``."""
        super().__init__(client=client)

    def _run(self, url: str) -> str:
        """Browse ``url`` with the stored client and return the rendered result."""
        return web_browse_page_tool(self.client, url).render()


class WebSearchTool(BaseTool):
    """LangChain tool that runs a web search through a ``WebClient``.

    The argument schema is borrowed from the ``WebSearchTool`` name that is
    already bound in this module when the class body executes. That schema
    exposes ``num_results`` alongside ``query``, so ``_run`` accepts both.
    """

    name: ClassVar[str] = "web_search"
    description: ClassVar[str] = "Search the web for information"
    # While this class body runs, ``WebSearchTool`` still refers to the
    # previously bound object of that name; this class is only bound afterwards.
    args_schema: ClassVar[type[BaseModel]] = WebSearchTool.args_schema
    # Client handed to the search helper; excluded from serialized output.
    client: WebClient = Field(exclude=True)

    def __init__(self, client: WebClient) -> None:
        """Create the tool around ``client``."""
        super().__init__(client=client)

    def _run(self, query: str, num_results: int = 10) -> str:
        """Search for ``query`` and return the rendered result.

        Args:
            query: Search query string.
            num_results: Maximum number of results to request. The default
                mirrors the default declared on the borrowed argument schema.

        Returns:
            The string produced by calling ``render()`` on the search result.
        """
        # Forward ``num_results`` so a value supplied through the schema is
        # honoured instead of being rejected as an unexpected keyword argument.
        result = web_search_tool(self.client, query, num_results)
        return result.render()


class WebExtractImagesTool(BaseTool):
    """LangChain tool that extracts images from a web page through a ``WebClient``.

    The argument schema is borrowed from the ``WebExtractImagesTool`` name that
    is already bound in this module when the class body executes. That schema
    exposes ``max_images`` alongside ``url``, so ``_run`` accepts both.
    """

    name: ClassVar[str] = "web_extract_images"
    description: ClassVar[str] = "Extract images from a web page"
    # While this class body runs, ``WebExtractImagesTool`` still refers to the
    # previously bound object of that name; this class is only bound afterwards.
    args_schema: ClassVar[type[BaseModel]] = WebExtractImagesTool.args_schema
    # Client handed to the extraction helper; excluded from serialized output.
    client: WebClient = Field(exclude=True)

    def __init__(self, client: WebClient) -> None:
        """Create the tool around ``client``."""
        super().__init__(client=client)

    def _run(self, url: str, max_images: int = 20) -> str:
        """Extract images from ``url`` and return the rendered result.

        Args:
            url: URL of the web page.
            max_images: Maximum number of images to request. The default
                mirrors the default declared on the borrowed argument schema.

        Returns:
            The string produced by calling ``render()`` on the extraction result.
        """
        # Forward ``max_images`` so a value supplied through the schema is
        # honoured instead of being rejected as an unexpected keyword argument.
        result = web_extract_images_tool(self.client, url, max_images)
        return result.render()


########################################################################################################################
# EXPORT
########################################################################################################################
Expand Down Expand Up @@ -868,6 +928,10 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]:
LinearSearchIssuesTool(codebase),
LinearCreateIssueTool(codebase),
LinearGetTeamsTool(codebase),
# Web
WebBrowsePageTool(WebClient()),
WebSearchTool(WebClient()),
WebExtractImagesTool(WebClient()),
]


Expand Down
81 changes: 81 additions & 0 deletions src/codegen/extensions/langchain/web_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""LangChain tools for web browsing."""

from typing import ClassVar

from langchain_core.tools.base import BaseTool
from pydantic import BaseModel, Field

from codegen.extensions.web.web import (
web_browse_page_tool,
web_extract_images_tool,
web_search_tool,
)
from codegen.extensions.web.web_client import WebClient


class WebBrowsePageInput(BaseModel):
    """Input for browsing a web page."""

    # Required: the field declares no default value.
    url: str = Field(description="URL of the web page to browse")


class WebBrowsePageTool(BaseTool):
    """LangChain tool that browses a web page through a ``WebClient``.

    Arguments are described by ``WebBrowsePageInput``.
    """

    name: ClassVar[str] = "web_browse_page"
    description: ClassVar[str] = "Browse a web page and extract its content"
    args_schema: ClassVar[type[BaseModel]] = WebBrowsePageInput
    # Client handed to the browse helper; excluded from serialized output.
    client: WebClient = Field(exclude=True)

    def __init__(self, client: WebClient) -> None:
        """Create the tool around ``client``."""
        super().__init__(client=client)

    def _run(self, url: str) -> str:
        """Browse ``url`` with the stored client and return the rendered result."""
        return web_browse_page_tool(self.client, url).render()


class WebSearchInput(BaseModel):
    """Input for web search."""

    # Required: the field declares no default value.
    query: str = Field(description="Search query string")
    # Optional: falls back to 10 when the caller omits it.
    num_results: int = Field(10, description="Maximum number of results to return")


class WebSearchTool(BaseTool):
    """LangChain tool that runs a web search through a ``WebClient``.

    Arguments are described by ``WebSearchInput``.
    """

    name: ClassVar[str] = "web_search"
    description: ClassVar[str] = "Search the web using a search engine"
    args_schema: ClassVar[type[BaseModel]] = WebSearchInput
    # Client handed to the search helper; excluded from serialized output.
    client: WebClient = Field(exclude=True)

    def __init__(self, client: WebClient) -> None:
        """Create the tool around ``client``."""
        super().__init__(client=client)

    def _run(self, query: str, num_results: int = 10) -> str:
        """Search for ``query`` (at most ``num_results`` hits) and return the rendered result."""
        return web_search_tool(self.client, query, num_results).render()


class WebExtractImagesInput(BaseModel):
    """Input for extracting images from a web page."""

    # Required: the field declares no default value.
    url: str = Field(description="URL of the web page")
    # Optional: falls back to 20 when the caller omits it.
    max_images: int = Field(20, description="Maximum number of images to extract")


class WebExtractImagesTool(BaseTool):
    """LangChain tool that extracts images from a web page through a ``WebClient``.

    Arguments are described by ``WebExtractImagesInput``.
    """

    name: ClassVar[str] = "web_extract_images"
    description: ClassVar[str] = "Extract images from a web page"
    args_schema: ClassVar[type[BaseModel]] = WebExtractImagesInput
    # Client handed to the extraction helper; excluded from serialized output.
    client: WebClient = Field(exclude=True)

    def __init__(self, client: WebClient) -> None:
        """Create the tool around ``client``."""
        super().__init__(client=client)

    def _run(self, url: str, max_images: int = 20) -> str:
        """Extract up to ``max_images`` images from ``url`` and return the rendered result."""
        return web_extract_images_tool(self.client, url, max_images).render()
15 changes: 15 additions & 0 deletions src/codegen/extensions/web/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Web browsing functionality."""

from codegen.extensions.web.web import (
web_browse_page_tool,
web_extract_images_tool,
web_search_tool,
)
from codegen.extensions.web.web_client import WebClient

# Public API of this package: the client class and the three tool functions
# imported above.
__all__ = [
    "WebClient",
    "web_browse_page_tool",
    "web_extract_images_tool",
    "web_search_tool",
]
29 changes: 29 additions & 0 deletions src/codegen/extensions/web/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Types for web browsing functionality."""

from pydantic import BaseModel, Field


class WebPage(BaseModel):
    """Represents a web page."""

    # Every field is required: none of them declares a default value.
    url: str = Field(..., description="URL of the web page")
    title: str = Field(..., description="Title of the web page")
    content: str = Field(..., description="Main content of the web page")
    status_code: int = Field(..., description="HTTP status code of the response")


class WebSearchResult(BaseModel):
    """Represents a single search result."""

    # Every field is required: none of them declares a default value.
    title: str = Field(..., description="Title of the search result")
    url: str = Field(..., description="URL of the search result")
    snippet: str = Field(..., description="Snippet or description of the search result")


class WebImage(BaseModel):
    """Represents an image from a web page."""

    # Only the image URL is required.
    url: str = Field(..., description="URL of the image")
    # The remaining metadata is optional and defaults to None when absent.
    alt_text: str | None = Field(default=None, description="Alternative text for the image")
    width: int | None = Field(default=None, description="Width of the image in pixels")
    height: int | None = Field(default=None, description="Height of the image in pixels")
Loading
Loading