From 2256aff686557b4fd30679fe2d28bee9ebc5b4bf Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Sat, 15 Mar 2025 12:36:00 -0700 Subject: [PATCH 1/8] inital logger interface --- src/codegen/agents/__init__.py | 1 + src/codegen/agents/code_agent.py | 27 +- src/codegen/agents/data.py | 52 ++++ src/codegen/agents/loggers.py | 17 ++ src/codegen/agents/scratch.ipynb | 447 +++++++++++++++++++++++++++++++ src/codegen/agents/tracer.py | 167 ++++++++++++ 6 files changed, 704 insertions(+), 7 deletions(-) create mode 100644 src/codegen/agents/data.py create mode 100644 src/codegen/agents/loggers.py create mode 100644 src/codegen/agents/scratch.ipynb create mode 100644 src/codegen/agents/tracer.py diff --git a/src/codegen/agents/__init__.py b/src/codegen/agents/__init__.py index e69de29bb..8b1378917 100644 --- a/src/codegen/agents/__init__.py +++ b/src/codegen/agents/__init__.py @@ -0,0 +1 @@ + diff --git a/src/codegen/agents/code_agent.py b/src/codegen/agents/code_agent.py index a234a185f..bc8b96c02 100644 --- a/src/codegen/agents/code_agent.py +++ b/src/codegen/agents/code_agent.py @@ -3,9 +3,10 @@ from uuid import uuid4 from langchain.tools import BaseTool -from langchain_core.messages import AIMessage from langsmith import Client - +from langchain_core.messages import AIMessage +from codegen.agents.loggers import ExternalLogger +from codegen.agents.tracer import MessageStreamTracer from codegen.extensions.langchain.agent import create_codebase_agent from codegen.extensions.langchain.utils.get_langsmith_url import find_and_print_langsmith_run_url @@ -42,7 +43,7 @@ def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", mode self.project_name = os.environ.get("LANGCHAIN_PROJECT", "RELACE") print(f"Using LangSmith project: {self.project_name}") - def run(self, prompt: str, thread_id: Optional[str] = None) -> str: + def run(self, prompt: str, thread_id: Optional[str] = None, logger: Optional[ExternalLogger] = None) -> str: """Run the agent with a prompt. Args: @@ -55,20 +56,32 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str: if thread_id is None: thread_id = str(uuid4()) + + # this message has a reducer which appends the current message to the existing history # see more https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers input = {"messages": [("user", prompt)]} # we stream the steps instead of invoke because it allows us to access intermediate nodes - stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id, "metadata": {"project": self.project_name}}, "recursion_limit": 100}, stream_mode="values") + stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id, "metadata": {"project": self.project_name}}, "recursion_limit": 100}, stream_mode="values") + # Get the stream from the graph + # Create the external logger (optional) + # Create the tracer + _tracer = MessageStreamTracer(logger=logger) + + # Process the stream with the tracer + traced_stream = _tracer.process_stream(stream) # Keep track of run IDs from the stream run_ids = [] - for s in stream: + for s in traced_stream: message = s["messages"][-1] + message.pretty_print() + if isinstance(message, tuple): - print(message) + # print(message) + pass else: if isinstance(message, AIMessage) and isinstance(message.content, list) and "text" in message.content[0]: AIMessage(message.content[0]["text"]).pretty_print() @@ -82,7 +95,7 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str: # Get the last message content result = s["messages"][-1].content - # Try to find run IDs in the LangSmith client's recent runs + # # Try to find run IDs in the LangSmith client's recent runs try: # Find and print the LangSmith run URL find_and_print_langsmith_run_url(self.langsmith_client, self.project_name) diff --git a/src/codegen/agents/data.py b/src/codegen/agents/data.py new file mode 100644 index 000000000..b0034dd46 --- /dev/null +++ b/src/codegen/agents/data.py @@ -0,0 +1,52 @@ +from typing import List, Optional +from dataclasses import dataclass, field +from datetime import datetime + +# Base dataclass for all message types +@dataclass +class BaseMessage: + """Base class for all message types.""" + type: str + timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) + content: str = "" + +@dataclass +class UserMessage(BaseMessage): + """Represents a message from the user.""" + type: str = field(default="user") + +@dataclass +class SystemMessageData(BaseMessage): + """Represents a system message.""" + type: str = field(default="system") + +@dataclass +class ToolCall: + """Represents a tool call within an assistant message.""" + name: Optional[str] = None + arguments: Optional[str] = None + id: Optional[str] = None + +@dataclass +class AssistantMessage(BaseMessage): + """Represents a message from the assistant.""" + type: str = field(default="assistant") + tool_calls: List[ToolCall] = field(default_factory=list) + +@dataclass +class ToolMessageData(BaseMessage): + """Represents a tool response message.""" + type: str = field(default="tool") + tool_name: Optional[str] = None + tool_response: Optional[str] = None + tool_id: Optional[str] = None + +@dataclass +class FunctionMessageData(BaseMessage): + """Represents a function message.""" + type: str = field(default="function") + +@dataclass +class UnknownMessage(BaseMessage): + """Represents an unknown message type.""" + type: str = field(default="unknown") \ No newline at end of file diff --git a/src/codegen/agents/loggers.py b/src/codegen/agents/loggers.py new file mode 100644 index 000000000..d0827c896 --- /dev/null +++ b/src/codegen/agents/loggers.py @@ -0,0 +1,17 @@ +import json +from typing import Dict, List, Any, Union, Protocol +from dataclasses import asdict +from .data import BaseMessage + +# Define the interface for ExternalLogger +class ExternalLogger(Protocol): + """Protocol defining the interface for external loggers.""" + + def log(self, data: Union[Dict[str, Any], BaseMessage]) -> None: + """ + Log structured data to an external system. + + Args: + data: The structured data to log, either as a dictionary or a BaseMessage + """ + pass \ No newline at end of file diff --git a/src/codegen/agents/scratch.ipynb b/src/codegen/agents/scratch.ipynb new file mode 100644 index 000000000..69e2db9ed --- /dev/null +++ b/src/codegen/agents/scratch.ipynb @@ -0,0 +1,447 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "codegen.agents.code_agent.CodeAgent" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from codegen.agents.code_agent import CodeAgent\n", + "\n", + "\n", + "CodeAgent" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[37m2025-03-14 14:56:27,596 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mFetching codebase for codegen-sh/Kevin-s-Adventure-Game\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:27,596 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mUsing directory: /tmp/codegen\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:27,597 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mWill clone https://github.com/codegen-sh/Kevin-s-Adventure-Game.git to /tmp/codegen/Kevin-s-Adventure-Game\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:27,597 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mCloning repository...\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,311 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mClone completed successfully\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,311 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mInitializing Codebase...\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,398 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Parsing 13 files in ALL subdirectories with ['.py'] extensions\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,428 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Building directory tree\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,440 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Computing import resolution edges for 55 imports\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,447 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Computing superclass dependencies\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,447 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Incrementally computing dependencies for 327 nodes\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,467 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Incrementally computing dependencies for 7 nodes\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,468 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Found 13 files\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,468 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Found 334 nodes and 840 edges\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,468 - codegen.shared.performance.stopwatch_utils - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mFunction 'build_graph' took 80.71 milliseconds to execute.\u001b[0m\n", + "\u001b[37m2025-03-14 14:56:28,480 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mCodebase initialization complete\u001b[0m\n" + ] + } + ], + "source": [ + "from codegen.sdk.core.codebase import Codebase\n", + "\n", + "\n", + "codebase = Codebase.from_repo(\"codegen-sh/Kevin-s-Adventure-Game\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, Dict, Union\n", + "from codegen.agents.data import BaseMessage\n", + "from codegen.agents.loggers import ExternalLogger\n", + "\n", + "\n", + "class ConsoleLogger(ExternalLogger):\n", + " def log(self, data: Union[Dict[str, Any], BaseMessage]) -> None:\n", + " print(data.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using LangSmith project: RELACE\n", + "What is the main character's name? also show the source code snippetwhere you find the answer\n", + "================================\u001b[1m Human Message \u001b[0m=================================\n", + "\n", + "What is the main character's name? also show the source code snippetwhere you find the answer\n", + "================================\u001b[1m Human Message \u001b[0m=================================\n", + "\n", + "What is the main character's name? also show the source code snippetwhere you find the answer\n", + "[{'text': \"I'll help you find the main character's name in the codebase. Let me search for references to a main character.\", 'type': 'text'}, {'id': 'toolu_01THYvJFKVWoDtj9N97oTVLV', 'input': {'query': 'main character'}, 'name': 'search', 'type': 'tool_use'}]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "[{'text': \"I'll help you find the main character's name in the codebase. Let me search for references to a main character.\", 'type': 'text'}, {'id': 'toolu_01THYvJFKVWoDtj9N97oTVLV', 'input': {'query': 'main character'}, 'name': 'search', 'type': 'tool_use'}]\n", + "Tool Calls:\n", + " search (toolu_01THYvJFKVWoDtj9N97oTVLV)\n", + " Call ID: toolu_01THYvJFKVWoDtj9N97oTVLV\n", + " Args:\n", + " query: main character\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "I'll help you find the main character's name in the codebase. Let me search for references to a main character.\n", + "[SEARCH RESULTS]: main character\n", + "Found 0 files with matches (showing page 1 of 0)\n", + "\n", + "No matches found\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: search\n", + "\n", + "[SEARCH RESULTS]: main character\n", + "Found 0 files with matches (showing page 1 of 0)\n", + "\n", + "No matches found\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: search\n", + "\n", + "[SEARCH RESULTS]: main character\n", + "Found 0 files with matches (showing page 1 of 0)\n", + "\n", + "No matches found\n", + "[{'text': \"Let me try a different approach by looking at the files in the project to understand what kind of application or story we're dealing with.\", 'type': 'text'}, {'id': 'toolu_015jfRz3Q3UzDa6esM4YV5Qj', 'input': {'dirpath': './'}, 'name': 'list_directory', 'type': 'tool_use'}]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "[{'text': \"Let me try a different approach by looking at the files in the project to understand what kind of application or story we're dealing with.\", 'type': 'text'}, {'id': 'toolu_015jfRz3Q3UzDa6esM4YV5Qj', 'input': {'dirpath': './'}, 'name': 'list_directory', 'type': 'tool_use'}]\n", + "Tool Calls:\n", + " list_directory (toolu_015jfRz3Q3UzDa6esM4YV5Qj)\n", + " Call ID: toolu_015jfRz3Q3UzDa6esM4YV5Qj\n", + " Args:\n", + " dirpath: ./\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "Let me try a different approach by looking at the files in the project to understand what kind of application or story we're dealing with.\n", + "[LIST DIRECTORY]: \n", + "\n", + "├── main.py\n", + "├── game/\n", + "├── locations/\n", + "└── utils/\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: list_directory\n", + "\n", + "[LIST DIRECTORY]: \n", + "\n", + "├── main.py\n", + "├── game/\n", + "├── locations/\n", + "└── utils/\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: list_directory\n", + "\n", + "[LIST DIRECTORY]: \n", + "\n", + "├── main.py\n", + "├── game/\n", + "├── locations/\n", + "└── utils/\n", + "[{'text': 'This looks like a game project. Let me check the main.py file first to see if it contains information about the main character.', 'type': 'text'}, {'id': 'toolu_01M1FShB2Ec1mqgjvV67jgdg', 'input': {'filepath': 'main.py'}, 'name': 'view_file', 'type': 'tool_use'}]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "[{'text': 'This looks like a game project. Let me check the main.py file first to see if it contains information about the main character.', 'type': 'text'}, {'id': 'toolu_01M1FShB2Ec1mqgjvV67jgdg', 'input': {'filepath': 'main.py'}, 'name': 'view_file', 'type': 'tool_use'}]\n", + "Tool Calls:\n", + " view_file (toolu_01M1FShB2Ec1mqgjvV67jgdg)\n", + " Call ID: toolu_01M1FShB2Ec1mqgjvV67jgdg\n", + " Args:\n", + " filepath: main.py\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "This looks like a game project. Let me check the main.py file first to see if it contains information about the main character.\n", + "[VIEW FILE]: main.py (50 lines total)\n", + "\n", + " 1|from game.actions import perform_action\n", + " 2|from game.player import create_player, get_player_status\n", + " 3|from game.world import get_current_location, initialize_world\n", + " 4|from utils.save_load import list_save_files, load_game, save_game\n", + " 5|from utils.text_formatting import print_help, print_welcome_message\n", + " 6|\n", + " 7|\n", + " 8|def main():\n", + " 9| print_welcome_message()\n", + "10|\n", + "11| # Add load game option\n", + "12| load_option = input(\"Do you want to load a saved game? (y/n): \").lower()\n", + "13| if load_option == 'y':\n", + "14| save_files = list_save_files()\n", + "15| if save_files:\n", + "16| print(\"Available save files:\")\n", + "17| for i, file in enumerate(save_files, 1):\n", + "18| print(f\"{i}. {file}\")\n", + "19| choice = int(input(\"Enter the number of the save file to load: \"))\n", + "20| player, world = load_game(save_files[choice - 1])\n", + "21| if player is None or world is None:\n", + "22| print(\"Failed to load game. Starting a new game.\")\n", + "23| player = create_player(\"Kevin\")\n", + "24| world = initialize_world()\n", + "25| else:\n", + "26| print(\"No save files found. Starting a new game.\")\n", + "27| player = create_player(\"Kevin\")\n", + "28| world = initialize_world()\n", + "29| else:\n", + "30| player = create_player(\"Kevin\")\n", + "31| world = initialize_world()\n", + "32|\n", + "33| while True:\n", + "34| current_location = get_current_location(world)\n", + "35| print(f\"\\nYou are in the {current_location}.\")\n", + "36| print(get_player_status(player))\n", + "37|\n", + "38| action = input(\"What would you like to do? \").lower()\n", + "39|\n", + "40| if action == \"quit\":\n", + "41| save_game(player, world)\n", + "42| print(\"Thanks for playing! Your progress has been saved.\")\n", + "43| break\n", + "44| elif action == \"help\":\n", + "45| print_help()\n", + "46| else:\n", + "47| perform_action(player, world, action)\n", + "48|\n", + "49|if __name__ == \"__main__\":\n", + "50| main()\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: view_file\n", + "\n", + "[VIEW FILE]: main.py (50 lines total)\n", + "\n", + " 1|from game.actions import perform_action\n", + " 2|from game.player import create_player, get_player_status\n", + " 3|from game.world import get_current_location, initialize_world\n", + " 4|from utils.save_load import list_save_files, load_game, save_game\n", + " 5|from utils.text_formatting import print_help, print_welcome_message\n", + " 6|\n", + " 7|\n", + " 8|def main():\n", + " 9| print_welcome_message()\n", + "10|\n", + "11| # Add load game option\n", + "12| load_option = input(\"Do you want to load a saved game? (y/n): \").lower()\n", + "13| if load_option == 'y':\n", + "14| save_files = list_save_files()\n", + "15| if save_files:\n", + "16| print(\"Available save files:\")\n", + "17| for i, file in enumerate(save_files, 1):\n", + "18| print(f\"{i}. {file}\")\n", + "19| choice = int(input(\"Enter the number of the save file to load: \"))\n", + "20| player, world = load_game(save_files[choice - 1])\n", + "21| if player is None or world is None:\n", + "22| print(\"Failed to load game. Starting a new game.\")\n", + "23| player = create_player(\"Kevin\")\n", + "24| world = initialize_world()\n", + "25| else:\n", + "26| print(\"No save files found. Starting a new game.\")\n", + "27| player = create_player(\"Kevin\")\n", + "28| world = initialize_world()\n", + "29| else:\n", + "30| player = create_player(\"Kevin\")\n", + "31| world = initialize_world()\n", + "32|\n", + "33| while True:\n", + "34| current_location = get_current_location(world)\n", + "35| print(f\"\\nYou are in the {current_location}.\")\n", + "36| print(get_player_status(player))\n", + "37|\n", + "38| action = input(\"What would you like to do? \").lower()\n", + "39|\n", + "40| if action == \"quit\":\n", + "41| save_game(player, world)\n", + "42| print(\"Thanks for playing! Your progress has been saved.\")\n", + "43| break\n", + "44| elif action == \"help\":\n", + "45| print_help()\n", + "46| else:\n", + "47| perform_action(player, world, action)\n", + "48|\n", + "49|if __name__ == \"__main__\":\n", + "50| main()\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: view_file\n", + "\n", + "[VIEW FILE]: main.py (50 lines total)\n", + "\n", + " 1|from game.actions import perform_action\n", + " 2|from game.player import create_player, get_player_status\n", + " 3|from game.world import get_current_location, initialize_world\n", + " 4|from utils.save_load import list_save_files, load_game, save_game\n", + " 5|from utils.text_formatting import print_help, print_welcome_message\n", + " 6|\n", + " 7|\n", + " 8|def main():\n", + " 9| print_welcome_message()\n", + "10|\n", + "11| # Add load game option\n", + "12| load_option = input(\"Do you want to load a saved game? (y/n): \").lower()\n", + "13| if load_option == 'y':\n", + "14| save_files = list_save_files()\n", + "15| if save_files:\n", + "16| print(\"Available save files:\")\n", + "17| for i, file in enumerate(save_files, 1):\n", + "18| print(f\"{i}. {file}\")\n", + "19| choice = int(input(\"Enter the number of the save file to load: \"))\n", + "20| player, world = load_game(save_files[choice - 1])\n", + "21| if player is None or world is None:\n", + "22| print(\"Failed to load game. Starting a new game.\")\n", + "23| player = create_player(\"Kevin\")\n", + "24| world = initialize_world()\n", + "25| else:\n", + "26| print(\"No save files found. Starting a new game.\")\n", + "27| player = create_player(\"Kevin\")\n", + "28| world = initialize_world()\n", + "29| else:\n", + "30| player = create_player(\"Kevin\")\n", + "31| world = initialize_world()\n", + "32|\n", + "33| while True:\n", + "34| current_location = get_current_location(world)\n", + "35| print(f\"\\nYou are in the {current_location}.\")\n", + "36| print(get_player_status(player))\n", + "37|\n", + "38| action = input(\"What would you like to do? \").lower()\n", + "39|\n", + "40| if action == \"quit\":\n", + "41| save_game(player, world)\n", + "42| print(\"Thanks for playing! Your progress has been saved.\")\n", + "43| break\n", + "44| elif action == \"help\":\n", + "45| print_help()\n", + "46| else:\n", + "47| perform_action(player, world, action)\n", + "48|\n", + "49|if __name__ == \"__main__\":\n", + "50| main()\n", + "I found the main character's name! According to the source code in main.py, the main character's name is \"Kevin\".\n", + "\n", + "The name appears in lines 23, 27, and 30 of main.py where the player character is created:\n", + "\n", + "```python\n", + "player = create_player(\"Kevin\")\n", + "```\n", + "\n", + "This line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "I found the main character's name! According to the source code in main.py, the main character's name is \"Kevin\".\n", + "\n", + "The name appears in lines 23, 27, and 30 of main.py where the player character is created:\n", + "\n", + "```python\n", + "player = create_player(\"Kevin\")\n", + "```\n", + "\n", + "This line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "I found the main character's name! According to the source code in main.py, the main character's name is \"Kevin\".\n", + "\n", + "The name appears in lines 23, 27, and 30 of main.py where the player character is created:\n", + "\n", + "```python\n", + "player = create_player(\"Kevin\")\n", + "```\n", + "\n", + "This line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".\n", + "\n", + "============================================================\n", + "Could not retrieve LangSmith URL: Project RELACE not found\n", + "Traceback (most recent call last):\n", + " File \"/Users/rushil/dev/codegen-sdk/src/codegen/extensions/langchain/utils/get_langsmith_url.py\", line 53, in find_and_print_langsmith_run_url\n", + " recent_runs = list(\n", + " client.list_runs(\n", + " ...<4 lines>...\n", + " )\n", + " )\n", + " File \"/Users/rushil/dev/codegen-sdk/.venv/lib/python3.13/site-packages/langsmith/client.py\", line 2313, in list_runs\n", + " [self.read_project(project_name=name).id for name in project_name]\n", + " ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/rushil/dev/codegen-sdk/.venv/lib/python3.13/site-packages/langsmith/utils.py\", line 138, in wrapper\n", + " return func(*args, **kwargs)\n", + " File \"/Users/rushil/dev/codegen-sdk/.venv/lib/python3.13/site-packages/langsmith/client.py\", line 2961, in read_project\n", + " raise ls_utils.LangSmithNotFoundError(\n", + " f\"Project {project_name} not found\"\n", + " )\n", + "langsmith.utils.LangSmithNotFoundError: Project RELACE not found\n", + "\n", + "============================================================\n" + ] + }, + { + "data": { + "text/plain": [ + "'I found the main character\\'s name! According to the source code in main.py, the main character\\'s name is \"Kevin\".\\n\\nThe name appears in lines 23, 27, and 30 of main.py where the player character is created:\\n\\n```python\\nplayer = create_player(\"Kevin\")\\n```\\n\\nThis line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "\n", + "\n", + "agent = CodeAgent(codebase)\n", + "agent.run(\"What is the main character's name? also show the source code where you find the answer\", logger=ConsoleLogger())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent.run(\"What is the main character's name?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/codegen/agents/tracer.py b/src/codegen/agents/tracer.py new file mode 100644 index 000000000..493b83cd9 --- /dev/null +++ b/src/codegen/agents/tracer.py @@ -0,0 +1,167 @@ +from typing import Dict, List, Any, Generator, Union, Optional +from langchain.schema import AIMessage, HumanMessage, SystemMessage as LCSystemMessage, FunctionMessage as LCFunctionMessage +from langchain_core.messages import ToolMessage as LCToolMessage +from dataclasses import asdict + +from .data import ( + BaseMessage, UserMessage, SystemMessageData, AssistantMessage, + ToolMessageData, FunctionMessageData, UnknownMessage, ToolCall +) +from .loggers import ExternalLogger + +class MessageStreamTracer: + def __init__(self, logger: Optional[ExternalLogger] = None): + self.traces = [] + self.logger = logger + + def process_stream(self, message_stream: Generator) -> Generator: + """ + Process the stream of messages from the LangGraph agent, + extract structured data, and pass through the messages. + """ + for chunk in message_stream: + # Process the chunk + structured_data = self.extract_structured_data(chunk) + + # Log the structured data + if structured_data: + self.traces.append(structured_data) + + # If there's an external logger, send the data there + if self.logger: + self.logger.log(structured_data) + + # Pass through the chunk to maintain the original stream behavior + yield chunk + + def extract_structured_data(self, chunk: Dict[str, Any]) -> Optional[BaseMessage]: + """ + Extract structured data from a message chunk. + Returns None if the chunk doesn't contain useful information. + Returns a BaseMessage subclass instance based on the message type. + """ + # Get the messages from the chunk if available + messages = chunk.get("messages", []) + if not messages and isinstance(chunk, dict): + # Sometimes the message might be in a different format + for key, value in chunk.items(): + if isinstance(value, list) and all(hasattr(item, "type") for item in value if hasattr(item, "__dict__")): + messages = value + break + + if not messages: + return None + + # Get the latest message + latest_message = messages[-1] if messages else None + + if not latest_message: + return None + + # Determine message type + message_type = self._get_message_type(latest_message) + content = self._get_message_content(latest_message) + + # Create the appropriate message type + if message_type == "user": + return UserMessage( + type=message_type, + content=content + ) + elif message_type == "system": + return SystemMessageData( + type=message_type, + content=content + ) + elif message_type == "assistant": + tool_calls_data = self._extract_tool_calls(latest_message) + tool_calls = [ + ToolCall( + name=tc.get("name"), + arguments=tc.get("arguments"), + id=tc.get("id") + ) for tc in tool_calls_data + ] + return AssistantMessage( + type=message_type, + content=content, + tool_calls=tool_calls + ) + elif message_type == "tool": + return ToolMessageData( + type=message_type, + content=content, + tool_name=getattr(latest_message, "name", None), + tool_response=content, + tool_id=getattr(latest_message, "tool_call_id", None) + ) + elif message_type == "function": + return FunctionMessageData( + type=message_type, + content=content + ) + else: + return UnknownMessage( + type=message_type, + content=content + ) + + def _get_message_type(self, message) -> str: + """Determine the type of message.""" + if isinstance(message, HumanMessage): + return "user" + elif isinstance(message, AIMessage): + return "assistant" + elif isinstance(message, LCSystemMessage): + return "system" + elif isinstance(message, LCFunctionMessage): + return "function" + elif isinstance(message, LCToolMessage): + return "tool" + elif hasattr(message, "type") and message.type: + return message.type + else: + return "unknown" + + def _get_message_content(self, message) -> str: + """Extract content from a message.""" + if hasattr(message, "content"): + return message.content + elif hasattr(message, "message") and hasattr(message.message, "content"): + return message.message.content + else: + return str(message) + + def _extract_tool_calls(self, message) -> List[Dict[str, Any]]: + """Extract tool calls from an assistant message.""" + tool_calls = [] + + # Check different possible locations for tool calls + if hasattr(message, "additional_kwargs") and "tool_calls" in message.additional_kwargs: + raw_tool_calls = message.additional_kwargs["tool_calls"] + for tc in raw_tool_calls: + tool_calls.append({ + "name": tc.get("function", {}).get("name"), + "arguments": tc.get("function", {}).get("arguments"), + "id": tc.get("id") + }) + + # Also check for function_call which is used in some models + elif hasattr(message, "additional_kwargs") and "function_call" in message.additional_kwargs: + fc = message.additional_kwargs["function_call"] + if isinstance(fc, dict): + tool_calls.append({ + "name": fc.get("name"), + "arguments": fc.get("arguments"), + "id": "function_call_1" # Assigning a default ID + }) + + return tool_calls + + def get_traces(self) -> List[BaseMessage]: + """Get all collected traces.""" + return self.traces + + def clear_traces(self) -> None: + """Clear all traces.""" + self.traces = [] \ No newline at end of file From 55c0c081d2377f16049d06bab2651cd70111f06a Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Sat, 15 Mar 2025 13:00:37 -0700 Subject: [PATCH 2/8] fix --- src/codegen/agents/code_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/agents/code_agent.py b/src/codegen/agents/code_agent.py index 4c5d13b97..781ad441f 100644 --- a/src/codegen/agents/code_agent.py +++ b/src/codegen/agents/code_agent.py @@ -137,7 +137,7 @@ def run(self, prompt: str) -> str: stream = self.agent.stream(input, config=config, stream_mode="values") - _tracer = MessageStreamTracer(logger=logger) + _tracer = MessageStreamTracer(logger=self.logger) # Process the stream with the tracer traced_stream = _tracer.process_stream(stream) From 443946a04c5d412770c06c3d02ce70307e3e6454 Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Sat, 15 Mar 2025 13:57:12 -0700 Subject: [PATCH 3/8] fix: add type alias --- src/codegen/agents/data.py | 6 ++++-- src/codegen/agents/loggers.py | 8 +++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/codegen/agents/data.py b/src/codegen/agents/data.py index b0034dd46..8dab1ad6d 100644 --- a/src/codegen/agents/data.py +++ b/src/codegen/agents/data.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List, Optional, Union from dataclasses import dataclass, field from datetime import datetime @@ -49,4 +49,6 @@ class FunctionMessageData(BaseMessage): @dataclass class UnknownMessage(BaseMessage): """Represents an unknown message type.""" - type: str = field(default="unknown") \ No newline at end of file + type: str = field(default="unknown") + +type AgentRunMessage = Union[UserMessage, SystemMessageData, AssistantMessage, ToolMessageData, FunctionMessageData, UnknownMessage] \ No newline at end of file diff --git a/src/codegen/agents/loggers.py b/src/codegen/agents/loggers.py index d0827c896..d7c96aebc 100644 --- a/src/codegen/agents/loggers.py +++ b/src/codegen/agents/loggers.py @@ -1,13 +1,11 @@ -import json -from typing import Dict, List, Any, Union, Protocol -from dataclasses import asdict -from .data import BaseMessage +from typing import Protocol +from .data import AgentRunMessage # Define the interface for ExternalLogger class ExternalLogger(Protocol): """Protocol defining the interface for external loggers.""" - def log(self, data: Union[Dict[str, Any], BaseMessage]) -> None: + def log(self, data: AgentRunMessage) -> None: """ Log structured data to an external system. From fffae0773dcf095964db20e23037a417b4459dac Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Sat, 15 Mar 2025 14:09:40 -0700 Subject: [PATCH 4/8] fix: literal type --- src/codegen/agents/data.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/codegen/agents/data.py b/src/codegen/agents/data.py index 8dab1ad6d..53b94814e 100644 --- a/src/codegen/agents/data.py +++ b/src/codegen/agents/data.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import List, Literal, Optional, Union from dataclasses import dataclass, field from datetime import datetime @@ -13,12 +13,12 @@ class BaseMessage: @dataclass class UserMessage(BaseMessage): """Represents a message from the user.""" - type: str = field(default="user") + type: Literal["user"] = field(default="user") @dataclass class SystemMessageData(BaseMessage): """Represents a system message.""" - type: str = field(default="system") + type: Literal["system"] = field(default="system") @dataclass class ToolCall: @@ -30,13 +30,13 @@ class ToolCall: @dataclass class AssistantMessage(BaseMessage): """Represents a message from the assistant.""" - type: str = field(default="assistant") + type: Literal["assistant"] = field(default="assistant") tool_calls: List[ToolCall] = field(default_factory=list) @dataclass class ToolMessageData(BaseMessage): """Represents a tool response message.""" - type: str = field(default="tool") + type: Literal["tool"] = field(default="tool") tool_name: Optional[str] = None tool_response: Optional[str] = None tool_id: Optional[str] = None @@ -44,11 +44,11 @@ class ToolMessageData(BaseMessage): @dataclass class FunctionMessageData(BaseMessage): """Represents a function message.""" - type: str = field(default="function") + type: Literal["function"] = field(default="function") @dataclass class UnknownMessage(BaseMessage): """Represents an unknown message type.""" - type: str = field(default="unknown") + type: Literal["unknown"] = field(default="unknown") type AgentRunMessage = Union[UserMessage, SystemMessageData, AssistantMessage, ToolMessageData, FunctionMessageData, UnknownMessage] \ No newline at end of file From 960298dfec5b9d6f357cacbac036a5304307d030 Mon Sep 17 00:00:00 2001 From: rushilpatel0 <171610820+rushilpatel0@users.noreply.github.com> Date: Sat, 15 Mar 2025 21:10:36 +0000 Subject: [PATCH 5/8] Automated pre-commit update --- src/codegen/agents/__init__.py | 1 - src/codegen/agents/code_agent.py | 8 +- src/codegen/agents/data.py | 25 ++- src/codegen/agents/loggers.py | 11 +- src/codegen/agents/scratch.ipynb | 370 +------------------------------ src/codegen/agents/tracer.py | 125 ++++------- 6 files changed, 83 insertions(+), 457 deletions(-) diff --git a/src/codegen/agents/__init__.py b/src/codegen/agents/__init__.py index 8b1378917..e69de29bb 100644 --- a/src/codegen/agents/__init__.py +++ b/src/codegen/agents/__init__.py @@ -1 +0,0 @@ - diff --git a/src/codegen/agents/code_agent.py b/src/codegen/agents/code_agent.py index 781ad441f..99406ef40 100644 --- a/src/codegen/agents/code_agent.py +++ b/src/codegen/agents/code_agent.py @@ -7,8 +7,8 @@ from langchain_core.runnables.config import RunnableConfig from langgraph.graph.graph import CompiledGraph from langsmith import Client -from langchain_core.messages import AIMessage -from codegen.agents.loggers import ExternalLogger + +from codegen.agents.loggers import ExternalLogger from codegen.agents.tracer import MessageStreamTracer from codegen.extensions.langchain.agent import create_codebase_agent from codegen.extensions.langchain.utils.get_langsmith_url import ( @@ -86,7 +86,6 @@ def __init__( self.project_name = os.environ.get("LANGCHAIN_PROJECT", "RELACE") print(f"Using LangSmith project: {self.project_name}") - # Store SWEBench metadata if provided self.run_id = metadata.get("run_id") self.instance_id = metadata.get("instance_id") @@ -125,8 +124,6 @@ def run(self, prompt: str) -> str: "recursion_limit": 100, } - - # this message has a reducer which appends the current message to the existing history # see more https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers input = {"query": prompt} @@ -136,7 +133,6 @@ def run(self, prompt: str) -> str: stream = self.agent.stream(input, config=config, stream_mode="values") - _tracer = MessageStreamTracer(logger=self.logger) # Process the stream with the tracer diff --git a/src/codegen/agents/data.py b/src/codegen/agents/data.py index 53b94814e..34ffd6caa 100644 --- a/src/codegen/agents/data.py +++ b/src/codegen/agents/data.py @@ -1,54 +1,71 @@ -from typing import List, Literal, Optional, Union from dataclasses import dataclass, field from datetime import datetime +from typing import Literal, Optional, Union + # Base dataclass for all message types @dataclass class BaseMessage: """Base class for all message types.""" + type: str timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) content: str = "" + @dataclass class UserMessage(BaseMessage): """Represents a message from the user.""" + type: Literal["user"] = field(default="user") + @dataclass class SystemMessageData(BaseMessage): """Represents a system message.""" + type: Literal["system"] = field(default="system") + @dataclass class ToolCall: """Represents a tool call within an assistant message.""" + name: Optional[str] = None arguments: Optional[str] = None id: Optional[str] = None + @dataclass class AssistantMessage(BaseMessage): """Represents a message from the assistant.""" + type: Literal["assistant"] = field(default="assistant") - tool_calls: List[ToolCall] = field(default_factory=list) + tool_calls: list[ToolCall] = field(default_factory=list) + @dataclass class ToolMessageData(BaseMessage): """Represents a tool response message.""" + type: Literal["tool"] = field(default="tool") tool_name: Optional[str] = None tool_response: Optional[str] = None tool_id: Optional[str] = None + @dataclass class FunctionMessageData(BaseMessage): """Represents a function message.""" + type: Literal["function"] = field(default="function") + @dataclass class UnknownMessage(BaseMessage): """Represents an unknown message type.""" - type: Literal["unknown"] = field(default="unknown") -type AgentRunMessage = Union[UserMessage, SystemMessageData, AssistantMessage, ToolMessageData, FunctionMessageData, UnknownMessage] \ No newline at end of file + type: Literal["unknown"] = field(default="unknown") + + +type AgentRunMessage = Union[UserMessage, SystemMessageData, AssistantMessage, ToolMessageData, FunctionMessageData, UnknownMessage] diff --git a/src/codegen/agents/loggers.py b/src/codegen/agents/loggers.py index d7c96aebc..b507c427c 100644 --- a/src/codegen/agents/loggers.py +++ b/src/codegen/agents/loggers.py @@ -1,15 +1,16 @@ from typing import Protocol + from .data import AgentRunMessage + # Define the interface for ExternalLogger class ExternalLogger(Protocol): """Protocol defining the interface for external loggers.""" - + def log(self, data: AgentRunMessage) -> None: - """ - Log structured data to an external system. - + """Log structured data to an external system. + Args: data: The structured data to log, either as a dictionary or a BaseMessage """ - pass \ No newline at end of file + pass diff --git a/src/codegen/agents/scratch.ipynb b/src/codegen/agents/scratch.ipynb index 69e2db9ed..1b50f8920 100644 --- a/src/codegen/agents/scratch.ipynb +++ b/src/codegen/agents/scratch.ipynb @@ -2,20 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "codegen.agents.code_agent.CodeAgent" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from codegen.agents.code_agent import CodeAgent\n", "\n", @@ -25,32 +14,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[37m2025-03-14 14:56:27,596 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mFetching codebase for codegen-sh/Kevin-s-Adventure-Game\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:27,596 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mUsing directory: /tmp/codegen\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:27,597 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mWill clone https://github.com/codegen-sh/Kevin-s-Adventure-Game.git to /tmp/codegen/Kevin-s-Adventure-Game\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:27,597 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mCloning repository...\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,311 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mClone completed successfully\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,311 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mInitializing Codebase...\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,398 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Parsing 13 files in ALL subdirectories with ['.py'] extensions\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,428 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Building directory tree\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,440 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Computing import resolution edges for 55 imports\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,447 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Computing superclass dependencies\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,447 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Incrementally computing dependencies for 327 nodes\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,467 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Incrementally computing dependencies for 7 nodes\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,468 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Found 13 files\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,468 - codegen.sdk.codebase.codebase_context - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34m> Found 334 nodes and 840 edges\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,468 - codegen.shared.performance.stopwatch_utils - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mFunction 'build_graph' took 80.71 milliseconds to execute.\u001b[0m\n", - "\u001b[37m2025-03-14 14:56:28,480 - codegen.sdk.core.codebase - \u001b[32mINFO\u001b[0m\u001b[37m - \u001b[34mCodebase initialization complete\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "from codegen.sdk.core.codebase import Codebase\n", "\n", @@ -60,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -76,332 +42,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using LangSmith project: RELACE\n", - "What is the main character's name? also show the source code snippetwhere you find the answer\n", - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "What is the main character's name? also show the source code snippetwhere you find the answer\n", - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "What is the main character's name? also show the source code snippetwhere you find the answer\n", - "[{'text': \"I'll help you find the main character's name in the codebase. Let me search for references to a main character.\", 'type': 'text'}, {'id': 'toolu_01THYvJFKVWoDtj9N97oTVLV', 'input': {'query': 'main character'}, 'name': 'search', 'type': 'tool_use'}]\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "[{'text': \"I'll help you find the main character's name in the codebase. Let me search for references to a main character.\", 'type': 'text'}, {'id': 'toolu_01THYvJFKVWoDtj9N97oTVLV', 'input': {'query': 'main character'}, 'name': 'search', 'type': 'tool_use'}]\n", - "Tool Calls:\n", - " search (toolu_01THYvJFKVWoDtj9N97oTVLV)\n", - " Call ID: toolu_01THYvJFKVWoDtj9N97oTVLV\n", - " Args:\n", - " query: main character\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "I'll help you find the main character's name in the codebase. Let me search for references to a main character.\n", - "[SEARCH RESULTS]: main character\n", - "Found 0 files with matches (showing page 1 of 0)\n", - "\n", - "No matches found\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "Name: search\n", - "\n", - "[SEARCH RESULTS]: main character\n", - "Found 0 files with matches (showing page 1 of 0)\n", - "\n", - "No matches found\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "Name: search\n", - "\n", - "[SEARCH RESULTS]: main character\n", - "Found 0 files with matches (showing page 1 of 0)\n", - "\n", - "No matches found\n", - "[{'text': \"Let me try a different approach by looking at the files in the project to understand what kind of application or story we're dealing with.\", 'type': 'text'}, {'id': 'toolu_015jfRz3Q3UzDa6esM4YV5Qj', 'input': {'dirpath': './'}, 'name': 'list_directory', 'type': 'tool_use'}]\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "[{'text': \"Let me try a different approach by looking at the files in the project to understand what kind of application or story we're dealing with.\", 'type': 'text'}, {'id': 'toolu_015jfRz3Q3UzDa6esM4YV5Qj', 'input': {'dirpath': './'}, 'name': 'list_directory', 'type': 'tool_use'}]\n", - "Tool Calls:\n", - " list_directory (toolu_015jfRz3Q3UzDa6esM4YV5Qj)\n", - " Call ID: toolu_015jfRz3Q3UzDa6esM4YV5Qj\n", - " Args:\n", - " dirpath: ./\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "Let me try a different approach by looking at the files in the project to understand what kind of application or story we're dealing with.\n", - "[LIST DIRECTORY]: \n", - "\n", - "├── main.py\n", - "├── game/\n", - "├── locations/\n", - "└── utils/\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "Name: list_directory\n", - "\n", - "[LIST DIRECTORY]: \n", - "\n", - "├── main.py\n", - "├── game/\n", - "├── locations/\n", - "└── utils/\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "Name: list_directory\n", - "\n", - "[LIST DIRECTORY]: \n", - "\n", - "├── main.py\n", - "├── game/\n", - "├── locations/\n", - "└── utils/\n", - "[{'text': 'This looks like a game project. Let me check the main.py file first to see if it contains information about the main character.', 'type': 'text'}, {'id': 'toolu_01M1FShB2Ec1mqgjvV67jgdg', 'input': {'filepath': 'main.py'}, 'name': 'view_file', 'type': 'tool_use'}]\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "[{'text': 'This looks like a game project. Let me check the main.py file first to see if it contains information about the main character.', 'type': 'text'}, {'id': 'toolu_01M1FShB2Ec1mqgjvV67jgdg', 'input': {'filepath': 'main.py'}, 'name': 'view_file', 'type': 'tool_use'}]\n", - "Tool Calls:\n", - " view_file (toolu_01M1FShB2Ec1mqgjvV67jgdg)\n", - " Call ID: toolu_01M1FShB2Ec1mqgjvV67jgdg\n", - " Args:\n", - " filepath: main.py\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "This looks like a game project. Let me check the main.py file first to see if it contains information about the main character.\n", - "[VIEW FILE]: main.py (50 lines total)\n", - "\n", - " 1|from game.actions import perform_action\n", - " 2|from game.player import create_player, get_player_status\n", - " 3|from game.world import get_current_location, initialize_world\n", - " 4|from utils.save_load import list_save_files, load_game, save_game\n", - " 5|from utils.text_formatting import print_help, print_welcome_message\n", - " 6|\n", - " 7|\n", - " 8|def main():\n", - " 9| print_welcome_message()\n", - "10|\n", - "11| # Add load game option\n", - "12| load_option = input(\"Do you want to load a saved game? (y/n): \").lower()\n", - "13| if load_option == 'y':\n", - "14| save_files = list_save_files()\n", - "15| if save_files:\n", - "16| print(\"Available save files:\")\n", - "17| for i, file in enumerate(save_files, 1):\n", - "18| print(f\"{i}. {file}\")\n", - "19| choice = int(input(\"Enter the number of the save file to load: \"))\n", - "20| player, world = load_game(save_files[choice - 1])\n", - "21| if player is None or world is None:\n", - "22| print(\"Failed to load game. Starting a new game.\")\n", - "23| player = create_player(\"Kevin\")\n", - "24| world = initialize_world()\n", - "25| else:\n", - "26| print(\"No save files found. Starting a new game.\")\n", - "27| player = create_player(\"Kevin\")\n", - "28| world = initialize_world()\n", - "29| else:\n", - "30| player = create_player(\"Kevin\")\n", - "31| world = initialize_world()\n", - "32|\n", - "33| while True:\n", - "34| current_location = get_current_location(world)\n", - "35| print(f\"\\nYou are in the {current_location}.\")\n", - "36| print(get_player_status(player))\n", - "37|\n", - "38| action = input(\"What would you like to do? \").lower()\n", - "39|\n", - "40| if action == \"quit\":\n", - "41| save_game(player, world)\n", - "42| print(\"Thanks for playing! Your progress has been saved.\")\n", - "43| break\n", - "44| elif action == \"help\":\n", - "45| print_help()\n", - "46| else:\n", - "47| perform_action(player, world, action)\n", - "48|\n", - "49|if __name__ == \"__main__\":\n", - "50| main()\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "Name: view_file\n", - "\n", - "[VIEW FILE]: main.py (50 lines total)\n", - "\n", - " 1|from game.actions import perform_action\n", - " 2|from game.player import create_player, get_player_status\n", - " 3|from game.world import get_current_location, initialize_world\n", - " 4|from utils.save_load import list_save_files, load_game, save_game\n", - " 5|from utils.text_formatting import print_help, print_welcome_message\n", - " 6|\n", - " 7|\n", - " 8|def main():\n", - " 9| print_welcome_message()\n", - "10|\n", - "11| # Add load game option\n", - "12| load_option = input(\"Do you want to load a saved game? (y/n): \").lower()\n", - "13| if load_option == 'y':\n", - "14| save_files = list_save_files()\n", - "15| if save_files:\n", - "16| print(\"Available save files:\")\n", - "17| for i, file in enumerate(save_files, 1):\n", - "18| print(f\"{i}. {file}\")\n", - "19| choice = int(input(\"Enter the number of the save file to load: \"))\n", - "20| player, world = load_game(save_files[choice - 1])\n", - "21| if player is None or world is None:\n", - "22| print(\"Failed to load game. Starting a new game.\")\n", - "23| player = create_player(\"Kevin\")\n", - "24| world = initialize_world()\n", - "25| else:\n", - "26| print(\"No save files found. Starting a new game.\")\n", - "27| player = create_player(\"Kevin\")\n", - "28| world = initialize_world()\n", - "29| else:\n", - "30| player = create_player(\"Kevin\")\n", - "31| world = initialize_world()\n", - "32|\n", - "33| while True:\n", - "34| current_location = get_current_location(world)\n", - "35| print(f\"\\nYou are in the {current_location}.\")\n", - "36| print(get_player_status(player))\n", - "37|\n", - "38| action = input(\"What would you like to do? \").lower()\n", - "39|\n", - "40| if action == \"quit\":\n", - "41| save_game(player, world)\n", - "42| print(\"Thanks for playing! Your progress has been saved.\")\n", - "43| break\n", - "44| elif action == \"help\":\n", - "45| print_help()\n", - "46| else:\n", - "47| perform_action(player, world, action)\n", - "48|\n", - "49|if __name__ == \"__main__\":\n", - "50| main()\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "Name: view_file\n", - "\n", - "[VIEW FILE]: main.py (50 lines total)\n", - "\n", - " 1|from game.actions import perform_action\n", - " 2|from game.player import create_player, get_player_status\n", - " 3|from game.world import get_current_location, initialize_world\n", - " 4|from utils.save_load import list_save_files, load_game, save_game\n", - " 5|from utils.text_formatting import print_help, print_welcome_message\n", - " 6|\n", - " 7|\n", - " 8|def main():\n", - " 9| print_welcome_message()\n", - "10|\n", - "11| # Add load game option\n", - "12| load_option = input(\"Do you want to load a saved game? (y/n): \").lower()\n", - "13| if load_option == 'y':\n", - "14| save_files = list_save_files()\n", - "15| if save_files:\n", - "16| print(\"Available save files:\")\n", - "17| for i, file in enumerate(save_files, 1):\n", - "18| print(f\"{i}. {file}\")\n", - "19| choice = int(input(\"Enter the number of the save file to load: \"))\n", - "20| player, world = load_game(save_files[choice - 1])\n", - "21| if player is None or world is None:\n", - "22| print(\"Failed to load game. Starting a new game.\")\n", - "23| player = create_player(\"Kevin\")\n", - "24| world = initialize_world()\n", - "25| else:\n", - "26| print(\"No save files found. Starting a new game.\")\n", - "27| player = create_player(\"Kevin\")\n", - "28| world = initialize_world()\n", - "29| else:\n", - "30| player = create_player(\"Kevin\")\n", - "31| world = initialize_world()\n", - "32|\n", - "33| while True:\n", - "34| current_location = get_current_location(world)\n", - "35| print(f\"\\nYou are in the {current_location}.\")\n", - "36| print(get_player_status(player))\n", - "37|\n", - "38| action = input(\"What would you like to do? \").lower()\n", - "39|\n", - "40| if action == \"quit\":\n", - "41| save_game(player, world)\n", - "42| print(\"Thanks for playing! Your progress has been saved.\")\n", - "43| break\n", - "44| elif action == \"help\":\n", - "45| print_help()\n", - "46| else:\n", - "47| perform_action(player, world, action)\n", - "48|\n", - "49|if __name__ == \"__main__\":\n", - "50| main()\n", - "I found the main character's name! According to the source code in main.py, the main character's name is \"Kevin\".\n", - "\n", - "The name appears in lines 23, 27, and 30 of main.py where the player character is created:\n", - "\n", - "```python\n", - "player = create_player(\"Kevin\")\n", - "```\n", - "\n", - "This line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "I found the main character's name! According to the source code in main.py, the main character's name is \"Kevin\".\n", - "\n", - "The name appears in lines 23, 27, and 30 of main.py where the player character is created:\n", - "\n", - "```python\n", - "player = create_player(\"Kevin\")\n", - "```\n", - "\n", - "This line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "I found the main character's name! According to the source code in main.py, the main character's name is \"Kevin\".\n", - "\n", - "The name appears in lines 23, 27, and 30 of main.py where the player character is created:\n", - "\n", - "```python\n", - "player = create_player(\"Kevin\")\n", - "```\n", - "\n", - "This line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".\n", - "\n", - "============================================================\n", - "Could not retrieve LangSmith URL: Project RELACE not found\n", - "Traceback (most recent call last):\n", - " File \"/Users/rushil/dev/codegen-sdk/src/codegen/extensions/langchain/utils/get_langsmith_url.py\", line 53, in find_and_print_langsmith_run_url\n", - " recent_runs = list(\n", - " client.list_runs(\n", - " ...<4 lines>...\n", - " )\n", - " )\n", - " File \"/Users/rushil/dev/codegen-sdk/.venv/lib/python3.13/site-packages/langsmith/client.py\", line 2313, in list_runs\n", - " [self.read_project(project_name=name).id for name in project_name]\n", - " ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^\n", - " File \"/Users/rushil/dev/codegen-sdk/.venv/lib/python3.13/site-packages/langsmith/utils.py\", line 138, in wrapper\n", - " return func(*args, **kwargs)\n", - " File \"/Users/rushil/dev/codegen-sdk/.venv/lib/python3.13/site-packages/langsmith/client.py\", line 2961, in read_project\n", - " raise ls_utils.LangSmithNotFoundError(\n", - " f\"Project {project_name} not found\"\n", - " )\n", - "langsmith.utils.LangSmithNotFoundError: Project RELACE not found\n", - "\n", - "============================================================\n" - ] - }, - { - "data": { - "text/plain": [ - "'I found the main character\\'s name! According to the source code in main.py, the main character\\'s name is \"Kevin\".\\n\\nThe name appears in lines 23, 27, and 30 of main.py where the player character is created:\\n\\n```python\\nplayer = create_player(\"Kevin\")\\n```\\n\\nThis line appears in three different places in the code, depending on whether the player is loading a saved game or starting a new one. In all cases, the default player name is set to \"Kevin\".'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "\n", - "\n", - "\n", - "\n", "agent = CodeAgent(codebase)\n", "agent.run(\"What is the main character's name? also show the source code where you find the answer\", logger=ConsoleLogger())" ] diff --git a/src/codegen/agents/tracer.py b/src/codegen/agents/tracer.py index 493b83cd9..816835c41 100644 --- a/src/codegen/agents/tracer.py +++ b/src/codegen/agents/tracer.py @@ -1,42 +1,41 @@ -from typing import Dict, List, Any, Generator, Union, Optional -from langchain.schema import AIMessage, HumanMessage, SystemMessage as LCSystemMessage, FunctionMessage as LCFunctionMessage +from collections.abc import Generator +from typing import Any, Optional + +from langchain.schema import AIMessage, HumanMessage +from langchain.schema import FunctionMessage as LCFunctionMessage +from langchain.schema import SystemMessage as LCSystemMessage from langchain_core.messages import ToolMessage as LCToolMessage -from dataclasses import asdict -from .data import ( - BaseMessage, UserMessage, SystemMessageData, AssistantMessage, - ToolMessageData, FunctionMessageData, UnknownMessage, ToolCall -) +from .data import AssistantMessage, BaseMessage, FunctionMessageData, SystemMessageData, ToolCall, ToolMessageData, UnknownMessage, UserMessage from .loggers import ExternalLogger + class MessageStreamTracer: def __init__(self, logger: Optional[ExternalLogger] = None): self.traces = [] self.logger = logger - + def process_stream(self, message_stream: Generator) -> Generator: - """ - Process the stream of messages from the LangGraph agent, + """Process the stream of messages from the LangGraph agent, extract structured data, and pass through the messages. """ for chunk in message_stream: # Process the chunk structured_data = self.extract_structured_data(chunk) - + # Log the structured data if structured_data: self.traces.append(structured_data) - + # If there's an external logger, send the data there if self.logger: self.logger.log(structured_data) - + # Pass through the chunk to maintain the original stream behavior yield chunk - - def extract_structured_data(self, chunk: Dict[str, Any]) -> Optional[BaseMessage]: - """ - Extract structured data from a message chunk. + + def extract_structured_data(self, chunk: dict[str, Any]) -> Optional[BaseMessage]: + """Extract structured data from a message chunk. Returns None if the chunk doesn't contain useful information. Returns a BaseMessage subclass instance based on the message type. """ @@ -48,64 +47,36 @@ def extract_structured_data(self, chunk: Dict[str, Any]) -> Optional[BaseMessage if isinstance(value, list) and all(hasattr(item, "type") for item in value if hasattr(item, "__dict__")): messages = value break - + if not messages: return None - + # Get the latest message latest_message = messages[-1] if messages else None - + if not latest_message: return None - + # Determine message type message_type = self._get_message_type(latest_message) content = self._get_message_content(latest_message) - + # Create the appropriate message type if message_type == "user": - return UserMessage( - type=message_type, - content=content - ) + return UserMessage(type=message_type, content=content) elif message_type == "system": - return SystemMessageData( - type=message_type, - content=content - ) + return SystemMessageData(type=message_type, content=content) elif message_type == "assistant": tool_calls_data = self._extract_tool_calls(latest_message) - tool_calls = [ - ToolCall( - name=tc.get("name"), - arguments=tc.get("arguments"), - id=tc.get("id") - ) for tc in tool_calls_data - ] - return AssistantMessage( - type=message_type, - content=content, - tool_calls=tool_calls - ) + tool_calls = [ToolCall(name=tc.get("name"), arguments=tc.get("arguments"), id=tc.get("id")) for tc in tool_calls_data] + return AssistantMessage(type=message_type, content=content, tool_calls=tool_calls) elif message_type == "tool": - return ToolMessageData( - type=message_type, - content=content, - tool_name=getattr(latest_message, "name", None), - tool_response=content, - tool_id=getattr(latest_message, "tool_call_id", None) - ) + return ToolMessageData(type=message_type, content=content, tool_name=getattr(latest_message, "name", None), tool_response=content, tool_id=getattr(latest_message, "tool_call_id", None)) elif message_type == "function": - return FunctionMessageData( - type=message_type, - content=content - ) + return FunctionMessageData(type=message_type, content=content) else: - return UnknownMessage( - type=message_type, - content=content - ) - + return UnknownMessage(type=message_type, content=content) + def _get_message_type(self, message) -> str: """Determine the type of message.""" if isinstance(message, HumanMessage): @@ -122,7 +93,7 @@ def _get_message_type(self, message) -> str: return message.type else: return "unknown" - + def _get_message_content(self, message) -> str: """Extract content from a message.""" if hasattr(message, "content"): @@ -131,37 +102,35 @@ def _get_message_content(self, message) -> str: return message.message.content else: return str(message) - - def _extract_tool_calls(self, message) -> List[Dict[str, Any]]: + + def _extract_tool_calls(self, message) -> list[dict[str, Any]]: """Extract tool calls from an assistant message.""" tool_calls = [] - + # Check different possible locations for tool calls if hasattr(message, "additional_kwargs") and "tool_calls" in message.additional_kwargs: raw_tool_calls = message.additional_kwargs["tool_calls"] for tc in raw_tool_calls: - tool_calls.append({ - "name": tc.get("function", {}).get("name"), - "arguments": tc.get("function", {}).get("arguments"), - "id": tc.get("id") - }) - + tool_calls.append({"name": tc.get("function", {}).get("name"), "arguments": tc.get("function", {}).get("arguments"), "id": tc.get("id")}) + # Also check for function_call which is used in some models elif hasattr(message, "additional_kwargs") and "function_call" in message.additional_kwargs: fc = message.additional_kwargs["function_call"] if isinstance(fc, dict): - tool_calls.append({ - "name": fc.get("name"), - "arguments": fc.get("arguments"), - "id": "function_call_1" # Assigning a default ID - }) - + tool_calls.append( + { + "name": fc.get("name"), + "arguments": fc.get("arguments"), + "id": "function_call_1", # Assigning a default ID + } + ) + return tool_calls - - def get_traces(self) -> List[BaseMessage]: + + def get_traces(self) -> list[BaseMessage]: """Get all collected traces.""" return self.traces - + def clear_traces(self) -> None: """Clear all traces.""" - self.traces = [] \ No newline at end of file + self.traces = [] From 350e5b015d07bb6aa3dca81f9d3368188e4ebc66 Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Sun, 16 Mar 2025 08:16:40 -0700 Subject: [PATCH 6/8] fix: datetime tz lint --- src/codegen/agents/data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/codegen/agents/data.py b/src/codegen/agents/data.py index 34ffd6caa..f7c886278 100644 --- a/src/codegen/agents/data.py +++ b/src/codegen/agents/data.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field -from datetime import datetime from typing import Literal, Optional, Union - +from datetime import datetime, UTC # Base dataclass for all message types @dataclass @@ -9,7 +8,7 @@ class BaseMessage: """Base class for all message types.""" type: str - timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) + timestamp: str = field(default_factory=lambda: datetime.now(tz=UTC).isoformat()) content: str = "" From 021851021f177b22b05af102e723b218ee1a4e5d Mon Sep 17 00:00:00 2001 From: Rushil Patel Date: Sun, 16 Mar 2025 08:28:58 -0700 Subject: [PATCH 7/8] fix: datetime import --- src/codegen/agents/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/agents/data.py b/src/codegen/agents/data.py index f7c886278..1a597a228 100644 --- a/src/codegen/agents/data.py +++ b/src/codegen/agents/data.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from typing import Literal, Optional, Union -from datetime import datetime, UTC +from datetime import UTC, datetime # Base dataclass for all message types @dataclass From 5b1e301f79ed1fc5d2d8c16211465692785a781f Mon Sep 17 00:00:00 2001 From: rushilpatel0 <171610820+rushilpatel0@users.noreply.github.com> Date: Sun, 16 Mar 2025 15:30:10 +0000 Subject: [PATCH 8/8] Automated pre-commit update --- src/codegen/agents/data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/codegen/agents/data.py b/src/codegen/agents/data.py index 1a597a228..6ac9b1d81 100644 --- a/src/codegen/agents/data.py +++ b/src/codegen/agents/data.py @@ -1,6 +1,7 @@ from dataclasses import dataclass, field -from typing import Literal, Optional, Union from datetime import UTC, datetime +from typing import Literal, Optional, Union + # Base dataclass for all message types @dataclass