From e2e79934b4b9d97990a36a758936e9559d220758 Mon Sep 17 00:00:00 2001
From: tkucar <tkucar@codegen.com>
Date: Tue, 4 Mar 2025 02:31:56 +0100
Subject: [PATCH 1/7] init

---
 .../examples/ai_impact_analysis/run.py        |  69 ++
 .../attributions/symbol_attribution.py        |  93 +++
 pyproject.toml                                |   2 +-
 .../extensions/attribution/3pp/__init__.py    |   1 +
 .../extensions/attribution/3pp/cursor.py      | 592 ++++++++++++++++++
 .../extensions/attribution/3pp/windsurf.py    | 186 ++++++
 src/codegen/extensions/attribution/cli.py     | 153 +++++
 .../extensions/attribution/git_history.py     | 326 ++++++++++
 src/codegen/extensions/attribution/main.py    | 103 +++
 9 files changed, 1524 insertions(+), 1 deletion(-)
 create mode 100644 codegen-examples/examples/ai_impact_analysis/run.py
 create mode 100644 codegen-examples/examples/attributions/symbol_attribution.py
 create mode 100644 src/codegen/extensions/attribution/3pp/__init__.py
 create mode 100644 src/codegen/extensions/attribution/3pp/cursor.py
 create mode 100644 src/codegen/extensions/attribution/3pp/windsurf.py
 create mode 100644 src/codegen/extensions/attribution/cli.py
 create mode 100644 src/codegen/extensions/attribution/git_history.py
 create mode 100644 src/codegen/extensions/attribution/main.py

diff --git a/codegen-examples/examples/ai_impact_analysis/run.py b/codegen-examples/examples/ai_impact_analysis/run.py
new file mode 100644
index 000000000..7ff9026a8
--- /dev/null
+++ b/codegen-examples/examples/ai_impact_analysis/run.py
@@ -0,0 +1,69 @@
+import sys
+import traceback
+import os
+
+from codegen import Codebase
+from codegen.extensions.attribution.cli import run
+from codegen.git.repo_operator.repo_operator import RepoOperator
+from codegen.git.schemas.repo_config import RepoConfig
+from codegen.sdk.codebase.config import ProjectConfig
+from codegen.shared.enums.programming_language import ProgrammingLanguage
+
+def _codebase_from_local_repo(repo_path):
+    """Build a Python ``Codebase`` for the git checkout rooted at ``repo_path``."""
+    repo_config = RepoConfig.from_repo_path(repo_path)
+    repo_operator = RepoOperator(repo_config=repo_config)
+    project = ProjectConfig.from_repo_operator(
+        repo_operator=repo_operator,
+        programming_language=ProgrammingLanguage.PYTHON,
+    )
+    return Codebase(projects=[project])
+
+
+def _find_git_root(start_dir):
+    """Return the closest directory at or above ``start_dir`` containing ``.git``.
+
+    The filesystem root itself is examined too. Returns ``None`` when no
+    directory on the way up qualifies.
+    """
+    current_dir = start_dir
+    while True:
+        if os.path.exists(os.path.join(current_dir, ".git")):
+            return current_dir
+        parent_dir = os.path.dirname(current_dir)
+        if parent_dir == current_dir:  # dirname() is a fixed point at the root
+            return None
+        current_dir = parent_dir
+
+
+if __name__ == "__main__":
+    try:
+        print("Initializing codebase...")
+        cwd = os.getcwd()
+
+        if os.path.exists(os.path.join(cwd, ".git")):
+            # Option A: the current directory is itself a git repository.
+            print("Using current directory as repository...")
+            codebase = _codebase_from_local_repo(cwd)
+        else:
+            # Option B: look for an enclosing git repository.
+            print("Searching for git repository in parent directories...")
+            git_root = _find_git_root(os.path.dirname(cwd))
+            if git_root is not None:
+                print(f"Found git repository at {git_root}")
+                codebase = _codebase_from_local_repo(git_root)
+            else:
+                # Option C: fall back to from_repo, which handles cloning.
+                print("No local git repository found. Cloning a repository...")
+                codebase = Codebase.from_repo(repo_full_name="codegen-sh/codegen", language="python")
+
+        print(f"Codebase loaded with {len(codebase.files)} files and {len(codebase.symbols)} symbols")
+
+        # Run the analysis
+        run(codebase)
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        print("\nTraceback:")
+        traceback.print_exc()
+        sys.exit(1)
\ No newline at end of file
diff --git a/codegen-examples/examples/attributions/symbol_attribution.py b/codegen-examples/examples/attributions/symbol_attribution.py
new file mode 100644
index 000000000..16cd8179e
--- /dev/null
+++ b/codegen-examples/examples/attributions/symbol_attribution.py
@@ -0,0 +1,93 @@
+import os
+import sys
+
+from codegen import Codebase
+from codegen.extensions.attribution.cli import run
+from codegen.extensions.attribution.main import add_attribution_to_symbols
+from codegen.git.repo_operator.repo_operator import RepoOperator
+from codegen.git.schemas.repo_config import RepoConfig
+from codegen.sdk.codebase.config import ProjectConfig
+from codegen.shared.enums.programming_language import ProgrammingLanguage
+
+def print_symbol_attribution(codebase):
+    """Show attribution details for the ten most-used symbols of ``codebase``.
+
+    Attribution is first attached via ``add_attribution_to_symbols`` using a
+    fixed list of bot author names; symbols without usages are ignored.
+    """
+    print("\n🔍 Symbol Attribution Examples:")
+
+    # Attribution has to be attached before the attributes below can be read.
+    ai_authors = ['devin[bot]', 'codegen[bot]', 'github-actions[bot]']
+    add_attribution_to_symbols(codebase, ai_authors)
+
+    # Pair every used symbol with its usage count, most used first.
+    ranked = [
+        (sym, len(sym.usages))
+        for sym in codebase.symbols
+        if hasattr(sym, 'usages') and len(sym.usages) > 0
+    ]
+    ranked.sort(key=lambda pair: pair[1], reverse=True)
+
+    for position, (sym, usage_count) in enumerate(ranked[:10], start=1):
+        print(f"\n📊 Symbol #{position}: {sym.name} ({type(sym).__name__})")
+        print(f"  • File: {sym.filepath}")
+        print(f"  • Usages: {usage_count}")
+
+        if not hasattr(sym, 'last_editor'):
+            print("  • Last editor: Not available")
+        else:
+            print(f"  • Last editor: {sym.last_editor}")
+
+        history = getattr(sym, 'editor_history', None)
+        if history:
+            shown = ', '.join(history[:5])
+            overflow = f" and {len(history) - 5} more..." if len(history) > 5 else ""
+            print(f"  • Editor history: {shown}" + overflow)
+        else:
+            print("  • Editor history: Not available")
+
+        if hasattr(sym, 'is_ai_authored'):
+            print(f"  • AI authored: {'Yes' if sym.is_ai_authored else 'No'}")
+        else:
+            print("  • AI authored: Not available")
+
+if __name__ == "__main__":
+    try:
+        print("Initializing codebase...")
+
+        if not os.path.exists(".git"):
+            # No checkout here: analyse a well-known sample repository instead.
+            print("Using a sample repository...")
+            codebase = Codebase.from_repo(
+                repo_full_name="codegen-sh/codegen",
+                language="python",
+            )
+        else:
+            # The working directory is a git repository, so analyse it in place.
+            print("Using current directory as repository...")
+            operator = RepoOperator(
+                repo_config=RepoConfig.from_repo_path(os.getcwd()),
+            )
+            codebase = Codebase(
+                projects=[
+                    ProjectConfig.from_repo_operator(
+                        repo_operator=operator,
+                        programming_language=ProgrammingLanguage.PYTHON,
+                    )
+                ]
+            )
+
+        file_count, symbol_count = len(codebase.files), len(codebase.symbols)
+        print(f"Codebase loaded with {file_count} files and {symbol_count} symbols")
+
+        # Gather attribution data first, then show how to read it per symbol.
+        print("\n🔍 Running AI impact analysis...")
+        run(codebase)
+        print_symbol_attribution(codebase)
+
+    except Exception as e:
+        print(f"\n❌ Error: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
diff --git a/pyproject.toml b/pyproject.toml
index e2a7470d1..d3ce75054 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ dependencies = [
   "hatch-vcs>=0.4.0",
   "hatchling>=1.25.0",
   "pyinstrument>=5.0.0",
-  "pip>=24.3.1",                       # This is needed for some NPM/YARN/PNPM post-install scripts to work!
+  "pip>=24.3.1", # This is needed for some NPM/YARN/PNPM post-install scripts to work!
   "rich-click>=1.8.5",
   "python-dotenv>=1.0.1",
   "giturlparse",
diff --git a/src/codegen/extensions/attribution/3pp/__init__.py b/src/codegen/extensions/attribution/3pp/__init__.py
new file mode 100644
index 000000000..4228e6fc3
--- /dev/null
+++ b/src/codegen/extensions/attribution/3pp/__init__.py
@@ -0,0 +1 @@
+"""Code for fetching attributions from 3rd party products."""
diff --git a/src/codegen/extensions/attribution/3pp/cursor.py b/src/codegen/extensions/attribution/3pp/cursor.py
new file mode 100644
index 000000000..4fb534495
--- /dev/null
+++ b/src/codegen/extensions/attribution/3pp/cursor.py
@@ -0,0 +1,592 @@
+import asyncio
+import json
+import logging
+import os
+import platform
+import shutil
+import sqlite3
+from pathlib import Path
+
+import jwt
+import requests
+
+
+class Cursor:
+    def __init__(self, log_level=logging.INFO):
+        """Configure root logging at ``log_level`` and set up the "Cursor" logger and API base URL."""
+        self.api_base_url = "https://cursor.com"
+        logging.basicConfig(level=log_level)
+        self.logger = logging.getLogger("Cursor")
+
+    def log(self, message, is_error=False):
+        """Emit ``message`` on the instance logger: ERROR if ``is_error``, otherwise DEBUG."""
+        if not is_error:
+            self.logger.debug(message)
+            return
+        self.logger.error(message)
+
+    def get_windows_username(self):
+        """Ask ``cmd.exe`` for ``%USERNAME%`` (used under WSL); return None if that fails."""
+        command = ["cmd.exe", "/c", "echo", "%USERNAME%"]
+        try:
+            import subprocess
+            completed = subprocess.run(command, capture_output=True, text=True)
+            return completed.stdout.strip()
+        except Exception as e:
+            self.log(f"Error getting Windows username: {e}", True)
+            return None
+
+    def is_installed(self):
+        """Report whether a Cursor installation is detectable on this machine.
+
+        The state database must exist on disk. A binary path is checked only
+        when ``_get_binary_path`` returns one; a ``None`` result does not by
+        itself make this method return False.
+
+        Returns:
+            bool: True if installed, False otherwise
+        """
+        if not os.path.exists(self.get_cursor_db_path()):
+            self.log("Cursor database not found", True)
+            return False
+
+        binary = self._get_binary_path()
+        binary_missing = bool(binary) and not binary.exists()
+        if binary_missing:
+            self.log("Cursor binary not found", True)
+        return not binary_missing
+
+    def _get_binary_path(self):
+        """Locate the Cursor executable for the current platform.
+
+        Well-known install locations are probed first, then ``PATH``. The
+        "Cursor Nightly" names are used when ``VSCODE_APP_NAME`` says so.
+
+        Returns:
+            Path | None: Path to the executable, or None when it cannot be
+            found or the lookup itself raises.
+        """
+        try:
+            nightly = os.environ.get("VSCODE_APP_NAME", "") == "Cursor Nightly"
+            folder_name = "Cursor Nightly" if nightly else "Cursor"
+            system = platform.system()
+
+            if system == "Windows":
+                # Program Files install, then cursor.exe on PATH.
+                program_files = os.environ.get("ProgramFiles", "C:\\Program Files")
+                candidates = [Path(program_files) / folder_name / "Cursor.exe"]
+                executable = "cursor.exe"
+            elif system == "Darwin":  # macOS
+                # Application bundle, then cursor on PATH.
+                candidates = [Path(f"/Applications/{folder_name}.app/Contents/MacOS/Cursor")]
+                executable = "cursor"
+            else:  # Linux and others
+                # Common bin directories, then the lower-cased name on PATH.
+                executable = folder_name.lower()
+                candidates = [
+                    Path(f"/usr/bin/{executable}"),
+                    Path(f"/usr/local/bin/{executable}"),
+                    Path(os.path.expanduser(f"~/.local/bin/{executable}")),
+                ]
+
+            # The first well-known location that exists wins.
+            for candidate in candidates:
+                if candidate.exists():
+                    return candidate
+
+            # Otherwise fall back to whatever PATH resolves.
+            located = shutil.which(executable)
+            if located:
+                return Path(located)
+            return None
+        except Exception as error:
+            self.log(f"Error finding Cursor binary: {error}", True)
+            return None
+
+    def get_cursor_db_path(self):
+        """Return the platform-specific location of Cursor's global ``state.vscdb`` file."""
+        nightly = os.environ.get("VSCODE_APP_NAME", "") == "Cursor Nightly"
+        folder_name = "Cursor Nightly" if nightly else "Cursor"
+        tail = ("User", "globalStorage", "state.vscdb")
+        system = platform.system()
+
+        if system == "Windows":
+            return os.path.join(os.environ.get("APPDATA", ""), folder_name, *tail)
+        if system == "Darwin":  # macOS
+            return os.path.join(os.path.expanduser("~"), "Library", "Application Support", folder_name, *tail)
+        if system == "Linux" and os.environ.get("VSCODE_REMOTE_NAME") == "wsl":
+            # Under WSL the data lives in the Windows user's roaming profile.
+            windows_username = self.get_windows_username()
+            if windows_username:
+                return os.path.join("/mnt/c/Users", windows_username, "AppData/Roaming", folder_name, "User/globalStorage/state.vscdb")
+
+        # Linux proper, WSL without a resolvable user, and unknown platforms.
+        return os.path.join(os.path.expanduser("~"), ".config", folder_name, *tail)
+
+    async def read_auth_token(self):
+        """Retrieve and process the Cursor authentication token from the database.
+
+        Returns:
+            str | None: ``"<user_id>%3A%3A<token>"`` built from the JWT ``sub``
+            claim and the raw token, or None when any step fails.
+        """
+        try:
+            db_path = self.get_cursor_db_path()
+
+            self.log(f"Platform: {platform.system()}")
+            self.log(f"Home directory: {os.path.expanduser('~')}")
+            self.log(f"Attempting to open database at: {db_path}")
+            self.log(f"Database path exists: {os.path.exists(db_path)}")
+
+            if not os.path.exists(db_path):
+                self.log("Database file does not exist", True)
+                return None
+
+            conn = sqlite3.connect(db_path)
+            try:
+                self.log("Successfully opened database connection")
+                self.log("Executing SQL query for token...")
+                result = conn.execute("SELECT value FROM ItemTable WHERE key = 'cursorAuth/accessToken'").fetchone()
+            finally:
+                # Runs on every path, so a failing query no longer leaks the handle.
+                conn.close()
+        except Exception as error:
+            self.log(f"Error opening database: {error}", True)
+            self.log(f"Database error details: {error!s}", True)
+            return None
+
+        if not result:
+            self.log("No token found in database")
+            return None
+
+        try:
+            token = result[0]
+            self.log(f"Token length: {len(token)}")  # token contents are never logged
+            decoded = jwt.decode(token, options={"verify_signature": False})
+            self.log(f"JWT decoded successfully: {bool(decoded)}")
+            self.log(f"JWT sub exists: {bool(decoded and 'sub' in decoded)}")
+            if not decoded or "sub" not in decoded:
+                self.log(f"Invalid JWT structure: {decoded}", True)
+                return None
+
+            # The user id is the segment after the first "|" of the ``sub`` claim.
+            sub_parts = str(decoded["sub"]).split("|")
+            if len(sub_parts) < 2:
+                self.log("JWT sub claim does not contain a user id", True)
+                return None
+
+            user_id = sub_parts[1]
+            self.log(f"Extracted userId: {user_id}")
+            session_token = f"{user_id}%3A%3A{token}"
+            self.log(f"Created session token, length: {len(session_token)}")
+            return session_token
+        except Exception as error:
+            self.log(f"Error processing token: {error}", True)
+            self.log(f"Error details: {error.__class__.__name__}, {error!s}", True)
+            return None
+
+    async def get_user_info(self, timeout=10):
+        """Get user information using the auth token.
+
+        Args:
+            timeout: Seconds to wait for the HTTP request. ``requests`` waits
+                indefinitely when no timeout is given, which would hang callers.
+
+        Returns:
+            dict: User information if successful, None otherwise
+        """
+        token = await self.read_auth_token()
+        if not token:
+            self.log("No auth token available", True)
+            return None
+
+        headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+        try:
+            response = requests.get(f"{self.api_base_url}/api/v1/user", headers=headers, timeout=timeout)
+            if response.status_code != 200:
+                self.log(f"Failed to get user info: {response.status_code}", True)
+                self.log(f"Response: {response.text}", True)
+                return None
+            user_data = response.json()
+            self.log(f"Successfully retrieved user info: {user_data}")
+            return user_data
+        except Exception as error:
+            self.log(f"Error getting user info: {error}", True)
+            self.log(f"Error details: {error.__class__.__name__}, {error!s}", True)
+            return None
+
+    async def validate_token(self):
+        """Return True when user info can be fetched with the stored token, else False."""
+        # get_user_info() yields None on any failure, including a missing token.
+        return (await self.get_user_info()) is not None
+
+    def get_cursor_storage_path(self):
+        """Return the platform-specific directory holding Cursor's per-workspace storage."""
+        nightly = os.environ.get("VSCODE_APP_NAME", "") == "Cursor Nightly"
+        folder_name = "Cursor Nightly" if nightly else "Cursor"
+        tail = ("User", "workspaceStorage")
+        system = platform.system()
+
+        if system == "Windows":
+            return os.path.join(os.environ.get("APPDATA", ""), folder_name, *tail)
+        if system == "Darwin":  # macOS
+            return os.path.join(os.path.expanduser("~"), "Library", "Application Support", folder_name, *tail)
+        if system == "Linux" and os.environ.get("VSCODE_REMOTE_NAME") == "wsl":
+            # Under WSL the data lives in the Windows user's roaming profile.
+            windows_username = self.get_windows_username()
+            if windows_username:
+                return os.path.join("/mnt/c/Users", windows_username, "AppData/Roaming", folder_name, "User/workspaceStorage")
+
+        # Linux proper, WSL without a resolvable user, and unknown platforms.
+        return os.path.join(os.path.expanduser("~"), ".config", folder_name, *tail)
+
+    def get_global_storage_path(self):
+        """Return the path of Cursor's global-storage ``state.vscdb`` database file.
+
+        Despite the method name, the result is the database *file* inside the
+        ``globalStorage`` directory, not the directory itself. The value is the
+        same one ``get_cursor_db_path`` produces, so this method delegates to it
+        to keep the platform rules in a single place:
+
+        * Windows: ``%APPDATA%/<app>/User/globalStorage/state.vscdb``
+        * macOS: ``~/Library/Application Support/<app>/User/globalStorage/state.vscdb``
+        * WSL (when the Windows user name can be resolved): the Windows user's
+          ``AppData/Roaming`` profile under ``/mnt/c/Users``
+        * Linux and anything else: ``~/.config/<app>/User/globalStorage/state.vscdb``
+
+        Returns:
+            str: Path to the global ``state.vscdb`` file; ``<app>`` is "Cursor
+            Nightly" when ``VSCODE_APP_NAME`` is "Cursor Nightly", else "Cursor".
+        """
+        return self.get_cursor_db_path()
+
+    async def get_workspaces(self):
+        """Get all workspaces from the Cursor storage directory.
+
+        Returns:
+            list[dict]: ``id``, ``path``, ``dbPath`` and ``name`` for every
+            sub-directory holding a ``state.vscdb``; empty on any failure.
+        """
+        try:
+            workspace_path = self.get_cursor_storage_path()
+            self.log(f"Looking for workspaces in: {workspace_path}")
+
+            workspace_dir = Path(workspace_path)
+            if not workspace_dir.exists():
+                self.log(f"Workspace directory does not exist: {workspace_path}", True)
+                return []
+
+            workspaces = []
+            for entry in workspace_dir.iterdir():
+                if not entry.is_dir():
+                    continue
+
+                db_path = entry / "state.vscdb"
+                if not db_path.exists():
+                    self.log(f"Skipping {entry.name}: no state.vscdb found")
+                    continue
+
+                workspace_info = {"id": entry.name, "path": str(entry), "dbPath": str(db_path), "name": entry.name}
+
+                workspace_json_path = entry / "workspace.json"
+                if workspace_json_path.exists():
+                    try:
+                        # Decode explicitly as UTF-8 instead of the locale default.
+                        workspace_data = json.loads(workspace_json_path.read_text(encoding="utf-8"))
+                        if "folder" in workspace_data:
+                            workspace_info["name"] = Path(workspace_data["folder"]).name
+                    except Exception as e:
+                        self.log(f"Error reading workspace.json: {e}", True)
+
+                workspaces.append(workspace_info)
+
+            return workspaces
+        except Exception as e:
+            self.log(f"Failed to get workspaces: {e}", True)
+            return []
+
+    async def get_workspace_chat_data(self, workspace_id: str):
+        """Get chat data for a specific workspace.
+
+        Returns:
+            dict | None: Parsed ``chats`` and/or ``composers`` data (each key
+            only when found); None if the database is missing or unreadable.
+        """
+        try:
+            workspace_path = self.get_cursor_storage_path()
+            db_path = os.path.join(workspace_path, workspace_id, "state.vscdb")
+
+            if not os.path.exists(db_path):
+                self.log(f"Database does not exist: {db_path}", True)
+                return None
+
+            chat_keys = ["workbench.panel.aichat.view.aichat.chatdata", "workbench.panel.chat.view.chat.chatdata", "workbench.view.chat.chatdata"]
+            composer_keys = ["composer.composerData", "cursor.composerData"]
+            chat_result = None
+            composer_result = None
+
+            conn = sqlite3.connect(db_path)
+            try:
+                conn.row_factory = sqlite3.Row  # This allows accessing columns by name
+                cursor = conn.cursor()
+
+                # First, check what tables exist in the database
+                cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+                tables = [table[0] for table in cursor.fetchall()]
+                self.log(f"Tables in database: {tables}")
+
+                # Determine the correct table name (ItemTable or Item)
+                item_table = "ItemTable" if "ItemTable" in tables else "Item"
+
+                try:
+                    for chat_key in chat_keys:
+                        cursor.execute(f"SELECT value FROM {item_table} WHERE key = ?", (chat_key,))
+                        chat_result = cursor.fetchone()
+                        if chat_result:
+                            self.log(f"Found chat data with key: {chat_key}")
+                            break
+
+                    for composer_key in composer_keys:
+                        cursor.execute(f"SELECT value FROM {item_table} WHERE key = ?", (composer_key,))
+                        composer_result = cursor.fetchone()
+                        if composer_result:
+                            self.log(f"Found composer data with key: {composer_key}")
+                            break
+                except Exception as e:
+                    self.log(f"Error querying database: {e}", True)
+                    chat_result = None
+                    composer_result = None
+            finally:
+                # Always close, even when a query above raises, so nothing leaks.
+                conn.close()
+
+            response = {}
+
+            # Process chat data
+            if chat_result:
+                try:
+                    chat_data = json.loads(chat_result["value"])
+                    response["chats"] = chat_data
+                    self.log(f"Successfully parsed chat data with {len(chat_data.get('tabs', []))} tabs")
+                except json.JSONDecodeError as e:
+                    self.log(f"Error parsing chat data: {e}", True)
+            else:
+                self.log("No chat data found in database")
+
+            # Process composer data
+            if composer_result:
+                try:
+                    composers = json.loads(composer_result["value"])
+                    self.log(f"Found {len(composers.get('allComposers', []))} composers")
+                    response["composers"] = composers
+                except json.JSONDecodeError as e:
+                    self.log(f"Error parsing composer data: {e}", True)
+            else:
+                self.log("No composer data found in database")
+
+            return response
+        except Exception as e:
+            self.log(f"Failed to get workspace data: {e}", True)
+            import traceback
+            self.log(traceback.format_exc(), True)
+            return None
+
+    async def search_chat_history(self, query: str, search_type: str = "all"):
+        """Search across all workspaces for chat history matching the query.
+
+        Matching is a case-insensitive substring test; each chat tab or composer yields at most one result.
+
+        Args:
+            query: The search term to look for
+            search_type: Type of logs to search - 'all', 'chat', or 'composer'
+
+        Returns:
+            list: List of search results with matching content, newest first;
+            entries without a timestamp come last.
+        """
+        try:
+            if not query:
+                self.log("No search query provided", True)
+                return []
+
+            needle = query.lower()
+
+            def matches(text):
+                # Stored fields may be missing or null; only strings can match.
+                return isinstance(text, str) and needle in text.lower()
+
+            def first_match(title, messages):  # the title wins over message texts
+                if matches(title):
+                    return title
+                for message in messages or []:
+                    text = message.get("text")
+                    if matches(text):
+                        return text
+                return None
+
+            def newest_first_key(result):
+                # Rank by type first so numbers, strings and blanks are never compared with each other.
+                stamp = result.get("timestamp")
+                if isinstance(stamp, (int, float)):
+                    return (2, stamp, "")
+                if isinstance(stamp, str) and stamp:
+                    return (1, 0, stamp)
+                return (0, 0, "")
+
+            results = []
+            workspaces = await self.get_workspaces()
+
+            for workspace in workspaces:
+                workspace_id = workspace["id"]
+                workspace_name = workspace.get("name", workspace_id)
+
+                try:
+                    workspace_data = await self.get_workspace_chat_data(workspace_id)
+                    if not workspace_data:
+                        continue
+
+                    # Search in chat data
+                    if search_type in ["all", "chat"] and "chats" in workspace_data:
+                        for tab in workspace_data["chats"].get("tabs", []):
+                            matching_text = first_match(tab.get("chatTitle"), tab.get("bubbles"))
+                            if matching_text is None:
+                                continue
+                            tab_id = tab.get("tabId", "")
+                            results.append(
+                                {
+                                    "workspaceId": workspace_id,
+                                    "workspaceName": workspace_name,
+                                    "chatId": tab_id,
+                                    "chatTitle": tab.get("chatTitle", f"Chat {str(tab_id)[:8]}"),
+                                    "timestamp": tab.get("lastSendTime", ""),
+                                    "matchingText": matching_text,
+                                    "type": "chat",
+                                }
+                            )
+
+                    # Search in composer data
+                    if search_type in ["all", "composer"] and "composers" in workspace_data:
+                        for composer in workspace_data["composers"].get("allComposers", []):
+                            matching_text = first_match(composer.get("text"), composer.get("conversation"))
+                            if matching_text is None:
+                                continue
+                            composer_id = composer.get("composerId", "")
+                            results.append(
+                                {
+                                    "workspaceId": workspace_id,
+                                    "workspaceName": workspace_name,
+                                    "chatId": composer_id,
+                                    "chatTitle": composer.get("text", f"Composer {str(composer_id)[:8]}"),
+                                    "timestamp": composer.get("lastUpdatedAt", composer.get("createdAt", "")),
+                                    "matchingText": matching_text,
+                                    "type": "composer",
+                                }
+                            )
+
+                except Exception as e:
+                    self.log(f"Error searching workspace {workspace_id}: {e}", True)
+
+            # Sort results by timestamp, newest first
+            results.sort(key=newest_first_key, reverse=True)
+            return results
+
+        except Exception as e:
+            self.log(f"Failed to search chat history: {e}", True)
+            return []
+
+
+async def main():
+    """Manual smoke test: print a summary of the local Cursor workspaces, chats and composers."""
+    cursor = Cursor(log_level=logging.DEBUG)
+
+    if not cursor.is_installed():
+        print("Cursor is not installed or not properly configured")
+        return
+
+    token = await cursor.read_auth_token()
+    print(f"Token found: {bool(token)}")  # report presence only; never echo the credential
+
+    # Get all workspaces
+    workspaces = await cursor.get_workspaces()
+    print(f"Found {len(workspaces)} workspaces:")
+    for workspace in workspaces:
+        print(f"  - {workspace['name']} ({workspace['id']})")
+
+    # If workspaces were found, summarize the chat data of every one of them
+    if workspaces:
+        for workspace in workspaces:
+            workspace_id = workspace["id"]
+            print(f"\nGetting chat data for workspace: {workspace['name']}")
+
+            chat_data = await cursor.get_workspace_chat_data(workspace_id)
+
+            if chat_data:
+                # Print summary of chat data
+                if "chats" in chat_data:
+                    chats = chat_data["chats"]
+                    print(f"\nFound {len(chats.get('tabs', []))} chat tabs")
+
+                    for i, tab in enumerate(chats.get("tabs", [])):
+                        bubbles = tab.get("bubbles", [])
+                        print(f"  - Tab {i + 1}: {tab.get('chatTitle', 'Untitled')} ({len(bubbles)} messages)")
+
+                        # Print a sample of messages from this chat
+                        if bubbles:
+                            print("    Sample messages:")
+                            for bubble in bubbles[:3]:  # Show first 3 messages
+                                msg_type = "AI" if bubble.get("type") == "ai" else "User"
+                                text = bubble.get("text", "")
+                                # Truncate long messages
+                                if len(text) > 100:
+                                    text = text[:97] + "..."
+                                print(f"      {msg_type}: {text}")
+                            if len(bubbles) > 3:
+                                print(f"      ... and {len(bubbles) - 3} more messages")
+
+                # Print summary of composer data
+                if "composers" in chat_data:
+                    composers = chat_data["composers"]
+                    print(f"\nFound {len(composers.get('allComposers', []))} composers")
+
+                    for i, composer in enumerate(composers.get("allComposers", [])):
+                        conversation = composer.get("conversation", [])
+                        print(f"  - Composer {i + 1}: {composer.get('text', 'Untitled')} ({len(conversation)} messages)")
+
+                        # Print a sample of messages from this composer
+                        if conversation:
+                            print("    Sample messages:")
+                            for message in conversation[:3]:  # Show first 3 messages
+                                msg_type = "AI" if message.get("type") == 2 else "User"
+                                text = message.get("text", "")
+                                # Truncate long messages
+                                if len(text) > 100:
+                                    text = text[:97] + "..."
+                                print(f"      {msg_type}: {text}")
+                            if len(conversation) > 3:
+                                print(f"      ... and {len(conversation) - 3} more messages")
+            else:
+                print("No chat data found for this workspace")
+    else:
+        print("No workspaces found")
+
+    # Search for chat history
+    search_query = "def"  # Example search term
+    print(f"\nSearching chat history for '{search_query}'...")
+    search_results = await cursor.search_chat_history(search_query)
+
+    if search_results:
+        print(f"Found {len(search_results)} results:")
+        for i, result in enumerate(search_results[:10]):  # Show first 10 results
+            print(f"  {i+1}. [{result['type']}] {result['chatTitle']} ({result['workspaceName']})")
+            # Show a snippet of the matching text
+            matching_text = result['matchingText']
+            if len(matching_text) > 100:
+                matching_text = matching_text[:97] + "..."
+            print(f"     Match: {matching_text}")
+    else:
+        print("No search results found")
+
+
+if __name__ == "__main__":
+    # Manual test entry point: pull prompts and responses from the local Cursor database
+    asyncio.run(main())
diff --git a/src/codegen/extensions/attribution/3pp/windsurf.py b/src/codegen/extensions/attribution/3pp/windsurf.py
new file mode 100644
index 000000000..df66181c6
--- /dev/null
+++ b/src/codegen/extensions/attribution/3pp/windsurf.py
@@ -0,0 +1,186 @@
+import asyncio
+import json
+import logging
+import os
+import platform
+import shutil
+from pathlib import Path
+
+import requests
+
+
+class Windsurf:
+    """Locate a local Codeium (Windsurf) install and query the Codeium API with its stored key."""
+
+    def __init__(self, log_level=logging.INFO):
+        """Initialize the Windsurf class for Codeium integration."""
+        logging.basicConfig(level=log_level)
+        self.logger = logging.getLogger("Windsurf")
+        self.api_base_url = "https://api.codeium.com"
+        self.user_data_path = self._get_user_data_path()
+
+    def log(self, message, is_error=False):
+        """Log messages with appropriate level."""
+        if is_error:
+            self.logger.error(message)
+        else:
+            self.logger.debug(message)
+
+    def _get_user_data_path(self):
+        """Get the path to Windsurf/Codeium user data based on platform."""
+        if platform.system() == "Windows":
+            return Path(os.environ.get("APPDATA", "")) / "Codeium"
+        elif platform.system() == "Darwin":  # macOS
+            return Path.home() / "Library" / "Application Support" / "Codeium"
+        else:  # Linux and others
+            return Path.home() / ".config" / "Codeium"
+
+    def is_installed(self):
+        """Check if Windsurf/Codeium is installed on the system.
+
+        Returns:
+            bool: True if installed, False otherwise
+        """
+        # Check if the user data directory exists
+        if not self.user_data_path.exists():
+            self.log("Codeium user data directory not found", True)
+            return False
+
+        # Check if auth file exists
+        auth_path = self.get_auth_token_path()
+        if not auth_path.exists():
+            self.log("Codeium auth file not found", True)
+            return False
+
+        # Check if config file exists
+        config_path = self.get_config_path()
+        if not config_path.exists():
+            self.log("Codeium config file not found", True)
+            return False
+
+        # _get_binary_path() returns None when no binary is located; that case is tolerated here
+        binary_path = self._get_binary_path()
+        if binary_path and not binary_path.exists():
+            self.log("Codeium binary not found", True)
+            return False
+
+        return True
+
+    def _get_binary_path(self):
+        """Get the path to the Codeium binary based on platform."""
+        try:
+            if platform.system() == "Windows":
+                # Check in Program Files
+                program_files = os.environ.get("ProgramFiles", "C:\\Program Files")
+                path = Path(program_files) / "Codeium" / "Codeium.exe"
+                if path.exists():
+                    return path
+
+                # Check in PATH
+                codeium_path = shutil.which("codeium.exe")
+                if codeium_path:
+                    return Path(codeium_path)
+
+            elif platform.system() == "Darwin":  # macOS
+                # Check in Applications
+                path = Path("/Applications/Codeium.app/Contents/MacOS/Codeium")
+                if path.exists():
+                    return path
+
+                # Check in PATH
+                codeium_path = shutil.which("codeium")
+                if codeium_path:
+                    return Path(codeium_path)
+
+            else:  # Linux and others
+                # Check in common locations
+                paths = [Path("/usr/bin/codeium"), Path("/usr/local/bin/codeium"), Path(os.path.expanduser("~/.local/bin/codeium"))]
+
+                for path in paths:
+                    if path.exists():
+                        return path
+
+                # Check in PATH
+                codeium_path = shutil.which("codeium")
+                if codeium_path:
+                    return Path(codeium_path)
+
+            return None
+        except Exception as e:
+            self.log(f"Error finding Codeium binary: {e!s}", True)
+            return None
+
+    def get_config_path(self):
+        """Get the path to the Codeium configuration file."""
+        return self.user_data_path / "config.json"
+
+    def get_auth_token_path(self):
+        """Get the path to the authentication token file."""
+        return self.user_data_path / "auth.json"
+
+    def get_auth_token(self):
+        """Return the "api_key" stored in auth.json, or None if it is missing or unreadable."""
+        try:
+            auth_path = self.get_auth_token_path()
+            self.log(f"Reading auth token from: {auth_path}")
+
+            if not auth_path.exists():
+                self.log("Auth token file does not exist", True)
+                return None
+
+            with open(auth_path, encoding="utf-8") as f:
+                auth_data = json.load(f)
+
+            if "api_key" in auth_data:
+                return auth_data["api_key"]
+            self.log("No API key found in auth data", True)
+            return None
+        except Exception as e:
+            self.log(f"Error reading auth token: {e!s}", True)
+            return None
+
+    async def get_user_info(self):
+        """Fetch /user/info with the stored token; returns the parsed JSON or None on any failure."""
+        token = self.get_auth_token()
+        if not token:
+            return None
+
+        try:
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+            # requests has no default timeout; bound the wait so a stalled server cannot hang us
+            response = requests.get(f"{self.api_base_url}/user/info", headers=headers, timeout=10)
+            if response.status_code == 200:
+                user_info = response.json()  # parse the body once and reuse it
+                self.log(f"User info response: {user_info}")
+                return user_info
+            self.log(f"Failed to get user info: {response.status_code}", True)
+            return None
+
+        except Exception as e:
+            self.log(f"Error getting user info: {e!s}", True)
+            return None
+
+    async def validate_token(self):
+        """Validate if the current token is valid."""
+        user_info = await self.get_user_info()
+        return user_info is not None
+
+
+async def main():
+    """Manual smoke test: check the local Codeium install and validate its stored token."""
+    windsurf = Windsurf(log_level=logging.DEBUG)
+    if not windsurf.is_installed():
+        print("Codeium is not installed or not properly configured")
+        return
+
+    # Windsurf exposes get_auth_token(); report presence only so the secret is never echoed
+    token = windsurf.get_auth_token()
+    print(f"Token found: {bool(token)}")
+
+    is_valid = await windsurf.validate_token()
+    print(f"Token is valid: {is_valid}")
+
+
+if __name__ == "__main__":
+    # TODO: untested so far (no Windsurf install was available); test later if the feature is needed
+    asyncio.run(main())
diff --git a/src/codegen/extensions/attribution/cli.py b/src/codegen/extensions/attribution/cli.py
new file mode 100644
index 000000000..bf4370e6a
--- /dev/null
+++ b/src/codegen/extensions/attribution/cli.py
@@ -0,0 +1,153 @@
+import json
+import os
+
+import pygit2
+
+import codegen
+from codegen import Codebase
+from codegen.extensions.attribution.main import add_attribution_to_symbols, analyze_ai_impact
+
+
+def diagnose_repository(codebase):
+    """Print git diagnostics (HEAD commit, shallow-clone hint, commit sample) for the first project."""
+    try:
+        repo_path = codebase.ctx.projects[0].repo_operator.repo_path
+        print("\n🔍 Repository Diagnostics:")
+        print(f"Repository path: {repo_path}")
+
+        # Check if it's a git repository
+        if not os.path.exists(os.path.join(repo_path, ".git")):
+            print("⚠️ No .git directory found. This might not be a git repository.")
+            return
+
+        try:
+            repo = pygit2.Repository(repo_path)
+            # Check if repository has commits
+            try:
+                head = repo.head
+                head_commit = repo.get(head.target)
+                print(f"Repository has a HEAD commit: {head_commit.id}")
+                print(f"HEAD commit author: {head_commit.author.name} <{head_commit.author.email}>")
+                message_lines = head_commit.message.strip().split("\n")  # kept out of the f-string: "\n" there needs 3.12+
+                truncation_mark = "\n..." if len(message_lines) > 5 else ""  # only when lines were actually dropped
+                print("HEAD commit message (first 5 lines only): " + "\n".join(message_lines[:5]) + truncation_mark)
+                # Check if it's a shallow clone
+                if os.path.exists(os.path.join(repo_path, ".git", "shallow")):
+                    print("⚠️ This appears to be a shallow clone, which may have limited history.")
+
+                # Try to count commits
+                commit_count = 0
+                for _ in repo.walk(head.target, pygit2.GIT_SORT_TIME):
+                    commit_count += 1
+                    if commit_count >= 10:  # Just check first 10
+                        break
+
+                if commit_count == 0:
+                    print("⚠️ No commits found in the repository.")
+                else:
+                    print(f"Found at least {commit_count} commits in the repository.")
+
+            except (pygit2.GitError, KeyError) as e:
+                print(f"⚠️ Error accessing HEAD: {e}")
+                print("This repository might be empty or corrupted.")
+
+        except Exception as e:
+            print(f"⚠️ Error opening repository with pygit2: {e}")
+
+    except Exception as e:
+        print(f"⚠️ Error during repository diagnosis: {e}")
+
+
+@codegen.function("analyze-ai-impact")
+def run(codebase: Codebase):
+    """Analyze the impact of AI on the codebase.
+
+    This function:
+    1. Analyzes git history to identify AI contributions
+    2. Identifies which parts of the codebase were written by AI
+    3. Determines the impact of AI-written code
+    4. Prints a summary report and saves the detailed results to ai_impact_analysis.json
+
+    Run the analysis using the codegen CLI:
+    codegen analyze-ai-impact
+
+    Or from script:
+    from codegen.extensions.attribution.cli import run
+    codebase = Codebase....
+    run(codebase)
+    """
+    print("🤖 Analyzing AI impact on codebase...")
+
+    # Run repository diagnostics first
+    diagnose_repository(codebase)
+
+    # Default AI authors to track (CI and dependency bots are counted as well)
+    ai_authors = ["renovate[bot]", "dependabot[bot]", "github-actions[bot]", "devin-ai-integration[bot]"]
+
+    # Run the analysis
+    results = analyze_ai_impact(codebase, ai_authors)
+
+    # Print list of all contributors
+    print("\n👥 All Contributors:")
+    contributors = results.get("contributors", [])
+    if contributors:
+        # No sorting happens here: contributors print in the order the analysis returned them
+        for author, count in contributors:
+            is_ai = any(ai_name in author for ai_name in ai_authors)
+            ai_indicator = "🤖" if is_ai else "👤"
+            print(f"  {ai_indicator} {author}: {count} commits")
+    else:
+        print("  No contributors found.")
+
+    # Print summary statistics
+    stats = results["stats"]
+    print("\n📊 AI Contribution Summary:")
+    print(f"Total commits: {stats['total_commits']}")
+    print(f"AI commits: {stats['ai_commits']} ({stats['ai_percentage']:.1f}%)")
+
+    if stats["total_file_count"] > 0:
+        ai_file_percentage = stats["ai_file_count"] / stats["total_file_count"] * 100
+    else:
+        ai_file_percentage = 0.0
+    print(f"Files with >50% AI contribution: {stats['ai_file_count']} of {stats['total_file_count']} ({ai_file_percentage:.1f}%)")
+
+    if results["total_symbol_count"] > 0:
+        ai_symbol_percentage = results["ai_symbol_count"] / results["total_symbol_count"] * 100
+    else:
+        ai_symbol_percentage = 0.0
+    print(f"AI-touched symbols: {results['ai_symbol_count']} of {results['total_symbol_count']} ({ai_symbol_percentage:.1f}%)")
+
+    # Print high-impact AI symbols
+    print("\n🔍 High-Impact AI-Written Code:")
+    if results["high_impact_symbols"]:
+        for symbol in results["high_impact_symbols"][:10]:  # Show top 10
+            print(f"  • {symbol['name']} ({symbol['filepath']})")
+            print(f"    - Used by {symbol['usage_count']} other symbols")
+            print(f"    - Last edited by: {symbol['last_editor']}")
+    else:
+        print("  No high-impact AI-written code found.")
+
+    # Print top AI files
+    print("\n📁 Top Files by AI Contribution:")
+    if stats["top_ai_files"]:
+        for file_path, percentage in stats["top_ai_files"][:10]:  # Show top 10
+            print(f"  • {file_path}: {percentage:.1f}% AI contribution")
+    else:
+        print("  No files with AI contributions found.")
+
+    # Save detailed results as JSON; the relative path resolves against the current working directory
+    output_path = "ai_impact_analysis.json"
+    with open(output_path, "w") as f:
+        json.dump(results, f, indent=2)
+
+    print(f"\n✅ Detailed analysis saved to {output_path}")
+
+    # Add attribution to symbols
+    print("\n🏷️ Adding attribution information to symbols...")
+    add_attribution_to_symbols(codebase, ai_authors)
+    print("✅ Attribution information added to symbols")
+
+    print("\nYou can now access attribution information on symbols:")
+    print("  • symbol.last_editor - The last person who edited the symbol")
+    print("  • symbol.editor_history - List of all editors who have touched the symbol")
+    print("  • symbol.is_ai_authored - Whether the symbol was authored by AI")
diff --git a/src/codegen/extensions/attribution/git_history.py b/src/codegen/extensions/attribution/git_history.py
new file mode 100644
index 000000000..fc0625c05
--- /dev/null
+++ b/src/codegen/extensions/attribution/git_history.py
@@ -0,0 +1,326 @@
+import time
+from collections import defaultdict
+from datetime import datetime
+from typing import Optional
+
+import pygit2
+
+from codegen.sdk.core.codebase import Codebase
+from codegen.sdk.core.symbol import Symbol
+
+
+class GitAttributionTracker:
+    """Tracks attribution information for code symbols based on git history."""
+
+    def __init__(self, codebase: Codebase, ai_authors: Optional[list[str]] = None):
+        """Initialize the attribution tracker.
+
+        Args:
+            codebase: The codebase to analyze
+            ai_authors: Substrings of the "Name <email>" author id that mark a commit as AI
+                        (defaults to ['devin[bot]', 'codegen[bot]'])
+        """
+        self.codebase = codebase
+        self.repo_path = codebase.ctx.projects[0].repo_operator.repo_path
+        self.repo = pygit2.Repository(self.repo_path)
+
+        # Default AI authors if none provided
+        self.ai_authors = ai_authors or ['devin[bot]', 'codegen[bot]']
+
+        # Cache structures
+        self._file_history = {}  # file path -> list of commit info
+        self._symbol_history = {}  # symbol id -> list of commit info
+        self._author_contributions = defaultdict(list)  # author -> list of commit info
+
+        # Track if history has been built
+        self._history_built = False
+
+    def build_history(self, max_commits: Optional[int] = None) -> None:
+        """Build the git history for the codebase.
+
+        Args:
+            max_commits: Maximum number of commits to process (None for all)
+        """
+        start_time = time.time()
+        print(f"Building git history for {self.repo_path}...")
+
+        # Check if repository exists and has commits
+        try:
+            head_target = self.repo.head.target
+        except Exception as e:
+            print(f"⚠️ Error accessing repository head: {e}")
+            print("This might be a shallow clone or a repository without history.")
+            self._history_built = True
+            return
+
+        # Walk through commit history
+        commit_count = 0
+        author_set = set()
+
+        try:
+            for commit in self.repo.walk(head_target, pygit2.GIT_SORT_TIME):
+                # Track unique authors
+                author_id = f"{commit.author.name} <{commit.author.email}>"
+                author_set.add(author_id)
+
+                # Process each diff in the commit
+                if len(commit.parents) > 0:
+                    try:
+                        diff = self.repo.diff(commit.parents[0], commit)
+                        self._process_commit(commit, diff)
+                    except Exception as e:
+                        print(f"Error processing commit {commit.id}: {e}")
+                else:
+                    # Initial commit (no parents)
+                    try:
+                        # For initial commit, compare with empty tree
+                        diff = commit.tree.diff_to_tree(context_lines=0)
+                        self._process_commit(commit, diff)
+                    except Exception as e:
+                        print(f"Error processing initial commit {commit.id}: {e}")
+
+                commit_count += 1
+                if max_commits and commit_count >= max_commits:
+                    break
+
+                # Progress indicator
+                if commit_count % 100 == 0:
+                    print(f"Processed {commit_count} commits...")
+
+        except Exception as e:
+            print(f"⚠️ Error walking commit history: {e}")
+
+        self._history_built = True
+        elapsed = time.time() - start_time
+
+        # Print diagnostic information
+        print(f"Finished building history in {elapsed:.2f} seconds.")
+        print(f"Processed {commit_count} commits from {len(author_set)} unique authors.")
+        print(f"Found {len(self._file_history)} files with history.")
+        print(f"Found {len(self._author_contributions)} contributors.")
+
+        if len(self._author_contributions) > 0:
+            print("Top contributors:")
+            top_contributors = sorted(
+                [(author, len(commits)) for author, commits in self._author_contributions.items()],
+                key=lambda x: x[1],
+                reverse=True
+            )[:5]
+            for author, count in top_contributors:
+                print(f"  • {author}: {count} commits")
+        else:
+            print("⚠️ No contributors found. This might be due to:")
+            print("  1. Using a shallow clone without history")
+            print("  2. Repository access issues")
+            print("  3. Empty repository or no commits")
+
+    def _process_commit(self, commit, diff) -> None:
+        """Record a commit under its author and under every tracked file its diff touches."""
+        author_name = commit.author.name
+        author_email = commit.author.email
+        author_id = f"{author_name} <{author_email}>"
+        timestamp = commit.author.time
+        commit_id = str(commit.id)
+
+        commit_info = {
+            'author': author_name,
+            'email': author_email,
+            'timestamp': timestamp,
+            'commit_id': commit_id,
+            'message': commit.message.strip(),
+        }
+
+        # Track by author
+        self._author_contributions[author_id].append(commit_info)
+
+        # Track by file; only paths are needed, so walk the deltas instead of building patches
+        for delta in diff.deltas:
+            file_path = delta.new_file.path
+
+            # Skip if not a source file we care about
+            if not self._is_tracked_file(file_path):
+                continue
+
+            file_commit = commit_info.copy()
+            file_commit['file_path'] = file_path
+            self._file_history.setdefault(file_path, []).append(file_commit)
+
+    def _is_tracked_file(self, file_path: str) -> bool:
+        """Check if a file should be tracked based on extension."""
+        # Get file extensions from the codebase
+        extensions = self.codebase.ctx.extensions
+
+        # If we can't determine extensions, track common source files
+        if not extensions:
+            extensions = ['.py', '.js', '.ts', '.tsx', '.jsx']
+
+        return file_path.endswith(tuple(extensions))
+
+    def _ensure_history_built(self) -> None:
+        """Ensure git history has been built."""
+        if not self._history_built:
+            self.build_history()
+
+    def _is_ai_author(self, author_id: str) -> bool:
+        """Return True if any configured AI author is a substring of the "Name <email>" id."""
+        return any(ai_name in author_id for ai_name in self.ai_authors)
+
+    def map_symbols_to_history(self) -> None:
+        """Map symbols in the codebase to their git history."""
+        self._ensure_history_built()
+
+        print("Mapping symbols to git history...")
+        start_time = time.time()
+
+        # For each symbol, find commits that modified its file
+        for symbol in self.codebase.symbols:
+            if not hasattr(symbol, 'filepath') or not symbol.filepath:
+                continue
+
+            symbol_id = f"{symbol.filepath}:{symbol.name}"
+            self._symbol_history[symbol_id] = []
+
+            # Get file history
+            file_history = self._file_history.get(symbol.filepath, [])
+
+            # For now, just associate all file changes with the symbol
+            # A more sophisticated approach would use line ranges
+            # (the commit dicts are shared with the file history; only the list is per-symbol)
+            self._symbol_history[symbol_id].extend(file_history)
+
+        elapsed = time.time() - start_time
+        print(f"Finished mapping symbols in {elapsed:.2f} seconds.")
+
+    def get_symbol_history(self, symbol: Symbol) -> list[dict]:
+        """Get the edit history for a symbol.
+
+        Args:
+            symbol: The symbol to get history for
+
+        Returns:
+            List of commit information dictionaries
+        """
+        self._ensure_history_built()
+
+        if not hasattr(symbol, 'filepath') or not symbol.filepath:
+            return []
+
+        symbol_id = f"{symbol.filepath}:{symbol.name}"
+        return self._symbol_history.get(symbol_id, [])
+
+    def get_symbol_last_editor(self, symbol: Symbol) -> Optional[str]:
+        """Get the last person who edited a symbol.
+
+        Args:
+            symbol: The symbol to check
+
+        Returns:
+            Author name or None if no history found
+        """
+        history = self.get_symbol_history(symbol)
+        if not history:
+            return None
+
+        # Pick the newest commit by timestamp (first one wins on ties) and return its author
+        newest = max(history, key=lambda x: x['timestamp'])
+        return newest['author']
+
+    def get_ai_contribution_stats(self) -> dict:
+        """Get statistics about AI contributions to the codebase.
+
+        Returns:
+            Dictionary with AI contribution statistics
+        """
+        self._ensure_history_built()
+
+        # Count AI commits by file
+        ai_file_commits = defaultdict(int)
+        total_file_commits = defaultdict(int)
+
+        for file_path, commits in self._file_history.items():
+            for commit in commits:
+                total_file_commits[file_path] += 1
+                if self._is_ai_author(f"{commit['author']} <{commit['email']}>"):
+                    ai_file_commits[file_path] += 1
+
+        # Find files with highest AI contribution percentage
+        ai_contribution_percentage = {}
+        for file_path, total in total_file_commits.items():
+            if total > 0:
+                ai_contribution_percentage[file_path] = (ai_file_commits[file_path] / total) * 100
+
+        # Get top files by AI contribution
+        top_ai_files = sorted(
+            ai_contribution_percentage.items(),
+            key=lambda x: x[1],
+            reverse=True
+        )[:20]
+
+        # Count total AI commits
+        ai_commits = sum(
+            len(commits) for author, commits in self._author_contributions.items()
+            if self._is_ai_author(author)
+        )
+
+        total_commits = sum(len(commits) for commits in self._author_contributions.values())
+
+        # Calculate AI percentage safely
+        if total_commits > 0:
+            ai_percentage = (ai_commits / total_commits) * 100
+        else:
+            ai_percentage = 0.0
+
+        return {
+            'total_commits': total_commits,
+            'ai_commits': ai_commits,
+            'ai_percentage': ai_percentage,
+            'top_ai_files': top_ai_files,
+            'ai_file_count': len([f for f, p in ai_contribution_percentage.items() if p > 50]),
+            'total_file_count': len(total_file_commits),
+        }
+
+    def get_ai_touched_symbols(self) -> list[Symbol]:
+        """Get all symbols that have been touched by AI authors.
+
+        Symbol histories are only filled in by map_symbols_to_history(); call it first.
+
+        Returns:
+            List of symbols that have been edited by AI authors
+        """
+        self._ensure_history_built()
+
+        ai_symbols = []
+
+        for symbol in self.codebase.symbols:
+            history = self.get_symbol_history(symbol)
+
+            # Check if any commit is from an AI author
+            if any(self._is_ai_author(f"{c['author']} <{c['email']}>") for c in history):
+                ai_symbols.append(symbol)
+
+        return ai_symbols
+
+    def get_ai_contribution_timeline(self) -> list[tuple[datetime, int]]:
+        """Get a timeline of AI contributions over time.
+
+        Returns:
+            List of (datetime, count) tuples showing AI contributions over time
+        """
+        self._ensure_history_built()
+
+        # Group commits by month
+        monthly_counts = defaultdict(int)
+
+        for author, commits in self._author_contributions.items():
+            if self._is_ai_author(author):
+                for commit in commits:
+                    # Year-month bucket in the machine's local timezone (fromtimestamp without tz)
+                    dt = datetime.fromtimestamp(commit['timestamp'])
+                    month_key = f"{dt.year}-{dt.month:02d}"
+                    monthly_counts[month_key] += 1
+
+        # Sort by date
+        timeline = sorted(monthly_counts.items())
+
+        # Convert to datetime objects
+        return [(datetime.strptime(month, "%Y-%m"), count) for month, count in timeline]
diff --git a/src/codegen/extensions/attribution/main.py b/src/codegen/extensions/attribution/main.py
new file mode 100644
index 000000000..5f81169ac
--- /dev/null
+++ b/src/codegen/extensions/attribution/main.py
@@ -0,0 +1,103 @@
+from typing import Optional
+
+from codegen.extensions.attribution.git_history import GitAttributionTracker
+from codegen.sdk.core.codebase import Codebase
+
+
+def analyze_ai_impact(
+    codebase: Codebase,
+    ai_authors: Optional[list[str]] = None,
+    max_commits: Optional[int] = None
+) -> dict:
+    """Analyze the impact of AI on a codebase.
+
+    Args:
+        codebase: The codebase to analyze
+        ai_authors: List of author names/emails to track as AI contributors
+                    (defaults to ['devin[bot]', 'codegen[bot]'])
+        max_commits: Maximum number of commits to process (None for all)
+
+    Returns:
+        Dictionary with keys 'stats', 'ai_symbol_count', 'total_symbol_count', 'high_impact_symbols' (top 20), 'timeline', 'contributors'
+    """
+    tracker = GitAttributionTracker(codebase, ai_authors)
+    tracker.build_history(max_commits)
+    tracker.map_symbols_to_history()
+
+    # Get basic stats
+    stats = tracker.get_ai_contribution_stats()
+
+    # Get AI-touched symbols
+    ai_symbols = tracker.get_ai_touched_symbols()
+
+    # Find high-impact AI symbols (those with many dependents)
+    high_impact_symbols = []
+    for symbol in ai_symbols:
+        if hasattr(symbol, 'usages') and len(symbol.usages) > 5:
+            high_impact_symbols.append({
+                'name': symbol.name,
+                'filepath': symbol.filepath,
+                'usage_count': len(symbol.usages),
+                'last_editor': tracker.get_symbol_last_editor(symbol)
+            })
+
+    # Sort by usage count
+    high_impact_symbols.sort(key=lambda x: x['usage_count'], reverse=True)
+
+    # Get timeline data
+    timeline = tracker.get_ai_contribution_timeline()
+    timeline_data = [
+        {'date': dt.strftime('%Y-%m'), 'count': count}
+        for dt, count in timeline
+    ]
+
+    # Get list of all contributors with commit counts
+    contributors = []
+    for author_id, commits in tracker._author_contributions.items():
+        contributors.append((author_id, len(commits)))
+
+    # Sort by commit count (descending)
+    contributors.sort(key=lambda x: x[1], reverse=True)
+
+    return {
+        'stats': stats,
+        'ai_symbol_count': len(ai_symbols),
+        'total_symbol_count': len(list(codebase.symbols)),
+        'high_impact_symbols': high_impact_symbols[:20],  # Top 20
+        'timeline': timeline_data,
+        'contributors': contributors,
+    }
+
+
+def add_attribution_to_symbols(codebase: Codebase, ai_authors: Optional[list[str]] = None) -> None:
+    """Add attribution information to symbols in the codebase.
+
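+    This sets the following attributes on each symbol that has a non-empty history (other symbols are left untouched):
+    - last_editor: The author of the commit with the newest timestamp in the symbol's history
+    - editor_history / is_ai_authored: Unique commit authors (unordered); True if any of them is in the tracker's ai_authors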
+
+    Args:
+        codebase: The codebase to analyze
+        ai_authors: List of authors to track as AI contributors (this function compares commit author names only, not emails)
+    """
+    tracker = GitAttributionTracker(codebase, ai_authors)
+    tracker.build_history()
+    tracker.map_symbols_to_history()
+
+    # Add attribution to each symbol
+    for symbol in codebase.symbols:
+        history = tracker.get_symbol_history(symbol)
+
+        # Add last editor
+        if history:
+            sorted_history = sorted(history, key=lambda x: x['timestamp'], reverse=True)
+            symbol.last_editor = sorted_history[0]['author']
+
+            # Add editor history (unique editors)
+            editors = {commit['author'] for commit in history}
+            symbol.editor_history = list(editors)
+
+            # Add is_ai_authored flag
+            symbol.is_ai_authored = any(
+                editor in tracker.ai_authors for editor in symbol.editor_history
+            )

From d0d80ec7ddd5fd7c4914d3af185588efad77d086 Mon Sep 17 00:00:00 2001
From: tomcodgen <191515280+tomcodgen@users.noreply.github.com>
Date: Tue, 4 Mar 2025 01:33:33 +0000
Subject: [PATCH 2/7] Automated pre-commit update

---
 .../examples/ai_impact_analysis/run.py        | 33 ++++------
 .../attributions/symbol_attribution.py        | 51 ++++++++-------
 pyproject.toml                                |  2 +-
 .../extensions/attribution/3pp/cursor.py      |  4 +-
 .../extensions/attribution/git_history.py     | 62 +++++++------------
 src/codegen/extensions/attribution/main.py    | 44 +++++--------
 6 files changed, 79 insertions(+), 117 deletions(-)

diff --git a/codegen-examples/examples/ai_impact_analysis/run.py b/codegen-examples/examples/ai_impact_analysis/run.py
index 7ff9026a8..c7cb08488 100644
--- a/codegen-examples/examples/ai_impact_analysis/run.py
+++ b/codegen-examples/examples/ai_impact_analysis/run.py
@@ -12,7 +12,7 @@
 if __name__ == "__main__":
     try:
         print("Initializing codebase...")
-        
+
         # Option A: Use current directory if it's a git repository
         if os.path.exists(".git"):
             print("Using current directory as repository...")
@@ -20,50 +20,41 @@
             repo_path = os.getcwd()
             repo_config = RepoConfig.from_repo_path(repo_path)
             repo_operator = RepoOperator(repo_config=repo_config)
-            
+
             # Initialize codebase with a project config
-            project = ProjectConfig.from_repo_operator(
-                repo_operator=repo_operator,
-                programming_language=ProgrammingLanguage.PYTHON
-            )
+            project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON)
             codebase = Codebase(projects=[project])
         else:
             # Option B: Try to find a git repository in parent directories
             print("Searching for git repository in parent directories...")
             current_dir = os.getcwd()
             found_git = False
-            
+
             while current_dir != os.path.dirname(current_dir):  # Stop at root
                 if os.path.exists(os.path.join(current_dir, ".git")):
                     print(f"Found git repository at {current_dir}")
                     repo_config = RepoConfig.from_repo_path(current_dir)
                     repo_operator = RepoOperator(repo_config=repo_config)
-                    
+
                     # Initialize codebase with a project config
-                    project = ProjectConfig.from_repo_operator(
-                        repo_operator=repo_operator,
-                        programming_language=ProgrammingLanguage.PYTHON
-                    )
+                    project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON)
                     codebase = Codebase(projects=[project])
                     found_git = True
                     break
                 current_dir = os.path.dirname(current_dir)
-            
+
             if not found_git:
                 # Option C: Use from_repo method which handles cloning
                 print("No local git repository found. Cloning a repository...")
-                codebase = Codebase.from_repo(
-                    repo_full_name="codegen-sh/codegen",
-                    language="python"
-                )
-        
+                codebase = Codebase.from_repo(repo_full_name="codegen-sh/codegen", language="python")
+
         print(f"Codebase loaded with {len(codebase.files)} files and {len(codebase.symbols)} symbols")
-        
+
         # Run the analysis
         run(codebase)
-        
+
     except Exception as e:
         print(f"\n❌ Error: {str(e)}")
         print("\nTraceback:")
         traceback.print_exc()
-        sys.exit(1)
\ No newline at end of file
+        sys.exit(1)
diff --git a/codegen-examples/examples/attributions/symbol_attribution.py b/codegen-examples/examples/attributions/symbol_attribution.py
index 16cd8179e..dc2512dd7 100644
--- a/codegen-examples/examples/attributions/symbol_attribution.py
+++ b/codegen-examples/examples/attributions/symbol_attribution.py
@@ -9,24 +9,25 @@
 from codegen.sdk.codebase.config import ProjectConfig
 from codegen.shared.enums.programming_language import ProgrammingLanguage
 
+
 def print_symbol_attribution(codebase):
     """Print attribution information for symbols in the codebase."""
     print("\n🔍 Symbol Attribution Examples:")
-    
+
     # First, make sure attribution information is added to symbols
-    ai_authors = ['devin[bot]', 'codegen[bot]', 'github-actions[bot]']
+    ai_authors = ["devin[bot]", "codegen[bot]", "github-actions[bot]"]
     add_attribution_to_symbols(codebase, ai_authors)
-    
+
     # Get some interesting symbols to examine
     # Let's look at classes and functions with the most usages
     symbols_with_usages = []
     for symbol in codebase.symbols:
-        if hasattr(symbol, 'usages') and len(symbol.usages) > 0:
+        if hasattr(symbol, "usages") and len(symbol.usages) > 0:
             symbols_with_usages.append((symbol, len(symbol.usages)))
-    
+
     # Sort by usage count (most used first)
     symbols_with_usages.sort(key=lambda x: x[1], reverse=True)
-    
+
     # Print attribution for top symbols
     count = 0
     for symbol, usage_count in symbols_with_usages[:10]:  # Look at top 10 most used symbols
@@ -34,60 +35,58 @@ def print_symbol_attribution(codebase):
         print(f"\n📊 Symbol #{count}: {symbol.name} ({type(symbol).__name__})")
         print(f"  • File: {symbol.filepath}")
         print(f"  • Usages: {usage_count}")
-        
+
         # Print attribution information
-        if hasattr(symbol, 'last_editor'):
+        if hasattr(symbol, "last_editor"):
             print(f"  • Last editor: {symbol.last_editor}")
         else:
             print("  • Last editor: Not available")
-            
-        if hasattr(symbol, 'editor_history') and symbol.editor_history:
-            print(f"  • Editor history: {', '.join(symbol.editor_history[:5])}" + 
-                  (f" and {len(symbol.editor_history) - 5} more..." if len(symbol.editor_history) > 5 else ""))
+
+        if hasattr(symbol, "editor_history") and symbol.editor_history:
+            print(f"  • Editor history: {', '.join(symbol.editor_history[:5])}" + (f" and {len(symbol.editor_history) - 5} more..." if len(symbol.editor_history) > 5 else ""))
         else:
             print("  • Editor history: Not available")
-            
-        if hasattr(symbol, 'is_ai_authored'):
+
+        if hasattr(symbol, "is_ai_authored"):
             print(f"  • AI authored: {'Yes' if symbol.is_ai_authored else 'No'}")
         else:
             print("  • AI authored: Not available")
 
+
 if __name__ == "__main__":
     try:
         print("Initializing codebase...")
-        
+
         # Use current directory if it's a git repository
         if os.path.exists(".git"):
             print("Using current directory as repository...")
             repo_path = os.getcwd()
             repo_config = RepoConfig.from_repo_path(repo_path)
             repo_operator = RepoOperator(repo_config=repo_config)
-            
-            project = ProjectConfig.from_repo_operator(
-                repo_operator=repo_operator,
-                programming_language=ProgrammingLanguage.PYTHON
-            )
+
+            project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON)
             codebase = Codebase(projects=[project])
         else:
             # Use from_repo method for a well-known repository
             print("Using a sample repository...")
             codebase = Codebase.from_repo(
                 repo_full_name="codegen-sh/codegen",
-                #commit="",  # Using a specific commit for consistency
-                language="python"
+                # commit="",  # Using a specific commit for consistency
+                language="python",
             )
-        
+
         print(f"Codebase loaded with {len(codebase.files)} files and {len(codebase.symbols)} symbols")
-        
+
         # First run the analysis to gather attribution data
         print("\n🔍 Running AI impact analysis...")
         run(codebase)
-        
+
         # Then show examples of accessing attribution information
         print_symbol_attribution(codebase)
-        
+
     except Exception as e:
         print(f"\n❌ Error: {str(e)}")
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
diff --git a/pyproject.toml b/pyproject.toml
index d3ce75054..e2a7470d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ dependencies = [
   "hatch-vcs>=0.4.0",
   "hatchling>=1.25.0",
   "pyinstrument>=5.0.0",
-  "pip>=24.3.1", # This is needed for some NPM/YARN/PNPM post-install scripts to work!
+  "pip>=24.3.1",                       # This is needed for some NPM/YARN/PNPM post-install scripts to work!
   "rich-click>=1.8.5",
   "python-dotenv>=1.0.1",
   "giturlparse",
diff --git a/src/codegen/extensions/attribution/3pp/cursor.py b/src/codegen/extensions/attribution/3pp/cursor.py
index 4fb534495..3d1f18046 100644
--- a/src/codegen/extensions/attribution/3pp/cursor.py
+++ b/src/codegen/extensions/attribution/3pp/cursor.py
@@ -577,9 +577,9 @@ async def main():
     if search_results:
         print(f"Found {len(search_results)} results:")
         for i, result in enumerate(search_results[:10]):  # Show first 10 results
-            print(f"  {i+1}. [{result['type']}] {result['chatTitle']} ({result['workspaceName']})")
+            print(f"  {i + 1}. [{result['type']}] {result['chatTitle']} ({result['workspaceName']})")
             # Show a snippet of the matching text
-            matching_text = result['matchingText']
+            matching_text = result["matchingText"]
             if len(matching_text) > 100:
                 matching_text = matching_text[:97] + "..."
             print(f"     Match: {matching_text}")
diff --git a/src/codegen/extensions/attribution/git_history.py b/src/codegen/extensions/attribution/git_history.py
index fc0625c05..42ee0c40a 100644
--- a/src/codegen/extensions/attribution/git_history.py
+++ b/src/codegen/extensions/attribution/git_history.py
@@ -25,7 +25,7 @@ def __init__(self, codebase: Codebase, ai_authors: Optional[list[str]] = None):
         self.repo = pygit2.Repository(self.repo_path)
 
         # Default AI authors if none provided
-        self.ai_authors = ai_authors or ['devin[bot]', 'codegen[bot]']
+        self.ai_authors = ai_authors or ["devin[bot]", "codegen[bot]"]
 
         # Cache structures
         self._file_history = {}  # file path -> list of commit info
@@ -101,11 +101,7 @@ def build_history(self, max_commits: Optional[int] = None) -> None:
 
         if len(self._author_contributions) > 0:
             print("Top contributors:")
-            top_contributors = sorted(
-                [(author, len(commits)) for author, commits in self._author_contributions.items()],
-                key=lambda x: x[1],
-                reverse=True
-            )[:5]
+            top_contributors = sorted([(author, len(commits)) for author, commits in self._author_contributions.items()], key=lambda x: x[1], reverse=True)[:5]
             for author, count in top_contributors:
                 print(f"  • {author}: {count} commits")
         else:
@@ -123,11 +119,11 @@ def _process_commit(self, commit, diff) -> None:
         commit_id = str(commit.id)
 
         commit_info = {
-            'author': author_name,
-            'email': author_email,
-            'timestamp': timestamp,
-            'commit_id': commit_id,
-            'message': commit.message.strip(),
+            "author": author_name,
+            "email": author_email,
+            "timestamp": timestamp,
+            "commit_id": commit_id,
+            "message": commit.message.strip(),
         }
 
         # Track by author
@@ -145,7 +141,7 @@ def _process_commit(self, commit, diff) -> None:
                 self._file_history[file_path] = []
 
             file_commit = commit_info.copy()
-            file_commit['file_path'] = file_path
+            file_commit["file_path"] = file_path
             self._file_history[file_path].append(file_commit)
 
     def _is_tracked_file(self, file_path: str) -> bool:
@@ -155,7 +151,7 @@ def _is_tracked_file(self, file_path: str) -> bool:
 
         # If we can't determine extensions, track common source files
         if not extensions:
-            extensions = ['.py', '.js', '.ts', '.tsx', '.jsx']
+            extensions = [".py", ".js", ".ts", ".tsx", ".jsx"]
 
         return any(file_path.endswith(ext) for ext in extensions)
 
@@ -173,7 +169,7 @@ def map_symbols_to_history(self) -> None:
 
         # For each symbol, find commits that modified its file
         for symbol in self.codebase.symbols:
-            if not hasattr(symbol, 'filepath') or not symbol.filepath:
+            if not hasattr(symbol, "filepath") or not symbol.filepath:
                 continue
 
             symbol_id = f"{symbol.filepath}:{symbol.name}"
@@ -201,7 +197,7 @@ def get_symbol_history(self, symbol: Symbol) -> list[dict]:
         """
         self._ensure_history_built()
 
-        if not hasattr(symbol, 'filepath') or not symbol.filepath:
+        if not hasattr(symbol, "filepath") or not symbol.filepath:
             return []
 
         symbol_id = f"{symbol.filepath}:{symbol.name}"
@@ -221,8 +217,8 @@ def get_symbol_last_editor(self, symbol: Symbol) -> Optional[str]:
             return None
 
         # Sort by timestamp (newest first) and return the author
-        sorted_history = sorted(history, key=lambda x: x['timestamp'], reverse=True)
-        return sorted_history[0]['author']
+        sorted_history = sorted(history, key=lambda x: x["timestamp"], reverse=True)
+        return sorted_history[0]["author"]
 
     def get_ai_contribution_stats(self) -> dict:
         """Get statistics about AI contributions to the codebase.
@@ -239,7 +235,7 @@ def get_ai_contribution_stats(self) -> dict:
         for file_path, commits in self._file_history.items():
             for commit in commits:
                 total_file_commits[file_path] += 1
-                if commit['author'] in self.ai_authors or commit['email'] in self.ai_authors:
+                if commit["author"] in self.ai_authors or commit["email"] in self.ai_authors:
                     ai_file_commits[file_path] += 1
 
         # Find files with highest AI contribution percentage
@@ -249,17 +245,10 @@ def get_ai_contribution_stats(self) -> dict:
                 ai_contribution_percentage[file_path] = (ai_file_commits[file_path] / total) * 100
 
         # Get top files by AI contribution
-        top_ai_files = sorted(
-            ai_contribution_percentage.items(),
-            key=lambda x: x[1],
-            reverse=True
-        )[:20]
+        top_ai_files = sorted(ai_contribution_percentage.items(), key=lambda x: x[1], reverse=True)[:20]
 
         # Count total AI commits
-        ai_commits = sum(
-            len(commits) for author, commits in self._author_contributions.items()
-            if any(name in author for name in self.ai_authors)
-        )
+        ai_commits = sum(len(commits) for author, commits in self._author_contributions.items() if any(name in author for name in self.ai_authors))
 
         total_commits = sum(len(commits) for commits in self._author_contributions.values())
 
@@ -270,12 +259,12 @@ def get_ai_contribution_stats(self) -> dict:
             ai_percentage = 0.0
 
         return {
-            'total_commits': total_commits,
-            'ai_commits': ai_commits,
-            'ai_percentage': ai_percentage,
-            'top_ai_files': top_ai_files,
-            'ai_file_count': len([f for f, p in ai_contribution_percentage.items() if p > 50]),
-            'total_file_count': len(total_file_commits),
+            "total_commits": total_commits,
+            "ai_commits": ai_commits,
+            "ai_percentage": ai_percentage,
+            "top_ai_files": top_ai_files,
+            "ai_file_count": len([f for f, p in ai_contribution_percentage.items() if p > 50]),
+            "total_file_count": len(total_file_commits),
         }
 
     def get_ai_touched_symbols(self) -> list[Symbol]:
@@ -292,10 +281,7 @@ def get_ai_touched_symbols(self) -> list[Symbol]:
             history = self.get_symbol_history(symbol)
 
             # Check if any commit is from an AI author
-            if any(
-                commit['author'] in self.ai_authors or commit['email'] in self.ai_authors
-                for commit in history
-            ):
+            if any(commit["author"] in self.ai_authors or commit["email"] in self.ai_authors for commit in history):
                 ai_symbols.append(symbol)
 
         return ai_symbols
@@ -315,7 +301,7 @@ def get_ai_contribution_timeline(self) -> list[tuple[datetime, int]]:
             if any(name in author for name in self.ai_authors):
                 for commit in commits:
                     # Convert timestamp to year-month
-                    dt = datetime.fromtimestamp(commit['timestamp'])
+                    dt = datetime.fromtimestamp(commit["timestamp"])
                     month_key = f"{dt.year}-{dt.month:02d}"
                     monthly_counts[month_key] += 1
 
diff --git a/src/codegen/extensions/attribution/main.py b/src/codegen/extensions/attribution/main.py
index 5f81169ac..a282fda89 100644
--- a/src/codegen/extensions/attribution/main.py
+++ b/src/codegen/extensions/attribution/main.py
@@ -4,11 +4,7 @@
 from codegen.sdk.core.codebase import Codebase
 
 
-def analyze_ai_impact(
-    codebase: Codebase,
-    ai_authors: Optional[list[str]] = None,
-    max_commits: Optional[int] = None
-) -> dict:
+def analyze_ai_impact(codebase: Codebase, ai_authors: Optional[list[str]] = None, max_commits: Optional[int] = None) -> dict:
     """Analyze the impact of AI on a codebase.
 
     Args:
@@ -33,23 +29,15 @@ def analyze_ai_impact(
     # Find high-impact AI symbols (those with many dependents)
     high_impact_symbols = []
     for symbol in ai_symbols:
-        if hasattr(symbol, 'usages') and len(symbol.usages) > 5:
-            high_impact_symbols.append({
-                'name': symbol.name,
-                'filepath': symbol.filepath,
-                'usage_count': len(symbol.usages),
-                'last_editor': tracker.get_symbol_last_editor(symbol)
-            })
+        if hasattr(symbol, "usages") and len(symbol.usages) > 5:
+            high_impact_symbols.append({"name": symbol.name, "filepath": symbol.filepath, "usage_count": len(symbol.usages), "last_editor": tracker.get_symbol_last_editor(symbol)})
 
     # Sort by usage count
-    high_impact_symbols.sort(key=lambda x: x['usage_count'], reverse=True)
+    high_impact_symbols.sort(key=lambda x: x["usage_count"], reverse=True)
 
     # Get timeline data
     timeline = tracker.get_ai_contribution_timeline()
-    timeline_data = [
-        {'date': dt.strftime('%Y-%m'), 'count': count}
-        for dt, count in timeline
-    ]
+    timeline_data = [{"date": dt.strftime("%Y-%m"), "count": count} for dt, count in timeline]
 
     # Get list of all contributors with commit counts
     contributors = []
@@ -60,12 +48,12 @@ def analyze_ai_impact(
     contributors.sort(key=lambda x: x[1], reverse=True)
 
     return {
-        'stats': stats,
-        'ai_symbol_count': len(ai_symbols),
-        'total_symbol_count': len(list(codebase.symbols)),
-        'high_impact_symbols': high_impact_symbols[:20],  # Top 20
-        'timeline': timeline_data,
-        'contributors': contributors,
+        "stats": stats,
+        "ai_symbol_count": len(ai_symbols),
+        "total_symbol_count": len(list(codebase.symbols)),
+        "high_impact_symbols": high_impact_symbols[:20],  # Top 20
+        "timeline": timeline_data,
+        "contributors": contributors,
     }
 
 
@@ -90,14 +78,12 @@ def add_attribution_to_symbols(codebase: Codebase, ai_authors: Optional[list[str
 
         # Add last editor
         if history:
-            sorted_history = sorted(history, key=lambda x: x['timestamp'], reverse=True)
-            symbol.last_editor = sorted_history[0]['author']
+            sorted_history = sorted(history, key=lambda x: x["timestamp"], reverse=True)
+            symbol.last_editor = sorted_history[0]["author"]
 
             # Add editor history (unique editors)
-            editors = {commit['author'] for commit in history}
+            editors = {commit["author"] for commit in history}
             symbol.editor_history = list(editors)
 
             # Add is_ai_authored flag
-            symbol.is_ai_authored = any(
-                editor in tracker.ai_authors for editor in symbol.editor_history
-            )
+            symbol.is_ai_authored = any(editor in tracker.ai_authors for editor in symbol.editor_history)

From aca6587b822c5118b80515f2d98bac81930bdf1a Mon Sep 17 00:00:00 2001
From: tkucar <tkucar@codegen.com>
Date: Tue, 4 Mar 2025 02:51:06 +0100
Subject: [PATCH 3/7] docs

---
 docs/mint.json                  |   3 +-
 docs/tutorials/attributions.mdx | 194 ++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+), 1 deletion(-)
 create mode 100644 docs/tutorials/attributions.mdx

diff --git a/docs/mint.json b/docs/mint.json
index d5062557c..fa74dfea4 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -105,7 +105,8 @@
         "tutorials/python2-to-python3",
         "tutorials/flask-to-fastapi",
         "tutorials/build-mcp",
-        "tutorials/neo4j-graph"
+        "tutorials/neo4j-graph",
+        "tutorials/attributions"
       ]
     },
     {
diff --git a/docs/tutorials/attributions.mdx b/docs/tutorials/attributions.mdx
new file mode 100644
index 000000000..cf86538f9
--- /dev/null
+++ b/docs/tutorials/attributions.mdx
@@ -0,0 +1,194 @@
+---
+title: "Code statistics and attributions"
+sidebarTitle: "Code statistics and attributions"
+description: "Learn how to analyze code statistics and attributions using Codegen"
+icon: "network-wired"
+iconType: "solid"
+---
+
+# AI Impact Analysis
+
+This tutorial shows how to use Codegen's attribution extension to analyze the impact of AI on your
+codebase. You'll learn how to identify which parts of your code were committed by AI tools like
+GitHub Copilot, Devin, or other AI assistants, based on the author recorded in each git commit.
+
+Note: the list of tracked authors is configurable, so you can also track CI pipeline bots or any other contributor you want.
+
+
+## Overview
+
+The attribution extension analyzes git history to:
+
+1. Identify which symbols (functions, classes, etc.) were authored or modified by AI tools
+2. Calculate the percentage of AI contributions in your codebase
+3. Find high-impact AI-written code (code that many other parts depend on)
+4. Track the evolution of AI contributions over time
+
+## Installation
+
+The attribution extension is included with Codegen. No additional installation is required.
+
+## Basic Usage
+
+### Running the Analysis
+
+You can run the AI impact analysis using the Codegen CLI:
+
+```bash
+codegen analyze-ai-impact
+```
+
+Or from Python code:
+
+```python
+from codegen import Codebase
+from codegen.extensions.attribution.cli import run
+
+# Initialize codebase by cloning the repository
+codebase = Codebase.from_repo("your-org/your-repo", language="python")
+
+# Run the analysis
+run(codebase)
+```
+
+### Understanding the Results
+
+The analysis will print a summary of AI contributions to your console and save detailed results to a JSON file. The summary includes:
+
+- List of all contributors (human and AI)
+- Percentage of commits made by AI
+- Number of files and symbols touched by AI
+- High-impact AI-written code (code with many dependents)
+- Top files by AI contribution percentage
+
+## Advanced Usage
+
+### Accessing Attribution Information
+
+After calling `add_attribution_to_symbols`, each symbol that has git history will have attribution information attached to it:
+
+```python
+from codegen import Codebase
+from codegen.extensions.attribution.main import add_attribution_to_symbols
+
+# Initialize codebase
+codebase = Codebase.from_repo("your-org/your-repo", language="python")
+
+# Add attribution information to symbols
+ai_authors = ['github-actions[bot]', 'dependabot[bot]', 'copilot[bot]']
+add_attribution_to_symbols(codebase, ai_authors)
+
+# Access attribution information on symbols
+for symbol in codebase.symbols:
+    if hasattr(symbol, 'is_ai_authored') and symbol.is_ai_authored:
+        print(f"AI-authored symbol: {symbol.name} in {symbol.filepath}")
+        print(f"Last editor: {symbol.last_editor}")
+        print(f"All editors: {symbol.editor_history}")
+```
+
+### Customizing AI Author Detection
+
+By default, the analysis treats commits authored by `devin[bot]` or `codegen[bot]` as AI contributions.
+You can customize this by providing your own list of AI author names or emails:
+
+```python
+from codegen import Codebase
+from codegen.extensions.attribution.main import analyze_ai_impact
+
+# Initialize codebase
+codebase = Codebase.from_repo("your-org/your-repo", language="python")
+
+# Define custom AI authors
+ai_authors = [
+    'github-actions[bot]',
+    'dependabot[bot]',
+    'copilot[bot]',
+    'devin[bot]',
+    'your-custom-ai-email@example.com'
+]
+
+# Run analysis with custom AI authors
+results = analyze_ai_impact(codebase, ai_authors)
+```
+
+## Example: Contributor Analysis
+
+Here's a complete example that analyzes contributors to your codebase and their impact:
+
+```python
+import os
+from collections import Counter
+
+from codegen import Codebase
+from codegen.extensions.attribution.main import add_attribution_to_symbols
+from codegen.git.repo_operator.repo_operator import RepoOperator
+from codegen.git.schemas.repo_config import RepoConfig
+from codegen.sdk.codebase.config import ProjectConfig
+from codegen.shared.enums.programming_language import ProgrammingLanguage
+
+def analyze_contributors(codebase):
+    """Analyze contributors to the codebase and their impact."""
+    print("\n🔍 Contributor Analysis:")
+    
+    # Define which authors are considered AI
+    ai_authors = ['devin[bot]', 'codegen[bot]', 'github-actions[bot]', 'dependabot[bot]']
+    
+    # Add attribution information to all symbols
+    print("Adding attribution information to symbols...")
+    add_attribution_to_symbols(codebase, ai_authors)
+    
+    # Collect statistics about contributors
+    contributor_stats = Counter()
+    ai_contributor_stats = Counter()
+    
+    print("Analyzing symbol attributions...")
+    for symbol in codebase.symbols:
+        if hasattr(symbol, 'last_editor') and symbol.last_editor:
+            contributor_stats[symbol.last_editor] += 1
+            
+            # Track if this is an AI contributor
+            if any(ai in symbol.last_editor for ai in ai_authors):
+                ai_contributor_stats[symbol.last_editor] += 1
+    
+    # Print top contributors overall
+    print("\n👥 Top Contributors by Symbols Authored:")
+    for contributor, count in contributor_stats.most_common(10):
+        is_ai = any(ai in contributor for ai in ai_authors)
+        ai_indicator = "🤖" if is_ai else "👤"
+        print(f"  {ai_indicator} {contributor}: {count} symbols")
+    
+    # Print top AI contributors if any
+    if ai_contributor_stats:
+        print("\n🤖 Top AI Contributors:")
+        for contributor, count in ai_contributor_stats.most_common(5):
+            print(f"  • {contributor}: {count} symbols")
+
+# Initialize codebase from current directory
+if os.path.exists(".git"):
+    repo_path = os.getcwd()
+    repo_config = RepoConfig.from_repo_path(repo_path)
+    repo_operator = RepoOperator(repo_config=repo_config)
+    
+    project = ProjectConfig.from_repo_operator(
+        repo_operator=repo_operator,
+        programming_language=ProgrammingLanguage.PYTHON
+    )
+    codebase = Codebase(projects=[project])
+    
+    # Run the contributor analysis
+    analyze_contributors(codebase)
+```
+
+## Conclusion
+
+The attribution extension provides valuable insights into how AI tools are being used in your 
+development process. By understanding which parts of your codebase are authored by AI, you can:
+
+- Track the adoption of AI coding assistants in your team
+- Identify areas where AI is most effective
+- Ensure appropriate review of AI-generated code
+- Measure the impact of AI on developer productivity
+
+For more advanced usage, check out the [API reference](/api-reference/extensions/attribution)
+for the attribution extension.
+

From 9d308701712243e0fb657312a7a808e7d0743707 Mon Sep 17 00:00:00 2001
From: tomcodgen <191515280+tomcodgen@users.noreply.github.com>
Date: Tue, 4 Mar 2025 01:52:02 +0000
Subject: [PATCH 4/7] Automated pre-commit update

---
 docs/mint.json | 756 ++++++++++++++++++++++++-------------------------
 1 file changed, 377 insertions(+), 379 deletions(-)

diff --git a/docs/mint.json b/docs/mint.json
index fa74dfea4..435da0a76 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -1,380 +1,378 @@
 {
-  "$schema": "https://mintlify.com/schema.json",
-  "name": "Codegen",
-  "logo": {
-    "dark": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45a3e32761c42b324b_Codegen_Logomark_Dark.svg",
-    "light": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45bf55446746125835_Codegen_Logomark_Light.svg"
-  },
-  "modeToggle": {
-    "default": "dark"
-  },
-  "metadata": {
-    "og:site_name": "Codegen",
-    "og:title": "Codegen - Manipulate Code at Scale",
-    "og:description": "A scriptable interface to a powerful, multi-lingual language server built on top of Tree-sitter.",
-    "og:url": "https://docs.codegen.com",
-    "og:locale": "en_US",
-    "og:logo": "https://i.imgur.com/f4OVOqI.png",
-    "article:publisher": "Codegen, Inc.",
-    "twitter:site": "@codegen"
-  },
-  "favicon": "/favicon.svg",
-  "colors": {
-    "primary": "#a277ff",
-    "light": "#a277ff",
-    "dark": "#a277ff",
-    "anchors": {
-      "from": "#61ffca",
-      "to": "#61ffca"
-    }
-  },
-  "theme": "prism",
-  "background": {
-    "style": "gradient"
-  },
-  "analytics": {
-    "posthog": {
-      "apiKey": "phc_GLxaINoQJnuyCyxDmTciQqzdKBYFVDkY7bRBO4bDdso"
-    }
-  },
-  "feedback": {
-    "thumbsRating": true
-  },
-  "topbarCtaButton": {
-    "name": "GitHub",
-    "url": "https://github.com/codegen-sh/codegen-sdk"
-  },
-  "tabs": [
-    {
-      "name": "API Reference",
-      "url": "/api-reference"
-    },
-    {
-      "name": "CLI",
-      "url": "/cli"
-    },
-    {
-      "name": "Blog",
-      "url": "/blog"
-    },
-    {
-      "name": "Changelog",
-      "url": "/changelog"
-    }
-  ],
-  "navigation": [
-    {
-      "group": "Introduction",
-      "pages": [
-        "introduction/overview",
-        "introduction/getting-started",
-        "introduction/installation",
-        "introduction/ide-usage",
-        "introduction/work-with-ai",
-        "introduction/how-it-works",
-        "introduction/guiding-principles",
-        "introduction/community",
-        "introduction/about",
-        "introduction/faq"
-      ]
-    },
-    {
-      "group": "Tutorials",
-      "pages": [
-        "tutorials/at-a-glance",
-        "tutorials/build-code-agent",
-        "tutorials/slack-bot",
-        "tutorials/github-review-bot",
-        "tutorials/deep-code-research",
-        "tutorials/training-data",
-        "tutorials/codebase-visualization",
-        "tutorials/migrating-apis",
-        "tutorials/organize-your-codebase",
-        "tutorials/promise-to-async-await",
-        "tutorials/modularity",
-        "tutorials/manage-feature-flags",
-        "tutorials/deleting-dead-code",
-        "tutorials/increase-type-coverage",
-        "tutorials/managing-typescript-exports",
-        "tutorials/converting-default-exports",
-        "tutorials/creating-documentation",
-        "tutorials/react-modernization",
-        "tutorials/unittest-to-pytest",
-        "tutorials/sqlalchemy-1.6-to-2.0",
-        "tutorials/fixing-import-loops-in-pytorch",
-        "tutorials/python2-to-python3",
-        "tutorials/flask-to-fastapi",
-        "tutorials/build-mcp",
-        "tutorials/neo4j-graph",
-        "tutorials/attributions"
-      ]
-    },
-    {
-      "group": "Building with Codegen",
-      "pages": [
-        "building-with-codegen/at-a-glance",
-        "building-with-codegen/parsing-codebases",
-        "building-with-codegen/reusable-codemods",
-        "building-with-codegen/dot-codegen",
-        "building-with-codegen/function-decorator",
-        "building-with-codegen/language-support",
-        "building-with-codegen/commit-and-reset",
-        "building-with-codegen/git-operations",
-        "building-with-codegen/files-and-directories",
-        "building-with-codegen/the-editable-api",
-        "building-with-codegen/symbol-api",
-        "building-with-codegen/class-api",
-        "building-with-codegen/imports",
-        "building-with-codegen/exports",
-        "building-with-codegen/inheritable-behaviors",
-        "building-with-codegen/statements-and-code-blocks",
-        "building-with-codegen/dependencies-and-usages",
-        "building-with-codegen/function-calls-and-callsites",
-        "building-with-codegen/variable-assignments",
-        "building-with-codegen/local-variables",
-        "building-with-codegen/comments-and-docstrings",
-        "building-with-codegen/external-modules",
-        "building-with-codegen/type-annotations",
-        "building-with-codegen/moving-symbols",
-        "building-with-codegen/collections",
-        "building-with-codegen/traversing-the-call-graph",
-        "building-with-codegen/react-and-jsx",
-        "building-with-codegen/codebase-visualization",
-        "building-with-codegen/flagging-symbols",
-        "building-with-codegen/calling-out-to-llms",
-        "building-with-codegen/semantic-code-search",
-        "building-with-codegen/reducing-conditions"
-      ]
-    },
-    {
-      "group": "CLI",
-      "pages": [
-        "cli/about",
-        "cli/init",
-        "cli/notebook",
-        "cli/create",
-        "cli/run",
-        "cli/reset",
-        "cli/expert"
-      ]
-    },
-    {
-      "group": "Changelog",
-      "pages": [
-        "changelog/changelog"
-      ]
-    },
-    {
-      "group": "Blog",
-      "pages": [
-        "blog/posts",
-        "blog/act-via-code",
-        "blog/promise-to-async-await-twilio",
-        "blog/fixing-import-loops"
-      ]
-    },
-    {
-      "group": "API Reference",
-      "pages": [
-        "api-reference/index",
-        {
-          "group": "Core",
-          "icon": "code",
-          "pages": [
-            "api-reference/core/Argument",
-            "api-reference/core/Assignment",
-            "api-reference/core/AssignmentStatement",
-            "api-reference/core/Attribute",
-            "api-reference/core/AwaitExpression",
-            "api-reference/core/BinaryExpression",
-            "api-reference/core/BlockStatement",
-            "api-reference/core/Boolean",
-            "api-reference/core/Callable",
-            "api-reference/core/CatchStatement",
-            "api-reference/core/ChainedAttribute",
-            "api-reference/core/Class",
-            "api-reference/core/CodeBlock",
-            "api-reference/core/CodeOwner",
-            "api-reference/core/Codebase",
-            "api-reference/core/Comment",
-            "api-reference/core/CommentGroup",
-            "api-reference/core/ComparisonExpression",
-            "api-reference/core/Decorator",
-            "api-reference/core/Dict",
-            "api-reference/core/Directory",
-            "api-reference/core/Editable",
-            "api-reference/core/Export",
-            "api-reference/core/ExportStatement",
-            "api-reference/core/Exportable",
-            "api-reference/core/Expression",
-            "api-reference/core/ExpressionGroup",
-            "api-reference/core/ExpressionStatement",
-            "api-reference/core/ExternalModule",
-            "api-reference/core/File",
-            "api-reference/core/FlagKwargs",
-            "api-reference/core/ForLoopStatement",
-            "api-reference/core/Function",
-            "api-reference/core/FunctionCall",
-            "api-reference/core/GenericType",
-            "api-reference/core/HasBlock",
-            "api-reference/core/HasName",
-            "api-reference/core/HasValue",
-            "api-reference/core/IfBlockStatement",
-            "api-reference/core/Import",
-            "api-reference/core/ImportStatement",
-            "api-reference/core/ImportType",
-            "api-reference/core/Importable",
-            "api-reference/core/Interface",
-            "api-reference/core/List",
-            "api-reference/core/MessageType",
-            "api-reference/core/MultiExpression",
-            "api-reference/core/MultiLineCollection",
-            "api-reference/core/Name",
-            "api-reference/core/NamedType",
-            "api-reference/core/NoneType",
-            "api-reference/core/Number",
-            "api-reference/core/Pair",
-            "api-reference/core/Parameter",
-            "api-reference/core/ParenthesizedExpression",
-            "api-reference/core/Placeholder",
-            "api-reference/core/PlaceholderType",
-            "api-reference/core/RaiseStatement",
-            "api-reference/core/ReturnStatement",
-            "api-reference/core/SourceFile",
-            "api-reference/core/Span",
-            "api-reference/core/Statement",
-            "api-reference/core/StatementType",
-            "api-reference/core/String",
-            "api-reference/core/StubPlaceholder",
-            "api-reference/core/SubscriptExpression",
-            "api-reference/core/SwitchCase",
-            "api-reference/core/SwitchStatement",
-            "api-reference/core/Symbol",
-            "api-reference/core/SymbolGroup",
-            "api-reference/core/SymbolStatement",
-            "api-reference/core/TernaryExpression",
-            "api-reference/core/TryCatchStatement",
-            "api-reference/core/Tuple",
-            "api-reference/core/TupleType",
-            "api-reference/core/Type",
-            "api-reference/core/TypeAlias",
-            "api-reference/core/TypePlaceholder",
-            "api-reference/core/Typeable",
-            "api-reference/core/UnaryExpression",
-            "api-reference/core/UnionType",
-            "api-reference/core/Unpack",
-            "api-reference/core/Unwrappable",
-            "api-reference/core/Usable",
-            "api-reference/core/Usage",
-            "api-reference/core/UsageKind",
-            "api-reference/core/UsageType",
-            "api-reference/core/Value",
-            "api-reference/core/WhileStatement",
-            "api-reference/core/WithStatement"
-          ]
-        },
-        {
-          "group": "Python",
-          "icon": "python",
-          "pages": [
-            "api-reference/python/PyAssignment",
-            "api-reference/python/PyAssignmentStatement",
-            "api-reference/python/PyAttribute",
-            "api-reference/python/PyBlockStatement",
-            "api-reference/python/PyBreakStatement",
-            "api-reference/python/PyCatchStatement",
-            "api-reference/python/PyChainedAttribute",
-            "api-reference/python/PyClass",
-            "api-reference/python/PyCodeBlock",
-            "api-reference/python/PyComment",
-            "api-reference/python/PyCommentGroup",
-            "api-reference/python/PyCommentType",
-            "api-reference/python/PyConditionalExpression",
-            "api-reference/python/PyDecorator",
-            "api-reference/python/PyFile",
-            "api-reference/python/PyForLoopStatement",
-            "api-reference/python/PyFunction",
-            "api-reference/python/PyGenericType",
-            "api-reference/python/PyHasBlock",
-            "api-reference/python/PyIfBlockStatement",
-            "api-reference/python/PyImport",
-            "api-reference/python/PyImportStatement",
-            "api-reference/python/PyMatchCase",
-            "api-reference/python/PyMatchStatement",
-            "api-reference/python/PyNamedType",
-            "api-reference/python/PyParameter",
-            "api-reference/python/PyPassStatement",
-            "api-reference/python/PyReturnTypePlaceholder",
-            "api-reference/python/PyString",
-            "api-reference/python/PySymbol",
-            "api-reference/python/PyTryCatchStatement",
-            "api-reference/python/PyUnionType",
-            "api-reference/python/PyWhileStatement"
-          ]
-        },
-        {
-          "group": "Typescript",
-          "icon": "js",
-          "pages": [
-            "api-reference/typescript/JSXElement",
-            "api-reference/typescript/JSXExpression",
-            "api-reference/typescript/JSXProp",
-            "api-reference/typescript/TSArrayType",
-            "api-reference/typescript/TSAssignment",
-            "api-reference/typescript/TSAssignmentStatement",
-            "api-reference/typescript/TSAttribute",
-            "api-reference/typescript/TSBlockStatement",
-            "api-reference/typescript/TSCatchStatement",
-            "api-reference/typescript/TSChainedAttribute",
-            "api-reference/typescript/TSClass",
-            "api-reference/typescript/TSCodeBlock",
-            "api-reference/typescript/TSComment",
-            "api-reference/typescript/TSCommentGroup",
-            "api-reference/typescript/TSCommentType",
-            "api-reference/typescript/TSConditionalType",
-            "api-reference/typescript/TSConfig",
-            "api-reference/typescript/TSDecorator",
-            "api-reference/typescript/TSDict",
-            "api-reference/typescript/TSEnum",
-            "api-reference/typescript/TSExport",
-            "api-reference/typescript/TSExpressionType",
-            "api-reference/typescript/TSFile",
-            "api-reference/typescript/TSForLoopStatement",
-            "api-reference/typescript/TSFunction",
-            "api-reference/typescript/TSFunctionType",
-            "api-reference/typescript/TSGenericType",
-            "api-reference/typescript/TSHasBlock",
-            "api-reference/typescript/TSIfBlockStatement",
-            "api-reference/typescript/TSImport",
-            "api-reference/typescript/TSImportStatement",
-            "api-reference/typescript/TSInterface",
-            "api-reference/typescript/TSLabeledStatement",
-            "api-reference/typescript/TSLookupType",
-            "api-reference/typescript/TSNamedType",
-            "api-reference/typescript/TSNamespace",
-            "api-reference/typescript/TSObjectType",
-            "api-reference/typescript/TSPair",
-            "api-reference/typescript/TSParameter",
-            "api-reference/typescript/TSQueryType",
-            "api-reference/typescript/TSReadonlyType",
-            "api-reference/typescript/TSReturnTypePlaceholder",
-            "api-reference/typescript/TSString",
-            "api-reference/typescript/TSSwitchCase",
-            "api-reference/typescript/TSSwitchStatement",
-            "api-reference/typescript/TSSymbol",
-            "api-reference/typescript/TSTernaryExpression",
-            "api-reference/typescript/TSTryCatchStatement",
-            "api-reference/typescript/TSTypeAlias",
-            "api-reference/typescript/TSUndefinedType",
-            "api-reference/typescript/TSUnionType",
-            "api-reference/typescript/TSWhileStatement"
-          ]
-        }
-      ]
-    }
-  ],
-  "footerSocials": {
-    "x": "https://x.com/codegen",
-    "linkedin": "https://linkedin.com/company/codegen-dot-com"
-  }
-}
\ No newline at end of file
+	"$schema": "https://mintlify.com/schema.json",
+	"name": "Codegen",
+	"logo": {
+		"dark": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45a3e32761c42b324b_Codegen_Logomark_Dark.svg",
+		"light": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45bf55446746125835_Codegen_Logomark_Light.svg"
+	},
+	"modeToggle": {
+		"default": "dark"
+	},
+	"metadata": {
+		"og:site_name": "Codegen",
+		"og:title": "Codegen - Manipulate Code at Scale",
+		"og:description": "A scriptable interface to a powerful, multi-lingual language server built on top of Tree-sitter.",
+		"og:url": "https://docs.codegen.com",
+		"og:locale": "en_US",
+		"og:logo": "https://i.imgur.com/f4OVOqI.png",
+		"article:publisher": "Codegen, Inc.",
+		"twitter:site": "@codegen"
+	},
+	"favicon": "/favicon.svg",
+	"colors": {
+		"primary": "#a277ff",
+		"light": "#a277ff",
+		"dark": "#a277ff",
+		"anchors": {
+			"from": "#61ffca",
+			"to": "#61ffca"
+		}
+	},
+	"theme": "prism",
+	"background": {
+		"style": "gradient"
+	},
+	"analytics": {
+		"posthog": {
+			"apiKey": "phc_GLxaINoQJnuyCyxDmTciQqzdKBYFVDkY7bRBO4bDdso"
+		}
+	},
+	"feedback": {
+		"thumbsRating": true
+	},
+	"topbarCtaButton": {
+		"name": "GitHub",
+		"url": "https://github.com/codegen-sh/codegen-sdk"
+	},
+	"tabs": [
+		{
+			"name": "API Reference",
+			"url": "/api-reference"
+		},
+		{
+			"name": "CLI",
+			"url": "/cli"
+		},
+		{
+			"name": "Blog",
+			"url": "/blog"
+		},
+		{
+			"name": "Changelog",
+			"url": "/changelog"
+		}
+	],
+	"navigation": [
+		{
+			"group": "Introduction",
+			"pages": [
+				"introduction/overview",
+				"introduction/getting-started",
+				"introduction/installation",
+				"introduction/ide-usage",
+				"introduction/work-with-ai",
+				"introduction/how-it-works",
+				"introduction/guiding-principles",
+				"introduction/community",
+				"introduction/about",
+				"introduction/faq"
+			]
+		},
+		{
+			"group": "Tutorials",
+			"pages": [
+				"tutorials/at-a-glance",
+				"tutorials/build-code-agent",
+				"tutorials/slack-bot",
+				"tutorials/github-review-bot",
+				"tutorials/deep-code-research",
+				"tutorials/training-data",
+				"tutorials/codebase-visualization",
+				"tutorials/migrating-apis",
+				"tutorials/organize-your-codebase",
+				"tutorials/promise-to-async-await",
+				"tutorials/modularity",
+				"tutorials/manage-feature-flags",
+				"tutorials/deleting-dead-code",
+				"tutorials/increase-type-coverage",
+				"tutorials/managing-typescript-exports",
+				"tutorials/converting-default-exports",
+				"tutorials/creating-documentation",
+				"tutorials/react-modernization",
+				"tutorials/unittest-to-pytest",
+				"tutorials/sqlalchemy-1.6-to-2.0",
+				"tutorials/fixing-import-loops-in-pytorch",
+				"tutorials/python2-to-python3",
+				"tutorials/flask-to-fastapi",
+				"tutorials/build-mcp",
+				"tutorials/neo4j-graph",
+				"tutorials/attributions"
+			]
+		},
+		{
+			"group": "Building with Codegen",
+			"pages": [
+				"building-with-codegen/at-a-glance",
+				"building-with-codegen/parsing-codebases",
+				"building-with-codegen/reusable-codemods",
+				"building-with-codegen/dot-codegen",
+				"building-with-codegen/function-decorator",
+				"building-with-codegen/language-support",
+				"building-with-codegen/commit-and-reset",
+				"building-with-codegen/git-operations",
+				"building-with-codegen/files-and-directories",
+				"building-with-codegen/the-editable-api",
+				"building-with-codegen/symbol-api",
+				"building-with-codegen/class-api",
+				"building-with-codegen/imports",
+				"building-with-codegen/exports",
+				"building-with-codegen/inheritable-behaviors",
+				"building-with-codegen/statements-and-code-blocks",
+				"building-with-codegen/dependencies-and-usages",
+				"building-with-codegen/function-calls-and-callsites",
+				"building-with-codegen/variable-assignments",
+				"building-with-codegen/local-variables",
+				"building-with-codegen/comments-and-docstrings",
+				"building-with-codegen/external-modules",
+				"building-with-codegen/type-annotations",
+				"building-with-codegen/moving-symbols",
+				"building-with-codegen/collections",
+				"building-with-codegen/traversing-the-call-graph",
+				"building-with-codegen/react-and-jsx",
+				"building-with-codegen/codebase-visualization",
+				"building-with-codegen/flagging-symbols",
+				"building-with-codegen/calling-out-to-llms",
+				"building-with-codegen/semantic-code-search",
+				"building-with-codegen/reducing-conditions"
+			]
+		},
+		{
+			"group": "CLI",
+			"pages": [
+				"cli/about",
+				"cli/init",
+				"cli/notebook",
+				"cli/create",
+				"cli/run",
+				"cli/reset",
+				"cli/expert"
+			]
+		},
+		{
+			"group": "Changelog",
+			"pages": ["changelog/changelog"]
+		},
+		{
+			"group": "Blog",
+			"pages": [
+				"blog/posts",
+				"blog/act-via-code",
+				"blog/promise-to-async-await-twilio",
+				"blog/fixing-import-loops"
+			]
+		},
+		{
+			"group": "API Reference",
+			"pages": [
+				"api-reference/index",
+				{
+					"group": "Core",
+					"icon": "code",
+					"pages": [
+						"api-reference/core/Argument",
+						"api-reference/core/Assignment",
+						"api-reference/core/AssignmentStatement",
+						"api-reference/core/Attribute",
+						"api-reference/core/AwaitExpression",
+						"api-reference/core/BinaryExpression",
+						"api-reference/core/BlockStatement",
+						"api-reference/core/Boolean",
+						"api-reference/core/Callable",
+						"api-reference/core/CatchStatement",
+						"api-reference/core/ChainedAttribute",
+						"api-reference/core/Class",
+						"api-reference/core/CodeBlock",
+						"api-reference/core/CodeOwner",
+						"api-reference/core/Codebase",
+						"api-reference/core/Comment",
+						"api-reference/core/CommentGroup",
+						"api-reference/core/ComparisonExpression",
+						"api-reference/core/Decorator",
+						"api-reference/core/Dict",
+						"api-reference/core/Directory",
+						"api-reference/core/Editable",
+						"api-reference/core/Export",
+						"api-reference/core/ExportStatement",
+						"api-reference/core/Exportable",
+						"api-reference/core/Expression",
+						"api-reference/core/ExpressionGroup",
+						"api-reference/core/ExpressionStatement",
+						"api-reference/core/ExternalModule",
+						"api-reference/core/File",
+						"api-reference/core/FlagKwargs",
+						"api-reference/core/ForLoopStatement",
+						"api-reference/core/Function",
+						"api-reference/core/FunctionCall",
+						"api-reference/core/GenericType",
+						"api-reference/core/HasBlock",
+						"api-reference/core/HasName",
+						"api-reference/core/HasValue",
+						"api-reference/core/IfBlockStatement",
+						"api-reference/core/Import",
+						"api-reference/core/ImportStatement",
+						"api-reference/core/ImportType",
+						"api-reference/core/Importable",
+						"api-reference/core/Interface",
+						"api-reference/core/List",
+						"api-reference/core/MessageType",
+						"api-reference/core/MultiExpression",
+						"api-reference/core/MultiLineCollection",
+						"api-reference/core/Name",
+						"api-reference/core/NamedType",
+						"api-reference/core/NoneType",
+						"api-reference/core/Number",
+						"api-reference/core/Pair",
+						"api-reference/core/Parameter",
+						"api-reference/core/ParenthesizedExpression",
+						"api-reference/core/Placeholder",
+						"api-reference/core/PlaceholderType",
+						"api-reference/core/RaiseStatement",
+						"api-reference/core/ReturnStatement",
+						"api-reference/core/SourceFile",
+						"api-reference/core/Span",
+						"api-reference/core/Statement",
+						"api-reference/core/StatementType",
+						"api-reference/core/String",
+						"api-reference/core/StubPlaceholder",
+						"api-reference/core/SubscriptExpression",
+						"api-reference/core/SwitchCase",
+						"api-reference/core/SwitchStatement",
+						"api-reference/core/Symbol",
+						"api-reference/core/SymbolGroup",
+						"api-reference/core/SymbolStatement",
+						"api-reference/core/TernaryExpression",
+						"api-reference/core/TryCatchStatement",
+						"api-reference/core/Tuple",
+						"api-reference/core/TupleType",
+						"api-reference/core/Type",
+						"api-reference/core/TypeAlias",
+						"api-reference/core/TypePlaceholder",
+						"api-reference/core/Typeable",
+						"api-reference/core/UnaryExpression",
+						"api-reference/core/UnionType",
+						"api-reference/core/Unpack",
+						"api-reference/core/Unwrappable",
+						"api-reference/core/Usable",
+						"api-reference/core/Usage",
+						"api-reference/core/UsageKind",
+						"api-reference/core/UsageType",
+						"api-reference/core/Value",
+						"api-reference/core/WhileStatement",
+						"api-reference/core/WithStatement"
+					]
+				},
+				{
+					"group": "Python",
+					"icon": "python",
+					"pages": [
+						"api-reference/python/PyAssignment",
+						"api-reference/python/PyAssignmentStatement",
+						"api-reference/python/PyAttribute",
+						"api-reference/python/PyBlockStatement",
+						"api-reference/python/PyBreakStatement",
+						"api-reference/python/PyCatchStatement",
+						"api-reference/python/PyChainedAttribute",
+						"api-reference/python/PyClass",
+						"api-reference/python/PyCodeBlock",
+						"api-reference/python/PyComment",
+						"api-reference/python/PyCommentGroup",
+						"api-reference/python/PyCommentType",
+						"api-reference/python/PyConditionalExpression",
+						"api-reference/python/PyDecorator",
+						"api-reference/python/PyFile",
+						"api-reference/python/PyForLoopStatement",
+						"api-reference/python/PyFunction",
+						"api-reference/python/PyGenericType",
+						"api-reference/python/PyHasBlock",
+						"api-reference/python/PyIfBlockStatement",
+						"api-reference/python/PyImport",
+						"api-reference/python/PyImportStatement",
+						"api-reference/python/PyMatchCase",
+						"api-reference/python/PyMatchStatement",
+						"api-reference/python/PyNamedType",
+						"api-reference/python/PyParameter",
+						"api-reference/python/PyPassStatement",
+						"api-reference/python/PyReturnTypePlaceholder",
+						"api-reference/python/PyString",
+						"api-reference/python/PySymbol",
+						"api-reference/python/PyTryCatchStatement",
+						"api-reference/python/PyUnionType",
+						"api-reference/python/PyWhileStatement"
+					]
+				},
+				{
+					"group": "Typescript",
+					"icon": "js",
+					"pages": [
+						"api-reference/typescript/JSXElement",
+						"api-reference/typescript/JSXExpression",
+						"api-reference/typescript/JSXProp",
+						"api-reference/typescript/TSArrayType",
+						"api-reference/typescript/TSAssignment",
+						"api-reference/typescript/TSAssignmentStatement",
+						"api-reference/typescript/TSAttribute",
+						"api-reference/typescript/TSBlockStatement",
+						"api-reference/typescript/TSCatchStatement",
+						"api-reference/typescript/TSChainedAttribute",
+						"api-reference/typescript/TSClass",
+						"api-reference/typescript/TSCodeBlock",
+						"api-reference/typescript/TSComment",
+						"api-reference/typescript/TSCommentGroup",
+						"api-reference/typescript/TSCommentType",
+						"api-reference/typescript/TSConditionalType",
+						"api-reference/typescript/TSConfig",
+						"api-reference/typescript/TSDecorator",
+						"api-reference/typescript/TSDict",
+						"api-reference/typescript/TSEnum",
+						"api-reference/typescript/TSExport",
+						"api-reference/typescript/TSExpressionType",
+						"api-reference/typescript/TSFile",
+						"api-reference/typescript/TSForLoopStatement",
+						"api-reference/typescript/TSFunction",
+						"api-reference/typescript/TSFunctionType",
+						"api-reference/typescript/TSGenericType",
+						"api-reference/typescript/TSHasBlock",
+						"api-reference/typescript/TSIfBlockStatement",
+						"api-reference/typescript/TSImport",
+						"api-reference/typescript/TSImportStatement",
+						"api-reference/typescript/TSInterface",
+						"api-reference/typescript/TSLabeledStatement",
+						"api-reference/typescript/TSLookupType",
+						"api-reference/typescript/TSNamedType",
+						"api-reference/typescript/TSNamespace",
+						"api-reference/typescript/TSObjectType",
+						"api-reference/typescript/TSPair",
+						"api-reference/typescript/TSParameter",
+						"api-reference/typescript/TSQueryType",
+						"api-reference/typescript/TSReadonlyType",
+						"api-reference/typescript/TSReturnTypePlaceholder",
+						"api-reference/typescript/TSString",
+						"api-reference/typescript/TSSwitchCase",
+						"api-reference/typescript/TSSwitchStatement",
+						"api-reference/typescript/TSSymbol",
+						"api-reference/typescript/TSTernaryExpression",
+						"api-reference/typescript/TSTryCatchStatement",
+						"api-reference/typescript/TSTypeAlias",
+						"api-reference/typescript/TSUndefinedType",
+						"api-reference/typescript/TSUnionType",
+						"api-reference/typescript/TSWhileStatement"
+					]
+				}
+			]
+		}
+	],
+	"footerSocials": {
+		"x": "https://x.com/codegen",
+		"linkedin": "https://linkedin.com/company/codegen-dot-com"
+	}
+}

From 56fbca6491cc80c6627f9340a3d7f37dc2b53f40 Mon Sep 17 00:00:00 2001
From: tkucar <tkucar@codegen.com>
Date: Fri, 7 Mar 2025 20:10:25 +0100
Subject: [PATCH 5/7] warning: this drops changes that are not committed! to be
 addressed in next revision

---
 .../extensions/attribution/git_history.py     | 140 +++++++++++++++---
 1 file changed, 119 insertions(+), 21 deletions(-)

diff --git a/src/codegen/extensions/attribution/git_history.py b/src/codegen/extensions/attribution/git_history.py
index 42ee0c40a..c66fa5f07 100644
--- a/src/codegen/extensions/attribution/git_history.py
+++ b/src/codegen/extensions/attribution/git_history.py
@@ -1,11 +1,15 @@
 import time
-from collections import defaultdict
+from collections import defaultdict, deque
 from datetime import datetime
 from typing import Optional
 
 import pygit2
+from intervaltree import IntervalTree
+from pygit2 import Commit, Patch
+from pygit2.enums import CheckoutStrategy, DeltaStatus, SortMode
 
 from codegen.sdk.core.codebase import Codebase
+from codegen.sdk.core.file import SourceFile
 from codegen.sdk.core.symbol import Symbol
 
 
@@ -23,18 +27,23 @@ def __init__(self, codebase: Codebase, ai_authors: Optional[list[str]] = None):
         self.codebase = codebase
         self.repo_path = codebase.ctx.projects[0].repo_operator.repo_path
         self.repo = pygit2.Repository(self.repo_path)
+        self.org_branch_reference = self.repo.head
 
         # Default AI authors if none provided
         self.ai_authors = ai_authors or ["devin[bot]", "codegen[bot]"]
 
         # Cache structures
         self._file_history = {}  # file path -> list of commit info
-        self._symbol_history = {}  # symbol id -> list of commit info
+        self._symbol_history:defaultdict[str,list] = defaultdict(list)  # symbol id -> list of commit info
         self._author_contributions = defaultdict(list)  # author -> list of commit info
 
         # Track if history has been built
         self._history_built = False
 
+        self._file_symbol_location_state:dict[str,IntervalTree] = {}
+
+        self._commits:deque[Commit]
+
     def build_history(self, max_commits: Optional[int] = None) -> None:
         """Build the git history for the codebase.
 
@@ -57,12 +66,13 @@ def build_history(self, max_commits: Optional[int] = None) -> None:
         commit_count = 0
         author_set = set()
 
+        self._commits=deque()
         try:
-            for commit in self.repo.walk(self.repo.head.target, pygit2.GIT_SORT_TIME):
+            for commit in self.repo.walk(self.repo.head.target, SortMode.TIME):
                 # Track unique authors
                 author_id = f"{commit.author.name} <{commit.author.email}>"
                 author_set.add(author_id)
-
+                self._commits.append(commit)
                 # Process each diff in the commit
                 if len(commit.parents) > 0:
                     try:
@@ -144,6 +154,35 @@ def _process_commit(self, commit, diff) -> None:
             file_commit["file_path"] = file_path
             self._file_history[file_path].append(file_commit)
 
+
+    def _process_symbol_location_state(self, filepaths:list[str]):
+        for filepath in filepaths:
+            file = self.codebase.get_file(filepath)
+            filetree = IntervalTree()
+            try:
+                for symbol in file.symbols:
+                    symbol:Symbol
+                    start_line=symbol.range.start_point.row+1 # 1 Indexing
+                    end_line=symbol.range.end_point.row+2 # Intervaltree is end non-inclusive
+                    filetree.addi(start_line,end_line,symbol)
+            except Exception as e:
+                pass
+            self._file_symbol_location_state[filepath] = filetree
+
+    def _get_symbols_affected_by_patch(self,patch:Patch,filepath):
+        if filepath not in self._file_symbol_location_state:
+            return []
+        symbols_affected=set()
+        for hunk in patch.hunks:
+            start = hunk.new_start
+            end = start+hunk.new_lines # Intervaltree is end non-inclusive
+            for interval in self._file_symbol_location_state[filepath].overlap(start,end):
+                symbols_affected.add(interval[2])
+
+        return symbols_affected
+
+
+
     def _is_tracked_file(self, file_path: str) -> bool:
         """Check if a file should be tracked based on extension."""
         # Get file extensions from the codebase
@@ -160,31 +199,90 @@ def _ensure_history_built(self) -> None:
         if not self._history_built:
             self.build_history()
 
-    def map_symbols_to_history(self) -> None:
-        """Map symbols in the codebase to their git history."""
+    def map_symbols_to_history(self,force=False) -> None:
+        """Map symbols in the codebase to their git history. force ensures a rerun even if data is already found!"""
         self._ensure_history_built()
+        if self._symbol_history:
+            print("Already built, run with force if you want to rerun anyway!")
+            return
 
         print("Mapping symbols to git history...")
         start_time = time.time()
 
-        # For each symbol, find commits that modified its file
-        for symbol in self.codebase.symbols:
-            if not hasattr(symbol, "filepath") or not symbol.filepath:
-                continue
-
-            symbol_id = f"{symbol.filepath}:{symbol.name}"
-            self._symbol_history[symbol_id] = []
-
-            # Get file history
-            file_history = self._file_history.get(symbol.filepath, [])
+        print("Turning off graph mapping!")
 
-            # For now, just associate all file changes with the symbol
-            # A more sophisticated approach would use line ranges
-            for commit in file_history:
-                self._symbol_history[symbol_id].append(commit)
+        print("Generating initial symbol state...")
+        filepaths = [file.filepath for file in self.codebase.files]
+        self._process_symbol_location_state(filepaths)
 
         elapsed = time.time() - start_time
-        print(f"Finished mapping symbols in {elapsed:.2f} seconds.")
+        print(f"Finished initial symbol state generation in {elapsed:.2f} seconds.")
+        symbol_tracking_checkpoint=time.time()
+        try:
+            print("Starting symbol tracking procedure....")
+            for commit in self._commits:
+                author_name = commit.author.name
+                author_email = commit.author.email
+                timestamp = commit.author.time
+                commit_id = str(commit.id)
+
+                commit_info = {
+                    "author": author_name,
+                    "email": author_email,
+                    "timestamp": timestamp,
+                    "commit_id": commit_id,
+                    "message": commit.message.strip(),
+                }
+                commit_previous = commit.parents[0] if commit.parents else None
+                if not commit_previous:
+                    #If Last commit
+                    empty_tree_old = self.repo.TreeBuilder().write()
+                    empty_tree=self.repo.get(empty_tree_old)
+                    diff = self.repo.diff(empty_tree,commit.tree)
+                else:
+                    diff = self.repo.diff(commit_previous, commit,context_lines=0) #We don't need context lines
+
+                if isinstance(diff,Patch):
+                    diff=[diff]
+                sync_past_filepaths=[] #Files to sync in the past commit
+                for patch in diff:
+                    filepath=patch.delta.new_file.path
+                    if not self._is_tracked_file(filepath):
+                        continue #Ignore files we don't track
+                    if not patch.delta.status==DeltaStatus.ADDED: #Reversed since we're going backwards, if it doesn't exist in the past commits don't sync!
+                        sync_past_filepaths.append(filepath)
+                    symbols_affected = self._get_symbols_affected_by_patch(patch,filepath)
+                    for symbol in symbols_affected:
+                        symbol_id = f"{symbol.filepath}:{symbol.name}" #For future stuff might want to do this more neatly and allow for future dead symbols/renames
+                        self._symbol_history[symbol_id].append(commit_info)
+
+                if commit_previous:
+                    #If not last commit
+                    self.repo.checkout_tree(commit_previous,strategy=CheckoutStrategy.FORCE)
+                    self.repo.set_head(commit_previous.id)
+                    files = [self.codebase.get_file(fp) for fp in sync_past_filepaths]
+                    exclude_state_files=[]
+                    for file in files:
+                        if not isinstance(file,SourceFile):
+                            #What kind of pyfiles are not source files? To investigate!
+                            exclude_state_files.append(file.filepath)
+                            continue
+                        file.sync_with_file_content()
+                    self._process_symbol_location_state([fp for fp in sync_past_filepaths if fp not in exclude_state_files])
+
+        finally:
+            print("Finished, restoring git repo state...")
+            self.repo.checkout(self.org_branch_reference,strategy=CheckoutStrategy.FORCE)
+
+        print(f"Restored, newest commit id in repo is {self.repo.revparse_single(self.org_branch_reference.name).id}")
+
+
+
+        end_time = time.time()
+        elapsed_total = end_time - start_time
+        elapsed_symbol_tracking = end_time-symbol_tracking_checkpoint
+        print(f"Finished symbol tracking in {elapsed_symbol_tracking:.2f} seconds.")
+        print(f"Finished mapping symbols in {elapsed_total:.2f} seconds.")
 
     def get_symbol_history(self, symbol: Symbol) -> list[dict]:
         """Get the edit history for a symbol.

From 7d6ecd14318a2690de65d5e8ea84d467ead9028a Mon Sep 17 00:00:00 2001
From: tomcodegen <kucar.tomislav@gmail.com>
Date: Mon, 10 Mar 2025 11:03:06 -0700
Subject: [PATCH 6/7] stash before run

---
 .../extensions/attribution/git_history.py     | 37 +++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/codegen/extensions/attribution/git_history.py b/src/codegen/extensions/attribution/git_history.py
index c66fa5f07..06450c32f 100644
--- a/src/codegen/extensions/attribution/git_history.py
+++ b/src/codegen/extensions/attribution/git_history.py
@@ -28,7 +28,6 @@ def __init__(self, codebase: Codebase, ai_authors: Optional[list[str]] = None):
         self.repo_path = codebase.ctx.projects[0].repo_operator.repo_path
         self.repo = pygit2.Repository(self.repo_path)
         self.org_branch_reference = self.repo.head
-
         # Default AI authors if none provided
         self.ai_authors = ai_authors or ["devin[bot]", "codegen[bot]"]
 
@@ -209,7 +208,19 @@ def map_symbols_to_history(self,force=False) -> None:
         print("Mapping symbols to git history...")
         start_time = time.time()
 
-        print("Turning off graph mapping!")
+
+
+        print("Stashing any working directory changes...")
+        stash_msg = f"Codegen Attribution Stash @ {datetime.now().timestamp()}"
+        stash_id=None
+        try:
+            stash_id = self.repo.stash(self.repo.default_signature,stash_msg,include_untracked=True)
+            print("Stashed!")
+        except KeyError as e:
+            print("Nothing to stash, proceeding.....")
+        except Exception as e:
+            print("Error encountered attempting to stash the current working state, stopping to preserve work, please manually clean the working directory and try again!")
+            raise(e)
 
         print("Generating initial symbol state...")
         filepaths = [file.filepath for file in self.codebase.files]
@@ -274,7 +285,27 @@ def map_symbols_to_history(self,force=False) -> None:
             print("Finished, restoring git repo state...")
             self.repo.checkout(self.org_branch_reference,strategy=CheckoutStrategy.FORCE)
 
-        print(f"Restored, newest commit id in repo is {self.repo.revparse_single(self.org_branch_reference.name).id}")
+            print(f"Restored to latest commit, newest commit id in repo is {self.repo.revparse_single(self.org_branch_reference.name).id}")
+
+            if stash_id:
+                #Restoring Working Directory
+                print("Restoring working directory changes...")
+                found_stash=None
+                for idx,stash in enumerate(self.repo.listall_stashes()):
+                    if stash_msg in stash.message:
+                        found_stash=idx
+                        break
+                if found_stash==0:
+                    print("Applying stash..")
+                    self.repo.stash_apply(0,reinstate_index=True)
+                    print("Applied Stash")
+                    self.repo.stash_drop(0)
+                    print("Stash Removed!")
+                else:
+                    print("Another stash occured in the meantime,please handle stash resotration manually")
+                    print(f"Codebase stash index:{found_stash}")
+                    print(f"Codebase stash msg:{stash_msg}")
+                    print(f"Codebase stash oid:{stash_id}")
 
 
 

From c5ad1cd824d2aa80236bd7510a04cf1a4597512c Mon Sep 17 00:00:00 2001
From: vishalshenoy <34020235+vishalshenoy@users.noreply.github.com>
Date: Tue, 11 Mar 2025 20:53:11 +0000
Subject: [PATCH 7/7] Automated pre-commit update

---
 docs/mint.json                                | 756 +++++++++---------
 .../extensions/attribution/git_history.py     |  93 +--
 2 files changed, 420 insertions(+), 429 deletions(-)

diff --git a/docs/mint.json b/docs/mint.json
index ab4b0c49a..cec15a787 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -1,381 +1,379 @@
 {
-  "$schema": "https://mintlify.com/schema.json",
-  "name": "Codegen",
-  "logo": {
-    "dark": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45a3e32761c42b324b_Codegen_Logomark_Dark.svg",
-    "light": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45bf55446746125835_Codegen_Logomark_Light.svg"
-  },
-  "modeToggle": {
-    "default": "dark"
-  },
-  "metadata": {
-    "og:site_name": "Codegen",
-    "og:title": "Codegen - Manipulate Code at Scale",
-    "og:description": "A scriptable interface to a powerful, multi-lingual language server built on top of Tree-sitter.",
-    "og:url": "https://docs.codegen.com",
-    "og:locale": "en_US",
-    "og:logo": "https://i.imgur.com/f4OVOqI.png",
-    "article:publisher": "Codegen, Inc.",
-    "twitter:site": "@codegen"
-  },
-  "favicon": "/favicon.svg",
-  "colors": {
-    "primary": "#a277ff",
-    "light": "#a277ff",
-    "dark": "#a277ff",
-    "anchors": {
-      "from": "#61ffca",
-      "to": "#61ffca"
-    }
-  },
-  "theme": "prism",
-  "background": {
-    "style": "gradient"
-  },
-  "analytics": {
-    "posthog": {
-      "apiKey": "phc_GLxaINoQJnuyCyxDmTciQqzdKBYFVDkY7bRBO4bDdso"
-    }
-  },
-  "feedback": {
-    "thumbsRating": true
-  },
-  "topbarCtaButton": {
-    "name": "GitHub",
-    "url": "https://github.com/codegen-sh/codegen-sdk"
-  },
-  "tabs": [
-    {
-      "name": "API Reference",
-      "url": "/api-reference"
-    },
-    {
-      "name": "CLI",
-      "url": "/cli"
-    },
-    {
-      "name": "Blog",
-      "url": "/blog"
-    },
-    {
-      "name": "Changelog",
-      "url": "/changelog"
-    }
-  ],
-  "navigation": [
-    {
-      "group": "Introduction",
-      "pages": [
-        "introduction/overview",
-        "introduction/getting-started",
-        "introduction/installation",
-        "introduction/ide-usage",
-        "introduction/work-with-ai",
-        "introduction/how-it-works",
-        "introduction/advanced-settings",
-        "introduction/guiding-principles",
-        "introduction/community",
-        "introduction/about",
-        "introduction/faq"
-      ]
-    },
-    {
-      "group": "Tutorials",
-      "pages": [
-        "tutorials/at-a-glance",
-        "tutorials/build-code-agent",
-        "tutorials/slack-bot",
-        "tutorials/github-review-bot",
-        "tutorials/deep-code-research",
-        "tutorials/training-data",
-        "tutorials/codebase-visualization",
-        "tutorials/migrating-apis",
-        "tutorials/organize-your-codebase",
-        "tutorials/promise-to-async-await",
-        "tutorials/modularity",
-        "tutorials/manage-feature-flags",
-        "tutorials/deleting-dead-code",
-        "tutorials/increase-type-coverage",
-        "tutorials/managing-typescript-exports",
-        "tutorials/converting-default-exports",
-        "tutorials/creating-documentation",
-        "tutorials/react-modernization",
-        "tutorials/unittest-to-pytest",
-        "tutorials/sqlalchemy-1.6-to-2.0",
-        "tutorials/fixing-import-loops-in-pytorch",
-        "tutorials/python2-to-python3",
-        "tutorials/flask-to-fastapi",
-        "tutorials/build-mcp",
-        "tutorials/neo4j-graph"
-      ]
-    },
-    {
-      "group": "Building with Codegen",
-      "pages": [
-        "building-with-codegen/at-a-glance",
-        "building-with-codegen/parsing-codebases",
-        "building-with-codegen/reusable-codemods",
-        "building-with-codegen/dot-codegen",
-        "building-with-codegen/function-decorator",
-        "building-with-codegen/language-support",
-        "building-with-codegen/commit-and-reset",
-        "building-with-codegen/git-operations",
-        "building-with-codegen/files-and-directories",
-        "building-with-codegen/the-editable-api",
-        "building-with-codegen/symbol-api",
-        "building-with-codegen/class-api",
-        "building-with-codegen/imports",
-        "building-with-codegen/exports",
-        "building-with-codegen/inheritable-behaviors",
-        "building-with-codegen/statements-and-code-blocks",
-        "building-with-codegen/dependencies-and-usages",
-        "building-with-codegen/function-calls-and-callsites",
-        "building-with-codegen/variable-assignments",
-        "building-with-codegen/local-variables",
-        "building-with-codegen/comments-and-docstrings",
-        "building-with-codegen/external-modules",
-        "building-with-codegen/type-annotations",
-        "building-with-codegen/moving-symbols",
-        "building-with-codegen/collections",
-        "building-with-codegen/traversing-the-call-graph",
-        "building-with-codegen/react-and-jsx",
-        "building-with-codegen/codebase-visualization",
-        "building-with-codegen/flagging-symbols",
-        "building-with-codegen/calling-out-to-llms",
-        "building-with-codegen/semantic-code-search",
-        "building-with-codegen/reducing-conditions"
-      ]
-    },
-    {
-      "group": "CLI",
-      "pages": [
-        "cli/about",
-        "cli/init",
-        "cli/notebook",
-        "cli/create",
-        "cli/run",
-        "cli/reset",
-        "cli/expert"
-      ]
-    },
-    {
-      "group": "Changelog",
-      "pages": [
-        "changelog/changelog"
-      ]
-    },
-    {
-      "group": "Blog",
-      "pages": [
-        "blog/posts",
-        "blog/devin",
-        "blog/act-via-code",
-        "blog/promise-to-async-await-twilio",
-        "blog/fixing-import-loops"
-      ]
-    },
-    {
-      "group": "API Reference",
-      "pages": [
-        "api-reference/index",
-        {
-          "group": "Core",
-          "icon": "code",
-          "pages": [
-            "api-reference/core/Argument",
-            "api-reference/core/Assignment",
-            "api-reference/core/AssignmentStatement",
-            "api-reference/core/Attribute",
-            "api-reference/core/AwaitExpression",
-            "api-reference/core/BinaryExpression",
-            "api-reference/core/BlockStatement",
-            "api-reference/core/Boolean",
-            "api-reference/core/Callable",
-            "api-reference/core/CatchStatement",
-            "api-reference/core/ChainedAttribute",
-            "api-reference/core/Class",
-            "api-reference/core/CodeBlock",
-            "api-reference/core/CodeOwner",
-            "api-reference/core/Codebase",
-            "api-reference/core/Comment",
-            "api-reference/core/CommentGroup",
-            "api-reference/core/ComparisonExpression",
-            "api-reference/core/Decorator",
-            "api-reference/core/Dict",
-            "api-reference/core/Directory",
-            "api-reference/core/Editable",
-            "api-reference/core/Export",
-            "api-reference/core/ExportStatement",
-            "api-reference/core/Exportable",
-            "api-reference/core/Expression",
-            "api-reference/core/ExpressionGroup",
-            "api-reference/core/ExpressionStatement",
-            "api-reference/core/ExternalModule",
-            "api-reference/core/File",
-            "api-reference/core/FlagKwargs",
-            "api-reference/core/ForLoopStatement",
-            "api-reference/core/Function",
-            "api-reference/core/FunctionCall",
-            "api-reference/core/GenericType",
-            "api-reference/core/HasBlock",
-            "api-reference/core/HasName",
-            "api-reference/core/HasValue",
-            "api-reference/core/IfBlockStatement",
-            "api-reference/core/Import",
-            "api-reference/core/ImportStatement",
-            "api-reference/core/ImportType",
-            "api-reference/core/Importable",
-            "api-reference/core/Interface",
-            "api-reference/core/List",
-            "api-reference/core/MessageType",
-            "api-reference/core/MultiExpression",
-            "api-reference/core/MultiLineCollection",
-            "api-reference/core/Name",
-            "api-reference/core/NamedType",
-            "api-reference/core/NoneType",
-            "api-reference/core/Number",
-            "api-reference/core/Pair",
-            "api-reference/core/Parameter",
-            "api-reference/core/ParenthesizedExpression",
-            "api-reference/core/Placeholder",
-            "api-reference/core/PlaceholderType",
-            "api-reference/core/RaiseStatement",
-            "api-reference/core/ReturnStatement",
-            "api-reference/core/SourceFile",
-            "api-reference/core/Span",
-            "api-reference/core/Statement",
-            "api-reference/core/StatementType",
-            "api-reference/core/String",
-            "api-reference/core/StubPlaceholder",
-            "api-reference/core/SubscriptExpression",
-            "api-reference/core/SwitchCase",
-            "api-reference/core/SwitchStatement",
-            "api-reference/core/Symbol",
-            "api-reference/core/SymbolGroup",
-            "api-reference/core/SymbolStatement",
-            "api-reference/core/TernaryExpression",
-            "api-reference/core/TryCatchStatement",
-            "api-reference/core/Tuple",
-            "api-reference/core/TupleType",
-            "api-reference/core/Type",
-            "api-reference/core/TypeAlias",
-            "api-reference/core/TypePlaceholder",
-            "api-reference/core/Typeable",
-            "api-reference/core/UnaryExpression",
-            "api-reference/core/UnionType",
-            "api-reference/core/Unpack",
-            "api-reference/core/Unwrappable",
-            "api-reference/core/Usable",
-            "api-reference/core/Usage",
-            "api-reference/core/UsageKind",
-            "api-reference/core/UsageType",
-            "api-reference/core/Value",
-            "api-reference/core/WhileStatement",
-            "api-reference/core/WithStatement"
-          ]
-        },
-        {
-          "group": "Python",
-          "icon": "python",
-          "pages": [
-            "api-reference/python/PyAssignment",
-            "api-reference/python/PyAssignmentStatement",
-            "api-reference/python/PyAttribute",
-            "api-reference/python/PyBlockStatement",
-            "api-reference/python/PyBreakStatement",
-            "api-reference/python/PyCatchStatement",
-            "api-reference/python/PyChainedAttribute",
-            "api-reference/python/PyClass",
-            "api-reference/python/PyCodeBlock",
-            "api-reference/python/PyComment",
-            "api-reference/python/PyCommentGroup",
-            "api-reference/python/PyCommentType",
-            "api-reference/python/PyConditionalExpression",
-            "api-reference/python/PyDecorator",
-            "api-reference/python/PyFile",
-            "api-reference/python/PyForLoopStatement",
-            "api-reference/python/PyFunction",
-            "api-reference/python/PyGenericType",
-            "api-reference/python/PyHasBlock",
-            "api-reference/python/PyIfBlockStatement",
-            "api-reference/python/PyImport",
-            "api-reference/python/PyImportStatement",
-            "api-reference/python/PyMatchCase",
-            "api-reference/python/PyMatchStatement",
-            "api-reference/python/PyNamedType",
-            "api-reference/python/PyParameter",
-            "api-reference/python/PyPassStatement",
-            "api-reference/python/PyReturnTypePlaceholder",
-            "api-reference/python/PyString",
-            "api-reference/python/PySymbol",
-            "api-reference/python/PyTryCatchStatement",
-            "api-reference/python/PyUnionType",
-            "api-reference/python/PyWhileStatement"
-          ]
-        },
-        {
-          "group": "Typescript",
-          "icon": "js",
-          "pages": [
-            "api-reference/typescript/JSXElement",
-            "api-reference/typescript/JSXExpression",
-            "api-reference/typescript/JSXProp",
-            "api-reference/typescript/TSArrayType",
-            "api-reference/typescript/TSAssignment",
-            "api-reference/typescript/TSAssignmentStatement",
-            "api-reference/typescript/TSAttribute",
-            "api-reference/typescript/TSBlockStatement",
-            "api-reference/typescript/TSCatchStatement",
-            "api-reference/typescript/TSChainedAttribute",
-            "api-reference/typescript/TSClass",
-            "api-reference/typescript/TSCodeBlock",
-            "api-reference/typescript/TSComment",
-            "api-reference/typescript/TSCommentGroup",
-            "api-reference/typescript/TSCommentType",
-            "api-reference/typescript/TSConditionalType",
-            "api-reference/typescript/TSConfig",
-            "api-reference/typescript/TSDecorator",
-            "api-reference/typescript/TSDict",
-            "api-reference/typescript/TSEnum",
-            "api-reference/typescript/TSExport",
-            "api-reference/typescript/TSExpressionType",
-            "api-reference/typescript/TSFile",
-            "api-reference/typescript/TSForLoopStatement",
-            "api-reference/typescript/TSFunction",
-            "api-reference/typescript/TSFunctionType",
-            "api-reference/typescript/TSGenericType",
-            "api-reference/typescript/TSHasBlock",
-            "api-reference/typescript/TSIfBlockStatement",
-            "api-reference/typescript/TSImport",
-            "api-reference/typescript/TSImportStatement",
-            "api-reference/typescript/TSInterface",
-            "api-reference/typescript/TSLabeledStatement",
-            "api-reference/typescript/TSLookupType",
-            "api-reference/typescript/TSNamedType",
-            "api-reference/typescript/TSNamespace",
-            "api-reference/typescript/TSObjectType",
-            "api-reference/typescript/TSPair",
-            "api-reference/typescript/TSParameter",
-            "api-reference/typescript/TSQueryType",
-            "api-reference/typescript/TSReadonlyType",
-            "api-reference/typescript/TSReturnTypePlaceholder",
-            "api-reference/typescript/TSString",
-            "api-reference/typescript/TSSwitchCase",
-            "api-reference/typescript/TSSwitchStatement",
-            "api-reference/typescript/TSSymbol",
-            "api-reference/typescript/TSTernaryExpression",
-            "api-reference/typescript/TSTryCatchStatement",
-            "api-reference/typescript/TSTypeAlias",
-            "api-reference/typescript/TSUndefinedType",
-            "api-reference/typescript/TSUnionType",
-            "api-reference/typescript/TSWhileStatement"
-          ]
-        }
-      ]
-    }
-  ],
-  "footerSocials": {
-    "x": "https://x.com/codegen",
-    "linkedin": "https://linkedin.com/company/codegen-dot-com"
-  }
+	"$schema": "https://mintlify.com/schema.json",
+	"name": "Codegen",
+	"logo": {
+		"dark": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45a3e32761c42b324b_Codegen_Logomark_Dark.svg",
+		"light": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45bf55446746125835_Codegen_Logomark_Light.svg"
+	},
+	"modeToggle": {
+		"default": "dark"
+	},
+	"metadata": {
+		"og:site_name": "Codegen",
+		"og:title": "Codegen - Manipulate Code at Scale",
+		"og:description": "A scriptable interface to a powerful, multi-lingual language server built on top of Tree-sitter.",
+		"og:url": "https://docs.codegen.com",
+		"og:locale": "en_US",
+		"og:logo": "https://i.imgur.com/f4OVOqI.png",
+		"article:publisher": "Codegen, Inc.",
+		"twitter:site": "@codegen"
+	},
+	"favicon": "/favicon.svg",
+	"colors": {
+		"primary": "#a277ff",
+		"light": "#a277ff",
+		"dark": "#a277ff",
+		"anchors": {
+			"from": "#61ffca",
+			"to": "#61ffca"
+		}
+	},
+	"theme": "prism",
+	"background": {
+		"style": "gradient"
+	},
+	"analytics": {
+		"posthog": {
+			"apiKey": "phc_GLxaINoQJnuyCyxDmTciQqzdKBYFVDkY7bRBO4bDdso"
+		}
+	},
+	"feedback": {
+		"thumbsRating": true
+	},
+	"topbarCtaButton": {
+		"name": "GitHub",
+		"url": "https://github.com/codegen-sh/codegen-sdk"
+	},
+	"tabs": [
+		{
+			"name": "API Reference",
+			"url": "/api-reference"
+		},
+		{
+			"name": "CLI",
+			"url": "/cli"
+		},
+		{
+			"name": "Blog",
+			"url": "/blog"
+		},
+		{
+			"name": "Changelog",
+			"url": "/changelog"
+		}
+	],
+	"navigation": [
+		{
+			"group": "Introduction",
+			"pages": [
+				"introduction/overview",
+				"introduction/getting-started",
+				"introduction/installation",
+				"introduction/ide-usage",
+				"introduction/work-with-ai",
+				"introduction/how-it-works",
+				"introduction/advanced-settings",
+				"introduction/guiding-principles",
+				"introduction/community",
+				"introduction/about",
+				"introduction/faq"
+			]
+		},
+		{
+			"group": "Tutorials",
+			"pages": [
+				"tutorials/at-a-glance",
+				"tutorials/build-code-agent",
+				"tutorials/slack-bot",
+				"tutorials/github-review-bot",
+				"tutorials/deep-code-research",
+				"tutorials/training-data",
+				"tutorials/codebase-visualization",
+				"tutorials/migrating-apis",
+				"tutorials/organize-your-codebase",
+				"tutorials/promise-to-async-await",
+				"tutorials/modularity",
+				"tutorials/manage-feature-flags",
+				"tutorials/deleting-dead-code",
+				"tutorials/increase-type-coverage",
+				"tutorials/managing-typescript-exports",
+				"tutorials/converting-default-exports",
+				"tutorials/creating-documentation",
+				"tutorials/react-modernization",
+				"tutorials/unittest-to-pytest",
+				"tutorials/sqlalchemy-1.6-to-2.0",
+				"tutorials/fixing-import-loops-in-pytorch",
+				"tutorials/python2-to-python3",
+				"tutorials/flask-to-fastapi",
+				"tutorials/build-mcp",
+				"tutorials/neo4j-graph"
+			]
+		},
+		{
+			"group": "Building with Codegen",
+			"pages": [
+				"building-with-codegen/at-a-glance",
+				"building-with-codegen/parsing-codebases",
+				"building-with-codegen/reusable-codemods",
+				"building-with-codegen/dot-codegen",
+				"building-with-codegen/function-decorator",
+				"building-with-codegen/language-support",
+				"building-with-codegen/commit-and-reset",
+				"building-with-codegen/git-operations",
+				"building-with-codegen/files-and-directories",
+				"building-with-codegen/the-editable-api",
+				"building-with-codegen/symbol-api",
+				"building-with-codegen/class-api",
+				"building-with-codegen/imports",
+				"building-with-codegen/exports",
+				"building-with-codegen/inheritable-behaviors",
+				"building-with-codegen/statements-and-code-blocks",
+				"building-with-codegen/dependencies-and-usages",
+				"building-with-codegen/function-calls-and-callsites",
+				"building-with-codegen/variable-assignments",
+				"building-with-codegen/local-variables",
+				"building-with-codegen/comments-and-docstrings",
+				"building-with-codegen/external-modules",
+				"building-with-codegen/type-annotations",
+				"building-with-codegen/moving-symbols",
+				"building-with-codegen/collections",
+				"building-with-codegen/traversing-the-call-graph",
+				"building-with-codegen/react-and-jsx",
+				"building-with-codegen/codebase-visualization",
+				"building-with-codegen/flagging-symbols",
+				"building-with-codegen/calling-out-to-llms",
+				"building-with-codegen/semantic-code-search",
+				"building-with-codegen/reducing-conditions"
+			]
+		},
+		{
+			"group": "CLI",
+			"pages": [
+				"cli/about",
+				"cli/init",
+				"cli/notebook",
+				"cli/create",
+				"cli/run",
+				"cli/reset",
+				"cli/expert"
+			]
+		},
+		{
+			"group": "Changelog",
+			"pages": ["changelog/changelog"]
+		},
+		{
+			"group": "Blog",
+			"pages": [
+				"blog/posts",
+				"blog/devin",
+				"blog/act-via-code",
+				"blog/promise-to-async-await-twilio",
+				"blog/fixing-import-loops"
+			]
+		},
+		{
+			"group": "API Reference",
+			"pages": [
+				"api-reference/index",
+				{
+					"group": "Core",
+					"icon": "code",
+					"pages": [
+						"api-reference/core/Argument",
+						"api-reference/core/Assignment",
+						"api-reference/core/AssignmentStatement",
+						"api-reference/core/Attribute",
+						"api-reference/core/AwaitExpression",
+						"api-reference/core/BinaryExpression",
+						"api-reference/core/BlockStatement",
+						"api-reference/core/Boolean",
+						"api-reference/core/Callable",
+						"api-reference/core/CatchStatement",
+						"api-reference/core/ChainedAttribute",
+						"api-reference/core/Class",
+						"api-reference/core/CodeBlock",
+						"api-reference/core/CodeOwner",
+						"api-reference/core/Codebase",
+						"api-reference/core/Comment",
+						"api-reference/core/CommentGroup",
+						"api-reference/core/ComparisonExpression",
+						"api-reference/core/Decorator",
+						"api-reference/core/Dict",
+						"api-reference/core/Directory",
+						"api-reference/core/Editable",
+						"api-reference/core/Export",
+						"api-reference/core/ExportStatement",
+						"api-reference/core/Exportable",
+						"api-reference/core/Expression",
+						"api-reference/core/ExpressionGroup",
+						"api-reference/core/ExpressionStatement",
+						"api-reference/core/ExternalModule",
+						"api-reference/core/File",
+						"api-reference/core/FlagKwargs",
+						"api-reference/core/ForLoopStatement",
+						"api-reference/core/Function",
+						"api-reference/core/FunctionCall",
+						"api-reference/core/GenericType",
+						"api-reference/core/HasBlock",
+						"api-reference/core/HasName",
+						"api-reference/core/HasValue",
+						"api-reference/core/IfBlockStatement",
+						"api-reference/core/Import",
+						"api-reference/core/ImportStatement",
+						"api-reference/core/ImportType",
+						"api-reference/core/Importable",
+						"api-reference/core/Interface",
+						"api-reference/core/List",
+						"api-reference/core/MessageType",
+						"api-reference/core/MultiExpression",
+						"api-reference/core/MultiLineCollection",
+						"api-reference/core/Name",
+						"api-reference/core/NamedType",
+						"api-reference/core/NoneType",
+						"api-reference/core/Number",
+						"api-reference/core/Pair",
+						"api-reference/core/Parameter",
+						"api-reference/core/ParenthesizedExpression",
+						"api-reference/core/Placeholder",
+						"api-reference/core/PlaceholderType",
+						"api-reference/core/RaiseStatement",
+						"api-reference/core/ReturnStatement",
+						"api-reference/core/SourceFile",
+						"api-reference/core/Span",
+						"api-reference/core/Statement",
+						"api-reference/core/StatementType",
+						"api-reference/core/String",
+						"api-reference/core/StubPlaceholder",
+						"api-reference/core/SubscriptExpression",
+						"api-reference/core/SwitchCase",
+						"api-reference/core/SwitchStatement",
+						"api-reference/core/Symbol",
+						"api-reference/core/SymbolGroup",
+						"api-reference/core/SymbolStatement",
+						"api-reference/core/TernaryExpression",
+						"api-reference/core/TryCatchStatement",
+						"api-reference/core/Tuple",
+						"api-reference/core/TupleType",
+						"api-reference/core/Type",
+						"api-reference/core/TypeAlias",
+						"api-reference/core/TypePlaceholder",
+						"api-reference/core/Typeable",
+						"api-reference/core/UnaryExpression",
+						"api-reference/core/UnionType",
+						"api-reference/core/Unpack",
+						"api-reference/core/Unwrappable",
+						"api-reference/core/Usable",
+						"api-reference/core/Usage",
+						"api-reference/core/UsageKind",
+						"api-reference/core/UsageType",
+						"api-reference/core/Value",
+						"api-reference/core/WhileStatement",
+						"api-reference/core/WithStatement"
+					]
+				},
+				{
+					"group": "Python",
+					"icon": "python",
+					"pages": [
+						"api-reference/python/PyAssignment",
+						"api-reference/python/PyAssignmentStatement",
+						"api-reference/python/PyAttribute",
+						"api-reference/python/PyBlockStatement",
+						"api-reference/python/PyBreakStatement",
+						"api-reference/python/PyCatchStatement",
+						"api-reference/python/PyChainedAttribute",
+						"api-reference/python/PyClass",
+						"api-reference/python/PyCodeBlock",
+						"api-reference/python/PyComment",
+						"api-reference/python/PyCommentGroup",
+						"api-reference/python/PyCommentType",
+						"api-reference/python/PyConditionalExpression",
+						"api-reference/python/PyDecorator",
+						"api-reference/python/PyFile",
+						"api-reference/python/PyForLoopStatement",
+						"api-reference/python/PyFunction",
+						"api-reference/python/PyGenericType",
+						"api-reference/python/PyHasBlock",
+						"api-reference/python/PyIfBlockStatement",
+						"api-reference/python/PyImport",
+						"api-reference/python/PyImportStatement",
+						"api-reference/python/PyMatchCase",
+						"api-reference/python/PyMatchStatement",
+						"api-reference/python/PyNamedType",
+						"api-reference/python/PyParameter",
+						"api-reference/python/PyPassStatement",
+						"api-reference/python/PyReturnTypePlaceholder",
+						"api-reference/python/PyString",
+						"api-reference/python/PySymbol",
+						"api-reference/python/PyTryCatchStatement",
+						"api-reference/python/PyUnionType",
+						"api-reference/python/PyWhileStatement"
+					]
+				},
+				{
+					"group": "Typescript",
+					"icon": "js",
+					"pages": [
+						"api-reference/typescript/JSXElement",
+						"api-reference/typescript/JSXExpression",
+						"api-reference/typescript/JSXProp",
+						"api-reference/typescript/TSArrayType",
+						"api-reference/typescript/TSAssignment",
+						"api-reference/typescript/TSAssignmentStatement",
+						"api-reference/typescript/TSAttribute",
+						"api-reference/typescript/TSBlockStatement",
+						"api-reference/typescript/TSCatchStatement",
+						"api-reference/typescript/TSChainedAttribute",
+						"api-reference/typescript/TSClass",
+						"api-reference/typescript/TSCodeBlock",
+						"api-reference/typescript/TSComment",
+						"api-reference/typescript/TSCommentGroup",
+						"api-reference/typescript/TSCommentType",
+						"api-reference/typescript/TSConditionalType",
+						"api-reference/typescript/TSConfig",
+						"api-reference/typescript/TSDecorator",
+						"api-reference/typescript/TSDict",
+						"api-reference/typescript/TSEnum",
+						"api-reference/typescript/TSExport",
+						"api-reference/typescript/TSExpressionType",
+						"api-reference/typescript/TSFile",
+						"api-reference/typescript/TSForLoopStatement",
+						"api-reference/typescript/TSFunction",
+						"api-reference/typescript/TSFunctionType",
+						"api-reference/typescript/TSGenericType",
+						"api-reference/typescript/TSHasBlock",
+						"api-reference/typescript/TSIfBlockStatement",
+						"api-reference/typescript/TSImport",
+						"api-reference/typescript/TSImportStatement",
+						"api-reference/typescript/TSInterface",
+						"api-reference/typescript/TSLabeledStatement",
+						"api-reference/typescript/TSLookupType",
+						"api-reference/typescript/TSNamedType",
+						"api-reference/typescript/TSNamespace",
+						"api-reference/typescript/TSObjectType",
+						"api-reference/typescript/TSPair",
+						"api-reference/typescript/TSParameter",
+						"api-reference/typescript/TSQueryType",
+						"api-reference/typescript/TSReadonlyType",
+						"api-reference/typescript/TSReturnTypePlaceholder",
+						"api-reference/typescript/TSString",
+						"api-reference/typescript/TSSwitchCase",
+						"api-reference/typescript/TSSwitchStatement",
+						"api-reference/typescript/TSSymbol",
+						"api-reference/typescript/TSTernaryExpression",
+						"api-reference/typescript/TSTryCatchStatement",
+						"api-reference/typescript/TSTypeAlias",
+						"api-reference/typescript/TSUndefinedType",
+						"api-reference/typescript/TSUnionType",
+						"api-reference/typescript/TSWhileStatement"
+					]
+				}
+			]
+		}
+	],
+	"footerSocials": {
+		"x": "https://x.com/codegen",
+		"linkedin": "https://linkedin.com/company/codegen-dot-com"
+	}
 }
diff --git a/src/codegen/extensions/attribution/git_history.py b/src/codegen/extensions/attribution/git_history.py
index 06450c32f..39dfcc740 100644
--- a/src/codegen/extensions/attribution/git_history.py
+++ b/src/codegen/extensions/attribution/git_history.py
@@ -33,15 +33,15 @@ def __init__(self, codebase: Codebase, ai_authors: Optional[list[str]] = None):
 
         # Cache structures
         self._file_history = {}  # file path -> list of commit info
-        self._symbol_history:defaultdict[str,list] = defaultdict(list)  # symbol id -> list of commit info
+        self._symbol_history: defaultdict[str, list] = defaultdict(list)  # symbol id -> list of commit info
         self._author_contributions = defaultdict(list)  # author -> list of commit info
 
         # Track if history has been built
         self._history_built = False
 
-        self._file_symbol_location_state:dict[str,IntervalTree] = {}
+        self._file_symbol_location_state: dict[str, IntervalTree] = {}
 
-        self._commits:deque[Commit]
+        self._commits: deque[Commit]
 
     def build_history(self, max_commits: Optional[int] = None) -> None:
         """Build the git history for the codebase.
@@ -65,7 +65,7 @@ def build_history(self, max_commits: Optional[int] = None) -> None:
         commit_count = 0
         author_set = set()
 
-        self._commits=deque()
+        self._commits = deque()
         try:
             for commit in self.repo.walk(self.repo.head.target, SortMode.TIME):
                 # Track unique authors
@@ -153,35 +153,32 @@ def _process_commit(self, commit, diff) -> None:
             file_commit["file_path"] = file_path
             self._file_history[file_path].append(file_commit)
 
-
-    def _process_symbol_location_state(self, filepaths:list[str]):
+    def _process_symbol_location_state(self, filepaths: list[str]):
         for filepath in filepaths:
             file = self.codebase.get_file(filepath)
             filetree = IntervalTree()
             try:
                 for symbol in file.symbols:
-                    symbol:Symbol
-                    start_line=symbol.range.start_point.row+1 # 1 Indexing
-                    end_line=symbol.range.end_point.row+2 # Intervaltree is end non-inclusive
-                    filetree.addi(start_line,end_line,symbol)
+                    symbol: Symbol
+                    start_line = symbol.range.start_point.row + 1  # 1 Indexing
+                    end_line = symbol.range.end_point.row + 2  # Intervaltree is end non-inclusive
+                    filetree.addi(start_line, end_line, symbol)
             except Exception as e:
                 pass
             self._file_symbol_location_state[filepath] = filetree
 
-    def _get_symbols_affected_by_patch(self,patch:Patch,filepath):
+    def _get_symbols_affected_by_patch(self, patch: Patch, filepath):
         if filepath not in self._file_symbol_location_state:
             return []
-        symbols_affected=set()
+        symbols_affected = set()
         for hunk in patch.hunks:
             start = hunk.new_start
-            end = start+hunk.new_lines # Intervaltree is end non-inclusive
-            for interval in self._file_symbol_location_state[filepath].overlap(start,end):
+            end = start + hunk.new_lines  # Intervaltree is end non-inclusive
+            for interval in self._file_symbol_location_state[filepath].overlap(start, end):
                 symbols_affected.add(interval[2])
 
         return symbols_affected
 
-
-
     def _is_tracked_file(self, file_path: str) -> bool:
         """Check if a file should be tracked based on extension."""
         # Get file extensions from the codebase
@@ -198,7 +195,7 @@ def _ensure_history_built(self) -> None:
         if not self._history_built:
             self.build_history()
 
-    def map_symbols_to_history(self,force=False) -> None:
+    def map_symbols_to_history(self, force=False) -> None:
         """Map symbols in the codebase to their git history. force ensures a rerun even if data is already found!"""
         self._ensure_history_built()
         if self._symbol_history:
@@ -208,19 +205,17 @@ def map_symbols_to_history(self,force=False) -> None:
         print("Mapping symbols to git history...")
         start_time = time.time()
 
-
-
         print("Stashing any working directory changes...")
         stash_msg = f"Codegen Attribution Stash @ {datetime.now().timestamp()}"
-        stash_id=None
+        stash_id = None
         try:
-            stash_id = self.repo.stash(self.repo.default_signature,stash_msg,include_untracked=True)
+            stash_id = self.repo.stash(self.repo.default_signature, stash_msg, include_untracked=True)
             print("Stashed!")
         except KeyError as e:
             print("Nothing to stash, proceeding.....")
         except Exception as e:
             print("Error encountered attempting to stash the current working state, stopping to preserve work, please manually clean the working directory and try again!")
-            raise(e)
+            raise (e)
 
         print("Generating initial symbol state...")
         filepaths = [file.filepath for file in self.codebase.files]
@@ -228,7 +223,7 @@ def map_symbols_to_history(self,force=False) -> None:
 
         elapsed = time.time() - start_time
         print(f"Finished initial symbol state generation in {elapsed:.2f} seconds.")
-        symbol_tracking_checkpoint=time.time()
+        symbol_tracking_checkpoint = time.time()
         try:
             print("Starting symbol tracking procedure....")
             for commit in self._commits:
@@ -246,36 +241,36 @@ def map_symbols_to_history(self,force=False) -> None:
                 }
                 commit_previous = commit.parents[0] if commit.parents else None
                 if not commit_previous:
-                    #If Last commit
+                    # If Last commit
                     empty_tree_old = self.repo.TreeBuilder().write()
-                    empty_tree=self.repo.get(empty_tree_old)
-                    diff = self.repo.diff(empty_tree,commit.tree)
+                    empty_tree = self.repo.get(empty_tree_old)
+                    diff = self.repo.diff(empty_tree, commit.tree)
                 else:
-                    diff = self.repo.diff(commit_previous, commit,context_lines=0) #We don't need context lines
+                    diff = self.repo.diff(commit_previous, commit, context_lines=0)  # We don't need context lines
 
-                if isinstance(diff,Patch):
-                    diff=[diff]
-                sync_past_filepaths=[] #Files to sync in the past commit
+                if isinstance(diff, Patch):
+                    diff = [diff]
+                sync_past_filepaths = []  # Files to sync in the past commit
                 for patch in diff:
-                    filepath=patch.delta.new_file.path
+                    filepath = patch.delta.new_file.path
                     if not self._is_tracked_file(filepath):
-                        continue #Ignore files we don't track
-                    if not patch.delta.status==DeltaStatus.ADDED: #Reversed since we're going backwards, if it doesn't exist in the past commits don't sync!
+                        continue  # Ignore files we don't track
+                    if not patch.delta.status == DeltaStatus.ADDED:  # Reversed since we're going backwards, if it doesn't exist in the past commits don't sync!
                         sync_past_filepaths.append(filepath)
-                    symbols_affected = self._get_symbols_affected_by_patch(patch,filepath)
+                    symbols_affected = self._get_symbols_affected_by_patch(patch, filepath)
                     for symbol in symbols_affected:
-                        symbol_id = f"{symbol.filepath}:{symbol.name}" #For future stuff might want to do this more neatly and allow for future dead symbols/renames
+                        symbol_id = f"{symbol.filepath}:{symbol.name}"  # For future stuff might want to do this more neatly and allow for future dead symbols/renames
                         self._symbol_history[symbol_id].append(commit_info)
 
                 if commit_previous:
-                    #If not last commit
-                    self.repo.checkout_tree(commit_previous,strategy=CheckoutStrategy.FORCE)
+                    # If not last commit
+                    self.repo.checkout_tree(commit_previous, strategy=CheckoutStrategy.FORCE)
                     self.repo.set_head(commit_previous.id)
                     files = [self.codebase.get_file(fp) for fp in sync_past_filepaths]
-                    exclude_state_files=[]
+                    exclude_state_files = []
                     for file in files:
-                        if not isinstance(file,SourceFile):
-                            #What kind of pyfiles are not source files? To investigate!
+                        if not isinstance(file, SourceFile):
+                            # What kind of pyfiles are not source files? To investigate!
                             exclude_state_files.append(file.filepath)
                             continue
                         file.sync_with_file_content()
@@ -283,21 +278,21 @@ def map_symbols_to_history(self,force=False) -> None:
 
         finally:
             print("Finished, restoring git repo state...")
-            self.repo.checkout(self.org_branch_reference,strategy=CheckoutStrategy.FORCE)
+            self.repo.checkout(self.org_branch_reference, strategy=CheckoutStrategy.FORCE)
 
             print(f"Restored to latest commit, newest commit id in repo is {self.repo.revparse_single(self.org_branch_reference.name).id}")
 
             if stash_id:
-                #Restoring Working Directory
+                # Restoring Working Directory
                 print("Restoring working directory changes...")
-                found_stash=None
-                for idx,stash in enumerate(self.repo.listall_stashes()):
+                found_stash = None
+                for idx, stash in enumerate(self.repo.listall_stashes()):
                     if stash_msg in stash.message:
-                        found_stash=idx
+                        found_stash = idx
                         break
-                if found_stash==0:
+                if found_stash == 0:
                     print("Applying stash..")
-                    self.repo.stash_apply(0,reinstate_index=True)
+                    self.repo.stash_apply(0, reinstate_index=True)
                     print("Applied Stash")
                     self.repo.stash_drop(0)
                     print("Stash Removed!")
@@ -307,11 +302,9 @@ def map_symbols_to_history(self,force=False) -> None:
                     print(f"Codebase stash msg:{stash_msg}")
                     print(f"Codebase stash oid:{stash_id}")
 
-
-
         end_time = time.time()
         elapsed_total = end_time - start_time
-        elapsed_symbol_tracking = end_time-symbol_tracking_checkpoint
+        elapsed_symbol_tracking = end_time - symbol_tracking_checkpoint
         print(f"Finished symbol tracking in {elapsed_symbol_tracking:.2f} seconds.")
         print(f"Finished mapping symbols in {elapsed_total:.2f} seconds.")