From 358cf00a33f1019920c76cd906a61a749435eb0b Mon Sep 17 00:00:00 2001 From: codegen-bot Date: Wed, 12 Mar 2025 20:55:02 +0000 Subject: [PATCH 1/2] Add README for attributions example --- .../examples/attributions/README.md | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 codegen-examples/examples/attributions/README.md diff --git a/codegen-examples/examples/attributions/README.md b/codegen-examples/examples/attributions/README.md new file mode 100644 index 000000000..b00624297 --- /dev/null +++ b/codegen-examples/examples/attributions/README.md @@ -0,0 +1,103 @@ +# Code Statistics and Attributions + +This example demonstrates how to use Codegen's attribution extension to analyze the impact of AI on your codebase. You'll learn how to identify which parts of your code were written by AI tools like GitHub Copilot, Devin, or other AI assistants. + +## Overview + +The attribution extension analyzes git history to: + +1. Identify which symbols (functions, classes, etc.) were authored or modified by AI tools +2. Calculate the percentage of AI contributions in your codebase +3. Find high-impact AI-written code (code that many other parts depend on) +4. Track the evolution of AI contributions over time + +## How It Works + +This example: + +1. Loads a codebase (either the current directory or a sample repository) +2. Runs the AI impact analysis to gather attribution data +3. Demonstrates how to access attribution information for symbols +4. Prints detailed information about the most used symbols, including: + - Last editor + - Editor history + - Whether the symbol was authored by AI + +## Running the Example + +You can run this example with: + +```bash +python symbol_attribution.py +``` + +The script will: +1. Initialize a codebase from the current directory (if it's a git repository) or use a sample repository +2. Run the AI impact analysis +3. Print attribution information for the most used symbols in the codebase + +## Code Walkthrough + +### Setting Up the Codebase + +```python +# Use current directory if it's a git repository +if os.path.exists(".git"): + print("Using current directory as repository...") + repo_path = os.getcwd() + repo_config = RepoConfig.from_repo_path(repo_path) + repo_operator = RepoOperator(repo_config=repo_config) + + project = ProjectConfig.from_repo_operator( + repo_operator=repo_operator, + programming_language=ProgrammingLanguage.PYTHON + ) + codebase = Codebase(projects=[project]) +else: + # Use from_repo method for a well-known repository + print("Using a sample repository...") + codebase = Codebase.from_repo( + repo_full_name="codegen-sh/codegen", + language="python", + ) +``` + +### Running the Analysis + +```python +# Run the AI impact analysis +run(codebase) +``` + +### Accessing Attribution Information + +```python +# Define which authors are considered AI +ai_authors = ["devin[bot]", "codegen[bot]", "github-actions[bot]"] +add_attribution_to_symbols(codebase, ai_authors) + +# Access attribution information on symbols +for symbol in codebase.symbols: + if hasattr(symbol, 'last_editor'): + print(f"Last editor: {symbol.last_editor}") + + if hasattr(symbol, 'editor_history'): + print(f"Editor history: {symbol.editor_history}") + + if hasattr(symbol, 'is_ai_authored'): + print(f"AI authored: {'Yes' if symbol.is_ai_authored else 'No'}") +``` + +## Key Insights + +By running this example, you can: + +- Identify which parts of your codebase were authored by AI +- Track the adoption of AI coding assistants in your team +- Identify areas where AI is most effective +- Ensure appropriate review of AI-generated code +- Measure the impact of AI on developer productivity + +## Further Reading + +For more advanced usage, check out the [API reference](https://docs.codegen.sh/api-reference/extensions/attribution) for the attribution extension. \ No newline at end of file From 263308407cb4ccca8f70de89f829ce5384dd51e1 Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Wed, 12 Mar 2025 20:55:46 +0000 Subject: [PATCH 2/2] Automated pre-commit update --- .../examples/attributions/README.md | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/codegen-examples/examples/attributions/README.md b/codegen-examples/examples/attributions/README.md index b00624297..e82acdaae 100644 --- a/codegen-examples/examples/attributions/README.md +++ b/codegen-examples/examples/attributions/README.md @@ -7,18 +7,18 @@ This example demonstrates how to use Codegen's attribution extension to analyze The attribution extension analyzes git history to: 1. Identify which symbols (functions, classes, etc.) were authored or modified by AI tools -2. Calculate the percentage of AI contributions in your codebase -3. Find high-impact AI-written code (code that many other parts depend on) -4. Track the evolution of AI contributions over time +1. Calculate the percentage of AI contributions in your codebase +1. Find high-impact AI-written code (code that many other parts depend on) +1. Track the evolution of AI contributions over time ## How It Works This example: 1. Loads a codebase (either the current directory or a sample repository) -2. Runs the AI impact analysis to gather attribution data -3. Demonstrates how to access attribution information for symbols -4. Prints detailed information about the most used symbols, including: +1. Runs the AI impact analysis to gather attribution data +1. Demonstrates how to access attribution information for symbols +1. Prints detailed information about the most used symbols, including: - Last editor - Editor history - Whether the symbol was authored by AI @@ -32,9 +32,10 @@ python symbol_attribution.py ``` The script will: + 1. Initialize a codebase from the current directory (if it's a git repository) or use a sample repository -2. Run the AI impact analysis -3. Print attribution information for the most used symbols in the codebase +1. Run the AI impact analysis +1. Print attribution information for the most used symbols in the codebase ## Code Walkthrough @@ -48,10 +49,7 @@ if os.path.exists(".git"): repo_config = RepoConfig.from_repo_path(repo_path) repo_operator = RepoOperator(repo_config=repo_config) - project = ProjectConfig.from_repo_operator( - repo_operator=repo_operator, - programming_language=ProgrammingLanguage.PYTHON - ) + project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON) codebase = Codebase(projects=[project]) else: # Use from_repo method for a well-known repository @@ -78,13 +76,13 @@ add_attribution_to_symbols(codebase, ai_authors) # Access attribution information on symbols for symbol in codebase.symbols: - if hasattr(symbol, 'last_editor'): + if hasattr(symbol, "last_editor"): print(f"Last editor: {symbol.last_editor}") - - if hasattr(symbol, 'editor_history'): + + if hasattr(symbol, "editor_history"): print(f"Editor history: {symbol.editor_history}") - - if hasattr(symbol, 'is_ai_authored'): + + if hasattr(symbol, "is_ai_authored"): print(f"AI authored: {'Yes' if symbol.is_ai_authored else 'No'}") ``` @@ -100,4 +98,4 @@ By running this example, you can: ## Further Reading -For more advanced usage, check out the [API reference](https://docs.codegen.sh/api-reference/extensions/attribution) for the attribution extension. \ No newline at end of file +For more advanced usage, check out the [API reference](https://docs.codegen.sh/api-reference/extensions/attribution) for the attribution extension.