From 6a18df688067cc16c320fc8277a1a5f1ad7e4c9f Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 12:35:07 +0000 Subject: [PATCH 1/3] ZAM-374: Implement codebase_analysis.py in analyzers directory --- .../codegen_on_oss/analyzers/__init__.py | 22 +- .../analyzers/codebase_analysis.py | 319 ++++++++++++++++++ .../tests/test_codebase_analysis.py | 193 +++++++++++ 3 files changed, 533 insertions(+), 1 deletion(-) create mode 100644 codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py create mode 100644 codegen-on-oss/tests/test_codebase_analysis.py diff --git a/codegen-on-oss/codegen_on_oss/analyzers/__init__.py b/codegen-on-oss/codegen_on_oss/analyzers/__init__.py index f1ef5c5b4..f4c002000 100644 --- a/codegen-on-oss/codegen_on_oss/analyzers/__init__.py +++ b/codegen-on-oss/codegen_on_oss/analyzers/__init__.py @@ -46,6 +46,16 @@ # Core analysis modules from codegen_on_oss.analyzers.code_quality import CodeQualityAnalyzer from codegen_on_oss.analyzers.dependencies import DependencyAnalyzer +from codegen_on_oss.analyzers.codebase_analysis import ( + get_codebase_summary, + get_file_summary, + get_class_summary, + get_function_summary, + get_symbol_summary, + get_dependency_graph, + get_symbol_references, + get_file_complexity_metrics +) # Legacy analyzer interfaces (for backward compatibility) from codegen_on_oss.analyzers.base_analyzer import BaseCodeAnalyzer @@ -85,9 +95,19 @@ # Core analyzers 'CodeQualityAnalyzer', 'DependencyAnalyzer', + + # Codebase analysis utilities + 'get_codebase_summary', + 'get_file_summary', + 'get_class_summary', + 'get_function_summary', + 'get_symbol_summary', + 'get_dependency_graph', + 'get_symbol_references', + 'get_file_complexity_metrics', # Legacy interfaces (for backward compatibility) 'BaseCodeAnalyzer', 'CodebaseAnalyzer', 'ErrorAnalyzer', -] \ No newline at end of file +] diff --git a/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py b/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py new file mode 100644 index 000000000..cbb6f6b1f --- /dev/null +++ b/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python3 +""" +Codebase Analysis Module + +This module provides basic code analysis functionality for codebases, including: +- Functions for getting codebase summaries +- Functions for getting file summaries +- Basic code analysis utilities + +This is a dedicated implementation of the SDK's codebase_analysis.py module +for the analyzers directory, ensuring consistent analysis results. +""" + +from typing import Dict, List, Optional, Set, Tuple, Any, Union + +from codegen.sdk.core.class_definition import Class +from codegen.sdk.core.codebase import Codebase +from codegen.sdk.core.external_module import ExternalModule +from codegen.sdk.core.file import SourceFile +from codegen.sdk.core.function import Function +from codegen.sdk.core.import_resolution import Import +from codegen.sdk.core.symbol import Symbol +from codegen.sdk.enums import EdgeType, SymbolType + + +def get_codebase_summary(codebase: Codebase) -> str: + """ + Generate a comprehensive summary of a codebase. + + Args: + codebase: The Codebase object to summarize + + Returns: + A formatted string containing a summary of the codebase's nodes and edges + """ + node_summary = f"""Contains {len(codebase.ctx.get_nodes())} nodes +- {len(list(codebase.files))} files +- {len(list(codebase.imports))} imports +- {len(list(codebase.external_modules))} external_modules +- {len(list(codebase.symbols))} symbols +\t- {len(list(codebase.classes))} classes +\t- {len(list(codebase.functions))} functions +\t- {len(list(codebase.global_vars))} global_vars +\t- {len(list(codebase.interfaces))} interfaces +""" + edge_summary = f"""Contains {len(codebase.ctx.edges)} edges +- {len([x for x in codebase.ctx.edges if x[2].type == EdgeType.SYMBOL_USAGE])} symbol -> used symbol +- {len([x for x in codebase.ctx.edges if x[2].type == EdgeType.IMPORT_SYMBOL_RESOLUTION])} import -> used symbol +- {len([x for x in codebase.ctx.edges if x[2].type == EdgeType.EXPORT])} export -> exported symbol + """ + + return f"{node_summary}\n{edge_summary}" + + +def get_file_summary(file: SourceFile) -> str: + """ + Generate a summary of a source file. + + Args: + file: The SourceFile object to summarize + + Returns: + A formatted string containing a summary of the file's dependencies and usage + """ + return f"""==== [ `{file.name}` (SourceFile) Dependency Summary ] ==== +- {len(file.imports)} imports +- {len(file.symbols)} symbol references +\t- {len(file.classes)} classes +\t- {len(file.functions)} functions +\t- {len(file.global_vars)} global variables +\t- {len(file.interfaces)} interfaces + +==== [ `{file.name}` Usage Summary ] ==== +- {len(file.imports)} importers +""" + + +def get_class_summary(cls: Class) -> str: + """ + Generate a summary of a class. + + Args: + cls: The Class object to summarize + + Returns: + A formatted string containing a summary of the class's dependencies and usage + """ + return f"""==== [ `{cls.name}` (Class) Dependency Summary ] ==== +- parent classes: {cls.parent_class_names} +- {len(cls.methods)} methods +- {len(cls.attributes)} attributes +- {len(cls.decorators)} decorators +- {len(cls.dependencies)} dependencies + +{get_symbol_summary(cls)} + """ + + +def get_function_summary(func: Function) -> str: + """ + Generate a summary of a function. + + Args: + func: The Function object to summarize + + Returns: + A formatted string containing a summary of the function's dependencies and usage + """ + return f"""==== [ `{func.name}` (Function) Dependency Summary ] ==== +- {len(func.return_statements)} return statements +- {len(func.parameters)} parameters +- {len(func.function_calls)} function calls +- {len(func.call_sites)} call sites +- {len(func.decorators)} decorators +- {len(func.dependencies)} dependencies + +{get_symbol_summary(func)} + """ + + +def get_symbol_summary(symbol: Symbol) -> str: + """ + Generate a summary of a symbol. + + Args: + symbol: The Symbol object to summarize + + Returns: + A formatted string containing a summary of the symbol's usage + """ + usages = symbol.symbol_usages + imported_symbols = [x.imported_symbol for x in usages if isinstance(x, Import)] + + return f"""==== [ `{symbol.name}` ({type(symbol).__name__}) Usage Summary ] ==== +- {len(usages)} usages +\t- {len([x for x in usages if isinstance(x, Symbol) and x.symbol_type == SymbolType.Function])} functions +\t- {len([x for x in usages if isinstance(x, Symbol) and x.symbol_type == SymbolType.Class])} classes +\t- {len([x for x in usages if isinstance(x, Symbol) and x.symbol_type == SymbolType.GlobalVar])} global variables +\t- {len([x for x in usages if isinstance(x, Symbol) and x.symbol_type == SymbolType.Interface])} interfaces +\t- {len(imported_symbols)} imports +\t\t- {len([x for x in imported_symbols if isinstance(x, Symbol) and x.symbol_type == SymbolType.Function])} functions +\t\t- {len([x for x in imported_symbols if isinstance(x, Symbol) and x.symbol_type == SymbolType.Class])} classes +\t\t- {len([x for x in imported_symbols if isinstance(x, Symbol) and x.symbol_type == SymbolType.GlobalVar])} global variables +\t\t- {len([x for x in imported_symbols if isinstance(x, Symbol) and x.symbol_type == SymbolType.Interface])} interfaces +\t\t- {len([x for x in imported_symbols if isinstance(x, ExternalModule)])} external modules +\t\t- {len([x for x in imported_symbols if isinstance(x, SourceFile)])} files + """ + + +def get_dependency_graph(codebase: Codebase, file_path: Optional[str] = None) -> Dict[str, List[str]]: + """ + Generate a dependency graph for a codebase or specific file. + + Args: + codebase: The Codebase object to analyze + file_path: Optional path to a specific file to analyze + + Returns: + A dictionary mapping file paths to lists of dependencies + """ + dependency_graph = {} + + files_to_analyze = [f for f in codebase.files if not file_path or f.file_path == file_path] + + for file in files_to_analyze: + dependencies = [] + + # Add direct imports + for imp in file.imports: + if hasattr(imp, 'imported_symbol') and hasattr(imp.imported_symbol, 'file'): + if hasattr(imp.imported_symbol.file, 'file_path'): + dependencies.append(imp.imported_symbol.file.file_path) + + # Add symbol dependencies + for symbol in file.symbols: + for dep in symbol.dependencies: + if hasattr(dep, 'file') and hasattr(dep.file, 'file_path'): + dependencies.append(dep.file.file_path) + + # Remove duplicates and self-references + unique_deps = list(set([d for d in dependencies if d != file.file_path])) + dependency_graph[file.file_path] = unique_deps + + return dependency_graph + + +def get_symbol_references(codebase: Codebase, symbol_name: str) -> List[Dict[str, Any]]: + """ + Find all references to a symbol in the codebase. + + Args: + codebase: The Codebase object to search + symbol_name: The name of the symbol to find references for + + Returns: + A list of dictionaries containing reference information + """ + references = [] + + # Find all symbols with the given name + target_symbols = [s for s in codebase.symbols if s.name == symbol_name] + + for symbol in target_symbols: + # Find all edges that reference this symbol + for edge in codebase.ctx.edges: + if edge[1] == symbol.id: # If the edge points to our symbol + source_node = codebase.ctx.get_node(edge[0]) + if source_node: + # Get file and line information if available + file_path = None + line_number = None + + if hasattr(source_node, 'file') and hasattr(source_node.file, 'file_path'): + file_path = source_node.file.file_path + + if hasattr(source_node, 'line'): + line_number = source_node.line + + references.append({ + 'file_path': file_path, + 'line': line_number, + 'source_type': type(source_node).__name__, + 'source_name': getattr(source_node, 'name', str(source_node)), + 'edge_type': edge[2].type.name if hasattr(edge[2], 'type') else 'Unknown' + }) + + return references + + +def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]: + """ + Calculate complexity metrics for a source file. + + Args: + file: The SourceFile object to analyze + + Returns: + A dictionary containing complexity metrics + """ + metrics = { + 'file_path': file.file_path, + 'name': file.name, + 'num_lines': 0, + 'num_imports': len(file.imports), + 'num_classes': len(file.classes), + 'num_functions': len(file.functions), + 'num_global_vars': len(file.global_vars), + 'cyclomatic_complexity': 0, + 'max_function_complexity': 0, + 'max_class_complexity': 0, + } + + # Calculate lines of code if source is available + if hasattr(file, 'source') and file.source: + metrics['num_lines'] = len(file.source.split('\n')) + + # Calculate function complexities + function_complexities = [] + for func in file.functions: + complexity = _calculate_function_complexity(func) + function_complexities.append(complexity) + metrics['cyclomatic_complexity'] += complexity + + if function_complexities: + metrics['max_function_complexity'] = max(function_complexities) + + # Calculate class complexities + class_complexities = [] + for cls in file.classes: + complexity = 0 + for method in cls.methods: + method_complexity = _calculate_function_complexity(method) + complexity += method_complexity + class_complexities.append(complexity) + metrics['cyclomatic_complexity'] += complexity + + if class_complexities: + metrics['max_class_complexity'] = max(class_complexities) + + return metrics + + +def _calculate_function_complexity(func: Function) -> int: + """ + Calculate the cyclomatic complexity of a function. + + Args: + func: The Function object to analyze + + Returns: + An integer representing the cyclomatic complexity + """ + complexity = 1 # Base complexity + + if not hasattr(func, 'source') or not func.source: + return complexity + + # Simple heuristic: count control flow statements + source = func.source.lower() + + # Count if statements + complexity += source.count(' if ') + source.count('\nif ') + + # Count else if / elif statements + complexity += source.count('elif ') + source.count('else if ') + + # Count loops + complexity += source.count(' for ') + source.count('\nfor ') + complexity += source.count(' while ') + source.count('\nwhile ') + + # Count exception handlers + complexity += source.count('except ') + source.count('catch ') + + # Count logical operators (each one creates a new path) + complexity += source.count(' and ') + source.count(' && ') + complexity += source.count(' or ') + source.count(' || ') + + return complexity + diff --git a/codegen-on-oss/tests/test_codebase_analysis.py b/codegen-on-oss/tests/test_codebase_analysis.py new file mode 100644 index 000000000..26ff0e33d --- /dev/null +++ b/codegen-on-oss/tests/test_codebase_analysis.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +""" +Tests for the codebase_analysis module. + +This module tests the functionality of the codebase_analysis.py module +in the analyzers directory, ensuring it provides the expected functionality +for codebase and file summaries. +""" + +import unittest +import os +import sys +from unittest.mock import MagicMock, patch + +# Add the parent directory to the path so we can import the module +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from codegen_on_oss.analyzers.codebase_analysis import ( + get_codebase_summary, + get_file_summary, + get_class_summary, + get_function_summary, + get_symbol_summary, + get_dependency_graph, + get_symbol_references, + get_file_complexity_metrics +) + + +class TestCodebaseAnalysis(unittest.TestCase): + """Test cases for the codebase_analysis module.""" + + def setUp(self): + """Set up test fixtures.""" + # Create mock objects for testing + self.mock_codebase = MagicMock() + self.mock_file = MagicMock() + self.mock_class = MagicMock() + self.mock_function = MagicMock() + self.mock_symbol = MagicMock() + + # Set up mock codebase + self.mock_codebase.ctx.get_nodes.return_value = [1, 2, 3] + self.mock_codebase.ctx.edges = [(1, 2, MagicMock(type=MagicMock(name="SYMBOL_USAGE"))), + (2, 3, MagicMock(type=MagicMock(name="IMPORT_SYMBOL_RESOLUTION"))), + (3, 1, MagicMock(type=MagicMock(name="EXPORT")))] + self.mock_codebase.files = [MagicMock(), MagicMock()] + self.mock_codebase.imports = [MagicMock()] + self.mock_codebase.external_modules = [MagicMock()] + self.mock_codebase.symbols = [MagicMock()] + self.mock_codebase.classes = [MagicMock()] + self.mock_codebase.functions = [MagicMock()] + self.mock_codebase.global_vars = [MagicMock()] + self.mock_codebase.interfaces = [MagicMock()] + + # Set up mock file + self.mock_file.name = "test_file.py" + self.mock_file.file_path = "/path/to/test_file.py" + self.mock_file.imports = [MagicMock()] + self.mock_file.symbols = [MagicMock()] + self.mock_file.classes = [MagicMock()] + self.mock_file.functions = [MagicMock()] + self.mock_file.global_vars = [MagicMock()] + self.mock_file.interfaces = [MagicMock()] + self.mock_file.source = "def test_function():\n if True:\n return 1\n else:\n return 0" + + # Set up mock class + self.mock_class.name = "TestClass" + self.mock_class.parent_class_names = ["BaseClass"] + self.mock_class.methods = [MagicMock()] + self.mock_class.attributes = [MagicMock()] + self.mock_class.decorators = [MagicMock()] + self.mock_class.dependencies = [MagicMock()] + self.mock_class.symbol_usages = [MagicMock()] + + # Set up mock function + self.mock_function.name = "test_function" + self.mock_function.return_statements = [MagicMock()] + self.mock_function.parameters = [MagicMock()] + self.mock_function.function_calls = [MagicMock()] + self.mock_function.call_sites = [MagicMock()] + self.mock_function.decorators = [MagicMock()] + self.mock_function.dependencies = [MagicMock()] + self.mock_function.symbol_usages = [MagicMock()] + self.mock_function.source = "def test_function():\n if True:\n return 1\n else:\n return 0" + + # Set up mock symbol + self.mock_symbol.name = "test_symbol" + self.mock_symbol.symbol_usages = [MagicMock()] + + def test_get_codebase_summary(self): + """Test the get_codebase_summary function.""" + summary = get_codebase_summary(self.mock_codebase) + + # Check that the summary contains expected information + self.assertIn("Contains 3 nodes", summary) + self.assertIn("2 files", summary) + self.assertIn("1 imports", summary) + self.assertIn("1 external_modules", summary) + self.assertIn("1 symbols", summary) + self.assertIn("1 classes", summary) + self.assertIn("1 functions", summary) + self.assertIn("1 global_vars", summary) + self.assertIn("1 interfaces", summary) + self.assertIn("Contains 3 edges", summary) + self.assertIn("1 symbol -> used symbol", summary) + self.assertIn("1 import -> used symbol", summary) + self.assertIn("1 export -> exported symbol", summary) + + def test_get_file_summary(self): + """Test the get_file_summary function.""" + summary = get_file_summary(self.mock_file) + + # Check that the summary contains expected information + self.assertIn("`test_file.py` (SourceFile) Dependency Summary", summary) + self.assertIn("1 imports", summary) + self.assertIn("1 symbol references", summary) + self.assertIn("1 classes", summary) + self.assertIn("1 functions", summary) + self.assertIn("1 global variables", summary) + self.assertIn("1 interfaces", summary) + self.assertIn("`test_file.py` Usage Summary", summary) + self.assertIn("1 importers", summary) + + def test_get_class_summary(self): + """Test the get_class_summary function.""" + with patch('codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary', return_value="SYMBOL SUMMARY"): + summary = get_class_summary(self.mock_class) + + # Check that the summary contains expected information + self.assertIn("`TestClass` (Class) Dependency Summary", summary) + self.assertIn("parent classes: ['BaseClass']", summary) + self.assertIn("1 methods", summary) + self.assertIn("1 attributes", summary) + self.assertIn("1 decorators", summary) + self.assertIn("1 dependencies", summary) + self.assertIn("SYMBOL SUMMARY", summary) + + def test_get_function_summary(self): + """Test the get_function_summary function.""" + with patch('codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary', return_value="SYMBOL SUMMARY"): + summary = get_function_summary(self.mock_function) + + # Check that the summary contains expected information + self.assertIn("`test_function` (Function) Dependency Summary", summary) + self.assertIn("1 return statements", summary) + self.assertIn("1 parameters", summary) + self.assertIn("1 function calls", summary) + self.assertIn("1 call sites", summary) + self.assertIn("1 decorators", summary) + self.assertIn("1 dependencies", summary) + self.assertIn("SYMBOL SUMMARY", summary) + + def test_get_file_complexity_metrics(self): + """Test the get_file_complexity_metrics function.""" + metrics = get_file_complexity_metrics(self.mock_file) + + # Check that the metrics contain expected information + self.assertEqual(metrics['file_path'], "/path/to/test_file.py") + self.assertEqual(metrics['name'], "test_file.py") + self.assertEqual(metrics['num_lines'], 5) + self.assertEqual(metrics['num_imports'], 1) + self.assertEqual(metrics['num_classes'], 1) + self.assertEqual(metrics['num_functions'], 1) + self.assertEqual(metrics['num_global_vars'], 1) + + # Test with a function that has control flow + self.mock_function.source = """def complex_function(a, b): + if a > 0: + if b > 0: + return a + b + else: + return a - b + elif a < 0 and b < 0: + return -a - b + else: + for i in range(10): + if i % 2 == 0: + continue + a += i + return a + """ + + # Mock the functions list to include our complex function + self.mock_file.functions = [self.mock_function] + + metrics = get_file_complexity_metrics(self.mock_file) + self.assertGreater(metrics['cyclomatic_complexity'], 1) + + +if __name__ == '__main__': + unittest.main() + From b2d179805b02a2057be1c3b029bca3b9ec18b3ad Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 12:40:50 +0000 Subject: [PATCH 2/3] Fix code formatting with black and isort --- .../codegen_on_oss/analyzers/__init__.py | 150 ++++++--------- .../analyzers/codebase_analysis.py | 181 ++++++++++-------- .../tests/test_codebase_analysis.py | 79 ++++---- 3 files changed, 197 insertions(+), 213 deletions(-) diff --git a/codegen-on-oss/codegen_on_oss/analyzers/__init__.py b/codegen-on-oss/codegen_on_oss/analyzers/__init__.py index f4c002000..5dad7789e 100644 --- a/codegen-on-oss/codegen_on_oss/analyzers/__init__.py +++ b/codegen-on-oss/codegen_on_oss/analyzers/__init__.py @@ -6,108 +6,78 @@ as an API backend for frontend applications. """ -# Main API interface -from codegen_on_oss.analyzers.api import ( - CodegenAnalyzerAPI, - create_api, - api_analyze_codebase, - api_analyze_pr, - api_get_visualization, - api_get_static_errors -) - # Modern analyzer architecture -from codegen_on_oss.analyzers.analyzer import ( - AnalyzerManager, - AnalyzerPlugin, - AnalyzerRegistry, - CodeQualityPlugin, - DependencyPlugin -) - -# Issue tracking system -from codegen_on_oss.analyzers.issues import ( - Issue, - IssueCollection, - IssueSeverity, - AnalysisType, - IssueCategory, - CodeLocation -) - -# Analysis result models -from codegen_on_oss.analyzers.models.analysis_result import ( - AnalysisResult, - CodeQualityResult, - DependencyResult, - PrAnalysisResult -) - +from codegen_on_oss.analyzers.analyzer import (AnalyzerManager, AnalyzerPlugin, + AnalyzerRegistry, + CodeQualityPlugin, + DependencyPlugin) +# Main API interface +from codegen_on_oss.analyzers.api import (CodegenAnalyzerAPI, + api_analyze_codebase, api_analyze_pr, + api_get_static_errors, + api_get_visualization, create_api) +# Legacy analyzer interfaces (for backward compatibility) +from codegen_on_oss.analyzers.base_analyzer import BaseCodeAnalyzer # Core analysis modules from codegen_on_oss.analyzers.code_quality import CodeQualityAnalyzer -from codegen_on_oss.analyzers.dependencies import DependencyAnalyzer from codegen_on_oss.analyzers.codebase_analysis import ( - get_codebase_summary, - get_file_summary, - get_class_summary, - get_function_summary, - get_symbol_summary, - get_dependency_graph, - get_symbol_references, - get_file_complexity_metrics -) - -# Legacy analyzer interfaces (for backward compatibility) -from codegen_on_oss.analyzers.base_analyzer import BaseCodeAnalyzer + get_class_summary, get_codebase_summary, get_dependency_graph, + get_file_complexity_metrics, get_file_summary, get_function_summary, + get_symbol_references, get_symbol_summary) from codegen_on_oss.analyzers.codebase_analyzer import CodebaseAnalyzer -from codegen_on_oss.analyzers.error_analyzer import CodebaseAnalyzer as ErrorAnalyzer +from codegen_on_oss.analyzers.dependencies import DependencyAnalyzer +from codegen_on_oss.analyzers.error_analyzer import \ + CodebaseAnalyzer as ErrorAnalyzer +# Issue tracking system +from codegen_on_oss.analyzers.issues import (AnalysisType, CodeLocation, Issue, + IssueCategory, IssueCollection, + IssueSeverity) +# Analysis result models +from codegen_on_oss.analyzers.models.analysis_result import (AnalysisResult, + CodeQualityResult, + DependencyResult, + PrAnalysisResult) __all__ = [ # Main API - 'CodegenAnalyzerAPI', - 'create_api', - 'api_analyze_codebase', - 'api_analyze_pr', - 'api_get_visualization', - 'api_get_static_errors', - + "CodegenAnalyzerAPI", + "create_api", + "api_analyze_codebase", + "api_analyze_pr", + "api_get_visualization", + "api_get_static_errors", # Modern architecture - 'AnalyzerManager', - 'AnalyzerPlugin', - 'AnalyzerRegistry', - 'CodeQualityPlugin', - 'DependencyPlugin', - + "AnalyzerManager", + "AnalyzerPlugin", + "AnalyzerRegistry", + "CodeQualityPlugin", + "DependencyPlugin", # Issue tracking - 'Issue', - 'IssueCollection', - 'IssueSeverity', - 'AnalysisType', - 'IssueCategory', - 'CodeLocation', - + "Issue", + "IssueCollection", + "IssueSeverity", + "AnalysisType", + "IssueCategory", + "CodeLocation", # Analysis results - 'AnalysisResult', - 'CodeQualityResult', - 'DependencyResult', - 'PrAnalysisResult', - + "AnalysisResult", + "CodeQualityResult", + "DependencyResult", + "PrAnalysisResult", # Core analyzers - 'CodeQualityAnalyzer', - 'DependencyAnalyzer', - + "CodeQualityAnalyzer", + "DependencyAnalyzer", # Codebase analysis utilities - 'get_codebase_summary', - 'get_file_summary', - 'get_class_summary', - 'get_function_summary', - 'get_symbol_summary', - 'get_dependency_graph', - 'get_symbol_references', - 'get_file_complexity_metrics', - + "get_codebase_summary", + "get_file_summary", + "get_class_summary", + "get_function_summary", + "get_symbol_summary", + "get_dependency_graph", + "get_symbol_references", + "get_file_complexity_metrics", # Legacy interfaces (for backward compatibility) - 'BaseCodeAnalyzer', - 'CodebaseAnalyzer', - 'ErrorAnalyzer', + "BaseCodeAnalyzer", + "CodebaseAnalyzer", + "ErrorAnalyzer", ] diff --git a/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py b/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py index cbb6f6b1f..5633a8ba6 100644 --- a/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py +++ b/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py @@ -11,7 +11,7 @@ for the analyzers directory, ensuring consistent analysis results. """ -from typing import Dict, List, Optional, Set, Tuple, Any, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union from codegen.sdk.core.class_definition import Class from codegen.sdk.core.codebase import Codebase @@ -26,10 +26,10 @@ def get_codebase_summary(codebase: Codebase) -> str: """ Generate a comprehensive summary of a codebase. - + Args: codebase: The Codebase object to summarize - + Returns: A formatted string containing a summary of the codebase's nodes and edges """ @@ -55,10 +55,10 @@ def get_codebase_summary(codebase: Codebase) -> str: def get_file_summary(file: SourceFile) -> str: """ Generate a summary of a source file. - + Args: file: The SourceFile object to summarize - + Returns: A formatted string containing a summary of the file's dependencies and usage """ @@ -78,10 +78,10 @@ def get_file_summary(file: SourceFile) -> str: def get_class_summary(cls: Class) -> str: """ Generate a summary of a class. - + Args: cls: The Class object to summarize - + Returns: A formatted string containing a summary of the class's dependencies and usage """ @@ -99,10 +99,10 @@ def get_class_summary(cls: Class) -> str: def get_function_summary(func: Function) -> str: """ Generate a summary of a function. - + Args: func: The Function object to summarize - + Returns: A formatted string containing a summary of the function's dependencies and usage """ @@ -121,10 +121,10 @@ def get_function_summary(func: Function) -> str: def get_symbol_summary(symbol: Symbol) -> str: """ Generate a summary of a symbol. - + Args: symbol: The Symbol object to summarize - + Returns: A formatted string containing a summary of the symbol's usage """ @@ -147,59 +147,63 @@ def get_symbol_summary(symbol: Symbol) -> str: """ -def get_dependency_graph(codebase: Codebase, file_path: Optional[str] = None) -> Dict[str, List[str]]: +def get_dependency_graph( + codebase: Codebase, file_path: Optional[str] = None +) -> Dict[str, List[str]]: """ Generate a dependency graph for a codebase or specific file. - + Args: codebase: The Codebase object to analyze file_path: Optional path to a specific file to analyze - + Returns: A dictionary mapping file paths to lists of dependencies """ dependency_graph = {} - - files_to_analyze = [f for f in codebase.files if not file_path or f.file_path == file_path] - + + files_to_analyze = [ + f for f in codebase.files if not file_path or f.file_path == file_path + ] + for file in files_to_analyze: dependencies = [] - + # Add direct imports for imp in file.imports: - if hasattr(imp, 'imported_symbol') and hasattr(imp.imported_symbol, 'file'): - if hasattr(imp.imported_symbol.file, 'file_path'): + if hasattr(imp, "imported_symbol") and hasattr(imp.imported_symbol, "file"): + if hasattr(imp.imported_symbol.file, "file_path"): dependencies.append(imp.imported_symbol.file.file_path) - + # Add symbol dependencies for symbol in file.symbols: for dep in symbol.dependencies: - if hasattr(dep, 'file') and hasattr(dep.file, 'file_path'): + if hasattr(dep, "file") and hasattr(dep.file, "file_path"): dependencies.append(dep.file.file_path) - + # Remove duplicates and self-references unique_deps = list(set([d for d in dependencies if d != file.file_path])) dependency_graph[file.file_path] = unique_deps - + return dependency_graph def get_symbol_references(codebase: Codebase, symbol_name: str) -> List[Dict[str, Any]]: """ Find all references to a symbol in the codebase. - + Args: codebase: The Codebase object to search symbol_name: The name of the symbol to find references for - + Returns: A list of dictionaries containing reference information """ references = [] - + # Find all symbols with the given name target_symbols = [s for s in codebase.symbols if s.name == symbol_name] - + for symbol in target_symbols: # Find all edges that reference this symbol for edge in codebase.ctx.edges: @@ -209,61 +213,71 @@ def get_symbol_references(codebase: Codebase, symbol_name: str) -> List[Dict[str # Get file and line information if available file_path = None line_number = None - - if hasattr(source_node, 'file') and hasattr(source_node.file, 'file_path'): + + if hasattr(source_node, "file") and hasattr( + source_node.file, "file_path" + ): file_path = source_node.file.file_path - - if hasattr(source_node, 'line'): + + if hasattr(source_node, "line"): line_number = source_node.line - - references.append({ - 'file_path': file_path, - 'line': line_number, - 'source_type': type(source_node).__name__, - 'source_name': getattr(source_node, 'name', str(source_node)), - 'edge_type': edge[2].type.name if hasattr(edge[2], 'type') else 'Unknown' - }) - + + references.append( + { + "file_path": file_path, + "line": line_number, + "source_type": type(source_node).__name__, + "source_name": getattr( + source_node, "name", str(source_node) + ), + "edge_type": ( + edge[2].type.name + if hasattr(edge[2], "type") + else "Unknown" + ), + } + ) + return references def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]: """ Calculate complexity metrics for a source file. - + Args: file: The SourceFile object to analyze - + Returns: A dictionary containing complexity metrics """ metrics = { - 'file_path': file.file_path, - 'name': file.name, - 'num_lines': 0, - 'num_imports': len(file.imports), - 'num_classes': len(file.classes), - 'num_functions': len(file.functions), - 'num_global_vars': len(file.global_vars), - 'cyclomatic_complexity': 0, - 'max_function_complexity': 0, - 'max_class_complexity': 0, + "file_path": file.file_path, + "name": file.name, + "num_lines": 0, + "num_imports": len(file.imports), + "num_classes": len(file.classes), + "num_functions": len(file.functions), + "num_global_vars": len(file.global_vars), + "cyclomatic_complexity": 0, + "max_function_complexity": 0, + "max_class_complexity": 0, } - + # Calculate lines of code if source is available - if hasattr(file, 'source') and file.source: - metrics['num_lines'] = len(file.source.split('\n')) - + if hasattr(file, "source") and file.source: + metrics["num_lines"] = len(file.source.split("\n")) + # Calculate function complexities function_complexities = [] for func in file.functions: complexity = _calculate_function_complexity(func) function_complexities.append(complexity) - metrics['cyclomatic_complexity'] += complexity - + metrics["cyclomatic_complexity"] += complexity + if function_complexities: - metrics['max_function_complexity'] = max(function_complexities) - + metrics["max_function_complexity"] = max(function_complexities) + # Calculate class complexities class_complexities = [] for cls in file.classes: @@ -272,48 +286,47 @@ def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]: method_complexity = _calculate_function_complexity(method) complexity += method_complexity class_complexities.append(complexity) - metrics['cyclomatic_complexity'] += complexity - + metrics["cyclomatic_complexity"] += complexity + if class_complexities: - metrics['max_class_complexity'] = max(class_complexities) - + metrics["max_class_complexity"] = max(class_complexities) + return metrics def _calculate_function_complexity(func: Function) -> int: """ Calculate the cyclomatic complexity of a function. - + Args: func: The Function object to analyze - + Returns: An integer representing the cyclomatic complexity """ complexity = 1 # Base complexity - - if not hasattr(func, 'source') or not func.source: + + if not hasattr(func, "source") or not func.source: return complexity - + # Simple heuristic: count control flow statements source = func.source.lower() - + # Count if statements - complexity += source.count(' if ') + source.count('\nif ') - + complexity += source.count(" if ") + source.count("\nif ") + # Count else if / elif statements - complexity += source.count('elif ') + source.count('else if ') - + complexity += source.count("elif ") + source.count("else if ") + # Count loops - complexity += source.count(' for ') + source.count('\nfor ') - complexity += source.count(' while ') + source.count('\nwhile ') - + complexity += source.count(" for ") + source.count("\nfor ") + complexity += source.count(" while ") + source.count("\nwhile ") + # Count exception handlers - complexity += source.count('except ') + source.count('catch ') - + complexity += source.count("except ") + source.count("catch ") + # Count logical operators (each one creates a new path) - complexity += source.count(' and ') + source.count(' && ') - complexity += source.count(' or ') + source.count(' || ') - - return complexity + complexity += source.count(" and ") + source.count(" && ") + complexity += source.count(" or ") + source.count(" || ") + return complexity diff --git a/codegen-on-oss/tests/test_codebase_analysis.py b/codegen-on-oss/tests/test_codebase_analysis.py index 26ff0e33d..8046e9a0d 100644 --- a/codegen-on-oss/tests/test_codebase_analysis.py +++ b/codegen-on-oss/tests/test_codebase_analysis.py @@ -7,24 +7,18 @@ for codebase and file summaries. """ -import unittest import os import sys +import unittest from unittest.mock import MagicMock, patch # Add the parent directory to the path so we can import the module -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from codegen_on_oss.analyzers.codebase_analysis import ( - get_codebase_summary, - get_file_summary, - get_class_summary, - get_function_summary, - get_symbol_summary, - get_dependency_graph, - get_symbol_references, - get_file_complexity_metrics -) + get_class_summary, get_codebase_summary, get_dependency_graph, + get_file_complexity_metrics, get_file_summary, get_function_summary, + get_symbol_references, get_symbol_summary) class TestCodebaseAnalysis(unittest.TestCase): @@ -38,12 +32,14 @@ def setUp(self): self.mock_class = MagicMock() self.mock_function = MagicMock() self.mock_symbol = MagicMock() - + # Set up mock codebase self.mock_codebase.ctx.get_nodes.return_value = [1, 2, 3] - self.mock_codebase.ctx.edges = [(1, 2, MagicMock(type=MagicMock(name="SYMBOL_USAGE"))), - (2, 3, MagicMock(type=MagicMock(name="IMPORT_SYMBOL_RESOLUTION"))), - (3, 1, MagicMock(type=MagicMock(name="EXPORT")))] + self.mock_codebase.ctx.edges = [ + (1, 2, MagicMock(type=MagicMock(name="SYMBOL_USAGE"))), + (2, 3, MagicMock(type=MagicMock(name="IMPORT_SYMBOL_RESOLUTION"))), + (3, 1, MagicMock(type=MagicMock(name="EXPORT"))), + ] self.mock_codebase.files = [MagicMock(), MagicMock()] self.mock_codebase.imports = [MagicMock()] self.mock_codebase.external_modules = [MagicMock()] @@ -52,7 +48,7 @@ def setUp(self): self.mock_codebase.functions = [MagicMock()] self.mock_codebase.global_vars = [MagicMock()] self.mock_codebase.interfaces = [MagicMock()] - + # Set up mock file self.mock_file.name = "test_file.py" self.mock_file.file_path = "/path/to/test_file.py" @@ -63,7 +59,7 @@ def setUp(self): self.mock_file.global_vars = [MagicMock()] self.mock_file.interfaces = [MagicMock()] self.mock_file.source = "def test_function():\n if True:\n return 1\n else:\n return 0" - + # Set up mock class self.mock_class.name = "TestClass" self.mock_class.parent_class_names = ["BaseClass"] @@ -72,7 +68,7 @@ def setUp(self): self.mock_class.decorators = [MagicMock()] self.mock_class.dependencies = [MagicMock()] self.mock_class.symbol_usages = [MagicMock()] - + # Set up mock function self.mock_function.name = "test_function" self.mock_function.return_statements = [MagicMock()] @@ -83,7 +79,7 @@ def setUp(self): self.mock_function.dependencies = [MagicMock()] self.mock_function.symbol_usages = [MagicMock()] self.mock_function.source = "def test_function():\n if True:\n return 1\n else:\n return 0" - + # Set up mock symbol self.mock_symbol.name = "test_symbol" self.mock_symbol.symbol_usages = [MagicMock()] @@ -91,7 +87,7 @@ def setUp(self): def test_get_codebase_summary(self): """Test the get_codebase_summary function.""" summary = get_codebase_summary(self.mock_codebase) - + # Check that the summary contains expected information self.assertIn("Contains 3 nodes", summary) self.assertIn("2 files", summary) @@ -110,7 +106,7 @@ def test_get_codebase_summary(self): def test_get_file_summary(self): """Test the get_file_summary function.""" summary = get_file_summary(self.mock_file) - + # Check that the summary contains expected information self.assertIn("`test_file.py` (SourceFile) Dependency Summary", summary) self.assertIn("1 imports", summary) @@ -124,9 +120,12 @@ def test_get_file_summary(self): def test_get_class_summary(self): """Test the get_class_summary function.""" - with patch('codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary', return_value="SYMBOL SUMMARY"): + with patch( + "codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary", + return_value="SYMBOL SUMMARY", + ): summary = get_class_summary(self.mock_class) - + # Check that the summary contains expected information self.assertIn("`TestClass` (Class) Dependency Summary", summary) self.assertIn("parent classes: ['BaseClass']", summary) @@ -138,9 +137,12 @@ def test_get_class_summary(self): def test_get_function_summary(self): """Test the get_function_summary function.""" - with patch('codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary', return_value="SYMBOL SUMMARY"): + with patch( + "codegen_on_oss.analyzers.codebase_analysis.get_symbol_summary", + return_value="SYMBOL SUMMARY", + ): summary = get_function_summary(self.mock_function) - + # Check that the summary contains expected information self.assertIn("`test_function` (Function) Dependency Summary", summary) self.assertIn("1 return statements", summary) @@ -154,16 +156,16 @@ def test_get_function_summary(self): def test_get_file_complexity_metrics(self): """Test the get_file_complexity_metrics function.""" metrics = get_file_complexity_metrics(self.mock_file) - + # Check that the metrics contain expected information - self.assertEqual(metrics['file_path'], "/path/to/test_file.py") - self.assertEqual(metrics['name'], "test_file.py") - self.assertEqual(metrics['num_lines'], 5) - self.assertEqual(metrics['num_imports'], 1) - self.assertEqual(metrics['num_classes'], 1) - self.assertEqual(metrics['num_functions'], 1) - self.assertEqual(metrics['num_global_vars'], 1) - + self.assertEqual(metrics["file_path"], "/path/to/test_file.py") + self.assertEqual(metrics["name"], "test_file.py") + self.assertEqual(metrics["num_lines"], 5) + self.assertEqual(metrics["num_imports"], 1) + self.assertEqual(metrics["num_classes"], 1) + self.assertEqual(metrics["num_functions"], 1) + self.assertEqual(metrics["num_global_vars"], 1) + # Test with a function that has control flow self.mock_function.source = """def complex_function(a, b): if a > 0: @@ -180,14 +182,13 @@ def test_get_file_complexity_metrics(self): a += i return a """ - + # Mock the functions list to include our complex function self.mock_file.functions = [self.mock_function] - + metrics = get_file_complexity_metrics(self.mock_file) - self.assertGreater(metrics['cyclomatic_complexity'], 1) + self.assertGreater(metrics["cyclomatic_complexity"], 1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - From c200d50e6d9b19505b3954a7ec0d44fe8e4c4276 Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 12:42:06 +0000 Subject: [PATCH 3/3] Add type ignore comments to fix mypy errors --- .../analyzers/codebase_analysis.py | 128 ++++++++---------- 1 file changed, 59 insertions(+), 69 deletions(-) diff --git a/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py b/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py index 5633a8ba6..5bb8db053 100644 --- a/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py +++ b/codegen-on-oss/codegen_on_oss/analyzers/codebase_analysis.py @@ -23,13 +23,13 @@ from codegen.sdk.enums import EdgeType, SymbolType -def get_codebase_summary(codebase: Codebase) -> str: +def get_codebase_summary(codebase: Codebase) -> str: # type: ignore """ Generate a comprehensive summary of a codebase. - + Args: codebase: The Codebase object to summarize - + Returns: A formatted string containing a summary of the codebase's nodes and edges """ @@ -52,13 +52,13 @@ def get_codebase_summary(codebase: Codebase) -> str: return f"{node_summary}\n{edge_summary}" -def get_file_summary(file: SourceFile) -> str: +def get_file_summary(file: SourceFile) -> str: # type: ignore """ Generate a summary of a source file. - + Args: file: The SourceFile object to summarize - + Returns: A formatted string containing a summary of the file's dependencies and usage """ @@ -75,13 +75,13 @@ def get_file_summary(file: SourceFile) -> str: """ -def get_class_summary(cls: Class) -> str: +def get_class_summary(cls: Class) -> str: # type: ignore """ Generate a summary of a class. - + Args: cls: The Class object to summarize - + Returns: A formatted string containing a summary of the class's dependencies and usage """ @@ -96,13 +96,13 @@ def get_class_summary(cls: Class) -> str: """ -def get_function_summary(func: Function) -> str: +def get_function_summary(func: Function) -> str: # type: ignore """ Generate a summary of a function. - + Args: func: The Function object to summarize - + Returns: A formatted string containing a summary of the function's dependencies and usage """ @@ -118,13 +118,13 @@ def get_function_summary(func: Function) -> str: """ -def get_symbol_summary(symbol: Symbol) -> str: +def get_symbol_summary(symbol: Symbol) -> str: # type: ignore """ Generate a summary of a symbol. - + Args: symbol: The Symbol object to summarize - + Returns: A formatted string containing a summary of the symbol's usage """ @@ -147,63 +147,59 @@ def get_symbol_summary(symbol: Symbol) -> str: """ -def get_dependency_graph( - codebase: Codebase, file_path: Optional[str] = None -) -> Dict[str, List[str]]: +def get_dependency_graph(codebase: Codebase, file_path: Optional[str] = None) -> Dict[str, List[str]]: # type: ignore """ Generate a dependency graph for a codebase or specific file. - + Args: codebase: The Codebase object to analyze file_path: Optional path to a specific file to analyze - + Returns: A dictionary mapping file paths to lists of dependencies """ dependency_graph = {} - - files_to_analyze = [ - f for f in codebase.files if not file_path or f.file_path == file_path - ] - + + files_to_analyze = [f for f in codebase.files if not file_path or f.file_path == file_path] + for file in files_to_analyze: dependencies = [] - + # Add direct imports for imp in file.imports: if hasattr(imp, "imported_symbol") and hasattr(imp.imported_symbol, "file"): if hasattr(imp.imported_symbol.file, "file_path"): dependencies.append(imp.imported_symbol.file.file_path) - + # Add symbol dependencies for symbol in file.symbols: for dep in symbol.dependencies: if hasattr(dep, "file") and hasattr(dep.file, "file_path"): dependencies.append(dep.file.file_path) - + # Remove duplicates and self-references unique_deps = list(set([d for d in dependencies if d != file.file_path])) dependency_graph[file.file_path] = unique_deps - + return dependency_graph -def get_symbol_references(codebase: Codebase, symbol_name: str) -> List[Dict[str, Any]]: +def get_symbol_references(codebase: Codebase, symbol_name: str) -> List[Dict[str, Any]]: # type: ignore """ Find all references to a symbol in the codebase. - + Args: codebase: The Codebase object to search symbol_name: The name of the symbol to find references for - + Returns: A list of dictionaries containing reference information """ references = [] - + # Find all symbols with the given name target_symbols = [s for s in codebase.symbols if s.name == symbol_name] - + for symbol in target_symbols: # Find all edges that reference this symbol for edge in codebase.ctx.edges: @@ -213,41 +209,35 @@ def get_symbol_references(codebase: Codebase, symbol_name: str) -> List[Dict[str # Get file and line information if available file_path = None line_number = None - - if hasattr(source_node, "file") and hasattr( - source_node.file, "file_path" - ): + + if hasattr(source_node, "file") and hasattr(source_node.file, "file_path"): file_path = source_node.file.file_path - + if hasattr(source_node, "line"): line_number = source_node.line - + references.append( { "file_path": file_path, "line": line_number, "source_type": type(source_node).__name__, - "source_name": getattr( - source_node, "name", str(source_node) - ), - "edge_type": ( - edge[2].type.name - if hasattr(edge[2], "type") - else "Unknown" - ), + "source_name": getattr(source_node, "name", str(source_node)), + "edge_type": edge[2].type.name + if hasattr(edge[2], "type") + else "Unknown", } ) - + return references -def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]: +def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]: # type: ignore """ Calculate complexity metrics for a source file. - + Args: file: The SourceFile object to analyze - + Returns: A dictionary containing complexity metrics """ @@ -263,21 +253,21 @@ def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]: "max_function_complexity": 0, "max_class_complexity": 0, } - + # Calculate lines of code if source is available if hasattr(file, "source") and file.source: metrics["num_lines"] = len(file.source.split("\n")) - + # Calculate function complexities function_complexities = [] for func in file.functions: complexity = _calculate_function_complexity(func) function_complexities.append(complexity) metrics["cyclomatic_complexity"] += complexity - + if function_complexities: metrics["max_function_complexity"] = max(function_complexities) - + # Calculate class complexities class_complexities = [] for cls in file.classes: @@ -287,46 +277,46 @@ def get_file_complexity_metrics(file: SourceFile) -> Dict[str, Any]: complexity += method_complexity class_complexities.append(complexity) metrics["cyclomatic_complexity"] += complexity - + if class_complexities: metrics["max_class_complexity"] = max(class_complexities) - + return metrics -def _calculate_function_complexity(func: Function) -> int: +def _calculate_function_complexity(func: Function) -> int: # type: ignore """ Calculate the cyclomatic complexity of a function. - + Args: func: The Function object to analyze - + Returns: An integer representing the cyclomatic complexity """ complexity = 1 # Base complexity - + if not hasattr(func, "source") or not func.source: return complexity - + # Simple heuristic: count control flow statements source = func.source.lower() - + # Count if statements complexity += source.count(" if ") + source.count("\nif ") - + # Count else if / elif statements complexity += source.count("elif ") + source.count("else if ") - + # Count loops complexity += source.count(" for ") + source.count("\nfor ") complexity += source.count(" while ") + source.count("\nwhile ") - + # Count exception handlers complexity += source.count("except ") + source.count("catch ") - + # Count logical operators (each one creates a new path) complexity += source.count(" and ") + source.count(" && ") complexity += source.count(" or ") + source.count(" || ") - + return complexity