From afa35896e993063a5924ffa6209339fa312a69c4 Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Sat, 22 Mar 2025 00:26:31 +0000 Subject: [PATCH 1/2] Add Code Smell Detection and Automatic Refactoring --- src/codegen/__init__.py | 21 +- src/codegen/cli/cli.py | 2 + .../cli/commands/code_smells/__init__.py | 5 + .../cli/commands/code_smells/code_smells.py | 268 +++++++++ src/codegen/cli/commands/code_smells/main.py | 14 + .../sdk/extensions/code_smells/__init__.py | 29 + .../sdk/extensions/code_smells/detector.py | 543 ++++++++++++++++++ .../sdk/extensions/code_smells/refactorer.py | 496 ++++++++++++++++ .../sdk/extensions/code_smells/smells.py | 127 ++++ 9 files changed, 1504 insertions(+), 1 deletion(-) create mode 100644 src/codegen/cli/commands/code_smells/__init__.py create mode 100644 src/codegen/cli/commands/code_smells/code_smells.py create mode 100644 src/codegen/cli/commands/code_smells/main.py create mode 100644 src/codegen/sdk/extensions/code_smells/__init__.py create mode 100644 src/codegen/sdk/extensions/code_smells/detector.py create mode 100644 src/codegen/sdk/extensions/code_smells/refactorer.py create mode 100644 src/codegen/sdk/extensions/code_smells/smells.py diff --git a/src/codegen/__init__.py b/src/codegen/__init__.py index 1b9b91d17..bd0ee6284 100644 --- a/src/codegen/__init__.py +++ b/src/codegen/__init__.py @@ -6,6 +6,25 @@ # from codegen.extensions.index.file_index import FileIndex # from codegen.extensions.langchain.agent import create_agent_with_tools, create_codebase_agent from codegen.sdk.core.codebase import Codebase +from codegen.sdk.extensions.code_smells.detector import CodeSmellDetector +from codegen.sdk.extensions.code_smells.refactorer import CodeSmellRefactorer +from codegen.sdk.extensions.code_smells.smells import ( + CodeSmell, + CodeSmellCategory, + CodeSmellSeverity, +) from codegen.shared.enums.programming_language import ProgrammingLanguage -__all__ = ["CodeAgent", 
"Codebase", "CodegenApp", "Function", "ProgrammingLanguage", "function"] +__all__ = [ + "CodeAgent", + "Codebase", + "CodegenApp", + "Function", + "ProgrammingLanguage", + "function", + "CodeSmellDetector", + "CodeSmellRefactorer", + "CodeSmell", + "CodeSmellCategory", + "CodeSmellSeverity", +] diff --git a/src/codegen/cli/cli.py b/src/codegen/cli/cli.py index 9ac7f69f4..66ae20e4d 100644 --- a/src/codegen/cli/cli.py +++ b/src/codegen/cli/cli.py @@ -2,6 +2,7 @@ from rich.traceback import install from codegen.cli.commands.agent.main import agent_command +from codegen.cli.commands.code_smells.main import code_smells_command from codegen.cli.commands.config.main import config_command from codegen.cli.commands.create.main import create_command from codegen.cli.commands.deploy.main import deploy_command @@ -33,6 +34,7 @@ def main(): # Wrap commands with error handler main.add_command(agent_command) +main.add_command(code_smells_command) main.add_command(init_command) main.add_command(logout_command) main.add_command(login_command) diff --git a/src/codegen/cli/commands/code_smells/__init__.py b/src/codegen/cli/commands/code_smells/__init__.py new file mode 100644 index 000000000..0163a228c --- /dev/null +++ b/src/codegen/cli/commands/code_smells/__init__.py @@ -0,0 +1,5 @@ +"""Code smells detection and refactoring CLI commands.""" + +from codegen.cli.commands.code_smells.code_smells import code_smells + +__all__ = ["code_smells"] \ No newline at end of file diff --git a/src/codegen/cli/commands/code_smells/code_smells.py b/src/codegen/cli/commands/code_smells/code_smells.py new file mode 100644 index 000000000..a8da205eb --- /dev/null +++ b/src/codegen/cli/commands/code_smells/code_smells.py @@ -0,0 +1,268 @@ +"""CLI command for detecting and refactoring code smells.""" + +import json +import os +from pathlib import Path +from typing import Dict, List, Optional + +import click +import rich +from rich.console import Console +from rich.panel import Panel +from rich.table 
import Table +from rich.tree import Tree + +from codegen.cli.sdk.decorator import command +from codegen.sdk.core.codebase import Codebase +from codegen.sdk.extensions.code_smells.detector import CodeSmellDetector, DetectionConfig +from codegen.sdk.extensions.code_smells.refactorer import CodeSmellRefactorer +from codegen.sdk.extensions.code_smells.smells import ( + CodeSmell, + CodeSmellCategory, + CodeSmellSeverity, +) +from codegen.shared.enums.programming_language import ProgrammingLanguage +from codegen.shared.logging.get_logger import get_logger + +logger = get_logger(__name__) +console = Console() + + +@command(help="Detect and refactor code smells in your codebase") +@click.option( + "--path", + "-p", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + default=".", + help="Path to the codebase to analyze", +) +@click.option( + "--language", + "-l", + type=click.Choice(["python", "typescript", "auto"]), + default="auto", + help="Programming language of the codebase", +) +@click.option( + "--severity", + "-s", + type=click.Choice(["low", "medium", "high", "critical", "all"]), + default="all", + help="Minimum severity level of code smells to detect", +) +@click.option( + "--category", + "-c", + type=click.Choice([ + "bloaters", + "object_orientation_abusers", + "change_preventers", + "dispensables", + "couplers", + "all" + ]), + default="all", + help="Category of code smells to detect", +) +@click.option( + "--refactor", + "-r", + is_flag=True, + help="Automatically refactor detected code smells when possible", +) +@click.option( + "--output", + "-o", + type=click.Path(file_okay=True, dir_okay=False), + help="Path to output JSON report", +) +@click.option( + "--long-function-lines", + type=int, + default=50, + help="Threshold for long function detection (lines)", +) +@click.option( + "--long-parameter-list", + type=int, + default=5, + help="Threshold for long parameter list detection", +) +@click.option( + "--duplicate-code-min-lines", + type=int, 
+ default=6, + help="Minimum lines for duplicate code detection", +) +def code_smells( + path: str, + language: str, + severity: str, + category: str, + refactor: bool, + output: Optional[str], + long_function_lines: int, + long_parameter_list: int, + duplicate_code_min_lines: int, +) -> None: + """Detect and optionally refactor code smells in a codebase. + + This command analyzes a codebase for common code smells like long functions, + duplicate code, dead code, etc. It can also automatically refactor some of + these issues. + + Args: + path: Path to the codebase to analyze + language: Programming language of the codebase + severity: Minimum severity level of code smells to detect + category: Category of code smells to detect + refactor: Whether to automatically refactor detected code smells + output: Path to output JSON report + long_function_lines: Threshold for long function detection + long_parameter_list: Threshold for long parameter list detection + duplicate_code_min_lines: Minimum lines for duplicate code detection + """ + # Determine the programming language + prog_language = None + if language != "auto": + prog_language = ProgrammingLanguage(language.upper()) + + # Initialize the codebase + console.print(f"[bold blue]Analyzing codebase at [cyan]{path}[/cyan]...[/bold blue]") + codebase = Codebase(path, language=prog_language) + + # Configure the detector + config = DetectionConfig( + long_function_lines=long_function_lines, + long_parameter_list_threshold=long_parameter_list, + duplicate_code_min_lines=duplicate_code_min_lines, + ) + + # Initialize the detector and refactorer + detector = CodeSmellDetector(codebase, config) + refactorer = CodeSmellRefactorer(codebase) + + # Detect code smells + console.print("[bold blue]Detecting code smells...[/bold blue]") + with console.status("[bold green]Analyzing code...[/bold green]"): + all_smells = detector.detect_all() + + # Filter by severity + if severity != "all": + severity_level = 
CodeSmellSeverity[severity.upper()]
+        all_smells = [smell for smell in all_smells if smell.severity.value >= severity_level.value]
+
+    # Filter by category
+    if category != "all":
+        category_map = {
+            "bloaters": CodeSmellCategory.BLOATERS,
+            "object_orientation_abusers": CodeSmellCategory.OBJECT_ORIENTATION_ABUSERS,
+            "change_preventers": CodeSmellCategory.CHANGE_PREVENTERS,
+            "dispensables": CodeSmellCategory.DISPENSABLES,
+            "couplers": CodeSmellCategory.COUPLERS,
+        }
+        category_enum = category_map[category]
+        all_smells = [smell for smell in all_smells if smell.category == category_enum]
+
+    # Display results
+    if not all_smells:
+        console.print("[bold green]No code smells detected![/bold green]")
+        return
+
+    console.print(f"[bold yellow]Detected {len(all_smells)} code smells:[/bold yellow]")
+
+    # Group by category
+    smells_by_category: Dict[CodeSmellCategory, List[CodeSmell]] = {}
+    for smell in all_smells:
+        if smell.category not in smells_by_category:
+            smells_by_category[smell.category] = []
+        smells_by_category[smell.category].append(smell)
+
+    # Create a tree view of the results
+    tree = Tree("[bold]Code Smells by Category[/bold]")
+    for category, smells in smells_by_category.items():
+        category_node = tree.add(f"[bold]{category.name}[/bold] ({len(smells)} issues)")
+
+        # Group by severity within each category
+        smells_by_severity: Dict[CodeSmellSeverity, List[CodeSmell]] = {}
+        for smell in smells:
+            if smell.severity not in smells_by_severity:
+                smells_by_severity[smell.severity] = []
+            smells_by_severity[smell.severity].append(smell)
+
+        # Add severity nodes
+        for severity, severity_smells in sorted(
+            smells_by_severity.items(), key=lambda x: x[0].value, reverse=True
+        ):
+            severity_color = {
+                CodeSmellSeverity.LOW: "green",
+                CodeSmellSeverity.MEDIUM: "yellow",
+                CodeSmellSeverity.HIGH: "orange3",
+                CodeSmellSeverity.CRITICAL: "red",
+            }[severity]
+
+            severity_node = category_node.add(
+                f"[bold {severity_color}]{severity.name}[/bold
{severity_color}] ({len(severity_smells)} issues)" + ) + + # Add individual smells + for smell in severity_smells: + refactorable = " [bold green](auto-refactorable)[/bold green]" if refactorer.can_refactor(smell) else "" + severity_node.add(f"{smell.symbol.name}: {smell.description}{refactorable}") + + console.print(tree) + + # Refactor if requested + if refactor: + refactorable_smells = [smell for smell in all_smells if refactorer.can_refactor(smell)] + + if not refactorable_smells: + console.print("[bold yellow]No automatically refactorable code smells found.[/bold yellow]") + else: + console.print(f"[bold blue]Refactoring {len(refactorable_smells)} code smells...[/bold blue]") + + with console.status("[bold green]Refactoring code...[/bold green]"): + results = refactorer.refactor_all(refactorable_smells) + + # Display refactoring results + success_count = sum(1 for success in results.values() if success) + console.print(f"[bold green]Successfully refactored {success_count}/{len(results)} code smells.[/bold green]") + + if success_count < len(results): + console.print("[bold yellow]Some refactorings failed. 
See details below:[/bold yellow]")
+                for smell, success in results.items():
+                    if not success:
+                        console.print(f"[bold red]Failed to refactor:[/bold red] {smell}")
+
+    # Output JSON report if requested
+    if output:
+        report = {
+            "summary": {
+                "total_smells": len(all_smells),
+                "by_category": {category.name: len(smells) for category, smells in smells_by_category.items()},
+                "by_severity": {
+                    severity.name: len([s for s in all_smells if s.severity == severity])
+                    for severity in CodeSmellSeverity
+                },
+                "refactorable": len([s for s in all_smells if refactorer.can_refactor(s)]),
+            },
+            "smells": [
+                {
+                    "name": smell.name,
+                    "description": smell.description,
+                    "category": smell.category.name,
+                    "severity": smell.severity.name,
+                    "symbol": smell.symbol.name,
+                    "file": str(smell.symbol.file.path) if hasattr(smell.symbol, "file") and smell.symbol.file else None,
+                    "refactoring_suggestions": smell.refactoring_suggestions,
+                    "can_auto_refactor": refactorer.can_refactor(smell),
+                }
+                for smell in all_smells
+            ],
+        }
+
+        # Write the report
+        with open(output, "w", encoding="utf-8") as f:
+            json.dump(report, f, indent=2)
+
+        console.print(f"[bold blue]Report written to [cyan]{output}[/cyan][/bold blue]")
\ No newline at end of file
diff --git a/src/codegen/cli/commands/code_smells/main.py b/src/codegen/cli/commands/code_smells/main.py
new file mode 100644
index 000000000..4af114992
--- /dev/null
+++ b/src/codegen/cli/commands/code_smells/main.py
@@ -0,0 +1,14 @@
+"""Main entry point for the code_smells command."""
+
+import click
+
+from codegen.cli.commands.code_smells import code_smells
+
+
+@click.group(name="code-smells", help="Detect and refactor code smells in your codebase")
+def code_smells_command():
+    """Detect and refactor code smells in your codebase."""
+    pass
+
+
+code_smells_command.add_command(code_smells, name="detect")
\ No newline at end of file
diff --git a/src/codegen/sdk/extensions/code_smells/__init__.py b/src/codegen/sdk/extensions/code_smells/__init__.py
new file mode
100644 index 000000000..82c0bcafa --- /dev/null +++ b/src/codegen/sdk/extensions/code_smells/__init__.py @@ -0,0 +1,29 @@ +"""Code smell detection and refactoring module for Codegen. + +This module provides tools to automatically detect and refactor common code smells +in Python and TypeScript codebases. +""" + +from codegen.sdk.extensions.code_smells.detector import CodeSmellDetector +from codegen.sdk.extensions.code_smells.refactorer import CodeSmellRefactorer +from codegen.sdk.extensions.code_smells.smells import ( + CodeSmell, + DuplicateCode, + LongFunction, + LongParameterList, + DeadCode, + ComplexConditional, + DataClump, +) + +__all__ = [ + "CodeSmellDetector", + "CodeSmellRefactorer", + "CodeSmell", + "DuplicateCode", + "LongFunction", + "LongParameterList", + "DeadCode", + "ComplexConditional", + "DataClump", +] \ No newline at end of file diff --git a/src/codegen/sdk/extensions/code_smells/detector.py b/src/codegen/sdk/extensions/code_smells/detector.py new file mode 100644 index 000000000..371aae1dc --- /dev/null +++ b/src/codegen/sdk/extensions/code_smells/detector.py @@ -0,0 +1,543 @@ +"""Code smell detector for Python and TypeScript codebases.""" + +import re +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, cast + +from codegen.sdk.core.class_definition import Class +from codegen.sdk.core.codebase import Codebase +from codegen.sdk.core.file import File, SourceFile +from codegen.sdk.core.function import Function +from codegen.sdk.core.symbol import Symbol +from codegen.sdk.extensions.code_smells.smells import ( + CodeSmell, + CodeSmellCategory, + CodeSmellSeverity, + ComplexConditional, + DataClump, + DeadCode, + DuplicateCode, + LongFunction, + LongParameterList, +) +from codegen.shared.enums.programming_language import ProgrammingLanguage +from codegen.shared.logging.get_logger import get_logger + +logger = get_logger(__name__) + + +@dataclass +class 
DetectionConfig: + """Configuration for code smell detection.""" + + # Thresholds for different code smells + long_function_lines: int = 50 + long_function_complexity_threshold: int = 15 + long_parameter_list_threshold: int = 5 + duplicate_code_min_lines: int = 6 + duplicate_code_similarity_threshold: float = 0.8 + complex_conditional_depth_threshold: int = 3 + complex_conditional_operators_threshold: int = 4 + data_clump_min_fields: int = 3 + data_clump_min_classes: int = 2 + + # Enable/disable specific detectors + detect_long_functions: bool = True + detect_long_parameter_lists: bool = True + detect_duplicate_code: bool = True + detect_dead_code: bool = True + detect_complex_conditionals: bool = True + detect_data_clumps: bool = True + + +class CodeSmellDetector: + """Detector for common code smells in Python and TypeScript codebases.""" + + def __init__( + self, + codebase: Codebase, + config: Optional[DetectionConfig] = None + ): + """Initialize the code smell detector. + + Args: + codebase: The codebase to analyze + config: Configuration for detection thresholds and enabled detectors + """ + self.codebase = codebase + self.config = config or DetectionConfig() + self._smells: List[CodeSmell] = [] + + # Register detection methods + self._detectors: Dict[str, Callable[[], List[CodeSmell]]] = { + "long_functions": self._detect_long_functions, + "long_parameter_lists": self._detect_long_parameter_lists, + "duplicate_code": self._detect_duplicate_code, + "dead_code": self._detect_dead_code, + "complex_conditionals": self._detect_complex_conditionals, + "data_clumps": self._detect_data_clumps, + } + + def detect_all(self) -> List[CodeSmell]: + """Run all enabled code smell detectors. 
+ + Returns: + A list of all detected code smells + """ + self._smells = [] + + if self.config.detect_long_functions: + self._smells.extend(self._detect_long_functions()) + + if self.config.detect_long_parameter_lists: + self._smells.extend(self._detect_long_parameter_lists()) + + if self.config.detect_duplicate_code: + self._smells.extend(self._detect_duplicate_code()) + + if self.config.detect_dead_code: + self._smells.extend(self._detect_dead_code()) + + if self.config.detect_complex_conditionals: + self._smells.extend(self._detect_complex_conditionals()) + + if self.config.detect_data_clumps: + self._smells.extend(self._detect_data_clumps()) + + return self._smells + + def detect_by_category(self, category: CodeSmellCategory) -> List[CodeSmell]: + """Detect code smells of a specific category. + + Args: + category: The category of code smells to detect + + Returns: + A list of detected code smells in the specified category + """ + all_smells = self.detect_all() + return [smell for smell in all_smells if smell.category == category] + + def detect_by_severity(self, severity: CodeSmellSeverity) -> List[CodeSmell]: + """Detect code smells of a specific severity. + + Args: + severity: The severity level of code smells to detect + + Returns: + A list of detected code smells with the specified severity + """ + all_smells = self.detect_all() + return [smell for smell in all_smells if smell.severity == severity] + + def detect_in_file(self, file: SourceFile) -> List[CodeSmell]: + """Detect code smells in a specific file. + + Args: + file: The file to analyze + + Returns: + A list of detected code smells in the specified file + """ + all_smells = self.detect_all() + return [ + smell for smell in all_smells + if ( + isinstance(smell.symbol, File) and smell.symbol == file + ) or ( + hasattr(smell.symbol, "file") and smell.symbol.file == file + ) + ] + + def _detect_long_functions(self) -> List[CodeSmell]: + """Detect functions that are too long. 
+ + Returns: + A list of LongFunction code smells + """ + smells = [] + + for function in self.codebase.functions: + # Skip functions without a body + if not function.body: + continue + + # Count lines in function body + line_count = len(function.body.split("\n")) + + # Simple complexity metric: count if/for/while statements + complexity = 0 + if function.body: + # Count control flow statements as a simple complexity metric + complexity += function.body.count("if ") + complexity += function.body.count("for ") + complexity += function.body.count("while ") + complexity += function.body.count("except ") + complexity += function.body.count("case ") + + if line_count > self.config.long_function_lines: + severity = CodeSmellSeverity.MEDIUM + if line_count > self.config.long_function_lines * 2: + severity = CodeSmellSeverity.HIGH + + smell = LongFunction( + symbol=function, + severity=severity, + category=CodeSmellCategory.BLOATERS, + description=f"Function is {line_count} lines long (threshold: {self.config.long_function_lines})", + line_count=line_count, + complexity=complexity, + ) + + # Add refactoring suggestions + if complexity > self.config.long_function_complexity_threshold: + smell.refactoring_suggestions.append( + "Extract complex logic into smaller helper functions" + ) + else: + smell.refactoring_suggestions.append( + "Split function into smaller, more focused functions" + ) + + smells.append(smell) + + return smells + + def _detect_long_parameter_lists(self) -> List[CodeSmell]: + """Detect functions with too many parameters. 
+ + Returns: + A list of LongParameterList code smells + """ + smells = [] + + for function in self.codebase.functions: + param_count = len(function.parameters) + + if param_count > self.config.long_parameter_list_threshold: + severity = CodeSmellSeverity.MEDIUM + if param_count > self.config.long_parameter_list_threshold + 3: + severity = CodeSmellSeverity.HIGH + + smell = LongParameterList( + symbol=function, + severity=severity, + category=CodeSmellCategory.BLOATERS, + description=f"Function has {param_count} parameters (threshold: {self.config.long_parameter_list_threshold})", + parameter_count=param_count, + ) + + # Add refactoring suggestions + if self.codebase.language == ProgrammingLanguage.PYTHON: + smell.refactoring_suggestions.append( + "Use a dataclass or named tuple to group related parameters" + ) + elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): + smell.refactoring_suggestions.append( + "Use an options object pattern to group related parameters" + ) + + smell.refactoring_suggestions.append( + "Consider if the function is doing too much and should be split" + ) + + smells.append(smell) + + return smells + + def _detect_duplicate_code(self) -> List[CodeSmell]: + """Detect duplicate code across files. + + This is a simplified implementation that looks for exact duplicates. 
+ A more sophisticated implementation would use techniques like: + - Abstract syntax tree comparison + - Normalized token sequence comparison + - Fuzzy matching algorithms + + Returns: + A list of DuplicateCode code smells + """ + smells = [] + min_lines = self.config.duplicate_code_min_lines + + # Extract code blocks from all files (simplified approach) + code_blocks = [] + for file in self.codebase.files: + if not isinstance(file, SourceFile) or not file.content: + continue + + lines = file.content.split("\n") + # Extract blocks of min_lines consecutive lines + for i in range(len(lines) - min_lines + 1): + block = "\n".join(lines[i:i+min_lines]) + # Skip blocks that are too short or just whitespace + if len(block.strip()) < 30: + continue + code_blocks.append((file, i+1, i+min_lines, block)) + + # Find duplicates (exact matches for simplicity) + block_map = defaultdict(list) + for file, start, end, block in code_blocks: + # Normalize whitespace for comparison + normalized = re.sub(r"\s+", " ", block.strip()) + block_map[normalized].append((file, start, end)) + + # Create code smell for each set of duplicates + for normalized_block, locations in block_map.items(): + if len(locations) < 2: + continue + + # First location is the "original" + original_file, original_start, original_end = locations[0] + duplicate_locations = [(loc[0], loc[1], loc[2]) for loc in locations[1:]] + + severity = CodeSmellSeverity.MEDIUM + if len(locations) > 3: + severity = CodeSmellSeverity.HIGH + + smell = DuplicateCode( + symbol=original_file, + severity=severity, + category=CodeSmellCategory.DISPENSABLES, + description=f"Duplicate code found in {len(locations)} locations", + duplicate_locations=duplicate_locations, + similarity_score=1.0, # Exact match + ) + + # Add refactoring suggestions + if self.codebase.language == ProgrammingLanguage.PYTHON: + smell.refactoring_suggestions.append( + "Extract duplicated code into a shared function" + ) + elif self.codebase.language in 
(ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): + smell.refactoring_suggestions.append( + "Extract duplicated code into a shared utility function" + ) + + smells.append(smell) + + return smells + + def _detect_dead_code(self) -> List[CodeSmell]: + """Detect unused code (functions, classes, variables). + + Returns: + A list of DeadCode code smells + """ + smells = [] + + # Find symbols that are never used + for symbol in self.codebase._symbols(): + # Skip symbols that are likely meant to be public API + if symbol.name.startswith("__") and symbol.name.endswith("__"): + continue + + # Skip symbols that are imported but not defined in this codebase + if not hasattr(symbol, "file") or not symbol.file: + continue + + # Check if the symbol has any usages + if not symbol.usages: + severity = CodeSmellSeverity.LOW + + # Increase severity for larger unused code + if isinstance(symbol, Class) or isinstance(symbol, Function): + if hasattr(symbol, "body") and symbol.body and len(symbol.body.split("\n")) > 20: + severity = CodeSmellSeverity.MEDIUM + + smell = DeadCode( + symbol=symbol, + severity=severity, + category=CodeSmellCategory.DISPENSABLES, + description=f"Unused {symbol.__class__.__name__.lower()} '{symbol.name}'", + ) + + # Add refactoring suggestions + smell.refactoring_suggestions.append( + f"Remove unused {symbol.__class__.__name__.lower()} '{symbol.name}'" + ) + + if isinstance(symbol, Function) and symbol.is_public: + smell.refactoring_suggestions.append( + "If this is part of a public API, document it clearly or mark as deprecated" + ) + + smells.append(smell) + + return smells + + def _detect_complex_conditionals(self) -> List[CodeSmell]: + """Detect overly complex conditional expressions. 
+ + Returns: + A list of ComplexConditional code smells + """ + smells = [] + + for function in self.codebase.functions: + if not function.body: + continue + + # Simple heuristic for conditional complexity + lines = function.body.split("\n") + max_indent = 0 + current_indent = 0 + + # Count boolean operators + boolean_operators = function.body.count(" and ") + function.body.count(" or ") + boolean_operators += function.body.count("&&") + function.body.count("||") + + # Estimate nesting depth + for line in lines: + stripped = line.lstrip() + if not stripped or stripped.startswith(("#", "//", "/*", "*")): + continue + + indent = len(line) - len(stripped) + current_indent = indent + max_indent = max(max_indent, current_indent) + + # Estimate nesting depth (divide by typical indent size) + indent_size = 4 if self.codebase.language == ProgrammingLanguage.PYTHON else 2 + nesting_depth = max_indent // indent_size + + if (nesting_depth > self.config.complex_conditional_depth_threshold or + boolean_operators > self.config.complex_conditional_operators_threshold): + + severity = CodeSmellSeverity.MEDIUM + if nesting_depth > self.config.complex_conditional_depth_threshold + 2: + severity = CodeSmellSeverity.HIGH + + smell = ComplexConditional( + symbol=function, + severity=severity, + category=CodeSmellCategory.CHANGE_PREVENTERS, + description=f"Complex conditionals with nesting depth {nesting_depth} and {boolean_operators} boolean operators", + condition_depth=nesting_depth, + boolean_operators=boolean_operators, + ) + + # Add refactoring suggestions + smell.refactoring_suggestions.append( + "Extract complex conditions into well-named predicate methods" + ) + smell.refactoring_suggestions.append( + "Consider using early returns to reduce nesting" + ) + if boolean_operators > self.config.complex_conditional_operators_threshold: + smell.refactoring_suggestions.append( + "Break complex boolean expressions into smaller, named variables" + ) + + smells.append(smell) + + return 
smells + + def _detect_data_clumps(self) -> List[CodeSmell]: + """Detect data clumps (same fields appearing in multiple classes). + + Returns: + A list of DataClump code smells + """ + smells = [] + + if not self.codebase.classes: + return smells + + # Build a map of field names to the classes they appear in + field_to_classes: Dict[str, Set[Class]] = defaultdict(set) + + for cls in self.codebase.classes: + # Get fields from class + fields = set() + + # For Python, look at instance variables in __init__ + if self.codebase.language == ProgrammingLanguage.PYTHON: + init_method = next((m for m in cls.methods if m.name == "__init__"), None) + if init_method and init_method.body: + # Simple regex to find self.attr = ... assignments + for match in re.finditer(r"self\.(\w+)\s*=", init_method.body): + fields.add(match.group(1)) + + # For TypeScript, look at class properties + elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): + if hasattr(cls, "properties"): + for prop in cls.properties: + fields.add(prop.name) + + # Add fields to the map + for field in fields: + field_to_classes[field].add(cls) + + # Find groups of fields that appear together in multiple classes + field_groups: Dict[frozenset, Set[Class]] = defaultdict(set) + + # Start with individual fields that appear in multiple classes + candidate_fields = {field for field, classes in field_to_classes.items() + if len(classes) >= self.config.data_clump_min_classes} + + # Skip if we don't have enough candidate fields + if len(candidate_fields) < self.config.data_clump_min_fields: + return smells + + # Build field groups (this is a simplified approach) + # A more sophisticated approach would use frequent itemset mining algorithms + for cls in self.codebase.classes: + # Get fields from this class that are in our candidate set + cls_fields = {field for field in candidate_fields + if cls in field_to_classes[field]} + + # Skip if not enough fields + if len(cls_fields) < 
self.config.data_clump_min_fields: + continue + + # Add all combinations of min_fields fields + for i in range(self.config.data_clump_min_fields, len(cls_fields) + 1): + # This is inefficient for large numbers of fields, but works for demonstration + from itertools import combinations + for combo in combinations(cls_fields, i): + field_group = frozenset(combo) + field_groups[field_group].add(cls) + + # Create code smells for field groups that appear in multiple classes + for field_group, classes in field_groups.items(): + if len(classes) < self.config.data_clump_min_classes: + continue + + # Use the first class as the "primary" class for the smell + primary_class = next(iter(classes)) + other_classes = set(classes) - {primary_class} + + severity = CodeSmellSeverity.MEDIUM + if len(field_group) > self.config.data_clump_min_fields + 2: + severity = CodeSmellSeverity.HIGH + + smell = DataClump( + symbol=primary_class, + severity=severity, + category=CodeSmellCategory.COUPLERS, + description=f"Data clump: fields {', '.join(field_group)} appear together in {len(classes)} classes", + clumped_fields=list(field_group), + appears_in_classes=list(classes), + ) + + # Add refactoring suggestions + if self.codebase.language == ProgrammingLanguage.PYTHON: + smell.refactoring_suggestions.append( + f"Extract fields {', '.join(field_group)} into a new class" + ) + smell.refactoring_suggestions.append( + "Consider using composition instead of duplicating these fields" + ) + elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): + smell.refactoring_suggestions.append( + f"Extract fields {', '.join(field_group)} into a new interface or type" + ) + smell.refactoring_suggestions.append( + "Use composition to share this data structure between classes" + ) + + smells.append(smell) + + return smells \ No newline at end of file diff --git a/src/codegen/sdk/extensions/code_smells/refactorer.py b/src/codegen/sdk/extensions/code_smells/refactorer.py 
new file mode 100644 index 000000000..991558cc1 --- /dev/null +++ b/src/codegen/sdk/extensions/code_smells/refactorer.py @@ -0,0 +1,496 @@ +"""Code smell refactoring tools for Python and TypeScript codebases.""" + +from abc import ABC, abstractmethod +import re +from typing import Dict, List, Optional, Type, cast + +from codegen.sdk.core.class_definition import Class +from codegen.sdk.core.codebase import Codebase +from codegen.sdk.core.file import SourceFile +from codegen.sdk.core.function import Function +from codegen.sdk.core.symbol import Symbol +from codegen.sdk.extensions.code_smells.smells import ( + CodeSmell, + ComplexConditional, + DataClump, + DeadCode, + DuplicateCode, + LongFunction, + LongParameterList, +) +from codegen.shared.enums.programming_language import ProgrammingLanguage +from codegen.shared.logging.get_logger import get_logger + +logger = get_logger(__name__) + + +class RefactoringStrategy(ABC): + """Base class for all refactoring strategies.""" + + @abstractmethod + def can_refactor(self, smell: CodeSmell) -> bool: + """Check if this strategy can refactor the given code smell.""" + pass + + @abstractmethod + def refactor(self, smell: CodeSmell) -> bool: + """Refactor the code smell. + + Returns: + True if refactoring was successful, False otherwise + """ + pass + + +class LongFunctionRefactoring(RefactoringStrategy): + """Strategy for refactoring long functions.""" + + def __init__(self, codebase: Codebase): + """Initialize the refactoring strategy. + + Args: + codebase: The codebase to refactor + """ + self.codebase = codebase + + def can_refactor(self, smell: CodeSmell) -> bool: + """Check if this strategy can refactor the given code smell.""" + return ( + isinstance(smell, LongFunction) and + smell.can_auto_refactor() + ) + + def refactor(self, smell: CodeSmell) -> bool: + """Refactor a long function by extracting parts into helper functions. 
+ + This is a simplified implementation that extracts code blocks based on + indentation patterns. A more sophisticated implementation would: + - Use AST analysis to identify logical blocks + - Analyze variable usage to determine parameters and return values + - Generate appropriate function names based on the extracted code + + Returns: + True if refactoring was successful, False otherwise + """ + if not self.can_refactor(smell): + return False + + long_function = cast(LongFunction, smell) + function = long_function.symbol + + if not function.body: + return False + + # Simple approach: look for blocks with consistent indentation + lines = function.body.split("\n") + + # Find indentation blocks (simplified) + blocks = [] + current_block = [] + current_indent = None + + for line in lines: + if not line.strip(): + current_block.append(line) + continue + + indent = len(line) - len(line.lstrip()) + + if current_indent is None: + current_indent = indent + current_block.append(line) + elif indent == current_indent: + current_block.append(line) + else: + # New indentation level, finish current block if it's substantial + if len(current_block) >= 5: # Only extract blocks of reasonable size + blocks.append((current_indent, current_block)) + current_block = [line] + current_indent = indent + + # Add the last block if it's substantial + if current_block and len(current_block) >= 5: + blocks.append((current_indent, current_block)) + + # Skip if we couldn't identify good extraction candidates + if not blocks: + return False + + # Sort blocks by size (largest first) and extract the largest ones + blocks.sort(key=lambda b: len(b[1]), reverse=True) + + # Extract up to 2 blocks (to avoid over-refactoring) + extracted_count = 0 + new_body_lines = lines.copy() + + for indent, block in blocks[:2]: + # Skip if block is too small after filtering blank lines + content_lines = [l for l in block if l.strip()] + if len(content_lines) < 5: + continue + + # Generate a helper function name 
based on first line content + first_content_line = next((l for l in block if l.strip()), "") + helper_name = self._generate_helper_name(first_content_line, function.name) + + # Determine the start and end indices in the original function + start_idx = lines.index(block[0]) + end_idx = start_idx + len(block) - 1 + + # Create the helper function + helper_function = self._create_helper_function( + function, helper_name, block, indent + ) + + if helper_function: + # Replace the block with a call to the helper + call_line = " " * indent + f"{helper_name}()" # Simplified, should include params + new_body_lines[start_idx:end_idx+1] = [call_line] + extracted_count += 1 + + if extracted_count == 0: + return False + + # Update the original function body + function.body = "\n".join(new_body_lines) + return True + + def _generate_helper_name(self, first_line: str, parent_name: str) -> str: + """Generate a name for the extracted helper function.""" + # Strip leading whitespace and common prefixes + clean_line = first_line.lstrip() + for prefix in ["if ", "for ", "while ", "try:", "with "]: + if clean_line.startswith(prefix): + clean_line = clean_line[len(prefix):].strip() + break + + # Extract meaningful words (simplified) + words = [] + for word in clean_line.split()[:3]: # Use first 3 words max + # Clean up the word + word = ''.join(c for c in word if c.isalnum()) + if word and not word.isdigit(): + words.append(word.lower()) + + if not words: + return f"_{parent_name}_helper" + + # Combine words into a function name + return f"_{parent_name}_{'_'.join(words)}" + + def _create_helper_function( + self, + parent: Function, + name: str, + block_lines: List[str], + base_indent: int + ) -> Optional[Function]: + """Create a helper function from the extracted block.""" + # Determine the file to add the helper to + if not parent.file: + return None + + # Remove the base indentation from all lines + dedented_lines = [] + for line in block_lines: + if not line.strip(): + 
dedented_lines.append("") + else: + # Ensure we don't have negative indentation + current_indent = len(line) - len(line.lstrip()) + new_indent = max(0, current_indent - base_indent) + dedented_lines.append(" " * new_indent + line.lstrip()) + + # Create the helper function body + helper_body = "\n".join(dedented_lines) + + # Add the helper function to the file + if self.codebase.language == ProgrammingLanguage.PYTHON: + # For Python, add the helper after the parent function + helper = parent.file.add_function( + name=name, + body=helper_body, + parameters=[], # Simplified, should analyze needed parameters + after=parent + ) + else: + # For other languages, add at the end of the file + helper = parent.file.add_function( + name=name, + body=helper_body, + parameters=[] # Simplified, should analyze needed parameters + ) + + return helper + + +class DeadCodeRefactoring(RefactoringStrategy): + """Strategy for refactoring dead code.""" + + def __init__(self, codebase: Codebase): + """Initialize the refactoring strategy.""" + self.codebase = codebase + + def can_refactor(self, smell: CodeSmell) -> bool: + """Check if this strategy can refactor the given code smell.""" + return isinstance(smell, DeadCode) and smell.can_auto_refactor() + + def refactor(self, smell: CodeSmell) -> bool: + """Refactor dead code by removing it. 
+ + Returns: + True if refactoring was successful, False otherwise + """ + if not self.can_refactor(smell): + return False + + dead_code = cast(DeadCode, smell) + symbol = dead_code.symbol + + # Remove the symbol + try: + symbol.delete() + return True + except Exception as e: + logger.error(f"Failed to remove dead code {symbol.name}: {e}") + return False + + +class LongParameterListRefactoring(RefactoringStrategy): + """Strategy for refactoring functions with long parameter lists.""" + + def __init__(self, codebase: Codebase): + """Initialize the refactoring strategy.""" + self.codebase = codebase + + def can_refactor(self, smell: CodeSmell) -> bool: + """Check if this strategy can refactor the given code smell.""" + return ( + isinstance(smell, LongParameterList) and + smell.can_auto_refactor() + ) + + def refactor(self, smell: CodeSmell) -> bool: + """Refactor a function with a long parameter list by introducing a parameter object. + + This is a simplified implementation that groups parameters into a class/object. 
+ A more sophisticated implementation would: + - Analyze parameter usage to determine logical groupings + - Update all call sites to use the new parameter object + + Returns: + True if refactoring was successful, False otherwise + """ + if not self.can_refactor(smell): + return False + + long_param_list = cast(LongParameterList, smell) + function = long_param_list.symbol + + if not function.parameters or not function.file: + return False + + # Create a name for the parameter object + param_object_name = f"{function.name.title().replace('_', '')}Params" + + # Group parameters (simplified approach) + # In a real implementation, we would analyze parameter usage patterns + # to determine logical groupings + params = function.parameters + + # Skip if we don't have enough parameters to refactor + if len(params) <= 3: + return False + + # Create the parameter object + if self.codebase.language == ProgrammingLanguage.PYTHON: + return self._refactor_python(function, params, param_object_name) + elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): + return self._refactor_typescript(function, params, param_object_name) + else: + return False + + def _refactor_python(self, function: Function, params: List, param_object_name: str) -> bool: + """Refactor a Python function with a long parameter list.""" + if not function.file: + return False + + # Create a dataclass for the parameters + dataclass_def = [ + "@dataclass", + f"class {param_object_name}:", + " \"\"\"Parameter object for {function.name}\"\"\"", + ] + + # Add fields for each parameter + for param in params: + # Skip self/cls for methods + if param.name in ("self", "cls"): + continue + + # Add type hint if available + type_hint = f": {param.type_annotation}" if hasattr(param, "type_annotation") and param.type_annotation else "" + default = f" = {param.default}" if hasattr(param, "default") and param.default is not None else "" + dataclass_def.append(f" 
{param.name}{type_hint}{default}") + + # Add the dataclass to the file + dataclass_body = "\n".join(dataclass_def) + + # Add import for dataclass if needed + has_dataclass_import = any( + "from dataclasses import dataclass" in imp.source + for imp in function.file.imports + if hasattr(imp, "source") + ) + + if not has_dataclass_import: + function.file.add_import("from dataclasses import dataclass") + + # Add the dataclass to the file + function.file.add_class( + name=param_object_name, + body="\n".join(dataclass_def[2:]), # Skip the decorator and class line + decorators=["dataclass"], + before=function + ) + + # Update the function signature + # Keep special parameters like self/cls + special_params = [p for p in params if p.name in ("self", "cls")] + new_params = special_params + [f"params: {param_object_name}"] + + # Update function body to use the params object + if function.body: + body_lines = function.body.split("\n") + for i, line in enumerate(body_lines): + for param in params: + # Skip self/cls + if param.name in ("self", "cls"): + continue + + # Replace parameter references with params.parameter + # This is a simplified approach and might need more sophisticated regex + body_lines[i] = re.sub( + r'\b' + param.name + r'\b', + f"params.{param.name}", + line + ) + + function.body = "\n".join(body_lines) + + # Update the function signature + function.parameters = new_params + + return True + + def _refactor_typescript(self, function: Function, params: List, param_object_name: str) -> bool: + """Refactor a TypeScript function with a long parameter list.""" + if not function.file: + return False + + # Create an interface for the parameters + interface_def = [ + f"interface {param_object_name} {{", + ] + + # Add fields for each parameter + for param in params: + # Add type annotation if available + type_annotation = f": {param.type}" if hasattr(param, "type") and param.type else ": any" + interface_def.append(f" {param.name}{type_annotation};") + + 
interface_def.append("}") + + # Add the interface to the file + interface_body = "\n".join(interface_def) + function.file.add_interface( + name=param_object_name, + body="\n".join(interface_def[1:-1]), # Skip the interface line and closing brace + before=function + ) + + # Update the function signature + new_params = [f"params: {param_object_name}"] + + # Update function body to use the params object + if function.body: + body_lines = function.body.split("\n") + for i, line in enumerate(body_lines): + for param in params: + # Replace parameter references with params.parameter + body_lines[i] = re.sub( + r'\b' + param.name + r'\b', + f"params.{param.name}", + line + ) + + function.body = "\n".join(body_lines) + + # Update the function signature + function.parameters = new_params + + return True + + +class CodeSmellRefactorer: + """Refactorer for common code smells in Python and TypeScript codebases.""" + + def __init__(self, codebase: Codebase): + """Initialize the code smell refactorer. + + Args: + codebase: The codebase to refactor + """ + self.codebase = codebase + + # Register refactoring strategies + self._strategies: Dict[Type[CodeSmell], RefactoringStrategy] = { + LongFunction: LongFunctionRefactoring(codebase), + DeadCode: DeadCodeRefactoring(codebase), + LongParameterList: LongParameterListRefactoring(codebase), + # Add more strategies as they are implemented + } + + def can_refactor(self, smell: CodeSmell) -> bool: + """Check if a code smell can be automatically refactored. + + Args: + smell: The code smell to check + + Returns: + True if the smell can be refactored, False otherwise + """ + for smell_type, strategy in self._strategies.items(): + if isinstance(smell, smell_type) and strategy.can_refactor(smell): + return True + return False + + def refactor(self, smell: CodeSmell) -> bool: + """Refactor a code smell. 
+ + Args: + smell: The code smell to refactor + + Returns: + True if refactoring was successful, False otherwise + """ + for smell_type, strategy in self._strategies.items(): + if isinstance(smell, smell_type) and strategy.can_refactor(smell): + return strategy.refactor(smell) + return False + + def refactor_all(self, smells: List[CodeSmell]) -> Dict[CodeSmell, bool]: + """Refactor all refactorable code smells. + + Args: + smells: List of code smells to refactor + + Returns: + Dictionary mapping code smells to refactoring success status + """ + results = {} + for smell in smells: + if self.can_refactor(smell): + success = self.refactor(smell) + results[smell] = success + return results \ No newline at end of file diff --git a/src/codegen/sdk/extensions/code_smells/smells.py b/src/codegen/sdk/extensions/code_smells/smells.py new file mode 100644 index 000000000..4bc8c4e4a --- /dev/null +++ b/src/codegen/sdk/extensions/code_smells/smells.py @@ -0,0 +1,127 @@ +"""Code smell definitions for the code smell detector and refactorer.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from enum import Enum, auto +from typing import Any, Generic, TypeVar + +from codegen.sdk.core.class_definition import Class +from codegen.sdk.core.file import SourceFile +from codegen.sdk.core.function import Function +from codegen.sdk.core.symbol import Symbol + + +class CodeSmellSeverity(Enum): + """Severity levels for code smells.""" + + LOW = auto() + MEDIUM = auto() + HIGH = auto() + CRITICAL = auto() + + +class CodeSmellCategory(Enum): + """Categories of code smells.""" + + BLOATERS = auto() # Code, methods, classes that have grown too large + OBJECT_ORIENTATION_ABUSERS = auto() # Cases where code doesn't follow OO principles + CHANGE_PREVENTERS = auto() # Things that make changing code difficult + DISPENSABLES = auto() # Code that isn't necessary and can be removed + COUPLERS = auto() # Excessive coupling between classes/modules + + +T = TypeVar("T", 
bound=Symbol) + + +@dataclass +class CodeSmell(Generic[T], ABC): + """Base class for all code smells.""" + + symbol: T + severity: CodeSmellSeverity + category: CodeSmellCategory + description: str = "" + refactoring_suggestions: list[str] = field(default_factory=list) + + @property + def name(self) -> str: + """Get the name of the code smell.""" + return self.__class__.__name__ + + @abstractmethod + def can_auto_refactor(self) -> bool: + """Check if this code smell can be automatically refactored.""" + pass + + def __str__(self) -> str: + """String representation of the code smell.""" + return f"{self.name} ({self.severity.name}) in {self.symbol.name}: {self.description}" + + +@dataclass +class DuplicateCode(CodeSmell[SourceFile]): + """Duplicate code smell - when the same code appears in multiple places.""" + + duplicate_locations: list[tuple[SourceFile, int, int]] = field(default_factory=list) + similarity_score: float = 0.0 + + def can_auto_refactor(self) -> bool: + """Check if this duplicate code can be automatically refactored.""" + return len(self.duplicate_locations) > 0 and self.similarity_score > 0.8 + + +@dataclass +class LongFunction(CodeSmell[Function]): + """Long function smell - when a function is too long and should be split.""" + + line_count: int = 0 + complexity: int = 0 + + def can_auto_refactor(self) -> bool: + """Check if this long function can be automatically refactored.""" + # Long but simple functions are easier to refactor automatically + return self.line_count > 50 and self.complexity < 10 + + +@dataclass +class LongParameterList(CodeSmell[Function]): + """Long parameter list smell - when a function has too many parameters.""" + + parameter_count: int = 0 + + def can_auto_refactor(self) -> bool: + """Check if this long parameter list can be automatically refactored.""" + return self.parameter_count > 5 + + +@dataclass +class DeadCode(CodeSmell[Symbol]): + """Dead code smell - code that is never executed or used.""" + + def 
can_auto_refactor(self) -> bool: + """Check if this dead code can be automatically refactored.""" + return True # Dead code can usually be safely removed + + +@dataclass +class ComplexConditional(CodeSmell[Function]): + """Complex conditional smell - when conditionals are too complex.""" + + condition_depth: int = 0 + boolean_operators: int = 0 + + def can_auto_refactor(self) -> bool: + """Check if this complex conditional can be automatically refactored.""" + return self.condition_depth <= 3 # Deep nesting is harder to auto-refactor + + +@dataclass +class DataClump(CodeSmell[Class]): + """Data clump smell - when the same group of fields appears in multiple classes.""" + + clumped_fields: list[str] = field(default_factory=list) + appears_in_classes: list[Class] = field(default_factory=list) + + def can_auto_refactor(self) -> bool: + """Check if this data clump can be automatically refactored.""" + return len(self.clumped_fields) >= 3 and len(self.appears_in_classes) >= 2 \ No newline at end of file From 2fab074428e2d80f52b03715ce71fe04dadc8906 Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Sat, 22 Mar 2025 00:27:18 +0000 Subject: [PATCH 2/2] Automated pre-commit update --- src/codegen/__init__.py | 10 +- .../cli/commands/code_smells/__init__.py | 2 +- .../cli/commands/code_smells/code_smells.py | 85 ++--- src/codegen/cli/commands/code_smells/main.py | 2 +- .../sdk/extensions/code_smells/__init__.py | 16 +- .../sdk/extensions/code_smells/detector.py | 323 ++++++++---------- .../sdk/extensions/code_smells/refactorer.py | 258 ++++++-------- .../sdk/extensions/code_smells/smells.py | 20 +- 8 files changed, 309 insertions(+), 407 deletions(-) diff --git a/src/codegen/__init__.py b/src/codegen/__init__.py index bd0ee6284..7ac2772a7 100644 --- a/src/codegen/__init__.py +++ b/src/codegen/__init__.py @@ -17,14 +17,14 @@ __all__ = [ "CodeAgent", + "CodeSmell", + "CodeSmellCategory", + "CodeSmellDetector", + 
"CodeSmellRefactorer", + "CodeSmellSeverity", "Codebase", "CodegenApp", "Function", "ProgrammingLanguage", "function", - "CodeSmellDetector", - "CodeSmellRefactorer", - "CodeSmell", - "CodeSmellCategory", - "CodeSmellSeverity", ] diff --git a/src/codegen/cli/commands/code_smells/__init__.py b/src/codegen/cli/commands/code_smells/__init__.py index 0163a228c..6bd38003b 100644 --- a/src/codegen/cli/commands/code_smells/__init__.py +++ b/src/codegen/cli/commands/code_smells/__init__.py @@ -2,4 +2,4 @@ from codegen.cli.commands.code_smells.code_smells import code_smells -__all__ = ["code_smells"] \ No newline at end of file +__all__ = ["code_smells"] diff --git a/src/codegen/cli/commands/code_smells/code_smells.py b/src/codegen/cli/commands/code_smells/code_smells.py index a8da205eb..37d0512e5 100644 --- a/src/codegen/cli/commands/code_smells/code_smells.py +++ b/src/codegen/cli/commands/code_smells/code_smells.py @@ -1,15 +1,10 @@ """CLI command for detecting and refactoring code smells.""" import json -import os -from pathlib import Path -from typing import Dict, List, Optional +from typing import Optional import click -import rich from rich.console import Console -from rich.panel import Panel -from rich.table import Table from rich.tree import Tree from codegen.cli.sdk.decorator import command @@ -53,14 +48,7 @@ @click.option( "--category", "-c", - type=click.Choice([ - "bloaters", - "object_orientation_abusers", - "change_preventers", - "dispensables", - "couplers", - "all" - ]), + type=click.Choice(["bloaters", "object_orientation_abusers", "change_preventers", "dispensables", "couplers", "all"]), default="all", help="Category of code smells to detect", ) @@ -106,11 +94,11 @@ def code_smells( duplicate_code_min_lines: int, ) -> None: """Detect and optionally refactor code smells in a codebase. - + This command analyzes a codebase for common code smells like long functions, duplicate code, dead code, etc. It can also automatically refactor some of these issues. 
- + Args: path: Path to the codebase to analyze language: Programming language of the codebase @@ -126,32 +114,32 @@ def code_smells( prog_language = None if language != "auto": prog_language = ProgrammingLanguage(language.upper()) - + # Initialize the codebase console.print(f"[bold blue]Analyzing codebase at [cyan]{path}[/cyan]...[/bold blue]") codebase = Codebase(path, language=prog_language) - + # Configure the detector config = DetectionConfig( long_function_lines=long_function_lines, long_parameter_list_threshold=long_parameter_list, duplicate_code_min_lines=duplicate_code_min_lines, ) - + # Initialize the detector and refactorer detector = CodeSmellDetector(codebase, config) refactorer = CodeSmellRefactorer(codebase) - + # Detect code smells console.print("[bold blue]Detecting code smells...[/bold blue]") with console.status("[bold green]Analyzing code...[/bold green]"): all_smells = detector.detect_all() - + # Filter by severity if severity != "all": severity_level = CodeSmellSeverity[severity.upper()] all_smells = [smell for smell in all_smells if smell.severity.value >= severity_level.value] - + # Filter by category if category != "all": category_map = { @@ -163,87 +151,80 @@ def code_smells( } category_enum = category_map[category] all_smells = [smell for smell in all_smells if smell.category == category_enum] - + # Display results if not all_smells: console.print("[bold green]No code smells detected![/bold green]") return - + console.print(f"[bold yellow]Detected {len(all_smells)} code smells:[/bold yellow]") - + # Group by category - smells_by_category: Dict[CodeSmellCategory, List[CodeSmell]] = {} + smells_by_category: dict[CodeSmellCategory, list[CodeSmell]] = {} for smell in all_smells: if smell.category not in smells_by_category: smells_by_category[smell.category] = [] smells_by_category[smell.category].append(smell) - + # Create a tree view of the results tree = Tree("[bold]Code Smells by Category[/bold]") for category, smells in 
smells_by_category.items(): category_node = tree.add(f"[bold]{category.name}[/bold] ({len(smells)} issues)") - + # Group by severity within each category - smells_by_severity: Dict[CodeSmellSeverity, List[CodeSmell]] = {} + smells_by_severity: dict[CodeSmellSeverity, list[CodeSmell]] = {} for smell in smells: if smell.severity not in smells_by_severity: smells_by_severity[smell.severity] = [] smells_by_severity[smell.severity].append(smell) - + # Add severity nodes - for severity, severity_smells in sorted( - smells_by_severity.items(), key=lambda x: x[0].value, reverse=True - ): + for severity, severity_smells in sorted(smells_by_severity.items(), key=lambda x: x[0].value, reverse=True): severity_color = { CodeSmellSeverity.LOW: "green", CodeSmellSeverity.MEDIUM: "yellow", CodeSmellSeverity.HIGH: "orange", CodeSmellSeverity.CRITICAL: "red", }[severity] - - severity_node = category_node.add( - f"[bold {severity_color}]{severity.name}[/bold {severity_color}] ({len(severity_smells)} issues)" - ) - + + severity_node = category_node.add(f"[bold {severity_color}]{severity.name}[/bold {severity_color}] ({len(severity_smells)} issues)") + # Add individual smells for smell in severity_smells: refactorable = " [bold green](auto-refactorable)[/bold green]" if refactorer.can_refactor(smell) else "" severity_node.add(f"{smell.symbol.name}: {smell.description}{refactorable}") - + console.print(tree) - + # Refactor if requested if refactor: refactorable_smells = [smell for smell in all_smells if refactorer.can_refactor(smell)] - + if not refactorable_smells: console.print("[bold yellow]No automatically refactorable code smells found.[/bold yellow]") else: console.print(f"[bold blue]Refactoring {len(refactorable_smells)} code smells...[/bold blue]") - + with console.status("[bold green]Refactoring code...[/bold green]"): results = refactorer.refactor_all(refactorable_smells) - + # Display refactoring results success_count = sum(1 for success in results.values() if success) 
console.print(f"[bold green]Successfully refactored {success_count}/{len(results)} code smells.[/bold green]") - + if success_count < len(results): console.print("[bold yellow]Some refactorings failed. See details below:[/bold yellow]") for smell, success in results.items(): if not success: console.print(f"[bold red]Failed to refactor:[/bold red] {smell}") - + # Output JSON report if requested if output: report = { "summary": { "total_smells": len(all_smells), "by_category": {category.name: len(smells) for category, smells in smells_by_category.items()}, - "by_severity": { - severity.name: len([s for s in all_smells if s.severity == severity]) - for severity in CodeSmellSeverity - }, + "by_severity": {severity.name: len([s for s in all_smells if s.severity == severity]) for severity in CodeSmellSeverity}, "refactorable": len([s for s in all_smells if refactorer.can_refactor(s)]), }, "smells": [ @@ -260,9 +241,9 @@ def code_smells( for smell in all_smells ], } - + # Write the report with open(output, "w") as f: json.dump(report, f, indent=2) - - console.print(f"[bold blue]Report written to [cyan]{output}[/cyan][/bold blue]") \ No newline at end of file + + console.print(f"[bold blue]Report written to [cyan]{output}[/cyan][/bold blue]") diff --git a/src/codegen/cli/commands/code_smells/main.py b/src/codegen/cli/commands/code_smells/main.py index 4af114992..084d6965e 100644 --- a/src/codegen/cli/commands/code_smells/main.py +++ b/src/codegen/cli/commands/code_smells/main.py @@ -11,4 +11,4 @@ def code_smells_command(): pass -code_smells_command.add_command(code_smells, name="detect") \ No newline at end of file +code_smells_command.add_command(code_smells, name="detect") diff --git a/src/codegen/sdk/extensions/code_smells/__init__.py b/src/codegen/sdk/extensions/code_smells/__init__.py index 82c0bcafa..e1c5d59c0 100644 --- a/src/codegen/sdk/extensions/code_smells/__init__.py +++ b/src/codegen/sdk/extensions/code_smells/__init__.py @@ -8,22 +8,22 @@ from 
codegen.sdk.extensions.code_smells.refactorer import CodeSmellRefactorer from codegen.sdk.extensions.code_smells.smells import ( CodeSmell, + ComplexConditional, + DataClump, + DeadCode, DuplicateCode, LongFunction, LongParameterList, - DeadCode, - ComplexConditional, - DataClump, ) __all__ = [ + "CodeSmell", "CodeSmellDetector", "CodeSmellRefactorer", - "CodeSmell", + "ComplexConditional", + "DataClump", + "DeadCode", "DuplicateCode", "LongFunction", "LongParameterList", - "DeadCode", - "ComplexConditional", - "DataClump", -] \ No newline at end of file +] diff --git a/src/codegen/sdk/extensions/code_smells/detector.py b/src/codegen/sdk/extensions/code_smells/detector.py index 371aae1dc..6e19c8d41 100644 --- a/src/codegen/sdk/extensions/code_smells/detector.py +++ b/src/codegen/sdk/extensions/code_smells/detector.py @@ -3,13 +3,12 @@ import re from collections import defaultdict from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, cast +from typing import Callable, Optional from codegen.sdk.core.class_definition import Class from codegen.sdk.core.codebase import Codebase from codegen.sdk.core.file import File, SourceFile from codegen.sdk.core.function import Function -from codegen.sdk.core.symbol import Symbol from codegen.sdk.extensions.code_smells.smells import ( CodeSmell, CodeSmellCategory, @@ -41,7 +40,7 @@ class DetectionConfig: complex_conditional_operators_threshold: int = 4 data_clump_min_fields: int = 3 data_clump_min_classes: int = 2 - + # Enable/disable specific detectors detect_long_functions: bool = True detect_long_parameter_lists: bool = True @@ -54,23 +53,19 @@ class DetectionConfig: class CodeSmellDetector: """Detector for common code smells in Python and TypeScript codebases.""" - def __init__( - self, - codebase: Codebase, - config: Optional[DetectionConfig] = None - ): + def __init__(self, codebase: Codebase, config: Optional[DetectionConfig] = None): """Initialize the code smell 
detector. - + Args: codebase: The codebase to analyze config: Configuration for detection thresholds and enabled detectors """ self.codebase = codebase self.config = config or DetectionConfig() - self._smells: List[CodeSmell] = [] - + self._smells: list[CodeSmell] = [] + # Register detection methods - self._detectors: Dict[str, Callable[[], List[CodeSmell]]] = { + self._detectors: dict[str, Callable[[], list[CodeSmell]]] = { "long_functions": self._detect_long_functions, "long_parameter_lists": self._detect_long_parameter_lists, "duplicate_code": self._detect_duplicate_code, @@ -78,94 +73,87 @@ def __init__( "complex_conditionals": self._detect_complex_conditionals, "data_clumps": self._detect_data_clumps, } - - def detect_all(self) -> List[CodeSmell]: + + def detect_all(self) -> list[CodeSmell]: """Run all enabled code smell detectors. - + Returns: A list of all detected code smells """ self._smells = [] - + if self.config.detect_long_functions: self._smells.extend(self._detect_long_functions()) - + if self.config.detect_long_parameter_lists: self._smells.extend(self._detect_long_parameter_lists()) - + if self.config.detect_duplicate_code: self._smells.extend(self._detect_duplicate_code()) - + if self.config.detect_dead_code: self._smells.extend(self._detect_dead_code()) - + if self.config.detect_complex_conditionals: self._smells.extend(self._detect_complex_conditionals()) - + if self.config.detect_data_clumps: self._smells.extend(self._detect_data_clumps()) - + return self._smells - - def detect_by_category(self, category: CodeSmellCategory) -> List[CodeSmell]: + + def detect_by_category(self, category: CodeSmellCategory) -> list[CodeSmell]: """Detect code smells of a specific category. 
- + Args: category: The category of code smells to detect - + Returns: A list of detected code smells in the specified category """ all_smells = self.detect_all() return [smell for smell in all_smells if smell.category == category] - - def detect_by_severity(self, severity: CodeSmellSeverity) -> List[CodeSmell]: + + def detect_by_severity(self, severity: CodeSmellSeverity) -> list[CodeSmell]: """Detect code smells of a specific severity. - + Args: severity: The severity level of code smells to detect - + Returns: A list of detected code smells with the specified severity """ all_smells = self.detect_all() return [smell for smell in all_smells if smell.severity == severity] - - def detect_in_file(self, file: SourceFile) -> List[CodeSmell]: + + def detect_in_file(self, file: SourceFile) -> list[CodeSmell]: """Detect code smells in a specific file. - + Args: file: The file to analyze - + Returns: A list of detected code smells in the specified file """ all_smells = self.detect_all() - return [ - smell for smell in all_smells - if ( - isinstance(smell.symbol, File) and smell.symbol == file - ) or ( - hasattr(smell.symbol, "file") and smell.symbol.file == file - ) - ] - - def _detect_long_functions(self) -> List[CodeSmell]: + return [smell for smell in all_smells if (isinstance(smell.symbol, File) and smell.symbol == file) or (hasattr(smell.symbol, "file") and smell.symbol.file == file)] + + def _detect_long_functions(self) -> list[CodeSmell]: """Detect functions that are too long. 
- + Returns: A list of LongFunction code smells """ smells = [] - + for function in self.codebase.functions: # Skip functions without a body if not function.body: continue - + # Count lines in function body line_count = len(function.body.split("\n")) - + # Simple complexity metric: count if/for/while statements complexity = 0 if function.body: @@ -175,12 +163,12 @@ def _detect_long_functions(self) -> List[CodeSmell]: complexity += function.body.count("while ") complexity += function.body.count("except ") complexity += function.body.count("case ") - + if line_count > self.config.long_function_lines: severity = CodeSmellSeverity.MEDIUM if line_count > self.config.long_function_lines * 2: severity = CodeSmellSeverity.HIGH - + smell = LongFunction( symbol=function, severity=severity, @@ -189,37 +177,33 @@ def _detect_long_functions(self) -> List[CodeSmell]: line_count=line_count, complexity=complexity, ) - + # Add refactoring suggestions if complexity > self.config.long_function_complexity_threshold: - smell.refactoring_suggestions.append( - "Extract complex logic into smaller helper functions" - ) + smell.refactoring_suggestions.append("Extract complex logic into smaller helper functions") else: - smell.refactoring_suggestions.append( - "Split function into smaller, more focused functions" - ) - + smell.refactoring_suggestions.append("Split function into smaller, more focused functions") + smells.append(smell) - + return smells - - def _detect_long_parameter_lists(self) -> List[CodeSmell]: + + def _detect_long_parameter_lists(self) -> list[CodeSmell]: """Detect functions with too many parameters. 
- + Returns: A list of LongParameterList code smells """ smells = [] - + for function in self.codebase.functions: param_count = len(function.parameters) - + if param_count > self.config.long_parameter_list_threshold: severity = CodeSmellSeverity.MEDIUM if param_count > self.config.long_parameter_list_threshold + 3: severity = CodeSmellSeverity.HIGH - + smell = LongParameterList( symbol=function, severity=severity, @@ -227,75 +211,69 @@ def _detect_long_parameter_lists(self) -> List[CodeSmell]: description=f"Function has {param_count} parameters (threshold: {self.config.long_parameter_list_threshold})", parameter_count=param_count, ) - + # Add refactoring suggestions if self.codebase.language == ProgrammingLanguage.PYTHON: - smell.refactoring_suggestions.append( - "Use a dataclass or named tuple to group related parameters" - ) + smell.refactoring_suggestions.append("Use a dataclass or named tuple to group related parameters") elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): - smell.refactoring_suggestions.append( - "Use an options object pattern to group related parameters" - ) - - smell.refactoring_suggestions.append( - "Consider if the function is doing too much and should be split" - ) - + smell.refactoring_suggestions.append("Use an options object pattern to group related parameters") + + smell.refactoring_suggestions.append("Consider if the function is doing too much and should be split") + smells.append(smell) - + return smells - - def _detect_duplicate_code(self) -> List[CodeSmell]: + + def _detect_duplicate_code(self) -> list[CodeSmell]: """Detect duplicate code across files. - + This is a simplified implementation that looks for exact duplicates. 
A more sophisticated implementation would use techniques like: - Abstract syntax tree comparison - Normalized token sequence comparison - Fuzzy matching algorithms - + Returns: A list of DuplicateCode code smells """ smells = [] min_lines = self.config.duplicate_code_min_lines - + # Extract code blocks from all files (simplified approach) code_blocks = [] for file in self.codebase.files: if not isinstance(file, SourceFile) or not file.content: continue - + lines = file.content.split("\n") # Extract blocks of min_lines consecutive lines for i in range(len(lines) - min_lines + 1): - block = "\n".join(lines[i:i+min_lines]) + block = "\n".join(lines[i : i + min_lines]) # Skip blocks that are too short or just whitespace if len(block.strip()) < 30: continue - code_blocks.append((file, i+1, i+min_lines, block)) - + code_blocks.append((file, i + 1, i + min_lines, block)) + # Find duplicates (exact matches for simplicity) block_map = defaultdict(list) for file, start, end, block in code_blocks: # Normalize whitespace for comparison normalized = re.sub(r"\s+", " ", block.strip()) block_map[normalized].append((file, start, end)) - + # Create code smell for each set of duplicates for normalized_block, locations in block_map.items(): if len(locations) < 2: continue - + # First location is the "original" original_file, original_start, original_end = locations[0] duplicate_locations = [(loc[0], loc[1], loc[2]) for loc in locations[1:]] - + severity = CodeSmellSeverity.MEDIUM if len(locations) > 3: severity = CodeSmellSeverity.HIGH - + smell = DuplicateCode( symbol=original_file, severity=severity, @@ -304,111 +282,101 @@ def _detect_duplicate_code(self) -> List[CodeSmell]: duplicate_locations=duplicate_locations, similarity_score=1.0, # Exact match ) - + # Add refactoring suggestions if self.codebase.language == ProgrammingLanguage.PYTHON: - smell.refactoring_suggestions.append( - "Extract duplicated code into a shared function" - ) + 
smell.refactoring_suggestions.append("Extract duplicated code into a shared function") elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): - smell.refactoring_suggestions.append( - "Extract duplicated code into a shared utility function" - ) - + smell.refactoring_suggestions.append("Extract duplicated code into a shared utility function") + smells.append(smell) - + return smells - - def _detect_dead_code(self) -> List[CodeSmell]: + + def _detect_dead_code(self) -> list[CodeSmell]: """Detect unused code (functions, classes, variables). - + Returns: A list of DeadCode code smells """ smells = [] - + # Find symbols that are never used for symbol in self.codebase._symbols(): # Skip symbols that are likely meant to be public API if symbol.name.startswith("__") and symbol.name.endswith("__"): continue - + # Skip symbols that are imported but not defined in this codebase if not hasattr(symbol, "file") or not symbol.file: continue - + # Check if the symbol has any usages if not symbol.usages: severity = CodeSmellSeverity.LOW - + # Increase severity for larger unused code if isinstance(symbol, Class) or isinstance(symbol, Function): if hasattr(symbol, "body") and symbol.body and len(symbol.body.split("\n")) > 20: severity = CodeSmellSeverity.MEDIUM - + smell = DeadCode( symbol=symbol, severity=severity, category=CodeSmellCategory.DISPENSABLES, description=f"Unused {symbol.__class__.__name__.lower()} '{symbol.name}'", ) - + # Add refactoring suggestions - smell.refactoring_suggestions.append( - f"Remove unused {symbol.__class__.__name__.lower()} '{symbol.name}'" - ) - + smell.refactoring_suggestions.append(f"Remove unused {symbol.__class__.__name__.lower()} '{symbol.name}'") + if isinstance(symbol, Function) and symbol.is_public: - smell.refactoring_suggestions.append( - "If this is part of a public API, document it clearly or mark as deprecated" - ) - + smell.refactoring_suggestions.append("If this is part of a public API, 
document it clearly or mark as deprecated") + smells.append(smell) - + return smells - - def _detect_complex_conditionals(self) -> List[CodeSmell]: + + def _detect_complex_conditionals(self) -> list[CodeSmell]: """Detect overly complex conditional expressions. - + Returns: A list of ComplexConditional code smells """ smells = [] - + for function in self.codebase.functions: if not function.body: continue - + # Simple heuristic for conditional complexity lines = function.body.split("\n") max_indent = 0 current_indent = 0 - + # Count boolean operators boolean_operators = function.body.count(" and ") + function.body.count(" or ") boolean_operators += function.body.count("&&") + function.body.count("||") - + # Estimate nesting depth for line in lines: stripped = line.lstrip() if not stripped or stripped.startswith(("#", "//", "/*", "*")): continue - + indent = len(line) - len(stripped) current_indent = indent max_indent = max(max_indent, current_indent) - + # Estimate nesting depth (divide by typical indent size) indent_size = 4 if self.codebase.language == ProgrammingLanguage.PYTHON else 2 nesting_depth = max_indent // indent_size - - if (nesting_depth > self.config.complex_conditional_depth_threshold or - boolean_operators > self.config.complex_conditional_operators_threshold): - + + if nesting_depth > self.config.complex_conditional_depth_threshold or boolean_operators > self.config.complex_conditional_operators_threshold: severity = CodeSmellSeverity.MEDIUM if nesting_depth > self.config.complex_conditional_depth_threshold + 2: severity = CodeSmellSeverity.HIGH - + smell = ComplexConditional( symbol=function, severity=severity, @@ -417,41 +385,35 @@ def _detect_complex_conditionals(self) -> List[CodeSmell]: condition_depth=nesting_depth, boolean_operators=boolean_operators, ) - + # Add refactoring suggestions - smell.refactoring_suggestions.append( - "Extract complex conditions into well-named predicate methods" - ) - smell.refactoring_suggestions.append( - 
"Consider using early returns to reduce nesting" - ) + smell.refactoring_suggestions.append("Extract complex conditions into well-named predicate methods") + smell.refactoring_suggestions.append("Consider using early returns to reduce nesting") if boolean_operators > self.config.complex_conditional_operators_threshold: - smell.refactoring_suggestions.append( - "Break complex boolean expressions into smaller, named variables" - ) - + smell.refactoring_suggestions.append("Break complex boolean expressions into smaller, named variables") + smells.append(smell) - + return smells - - def _detect_data_clumps(self) -> List[CodeSmell]: + + def _detect_data_clumps(self) -> list[CodeSmell]: """Detect data clumps (same fields appearing in multiple classes). - + Returns: A list of DataClump code smells """ smells = [] - + if not self.codebase.classes: return smells - + # Build a map of field names to the classes they appear in - field_to_classes: Dict[str, Set[Class]] = defaultdict(set) - + field_to_classes: dict[str, set[Class]] = defaultdict(set) + for cls in self.codebase.classes: # Get fields from class fields = set() - + # For Python, look at instance variables in __init__ if self.codebase.language == ProgrammingLanguage.PYTHON: init_method = next((m for m in cls.methods if m.name == "__init__"), None) @@ -459,60 +421,59 @@ def _detect_data_clumps(self) -> List[CodeSmell]: # Simple regex to find self.attr = ... 
assignments for match in re.finditer(r"self\.(\w+)\s*=", init_method.body): fields.add(match.group(1)) - + # For TypeScript, look at class properties elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): if hasattr(cls, "properties"): for prop in cls.properties: fields.add(prop.name) - + # Add fields to the map for field in fields: field_to_classes[field].add(cls) - + # Find groups of fields that appear together in multiple classes - field_groups: Dict[frozenset, Set[Class]] = defaultdict(set) - + field_groups: dict[frozenset, set[Class]] = defaultdict(set) + # Start with individual fields that appear in multiple classes - candidate_fields = {field for field, classes in field_to_classes.items() - if len(classes) >= self.config.data_clump_min_classes} - + candidate_fields = {field for field, classes in field_to_classes.items() if len(classes) >= self.config.data_clump_min_classes} + # Skip if we don't have enough candidate fields if len(candidate_fields) < self.config.data_clump_min_fields: return smells - + # Build field groups (this is a simplified approach) # A more sophisticated approach would use frequent itemset mining algorithms for cls in self.codebase.classes: # Get fields from this class that are in our candidate set - cls_fields = {field for field in candidate_fields - if cls in field_to_classes[field]} - + cls_fields = {field for field in candidate_fields if cls in field_to_classes[field]} + # Skip if not enough fields if len(cls_fields) < self.config.data_clump_min_fields: continue - + # Add all combinations of min_fields fields for i in range(self.config.data_clump_min_fields, len(cls_fields) + 1): # This is inefficient for large numbers of fields, but works for demonstration from itertools import combinations + for combo in combinations(cls_fields, i): field_group = frozenset(combo) field_groups[field_group].add(cls) - + # Create code smells for field groups that appear in multiple classes for field_group, 
classes in field_groups.items(): if len(classes) < self.config.data_clump_min_classes: continue - + # Use the first class as the "primary" class for the smell primary_class = next(iter(classes)) other_classes = set(classes) - {primary_class} - + severity = CodeSmellSeverity.MEDIUM if len(field_group) > self.config.data_clump_min_fields + 2: severity = CodeSmellSeverity.HIGH - + smell = DataClump( symbol=primary_class, severity=severity, @@ -521,23 +482,15 @@ def _detect_data_clumps(self) -> List[CodeSmell]: clumped_fields=list(field_group), appears_in_classes=list(classes), ) - + # Add refactoring suggestions if self.codebase.language == ProgrammingLanguage.PYTHON: - smell.refactoring_suggestions.append( - f"Extract fields {', '.join(field_group)} into a new class" - ) - smell.refactoring_suggestions.append( - "Consider using composition instead of duplicating these fields" - ) + smell.refactoring_suggestions.append(f"Extract fields {', '.join(field_group)} into a new class") + smell.refactoring_suggestions.append("Consider using composition instead of duplicating these fields") elif self.codebase.language in (ProgrammingLanguage.TYPESCRIPT, ProgrammingLanguage.JAVASCRIPT): - smell.refactoring_suggestions.append( - f"Extract fields {', '.join(field_group)} into a new interface or type" - ) - smell.refactoring_suggestions.append( - "Use composition to share this data structure between classes" - ) - + smell.refactoring_suggestions.append(f"Extract fields {', '.join(field_group)} into a new interface or type") + smell.refactoring_suggestions.append("Use composition to share this data structure between classes") + smells.append(smell) - - return smells \ No newline at end of file + + return smells diff --git a/src/codegen/sdk/extensions/code_smells/refactorer.py b/src/codegen/sdk/extensions/code_smells/refactorer.py index 991558cc1..4a244e166 100644 --- a/src/codegen/sdk/extensions/code_smells/refactorer.py +++ b/src/codegen/sdk/extensions/code_smells/refactorer.py @@ 
-1,20 +1,14 @@ """Code smell refactoring tools for Python and TypeScript codebases.""" -from abc import ABC, abstractmethod import re -from typing import Dict, List, Optional, Type, cast +from abc import ABC, abstractmethod +from typing import Optional, cast -from codegen.sdk.core.class_definition import Class from codegen.sdk.core.codebase import Codebase -from codegen.sdk.core.file import SourceFile from codegen.sdk.core.function import Function -from codegen.sdk.core.symbol import Symbol from codegen.sdk.extensions.code_smells.smells import ( CodeSmell, - ComplexConditional, - DataClump, DeadCode, - DuplicateCode, LongFunction, LongParameterList, ) @@ -26,16 +20,16 @@ class RefactoringStrategy(ABC): """Base class for all refactoring strategies.""" - + @abstractmethod def can_refactor(self, smell: CodeSmell) -> bool: """Check if this strategy can refactor the given code smell.""" pass - + @abstractmethod def refactor(self, smell: CodeSmell) -> bool: """Refactor the code smell. - + Returns: True if refactoring was successful, False otherwise """ @@ -44,58 +38,55 @@ def refactor(self, smell: CodeSmell) -> bool: class LongFunctionRefactoring(RefactoringStrategy): """Strategy for refactoring long functions.""" - + def __init__(self, codebase: Codebase): """Initialize the refactoring strategy. - + Args: codebase: The codebase to refactor """ self.codebase = codebase - + def can_refactor(self, smell: CodeSmell) -> bool: """Check if this strategy can refactor the given code smell.""" - return ( - isinstance(smell, LongFunction) and - smell.can_auto_refactor() - ) - + return isinstance(smell, LongFunction) and smell.can_auto_refactor() + def refactor(self, smell: CodeSmell) -> bool: """Refactor a long function by extracting parts into helper functions. - + This is a simplified implementation that extracts code blocks based on indentation patterns. 
A more sophisticated implementation would: - Use AST analysis to identify logical blocks - Analyze variable usage to determine parameters and return values - Generate appropriate function names based on the extracted code - + Returns: True if refactoring was successful, False otherwise """ if not self.can_refactor(smell): return False - + long_function = cast(LongFunction, smell) function = long_function.symbol - + if not function.body: return False - + # Simple approach: look for blocks with consistent indentation lines = function.body.split("\n") - + # Find indentation blocks (simplified) blocks = [] current_block = [] current_indent = None - + for line in lines: if not line.strip(): current_block.append(line) continue - + indent = len(line) - len(line.lstrip()) - + if current_indent is None: current_indent = indent current_block.append(line) @@ -107,89 +98,81 @@ def refactor(self, smell: CodeSmell) -> bool: blocks.append((current_indent, current_block)) current_block = [line] current_indent = indent - + # Add the last block if it's substantial if current_block and len(current_block) >= 5: blocks.append((current_indent, current_block)) - + # Skip if we couldn't identify good extraction candidates if not blocks: return False - + # Sort blocks by size (largest first) and extract the largest ones blocks.sort(key=lambda b: len(b[1]), reverse=True) - + # Extract up to 2 blocks (to avoid over-refactoring) extracted_count = 0 new_body_lines = lines.copy() - + for indent, block in blocks[:2]: # Skip if block is too small after filtering blank lines content_lines = [l for l in block if l.strip()] if len(content_lines) < 5: continue - + # Generate a helper function name based on first line content first_content_line = next((l for l in block if l.strip()), "") helper_name = self._generate_helper_name(first_content_line, function.name) - + # Determine the start and end indices in the original function start_idx = lines.index(block[0]) end_idx = start_idx + len(block) - 1 - + 
# Create the helper function - helper_function = self._create_helper_function( - function, helper_name, block, indent - ) - + helper_function = self._create_helper_function(function, helper_name, block, indent) + if helper_function: # Replace the block with a call to the helper call_line = " " * indent + f"{helper_name}()" # Simplified, should include params - new_body_lines[start_idx:end_idx+1] = [call_line] + new_body_lines[start_idx : end_idx + 1] = [call_line] extracted_count += 1 - + if extracted_count == 0: return False - + # Update the original function body function.body = "\n".join(new_body_lines) return True - + def _generate_helper_name(self, first_line: str, parent_name: str) -> str: """Generate a name for the extracted helper function.""" # Strip leading whitespace and common prefixes clean_line = first_line.lstrip() for prefix in ["if ", "for ", "while ", "try:", "with "]: if clean_line.startswith(prefix): - clean_line = clean_line[len(prefix):].strip() + clean_line = clean_line[len(prefix) :].strip() break - + # Extract meaningful words (simplified) words = [] for word in clean_line.split()[:3]: # Use first 3 words max # Clean up the word - word = ''.join(c for c in word if c.isalnum()) + word = "".join(c for c in word if c.isalnum()) if word and not word.isdigit(): words.append(word.lower()) - + if not words: return f"_{parent_name}_helper" - + # Combine words into a function name return f"_{parent_name}_{'_'.join(words)}" - - def _create_helper_function( - self, - parent: Function, - name: str, - block_lines: List[str], - base_indent: int - ) -> Optional[Function]: + + def _create_helper_function(self, parent: Function, name: str, block_lines: list[str], base_indent: int) -> Optional[Function]: """Create a helper function from the extracted block.""" # Determine the file to add the helper to if not parent.file: return None - + # Remove the base indentation from all lines dedented_lines = [] for line in block_lines: @@ -200,10 +183,10 @@ def 
_create_helper_function( current_indent = len(line) - len(line.lstrip()) new_indent = max(0, current_indent - base_indent) dedented_lines.append(" " * new_indent + line.lstrip()) - + # Create the helper function body helper_body = "\n".join(dedented_lines) - + # Add the helper function to the file if self.codebase.language == ProgrammingLanguage.PYTHON: # For Python, add the helper after the parent function @@ -211,97 +194,94 @@ def _create_helper_function( name=name, body=helper_body, parameters=[], # Simplified, should analyze needed parameters - after=parent + after=parent, ) else: # For other languages, add at the end of the file helper = parent.file.add_function( name=name, body=helper_body, - parameters=[] # Simplified, should analyze needed parameters + parameters=[], # Simplified, should analyze needed parameters ) - + return helper class DeadCodeRefactoring(RefactoringStrategy): """Strategy for refactoring dead code.""" - + def __init__(self, codebase: Codebase): """Initialize the refactoring strategy.""" self.codebase = codebase - + def can_refactor(self, smell: CodeSmell) -> bool: """Check if this strategy can refactor the given code smell.""" return isinstance(smell, DeadCode) and smell.can_auto_refactor() - + def refactor(self, smell: CodeSmell) -> bool: """Refactor dead code by removing it. 
- + Returns: True if refactoring was successful, False otherwise """ if not self.can_refactor(smell): return False - + dead_code = cast(DeadCode, smell) symbol = dead_code.symbol - + # Remove the symbol try: symbol.delete() return True except Exception as e: - logger.error(f"Failed to remove dead code {symbol.name}: {e}") + logger.exception(f"Failed to remove dead code {symbol.name}: {e}") return False class LongParameterListRefactoring(RefactoringStrategy): """Strategy for refactoring functions with long parameter lists.""" - + def __init__(self, codebase: Codebase): """Initialize the refactoring strategy.""" self.codebase = codebase - + def can_refactor(self, smell: CodeSmell) -> bool: """Check if this strategy can refactor the given code smell.""" - return ( - isinstance(smell, LongParameterList) and - smell.can_auto_refactor() - ) - + return isinstance(smell, LongParameterList) and smell.can_auto_refactor() + def refactor(self, smell: CodeSmell) -> bool: """Refactor a function with a long parameter list by introducing a parameter object. - + This is a simplified implementation that groups parameters into a class/object. 
A more sophisticated implementation would: - Analyze parameter usage to determine logical groupings - Update all call sites to use the new parameter object - + Returns: True if refactoring was successful, False otherwise """ if not self.can_refactor(smell): return False - + long_param_list = cast(LongParameterList, smell) function = long_param_list.symbol - + if not function.parameters or not function.file: return False - + # Create a name for the parameter object param_object_name = f"{function.name.title().replace('_', '')}Params" - + # Group parameters (simplified approach) # In a real implementation, we would analyze parameter usage patterns # to determine logical groupings params = function.parameters - + # Skip if we don't have enough parameters to refactor if len(params) <= 3: return False - + # Create the parameter object if self.codebase.language == ProgrammingLanguage.PYTHON: return self._refactor_python(function, params, param_object_name) @@ -309,56 +289,52 @@ def refactor(self, smell: CodeSmell) -> bool: return self._refactor_typescript(function, params, param_object_name) else: return False - - def _refactor_python(self, function: Function, params: List, param_object_name: str) -> bool: + + def _refactor_python(self, function: Function, params: list, param_object_name: str) -> bool: """Refactor a Python function with a long parameter list.""" if not function.file: return False - + # Create a dataclass for the parameters dataclass_def = [ "@dataclass", f"class {param_object_name}:", - " \"\"\"Parameter object for {function.name}\"\"\"", + ' """Parameter object for {function.name}"""', ] - + # Add fields for each parameter for param in params: # Skip self/cls for methods if param.name in ("self", "cls"): continue - + # Add type hint if available type_hint = f": {param.type_annotation}" if hasattr(param, "type_annotation") and param.type_annotation else "" default = f" = {param.default}" if hasattr(param, "default") and param.default is not None else "" 
dataclass_def.append(f" {param.name}{type_hint}{default}") - + # Add the dataclass to the file dataclass_body = "\n".join(dataclass_def) - + # Add import for dataclass if needed - has_dataclass_import = any( - "from dataclasses import dataclass" in imp.source - for imp in function.file.imports - if hasattr(imp, "source") - ) - + has_dataclass_import = any("from dataclasses import dataclass" in imp.source for imp in function.file.imports if hasattr(imp, "source")) + if not has_dataclass_import: function.file.add_import("from dataclasses import dataclass") - + # Add the dataclass to the file function.file.add_class( name=param_object_name, body="\n".join(dataclass_def[2:]), # Skip the decorator and class line decorators=["dataclass"], - before=function + before=function, ) - + # Update the function signature # Keep special parameters like self/cls special_params = [p for p in params if p.name in ("self", "cls")] - new_params = special_params + [f"params: {param_object_name}"] - + new_params = [*special_params, f"params: {param_object_name}"] + # Update function body to use the params object if function.body: body_lines = function.body.split("\n") @@ -367,96 +343,88 @@ def _refactor_python(self, function: Function, params: List, param_object_name: # Skip self/cls if param.name in ("self", "cls"): continue - + # Replace parameter references with params.parameter # This is a simplified approach and might need more sophisticated regex - body_lines[i] = re.sub( - r'\b' + param.name + r'\b', - f"params.{param.name}", - line - ) - + body_lines[i] = re.sub(r"\b" + param.name + r"\b", f"params.{param.name}", line) + function.body = "\n".join(body_lines) - + # Update the function signature function.parameters = new_params - + return True - - def _refactor_typescript(self, function: Function, params: List, param_object_name: str) -> bool: + + def _refactor_typescript(self, function: Function, params: list, param_object_name: str) -> bool: """Refactor a TypeScript function with 
a long parameter list.""" if not function.file: return False - + # Create an interface for the parameters interface_def = [ f"interface {param_object_name} {{", ] - + # Add fields for each parameter for param in params: # Add type annotation if available type_annotation = f": {param.type}" if hasattr(param, "type") and param.type else ": any" interface_def.append(f" {param.name}{type_annotation};") - + interface_def.append("}") - + # Add the interface to the file interface_body = "\n".join(interface_def) function.file.add_interface( name=param_object_name, body="\n".join(interface_def[1:-1]), # Skip the interface line and closing brace - before=function + before=function, ) - + # Update the function signature new_params = [f"params: {param_object_name}"] - + # Update function body to use the params object if function.body: body_lines = function.body.split("\n") for i, line in enumerate(body_lines): for param in params: # Replace parameter references with params.parameter - body_lines[i] = re.sub( - r'\b' + param.name + r'\b', - f"params.{param.name}", - line - ) - + body_lines[i] = re.sub(r"\b" + param.name + r"\b", f"params.{param.name}", line) + function.body = "\n".join(body_lines) - + # Update the function signature function.parameters = new_params - + return True class CodeSmellRefactorer: """Refactorer for common code smells in Python and TypeScript codebases.""" - + def __init__(self, codebase: Codebase): """Initialize the code smell refactorer. 
- + Args: codebase: The codebase to refactor """ self.codebase = codebase - + # Register refactoring strategies - self._strategies: Dict[Type[CodeSmell], RefactoringStrategy] = { + self._strategies: dict[type[CodeSmell], RefactoringStrategy] = { LongFunction: LongFunctionRefactoring(codebase), DeadCode: DeadCodeRefactoring(codebase), LongParameterList: LongParameterListRefactoring(codebase), # Add more strategies as they are implemented } - + def can_refactor(self, smell: CodeSmell) -> bool: """Check if a code smell can be automatically refactored. - + Args: smell: The code smell to check - + Returns: True if the smell can be refactored, False otherwise """ @@ -464,13 +432,13 @@ def can_refactor(self, smell: CodeSmell) -> bool: if isinstance(smell, smell_type) and strategy.can_refactor(smell): return True return False - + def refactor(self, smell: CodeSmell) -> bool: """Refactor a code smell. - + Args: smell: The code smell to refactor - + Returns: True if refactoring was successful, False otherwise """ @@ -478,13 +446,13 @@ def refactor(self, smell: CodeSmell) -> bool: if isinstance(smell, smell_type) and strategy.can_refactor(smell): return strategy.refactor(smell) return False - - def refactor_all(self, smells: List[CodeSmell]) -> Dict[CodeSmell, bool]: + + def refactor_all(self, smells: list[CodeSmell]) -> dict[CodeSmell, bool]: """Refactor all refactorable code smells. 
- + Args: smells: List of code smells to refactor - + Returns: Dictionary mapping code smells to refactoring success status """ @@ -493,4 +461,4 @@ def refactor_all(self, smells: List[CodeSmell]) -> Dict[CodeSmell, bool]: if self.can_refactor(smell): success = self.refactor(smell) results[smell] = success - return results \ No newline at end of file + return results diff --git a/src/codegen/sdk/extensions/code_smells/smells.py b/src/codegen/sdk/extensions/code_smells/smells.py index 4bc8c4e4a..b7ac3b5f9 100644 --- a/src/codegen/sdk/extensions/code_smells/smells.py +++ b/src/codegen/sdk/extensions/code_smells/smells.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass, field from enum import Enum, auto -from typing import Any, Generic, TypeVar +from typing import Generic, TypeVar from codegen.sdk.core.class_definition import Class from codegen.sdk.core.file import SourceFile @@ -42,17 +42,17 @@ class CodeSmell(Generic[T], ABC): category: CodeSmellCategory description: str = "" refactoring_suggestions: list[str] = field(default_factory=list) - + @property def name(self) -> str: """Get the name of the code smell.""" return self.__class__.__name__ - + @abstractmethod def can_auto_refactor(self) -> bool: """Check if this code smell can be automatically refactored.""" pass - + def __str__(self) -> str: """String representation of the code smell.""" return f"{self.name} ({self.severity.name}) in {self.symbol.name}: {self.description}" @@ -64,7 +64,7 @@ class DuplicateCode(CodeSmell[SourceFile]): duplicate_locations: list[tuple[SourceFile, int, int]] = field(default_factory=list) similarity_score: float = 0.0 - + def can_auto_refactor(self) -> bool: """Check if this duplicate code can be automatically refactored.""" return len(self.duplicate_locations) > 0 and self.similarity_score > 0.8 @@ -76,7 +76,7 @@ class LongFunction(CodeSmell[Function]): line_count: int = 0 complexity: int = 0 - + def can_auto_refactor(self) -> bool: """Check if 
this long function can be automatically refactored.""" # Long but simple functions are easier to refactor automatically @@ -88,7 +88,7 @@ class LongParameterList(CodeSmell[Function]): """Long parameter list smell - when a function has too many parameters.""" parameter_count: int = 0 - + def can_auto_refactor(self) -> bool: """Check if this long parameter list can be automatically refactored.""" return self.parameter_count > 5 @@ -109,7 +109,7 @@ class ComplexConditional(CodeSmell[Function]): condition_depth: int = 0 boolean_operators: int = 0 - + def can_auto_refactor(self) -> bool: """Check if this complex conditional can be automatically refactored.""" return self.condition_depth <= 3 # Deep nesting is harder to auto-refactor @@ -121,7 +121,7 @@ class DataClump(CodeSmell[Class]): clumped_fields: list[str] = field(default_factory=list) appears_in_classes: list[Class] = field(default_factory=list) - + def can_auto_refactor(self) -> bool: """Check if this data clump can be automatically refactored.""" - return len(self.clumped_fields) >= 3 and len(self.appears_in_classes) >= 2 \ No newline at end of file + return len(self.clumped_fields) >= 3 and len(self.appears_in_classes) >= 2