|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Complexity Estimator CLI |
| 4 | +
|
| 5 | +This script estimates the time complexity (Big-O) of a Python function by running |
| 6 | +it with increasing input sizes and curve-fitting the execution times. |
| 7 | +
|
| 8 | +Usage: |
| 9 | + python scripts/estimate_complexity.py <module_path> <function_name> |
| 10 | +
|
| 11 | +Example: |
| 12 | + python scripts/estimate_complexity.py my_script my_sorting_function |
| 13 | +""" |
| 14 | + |
| 15 | +import sys |
| 16 | +import time |
| 17 | +import importlib |
| 18 | +import math |
| 19 | +import statistics |
| 20 | +import inspect |
| 21 | +import typing |
| 22 | +from pathlib import Path |
| 23 | + |
| 24 | +# Add current directory to path so we can import local modules |
| 25 | +sys.path.insert(0, str(Path.cwd())) |
| 26 | + |
| 27 | + |
def measure_execution_time(func, input_size, iterations=5):
    """
    Measure the average wall-clock time of a single call to ``func``.

    The argument handed to ``func`` is chosen from the type hint on its first
    parameter:

    * ``int``: ``input_size`` itself is passed.
    * ``list``, ``typing.List``, ``typing.Sequence`` or
      ``collections.abc.Sequence``, bare or parameterized (``list[int]``,
      ``Sequence[int]``): ``list(range(input_size))`` is passed.
    * anything else, including a missing hint: the decision is delegated to
      ``_measure_heuristic``.

    Args:
        func: Callable under test; it is invoked with one positional argument.
        input_size: The size ``n`` to benchmark.
        iterations: Number of timed calls the average is taken over.

    Returns:
        Average seconds per call as a float. ``None`` if running ``func`` with
        the hint-derived input raised, or if the average could not be computed
        (e.g. ``iterations`` is 0).
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import collections.abc

    input_data = None

    # 1. Derive the input from the first parameter's type hint.
    try:
        params = list(inspect.signature(func).parameters.values())
        if params:
            first_param = params[0]
            hint = first_param.annotation

            if isinstance(hint, str):
                # Postponed/string annotations (e.g. "int") must be evaluated
                # before they can be compared with real types. Resolution is
                # best-effort: an unresolvable hint simply stays a string and
                # falls through to the heuristic below.
                try:
                    hint = typing.get_type_hints(func).get(first_param.name, hint)
                except Exception:
                    pass

            # get_origin() maps list[int] -> list and typing.Sequence[int] ->
            # collections.abc.Sequence; it returns None for plain classes, in
            # which case the hint itself is compared.
            origin = typing.get_origin(hint) or hint

            if hint is int:
                input_data = input_size
            elif origin in (list, collections.abc.Sequence):
                input_data = list(range(input_size))
    except (ValueError, TypeError):
        # Signature inspection failed (e.g. some builtins expose none).
        pass

    # 2. No usable hint: let the heuristic try an int, then a list.
    if input_data is None:
        return _measure_heuristic(func, input_size, iterations)

    # 3. Time the calls with the hint-derived input.
    try:
        start_time = time.perf_counter()
        for _ in range(iterations):
            func(input_data)
        end_time = time.perf_counter()
        return (end_time - start_time) / iterations
    except Exception:
        # func is arbitrary user code, so any failure is reported as "could
        # not measure". The heuristic is deliberately not retried here.
        return None
| 71 | + |
| 72 | +def _measure_heuristic(func, input_size, iterations): |
| 73 | + """Fallback: Try int first, then list.""" |
| 74 | + try: |
| 75 | + # Try passing integer N |
| 76 | + start_time = time.perf_counter() |
| 77 | + for _ in range(iterations): |
| 78 | + func(input_size) |
| 79 | + end_time = time.perf_counter() |
| 80 | + return (end_time - start_time) / iterations |
| 81 | + except TypeError: |
| 82 | + # Try passing list of size N |
| 83 | + data = list(range(input_size)) |
| 84 | + start_time = time.perf_counter() |
| 85 | + for _ in range(iterations): |
| 86 | + func(data) |
| 87 | + end_time = time.perf_counter() |
| 88 | + return (end_time - start_time) / iterations |
| 89 | + except Exception: |
| 90 | + return None |
| 91 | + |
| 92 | +def detect_complexity(n_values, times): |
| 93 | + """ |
| 94 | + Estimate complexity by comparing RSquared values for different models. |
| 95 | + Simplified approach: Normalize data and check correlation with theoretical curves. |
| 96 | + """ |
| 97 | + if len(times) < 3: |
| 98 | + return "Insufficient Data" |
| 99 | + |
| 100 | + # Normalize times |
| 101 | + min_time = min(times) |
| 102 | + if min_time == 0: min_time = 1e-9 |
| 103 | + normalized_times = [t / min_time for t in times] |
| 104 | + |
| 105 | + models = { |
| 106 | + "O(1) (Constant)": [1 for _ in n_values], |
| 107 | + "O(log n) (Logarithmic)": [math.log(n) if n > 0 else 0 for n in n_values], |
| 108 | + "O(n) (Linear)": [n for n in n_values], |
| 109 | + "O(n log n) (Linearithmic)": [n * math.log(n) if n > 0 else 0 for n in n_values], |
| 110 | + "O(n^2) (Quadratic)": [n**2 for n in n_values], |
| 111 | + } |
| 112 | + |
| 113 | + best_fit = None |
| 114 | + best_score = -float('inf') |
| 115 | + |
| 116 | + for name, theoretical in models.items(): |
| 117 | + # Calculate correlation coefficient (Pearson) |
| 118 | + try: |
| 119 | + if len(set(theoretical)) == 1: # Handle constant case |
| 120 | + # For constant time, we check variance of times |
| 121 | + score = 1.0 / (statistics.stdev(normalized_times) + 1.0) |
| 122 | + else: |
| 123 | + # Correlation between theoretical and actual |
| 124 | + # Using covariance / (std_dev_x * std_dev_y) |
| 125 | + correlation = statistics.correlation(theoretical, times) |
| 126 | + score = correlation |
| 127 | + |
| 128 | + if score > best_score: |
| 129 | + best_score = score |
| 130 | + best_fit = name |
| 131 | + except statistics.StatisticsError: |
| 132 | + continue |
| 133 | + |
| 134 | + return best_fit, best_score |
| 135 | + |
def main():
    """
    Command-line entry point.

    Reads ``<module> <function>`` from ``sys.argv``, imports the function,
    times it at a fixed set of input sizes, prints a table of the timings and
    then the estimated complexity class with its fit score.

    Exits with status 1 when arguments are missing, when the module or
    attribute cannot be loaded, when the attribute is not callable, or when
    the function cannot be executed for one of the input sizes.
    """
    if len(sys.argv) < 3:
        print(__doc__)
        sys.exit(1)

    module_name = sys.argv[1]
    func_name = sys.argv[2]

    try:
        module = importlib.import_module(module_name)
        func = getattr(module, func_name)
    except (ImportError, AttributeError) as e:
        print(f"Error importing {module_name}.{func_name}: {e}", file=sys.stderr)
        sys.exit(1)

    # getattr() succeeds for any attribute (constants, submodules, ...);
    # reject non-callables here instead of failing later during timing.
    if not callable(func):
        print(
            f"Error importing {module_name}.{func_name}: "
            f"{type(func).__name__!r} object is not callable",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Estimating complexity for {module_name}.{func_name}...")

    # Input sizes to test
    n_values = [100, 500, 1000, 2000, 5000]
    times = []

    print(f"{'Input Size (n)':<15} | {'Avg Time (s)':<15}")
    print("-" * 35)

    for n in n_values:
        t = measure_execution_time(func, n)
        if t is None:
            print(
                "Failed to execute function. ensure it accepts an int or list[int].",
                file=sys.stderr,
            )
            # A partial table cannot be fitted; signal the failure to the
            # calling shell rather than exiting with status 0.
            sys.exit(1)
        times.append(t)
        print(f"{n:<15} | {t:.6f}")

    complexity, score = detect_complexity(n_values, times)
    print("-" * 35)
    print(f"Estimated Complexity: {complexity}")
    print(f"Fit Score: {score:.3f}")


if __name__ == "__main__":
    main()
0 commit comments