|
| 1 | +#!/usr/bin/env python3 |
import json
import math
import os
import subprocess
import sys
from collections import Counter
from pathlib import Path
| 5 | + |
def shannon_entropy(text: str, min_length: int = 10) -> float:
    """Return the Shannon entropy of *text* in bits per character.

    The entropy is computed over the frequency of each distinct character.

    Args:
        text: The string to measure. Falsy values (empty string, ``None``)
            yield ``0.0``.
        min_length: Texts shorter than this many characters are not scored
            and yield ``0.0``. Defaults to 10.

    Returns:
        A non-negative float; ``0.0`` for empty, too-short, or
        single-symbol text.
    """
    if not text or len(text) < min_length:
        return 0.0
    total = len(text)
    weighted_logs = sum(
        (count / total) * math.log2(count / total)
        for count in Counter(text).values()
    )
    # A single-symbol text sums to 0.0 and negating it gives -0.0, which
    # formats as "-0.000"; abs() normalises the sign without changing any
    # other value (the sum is never positive).
    return abs(weighted_logs)
| 12 | + |
# File suffixes that are never scored (images, binaries, lock files).
# Compared against the lower-cased suffix so ".PNG" is skipped like ".png".
_SKIPPED_SUFFIXES = frozenset(
    {'.png', '.jpg', '.gif', '.bin', '.lock', '.exe', '.dll', '.so'}
)
# Where the JSON report is written.
_REPORT_PATH = '/tmp/beauty.json'
# At most this many per-file lines are included in the report.
_MAX_REPORTED_FILES = 20
# Inclusive average-entropy range that earns the positive verdict.
_SWEET_SPOT = (4.3, 4.7)


def _git_lines(*args):
    """Run ``git`` with *args* and return its stdout as a list of lines.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    return subprocess.check_output(['git', *args], text=True).splitlines()


def _changed_files():
    """Return the file paths to score, trying progressively wider sources.

    Order of attempts:
      1. ``git diff`` against ``origin/<GITHUB_BASE_REF>`` when that
         environment variable is set and non-empty.
      2. ``git diff`` between ``HEAD~1`` and ``HEAD``.
      3. ``git ls-files`` (every tracked file) as a last resort.

    Raises:
        subprocess.CalledProcessError: if even ``git ls-files`` fails.
    """
    # The base branch comes from the environment rather than from a
    # "${{ ... }}" placeholder inside the source: the placeholder is only
    # expanded when the script text is inlined in a workflow file, and
    # expanding untrusted text into code is an injection risk.
    base_ref = os.environ.get('GITHUB_BASE_REF', '').strip()

    attempts = []
    if base_ref:
        attempts.append(('diff', '--name-only', f'origin/{base_ref}', 'HEAD'))
    attempts.append(('diff', '--name-only', 'HEAD~1', 'HEAD'))

    for git_args in attempts:
        try:
            return _git_lines(*git_args)
        except subprocess.CalledProcessError:
            continue
    # Last resort: all files in the repo. A failure here propagates.
    return _git_lines('ls-files')


def _score_files(names):
    """Compute the entropy of every readable, non-skipped file in *names*.

    Returns:
        A ``(report_lines, total_entropy)`` tuple, where ``report_lines``
        holds one ``"<name>: <entropy>"`` string per scored file.
    """
    report_lines = []
    total_entropy = 0.0
    for name in names:
        path = Path(name.strip())
        if path.suffix.lower() in _SKIPPED_SUFFIXES or not path.is_file():
            continue
        try:
            content = path.read_text(encoding='utf-8', errors='ignore')
        except OSError as err:
            # Best effort: an unreadable file is reported and skipped
            # instead of being silently dropped or aborting the run.
            print(f"Skipping {name}: {err}", file=sys.stderr)
            continue
        entropy = shannon_entropy(content)
        report_lines.append(f"{name}: {entropy:.3f}")
        total_entropy += entropy
    return report_lines, total_entropy


def _verdict_for(avg, scored_count):
    """Return the human-readable verdict for an average entropy.

    "No source files changed" is chosen from the number of scored files,
    not from the average, so a set of very short files (average 0.0) is
    not misreported as "nothing changed".
    """
    if scored_count == 0:
        return "No source files changed"
    low, high = _SWEET_SPOT
    if low <= avg <= high:
        return "✅ Mid-4 beauty detected (thoughtful human code!)"
    return "⚠️ Consider review — entropy outside sweet spot"


def main():
    """Score the changed files, write the JSON report, and print a summary."""
    report_lines, total_entropy = _score_files(_changed_files())
    scored_count = len(report_lines)

    avg = round(total_entropy / scored_count, 3) if scored_count else 0.0
    verdict = _verdict_for(avg, scored_count)

    with open(_REPORT_PATH, 'w', encoding='utf-8') as report:
        json.dump({
            "average_entropy": avg,
            "verdict": verdict,
            "files": report_lines[:_MAX_REPORTED_FILES],
        }, report, indent=2)

    print(f"Average entropy: {avg}")
    print(verdict)


if __name__ == "__main__":
    main()
0 commit comments