Skip to content

Commit dc11e37

Browse files
authored
Create compute-entropy.py
make it beautiful
1 parent d5d5201 commit dc11e37

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python3
2+
import sys, math, json, subprocess
3+
from collections import Counter
4+
from pathlib import Path
5+
6+
def shannon_entropy(text: str, *, min_length: int = 10) -> float:
    """Return the Shannon entropy of *text* in bits per character.

    Each distinct character's relative frequency in *text* is used as its
    probability ``p``; the result is ``-sum(p * log2(p))`` over all
    distinct characters.

    Args:
        text: The string to measure.
        min_length: Inputs with fewer characters than this are not
            measured and yield ``0.0``. Defaults to 10, the cutoff that
            was previously hard-coded.

    Returns:
        The entropy in bits per character, or ``0.0`` when *text* is
        empty or shorter than *min_length*.
    """
    if not text or len(text) < min_length:
        return 0.0
    total = len(text)
    probabilities = [n / total for n in Counter(text).values()]
    # Every Counter value is >= 1, so each p is strictly positive and
    # log2(p) is always defined; no zero-probability filter is needed.
    # Negating each term (instead of the finished sum) keeps the result
    # for uniform text at +0.0 rather than -0.0, which would otherwise be
    # rendered as "-0.000" by fixed-point formatting.
    return sum(-p * math.log2(p) for p in probabilities)
12+
13+
# File types that are skipped instead of being read as text.
SKIPPED_SUFFIXES = frozenset({'.png', '.jpg', '.gif', '.bin', '.lock', '.exe', '.dll', '.so'})

# Where the JSON report is written.
REPORT_PATH = '/tmp/beauty.json'

# Maximum number of per-file entries included in the JSON report.
MAX_REPORTED_FILES = 20


def git_lines(*args):
    """Run ``git`` with *args* and return its stdout split into lines.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    return subprocess.check_output(['git', *args], text=True).splitlines()


def list_changed_files():
    """Return the paths to scan, trying progressively broader git queries.

    1. Files that differ between ``origin/<base branch>`` and ``HEAD``,
       when the ``GITHUB_BASE_REF`` environment variable is set.
    2. Files that differ between ``HEAD~1`` and ``HEAD``.
    3. Every file tracked by git.

    Raises:
        subprocess.CalledProcessError: If even ``git ls-files`` fails.
    """
    import os  # local import: only this helper needs the environment

    # A ``${{ github.base_ref }}`` expression is not expanded inside a
    # Python file, so the literal text used to reach git and this lookup
    # always failed. The base branch name is read from the environment
    # instead (see the GitHub Actions default environment variables).
    base_ref = os.environ.get('GITHUB_BASE_REF', '')
    try:
        if not base_ref:
            # No base branch known: go straight to the fallback below.
            raise subprocess.CalledProcessError(1, 'git rev-parse')
        base_sha = subprocess.check_output(
            ['git', 'rev-parse', f'origin/{base_ref}'], text=True
        ).strip()
        return git_lines('diff', '--name-only', base_sha, 'HEAD')
    except subprocess.CalledProcessError:
        # Fallback: compare against the previous commit.
        try:
            return git_lines('diff', '--name-only', 'HEAD~1', 'HEAD')
        except subprocess.CalledProcessError:
            # Last resort: all files in the repo.
            return git_lines('ls-files')


def is_scannable(path):
    """Return True if *path* is an existing regular file worth measuring.

    Directories are rejected, since they cannot be read as text, and
    suffixes are compared case-insensitively so that e.g. ``.PNG`` is
    skipped just like ``.png``.
    """
    return path.is_file() and path.suffix.lower() not in SKIPPED_SUFFIXES


def score_files(changed_files):
    """Compute the entropy of every scannable file in *changed_files*.

    Returns:
        A ``(results, total_entropy)`` tuple: ``results`` holds one
        ``"<name>: <entropy>"`` string per measured file and
        ``total_entropy`` is the sum of their entropies.
    """
    results = []
    total_ent = 0.0
    for name in changed_files:
        path = Path(name.strip())
        if not is_scannable(path):
            continue
        try:
            content = path.read_text(encoding='utf-8', errors='ignore')
        except OSError as err:
            # Best effort: an unreadable file is skipped, but visibly.
            print(f"Skipping {name}: {err}", file=sys.stderr)
            continue
        ent = shannon_entropy(content)
        results.append(f"{name}: {ent:.3f}")
        total_ent += ent
    return results, total_ent


def build_verdict(avg, count):
    """Return the human-readable verdict for an average entropy.

    Args:
        avg: Average entropy across the measured files.
        count: Number of files that were measured.

    The "no files" message depends on *count* rather than on *avg*, so a
    change consisting only of zero-entropy files is still reported as
    outside the sweet spot instead of as "nothing changed".
    """
    if count == 0:
        return "No source files changed"
    if 4.3 <= avg <= 4.7:
        return "✅ Mid-4 beauty detected (thoughtful human code!)"
    return "⚠️ Consider review — entropy outside sweet spot"


def main():
    """Measure the changed files, write the JSON report and print a summary."""
    results, total_ent = score_files(list_changed_files())
    count = len(results)
    avg = round(total_ent / count, 3) if count > 0 else 0.0
    verdict = build_verdict(avg, count)

    with open(REPORT_PATH, 'w', encoding='utf-8') as report:
        json.dump({
            "average_entropy": avg,
            "verdict": verdict,
            "files": results[:MAX_REPORTED_FILES]
        }, report, indent=2)

    print(f"Average entropy: {avg}")
    print(verdict)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)