
Commit fa4a199

add example of ML-based toxicity detection
1 parent 74584d4 commit fa4a199

1 file changed

Lines changed: 47 additions & 0 deletions

File tree

examples/python/ml_moderation.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""ML-based toxicity detection example.

Requires: pip install 'badwords-py[ml]'

The ML model (XLM-RoBERTa) detects toxicity in multiple languages.
First run downloads the model from GitHub Releases (~135MB).
"""

import sys

try:
    from badwords.ml import ToxicityPredictor
except ImportError:
    print("Error: badwords-py[ml] required. Install with:", file=sys.stderr)
    print("  pip install 'badwords-py[ml]'", file=sys.stderr)
    sys.exit(1)


def main() -> None:
    # Create the predictor (the model is downloaded on the first call to predict)
    predictor = ToxicityPredictor()

    texts = [
        "Hello, how are you today?",
        "Have a nice day!",
        "You are stupid and worthless",
        "Поздравляю, теперь ты не тупой",  # Russian: "Congratulations, now you're not stupid"
        "Иди нахуй",  # Russian: "Fuck off"
    ]

    print("=" * 60)
    print("ML Toxicity Detection (0.0 = clean, 1.0 = toxic)")
    print("Threshold: 0.5")
    print("=" * 60)

    for text in texts:
        prob = predictor.predict(text)
        label = "TOXIC" if prob >= 0.5 else "clean"
        bar = "█" * int(prob * 20) + "░" * (20 - int(prob * 20))
        print(f"  {prob:.2f} [{label:5}] {bar} {text!r}")

    print("=" * 60)


if __name__ == "__main__":
    main()
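
For context, the predict() call shown in the diff can also back a simple moderation gate. The sketch below is illustrative only: it assumes ToxicityPredictor.predict returns a float in [0.0, 1.0], as the example's output legend suggests, and the is_toxic() helper with its threshold parameter is hypothetical, not part of the badwords-py API.

#!/usr/bin/env python3
"""Illustrative sketch: gate messages on the predicted toxicity score.

Assumes the badwords.ml.ToxicityPredictor API used in the example above;
is_toxic() and its threshold parameter are hypothetical helpers.
"""

from badwords.ml import ToxicityPredictor

predictor = ToxicityPredictor()


def is_toxic(text: str, threshold: float = 0.5) -> bool:
    # predict() is assumed to return a probability in [0.0, 1.0];
    # flag anything at or above the chosen threshold.
    return predictor.predict(text) >= threshold


if __name__ == "__main__":
    for msg in ["Have a nice day!", "You are stupid and worthless"]:
        verdict = "blocked" if is_toxic(msg) else "allowed"
        print(f"{verdict}: {msg!r}")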
