Skip to content

Commit 5161cdb

Browse files
authored
Merge pull request #27 from GYFX35/feat/ai-fake-content-verification-11814308070149739403
Add AI content and fake content verification features
2 parents 1298b34 + 1029812 commit 5161cdb

9 files changed

Lines changed: 370 additions & 2 deletions

File tree

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import re
2+
from .heuristics import AI_PHRASES, HEURISTIC_WEIGHTS
3+
4+
def analyze_text_for_ai_content(text):
    """
    Analyze text for heuristic indicators of AI generation.

    Heuristics:
      1. Known AI-typical phrases (``AI_PHRASES``, stored lowercase).
      2. Robotic structure: three or more numbered-list items.
      3. Absence of common human typos in a long passage (50+ words).
      4. Highly repetitive sentence starters (>40% share one first word).

    Args:
        text: The text to analyze; may be empty or None.

    Returns:
        dict with keys "score" (float), "indicators_found" (list[str]),
        "is_ai_generated" (bool) and "assessment" (str).
    """
    if not text:
        # Fixed: include "assessment" so the empty-input result has the
        # same shape as the normal path (callers render this field).
        return {
            "score": 0.0,
            "indicators_found": [],
            "is_ai_generated": False,
            "assessment": "Low likelihood",
        }

    text_lower = text.lower()
    score = 0.0
    indicators_found = []

    # 1. Check for known AI phrases (substring match on lowercased text).
    for phrase in AI_PHRASES:
        if phrase in text_lower:
            score += HEURISTIC_WEIGHTS.get("AI_PHRASE", 2.0)
            indicators_found.append(f"Found AI-typical phrase: '{phrase}'")

    # 2. Robotic structure: numbered-list items ("1. ", "2. ", ...).
    # Fixed: (?m)^ also matches an item on the very first line, which the
    # previous pattern (requiring a preceding '\n') missed.
    numbered_lists = re.findall(r'(?m)^\d+\.\s', text)
    if len(numbered_lists) >= 3:
        score += HEURISTIC_WEIGHTS.get("ROBOTIC_STRUCTURE", 3.0)
        indicators_found.append("Contains multiple numbered lists, typical of structured AI output.")

    # 3. Lack of common human errors: AI text is often too "perfect".
    # Fixed: the list previously contained the CORRECT spellings 'receive'
    # and 'believe', so ordinary prose using those words was wrongly
    # treated as containing typos.
    common_typos = ('teh', 'recieve', 'beleive', 'occured', 'definately')
    words = text_lower.split()
    if len(words) > 50 and not any(word in common_typos for word in words):
        score += HEURISTIC_WEIGHTS.get("LACK_OF_ERRORS", 1.5)
        indicators_found.append("Text is highly polished with no common human typos in a long passage.")

    # 4. Repetitive sentence starts.
    sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
    if len(sentences) >= 5:
        from collections import Counter  # local import, as elsewhere in this module
        starts = [s.split()[0].lower() if s.split() else "" for s in sentences]
        most_common = Counter(starts).most_common(1)
        if most_common and most_common[0][1] / len(sentences) > 0.4:
            score += HEURISTIC_WEIGHTS.get("ROBOTIC_STRUCTURE", 3.0)
            indicators_found.append(
                f"High repetition of sentence starters ('{most_common[0][0]}'), suggests robotic generation."
            )

    is_ai = score >= 5.0

    return {
        "score": round(score, 2),
        "indicators_found": indicators_found,
        "is_ai_generated": is_ai,
        "assessment": "High likelihood" if score > 7.0
                      else "Medium likelihood" if score >= 5.0
                      else "Low likelihood",
    }
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import re
2+
import nltk
3+
from .heuristics import (
4+
SENSATIONALIST_KEYWORDS,
5+
CLICKBAIT_PATTERNS,
6+
HEURISTIC_WEIGHTS
7+
)
8+
9+
def _download_nltk_data():
    """
    Ensure the NLTK resources this module relies on are installed,
    downloading each one quietly on first use.
    """
    # (lookup path for nltk.data.find, downloadable package name).
    # Fixed: the tagger was previously probed at 'help/tagsets', a path the
    # 'averaged_perceptron_tagger' package does not provide — so the check
    # could never succeed and the tagger was re-downloaded on every call
    # while its actual presence went unverified.
    required = (
        ('tokenizers/punkt', 'punkt'),
        ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
        ('chunkers/maxent_ne_chunker', 'maxent_ne_chunker'),
        ('corpora/words', 'words'),
    )
    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package, quiet=True)
26+
27+
def analyze_text_for_fake_content(text):
    """
    Analyze a block of text for indicators of fake or misleading content.

    Heuristics: sensationalist keywords, clickbait regex patterns,
    excessive punctuation, and multiple all-caps words. Named entities
    (organizations and people) are extracted on a best-effort basis for
    context.

    Args:
        text: The text to analyze; may be empty or None.

    Returns:
        dict with keys "score" (float), "indicators_found" (list[str]),
        "named_entities" (dict of "organizations"/"persons" lists) and
        "assessment" (str).
    """
    _download_nltk_data()
    if not text:
        # Fixed: return the same shape as the normal path — the frontend
        # reads "named_entities" and "assessment" from every result.
        return {
            "score": 0.0,
            "indicators_found": [],
            "named_entities": {"organizations": [], "persons": []},
            "assessment": "Low suspicion",
        }

    text_lower = text.lower()
    score = 0.0
    indicators_found = []

    # 1. Sensationalist keywords (stored lowercase, substring match).
    for keyword in SENSATIONALIST_KEYWORDS:
        if keyword in text_lower:
            score += HEURISTIC_WEIGHTS.get("SENSATIONALIST_KEYWORD", 1.0)
            indicators_found.append(f"Found sensationalist keyword: '{keyword}'")

    # 2. Clickbait patterns, matched case-insensitively on the raw text.
    for pattern in CLICKBAIT_PATTERNS:
        if re.search(pattern, text, re.IGNORECASE):
            score += HEURISTIC_WEIGHTS.get("CLICKBAIT_PATTERN", 1.5)
            indicators_found.append(f"Found clickbait pattern: '{pattern}'")

    # 3. Excessive punctuation (common in fake news/clickbait).
    # Fixed: simplified from '!!|!!\?|\?{2,}' — the '!!\?' alternative was
    # dead code, since '!!' already matches its prefix.
    if re.search(r'!{2,}|\?{2,}', text):
        score += 1.0
        indicators_found.append("Found excessive punctuation (e.g., '!!', '???').")

    # 4. Multiple all-caps words of 4+ letters (short acronyms excluded).
    all_caps_words = re.findall(r'\b[A-Z]{4,}\b', text)
    if len(all_caps_words) >= 2:
        score += 1.0
        indicators_found.append(f"Found multiple words in all caps: {', '.join(all_caps_words[:3])}...")

    # 5. Named-entity extraction (optional, for context). Best effort:
    # NLTK may not be fully initialized or its data may be missing, in
    # which case the entity lists are simply left empty.
    named_entities = {"organizations": [], "persons": []}
    try:
        tagged = nltk.pos_tag(nltk.word_tokenize(text))
        for entity in nltk.ne_chunk(tagged):
            if not isinstance(entity, nltk.Tree):
                continue
            entity_text = " ".join(word for word, _tag in entity.leaves())
            if entity.label() == 'ORGANIZATION':
                bucket = named_entities["organizations"]
            elif entity.label() == 'PERSON':
                bucket = named_entities["persons"]
            else:
                continue
            if entity_text not in bucket:  # de-duplicate, preserve order
                bucket.append(entity_text)
    except Exception:
        # Deliberate best-effort: entity extraction must never fail the
        # overall analysis.
        pass

    return {
        "score": round(score, 2),
        "indicators_found": indicators_found,
        "named_entities": named_entities,
        "assessment": "High suspicion" if score > 5.0
                      else "Moderate suspicion" if score >= 3.0
                      else "Low suspicion",
    }

social_media_analyzer/heuristics.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,25 @@
154154
]
155155

156156

157+
# --- AI Content Heuristics ---

# Phrases characteristic of LLM output. All entries are lowercase: the
# detector matches them against a lowercased copy of the input text.
AI_PHRASES = [
    "as an ai language model",
    "based on the information provided",
    "it is important to note that",
    "in conclusion",
    "furthermore",
    "moreover",
    "on the other hand",
    "at the end of the day",
    "it is worth mentioning",
    "provide a comprehensive overview",
    "the content provided",
    "can be summarized as",
    "let me know if you need any further assistance",
    "here is a summary of",
]
175+
157176
# --- Fake News Heuristics ---
158177

159178
FAKE_NEWS_DOMAINS = [
@@ -172,7 +191,9 @@
172191
# Regular-expression fragments typical of clickbait headlines; the
# verifier applies each with re.IGNORECASE against the raw text.
CLICKBAIT_PATTERNS = [
    r"you won't believe",
    r"will shock you",
    r"number \d will amaze you",
    r"this one weird trick",
    r"doctors hate him",
    r"the truth about",
    r"scientists baffled",
    r"what happened next",
    r"secret to",
    r"completely changed my life",
    r"instant results",
    r"limited time offer",
    r"everyone is talking about",
    r"gone viral",
]
177198

178199
# --- Regular Expression Patterns ---
@@ -271,6 +292,9 @@ def generate_suspicious_url_patterns(legitimate_domains):
271292
"PHONE_NUMBER_UNSOLICITED": 1.0,
272293
"SUSPICIOUS_URL_PATTERN": 3.0, # High weight for matching a suspicious URL pattern
273294
"GOOGLE_SAFE_BROWSING_HIT": 10.0, # Very high weight for a positive Google Safe Browsing match
295+
"AI_PHRASE": 2.0,
296+
"ROBOTIC_STRUCTURE": 3.0,
297+
"LACK_OF_ERRORS": 1.5,
274298
# Teenager Protection Weights
275299
"CYBERBULLYING": 2.5,
276300
"INAPPROPRIATE_CONTENT": 3.0,
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import unittest
2+
from .ai_content_detector import analyze_text_for_ai_content
3+
from .fake_content_verifier import analyze_text_for_fake_content
4+
5+
class TestAIAndFakeContent(unittest.TestCase):
    """Smoke tests for the AI-content detector and fake-content verifier."""

    def test_ai_content_detection(self):
        # A passage saturated with AI-typical phrases should be flagged.
        sample = ("As an AI language model, I am designed to provide helpful "
                  "information. Furthermore, in conclusion, this text looks "
                  "like it was generated by an AI.")
        report = analyze_text_for_ai_content(sample)
        self.assertTrue(report["is_ai_generated"])
        self.assertIn("Found AI-typical phrase: 'as an ai language model'",
                      report["indicators_found"])

    def test_human_content_detection(self):
        # Casual human chit-chat should score below the AI threshold.
        sample = ("Hey there, I just wanted to say hi. I hope you have a "
                  "great day! No robotic stuff here.")
        report = analyze_text_for_ai_content(sample)
        self.assertFalse(report["is_ai_generated"])

    def test_fake_content_verification(self):
        # Sensationalist, clickbait-heavy text should score as suspicious.
        sample = ("SHOCKING BOMBSHELL!!! You won't believe what happened "
                  "next! This secret will shock you.")
        report = analyze_text_for_fake_content(sample)
        self.assertGreater(report["score"], 3.0)
        self.assertIn("Found sensationalist keyword: 'shocking'",
                      report["indicators_found"])
        self.assertIn("Found clickbait pattern: 'you won't believe'",
                      report["indicators_found"])

if __name__ == '__main__':
    unittest.main()

src/AIContentDetector.jsx

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import React, { useState } from 'react';
2+
3+
function AIContentDetector() {
4+
const [text, setText] = useState('');
5+
const [result, setResult] = useState(null);
6+
const [loading, setLoading] = useState(false);
7+
8+
const handleAnalyze = () => {
9+
setLoading(true);
10+
fetch('/analyze/ai-content', {
11+
method: 'POST',
12+
headers: {
13+
'Content-Type': 'application/json',
14+
},
15+
body: JSON.stringify({ text }),
16+
})
17+
.then((res) => res.json())
18+
.then((data) => {
19+
setResult(data);
20+
setLoading(false);
21+
})
22+
.catch((error) => {
23+
console.error('Error:', error);
24+
setLoading(false);
25+
});
26+
};
27+
28+
return (
29+
<div className="analyzer-container">
30+
<h2>AI Content Detector</h2>
31+
<p>Paste text below to check if it was likely generated by an AI.</p>
32+
<textarea
33+
value={text}
34+
onChange={(e) => setText(e.target.value)}
35+
placeholder="Paste text here..."
36+
rows="10"
37+
style={{ width: '100%', marginBottom: '10px' }}
38+
/>
39+
<br />
40+
<button onClick={handleAnalyze} disabled={loading || !text}>
41+
{loading ? 'Analyzing...' : 'Detect AI Content'}
42+
</button>
43+
{result && (
44+
<div className="results">
45+
<h3>Analysis Results</h3>
46+
<p><strong>Assessment:</strong> {result.assessment}</p>
47+
<p><strong>Confidence Score:</strong> {result.score}</p>
48+
{result.indicators_found.length > 0 && (
49+
<>
50+
<h4>Indicators Found:</h4>
51+
<ul>
52+
{result.indicators_found.map((indicator, index) => (
53+
<li key={index}>{indicator}</li>
54+
))}
55+
</ul>
56+
</>
57+
)}
58+
</div>
59+
)}
60+
</div>
61+
);
62+
}
63+
64+
export default AIContentDetector;

src/App.jsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ import React, { useState } from 'react';
22
import './App.css';
33
import ScamAnalyzer from './ScamAnalyzer';
44
import FakeNewsAnalyzer from './FakeNewsAnalyzer';
5+
import AIContentDetector from './AIContentDetector';
6+
import FakeContentAnalyzer from './FakeContentAnalyzer';
57
import FBIGame from './FBIGame';
68
import SupplyChainPlatform from './SupplyChainPlatform';
79

@@ -15,13 +17,17 @@ function App() {
1517
<nav>
1618
<button className={view === 'scam' ? 'active' : ''} onClick={() => setView('scam')}>Scam Analyzer</button>
1719
<button className={view === 'fake-news' ? 'active' : ''} onClick={() => setView('fake-news')}>Fake News Analyzer</button>
20+
<button className={view === 'ai-content' ? 'active' : ''} onClick={() => setView('ai-content')}>AI Content</button>
21+
<button className={view === 'fake-content' ? 'active' : ''} onClick={() => setView('fake-content')}>Fake Content</button>
1822
<button className={view === 'fbi-game' ? 'active' : ''} onClick={() => setView('fbi-game')}>FBI AR Game</button>
1923
<button className={view === 'supply-chain' ? 'active' : ''} onClick={() => setView('supply-chain')}>Supply Chain</button>
2024
</nav>
2125
</header>
2226
<main>
2327
{view === 'scam' && <ScamAnalyzer />}
2428
{view === 'fake-news' && <FakeNewsAnalyzer />}
29+
{view === 'ai-content' && <AIContentDetector />}
30+
{view === 'fake-content' && <FakeContentAnalyzer />}
2531
{view === 'fbi-game' && <FBIGame />}
2632
{view === 'supply-chain' && <SupplyChainPlatform />}
2733
</main>

src/FakeContentAnalyzer.jsx

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import React, { useState } from 'react';
2+
3+
function FakeContentAnalyzer() {
4+
const [text, setText] = useState('');
5+
const [result, setResult] = useState(null);
6+
const [loading, setLoading] = useState(false);
7+
8+
const handleAnalyze = () => {
9+
setLoading(true);
10+
fetch('/analyze/fake-content', {
11+
method: 'POST',
12+
headers: {
13+
'Content-Type': 'application/json',
14+
},
15+
body: JSON.stringify({ text }),
16+
})
17+
.then((res) => res.json())
18+
.then((data) => {
19+
setResult(data);
20+
setLoading(false);
21+
})
22+
.catch((error) => {
23+
console.error('Error:', error);
24+
setLoading(false);
25+
});
26+
};
27+
28+
return (
29+
<div className="analyzer-container">
30+
<h2>Fake Content Verifier</h2>
31+
<p>Paste text content (e.g., a social media post or article snippet) to check for misleading indicators.</p>
32+
<textarea
33+
value={text}
34+
onChange={(e) => setText(e.target.value)}
35+
placeholder="Paste content here..."
36+
rows="10"
37+
style={{ width: '100%', marginBottom: '10px' }}
38+
/>
39+
<br />
40+
<button onClick={handleAnalyze} disabled={loading || !text}>
41+
{loading ? 'Analyzing...' : 'Verify Content'}
42+
</button>
43+
{result && (
44+
<div className="results">
45+
<h3>Analysis Results</h3>
46+
<p><strong>Assessment:</strong> {result.assessment}</p>
47+
<p><strong>Suspicion Score:</strong> {result.score}</p>
48+
{result.indicators_found.length > 0 && (
49+
<>
50+
<h4>Indicators Found:</h4>
51+
<ul>
52+
{result.indicators_found.map((indicator, index) => (
53+
<li key={index}>{indicator}</li>
54+
))}
55+
</ul>
56+
</>
57+
)}
58+
{result.named_entities && (result.named_entities.organizations.length > 0 || result.named_entities.persons.length > 0) && (
59+
<div className="entities">
60+
<h4>Entities Mentioned:</h4>
61+
{result.named_entities.organizations.length > 0 && (
62+
<p><strong>Organizations:</strong> {result.named_entities.organizations.join(', ')}</p>
63+
)}
64+
{result.named_entities.persons.length > 0 && (
65+
<p><strong>People:</strong> {result.named_entities.persons.join(', ')}</p>
66+
)}
67+
</div>
68+
)}
69+
</div>
70+
)}
71+
</div>
72+
);
73+
}
74+
75+
export default FakeContentAnalyzer;

0 commit comments

Comments
 (0)