1+ import csv
2+ import re
3+ from functools import reduce
4+
5+ import requests
6+ import json
7+ import time
8+ from pathlib import Path
9+
10+ from tencentcloud .common import credential
11+ from tencentcloud .common .profile .client_profile import ClientProfile
12+ from tencentcloud .common .profile .http_profile import HttpProfile
13+ from tencentcloud .common .exception .tencent_cloud_sdk_exception import TencentCloudSDKException
14+ from tencentcloud .nlp .v20190408 import nlp_client , models
15+ from pyecharts import options as opts
16+ from pyecharts .charts import Bar
17+ from pyecharts .commons .utils import JsCode
18+ from pyecharts .globals import ThemeType
19+ import jieba
20+ import wordcloud
21+
import ssl

# NOTE(review): this replaces the factory that the standard library's HTTPS
# clients use for their default context with the *unverified* one, i.e. TLS
# certificate verification is switched off process-wide for those clients.
# Presumably a workaround for a local certificate-store problem -- confirm it
# is still needed, and prefer fixing the certificate store instead.
ssl._create_default_https_context = ssl._create_unverified_context
24+
# CSV persistence helper
def csv_write(tablelist, path='danmu.csv'):
    """Append rows of ``[content, sentiment]`` to a CSV file.

    The header row ``['弹幕内容', '情感']`` is written first whenever the
    target file does not exist yet or exists but is still empty, so repeated
    calls build up a single table with exactly one header.

    Args:
        tablelist: Iterable of rows; each row is an iterable of cell values.
        path: Destination file. Defaults to ``'danmu.csv'``, the file the
            other functions in this script read.
    """
    tableheader = ['弹幕内容', '情感']
    csv_file = Path(path)
    needs_header = not csv_file.is_file() or csv_file.stat().st_size == 0

    # The platform default encoding is kept on purpose so the file stays
    # readable by the plain ``open('danmu.csv')`` calls elsewhere in this
    # script. ``errors='ignore'`` drops characters that encoding cannot
    # represent instead of aborting the whole write.
    with open(csv_file, 'a', newline='', errors='ignore') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(tableheader)
        writer.writerows(tablelist)
36+
def nlp(text):
    """Classify ``text`` with the Tencent Cloud NLP sentiment-analysis API.

    Credentials are read from the ``TENCENTCLOUD_SECRET_ID`` and
    ``TENCENTCLOUD_SECRET_KEY`` environment variables; API secrets must not
    be stored in source code.

    Args:
        text: The text to classify.

    Returns:
        ``'正面'``, ``'负面'`` or ``'中性'`` for a positive, negative or
        neutral result, or ``None`` when the SDK reports an error (the error
        is printed).

    Raises:
        RuntimeError: If either credential environment variable is unset.
    """
    import os  # local import: ``os`` is not among the file-level imports

    secret_id = os.environ.get("TENCENTCLOUD_SECRET_ID")
    secret_key = os.environ.get("TENCENTCLOUD_SECRET_KEY")
    if not secret_id or not secret_key:
        # Fail fast: without credentials every single request would fail.
        raise RuntimeError(
            "Set TENCENTCLOUD_SECRET_ID and TENCENTCLOUD_SECRET_KEY "
            "before calling nlp()"
        )

    sentiment = {'positive': '正面', 'negative': '负面', 'neutral': '中性'}
    params = {
        "Text": text,
        "Mode": "3class",
    }

    try:
        cred = credential.Credential(secret_id, secret_key)
        http_profile = HttpProfile()
        http_profile.endpoint = "nlp.tencentcloudapi.com"

        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        client = nlp_client.NlpClient(cred, "ap-guangzhou", client_profile)

        req = models.SentimentAnalysisRequest()
        req.from_json_string(json.dumps(params))

        resp = client.SentimentAnalysis(req)
    except TencentCloudSDKException as err:
        # Best effort: report the failure and let the caller carry on.
        print(err)
        return None

    return sentiment[resp.Sentiment]
59+
60+ # df = pd.DataFrame()
def danmu():
    """Download danmu comments, classify each one and append them to the CSV.

    For every source below, the ``timestamp`` query parameter is stepped from
    15 up to (but excluding) the source's upper bound in increments of 30.
    Each returned comment is passed to ``nlp`` and the resulting
    ``[content, sentiment]`` rows are handed to ``csv_write`` one page at a
    time, so progress survives an interruption.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            response.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.80 Safari/537.36'
    }
    # (URL template, exclusive upper bound for the timestamp parameter).
    # The templates previously contained "×tamp={}" -- "&timestamp={}" mangled
    # by HTML-entity decoding -- so the page value was never sent as its own
    # query parameter.
    sources = [
        ('https://mfm.video.qq.com/danmu?otype=json&callback=&target_id=6208914107%26vid%3Do0035t7199o&session_key=63761%2C673%2C1606144955&timestamp={}&_=1606144949402', 7478),
        ('https://mfm.video.qq.com/danmu?otype=json&callback=&target_id=6208234802%26vid%3Da00352eyo25&session_key=111028%2C1191%2C1606200649&timestamp={}&_=1606200643186', 8610),
    ]

    for template, upper_bound in sources:
        for timestamp in range(15, upper_bound, 30):
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(
                template.format(timestamp), headers=headers, timeout=10
            )
            response.raise_for_status()
            # strict=False tolerates raw control characters inside strings.
            result = json.loads(response.text, strict=False)
            time.sleep(1)  # throttle requests to the endpoint

            rows = []
            # Collect the target field from every comment on this page.
            for comment in result.get('comments') or []:
                content = comment['content']  # danmu text
                rows.append([content, nlp(content)])
                print(content)
            if rows:
                csv_write(rows)
82+
def analysis(csv_path='danmu.csv'):
    """Count, per guest, how many comments of each sentiment mention them.

    Each key of the result is a comma-separated list of aliases for one
    guest. A comment is attributed to a guest when it contains any of the
    aliases as a substring, and it is counted once per guest even if several
    aliases match.

    Rows are skipped when they have fewer than two columns or when the second
    column is not one of ``'正面'``, ``'负面'``, ``'中性'``; this covers the
    header row and rows whose sentiment is empty.

    Args:
        csv_path: CSV file with the comment text in column 0 and the
            sentiment label in column 1. Defaults to ``'danmu.csv'``.

    Returns:
        ``{alias_key: {'正面': int, '负面': int, '中性': int}}``.
    """
    labels = ('正面', '负面', '中性')
    guest_keys = (
        '陈凯歌,陈导,凯歌',
        '尔冬升,尔导',
        '赵薇,薇导',
        '郭敬明,郭导,小四',
        '大鹏',
    )
    guest = {key: dict.fromkeys(labels, 0) for key in guest_keys}
    aliases = {key: key.split(',') for key in guest_keys}

    with open(csv_path, newline='') as f:
        for row in csv.reader(f):
            if len(row) < 2:
                continue
            content, sentiment = row[0], row[1]
            if sentiment not in labels:
                continue
            for key, counts in guest.items():
                # Plain substring test: aliases are literal names, not
                # regular expressions.
                if any(alias in content for alias in aliases[key]):
                    counts[sentiment] += 1
    return guest
99+
def _sentiment_series(guest, keys, label):
    """Build one bar series: a ``{"value", "percent"}`` point per guest key."""
    points = []
    for key in keys:
        counts = guest.get(key) or {}
        value = counts.get(label, 0)
        total = sum(counts.values())
        # A guest with no comments gets 0 rather than a ZeroDivisionError.
        percent = value / total if total else 0
        points.append({"value": value, "percent": percent})
    return points


def draw(guest=None):
    """Render a stacked bar chart of per-guest sentiment to ``导演.html``.

    Args:
        guest: Mapping shaped like the return value of ``analysis``:
            ``{alias_key: {'正面': n, '负面': n, '中性': n}}``. Missing guests
            or labels are drawn as 0. ``None`` (the default) is treated as an
            empty mapping.
    """
    guest = guest or {}
    keys = (
        '陈凯歌,陈导,凯歌',
        '尔冬升,尔导',
        '赵薇,薇导',
        '郭敬明,郭导,小四',
        '大鹏',
    )
    # The x-axis label is each guest's first alias (the full name).
    names = [key.split(',')[0] for key in keys]

    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT)).add_xaxis(names)
    for label in ('正面', '负面', '中性'):
        chart.add_yaxis(
            label,
            _sentiment_series(guest, keys, label),
            stack="stack1",
            category_gap="50%",
        )
    chart.set_series_opts(
        label_opts=opts.LabelOpts(
            position="right",
            # Show each segment's share of its guest's total as a percentage.
            formatter=JsCode(
                "function(x){return Number(x.data.percent * 100).toFixed() + '%';}"
            ),
        )
    )
    chart.render("导演.html")
151+
152+
def ciyun(font_path="/System/Library/fonts/PingFang.ttc"):
    """Generate a word cloud image from the comment text in ``danmu.csv``.

    The comment column is written to ``ciyun.txt`` (one comment per line,
    overwriting any earlier file), segmented with jieba, and rendered to
    ``演员请就位2.png``.

    Args:
        font_path: Font file used to render the words. The default is a
            macOS system font path; pass another font on other systems.
    """
    with open('danmu.csv', newline='') as f:
        contents = [row[0] for row in csv.reader(f) if row]
    # Drop the header cell so it does not appear as a word in the cloud.
    if contents and contents[0] == '弹幕内容':
        contents = contents[1:]

    # One comment per line keeps the segmenter from merging characters across
    # comment boundaries. Mode 'w' prevents the corpus from doubling on every
    # run, which appending did.
    txt = '\n'.join(contents)
    with open('ciyun.txt', 'w', encoding='utf-8') as ciyun_file:
        ciyun_file.write(txt)

    # Build and configure the word cloud object.
    w = wordcloud.WordCloud(
        width=1000,
        height=700,
        background_color='white',
        font_path=font_path,
        collocations=False,
        stopwords={'的', '了', '啊', '我', '很', '是', '好', '这', '都', '不'},
    )

    # WordCloud splits on whitespace, so join the jieba tokens with spaces.
    result = " ".join(jieba.lcut(txt))
    w.generate(result)
    w.to_file('演员请就位2.png')
177+
178+
def _main():
    """Script entry point: runs the word-cloud step only."""
    ciyun()


if __name__ == "__main__":
    _main()
0 commit comments