1+ # coding=utf-8
2+
import os
import random
import time

from pyecharts import options as opts
from pyecharts.charts import Bar, Pie
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
10+
11+
def login(username='18821290263', password='123456'):
    """Open the itjuzi login page in Chrome and submit the login form.

    Args:
        username: Text typed into the first input of the login form.
            Defaults to the value that used to be hard-coded here.
        password: Text typed into the second input of the login form.
            Defaults to the value that used to be hard-coded here.

    Returns:
        The Chrome WebDriver instance, after the login button has been
        clicked and a fixed 5 second pause has elapsed.

    Raises:
        Exception: Whatever Selenium raises while loading the page or
            locating the form; the browser is closed before re-raising.
    """
    # NOTE: the defaults only keep existing callers working; prefer passing
    # credentials in explicitly rather than keeping them in source control.
    driver = webdriver.Chrome()
    try:
        driver.get('https://www.itjuzi.com/login')
        driver.implicitly_wait(10)

        # Locate each input once, then clear and fill it.
        user_input = driver.find_element(By.XPATH, '//form/div[1]/div/div[1]/input')
        user_input.clear()
        user_input.send_keys(username)

        password_input = driver.find_element(By.XPATH, '//form/div[2]/div/div[1]/input')
        password_input.clear()
        password_input.send_keys(password)

        driver.find_element(By.CLASS_NAME, 'el-button').click()
        driver.switch_to.default_content()
        # Fixed pause after submitting the form (presumably to let the
        # post-login page load - confirm whether an explicit wait would do).
        time.sleep(5)
    except Exception:
        # Do not leave an orphaned browser behind when login fails.
        driver.quit()
        raise
    return driver
26+
def link(driver):
    """Navigate from the current page to the "死亡公司" (dead companies) list.

    Args:
        driver: The WebDriver returned by ``login``.

    The function hovers over the first element with class ``more``, clicks
    the link whose text is ``死亡公司``, switches to the second window handle
    and then pauses for 5 seconds.
    """
    # Move the mouse onto the company-library navigation entry so that its
    # menu is shown.
    nav_entry = driver.find_elements(By.CLASS_NAME, 'more')[0]
    ActionChains(driver).move_to_element(nav_entry).perform()

    # Click the "dead companies" hyperlink.
    driver.find_element(By.LINK_TEXT, '死亡公司').click()

    # Switch to the newly opened tab.
    driver.switch_to.window(driver.window_handles[1])
    driver.implicitly_wait(10)
    time.sleep(5)
33+
def crawler(driver):
    """Scrape 10 pages of the company table into ``itjuzi/deadCompany.csv``.

    Args:
        driver: A WebDriver already showing the dead-company list.

    For every ``<tr>`` of the page's ``<tbody>`` seven cell texts are joined
    with commas and appended to the CSV file as one line. After each page the
    ``btn-next`` element is clicked and the function sleeps for a random
    2-10 seconds.
    """
    # Row-relative XPaths of the scraped cells, in CSV column order.
    # NOTE(review): values containing a comma will shift the columns that
    # the chart functions read back with ``split(',')``.
    cell_xpaths = (
        'td[3]/div/h5/a',  # company short name
        'td[3]/div/p',     # survival time
        'td[4]',           # closing date
        'td[5]',           # industry
        'td[6]',           # company location
        'td[7]',           # NOTE(review): original comment repeated "closing date"; verify column
        'td[8]',           # funding status
    )

    # The output directory must exist before the file can be opened.
    os.makedirs('itjuzi', exist_ok=True)

    # Only the 2020 data is scraped (the first 10 pages).
    for page in range(1, 11):
        table_rows = driver.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')
        result = [
            ','.join(row.find_element(By.XPATH, xpath).text for xpath in cell_xpaths)
            for row in table_rows
        ]

        with open('itjuzi/deadCompany.csv', 'a') as f:
            # Exactly one '\n' per row: no stray leading spaces and no
            # whitespace-only trailing line in the file.
            f.writelines(row_text + '\n' for row_text in result)
        print(result)

        print("第 %s 页爬取完成" % page)
        # Look the button up again on every page, in case the page change
        # replaced the element located earlier.
        driver.find_element(By.CLASS_NAME, 'btn-next').click()  # go to the next page
        time.sleep(random.uniform(2, 10))
61+
def parse_csv():
    """Read ``itjuzi/deadCompany.csv`` and return its non-blank lines.

    Returns:
        A list of strings, one per line of the file in file order, with
        surrounding whitespace stripped. Lines that are empty after
        stripping are left out, because the chart functions index into the
        comma-split fields of every entry.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open('itjuzi/deadCompany.csv', 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]
69+
70+
def lifetime_pie(deadCompany_list):
    """Render a pie chart of company lifetimes to ``存活时间.html``.

    Args:
        deadCompany_list: Comma-separated row strings. The second field is
            stripped of ``存活`` and cut at the first ``年``; what remains
            must parse as an integer number of years.

    Rows are counted per year value; values of 10 or more share the
    ``'>=10'`` bucket. Rows with fewer than two fields are skipped.

    Raises:
        ValueError: If the text before ``年`` is not an integer.
    """
    lifetime_dict = {}
    for row in deadCompany_list:
        info = row.split(',')
        if len(info) < 2:
            # Blank or truncated row: there is no lifetime column to read.
            continue
        lifetime = info[1].replace('存活', '').split('年')[0]
        if int(lifetime) >= 10:
            lifetime = '>=10'
        lifetime_dict[lifetime] = lifetime_dict.get(lifetime, 0) + 1

    # pyecharts expects the series as [label, count] pairs.
    series = [list(item) for item in lifetime_dict.items()]
    (
        Pie()
        .add("", series, radius=["40%", "75%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="公司存活年限",
                pos_left="center",
                pos_top="20",
            ),
            legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
        .render("存活时间.html")
    )
92+
93+
def rongzi_pie(deadCompany_list):
    """Render a pie chart of funding status to ``融资情况.html``.

    Args:
        deadCompany_list: Comma-separated row strings; the seventh field is
            read as the funding status.

    Statuses listed in ``no_funding`` are counted as ``没有融资``, statuses
    listed in ``funded`` as ``已融资``; any other status is counted under its
    own text. Rows with fewer than seven fields are skipped.
    """
    no_funding = {'尚未获投', '不明确', '尚未获'}
    funded = {
        '天使轮', 'A轮', 'B轮', 'C轮', 'D轮', 'E轮', 'D+轮', '种子轮',
        'A+轮', '新三板', '战略投资', 'B+轮', 'Pre-A轮',
    }

    rongzi_dict = {}
    for row in deadCompany_list:
        info = row.split(',')
        if len(info) < 7:
            # Blank or truncated row: there is no funding column to read.
            continue
        rongzi = info[6].strip()
        if rongzi in no_funding:
            rongzi = '没有融资'
        elif rongzi in funded:
            rongzi = '已融资'

        rongzi_dict[rongzi] = rongzi_dict.get(rongzi, 0) + 1

    # pyecharts expects the series as [label, count] pairs.
    series = [list(item) for item in rongzi_dict.items()]
    (
        Pie()
        .add("", series, radius=["40%", "75%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="融资情况",
                pos_left="center",
                pos_top="20",
            ),
            legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
        .render("融资情况.html")
    )
147+
def place_bar(deadCompany_list):
    """Render a bar chart of company counts per location to ``地区.html``.

    Args:
        deadCompany_list: Comma-separated row strings; the fifth field is
            read as the company location.

    Rows with fewer than five fields are skipped.
    """
    place_dict = {}
    for row in deadCompany_list:
        info = row.split(',')
        if len(info) < 5:
            # Blank or truncated row: there is no location column to read.
            continue
        place = info[4].strip()
        place_dict[place] = place_dict.get(place, 0) + 1

    (
        Bar(init_opts=opts.InitOpts(width='2000px'))
        .add_xaxis(list(place_dict.keys()))
        .add_yaxis("地区", list(place_dict.values()))
        .set_global_opts(
            title_opts=opts.TitleOpts(title="地区分布")
        )
        .render("地区.html")
    )
165+
166+
if __name__ == '__main__':
    # Scrape: log in, open the dead-company list, append the rows to the CSV.
    driver = login()
    try:
        link(driver)
        crawler(driver)
    finally:
        # Close the browser even when scraping fails part-way through.
        driver.quit()

    # Chart: read the CSV back and render the three HTML charts.
    deadCompany_list = parse_csv()
    lifetime_pie(deadCompany_list)
    rongzi_pie(deadCompany_list)
    place_bar(deadCompany_list)
0 commit comments