Skip to content

Commit 77de834

Browse files
committed
no message
1 parent a06fc74 commit 77de834

File tree

1 file changed

+175
-0
lines changed

1 file changed

+175
-0
lines changed
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
# coding=utf-8
2+
3+
from selenium import webdriver
4+
import time
5+
import random
6+
7+
from selenium.webdriver import ActionChains
8+
from pyecharts import options as opts
9+
from pyecharts.charts import Bar, Pie
10+
11+
12+
def login(username='18821290263', password='123456',
          login_url='https://www.itjuzi.com/login'):
    """Open Chrome, submit the itjuzi login form and return the driver.

    Args:
        username: Text typed into the first input of the login form.
            Defaults to the account the script originally hard-coded.
        password: Text typed into the second input of the login form.
        login_url: Address of the page that hosts the login form.

    Returns:
        The ``webdriver.Chrome`` instance that performed the login.

    Raises:
        Exception: Whatever Selenium raises while loading the page or
            locating the form. The browser is closed before the exception
            propagates, so a failed login does not leave a stray Chrome
            process behind.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(login_url)
        driver.implicitly_wait(10)

        # Locate each input once and reuse the element for clear + type.
        username_input = driver.find_element_by_xpath('//form/div[1]/div/div[1]/input')
        username_input.clear()
        username_input.send_keys(username)

        password_input = driver.find_element_by_xpath('//form/div[2]/div/div[1]/input')
        password_input.clear()
        password_input.send_keys(password)

        driver.find_element_by_class_name('el-button').click()
        driver.switch_to.default_content()
        # Fixed pause after submitting; presumably lets the site finish the
        # login redirect before the caller starts navigating.
        time.sleep(5)
    except Exception:
        driver.quit()
        raise
    return driver
26+
27+
def link(driver):
    """Open the "死亡公司" (dead companies) page and focus its tab.

    Hovers over the first element with class ``more``, clicks the link whose
    text is ``死亡公司``, switches the driver to the second window handle and
    then waits for the page to settle.

    Args:
        driver: Selenium WebDriver that is already logged in.
    """
    # Hover over the company-database navigation entry.
    nav_entry = driver.find_elements_by_class_name('more')[0]
    hover_action = ActionChains(driver).move_to_element(nav_entry)
    hover_action.perform()

    # Click the "dead companies" hyperlink.
    dead_company_link = driver.find_element_by_link_text('死亡公司')
    dead_company_link.click()

    # Switch to the newly opened tab.
    open_tabs = driver.window_handles
    driver.switch_to.window(open_tabs[1])

    driver.implicitly_wait(10)
    time.sleep(5)
33+
34+
def crawler(driver):
    """Scrape ten pages of the company table and append them to a CSV file.

    For every ``tr`` inside the page's ``tbody`` the text of seven cells is
    joined with commas into one line; the lines of each page are appended to
    ``itjuzi/deadCompany.csv``. After each page the ``btn-next`` button is
    clicked and the function sleeps for a random 2-10 seconds.

    Args:
        driver: Selenium WebDriver currently showing the table to scrape.
    """
    import os  # local import: only this function needs it

    output_path = 'itjuzi/deadCompany.csv'
    # open(..., 'a') creates the file but not its directory, so make sure
    # the directory exists before the first page is written.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # XPath of each exported cell, relative to a table row, in output order.
    column_xpaths = (
        'td[3]/div/h5/a',  # company short name
        'td[3]/div/p',     # survival time
        'td[4]',           # closing time
        'td[5]',           # industry
        'td[6]',           # company location
        'td[7]',           # NOTE(review): original comment repeated "closing time"; the old name `clsj` suggests founding time - confirm
        'td[8]',           # financing status
    )

    # Only the first 10 pages are scraped (original note: "only 2020 data").
    for page in range(1, 11):
        rows = driver.find_element_by_tag_name("tbody").find_elements_by_tag_name("tr")
        result = [
            ','.join(row.find_element_by_xpath(xpath).text for xpath in column_xpaths)
            for row in rows
        ]

        # Skip the write for an empty page: it would append a lone blank
        # line, and a blank record has no fields for the charts to index.
        # The file uses the locale's default encoding, like parse_csv's read.
        if result:
            with open(output_path, 'a') as f:
                f.write('\n'.join(result) + '\n')
        print(result)

        print("第 %s 页爬取完成" % page)
        # Look the button up again on every page: a reference taken once
        # before the loop can go stale if the pagination is re-rendered.
        driver.find_element_by_class_name('btn-next').click()  # go to next page
        time.sleep(random.uniform(2, 10))
61+
62+
def parse_csv():
    """Load the scraped records from ``itjuzi/deadCompany.csv``.

    Returns:
        list[str]: One whitespace-stripped string per non-blank line of the
        file, in file order.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open('itjuzi/deadCompany.csv', 'r') as f:
        stripped_lines = (line.strip() for line in f)
        # Blank lines are dropped: as '' records they contain no
        # comma-separated fields, so indexing into them raises IndexError.
        return [record for record in stripped_lines if record]
69+
70+
71+
def _lifetime_bucket(survival_text):
    """Turn a survival-time cell (text shaped like '存活3年...') into a pie label."""
    years_text, year_mark, _ = survival_text.replace('存活', '').partition('年')
    if not year_mark:
        # No year marker at all. Presumably the company lasted less than a
        # year (months only); count it as '0' rather than letting int()
        # raise ValueError on the leftover text. TODO confirm with real data.
        return '0'
    years_text = years_text.strip()
    return '>=10' if int(years_text) >= 10 else years_text


def lifetime_pie(deadCompany_list):
    """Render a pie chart of company lifetime in years to ``存活时间.html``.

    The second comma-separated field of every record is reduced to its
    whole-year count; counts of ten or more share the ``>=10`` slice.

    Args:
        deadCompany_list: Iterable of comma-separated record strings.
            Records with fewer than two fields are skipped.
    """
    lifetime_dict = {}
    for record in deadCompany_list:
        info = record.split(',')
        if len(info) < 2:
            continue  # malformed or blank record: no survival-time field
        lifetime = _lifetime_bucket(info[1])
        lifetime_dict[lifetime] = lifetime_dict.get(lifetime, 0) + 1

    (
        Pie()
        .add(
            "",
            [list(item) for item in lifetime_dict.items()],
            radius=["40%", "75%"],
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="公司存活年限",
                pos_left="center",
                pos_top="20",
            ),
            legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
        .render("存活时间.html")
    )
92+
93+
94+
def rongzi_pie(deadCompany_list):
    """Render a pie chart of financing status to ``融资情况.html``.

    The seventh comma-separated field of every record is grouped as follows:
    values listed in ``norongzi_list`` count as ``没有融资`` (no financing),
    values listed in ``rongzi_list`` count as ``已融资`` (financed), and any
    other value keeps its own slice.

    This function used to be defined twice with identical bodies; the second
    definition simply replaced the first, so only one copy is kept.

    Args:
        deadCompany_list: Iterable of comma-separated record strings.
            Records with fewer than seven fields are skipped.
    """
    norongzi_list = ['尚未获投', '不明确', '尚未获']
    rongzi_list = ['天使轮', 'A轮', 'B轮', 'C轮', 'D轮', 'E轮', 'D+轮', '种子轮', 'A+轮', '新三板', '战略投资', 'B+轮', 'Pre-A轮']

    rongzi_dict = {}
    for record in deadCompany_list:
        info = record.split(',')
        if len(info) < 7:
            continue  # malformed or blank record: no financing field
        rongzi = info[6].strip()
        if rongzi in norongzi_list:
            rongzi = '没有融资'
        elif rongzi in rongzi_list:
            rongzi = '已融资'

        rongzi_dict[rongzi] = rongzi_dict.get(rongzi, 0) + 1

    (
        Pie()
        .add(
            "",
            [list(item) for item in rongzi_dict.items()],
            radius=["40%", "75%"],
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="融资情况",
                pos_left="center",
                pos_top="20",
            ),
            legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
        .render("融资情况.html")
    )
147+
148+
def place_bar(deadCompany_list):
    """Render a bar chart of record counts per location to ``地区.html``.

    The fifth comma-separated field of every record, stripped of surrounding
    whitespace, is used as the location. Bars appear in the order in which
    each location is first seen.

    Args:
        deadCompany_list: Iterable of comma-separated record strings.
    """
    place_dict = {}
    for record in deadCompany_list:
        location = record.split(',')[4].strip()
        if location in place_dict:
            place_dict[location] += 1
        else:
            place_dict[location] = 1

    locations = list(place_dict)
    counts = [place_dict[location] for location in locations]

    chart = Bar(init_opts=opts.InitOpts(width='2000px'))
    chart.add_xaxis(locations)
    chart.add_yaxis("地区", counts)
    chart.set_global_opts(title_opts=opts.TitleOpts(title="地区分布"))
    chart.render("地区.html")
165+
166+
167+
if __name__ == '__main__':
    # Step 1: scrape the table into itjuzi/deadCompany.csv.
    driver = login()
    try:
        link(driver)
        crawler(driver)
    finally:
        # Always close the browser, even when scraping fails part-way;
        # otherwise Chrome and its driver process are left running.
        driver.quit()

    # Step 2: build the charts from the records that were written.
    deadCompany_list = parse_csv()
    lifetime_pie(deadCompany_list)
    rongzi_pie(deadCompany_list)
    place_bar(deadCompany_list)

0 commit comments

Comments
 (0)