Skip to content

Commit 4be3f6c

Browse files
committed
add code
1 parent 40da46e commit 4be3f6c

File tree

5 files changed

+341
-3
lines changed

5 files changed

+341
-3
lines changed

doudou/2020-06-22-music-163/app.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import requests
2+
import json
3+
import matplotlib.pyplot as plt
4+
from wordcloud import WordCloud
5+
6+
7+
# Headers that make the request look like it comes from a browser.
headers = {
    'Referer': 'http://music.163.com/',
    'Host': 'music.163.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
    'Accept': '*/*',
}

# Endpoint URL and the POST form parameters.
url = 'https://music.163.com/weapi/v1/play/record?csrf_token='
data = {
    'params': 'xrJhjXYUqEWa98DVbFtw6yTygOTCOvSAypxfWNr5kpw/MEvXsRk+Av+DNF7zY9a1oA95FsmDtE3VpM422dZR6WJGDxS3/se00qFFHx6wumfLzc9mgnfB5hGkrBwF9+P/7zamjfWSOUfvvUuWhM2Gd7z2pA11lMB',
    'encSecKey': '2371bb4de91d5de7110722d3491c7cf6d3f6f5cdcbc16a5e9c7456e4b9075c1965bbd2bf4fbf02023cf63391f74b6956339cb72fa32a4413de347ffb536299f5711fe02fe60f66b77ac96a16a6bcb5ba14cf9b1609ddf8e8180d683bba5801acf'
}

# Send the POST request. The headers defined above were previously built but
# never sent; pass them explicitly so the request actually carries them.
req = requests.post(url, data=data, headers=headers)

# Parse the JSON body once and reuse the result below.
result = json.loads(req.text)
print(result)

# Sample output:
# {"allData":[{"playCount":0,"score":100,"song":{"name":"盛夏光年 (2013版)","id":28181110,"pst":0,"t":0,"ar":[{"id":13193,"name":"五月天","tns":...

# First listed artist of each record, limited to the first 100 records.
# Slicing (instead of indexing 0..99) avoids an IndexError when the response
# contains fewer than 100 records.
names = [record['song']['ar'][0]['name'] for record in result['allData'][:100]]

text = ",".join(names)
36+
37+
38+
def show_word_cloud(text, font_path='/System/Library/Fonts/PingFang.ttc', output_file="names.png"):
    """Render *text* as a word cloud, save it as an image and display it.

    Args:
        text: The text to build the word cloud from.
        font_path: Font file handed to ``WordCloud``. The default is a macOS
            system font path; pass another font on other platforms.
        output_file: Path of the image file the cloud is written to.
    """
    wc = WordCloud(font_path=font_path, background_color="white", scale=2.5,
                   contour_color="lightblue").generate(text)

    # Save the very same cloud that is displayed. Previously a second cloud
    # was generated without ``font_path`` and that one was written to disk,
    # so the saved image did not match the one shown on screen.
    wc.to_file(output_file)

    plt.figure(figsize=(16, 9))
    plt.imshow(wc)
    plt.axis('off')
    plt.show()


show_word_cloud(text)
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import numpy as np
2+
from pyecharts import options as opts
3+
from pyecharts.charts import Bar
4+
from pyecharts.charts import Pie
5+
from wordcloud import WordCloud
6+
import matplotlib.pyplot as plt
7+
import json
8+
import pandas as pd
9+
10+
11+
def get_data():
    """Load the job postings stored in ``data.txt``.

    Every non-blank line of the file is parsed as one JSON document, and the
    postings found under ``content -> positionResult -> result`` are
    flattened into plain dictionaries.

    Returns:
        A list with one dict per posting. The list-valued fields
        ``skillLables`` and ``companyLabelList`` are joined into
        comma-separated strings.

    Raises:
        KeyError: If a document or posting lacks one of the expected keys.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    # Explicit encoding so reading does not depend on the platform default.
    with open('data.txt', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            result = json.loads(line)
            result_list = result['content']['positionResult']['result']
            for item in result_list:
                data.append({
                    'city': item['city'],
                    'industryField': item['industryField'],
                    'education': item['education'],
                    'workYear': item['workYear'],
                    'salary': item['salary'],
                    'firstType': item['firstType'],
                    'secondType': item['secondType'],
                    'thirdType': item['thirdType'],
                    # List-valued fields; a null value is treated as empty.
                    'skillLables': ','.join(item['skillLables'] or []),
                    'companyLabelList': ','.join(item['companyLabelList'] or []),
                })
    return data
33+
34+
35+
# Load the postings into a DataFrame.
data = pd.DataFrame(get_data())
data.head(5)  # preview of the first rows (result is not otherwise used)

# City distribution: bar chart of the `top` most frequent cities.
citys_value_counts = data['city'].value_counts()
top = 15
citys = list(citys_value_counts.head(top).index)
city_counts = list(citys_value_counts.head(top))

bar = (
    Bar()
    .add_xaxis(citys)
    .add_yaxis("", city_counts)
)
bar.render_notebook()

# City distribution: pie chart of the same data.
pie = (
    Pie()
    .add("", [list(z) for z in zip(citys, city_counts)])
    # Pass all global options in a single call instead of relying on how
    # repeated set_global_opts calls combine with each other.
    .set_global_opts(
        title_opts=opts.TitleOpts(title=""),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
pie.render_notebook()
60+
61+
# Industry distribution. A posting's industryField may hold several
# comma-separated entries; each entry is counted on its own.
industry_list = [
    field
    for fields in data['industryField']
    for field in fields.split(',')
]

industry_series = pd.Series(data=industry_list)
industry_value_counts = industry_series.value_counts()

# Labels and counts of the `top` most frequent industries.
industrys = list(industry_value_counts.head(top).index)
industry_counts = list(industry_value_counts.head(top))

pie = (
    Pie()
    .add("", [list(z) for z in zip(industrys, industry_counts)])
    # Pass all global options in a single call instead of relying on how
    # repeated set_global_opts calls combine with each other.
    .set_global_opts(
        title_opts=opts.TitleOpts(title=""),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
pie.render_notebook()
78+
79+
# Education requirement distribution.
eduction_value_counts = data['education'].value_counts()

eduction = list(eduction_value_counts.index)
eduction_counts = list(eduction_value_counts)

pie = (
    Pie()
    .add("", [list(z) for z in zip(eduction, eduction_counts)])
    # Pass all global options in a single call instead of relying on how
    # repeated set_global_opts calls combine with each other.
    .set_global_opts(
        title_opts=opts.TitleOpts(title=""),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
pie.render_notebook()
92+
93+
# Required work experience: number of postings per workYear value.
work_year_value_counts = data['workYear'].value_counts()
work_year = [*work_year_value_counts.index]
work_year_counts = [*work_year_value_counts]

# Build the bar chart step by step: categories on x, counts on y.
bar = Bar()
bar.add_xaxis(work_year)
bar.add_yaxis("", work_year_counts)
bar.render_notebook()
104+
105+
def _show_label_cloud(labels, font_path='/System/Library/Fonts/PingFang.ttc'):
    """Display a word cloud built from a Series of comma-separated labels.

    Args:
        labels: pandas Series of strings, each a comma-separated label list.
        font_path: Font file handed to ``WordCloud``. The default is a macOS
            system font path; pass another font on other platforms.
    """
    # One column per label; rows with fewer labels are padded with ''.
    word_data = labels.str.split(',').apply(pd.Series)
    word_data = word_data.replace(np.nan, '')
    text = word_data.to_string(header=False, index=False)

    wc = WordCloud(font_path=font_path, background_color="white", scale=2.5,
                   contour_color="lightblue").generate(text)

    plt.figure(figsize=(16, 9))
    plt.imshow(wc)
    plt.axis('off')
    plt.show()


# Skills
_show_label_cloud(data['skillLables'])

# Benefits
_show_label_cloud(data['companyLabelList'])
130+
131+
# Salary: the `top` most frequent salary values and how often each occurs.
salary_value_counts = data['salary'].value_counts()
top_salaries = salary_value_counts.head(top)
salary = list(top_salaries.index)
salary_counts = list(top_salaries)

# Named x axis with its tick labels rotated by 45 degrees.
salary_axis = opts.AxisOpts(name_rotate=0, name="薪资", axislabel_opts={"rotate": 45})

bar = Bar()
bar.add_xaxis(salary)
bar.add_yaxis("", salary_counts)
bar.set_global_opts(xaxis_opts=salary_axis)
bar.render_notebook()

doudou/2020-07-13-lagou/app.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import requests
2+
import time
3+
4+
5+
def headers_to_dict(headers):
    """Convert a block of raw ``name: value`` header lines into a dict.

    Blank and whitespace-only lines are ignored, and surrounding whitespace
    is stripped from both names and values. Repeated cookie headers with the
    same name are merged into a single value joined by ``"; "``; for any
    other repeated header the last occurrence wins.

    Args:
        headers: Multi-line string with one header per line.

    Returns:
        dict mapping header name to header value.

    Raises:
        ValueError: If a non-blank line contains no ``:`` separator.
    """
    d_headers = {}
    for line in headers.split("\n"):
        line = line.strip()
        if not line:
            continue
        # Split on the first colon only: values such as URLs contain colons.
        name, sep, value = line.partition(":")
        if not sep:
            raise ValueError("malformed header line (missing ':'): %r" % line)
        name = name.strip()
        value = value.strip()
        if name.lower() == 'cookie' and name in d_headers:
            d_headers[name] = d_headers[name] + "; " + value
        else:
            d_headers[name] = value
    return d_headers
16+
17+
18+
# Listing page; requested first so the session picks up its cookies.
home_url = 'https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD'
# Endpoint the search form data is posted to.
url = 'https://www.lagou.com/jobs/positionAjax.json?px=new&needAddtionalResult=false'
# Raw request headers, one "name: value" pair per line.
headers = """
accept: application/json, text/javascript, */*; q=0.01
origin: https://www.lagou.com
referer: https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36
"""

headers_dict = headers_to_dict(headers)


def get_data_from_cloud(page):
    """Fetch one result page and append the raw response body to the data file.

    Args:
        page: Page number sent as the ``pn`` form field.

    Raises:
        requests.HTTPError: If the POST request returns an error status.
    """
    params = {
        'first': 'false',
        'pn': page,
        'kd': 'python'
    }
    # One session carries the cookies from the listing page over to the POST
    # request, and the context manager closes it afterwards (previously the
    # session was never closed and the POST bypassed it).
    with requests.Session() as s:
        s.get(home_url, headers=headers_dict, timeout=3)
        response = s.post(url, data=params, headers=headers_dict, timeout=3)
        # Do not append an error page to the data file.
        response.raise_for_status()
        result = response.text
    write_file(result)
42+
43+
44+
def write_file(content, filename='data.txt'):
    """Append *content* followed by a newline to *filename*.

    Args:
        content: Text to append.
        filename: Target file path; created if it does not exist.
    """
    # Explicit encoding so non-ASCII text is written the same way on every
    # platform instead of depending on the locale default.
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(content + '\n')
48+
49+
50+
# Fields of interest in each posting:
#   work location map: city
#   industry distribution: industryField
#   education requirement: education
#   work experience: workYear
#   salary: salary
#   required skills: skillLables
#   benefits: companyLabelList
#   type: firstType, secondType


def get_data(pages=76, delay=5):
    """Fetch result pages 1..pages one after another.

    Args:
        pages: Number of pages to fetch, starting at page 1.
        delay: Seconds to wait between two consecutive requests.
    """
    for page in range(1, pages + 1):
        get_data_from_cloud(page)
        # Pause between requests; no need to wait after the last page.
        if page < pages:
            time.sleep(delay)


get_data()

doudou/2020-07-13-lagou/data.txt

Lines changed: 76 additions & 0 deletions
Large diffs are not rendered by default.

doudou/README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@ Python技术 公众号文章代码库
1616

1717
+ [520](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-520):Python 教你花式表白小姐姐
1818

19-
+ [字符画](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-character-drawing):字符画
19+
+ [character-drawing](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-character-drawing):字符画
2020

21-
+ [迷宫](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-12-maze):迷宫
21+
+ [maze](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-12-maze):迷宫
2222

23-
+ [Python 骚操作](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-19-skills):Python 骚操作
23+
+ [python-skills](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-19-skills):Python 骚操作
24+
25+
+ [lagou](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-07-13-lagou):拉勾招聘数据分析
2426

2527
---
2628

0 commit comments

Comments
 (0)