add code

itdoudoutech · itdoudoutech · commit 4be3f6cc181e · 2020-07-13T19:52:01.000+08:00
diff --git a/doudou/2020-06-22-music-163/app.py b/doudou/2020-06-22-music-163/app.py
@@ -0,0 +1,51 @@
+import requests
+import json
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+
+
+# Request headers that imitate a desktop browser (Referer, Host, User-Agent, Accept).
+headers = {
+    'Referer': 'http://music.163.com/',
+    'Host': 'music.163.com',
+    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
+    'Accept': '*/*',
+}
+
+# Build the URL and the POST form fields; `params` and `encSecKey` are opaque hard-coded strings.
+url = 'https://music.163.com/weapi/v1/play/record?csrf_token='
+data = {
+    'params': 'xrJhjXYUqEWa98DVbFtw6yTygOTCOvSAypxfWNr5kpw/MEvXsRk+Av+DNF7zY9a1oA95FsmDtE3VpM422dZR6WJGDxS3/se00qFFHx6wumfLzc9mgnfB5hGkrBwF9+P/7zamjfWSOUfvvUuWhM2Gd7z2pA11lMB',
+    'encSecKey': '2371bb4de91d5de7110722d3491c7cf6d3f6f5cdcbc16a5e9c7456e4b9075c1965bbd2bf4fbf02023cf63391f74b6956339cb72fa32a4413de347ffb536299f5711fe02fe60f66b77ac96a16a6bcb5ba14cf9b1609ddf8e8180d683bba5801acf'
+}
+
+# Send the POST request; `headers` has to be passed explicitly, otherwise the
+# browser-like values defined for this request are never sent.
+req = requests.post(url, data=data, headers=headers)
+
+# Decode the JSON body once and reuse it for the debug print and the extraction.
+result = json.loads(req.text)
+print(result)
+
+# Example output:
+# {"allData":[{"playCount":0,"score":100,"song":{"name":"盛夏光年 (2013版)","id":28181110,"pst":0,"t":0,"ar":[{"id":13193,"name":"五月天","tns":...
+
+# First listed artist of up to 100 records (the slice avoids IndexError on shorter lists).
+names = [record['song']['ar'][0]['name'] for record in result['allData'][:100]]
+text = ",".join(names)
+
+
+def show_word_cloud(text):
+    """Build one word cloud from ``text``, save it as names.png and display it."""
+    # A single cloud is generated with the explicit font and used for both the
+    # file and the figure, so names.png matches what is displayed.
+    wc = WordCloud(font_path='/System/Library/Fonts/PingFang.ttc', background_color="white", scale=2.5,
+                   contour_color="lightblue", ).generate(text)
+    wc.to_file("names.png")
+    plt.figure(figsize=(16, 9))
+    plt.imshow(wc)
+    plt.axis('off')
+    plt.show()
+
+
+show_word_cloud(text)  # `text` is the comma-joined artist names built above
diff --git a/doudou/2020-07-13-lagou/analysis.py b/doudou/2020-07-13-lagou/analysis.py
@@ -0,0 +1,142 @@
+import numpy as np
+from pyecharts import options as opts
+from pyecharts.charts import Bar
+from pyecharts.charts import Pie
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+import json
+import pandas as pd
+
+
+def get_data():
+    """Load the flattened job postings stored in ``data.txt``.
+
+    Each non-blank line must be one JSON response whose postings live under
+    ``content.positionResult.result``. Returns a list of dicts, one per posting.
+    """
+    fields = ('city', 'industryField', 'education', 'workYear', 'salary',
+              'firstType', 'secondType', 'thirdType')
+    data = []
+    # Decode explicitly as UTF-8 instead of relying on the platform default.
+    with open('data.txt', encoding='utf-8') as f:
+        for line in f:
+            if not line.strip():
+                continue  # tolerate blank lines instead of failing in json.loads
+            for item in json.loads(line)['content']['positionResult']['result']:
+                row = {key: item[key] for key in fields}
+                # List-valued fields are flattened to comma-separated strings;
+                # a null list becomes an empty string.
+                row['skillLables'] = ','.join(item['skillLables'] or [])
+                row['companyLabelList'] = ','.join(item['companyLabelList'] or [])
+                data.append(row)
+    return data
+
+
+# Load every stored posting into a DataFrame (one row per posting).
+data = pd.DataFrame(get_data())
+data.head(5)  # notebook-style preview; the value is discarded when run as a script
+
+# City distribution: bar chart of the `top` cities with the most postings.
+top = 15
+citys_value_counts = data['city'].value_counts()
+citys = list(citys_value_counts.head(top).index)
+city_counts = list(citys_value_counts.head(top))
+
+# The chart is built with one call per statement; the pyecharts builder
+# methods return the chart itself, so this equals the chained form.
+bar = Bar()
+bar.add_xaxis(citys)
+bar.add_yaxis("", city_counts)
+bar.render_notebook()
+
+# City distribution again, drawn as a pie chart with the legend hidden.
+pie = Pie()
+# Each slice is a [city, count] pair.
+pie.add("", [[city, count] for city, count in zip(citys, city_counts)])
+# Title and legend options are set by two consecutive calls.
+pie.set_global_opts(title_opts=opts.TitleOpts(title=""))
+pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
+pie.render_notebook()
+
+# Industry distribution: one field may hold several comma-separated industries,
+# so every field is split and each industry is counted on its own.
+industrys = list(data['industryField'])
+industry_list = [name for field in industrys for name in field.split(',')]
+
+industry_series = pd.Series(industry_list)
+industry_value_counts = industry_series.value_counts()
+
+# `industrys` is rebound here to the `top` most frequent industry names.
+industrys = list(industry_value_counts.head(top).index)
+industry_counts = list(industry_value_counts.head(top))
+
+pie = Pie()
+pie.add("", [[name, count] for name, count in zip(industrys, industry_counts)])
+pie.set_global_opts(title_opts=opts.TitleOpts(title=""))
+pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
+pie.render_notebook()
+
+# Education requirements: pie chart over every category (no `top` cut-off).
+# The `eduction*` spelling is part of the module-level names and is kept as is.
+eduction_value_counts = data['education'].value_counts()
+
+eduction = list(eduction_value_counts.index)
+eduction_counts = list(eduction_value_counts)
+
+pie = Pie()
+# Each slice is an [education level, count] pair.
+pie.add("", [[level, count] for level, count in zip(eduction, eduction_counts)])
+pie.set_global_opts(title_opts=opts.TitleOpts(title=""))
+pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
+pie.render_notebook()
+
+# Required work experience: bar chart over every `workYear` category.
+work_year_value_counts = data['workYear'].value_counts()
+work_year = list(work_year_value_counts.index)
+work_year_counts = list(work_year_value_counts)
+
+# Categories are plotted in value_counts() order.
+bar = Bar()
+bar.add_xaxis(work_year)
+bar.add_yaxis("", work_year_counts)
+
+bar.render_notebook()
+
+# Skills word cloud: spread the comma-separated labels over columns, blank the
+# missing cells, then dump the table to one string for WordCloud to tokenize.
+word_data = data['skillLables'].str.split(',').apply(pd.Series)
+word_data = word_data.replace(np.nan, '')
+text = word_data.to_string(header=False, index=False)
+wc = WordCloud(font_path='/System/Library/Fonts/PingFang.ttc', background_color="white", scale=2.5,
+               contour_color="lightblue", )
+wc.generate(text)  # generate() fills the cloud in place and returns it
+plt.figure(figsize=(16, 9))
+plt.imshow(wc)
+plt.axis('off')
+plt.show()
+
+# Company benefits word cloud, built the same way from `companyLabelList`:
+# labels spread over columns, missing cells blanked, table dumped to one string.
+word_data = data['companyLabelList'].str.split(',').apply(pd.Series)
+word_data = word_data.replace(np.nan, '')
+text = word_data.to_string(header=False, index=False)
+wc = WordCloud(font_path='/System/Library/Fonts/PingFang.ttc', background_color="white", scale=2.5,
+               contour_color="lightblue", )
+wc.generate(text)  # generate() fills the cloud in place and returns it
+plt.figure(figsize=(16, 9))
+plt.imshow(wc)
+plt.axis('off')
+plt.show()
+
+# Salary: bar chart of the `top` most frequent salary values.
+salary_value_counts = data['salary'].value_counts()
+salary = list(salary_value_counts.head(top).index)
+salary_counts = list(salary_value_counts.head(top))
+
+bar = Bar()
+bar.add_xaxis(salary)
+bar.add_yaxis("", salary_counts)
+# Name the x axis and rotate its tick labels by 45 degrees.
+bar.set_global_opts(xaxis_opts=opts.AxisOpts(name_rotate=0, name="薪资", axislabel_opts={"rotate": 45}))
+
+bar.render_notebook()
diff --git a/doudou/2020-07-13-lagou/app.py b/doudou/2020-07-13-lagou/app.py
@@ -0,0 +1,67 @@
+import requests
+import time
+
+
+def headers_to_dict(headers):
+    """Parse newline-separated ``name: value`` header lines into a dict."""
+    d_headers = {}
+    for line in map(str.strip, headers.split("\n")):
+        if not line:
+            continue  # blank or whitespace-only line
+        k, v = (part.strip() for part in line.split(":", 1))  # ValueError if no colon
+        if k == 'cookie' and d_headers.get(k) is not None:  # join repeated cookie lines
+            v = d_headers[k] + "; " + v
+        d_headers[k] = v
+    return d_headers
+
+
+home_url = 'https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD'  # listing page, requested first for its cookies
+url = 'https://www.lagou.com/jobs/positionAjax.json?px=new&needAddtionalResult=false'  # endpoint the search form is POSTed to
+headers = """
+accept: application/json, text/javascript, */*; q=0.01
+origin: https://www.lagou.com
+referer: https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD
+user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36
+"""
+# Parsed once at import time; the same dict is sent with the GET and the POST request.
+headers_dict = headers_to_dict(headers)
+
+
+def get_data_from_cloud(page):
+    """Fetch result page ``page`` of the python job search and append the raw body to data.txt."""
+    params = {'first': 'false', 'pn': page, 'kd': 'python'}
+
+    # The home page is requested first so the session picks up its cookies; posting
+    # through the same session sends them along, and the ``with`` block closes
+    # the session's connections afterwards.
+    with requests.Session() as s:
+        s.get(home_url, headers=headers_dict, timeout=3)
+        response = s.post(url, data=params, headers=headers_dict, timeout=3)
+    response.raise_for_status()  # do not store an HTTP error page as data
+    write_file(response.text)
+
+
+def write_file(content):
+    """Append ``content`` plus a newline to data.txt, always encoded as UTF-8."""
+    with open('data.txt', 'a', encoding='utf-8') as f:
+        f.write(content + '\n')
+
+
+"""
+工作地点地图 ： city
+行业分布：industryField
+学历要求：education
+工作经验：workYear
+薪资：salary
+所需技能：skillLables
+福利：companyLabelList
+类型：firstType、secondType
+"""
+def get_data(pages=76, delay=5):
+    """Fetch result pages 1..``pages`` in order, sleeping ``delay`` seconds after each request."""
+    for page in range(1, pages + 1):
+        get_data_from_cloud(page)
+        time.sleep(delay)
+
+
+get_data()  # module-level call: the whole scrape starts whenever this file is executed or imported
diff --git a/doudou/2020-07-13-lagou/data.txt b/doudou/2020-07-13-lagou/data.txt
diff --git a/doudou/README.md b/doudou/README.md
@@ -16,11 +16,13 @@ Python技术 公众号文章代码库
 
 + [520](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-520)：Python 教你花式表白小姐姐
 
-+ [字符画](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-character-drawing)：字符画
++ [character-drawing](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-05-17-character-drawing)：字符画
 
-+ [迷宫](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-12-maze)：迷宫
++ [maze](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-12-maze)：迷宫
 
-+ [Python 骚操作](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-19-skills)：Python 骚操作
++ [python-skills](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-06-19-skills)：Python 骚操作
+
++ [lagou](https://github.com/JustDoPython/python-examples/tree/master/doudou/2020-07-13-lagou)：拉勾招聘数据分析
 
 ---