Skip to content

Commit 9ffea31

Browse files
committed
add code
1 parent 1a50b5d commit 9ffea31

File tree

4 files changed

+144
-0
lines changed

4 files changed

+144
-0
lines changed

doudou/2020-10-13-national-day

-988 Bytes
Binary file not shown.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import csv
import os
import time

import requests
from bs4 import BeautifulSoup
from requests import RequestException
6+
7+
8+
# Request headers sent with every page fetch. They imitate a desktop Chrome
# browser that navigated from the Qunar ticket portal.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
    # 'br' is deliberately not advertised: requests can only decode Brotli
    # bodies when an optional Brotli package is installed, and an undecoded
    # body would reach the HTML parser as garbage.
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'referer': 'https://piao.qunar.com/',
}

# The session cookie (which carries session ids and a CSRF token) must not be
# committed to source control. Supply it at run time instead, e.g.
#   QUNAR_COOKIE='QN1=...; QN99=...' python <script>.py
_cookie = os.environ.get('QUNAR_COOKIE')
if _cookie:
    headers['cookie'] = _cookie
16+
17+
# Module-level CSV sink: the file is opened (and truncated) as soon as this
# module runs, and the column header row is written once up front.
# `writer` is the handle other code in this file uses to append rows.
excel_file = open('data.csv', mode='w', newline='', encoding='utf-8')
writer = csv.writer(excel_file)
writer.writerow([
    '名称',
    '城市',
    '类型',
    '级别',
    '热度',
    '地址',
])
20+
21+
22+
def get_page_html(url, timeout=10):
    """Download *url* and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The response text when the server answers with HTTP 200; ``None`` for
        any other status code or when the request raises a
        ``RequestException`` (which includes timeouts).
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    return response.text
30+
31+
32+
def parse_content(content, subject, url):
    """Extract every sight on a listing page and append it to the CSV.

    Args:
        content: HTML text of the listing page, or a falsy value when the
            download failed.
        subject: Category label written into the row's type column.
        url: Address the page came from; only used in diagnostic output.

    Returns:
        None. One row ``[name, districts, subject, level, star, address]`` is
        written through the module-level ``writer`` for each
        ``div.sight_item`` found under the element with id ``search-list``.
    """
    if not content:
        print('content is none ', url)
        return

    soup = BeautifulSoup(content, "html.parser")
    search_list = soup.find(id='search-list')
    if search_list is None:
        # A page without the result container (e.g. an error or block page)
        # is reported and skipped instead of crashing the whole crawl.
        print('search list not found ', url)
        return

    for item in search_list.find_all('div', class_="sight_item"):
        level = item.find('span', class_='level')
        star = item.find('span', class_='product_star_level')
        writer.writerow([
            # .get() keeps one item with a missing attribute from aborting
            # the run; the column is left empty instead.
            item.get('data-sight-name', ''),
            item.get('data-districts', ''),
            subject,
            level.text if level is not None else '',
            star.text if star is not None else '',
            item.get('data-address', ''),
        ])
49+
50+
51+
# Category keywords substituted into the `subject` query parameter of the
# listing URL; the crawl makes one pass per entry, in this order.
subjects = [
    '文化古迹',
    '自然风光',
    '农家度假',
    '游乐场',
    '展馆',
    '古建筑',
    '城市观光',
]
52+
53+
54+
def get_data():
    """Crawl listing pages 1 through 10 for every entry in ``subjects``.

    For each page the URL is printed, fetched with ``get_page_html`` and the
    result handed to ``parse_content``. The loop sleeps five seconds after
    every page before moving on.
    """
    for subject in subjects:
        for page in range(1, 11):
            url = f'https://piao.qunar.com/ticket/list.htm?keyword=热门景点&region=&from=mps_search_suggest&subject={subject}&page={page}&sku='
            print(url)
            html = get_page_html(url)
            parse_content(html, subject, url)
            time.sleep(5)
63+
64+
65+
if __name__ == '__main__':
    try:
        get_data()
    finally:
        # Close the module-level CSV handle so buffered rows are flushed to
        # data.csv even when the crawl stops on an error or Ctrl-C.
        excel_file.close()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
名称,城市,类型,级别,热度,地址
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import csv
2+
import pandas as pd
3+
from pyecharts.charts import Geo
4+
from pyecharts import options as opts
5+
from pyecharts.globals import ChartType, SymbolType
6+
from pyecharts.charts import Bar
7+
from pyecharts.charts import Pie
8+
9+
# Cities left out of every chart.
# NOTE(review): presumably these are places the pyecharts Geo chart has no
# coordinates for -- confirm before changing the list.
_EXCLUDED_CITIES = {'保亭', '德宏', '湘西', '陵水', '黔东南', '黔南'}

# Raw CSV rows (header excluded). The scraper writes data.csv as UTF-8, so it
# is read back with the same encoding rather than the platform default.
data = []
with open('data.csv', 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    header = next(reader, None)  # None when the file is completely empty
    data.extend(reader)

# Cleaned rows: [name, city, level, star], where star is the heat score
# scaled by 1000 and truncated to an int.
df_data = []
for row in data:
    if len(row) < 5:
        # Truncated or blank line: not enough columns to interpret.
        continue
    district_parts = row[1].split('·')
    if len(district_parts) < 2:
        # District is expected to look like '<province>·<city>'.
        continue
    city = district_parts[1]
    if city in _EXCLUDED_CITIES:
        continue
    # The heat column looks like '热度 0.85'. The scraper leaves it empty
    # when a sight shows no heat value, so fall back to 0 instead of crashing.
    heat_parts = row[4].split('热度')
    heat_text = heat_parts[1].strip() if len(heat_parts) > 1 else ''
    try:
        star = int(float(heat_text) * 1000)
    except ValueError:
        star = 0
    df_data.append([row[0], city, row[3], star])

df = pd.DataFrame(df_data, columns=['name', 'city', 'level', 'star'])
26+
27+
28+
def show_pic_one():
    """Build a China heat map of the summed heat score per city.

    Returns:
        The value of ``Geo.render_notebook()``. It is returned rather than
        discarded so that calling this function as the last expression of a
        notebook cell actually displays the chart.
    """
    heat_by_city = df.groupby(by=['city'])['star'].sum()
    # .tolist() converts numpy scalars to native Python values, which is the
    # data format pyecharts expects when it serialises the chart options.
    cities = heat_by_city.index.tolist()
    city_stars = heat_by_city.tolist()
    city_heat = [list(pair) for pair in zip(cities, city_stars)]

    geo = (
        Geo()
        .add_schema(maptype="china")
        .add(
            "热点图",  # series name
            city_heat,
            type_=ChartType.HEATMAP,  # chart type
        )
        # Hide the per-point labels.
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            # Upper bound of the visual-map scale.
            visualmap_opts=opts.VisualMapOpts(max_=5000),
            # Title in the top-left corner (left blank).
            title_opts=opts.TitleOpts(title=""),
        )
    )
    return geo.render_notebook()
50+
51+
52+
def show_pic_two():
    """Build a bar chart of the 15 cities with the most listed sights.

    Returns:
        The value of ``Bar.render_notebook()``. It is returned rather than
        discarded so that calling this function as the last expression of a
        notebook cell actually displays the chart.
    """
    sight_counts = df.loc[:, 'city'].value_counts().sort_values(ascending=False)
    # .tolist() converts numpy scalars to native Python values, which is the
    # data format pyecharts expects when it serialises the chart options.
    top_cities = sight_counts.index.tolist()[:15]
    top_counts = sight_counts.tolist()[:15]

    bar = Bar()
    bar.add_xaxis(top_cities)
    bar.add_yaxis("Top 15", top_counts)
    bar.set_global_opts(title_opts=opts.TitleOpts(title=""))
    return bar.render_notebook()
62+
63+
64+
def show_pic_three():
    """Build a pie chart of the 10 sights with the highest summed heat score.

    Returns:
        The value of ``Pie.render_notebook()``. It is returned rather than
        discarded so that calling this function as the last expression of a
        notebook cell actually displays the chart.
    """
    heat_by_name = df.groupby(by=['name'])['star'].sum().sort_values(ascending=False)
    # .tolist() converts numpy scalars to native Python values, which is the
    # data format pyecharts expects when it serialises the chart options.
    names = heat_by_name.index.tolist()[:10]
    name_stars = heat_by_name.tolist()[:10]

    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(names, name_stars)])
        .set_global_opts(title_opts=opts.TitleOpts(title=""))
        # Label each slice as '<name>: <value>'.
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    return pie.render_notebook()

0 commit comments

Comments
 (0)