Skip to content

Commit 2970c82

Browse files
committed
no message
1 parent 814d9bb commit 2970c82

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# !/usr/bin/python
2+
# -*- coding:utf-8 -*-
3+
import requests, time, urllib.request, re, json, sys
4+
from bs4 import BeautifulSoup
5+
6+
class bilibili_crawl:
    """Download a single Bilibili video identified by its BV id.

    Typical use: create the crawler with a BV id, call ``get_vedio_info`` to
    scrape the title and the first DASH video stream URL from the video page,
    then hand the returned dict to ``download_video``.
    """

    # Browser-like User-Agent sent with both the page request and the download.
    USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36')

    # Seconds to wait for the video page before giving up.
    REQUEST_TIMEOUT = 10

    # Matches the inline ``window.__playinfo__=...`` assignment inside a <script> tag.
    _PLAYINFO_PATTERN = re.compile(r"window\.__playinfo__=(.*?)$", re.MULTILINE | re.DOTALL)

    # Characters replaced in file names: both slash kinds and : * ? " < > |
    _ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self, bv):
        """Store the video page URL and the time the crawler was created.

        :param bv: BV id of the video, e.g. ``'BV1Vh411Z7j5'``
        """
        # Video page URL.
        self.url = 'https://www.bilibili.com/video/' + bv
        # Timestamp taken when the crawler is created.
        self.start_time = time.time()

    def get_vedio_info(self):
        """Scrape the video title and the first DASH video stream URL.

        :return: ``{'title': ..., 'url': ...}`` on success; ``None`` when the
            request fails, the server does not answer with status 200, or the
            page lacks the expected title / play-info data (a message is
            printed in each failure case)
        """
        headers = {'User-Agent': self.USER_AGENT}
        try:
            # A timeout keeps an unresponsive server from blocking forever;
            # requests raises a RequestException subclass when it expires.
            response = requests.get(url=self.url, headers=headers,
                                    timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException:
            print('视频链接错误,请重新更换')
            return None

        if response.status_code != 200:
            print('请求失败,状态码: %d' % response.status_code)
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        video_title = self._extract_title(soup)
        video_url = self._extract_video_url(soup)
        if video_title is None or video_url is None:
            print('无法解析视频信息')
            return None

        return {
            'title': video_title,
            'url': video_url,
        }

    @staticmethod
    def _extract_title(soup):
        """Return the text of the ``<span class="tit">`` element, or ``None`` if absent."""
        title_tag = soup.find('span', class_='tit')
        if title_tag is None:
            return None
        return title_tag.get_text()

    @classmethod
    def _extract_video_url(cls, soup):
        """Return the ``baseUrl`` of the first DASH video stream, or ``None`` if not found."""
        script = soup.find('script', string=cls._PLAYINFO_PATTERN)
        if script is None or script.string is None:
            return None
        match = cls._PLAYINFO_PATTERN.search(script.string)
        if match is None:
            return None
        try:
            streams = json.loads(match.group(1))['data']['dash']['video']
            # Take the first stream entry that carries a baseUrl.
            return next((item['baseUrl'] for item in streams if 'baseUrl' in item), None)
        except (ValueError, KeyError, TypeError):
            # Malformed JSON or an unexpected play-info layout.
            return None

    def download_video(self, video):
        """Download the video into ``<sanitized title>.mp4`` in the working directory.

        :param video: dict with ``'title'`` and ``'url'`` keys, as returned by
            ``get_vedio_info``
        :return: None
        """
        title = self._ILLEGAL_FILENAME_CHARS.sub('-', video['title'])
        filename = title + '.mp4'

        opener = urllib.request.build_opener()
        opener.addheaders = [
            ('Origin', 'https://www.bilibili.com'),
            ('Referer', self.url),
            ('User-Agent', self.USER_AGENT),
        ]
        # urlretrieve goes through the globally installed opener, so the
        # custom headers have to be installed process-wide.
        urllib.request.install_opener(opener)
        urllib.request.urlretrieve(url=video['url'], filename=filename,
                                   reporthook=self.schedule)

        # The progress line ends with '\r'; finish it so later output starts
        # on a fresh line instead of overwriting the bar.
        sys.stdout.write('\n')
        sys.stdout.flush()

    def schedule(self, blocknum, blocksize, totalsize):
        """Progress callback for ``urllib.request.urlretrieve``.

        Writes a 100-column progress bar to stdout, or a byte counter when
        the total size is not known.

        :param blocknum: number of blocks transferred so far
        :param blocksize: size of one block in bytes
        :param totalsize: size of the remote file in bytes; non-positive when
            the server did not report it
        :return: None
        """
        downloaded = blocknum * blocksize
        if totalsize <= 0:
            # Unknown size: a percentage would divide by zero or go negative,
            # so report the raw byte count instead.
            sys.stdout.write('%d bytes downloaded\r' % downloaded)
            sys.stdout.flush()
            return

        # The last block usually overshoots the file size; cap at 100 %.
        percent = min(100.0 * downloaded / totalsize, 100.0)
        bar = ('#' * round(percent)).ljust(100, '-')
        sys.stdout.write("%.2f%%" % percent + '[ ' + bar + ']' + '\r')
        sys.stdout.flush()
71+
72+
if __name__ == '__main__':
    # Script entry point: look up a sample video and download it.
    bc = bilibili_crawl('BV1Vh411Z7j5')
    video_info = bc.get_vedio_info()
    if video_info:
        bc.download_video(video_info)
    else:
        # get_vedio_info returns None when the lookup fails; exit with an
        # error status instead of crashing inside download_video.
        sys.exit(1)

0 commit comments

Comments
 (0)