#!/usr/bin/env python3
2+ # -*- coding:utf-8 -*-
3+ import requests , time , urllib .request , re , json , sys
4+ from bs4 import BeautifulSoup
5+
class bilibili_crawl:
    """Download the first DASH video stream found on a bilibili video page.

    Typical use: create an instance from a BV id, call ``get_vedio_info`` to
    scrape the title and stream URL, then hand the result to
    ``download_video``.
    """

    # Browser-like User-Agent sent with both the page request and the download.
    _USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/84.0.4147.89 Safari/537.36')
    # Seconds to wait for the page request; requests itself never times out
    # unless a timeout is passed explicitly.
    _PAGE_TIMEOUT = 10
    # Captures the rest of the line following "window.__playinfo__=".
    _PLAYINFO_RE = re.compile(r"window\.__playinfo__=(.*?)$",
                              re.MULTILINE | re.DOTALL)
    # Printed whenever the video information cannot be obtained.
    _LINK_ERROR = '视频链接错误,请重新更换'

    def __init__(self, bv):
        """Remember the page URL for the given BV id.

        :param bv: video id appended to ``https://www.bilibili.com/video/``
        """
        # Address of the video page.
        self.url = 'https://www.bilibili.com/video/' + bv
        # Time the instance was created (recorded; not read by this class).
        self.start_time = time.time()

    def get_vedio_info(self):
        """Scrape the video page for its title and first DASH stream URL.

        :return: ``{'title': ..., 'url': ...}`` on success. ``None`` after
            printing an error message when the request fails, the status is
            not 200, or the title / playinfo data is not present on the page.
        """
        try:
            response = requests.get(url=self.url,
                                    headers={'User-Agent': self._USER_AGENT},
                                    timeout=self._PAGE_TIMEOUT)
        except requests.RequestException:
            response = None

        info = None
        if response is not None and response.status_code == 200:
            info = self._parse_video_page(response.text)
        if info is None:
            print(self._LINK_ERROR)
        return info

    def _parse_video_page(self, html):
        """Return the title/url dict extracted from page HTML, or None."""
        bs = BeautifulSoup(html, 'html.parser')
        # Video title.
        title_tag = bs.find('span', class_='tit')
        # Inline script whose text contains the playinfo assignment.
        script = bs.find("script", text=self._PLAYINFO_RE)
        if title_tag is None or script is None:
            return None

        found = self._PLAYINFO_RE.search(script.string)
        if found is None:
            return None
        try:
            playinfo = json.loads(found.group(1))
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return None

        video_url = self._first_base_url(playinfo)
        if video_url is None:
            return None
        return {
            'title': title_tag.get_text(),
            'url': video_url,
        }

    @staticmethod
    def _first_base_url(playinfo):
        """Return the first 'baseUrl' under data.dash.video, or None."""
        try:
            for stream in playinfo['data']['dash']['video']:
                if 'baseUrl' in stream:
                    return stream['baseUrl']
        except (KeyError, TypeError):
            # The decoded JSON does not have the expected nesting.
            return None
        return None

    @staticmethod
    def _safe_filename(title):
        """Build '<title>.mp4', replacing each of \\ / : * ? " < > | with '-'."""
        return re.sub(r'[\\/:*?"<>|]', '-', title) + '.mp4'

    def download_video(self, video):
        """Download the stream to '<sanitized title>.mp4' in the working directory.

        :param video: dict with 'title' and 'url' keys, as returned by
            ``get_vedio_info``
        """
        filename = self._safe_filename(video['title'])
        opener = urllib.request.build_opener()
        opener.addheaders = [('Origin', 'https://www.bilibili.com'),
                             ('Referer', self.url),
                             ('User-Agent', self._USER_AGENT)]
        # urlretrieve takes no headers argument; it goes through the globally
        # installed opener, so the headers are attached by installing one.
        urllib.request.install_opener(opener)
        urllib.request.urlretrieve(url=video['url'], filename=filename,
                                   reporthook=self.schedule)

    def schedule(self, blocknum, blocksize, totalsize):
        """Progress callback for ``urllib.request.urlretrieve``.

        Rewrites one status line on stdout: a percentage followed by a
        100-character bar.

        :param blocknum: number of blocks transferred so far
        :param blocksize: size of one block
        :param totalsize: size of the remote file; when it is not positive
            (urlretrieve reports -1 if the size is unknown) the bar stays at 0%
        :return: None
        """
        if totalsize > 0:
            # The last block may overshoot the real size, so cap at 100.
            percent = min(100.0, 100.0 * blocknum * blocksize / totalsize)
        else:
            percent = 0.0
        bar = ('#' * round(percent)).ljust(100, '-')
        sys.stdout.write("%.2f%%" % percent + '[ ' + bar + ']' + '\r ')
        sys.stdout.flush()
71+
if __name__ == '__main__':
    # Script entry point: fetch the metadata of a sample video and download it.
    bc = bilibili_crawl('BV1Vh411Z7j5')
    vedio = bc.get_vedio_info()
    if vedio is None:
        # get_vedio_info returns None when the video information could not be
        # retrieved; download_video indexes its argument, so passing None
        # would fail with a TypeError. Exit with a failure status instead.
        raise SystemExit(1)
    bc.download_video(vedio)
0 commit comments