-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.py
More file actions
51 lines (43 loc) · 1.68 KB
/
example.py
File metadata and controls
51 lines (43 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import requests
from bs4 import BeautifulSoup
import re
from urllib import request
def gethtml(url, timeout=30):
    """Download every Baidu-hosted image referenced by the page at *url*.

    The page is fetched with browser-like request headers and parsed for
    ``<img>`` tags. Each image whose ``src`` contains ``imgsrc.baidu.com``
    is written to the current working directory, using the last seven
    characters of the image URL as the file name.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait on each network operation (the page fetch
            and every image download) before giving up.

    Returns:
        None. Progress and per-image failures are reported with ``print``.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
    """
    # Imported locally so this function does not depend on a change to the
    # file-level import block.
    from urllib.parse import urljoin

    # NOTE(review): this is a hard-coded browser session captured by hand;
    # confirm it is still needed and acceptable to keep in source control.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        # The cookie name is BAIDUID (see the header dump at the end of this
        # file); the leading "B" had been dropped.
        'Cookie': 'BAIDUID=EB3E97CCCCC70480B959301E9AD16905:FG=1; locale=zh; BIDUPSID=EB3E97CCCCC70480B959301E9AD16905; PSTM=1464408270; H_PS_PSSID=18881_20141_1446_19570_17001_15285_11536',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
        'DNT': '1',
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(response.text, 'html.parser')

    for tag in soup.find_all('img'):
        src = tag.get('src')
        # <img> tags without a src yield None; skip them and any image that
        # is not served from Baidu's image host.
        if not src or 'imgsrc.baidu.com' not in src:
            continue

        # Resolve protocol-relative ("//host/...") or relative addresses
        # against the page URL; absolute URLs pass through unchanged.
        img_url = urljoin(url, src)
        try:
            with request.urlopen(img_url, timeout=timeout) as remote:
                data = remote.read()
            # File name is the URL's last seven characters; if that slice
            # is not a valid file name, open() fails and is reported below.
            with open(img_url[-7:], 'wb') as out:
                out.write(data)
        except IOError:
            # Covers both download failures (URLError is an OSError) and
            # file-system errors, so one bad image does not stop the rest.
            print("IOError")
        else:
            print('保存图片' + img_url)
if __name__ == "__main__":
    # Prompt and download only when executed as a script, so that importing
    # this module (for example to reuse gethtml) neither blocks on stdin nor
    # touches the network.
    # Strip surrounding whitespace that often comes along with a pasted URL.
    url = input("请输入贴吧网址: ").strip()
    gethtml(url)
'''
Accept:*/*
Accept-Encoding:gzip, deflate, sdch
Accept-Language:zh-CN,zh;q=0.8
Connection:keep-alive
Cookie:BAIDUID=EB3E97CCCCC70480B959301E9AD16905:FG=1; locale=zh; BIDUPSID=EB3E97CCCCC70480B959301E9AD16905; PSTM=1464408270; H_PS_PSSID=18881_20141_1446_19570_17001_15285_11536
#post_content_90321290196 > img
#j_p_postlist > div:nth-child(1) > div.d_post_content_main.d_post_content_firstfloor > div.thread_recommend.thread-recommend > div.thread_list_slideshow > ul > div:nth-child(1) > li:nth-child(1) > a
'''