爬站长之家表情包

首页 > 代码库 > 爬站长之家表情包

2024-09-17 03:14:49 219人阅读

import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


 5 #获取页面内容
 6 def getHtmlText(url, s=‘text‘):
 7     try:
 8         r = requests.get(url, timeout=30)
 9         r.raise_for_status()
10         r.encoding = r.apparent_encoding
11         if s == ‘text‘:
12             return r.text
13         elif s == ‘content‘:
14             return r.content
15         else:
16             return ‘‘
17     except:
18         return ""
19 
20 
def getEmotionInfo(html):
    """Extract each sticker pack's title and link from a listing page.

    A pack is a ``div.up`` element whose ``div.num_1`` descendant contains
    the anchor carrying the pack ``title`` and the ``href`` of its detail
    page. Every pack found is handed to ``getEmotionImgInfo``.

    Args:
        html: Markup of one listing page. An empty string yields no packs.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for div in soup.find_all('div', attrs={'class': 'up'}):
        holder = div.find('div', attrs={'class': 'num_1'})
        a = holder.find('a') if holder is not None else None
        if a is None:
            # Entry does not have the expected structure; skip it rather
            # than abort the whole page.
            continue
        title = a.get('title')
        href = a.get('href')
        if not title or not href:
            continue
        getEmotionImgInfo(title, href)


def getEmotionImgInfo(title, href):
    """Collect the image URLs of one sticker pack and pass them to ``getImg``.

    The images are read from the node two siblings after the ``div.img_text``
    element of the pack's detail page.

    Args:
        title: Pack title, forwarded unchanged to ``getImg``.
        href: URL of the pack's detail page; also used as the base for
            resolving relative image addresses.
    """
    html = getHtmlText(href)
    soup = BeautifulSoup(html, 'html.parser')
    marker = soup.find('div', attrs={'class': 'img_text'})
    if marker is None:
        # The fetch failed (empty markup) or the page has no such element.
        return
    # Two hops along the sibling chain; presumably the first hop skips a
    # whitespace text node between the two elements -- verify against the
    # live page if the layout changes.
    img_div = getattr(marker.next_sibling, 'next_sibling', None)
    find_all = getattr(img_div, 'find_all', None)
    if not callable(find_all):
        # The node is missing or is not an element that can contain images.
        return
    # urljoin leaves absolute URLs untouched and resolves relative or
    # protocol-relative ones against the detail page address.
    url_list = [
        urljoin(href, img['src'])
        for img in find_all('img')
        if img.get('src')
    ]
    getImg(title, url_list)


def getImg(title, url_list, base_dir='D://pics//'):
    """Download the images of one sticker pack into ``base_dir/title``.

    Files that already exist on disk are not downloaded again, and a failed
    download is skipped without creating a file so a later run can retry it.
    After each URL is handled, a progress line is printed.

    Args:
        title: Pack title; used as the directory name and in the progress
            output.
        url_list: Image URLs. The text after the last ``/`` of each URL is
            used as the file name.
        base_dir: Directory under which the pack directory is created.
            Missing parent directories are created as needed.
    """
    root = os.path.join(base_dir, title)
    os.makedirs(root, exist_ok=True)
    total = len(url_list)
    for done, key in enumerate(url_list, start=1):
        path = os.path.join(root, key.split('/')[-1])
        if not os.path.exists(path):
            # Fetch before opening the file: a failed download returns an
            # empty value, which must not leave an empty file behind.
            img_content = getHtmlText(key, 'content')
            if img_content:
                with open(path, 'wb') as f:
                    f.write(img_content)
        # Progress counts every URL handled, so a re-run over a partly
        # downloaded pack still ends at 100%.
        print('\r{}文件进度：{:.2f}%'.format(title, done * 100 / total), end=',')


def main(pages=20):
    """Crawl the sticker listing pages and process every pack found on them.

    Args:
        pages: Number of listing pages to visit, starting from the first.
    """
    first_url = 'http://sc.chinaz.com/biaoqing/index.html'
    root_url = 'http://sc.chinaz.com/biaoqing/index_'

    # range() excludes its stop value, so run to pages + 1 to visit the
    # last page as well.
    for i in range(1, pages + 1):
        # The first page has no numeric suffix; later ones are index_<n>.html.
        url = first_url if i == 1 else root_url + str(i) + '.html'
        html = getHtmlText(url)
        if not html:
            # Fetch failed; there is nothing to parse for this page.
            continue
        getEmotionInfo(html)


if __name__ == '__main__':
    main()

爬站长之家表情包

声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任。若内容有误或涉及侵权，可进行投诉（投诉/举报），工作人员会在5个工作日内联系你；一经查实，本站将立刻删除涉嫌侵权内容。

联系我们

首页 > 代码库 > 爬站长之家表情包

爬站长之家表情包

看完仍有疑问？有类似问题直接问程序猿