首页 > 代码库 > 用python+selenium抓取微博24小时热门话题的前15个并保存到txt中

用python+selenium抓取微博24小时热门话题的前15个并保存到txt中

抓取微博24小时热门话题的前15个，抓取的内容请保存至txt文件中，需要抓取阅读数。

#coding=utf-8
import unittest
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

 6 class Weibo(unittest.TestCase):
 7 
 8     def setUp(self):
 9         self.dr = webdriver.Chrome()
10         self.hot_list = self.get_weibo_hot_topic()
11         self.weibo_topic = self.get_top_rank_file()
12 
13     def get_weibo_hot_topic(self):
14         self.dr.get(http://weibo.com/)
15         sleep(5)
16         self.login(649_xxxx@qq.com,kemi_xxxx) #微博帐号密码
17         self.dr.get(http://d.weibo.com/100803?refer=index_hot_new)
18         sleep(5)
19         hot_topic_list = []
20         i = 0
21         while i < 15:
22             rank_and_topic = self.dr.find_elements_by_css_selector(.title.W_autocut)[i].text #定位排行和话题
23             number = self.dr.find_elements_by_css_selector(.number)[i].text #定位阅读数
24             hot_topic_list.append([rank_and_topic, number])
25             i += 1
26         return hot_topic_list
27 
28     def get_top_rank_file(self):
29         self.file_title = 微博24小时热门话题
30         self.file = open(self.file_title + .txt, wb)
31         for item in self.hot_list:
32             separate_line = ~~~~~~~~~~~~~~~~~~~~~~~~\n‘  #分隔线
33             self.file.write(separate_line.encode(utf-8))
34             self.file.write((item[0]+  +阅读数:+item[1]+\n).encode(utf-8))
35         self.file.close()
36 
37     def login(self, username, password):
38         self.dr.find_element_by_name(username).clear()
39         self.dr.find_element_by_name(username).send_keys(username)
40         self.dr.find_element_by_name(password).send_keys(password)
41         self.dr.find_element_by_css_selector(.info_list.login_btn).click()
42         
43     def test_weibo_topic(self):
44         pass
45         print(抓取完毕)
46 
47     def tearDown(self):
48         self.dr.quit()
49 
# Run the test case when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

网页如下:

技术分享

生成txt文件如下:

技术分享

 

用python+selenium抓取微博24小时热门话题的前15个并保存到txt中