首页 > 代码库 > 用python+selenium获取北上广深成五地PM2.5数据信息并按空气质量排序

用python+selenium获取北上广深成五地PM2.5数据信息并按空气质量排序

从 http://www.pm25.com/ 的各城市页面(例如 http://www.pm25.com/shenzhen.html)抓取北京、深圳、上海、广州、成都的 PM2.5 指数,按指数从低到高(即空气质量从优到差)排序,并保存在 txt 文档里。

#coding=utf-8
from selenium import webdriver
from time import sleep

class PM:
    """Scrape PM2.5 readings for five cities from pm25.com and save a ranking.

    Creating an instance starts a Chrome session and scrapes every city
    right away; the result is stored in ``self.pm25_info`` as a list of
    ``(city_name, pm_exp, air_quality)`` tuples of page text, sorted by the
    numeric PM2.5 value in ascending order.
    """

    # Locator strategy string; this is the value of selenium's
    # ``By.CSS_SELECTOR``. ``find_element(by, value)`` is used instead of
    # ``find_element_by_css_selector`` because the latter was removed in
    # Selenium 4.3, while the former is available in older releases too.
    _CSS = 'css selector'

    def __init__(self):
        """Start the browser and collect the PM2.5 data for all cities."""
        self.dr = webdriver.Chrome()
        try:
            self.pm25_info = self.get_pm25_info()
        except Exception:
            # Do not leave an orphaned browser behind when scraping fails;
            # the caller never gets an instance to call quit() on.
            self.dr.quit()
            raise

    def get_pm25_info(self):
        """Visit each city page and return the readings sorted by PM2.5.

        Returns:
            A list of ``(city_name, pm_exp, air_quality)`` tuples holding
            the text of the matched page elements, ordered by
            ``float(pm_exp)`` from lowest to highest.

        Raises:
            ValueError: if a scraped PM2.5 value cannot be parsed as a float.
        """
        city_list = ['beijing', 'shenzhen', 'shanghai', 'guangzhou', 'chengdu']
        pm_info_list = []
        for city in city_list:
            self.dr.get('http://www.pm25.com/' + city + '.html')
            # Fixed pause after navigation, kept from the original script
            # (presumably to let the page finish rendering).
            sleep(3)
            # NOTE: "loaction" is the selector exactly as the original
            # script spells it; do not "correct" it without checking the page.
            city_name = self.dr.find_element(self._CSS, '.bi_loaction_city').text  # city name
            pm_exp = self.dr.find_element(self._CSS, '.bi_aqiarea_num').text  # PM2.5 index
            air_quality = self.dr.find_element(self._CSS, '.bi_aqiarea_right span').text  # air quality label
            pm_info_list.append((city_name, pm_exp, air_quality))
        # The page text is a string, so convert to float for numeric ordering.
        pm_info_list.sort(key=lambda x: float(x[1]))
        return pm_info_list

    def get_pm_info_file(self):
        """Write the ranked readings to a UTF-8 text file in the working dir.

        The file is named ``self.file_title + '.txt'``. Each entry is a
        ``TOP<n>`` separator line followed by the city, PM2.5 index and air
        quality lines, with ``n`` starting at 1.
        """
        self.file_title = '北上广深成五地PM2.5数据信息'
        # Binary mode plus explicit encoding keeps the bytes identical on
        # every platform; ``with`` closes the file even if a write fails.
        with open(self.file_title + '.txt', 'wb') as out:
            for rank, item in enumerate(self.pm25_info, 1):
                separate_line = '**********' + 'TOP' + str(rank) + '**********\n'
                out.write(separate_line.encode('utf-8'))
                out.write(('城市: ' + item[0] + '\n' +
                           'PM2.5指数: ' + item[1] + '\n' +
                           '空气质量:' + item[2] + '\n').encode('utf-8'))

    def quit(self):
        """Close the browser session."""
        self.dr.quit()

# Script entry point: scrape the five cities, write the report, close Chrome.
if __name__ == '__main__':
    pm_info = PM()
    try:
        pm_info.get_pm_info_file()
    finally:
        # Always close the browser, even if writing the report fails.
        pm_info.quit()

网页如下:

技术分享

生成txt如下:

技术分享

用python+selenium获取北上广深成五地PM2.5数据信息并按空气质量排序