首页 > 代码库 > scrapy爬取中关村在线手机频道
scrapy爬取中关村在线手机频道
# -*- coding: utf-8 -*-
import scrapy
from pyquery import PyQuery as pq

from zolphone.items import ZolphoneItem


class PhoneSpider(scrapy.Spider):
    """Crawl the ZOL phone listing pages and scrape every linked detail page.

    The crawl has two stages:

    1. ``start_requests`` requests the numbered listing pages.
    2. ``parse_index`` follows each product link found on a listing page, and
       ``parse_detail`` turns the product page into a ``ZolphoneItem``.
    """

    name = "phone"

    # Common prefix of the listing pages; a page URL is
    # ``start_url + <page number> + '.html'``.
    start_url = 'http://detail.zol.com.cn/cell_phone_index/subcate57_0_list_1_0_1_1_0_'

    # Number of the last listing page to request (inclusive). Kept as a class
    # attribute so it can be overridden without editing the request loop.
    # NOTE(review): presumably also overridable with ``-a last_page=N`` on the
    # command line, hence the ``int()`` conversion below -- confirm.
    last_page = 208

    def start_requests(self):
        """Yield one request per listing page, from page 1 to ``last_page``."""
        for page in range(1, int(self.last_page) + 1):
            url = self.start_url + str(page) + '.html'
            yield scrapy.Request(url, callback=self.parse_index)

    def parse_index(self, response):
        """Yield a detail-page request for every product on a listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: One request per product link, handled by
            ``parse_detail``.
        """
        doc = pq(response.text)
        for entry in doc('.list-box .list-item').items():
            href = entry.find('.pro-intro h3 a').attr('href')
            if not href:
                # attr() returns None when the item has no matching link;
                # skip it instead of failing on string concatenation and
                # losing the remaining items of this page.
                continue
            # urljoin resolves site-relative links against the page URL and
            # leaves already-absolute links untouched.
            yield scrapy.Request(url=response.urljoin(href),
                                 callback=self.parse_detail)

    def parse_detail(self, response):
        """Extract the titles, price and release date from a product page.

        Args:
            response: The downloaded product detail page.

        Yields:
            ZolphoneItem: A single item with ``title1``, ``title2``,
            ``price`` and ``release_time`` filled in.
        """
        doc = pq(response.text)
        # extract_first() returns None when the heading is missing, whereas
        # the PyQuery .text() calls below return an empty string.
        title1 = response.css('.page-title h1::text').extract_first()
        title2 = doc('.page-title h2').text()
        price = doc('.product-price .price-type').text()
        release_time = doc('.section div h3 .showdate').text()

        # Use the spider logger rather than print() so the output follows the
        # configured log level.
        self.logger.debug('%s %s %s %s', title1, title2, price, release_time)

        item = ZolphoneItem()
        item['title1'] = title1
        item['title2'] = title2
        item['price'] = price
        item['release_time'] = release_time

        yield item
import scrapy


class ZolphoneItem(scrapy.Item):
    """Record describing one phone, with four declared fields."""

    # Primary title of the product.
    title1 = scrapy.Field()

    # Secondary title of the product.
    title2 = scrapy.Field()

    # Price text as scraped.
    price = scrapy.Field()

    # Release date text as scraped.
    release_time = scrapy.Field()
scrapy爬取中关村在线手机频道
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。若内容有误或涉及侵权,可进行投诉(投诉/举报),工作人员会在5个工作日内联系你;一经查实,本站将立刻删除涉嫌侵权内容。