首页 > 代码库 > scrapy 模拟登录

scrapy 模拟登录

import scrapy
import urllib.request
from scrapy.http import Request,FormRequest

class LoginspdSpider(scrapy.Spider):
    """Log in to douban.com through its web form, then print the landing page title.

    Flow: ``start_requests`` fetches the login page, ``parse`` fills in and
    submits the login form (asking the operator to type the captcha when the
    page shows one), and ``next`` prints the ``<title>`` of the page reached
    after the login request.
    """

    name = "loginspd"
    allowed_domains = ["douban.com"]
    # Not used for scheduling: start_requests() below is overridden.
    start_urls = ["http://douban.com/"]
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36 "
            "QIHU 360EE"
        ),
    }

    # Page that carries the login form.
    login_url = "https://www.douban.com/accounts/login"
    # Value sent in the form's ``redir`` field.
    redirect_url = "https://www.douban.com/people/161282530/"
    # Redacted placeholders -- replace with real credentials before running.
    # NOTE(review): the published listing showed two differently-masked
    # variants of these values in the two branches of parse(); they are
    # assumed to be the same account -- confirm.
    login_email = "83*****81@qq.com"
    login_password = "*****1"
    # Where the captcha image is saved so the operator can open it.
    captcha_path = "D:/Captcha.png"

    def start_requests(self):
        """Return the initial request for the login page.

        The request is tagged with cookie jar ``1`` and carries the spider's
        custom headers so that it is sent with the same User-Agent as the
        login POST issued later.
        """
        return [
            Request(
                self.login_url,
                meta={"cookiejar": 1},
                headers=self.headers,
                callback=self.parse,
            )
        ]

    def parse(self, response):
        """Fill in and submit the login form found in ``response``.

        If the page contains a captcha image, the image is downloaded to
        ``captcha_path`` and the solution is read from standard input.

        Returns:
            A one-element list with the ``FormRequest`` that posts the form;
            its response is handled by ``next``.
        """
        formdata = {
            "form_email": self.login_email,
            "form_password": self.login_password,
            "redir": self.redirect_url,
        }

        # Address of the captcha image, if the page shows one.
        captcha = response.xpath('//img[@id="captcha_image"]/@src').extract()
        if captcha:
            print("有验证码")
            # urljoin leaves an absolute src untouched and resolves a
            # relative one against the page URL.
            # NOTE: urlretrieve() and input() both block until they finish,
            # and the image is fetched by urllib directly, not via Scrapy.
            urllib.request.urlretrieve(
                response.urljoin(captcha[0]), filename=self.captcha_path
            )
            print("请查看本地验证码并输入:")
            # Strip stray whitespace/newline so the typed value is sent clean.
            formdata["captcha-solution"] = input().strip()
        else:
            print("没有验证码")
        print("登录中。。。。")

        return [
            FormRequest.from_response(
                response,
                # The cookiejar meta key is not sticky: it has to be passed
                # along explicitly, otherwise the POST would use the default
                # jar instead of the one that holds the login page cookies.
                meta={"cookiejar": response.meta.get("cookiejar")},
                headers=self.headers,
                formdata=formdata,
                callback=self.next,
            )
        ]

    def next(self, response):
        """Print the ``<title>`` text of the page returned after the login request."""
        print("此时已经登录完成并爬取个人中心数据")
        title = response.xpath("/html/head/title/text()").extract()
        print(title)

 

scrapy 模拟登录