首页 > 代码库 > python模拟163登陆获取邮件列表
python模拟163登陆获取邮件列表
利用cookielib和urllib2模块模拟登陆163的例子有很多,近期看了《python模拟登陆163邮箱并获取通讯录》一文,受到启发,试着对收件箱、发件箱等进行了分析,并列出了所有邮件列表及状态,包括发件人、收件人、主题、发信时间、已读未读等状态。
1、参考代码:http://hi.baidu.com/fc_lamp/blog/item/2466d1096fcc532de8248839.html
#-*- coding:UTF-8 -*-
import urllib
import urllib2
import cookielib
import xml.etree.ElementTree as etree  # XML parser


class Login163:
    """Log in to 163 mail with urllib2 + cookielib and read the address book.

    Python 2 only (urllib2 / cookielib). Creating an instance installs a
    process-wide urllib2 opener whose cookies are kept in ``self.cookie``.
    """

    # Pretend to be a desktop browser.
    header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    username = ''
    passwd = ''
    cookie = None  # cookie jar, created in __init__
    cookiefile = './cookies.dat'  # file the cookie jar is saved to
    user = ''  # part of username before '@', set by login()

    def __init__(self, username, passwd):
        """Store the credentials and install a cookie-aware urllib2 opener."""
        self.username = username
        self.passwd = passwd
        self.cookie = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
        urllib2.install_opener(opener)

    def login(self):
        """Post the credentials to the 163 login page.

        Returns True when the response contains the success marker,
        otherwise a failure message string (callers tell the two apart by
        checking for True). Also sets ``self.user`` and saves the cookie jar
        to ``self.cookiefile``.
        """
        postdata = urllib.urlencode({
            'username': self.username,
            'password': self.passwd,
            'type': 1,
        })
        req = urllib2.Request(
            url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
            data=postdata,
            headers=self.header,
        )
        result = urllib2.urlopen(req).read()
        self.user = self.username.split('@')[0]
        self.cookie.save(self.cookiefile)

        # NOTE(review): the marker must match the page text byte-for-byte
        # (punctuation width and encoding included) - confirm against a
        # real login response.
        if '登录成功,正在跳转...' in result:
            return True
        return '%s 登陆163邮箱失败。' % (self.user)

    def _fetch_sid(self):
        """Request the mail entry page and return the session id cookie value.

        Raises LookupError when no cookie mentions 'sid' (for example when
        login() failed), instead of silently continuing with a wrong value.
        """
        auth = urllib2.Request(
            url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
            headers=self.header,
        )
        urllib2.urlopen(auth).read()  # only the cookies it sets are needed

        sid = None
        for cookie in self.cookie:
            text = str(cookie)
            if 'sid' in text:
                # str(cookie) has the form "<Cookie name=value for domain/path>"
                sid = text.split()[1].split('=')[1]
                break
        self.cookie.save(self.cookiefile)
        if sid is None:
            raise LookupError('no sid cookie found; did login() succeed?')
        return sid

    def address_list(self):
        """Fetch the address book.

        Returns a list of ``{'email': address}`` dicts, one per contact
        field named ``EMAIL;PREF`` in the XML reply.
        """
        sid = self._fetch_sid()
        url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
        # The 'var' parameter is required; without it the reply only
        # contains something like <code>S_OK</code><messages/>.
        # The parameters were taken from a Firebug capture.
        postdata = urllib.urlencode({
            'func': 'global:sequential',
            'showAd': 'false',
            'sid': sid,
            'uid': self.username,
            'userType': 'browser',
            'var': '<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>',
        })
        req = urllib2.Request(url=url, data=postdata, headers=self.header)
        res = urllib2.urlopen(req).read()
        return self._parse_contacts(res)

    @staticmethod
    def _parse_contacts(xml_text):
        """Convert the XML reply into a list of ``{'email': ...}`` dicts."""
        root = etree.fromstring(xml_text)
        outer = root.find('array')  # first <array> directly under the root
        if outer is None or len(outer) == 0:
            return []
        # list(element) replaces getchildren(), which newer Pythons removed.
        children = list(outer[0])
        if not children:
            return []
        contacts = children[-1]  # last child of the first entry holds the contacts
        emails = []
        for contact in contacts:
            for field in contact:
                if field.attrib.get('name') == 'EMAIL;PREF':
                    emails.append({'email': field.text})
        return emails


#Demo
print("Requesting......\n\n")
login = Login163('xxxx@163.com','xxxxx')
flag = login.login()
if flag is True:
    print("Successful landing,Resolved contacts......\n\n")
    res = login.address_list()
    for x in res:
        print(x['email'])
else:
    print(flag)
2、分析收件箱、发件箱等网址
在参考代码中,获取通讯录的url为
url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username。通过对邮箱地址的分析,发现收件箱、发件箱等的url为
url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username,其中func=mbox:listMessages。其对收件箱、发件箱的具体区分在下面的postdata中,具体为:
(1)收件箱
postdata = {'func':'global:sequential', 'showAd':'false', 'sid':sid, 'uid':self.username, 'userType':'browser', 'var':'<?xml version="1.0"?><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'}
(2)发件箱
postdata = {'func':'global:sequential', 'showAd':'false', 'sid':sid, 'uid':self.username, 'userType':'browser', 'var':'<?xml version="1.0"?><object><int name="fid">3</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'}
可以看出,两段代码的不同之处就是fid的取值不同,其中收件箱为1,发件箱为3,草稿箱为2。
3、xml解析
利用ElementTree 类来进行xml到字典的转换。在获取通讯录的实例中,主要使用了这一方法。本例子(具体代码见后文)在收取邮件列表时,并没有用这一方法,仍然使用的是字符串的处理方法。但这里还是列一下ElementTree 类对xml的处理。如(参考地址:http://hi.baidu.com/fc_lamp/blog/item/8ed2d53ada4586f714cecb3d.html):
-<result> <code>S_OK</code> -<array name="var"> -<object> <string name="code">S_OK</string> -<array name="var"> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> +<object></object> </array> </object> +<object></object> </array> </result>
解决方法:
#-*- coding:UTF-8 -*-

import xml.etree.ElementTree as etree  # XML parser


def xml2json(xml):
    """Extract the preferred e-mail addresses from a contacts XML reply.

    The function walks a fixed path: the first ``<array>`` directly under
    the root, the first element inside that array, then that element's last
    child. Every grandchild of that node whose ``name`` attribute equals
    ``EMAIL;PREF`` yields one entry.

    Args:
        xml: the XML document as a string (for a file, parse it with
            ``etree.parse(source)`` instead).

    Returns:
        A list of ``{'email': text}`` dicts in document order; an empty
        list when the expected structure is absent.
    """
    root = etree.fromstring(xml)

    outer = root.find('array')  # first <array> directly under the root
    if outer is None or len(outer) == 0:
        return []

    # list(element) replaces getchildren(), which was deprecated and is
    # gone from Python 3.9 onwards.
    children = list(outer[0])
    if not children:
        return []
    contacts = children[-1]

    json = []
    for contact in contacts:
        for field in contact:
            # .get() tolerates fields that carry no "name" attribute.
            if field.attrib.get('name') == 'EMAIL;PREF':
                json.append({'email': field.text})
    return json
4、收件箱邮件列表
本例子只列出了收件箱邮件列表,如果需要,可根据以上介绍调整fid值,列出发件箱、草稿箱等的邮件列表。程序在Windows XP、py2.6环境下调试通过,运行后,会在当前目录下生成三个文件:inboxlistfile.txt记录收件箱邮件列表,addfile.txt记录通讯录,cookies.dat记录cookies。具体代码如下:
#-*- coding:UTF-8 -*-
# Author: 小五义 http://www.cnblogs.com/xiaowuyi
# 163 mail: message list
import urllib
import urllib2
import cookielib
import xml.etree.ElementTree as etree  # XML parser


class Login163:
    """Log in to 163 mail and read the address book and a folder listing.

    Python 2 only (urllib2 / cookielib). Creating an instance installs a
    process-wide urllib2 opener whose cookies are kept in ``self.cookie``.
    """

    # Pretend to be a desktop browser.
    header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    username = ''
    passwd = ''
    cookie = None  # cookie jar, created in __init__
    cookiefile = './cookies.dat'  # file the cookie jar is saved to
    user = ''  # part of username before '@', set by login()

    def __init__(self, username, passwd):
        """Store the credentials and install a cookie-aware urllib2 opener."""
        self.username = username
        self.passwd = passwd
        self.cookie = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
        urllib2.install_opener(opener)

    def login(self):
        """Post the credentials to the 163 login page.

        Returns True when the response contains the success marker,
        otherwise a failure message string (callers tell the two apart by
        checking for True). Also sets ``self.user`` and saves the cookie jar
        to ``self.cookiefile``.
        """
        postdata = urllib.urlencode({
            'username': self.username,
            'password': self.passwd,
            'type': 1,
        })
        req = urllib2.Request(
            url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
            data=postdata,
            headers=self.header,
        )
        result = urllib2.urlopen(req).read()
        self.user = self.username.split('@')[0]
        self.cookie.save(self.cookiefile)

        # NOTE(review): the marker must match the page text byte-for-byte
        # (punctuation width and encoding included) - confirm against a
        # real login response.
        if '登录成功,正在跳转...' in result:
            return True
        return '%s 登陆163邮箱失败。' % (self.user)

    def _fetch_sid(self):
        """Request the mail entry page and return the session id cookie value.

        Raises LookupError when no cookie mentions 'sid' (for example when
        login() failed), instead of silently continuing with a wrong value.
        """
        auth = urllib2.Request(
            url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
            headers=self.header,
        )
        urllib2.urlopen(auth).read()  # only the cookies it sets are needed

        sid = None
        for cookie in self.cookie:
            text = str(cookie)
            if 'sid' in text:
                # str(cookie) has the form "<Cookie name=value for domain/path>"
                sid = text.split()[1].split('=')[1]
                break
        self.cookie.save(self.cookiefile)
        if sid is None:
            raise LookupError('no sid cookie found; did login() succeed?')
        return sid

    def _post(self, url, fields):
        """POST the url-encoded *fields* to *url* and return the reply body."""
        req = urllib2.Request(
            url=url,
            data=urllib.urlencode(fields),
            headers=self.header,
        )
        return urllib2.urlopen(req).read()

    def address_list(self):
        """Fetch the address book.

        Returns a list of ``{'email': address}`` dicts, one per contact
        field named ``EMAIL;PREF`` in the XML reply.
        """
        sid = self._fetch_sid()
        url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
        # The 'var' parameter is required; without it the reply only
        # contains something like <code>S_OK</code><messages/>.
        # The parameters were taken from a Firebug capture.
        res = self._post(url, {
            'func': 'global:sequential',
            'showAd': 'false',
            'sid': sid,  # session id just fetched, not a stale hard-coded one
            'uid': self.username,
            'userType': 'browser',
            'var': '<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>',
        })
        return self._parse_contacts(res)

    @staticmethod
    def _parse_contacts(xml_text):
        """Convert the XML reply into a list of ``{'email': ...}`` dicts."""
        root = etree.fromstring(xml_text)
        outer = root.find('array')  # first <array> directly under the root
        if outer is None or len(outer) == 0:
            return []
        # list(element) replaces getchildren(), which newer Pythons removed.
        children = list(outer[0])
        if not children:
            return []
        contacts = children[-1]  # last child of the first entry holds the contacts
        emails = []
        for contact in contacts:
            for field in contact:
                if field.attrib.get('name') == 'EMAIL;PREF':
                    emails.append({'email': field.text})
        return emails

    @staticmethod
    def _field(chunk, marker, closing):
        """Return the text between ``marker>`` and *closing* inside *chunk*.

        Returns '' when the marker or the closing tag is missing, so a
        malformed entry never produces a garbage slice.
        """
        start = chunk.find(marker)
        if start == -1:
            return ''
        start += len(marker) + 1  # also skip the '>' that ends the opening tag
        end = chunk.find(closing, start)
        if end == -1:
            return ''
        return chunk[start:end]

    def minbox(self, fid=1, outfile='inboxlistfile.txt', label='inbox'):
        """Append the message list of one folder to a text file.

        Args:
            fid: folder id; 1 is the inbox, 3 the sent folder, 2 the drafts.
            outfile: file the listing is appended to.
            label: folder name written in the summary line.

        For every message the sender, recipient, subject, sent date,
        read/unread state, size and message id are written.
        """
        sid = self._fetch_sid()
        url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username
        # The 'var' parameter is required; without it the reply only
        # contains something like <code>S_OK</code><messages/>.
        # NOTE(review): the POST body still says func=global:sequential while
        # the URL says mbox:listMessages; kept as published - confirm which
        # one the server honours.
        res = self._post(url, {
            'func': 'global:sequential',
            'showAd': 'false',
            'sid': sid,  # session id just fetched, not a stale hard-coded one
            'uid': self.username,
            'userType': 'browser',
            'var': '<?xml version="1.0"?><object><int name="fid">%d</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>' % fid,
        })

        chunks = str(res).split('<object>')  # one chunk per message entry
        total = len(chunks) - 1  # number of messages
        index = 0  # running message number
        with open(outfile, 'a') as out:  # closed even if a write fails
            for chunk in chunks:
                if 'xml' in chunk and ' version=' in chunk:
                    out.write(label + ' 共' + str(total) + '信')
                    out.write('\n')
                if 'name="id"' in chunk:
                    index += 1
                    out.write('第' + str(index) + '封:\n')
                    out.write('From:' + self._field(chunk, 'name="from"', '</string>') + '\n')
                    out.write('TO:' + self._field(chunk, 'name="to"', '</string>') + '\n')
                    out.write('Subject:' + self._field(chunk, 'name="subject"', '</string>') + '\n')
                    out.write('Date:' + self._field(chunk, 'name="sentDate"', '</date>') + '\n')
                    if 'name="read">true' in chunk:
                        out.write('邮件状态:已读\n')
                    else:
                        out.write('邮件状态:未读\n')
                    out.write('邮件尺寸:' + self._field(chunk, 'name="size"', '</int>') + '\n')
                    # The message id is what a later download request needs.
                    out.write('邮件编号:' + self._field(chunk, 'name="id"', '</string>') + '\n\n')


#Demo
print("Requesting......\n\n")
login = Login163('AAAAA@163.com','AAAAA')
flag = login.login()
if flag is True:
    login.minbox()
    print("Successful landing,Resolved contacts......\n\n")
    res = login.address_list()
    for x in res:
        print(x['email'])
else:
    print(flag)
python模拟163登陆获取邮件列表