首页 > 代码库 > python爬虫基础

Python 爬虫基础

Demo1:urllib使用

#encoding:utf-8
import urllib
import urlparse
def printlist(lines):
    """Print every item of ``lines`` on its own line, in iteration order."""
    for entry in lines:
        print(entry)
def demo():
    """Open http://blog.kamidox.com and list the attributes of its header object.

    Prints, one per line, every name reported by ``dir()`` for the object
    returned by the response's ``info()``. The commented-out lines are
    alternative inspections kept from the tutorial.
    """
    # Resolved locally so the function works with either urllib layout.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    s = urlopen('http://blog.kamidox.com')
    try:
        msg = s.info()
        #printlist(msg.items())
        #printlist(msg.headers)
        #print(s.getcode())
        #printlist(s.readlines())
        #print(msg.getheader("Content-Type"))
        printlist(dir(msg))  # find the methods of the class
    finally:
        # Release the connection even if the inspection above raises.
        s.close()
def progress(blk, blk_size, total_size):
    """Print download progress; shaped to be used as a ``urlretrieve`` reporthook.

    Args:
        blk: Number of blocks transferred so far.
        blk_size: Size of one block, in bytes.
        total_size: Total size in bytes. Zero or a negative value is treated
            as "unknown", and only the byte count is printed.
    """
    done = blk * blk_size
    if total_size <= 0:
        # Without a usable total a percentage would divide by zero or go negative.
        print("%d/unknown" % done)
        return
    # The last block is usually partial, so blocks * size can overshoot the total.
    done = min(done, total_size)
    print("%d/%d - %.02f%%" % (done, total_size, float(done) * 100 / total_size))
def retrieve():
    """Download http://blog.kamidox.com to ``index.html``, reporting progress.

    ``progress`` is passed as the reporthook, so it prints a line for each
    callback made during the transfer.

    Returns:
        The ``(filename, headers)`` pair returned by ``urlretrieve``.
    """
    # Resolved locally so the function works with either urllib layout.
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    fname, msg = urlretrieve('http://blog.kamidox.com', 'index.html', reporthook=progress)
    #print(fname)
    #printlist(msg.items())
    return fname, msg
def urlencode():
    """Encode a sample dict as a URL query string, then parse it back.

    Prints the encoded query string, followed by the dict that ``parse_qs``
    recovers from it.
    """
    # Resolved locally so the function works with either urllib layout.
    try:
        from urllib.parse import parse_qs  # Python 3
        from urllib.parse import urlencode as encode_query
    except ImportError:
        from urllib import urlencode as encode_query  # Python 2
        from urlparse import parse_qs

    params = {'score': 100, 'name': 'pachongjichu', 'comment': 'very good'}
    qs = encode_query(params)
    print(qs)
    print(parse_qs(qs))
# Run the query-string demo when this file is executed as a script.
if __name__ == '__main__':
    urlencode()

Demo2:抓取图片

#encoding:utf-8
import urllib
# Resolve urlopen from whichever urllib layout this interpreter provides.
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Download the placeholder picture and capture its headers before closing.
response = urlopen("http://placekitten.com/g/300/400")
try:
    cat_img = response.read()
    headers = response.info()
finally:
    # Release the connection even if the read fails.
    response.close()

# An image is binary data, so the file is opened in 'wb' mode.
with open('cat_300_400.jpg', 'wb') as f:
    f.write(cat_img)
print(headers)

Demo3:有道词典翻译(仅适用于 Python 2)

# encoding:utf-8
import urllib
import json
import sys
# Python 2 only: reload sys and switch the default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
# Works around: UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 0
content = raw_input('请输入要翻译的内容:')  # raw_input returns the typed text as a raw string
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=http://www.youdao.com/"
data = {}  # to be filled with the fields shown under "Form Data" for this request
# NOTE(review): the published listing is corrupted from this point on. The
# statements that fill `data`, send it to `url` and parse the reply into
# `target` are missing and are not reconstructed here. Only the final
# statement survived; it stays commented out because `target` is never assigned.
# print("翻译结果:%s" % (target['translateResult'][0][0]['tgt']))

  

 

python爬虫基础