首页 > 代码库 > 简单的大众点评爬虫

简单的大众点评爬虫

一个很简单的爬虫，爬取中山大学（中大）周边地点的点评信息。

# -*- coding: utf-8 -*-import requestsimport reimport timedef placeSplider(name, star, url):    time.sleep(5)    res = requests.get(http://www.dianping.com+url)    text = res.text    longInfo = "<p class=\"desc J-desc\">(.*?)</p>"    longInfo_re = re.compile(longInfo, re.DOTALL)    longInfos = longInfo_re.findall(text)        info = "sml-rank-stars sml-str(.*?)\".*?<p class=\"desc\">(.*?)</p>"    info_re = re.compile(info, re.DOTALL)    results = info_re.findall(text)    #print result    #print ‘%d results‘ %len(results)    if len(results) == 0 or len(results[0]) < 2 or results[0][1].count(u人点评) > 0:        print u没有点评\n        return    fOut = open(D:\\%s.txt %name, w)    fOut.write(place star %s\n %star)    for result in results:        star = result[0]        info = result[1]        if info.count(<span) > 0 or info.count(u仅售)>0:#去广告            print ‘‘            break        else:            if info[-6:] == u"......":#替换短评论为相应的长评论                info = info[:-6]                for i in longInfos:                    if i.count(info) > 0:                        info = i                        break            info = info.replace("<br/>", ‘‘)            info = info.replace("<br>", ‘‘)            info = info.replace("&nbsp;", ‘‘)            print star, info            fOut.write(%s\n %star)            fOut.write(%s\n %info.encode(u8))    fOut.close()for page in range(1, 6):    res = requests.get(http://www.dianping.com/search/keyword/206/0_%E4%B8%AD%E5%B1%B1%E5%A4%A7%E5%AD%A6/p+str(page))    text = res.text    href = "data-hippo-type=\"shop\" title=\"(.*?)\" target=\"_blank\" href=http://www.mamicode.com/"(.*?)\".*?sml-rank-stars sml-str(.*?)\""    href_re = re.compile(href, re.DOTALL)    result =  href_re.findall(text)    for place in result:        name = place[0]        url = place[1]        star = place[2]        print name, star, url        placeSplider(name, star, url)    time.sleep(5)

 

简单的大众点评爬虫