首页 > 代码库 > python爬取数据保存入库
python爬取数据保存入库
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved Request/urlopen into urllib.request; alias it so the
    # rest of the module can keep using the urllib2 name.
    import urllib.request as urllib2


class LatestTest:
    """Scrape the latest posts from toutiao.io and store them in MySQL."""

    # Upper bound on the number of posts returned by getDate().
    MAX_POSTS = 10

    # Captures, per post: the link target, the link text and the text of the
    # first <span> inside the "meta" div.  re.S lets ".*?" span line breaks.
    # NOTE(review): the "http://www.mamicode.com/" prefix inside href looks
    # like it was injected by the site this snippet was copied from rather
    # than being part of toutiao.io's markup -- confirm against the live page.
    POST_PATTERN = re.compile(
        r'<div class="post">.*?class="title">.*?'
        r'href="http://www.mamicode.com/(.*?)">(.*?)</a>.*?'
        r'<div class="meta">.*?<span>(.*?)</span>',
        re.S,
    )

    def __init__(self):
        """Set the target URL and the request headers used for scraping."""
        self.url = "https://toutiao.io/latest"
        self.UserAgent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/48.0.2564.97 Safari/537.36')
        self.header = {'User-Agent': self.UserAgent}

    def getDate(self, html=None):
        """Return up to MAX_POSTS ``[url, title, email]`` lists.

        Args:
            html: Optional page source to parse.  When omitted, the page at
                ``self.url`` is downloaded using ``self.header``.

        Returns:
            A list of three-element lists of whitespace-stripped strings, in
            page order.  The third element is what the rest of this script
            calls "email"; it is simply the first ``<span>`` text found in
            the post's "meta" div.  Returns ``[]`` when nothing matches.
        """
        if html is None:
            request = urllib2.Request(self.url, headers=self.header)
            response = urllib2.urlopen(request)
            try:
                html = response.read()
            finally:
                # Release the socket even if read() fails.
                response.close()

        # On Python 3 read() yields bytes, which a text pattern cannot
        # search.  On Python 2 bytes *is* str, so this branch is skipped and
        # the original byte-string behaviour is kept.
        if bytes is not str and isinstance(html, bytes):
            html = html.decode("utf-8")

        matches = self.POST_PATTERN.findall(html)
        return [
            [link.strip(), title.strip(), email.strip()]
            for link, title, email in matches[:self.MAX_POSTS]
        ]

    def savaDateMysql(self, url, title, email):
        """Insert one row into the ``content`` table.

        The target table is expected to have ``url``, ``title`` and ``email``
        columns (the DDL that was commented out here declared each as
        ``varchar(100)`` next to an auto-increment ``id``).

        Errors while connecting or inserting are printed, not raised, so one
        bad row does not abort the caller's loop.

        Raises:
            ImportError: If the MySQLdb driver is not installed.
        """
        # Imported here rather than at module level so that the scraping and
        # parsing code can be used without the database driver present.
        import MySQLdb

        # Values are bound by the driver instead of being formatted into the
        # statement: scraped titles may contain quotes, which would break
        # (or inject into) a string-built query.
        sql = "insert into content(url,title,email) values (%s,%s,%s)"

        # NOTE(review): host and credentials are hard-coded; they should be
        # moved to configuration or environment variables.
        conn = None
        try:
            conn = MySQLdb.connect('192.168.200.23', 'root', 'g6s8m3t7s',
                                   'mysql', charset='utf8')
            cursor = conn.cursor()
            cursor.execute(sql, (url, title, email))
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            # conn stays None when connect() itself failed; closing it
            # unconditionally would raise and hide the real error.
            if conn is not None:
                conn.close()


def main():
    """Scrape the latest posts, echo each one and save it to MySQL."""
    lat = LatestTest()
    contentlist = lat.getDate()
    try:
        # getDate() already strips every field.
        for url, title, email in contentlist:
            print("%s %s %s" % (url, title, email))
            lat.savaDateMysql(url, title, email)
    except Exception as e:
        print(e)


if __name__ == '__main__':
    main()
python爬取数据保存入库
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。