首页 > 代码库 > 爬取豆瓣,写入数据库
爬取豆瓣,写入数据库
import pymysql import requests from bs4 import BeautifulSoup baseUrl = "https://movie.douban.com/top250?start=%d&filter=" def get_movies(start): url = baseUrl % start lists = [] html = requests.get(url) soup = BeautifulSoup(html.content, "html.parser") items = soup.find("ol", "grid_view").find_all("li") for i in items: movie = {} movie["rank"] = i.find("em").text movie["link"] = i.find("div","pic").find("a").get("href") movie["poster"] = i.find("div","pic").find("a").find(‘img‘).get("src") movie["name"] = i.find("span", "title").text movie["score"] = i.find("span", "rating_num").text movie["quote"] = i.find("span", "inq").text if(i.find("span", "inq")) else "" lists.append(movie) return lists if __name__ == "__main__": db = pymysql.connect(host="192.168.1.210",port=3306,user="root",password="ubuntu",db="mysql",charset="utf8mb4") cursor = db.cursor() cursor.execute("DROP TABLE IF EXISTS movies") createTab = """CREATE TABLE movies( id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(20) NOT NULL, rank VARCHAR(4) NOT NULL, link VARCHAR(50) NOT NULL, poster VARCHAR(100) NOT NULL, score VARCHAR(4) NOT NULL, quote VARCHAR(50) ) character set = utf8""" cursor.execute(createTab) start = 0 while (start < 250): lists = get_movies(start) for i in lists: sql = "INSERT INTO movies(name,rank,link,poster,score,quote) VALUES(%s,%s,%s,%s,%s,%s)" try: cursor.execute(sql, (i["name"], i["rank"], i["link"], i["poster"], i["score"], i["quote"])) db.commit() print(i["name"]+" is success") except: db.rollback() start += 25 db.close()
爬取豆瓣,写入数据库
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。