首页 > 代码库 > Python superbwallpapers 动漫分类 下载

Python superbwallpapers 动漫分类 下载

动漫分类壁纸多线程下载,有Bug

# -*- coding: utf-8 -*-
"""Multi-threaded downloader for the superbwallpapers.com anime category.

The script walks the first ``end_page`` listing pages, collects the wallpaper
links found on each one, writes the resulting image URLs to
``K://PIC/url.txt`` and then downloads every image into ``K://PIC/<page>/``
using a small pool of worker threads.
"""
import os
import re
import socket
import threading

try:  # Python 2 names, as used by the original script
    from urllib import urlretrieve
    from urllib2 import urlopen
except ImportError:  # Python 3 moved both into urllib.request
    from urllib.request import urlopen, urlretrieve

# Global socket timeout so a stalled download cannot hang a worker forever.
# See: http://outofmemory.cn/code-snippet/16848/python-through-urllib-urlretrieve-file-setting-method
# Another way: urllib.request.socket.setdefaulttimeout(20)
socket.setdefaulttimeout(25)

SAVE_DIR = "K://PIC/"
# The original loop allowed threading.activeCount() < 7, i.e. the main thread
# plus at most six download threads.
MAX_WORKERS = 6

re_link = re.compile(r'/anime/.{0,50}-\d{5}')
re_404 = re.compile(r'Page not found - Please try some of the popular items below')

end_page = 40
# main_url.append("http://www.superbwallpapers.com/anime/")
main_url = [
    "http://www.superbwallpapers.com/anime/" + str(page) + ".html"
    for page in range(1, end_page + 1)
]

# Parallel lists: entry i of each list describes the same wallpaper.
pic_page = []        # wallpaper detail page URL
pic_name = []        # slug used as the local file name
pic_url = []         # direct 1920x1080 image URL
pic_url_number = []  # index of the listing page the wallpaper came from

pic_number = 0       # next index of pic_url to download; guarded by `lock`
how_many = 0
url_fail = []        # flat list of (url, page index) pairs that failed twice
lock = threading.Lock()


def match_picture_links(hrefs):
    """Return the ``/anime/<slug>-<5 digits>`` prefix of every matching href.

    Missing hrefs (``None`` or empty) and non-matching hrefs are skipped.
    """
    matches = []
    for href in hrefs:
        if not href:
            continue
        match = re_link.match(href)
        if match:
            matches.append(match.group())
    return matches


def record_pictures(match_pic, page_index):
    """Append one entry per wallpaper in *match_pic* to the module lists.

    NOTE(review): the pairing arithmetic is kept from the original script. It
    takes every second match and ignores the trailing one, which presumably
    reflects each wallpaper being linked twice on the listing page - confirm
    against the live page layout.
    """
    for i in range((len(match_pic) - 1) // 2):
        link = match_pic[i * 2]
        pic_page.append('http://www.superbwallpapers.com' + link)
        pic_name.append(link[7:])  # drop the leading "/anime/"
        pic_url.append("http://cdn.superbwallpapers.com/wallpapers" + link + "-1920x1080.jpg")
        pic_url_number.append(page_index)


def one_page(main_url, page_index=0):
    """Fetch one listing page and record the wallpapers it links to.

    Args:
        main_url: URL of the listing page to scrape.
        page_index: Index of the listing page; it selects the sub-directory
            the images are saved to. The original read this from the driver
            loop's global ``x``, which only worked when called from that loop.
    """
    # Imported here so the pure helpers above stay importable without bs4.
    from bs4 import BeautifulSoup

    main_page_html = urlopen(main_url).read()
    # bs4 spells the keyword from_encoding; fromEncoding is the BS3 name.
    soup = BeautifulSoup(main_page_html, from_encoding="gb18030")
    hrefs = [link.get('href') for link in soup.find_all('a')]
    record_pictures(match_picture_links(hrefs), page_index)


class myThread(threading.Thread):
    """Worker that keeps claiming and downloading pictures until none are left."""

    def __init__(self, pic_url):
        threading.Thread.__init__(self)
        self.pic_url = pic_url

    def run(self):
        """Claim the next undownloaded index under the lock and fetch it.

        The bounds check happens inside the lock, so a worker can never claim
        an index past the end of the list (the original could, and crashed
        with IndexError).
        """
        global pic_number
        while True:
            print('%s acquire lock...' % self.name)
            with lock:
                print('%s get the lock.' % self.name)
                index = pic_number
                exhausted = index >= len(self.pic_url)
                if not exhausted:
                    pic_number += 1
            # The lock is released on leaving the with-block.
            print('%s release lock...' % self.name)
            if exhausted:
                return
            self.download(index)

    def download(self, index):
        """Download picture *index*, retrying once before recording a failure."""
        url = self.pic_url[index]
        target = SAVE_DIR + str(pic_url_number[index]) + "/" + str(pic_name[index]) + ".jpg"
        # Detail: http://www.nowamagic.net/academy/detail/1302861
        for _attempt in range(2):
            try:
                urlretrieve(url, target)
            except (IOError, socket.error):  # includes socket.timeout
                continue
            print("Picture " + str(pic_name[index]) + " Downloaded")
            return
        with lock:  # keep each url / page-index pair adjacent in url_fail
            url_fail.append(url)
            url_fail.append(pic_url_number[index])
        print("-----socket timout-----,record...")


def start_new_thread():
    """Start one download worker over ``pic_url`` and return it."""
    thread = myThread(pic_url)
    thread.start()
    return thread


def main():
    """Scrape every listing page, save the URL list, then download the images."""
    print(main_url)
    if not os.path.isdir(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    for page_index, page_url in enumerate(main_url):
        one_page(page_url, page_index)
        title = SAVE_DIR + str(page_index)
        if not os.path.isdir(title):
            os.mkdir(title)
    with open(SAVE_DIR + 'url.txt', 'w+') as output:
        output.write(str(pic_url))

    workers = [start_new_thread() for _ in range(min(MAX_WORKERS, len(pic_url)))]
    for worker in workers:
        worker.join()


if __name__ == "__main__":
    main()

 技术分享

Python superbwallpapers 动漫分类 下载