首页 > 代码库 > 下载8000首儿歌的python代码

下载8000首儿歌的python代码

下载8000首儿歌的 Python 代码：

# -*- coding: UTF-8 -*-
from pyquery import PyQuery as py
from lxml import etree
import urllib
import re
import os
import sys
import logging

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Text type of the running interpreter: ``unicode`` on Python 2, ``str`` on 3.
_TEXT_TYPE = type(u"")

# Characters that format() replaces with "_": space, U+2019 and apostrophe.
# NOTE(review): the published listing had its quote characters mangled; the
# middle entry is reconstructed as the typographic apostrophe -- confirm.
_REPLACED_CHARS = (u" ", u"\u2019", u"'")

# Group 1: song title inside <h1>; group 2: first quoted attribute value of
# the first <a> that follows "downloadboxlist" (used as the mp3 link).
# Compiled once here instead of on every loop iteration.
_SONG_PATTERN = re.compile(
    r".*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?\"(.*?)\"",
    re.UNICODE | re.S,
)


def format(filename):
    """Return *filename* with spaces and apostrophes replaced by "_".

    NOTE: the name shadows the ``format`` builtin; it is kept because it is
    this module's public name.
    """
    for char in _REPLACED_CHARS:
        filename = filename.replace(char, u"_")
    return filename


def download_mp3(mp3_url, filename, dir, logger=None):
    """Download *mp3_url* into the file *filename* inside directory *dir*.

    Nothing is downloaded when the target file already exists. Failures are
    logged at DEBUG level and swallowed so that one bad song does not stop a
    batch run.

    Args:
        mp3_url: URL of the mp3 file.
        filename: Name of the file to create inside *dir*.
        dir: Existing directory that receives the file.
        logger: Logger for progress messages. Defaults to the logger named
            "simple", so the function no longer needs a global ``logger``.
    """
    if logger is None:
        logger = logging.getLogger("simple")

    f = os.path.join(dir, filename)
    if os.path.exists(f):
        logger.debug(f + " is existed.")
        return

    try:
        # Fetch everything before creating the file: otherwise a failed
        # download leaves an empty file that later runs skip as "existed".
        response = urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()
        with open(f, "wb") as out:
            out.write(data)
    except Exception:
        # The file did not exist before this call, so anything present now is
        # a partial write; remove it so that a later run retries this song.
        try:
            if os.path.exists(f):
                os.remove(f)
        except OSError:
            pass
        logger.debug(filename + " is not downloaded.")
    else:
        logger.debug(filename + " is downloaded.")


def download_all_mp3(start, end, dir, logger):
    """Download the songs of pages ``mp3-d<start>`` .. ``mp3-d<end - 1>``.

    For every id the page ``http://www.youban.com/mp3-d<id>.html`` is loaded,
    the title and mp3 link are extracted from its ``.mp3downloadbox`` element
    and the file is stored in *dir* as ``<id>_<title>.mp3`` (after format()).
    A page that is missing, unusable or raises an error is logged and
    skipped; the remaining ids are still processed.

    Args:
        start: First page id (inclusive).
        end: Last page id (exclusive, as in ``range``).
        dir: Existing directory that receives the mp3 files.
        logger: Logger that receives DEBUG progress messages.
    """
    for x in range(start, end):
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        # Exception (not a bare except) so that Ctrl-C still stops the run.
        try:
            logger.debug(str(x) + ": " + url)
            doc = py(url=url)
            e = doc('.mp3downloadbox')
            # A PyQuery selection is a list of matched elements; it is never
            # None or '', so the emptiness test has to look at its length.
            if len(e) == 0:
                logger.debug(url + " is not existed.")
                continue

            m = _SONG_PATTERN.search(_TEXT_TYPE(e))
            if m is None:
                continue

            title = m.group(1).strip()
            link = m.group(2)
            if link == '' or title == '':
                logger.debug(url + " is not useful")
                continue

            title2 = format(str(x) + "_" + title + ".mp3")
            logger.debug(str(x) + ": " + link)
            download_mp3(link, title2, dir, logger)
        except Exception:
            logger.debug(url + " met exception.")
            continue
if __name__ == "__main__":
    # Usage: <script> [START END [DIR_ROOT]]
    #   START, END  page-id range handed to download_all_mp3 (default 1 8000)
    #   DIR_ROOT    root output directory (default e:\song)
    # Arguments are optional; indexing sys.argv unconditionally raised
    # IndexError whenever fewer than three were given.
    cli_args = sys.argv[1:]

    dir_root = "e:\\song"
    if len(cli_args) >= 3 and cli_args[2] != "":
        dir_root = cli_args[2]

    start, end = 1, 8000
    if len(cli_args) >= 2:
        # Convert before comparing: the arguments arrive as strings.
        first, last = int(cli_args[0]), int(cli_args[1])
        if first >= 0 and last >= 0:
            start, end = first, last
    print("Download from %s to %s.\n" % (start, end))

    # One sub-directory per requested range, e.g. <dir_root>/1-8000.
    dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.isdir(dir):
        # makedirs also creates a missing dir_root.
        os.makedirs(dir)
    print("Download to " + dir + ".\n")

    # Bound at module level (not inside a function) so that code in this file
    # which refers to a global ``logger`` keeps working.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    # Every message goes both to the console and to download.log in `dir`.
    fh = logging.FileHandler(os.path.join(dir, "download.log"))
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)

    download_all_mp3(start, end, dir, logger)

 

有需要的可以参考继续修改。