python批量下载色影无忌和蜂鸟的图片爬虫小应用

首页 > 代码库 > python批量下载色影无忌和蜂鸟的图片爬虫小应用

python批量下载色影无忌和蜂鸟的图片爬虫小应用

2024-10-07 00:42:40 216人阅读

代码中有些冗余信息：之前测试正则表达式时留下的代码没有移走，不过不影响使用。

# -*- coding:utf-8 -*-
import re,urllib,sys,os,time


def getAllUrl():
    """Print every ``http:`` link found on the page named by ``sys.argv[1]``.

    The page is fetched and scanned with a regular expression for
    ``<a href="..." ...>`` tags. Each captured href that starts with
    ``http:`` is printed on its own line, followed by a final ``done``.

    Raises:
        IndexError: if no URL was given on the command line.
        IOError: if the page cannot be fetched.
    """
    # urlopen lives in different modules on Python 2 and 3; importing it
    # locally keeps this function runnable on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    entry = sys.argv[1]
    response = urlopen(entry)
    try:
        page = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()
    # Python 3 returns bytes; the patterns below are text patterns. Undecodable
    # bytes are replaced because only the (ASCII) URLs matter here.
    if not isinstance(page, str):
        page = page.decode("utf-8", "replace")

    # NOTE: the published listing had a site prefix injected after href=" and
    # the literal split over two lines; this is the restored pattern.
    pattern = re.compile(r'<a href="(.+?)".+?>')
    web_site_pattern = re.compile(r'(http:.+?)')
    for url in pattern.findall(page):
        if web_site_pattern.match(url):
            print(url)

    print("done")

def download_pic():
    """Download every ``.jpg`` referenced by an ``<img src="...">`` tag.

    The page URL is taken from ``sys.argv[1]``. Each matching image is saved
    into ``C:/Tools/source`` (created if missing) under the file name taken
    from its URL. Progress is printed as the images are processed; an image
    that cannot be fetched is reported and skipped instead of aborting the
    whole batch.

    Raises:
        IndexError: if no URL was given on the command line.
        IOError: if the page itself cannot be fetched.
    """
    # urlopen lives in different modules on Python 2 and 3; importing it
    # locally keeps this function runnable on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    url = sys.argv[1]
    print("Waiting to get data")
    connection = urlopen(url)
    try:
        # read() blocks until the whole body has arrived, so no extra
        # sleep is needed before closing the connection.
        data = connection.read()
    finally:
        connection.close()
    # Python 3 returns bytes; the patterns below are text patterns.
    if not isinstance(data, str):
        data = data.decode("utf-8", "replace")

    # NOTE: the published listing had a site prefix injected after src=";
    # this is the restored pattern.
    download_pic_pattern = re.compile(r'<img src="(.+?\.jpg)".+?/>')
    name_pattern = re.compile(r'/(\w+?\.jpg)')

    directory = "C:/Tools/source"
    if not os.path.isdir(directory):
        # makedirs also creates a missing parent (C:/Tools).
        os.makedirs(directory)

    for pic_url in download_pic_pattern.findall(data):
        print(pic_url)

        # name_pattern only accepts word characters; fall back to the last
        # path segment for names such as "photo-1.jpg" instead of crashing.
        names = name_pattern.findall(pic_url)
        name = names[0] if names else pic_url.rsplit("/", 1)[-1]
        print(name)
        local_path = "%s/%s" % (directory, name)

        try:
            jpeg_connection = urlopen(pic_url)
            try:
                jpeg = jpeg_connection.read()
            finally:
                jpeg_connection.close()
        except (IOError, ValueError) as err:
            # ValueError covers relative/malformed image URLs on Python 3.
            print("skipped %s: %s" % (pic_url, err))
            continue

        with open(local_path, "wb") as out:
            out.write(jpeg)

        # Pause between requests so the remote server is not hammered.
        print("waiting")
        time.sleep(1)

    print("Done")
	
def download_pic_2():
    """Download the single resource at ``sys.argv[1]`` to ``C:/Tools/a.jpg``.

    Prints ``Done`` once the file has been written.

    Raises:
        IndexError: if no URL was given on the command line.
        IOError: if the download fails.
    """
    # urlretrieve lives in different modules on Python 2 and 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    url = sys.argv[1]
    local_path = "C:/Tools/a.jpg"

    # urlretrieve does not create the target directory itself.
    target_dir = os.path.dirname(local_path)
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    urlretrieve(url, local_path)
    print("Done")


def regulation():
    """Regular-expression scratchpad: print ``findall`` results for samples.

    Runs a series of small patterns against a fixed test string and prints
    each result list, then extracts the ``src`` value from a sample ``<img>``
    tag and prints the list of matches followed by each match on its own line.
    """
    str1 = "abc123*GBK1024abc*defb1kc12*addd"

    sample_patterns = (
        r'abc',             # plain literal
        r'a.c',             # any single character between a and c
        r'abc\*',           # escaped asterisk
        r'[abc]12',         # character class followed by a literal
        r'\d\*',            # digit followed by a literal asterisk
        r'a[^\d]',          # "a" followed by one non-digit
        r'a[^\d]*',         # "a" followed by a run of non-digits
        r'[a-zA-Z]+(\d+)',  # capture the digits that follow letters
    )
    for source in sample_patterns:
        print(re.compile(source).findall(str1))

    # NOTE: the published listing had a site prefix injected into this literal
    # and into the pattern below, breaking the string; both are restored.
    str2 = "dadfae ef <img atl=\"500\" src=\"www.qq.com/1.jpg\" width=\"700\"> asdfe aa<ima"

    p9 = re.compile(r'<img .+ src="(.+)" .+>')
    urls = p9.findall(str2)
    print(urls)
    for url in urls:
        print(url)

	
	
	
	
if __name__ == "__main__":
    # download_pic() reads the page URL from sys.argv[1]; fail with a usage
    # message instead of a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <page-url>" % sys.argv[0])

    # Alternative entry points defined in this file, for experimenting:
    #   download_pic_2()  - fetch a single file
    #   regulation()      - regex scratchpad
    #   getAllUrl()       - list the http: links on the page
    download_pic()

#######后续

后来修改了代码，改用 BeautifulSoup，可以更大范围地下载图片。

代码如下： http://www.30daydo.com/article/56

python批量下载色影无忌和蜂鸟的图片爬虫小应用

声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任，若内容有误或涉及侵权可进行投诉：投诉/举报工作人员会在5个工作日内联系你，一经查实，本站将立刻删除涉嫌侵权内容。

联系
我们

首页 > 代码库 > python批量下载色影无忌和蜂鸟的图片 爬虫小应用

python批量下载色影无忌和蜂鸟的图片 爬虫小应用

看完仍有疑问？有类似问题直接问程序猿

首页 > 代码库 > python批量下载色影无忌和蜂鸟的图片爬虫小应用

python批量下载色影无忌和蜂鸟的图片爬虫小应用