首页 > 代码库 > 通过文本或url扫描下载文件

通过文本或url扫描下载文件

  1 package com.xxxx;  2   3 import java.io.BufferedInputStream;   4 import java.io.BufferedReader;   5 import java.io.File;   6 import java.io.FileNotFoundException;   7 import java.io.FileOutputStream;   8 import java.io.IOException;   9 import java.io.InputStreamReader;  10 import java.net.MalformedURLException;  11 import java.net.URL;  12 import java.util.ArrayList; 13 import java.util.List; 14 import java.util.regex.Matcher;  15 import java.util.regex.Pattern;  16   17 public class GetImage {  18      19     public int getCharacterPosition(String string,int numb){ 20         //这里是获取"#"符号的位置 21         Matcher slashMatcher = Pattern.compile("/").matcher(string); 22         int mIdx = 0; 23         while(slashMatcher.find()) { 24            mIdx++; 25            //当"#"符号第二次出现的位置 26            if(mIdx == numb){ 27               break; 28            } 29         } 30         return slashMatcher.start(); 31     } 32      33      34      35      36      37     /** 38      * 下载文件(图片、压缩包等文件都可以下载) 39      * @param httpUrl 40      * eg:http://www.xxxx.com/uploadfiles/123.rar 41      */ 42     public void getHtmlFile(String httpUrl) {  43     URL url;  44     BufferedInputStream in;  45     FileOutputStream file;  46     try {  47        System.out.println("取网络文件");  48        //获取子目录 49        String unitPath = httpUrl.substring(getCharacterPosition(httpUrl,3) ,httpUrl.lastIndexOf("/")); 50        String fileName = httpUrl.substring(httpUrl.lastIndexOf("/"));  51        String filePath = "F:\\FocuSimple"+unitPath+"\\"; 52        File up = new File(filePath); 53         if(!up.exists()){    //判断文件夹是否不存在 54             up.mkdirs(); 55         } 56         57        url = new URL(httpUrl);  58        59        in = new BufferedInputStream(url.openStream());  60        61        file = new FileOutputStream(new File(filePath+fileName));  62        int t;  63        while ((t = in.read()) != -1) {  64        file.write(t);  65        }  66        file.close();  67        in.close();  68       System.out.println("文件获取成功");  69     } catch (MalformedURLException e) {  70        e.printStackTrace();  71     } catch (FileNotFoundException e) {  72       e.printStackTrace();  73     } catch (IOException e) {  74        e.printStackTrace();  75     }  76     }  77        78     public String getHtmlCode(String httpUrl) throws IOException {  79     String content ="";  80     URL uu = new URL(httpUrl); // 创建URL类对象  81     BufferedReader ii = new BufferedReader(new InputStreamReader(uu  82         .openStream())); // //使用openStream得到一输入流并由此构造一个BufferedReader对象  83     String input;  84     while ((input = ii.readLine()) != null) { // 建立读取循环,并判断是否有读取值  85        content += input;  86     }  87     ii.close();  88     return content;  89     }  90     public static List<String> getImageSrc(String htmlCode) { 91         List<String> imageSrcList = new ArrayList<String>(); 92 //        Pattern p = Pattern.compile("<img\\b[^>]*\\bsrc\\b\\s*=\\s*(‘|\")?([^‘\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*>", Pattern.CASE_INSENSITIVE); 93         Pattern p = Pattern.compile("src\\b\\s*=\\s*(‘|\")?([^‘\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*", Pattern.CASE_INSENSITIVE); 94         Matcher m = p.matcher(htmlCode); 95         String quote = null; 96         String src = http://www.mamicode.com/null; 97         while (m.find()) { 98             quote = m.group(1); 99             src = http://www.mamicode.com/(quote == null || quote.trim().length() == 0) ? m.group(2).split("\\s+")[0] : m.group(2);100             imageSrcList.add(src);101             System.out.println("src"+src);102         }103         return imageSrcList;104     }105     106     public void get(String url,String text) throws IOException { 107       108     String searchImgReg = "(?x)(src|SRC|background|BACKGROUND)=(‘|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))(‘|\")"; 109     String searchImgReg2 = "(?x)(src|SRC|background|BACKGROUND)=(‘|\")(http://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))(‘|\")"; 110     String content  = "";111     if(text == null){112         content = this.getHtmlCode(url);113     }else{114         content = text;115     }116     System.out.println("内容:"+content); 117     118     Pattern p = Pattern.compile("src\\b\\s*=\\s*(‘|\")?([^‘\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*", Pattern.CASE_INSENSITIVE);119     Matcher m = p.matcher(content);120     String quote = null;121     String src = http://www.mamicode.com/null;122     while (m.find()) {123         quote = m.group(1);124         src = http://www.mamicode.com/(quote == null || quote.trim().length() == 0) ? m.group(2).split("\\s+")[0] : m.group(2);125         this.getHtmlFile(url+src); 126     }127     128     Pattern pattern = Pattern.compile(searchImgReg); 129     Matcher matcher = pattern.matcher(content); 130     while (matcher.find()) { 131        System.out.println("图片路径1:"+matcher.group(3)); 132       this.getHtmlFile(url+matcher.group(3)); 133          134     } 135       136     pattern = Pattern.compile(searchImgReg2); 137     matcher = pattern.matcher(content); 138     while (matcher.find()) { 139        System.out.println("图片路径1:"+matcher.group(3)); 140       this.getHtmlFile(matcher.group(3)); 141           142     } 143     // searchImgReg = 144     // "(?x)(src|SRC|background|BACKGROUND)=(‘|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))(‘|\")"; 145     } 146     public static void main(String[] args) throws IOException { 147         GetImage gcp = new GetImage(); 148         gcp.get("http://www.123rf.com.cn/#baidu01",null); 149         gcp.get(null,"<img src=http://www.mamicode.com/"/images/ico/logo.png\">"); 150         gcp.getHtmlFile("http://www.xxxx.com/uploadfiles/123.rar");151     } 152 }

 

通过文本或url扫描下载文件