首页 > 代码库 > 下载网页的基本方法

下载网页的基本方法

一、java.net.URL

 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;

 /**
  * Downloads a page through {@link URL#openStream()} and returns it as text.
  */
 public class RetrivePage {
     /**
      * Fetches the resource at {@code path} and returns its full content.
      *
      * <p>The content is copied character by character, so line terminators are
      * preserved exactly as served. The bytes are decoded as UTF-8; a page served
      * in a different encoding will not decode correctly.
      *
      * @param path absolute URL of the resource to download
      * @return the decoded content, or an empty string when the resource is empty
      * @throws Exception if the URL is malformed or the resource cannot be read
      */
     public static String downloadPage(String path) throws Exception {
         URL pageURL = new URL(path);
         StringBuilder pageBuffer = new StringBuilder();
         // try-with-resources closes the reader (and the underlying stream) even
         // when reading fails part-way through.
         try (BufferedReader reader = new BufferedReader(
                 new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
             // Copy raw characters instead of using readLine(), which strips the
             // line terminators and would glue adjacent lines together.
             char[] chunk = new char[8192];
             int count;
             while ((count = reader.read(chunk)) != -1) {
                 pageBuffer.append(chunk, 0, count);
             }
         }
         return pageBuffer.toString();
     }

     /** Prints the content of a sample page to standard output. */
     public static void main(String[] args) throws Exception {
         System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
     }
 }

二、Scanner对象

 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.Scanner;

 /**
  * Downloads a page by reading the URL's stream through a {@link Scanner}.
  */
 public class RetrivePage {
     /**
      * Fetches the resource at {@code path} and returns its full content,
      * decoded as UTF-8.
      *
      * @param path absolute URL of the resource to download
      * @return the decoded content, or an empty string when the resource is empty
      * @throws Exception if the URL is malformed or the resource cannot be read
      */
     public static String downloadPage(String path) throws Exception {
         URL pageURL = new URL(path);
         StringBuilder pageBuffer = new StringBuilder();
         // Closing the Scanner also closes the reader and the underlying stream.
         try (Scanner scanner = new Scanner(
                 new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
             // "\z" only matches at end of input, so each token is the remaining content.
             scanner.useDelimiter("\\z");
             while (scanner.hasNext()) {
                 pageBuffer.append(scanner.next());
             }
             // Scanner swallows read failures and just reports "no more tokens";
             // surface them so a truncated page is not returned as if complete.
             IOException readFailure = scanner.ioException();
             if (readFailure != null) {
                 throw readFailure;
             }
         }
         return pageBuffer.toString();
     }

     /** Prints the content of a sample page to standard output. */
     public static void main(String[] args) throws Exception {
         System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
     }
 }

三、套接字

 import java.io.*;
 import java.net.Socket;
 import java.nio.charset.StandardCharsets;

 /**
  * Downloads a page by speaking HTTP/1.0 directly over a TCP socket and prints
  * the raw response (status line, headers and body) to standard output.
  */
 public class RetrivePage {
     /**
      * Sends a GET request for a fixed host and path and echoes the response.
      *
      * @param args ignored
      * @throws Exception if the connection cannot be opened or the exchange fails
      */
     public static void main(String[] args) throws Exception {
         String host = "blog.csdn.net";
         String file = "/column.html";
         int port = 80;
         // Closing the socket also closes both of its streams.
         try (Socket s = new Socket(host, port)) {
             PrintWriter outw = new PrintWriter(
                     new OutputStreamWriter(s.getOutputStream(), StandardCharsets.UTF_8), false);
             // The request line is "METHOD SP request-target SP version"; the space
             // after GET is mandatory.
             outw.print("GET " + file + " HTTP/1.0\r\n");
             // Name-based virtual hosts need the Host header to pick the right site.
             outw.print("Host: " + host + "\r\n");
             outw.print("Accept:text/plain,text/html,text/*\r\n");
             outw.print("\r\n");
             outw.flush();
             // PrintWriter never throws on write failure; it has to be asked.
             if (outw.checkError()) {
                 throw new IOException("Failed to send the request to " + host + ":" + port);
             }

             // NOTE(review): assumes the response is UTF-8 encoded; confirm for other hosts.
             BufferedReader bufferedReader = new BufferedReader(
                     new InputStreamReader(s.getInputStream(), StandardCharsets.UTF_8));
             String line;
             while ((line = bufferedReader.readLine()) != null) {
                 System.out.println(line);
             }
         }
     }
 }

四、HttpClient

 1 import org.apache.http.HttpEntity; 2 import org.apache.http.HttpResponse; 3 import org.apache.http.client.HttpClient; 4 import org.apache.http.client.methods.HttpGet; 5 import org.apache.http.impl.client.DefaultHttpClient; 6 import org.apache.http.util.EntityUtils; 7 public class RetrivePage { 8     public static void main(String args[]) throws Exception { 9         HttpClient httpClient=new DefaultHttpClient();10         HttpGet httpGet=new HttpGet("http://www.sina.com");11         HttpResponse response=httpClient.execute(httpGet);12         HttpEntity entity=response.getEntity();13         if(entity!=null){14             System.out.println(EntityUtils.toString(entity,"utf-8"));15             EntityUtils.consume(entity);16         }17         httpClient.getConnectionManager().shutdown();18     }19 }

 

下载网页的基本方法