首页 > 代码库 > 下载网页的基本方法
下载网页的基本方法
一、java.net.URL
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Downloads a page through {@link URL#openStream()}.
 */
public class RetrivePage {
    /** Size, in chars, of the buffer used to copy the response. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches the resource at {@code path} and returns its full text.
     *
     * <p>The content is copied character by character, so line terminators are
     * preserved exactly as sent. The bytes are decoded as UTF-8; the charset
     * announced by the server (if any) is not inspected.
     *
     * @param path absolute URL of the resource to download
     * @return the decoded content, or an empty string when the resource is empty
     * @throws Exception if the URL is malformed or the stream cannot be opened or read
     */
    public static String downloadPage(String path) throws Exception {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        // try-with-resources closes the reader (and the underlying connection
        // stream) even when reading fails part-way through.
        try (Reader reader = new BufferedReader(
                new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
            char[] chunk = new char[BUFFER_SIZE];
            int count;
            // Copy raw chars instead of readLine() so that "\n" / "\r\n" are kept.
            while ((count = reader.read(chunk)) != -1) {
                pageBuffer.append(chunk, 0, count);
            }
        }
        return pageBuffer.toString();
    }

    /** Prints the downloaded home page of www.sina.com to standard output. */
    public static void main(String args[]) throws Exception {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}
二、Scanner对象
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

/**
 * Downloads a page by draining the URL stream with a {@link Scanner}.
 */
public class RetrivePage {
    /**
     * Fetches the resource at {@code path} and returns its full text, decoded as UTF-8.
     *
     * @param path absolute URL of the resource to download
     * @return the decoded content, or an empty string when the resource is empty
     * @throws Exception if the URL is malformed or the stream cannot be opened or read
     */
    public static String downloadPage(String path) throws Exception {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        // Closing the Scanner also closes the wrapped reader and its stream.
        try (Scanner scanner = new Scanner(
                new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
            // "\z" only matches at end of input, so the whole page is one token.
            scanner.useDelimiter("\\z");
            while (scanner.hasNext()) {
                pageBuffer.append(scanner.next());
            }
            // Scanner swallows read errors and just reports "no more tokens";
            // surface them so a truncated page is not returned as if complete.
            IOException readFailure = scanner.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        }
        return pageBuffer.toString();
    }

    /** Prints the downloaded home page of www.sina.com to standard output. */
    public static void main(String args[]) throws Exception {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}
三、套接字
import java.io.*;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

/**
 * Downloads a page by speaking HTTP/1.0 directly over a TCP socket.
 */
public class RetrivePage {
    /**
     * Sends a GET request for {@code /column.html} to {@code blog.csdn.net:80}
     * and prints the raw response (status line, headers and body) to standard output.
     *
     * @throws Exception if the connection cannot be established, the request
     *                   cannot be written, or the response cannot be read
     */
    public static void main(String args[]) throws Exception {
        String host = "blog.csdn.net";
        String file = "/column.html";
        int port = 80;
        // Closing the socket also closes both of its streams.
        try (Socket s = new Socket(host, port)) {
            // HTTP request lines and headers are ASCII.
            PrintWriter outw = new PrintWriter(
                    new OutputStreamWriter(s.getOutputStream(), StandardCharsets.US_ASCII), false);
            // The request line is "METHOD SP URI SP VERSION"; the space after GET is required.
            outw.print("GET " + file + " HTTP/1.0\r\n");
            // Not mandated by HTTP/1.0, but name-based virtual hosts need it to pick the site.
            outw.print("Host: " + host + "\r\n");
            outw.print("Accept:text/plain,text/html,text/*\r\n");
            outw.print("\r\n");
            // checkError() flushes and reports failures that PrintWriter otherwise hides.
            if (outw.checkError()) {
                throw new IOException("Failed to send HTTP request to " + host + ":" + port);
            }
            // NOTE(review): assumes the response is UTF-8; the Content-Type header is not parsed.
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(s.getInputStream(), StandardCharsets.UTF_8));
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}
四、HttpClient
1 import org.apache.http.HttpEntity; 2 import org.apache.http.HttpResponse; 3 import org.apache.http.client.HttpClient; 4 import org.apache.http.client.methods.HttpGet; 5 import org.apache.http.impl.client.DefaultHttpClient; 6 import org.apache.http.util.EntityUtils; 7 public class RetrivePage { 8 public static void main(String args[]) throws Exception { 9 HttpClient httpClient=new DefaultHttpClient();10 HttpGet httpGet=new HttpGet("http://www.sina.com");11 HttpResponse response=httpClient.execute(httpGet);12 HttpEntity entity=response.getEntity();13 if(entity!=null){14 System.out.println(EntityUtils.toString(entity,"utf-8"));15 EntityUtils.consume(entity);16 }17 httpClient.getConnectionManager().shutdown();18 }19 }
下载网页的基本方法
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。