首页 > 代码库 > <<< Java提取网页源码

<<< Java提取网页源码

 

package com.sevennight;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Command-line entry point that downloads the source of a web page and
 * saves it to a local text file.
 */
public class zidongwenzi {

    /**
     * Fetches the page at a fixed URL via {@link GetWebContent#getWebCon(String)}
     * and writes the returned text to a fixed local file.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the output file cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        GetWebContent con = new GetWebContent();
        // Local path where the downloaded content is saved.
        // try-with-resources closes (and thereby flushes) the writer even when write() throws.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter("F:/WebContent.txt"))) {
            // URL of the page to download.
            bw.write(con.getWebCon("http://www.baidu.com"));
        }
    }
}

/**
 * Downloads the textual content behind a URL.
 */
class GetWebContent {

    /**
     * Reads the resource at the given URL line by line, decoding the bytes as GBK.
     * Every line is echoed to standard output and appended to the result,
     * followed by a single {@code '\n'}.
     *
     * <p>This method does not propagate exceptions: if anything fails, the
     * exception's {@code toString()} is appended to whatever was read so far,
     * the error is printed to standard error, and the accumulated text is returned.
     *
     * @param domain the URL to read, e.g. {@code "http://www.baidu.com"}
     * @return the text that was read, or the partial text followed by the
     *         exception description when an error occurred
     */
    public String getWebCon(String domain) {
        StringBuilder sb = new StringBuilder();
        try {
            java.net.URL url = new java.net.URL(domain);
            // "GBK" is the character encoding the page is decoded with.
            // try-with-resources guarantees the reader (and the URL stream it wraps)
            // is closed even if readLine() fails midway.
            try (BufferedReader in =
                    new BufferedReader(new InputStreamReader(url.openStream(), "GBK"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    sb.append(line).append('\n');
                    System.out.println(line);
                }
            }
        } catch (Exception e) {
            // Report any errors that arise; the description is also part of the returned text.
            sb.append(e.toString());
            System.err.println(e);
            System.err.println("Usage:   java   HttpClient   <URL>   [<filename>]");
        }
        return sb.toString();
    }
}