首页 > 代码库 > Java HTML页面抓取实例

Java HTML页面抓取实例

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Minimal example that downloads an HTML page over HTTP and prints it to
 * standard output.
 */
public class Url {

    /** Page that is fetched when no explicit address is supplied. */
    private static final String DEFAULT_URL = "http://www.baidu.com";

    /** Connect and read timeout, in milliseconds, applied to every request. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Fetches {@link #DEFAULT_URL} and prints the returned text to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the page cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        String html = getURLContent();
        System.out.println(html);
    }

    /**
     * Fetches the content of the default web page.
     *
     * @return the response body of {@link #DEFAULT_URL}, every line terminated by {@code "\r\n"}
     * @throws MalformedURLException if the default address is not a valid URL
     * @throws IOException if connecting to the server or reading the response fails
     * @throws UnsupportedEncodingException retained in the signature for compatibility
     */
    private static String getURLContent()
            throws MalformedURLException, IOException, UnsupportedEncodingException {
        return getURLContent(DEFAULT_URL);
    }

    /**
     * Fetches the content of the web page at the given address.
     *
     * <p>The response body is decoded as UTF-8 and read line by line; each line is
     * re-terminated with {@code "\r\n"} in the returned text. HTTP redirects are not
     * followed.
     *
     * @param address absolute HTTP URL of the page to download
     * @return the response body, every line terminated by {@code "\r\n"}
     * @throws MalformedURLException if {@code address} is not a valid URL
     * @throws IOException if connecting, reading, or a timeout fails
     */
    private static String getURLContent(String address)
            throws MalformedURLException, IOException {
        URL pageUrl = new URL(address);
        HttpURLConnection con = (HttpURLConnection) pageUrl.openConnection();

        // Configure redirect handling on this connection only, rather than through
        // the JVM-wide HttpURLConnection.setFollowRedirects switch.
        con.setInstanceFollowRedirects(false);

        // Without timeouts an unresponsive server would block the caller indefinitely.
        con.setConnectTimeout(TIMEOUT_MILLIS);
        con.setReadTimeout(TIMEOUT_MILLIS);

        try {
            con.connect();
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
                StringBuilder content = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append("\r\n");
                }
                return content.toString();
            }
        } finally {
            con.disconnect();
        }
    }
}