首页 > 代码库 > 网页爬虫

网页爬虫

/*
 * Web crawler: downloads a web page and prints every e-mail address found in it.
 */
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.*;

class RegexTest2 {
    /** Page that {@link #getMails()} scans. */
    private static final String PAGE_URL = "http://www.douban.com/group/topic/16778555/";

    /**
     * E-mail shape: word characters, '@', word characters, then one or more
     * ".word" groups. Compiled once because a Pattern is immutable and reusable.
     */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Program entry point; runs the crawl once.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the page cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        getMails();
    }

    /**
     * Reads the page at {@code PAGE_URL} line by line and prints each e-mail
     * address that matches {@code MAIL_PATTERN} to standard output, one per line.
     *
     * @throws Exception if the URL is malformed or the connection/read fails
     */
    public static void getMails() throws Exception {
        URL url = new URL(PAGE_URL);
        URLConnection conn = url.openConnection();

        // try-with-resources closes the reader (and the underlying connection
        // stream) even when readLine() throws; the original never closed it.
        // The charset is given explicitly so decoding does not depend on the
        // platform default of the machine running the program.
        try (BufferedReader bufr = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = bufr.readLine()) != null) {
                for (String mail : findMails(line)) {
                    System.out.println(mail);
                }
            }
        }
    }

    /**
     * Extracts every substring of {@code line} that matches {@code MAIL_PATTERN}.
     *
     * @param line text to scan; {@code null} is treated as empty
     * @return the matches in order of appearance (empty list if there are none)
     */
    static List<String> findMails(String line) {
        List<String> mails = new ArrayList<String>();
        if (line == null) {
            return mails;
        }
        Matcher m = MAIL_PATTERN.matcher(line);
        while (m.find()) {
            mails.add(m.group());
        }
        return mails;
    }
}