首页 > 代码库 > htmlparser实现从网页上抓取数据

htmlparser实现从网页上抓取数据

package parser;

 

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.net.MalformedURLException;

import java.net.URL;

 

/**
 * Fetches a web page and saves its entire HTML text to a file.
 *
 * <p>The page URL and the output file may be supplied on the command line
 * ({@code args[0]} and {@code args[1]}); when they are omitted the built-in
 * defaults are used.
 *
 * @author chenguoyong
 */
public class ScrubSelectedWeb {

    /** Platform line separator, appended after every line read from the page. */
    private final static String CRLF = System.getProperty("line.separator");

    /** URL fetched when no command-line argument is given. */
    private static final String DEFAULT_URL = "http://10.249.187.199:8083/injs100/";

    /** File written when no output-path argument is given. */
    private static final String DEFAULT_OUTPUT = "D:/outPut.txt";

    /**
     * Downloads the page, echoes it to standard output and writes it to the
     * output file. I/O failures (including a malformed URL) are reported on
     * standard error via a stack trace and do not propagate to the caller.
     *
     * @param args optional; {@code args[0]} is the URL to fetch and
     *             {@code args[1]} is the output file path
     */
    public static void main(String[] args) {
        String pageUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;
        try {
            // Read everything first so a failed fetch never truncates an existing output file.
            String content = download(pageUrl);
            System.out.println(content);
            save(content, outputPath);
        } catch (IOException e) {
            // MalformedURLException is a subclass of IOException, so one handler covers both.
            e.printStackTrace();
        }
    }

    /**
     * Reads the resource at {@code pageUrl} line by line and returns the text
     * with each line terminated by the platform line separator.
     *
     * @param pageUrl the URL to open
     * @return the full text of the resource; empty if it has no lines
     * @throws IOException if the URL is malformed or the stream cannot be read
     */
    private static String download(String pageUrl) throws IOException {
        URL ur = new URL(pageUrl);
        StringBuilder sb = new StringBuilder();
        // NOTE: bytes are decoded with the platform default charset, as in the original code.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(ur.openStream()))) {
            String s;
            while ((s = in.readLine()) != null) {
                sb.append(s).append(CRLF);
            }
        }
        return sb.toString();
    }

    /**
     * Writes {@code content} to {@code outputPath}, replacing any existing file.
     *
     * @param content    the text to write
     * @param outputPath the destination file path
     * @throws IOException if the file cannot be opened or written
     */
    private static void save(String content, String outputPath) throws IOException {
        // NOTE: text is encoded with the platform default charset, as in the original code.
        try (BufferedWriter out = new BufferedWriter(new FileWriter(outputPath))) {
            out.write(content);
        }
    }

}

基本能实现网页抓取,不过需要手动输入 URL,此外代码尚未重构,只是一个简单的思路。





http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281464
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281465
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281466
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281467
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281468
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281469
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281470
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281472
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281473
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281474
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281476
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281477
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281478
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281479
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281480
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281481
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281482
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281485
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281487


http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281643
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281644
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281645
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281646
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281647
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281648
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281649
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281650
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281489
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281491
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281492
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281493
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281494
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281495
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281496
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281497
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281498
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281499
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281500
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281501
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281502
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281503
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281504
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281505
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281506
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281507
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281508
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281509
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281510
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281511
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281513
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281514
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281515
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281516
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281517
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281518
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281519
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281520
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281521
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281522
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281523
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281524
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281525
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281526
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281527
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281528
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281529
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281530
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281531
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281532
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281533
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281534
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281535
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281536
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281537
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281538
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281539
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281540
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281541
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281542
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281543
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281544
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281545
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281546
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281547
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281548
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281549
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281551
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281552
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281553
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281555
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281556
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281557
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281558
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281559
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281560
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281561
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281562
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281563
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281564
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281565
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281566
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281567
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281568
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281569
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281570
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281571
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281572
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281573
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281574
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281575
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281576
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281577
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281578
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281579
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281580
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281581
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281582
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281583
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281584
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281586
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281587
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281589
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281590
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281591
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281592
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281593
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281594
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281595
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281596
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281597
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281598
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281599
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281600
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281601
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281602
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281603
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281605
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281606
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281607
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281608
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281609
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281610
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281611
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281612
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281613
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281614
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281615
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281616
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281617
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281618
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281619
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281620
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281621
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281622
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281623
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281624
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281626
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281627
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281628
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281629
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281630
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281631
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281632
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281633
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281635
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281636
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281637
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281638
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281639
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281652
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281653
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281654
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281655
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281656
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281657
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281658
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281659
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281660
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281661
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281662
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281663
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281664
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281665
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281666
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281667
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281668
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281669
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281670
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281671
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281672
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281673
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281674
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281675
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281680
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281681
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281682
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281683
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281684
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281685
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281686
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281687
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281688
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281689
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281690
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281691
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281692
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281693
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281694
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281695
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281696
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281697
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281698
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281699
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281700
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281701
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281702
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281703
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281704
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281705
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281706
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281707
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281708
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281709
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281710
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281711
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281712
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281713
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281714
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281715
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281716
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281717
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281718
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281719
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281720
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281721
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281722
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281723
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281724
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281725
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281726
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281727
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281728
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281729
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281730
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281731
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281732
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281733
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281734
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281735
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281736
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281737
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281738
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281739
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281740
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281741
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281742
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281743
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281744
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281745
http://www.ituite.com/space.php?uid=1040447&do=poll&pid=1281746

htmlparser实现从网页上抓取数据