首页 > 代码库 > java读取html文件,截取<body>标签中内容

java读取html文件,截取<body>标签中内容

 1     public String readfile(String filePath){ 2         File file = new File(filePath);   3         InputStream input = null; 4         try { 5             input = new FileInputStream(file); 6         } catch (FileNotFoundException e) { 7             e.printStackTrace(); 8         }   9         StringBuffer buffer = new StringBuffer();  10         byte[] bytes = new byte[1024];11         try {12             for(int n ; (n = input.read(bytes))!=-1 ; ){  13                 buffer.append(new String(bytes,0,n,"GBK"));  14             }15         } catch (IOException e) {16             e.printStackTrace();17         }18 //        System.out.println(buffer);19         return buffer.toString();  20     }21     22      public String getBody(String val) {23           String start = "<body>";24           String end = "</body>";25           int s = val.indexOf(start) + start.length();26           int e = val.indexOf(end);27         return val.substring(s, e);28     }29     
 1     public static void main(String [] args){ 2         OaDao m = new OaDao(); 3 //        String sql = "SELECT sth,xdh FROM TK_ST_0331 where sth=‘022012050101131000100‘ and rownum <=10"; 4         String sql = "select t.sth , t.stgjz ,t.stly, x.mc from TK_ST_0331 t ,TK_STK_ST_0331 k,TK_TX X  where t.sth = k.sth AND X.BH = t.tx and rownum <10 "; 5         List<OaVo> datalist= m.findAll(sql); 6         for(OaVo vo : datalist){ 7             System.out.println(vo.getVal1()+"///"+vo.getVal2()); 8              9 //            String sth = "022012010100000100100";10             String sth = vo.getVal1();11             String kmh = sth.substring(0, 2);    //科目号12             String nf = sth.substring(2, 6);    //年份13             String yf = sth.substring(6,10);    //月份14             String serialno = sth.substring(10, 16);    //序列号15             String stxl = sth.substring(16, 19);    //题型16             String path ="/"+kmh+"/"+nf+"/"+yf+"/"+serialno+"/"+stxl+"/";17             18             String tm_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_tm.htm";19             String da_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_da.htm";20             String jx_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_jx.htm";21             22     //        String path = "H:/tk_source/02/0101/000001/001/022012010100000100100_da.htm";23             24             String tm = m.getBody(m.readfile(tm_path));25             System.out.println("----------------------题目------------------------------");26             System.out.println(tm);27             28             String da = m.getBody(m.readfile(da_path));29             System.out.println("----------------------答案------------------------------");30             System.out.println(da);31             32             33             String jx = m.getBody(m.readfile(da_path));34             System.out.println("----------------------解析------------------------------");35             System.out.println(jx);36         }37     }

 

java读取html文件,截取<body>标签中内容