首页 > 代码库 > word2html文件
word2html文件
1 package com.wesib2b.lw.app.util; 2 3 import java.io.ByteArrayOutputStream; 4 import java.io.File; 5 import java.io.FileInputStream; 6 import java.io.FileNotFoundException; 7 import java.io.FileOutputStream; 8 import java.io.InputStream; 9 import java.util.List; 10 import javax.xml.parsers.DocumentBuilderFactory; 11 import javax.xml.transform.OutputKeys; 12 import javax.xml.transform.Transformer; 13 import javax.xml.transform.TransformerFactory; 14 import javax.xml.transform.dom.DOMSource; 15 import javax.xml.transform.stream.StreamResult; 16 import org.apache.commons.io.FileUtils; 17 import org.apache.poi.hwpf.HWPFDocument; 18 import org.apache.poi.hwpf.converter.PicturesManager; 19 import org.apache.poi.hwpf.converter.WordToHtmlConverter; 20 import org.apache.poi.hwpf.usermodel.Picture; 21 import org.apache.poi.hwpf.usermodel.PictureType; 22 import org.w3c.dom.Document; 23 24 public class test { 25 public static void main(String[] args) throws Throwable { 26 final String path = "D:\\"; 27 final String file = "59系统教师绩效统计(需求变更).doc"; 28 InputStream input = new FileInputStream(path + file); 29 HWPFDocument wordDocument = new HWPFDocument(input); 30 WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( 31 DocumentBuilderFactory.newInstance().newDocumentBuilder() 32 .newDocument()); 33 wordToHtmlConverter.setPicturesManager(new PicturesManager() { 34 public String savePicture(byte[] content, PictureType pictureType, 35 String suggestedName, float widthInches, float heightInches) { 36 return suggestedName; 37 } 38 }); 39 wordToHtmlConverter.processDocument(wordDocument); 40 List pics = wordDocument.getPicturesTable().getAllPictures(); 41 if (pics != null) { 42 for (int i = 0; i < pics.size(); i++) { 43 Picture pic = (Picture) pics.get(i); 44 try { 45 pic.writeImageContent(new FileOutputStream(path 46 + pic.suggestFullFileName())); 47 } catch (FileNotFoundException e) { 48 e.printStackTrace(); 49 } 50 } 51 } 52 Document htmlDocument = 
wordToHtmlConverter.getDocument(); 53 ByteArrayOutputStream outStream = new ByteArrayOutputStream(); 54 DOMSource domSource = new DOMSource(htmlDocument); 55 StreamResult streamResult = new StreamResult(outStream); 56 TransformerFactory tf = TransformerFactory.newInstance(); 57 Transformer serializer = tf.newTransformer(); 58 serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); 59 serializer.setOutputProperty(OutputKeys.INDENT, "yes"); 60 serializer.setOutputProperty(OutputKeys.METHOD, "html"); 61 serializer.transform(domSource, streamResult); 62 outStream.close(); 63 String content = new String(outStream.toByteArray()); 64 FileUtils.write(new File(path, "1.html"), content, "utf-8"); 65 } 66 }
word2html文件
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。若内容有误或涉及侵权,可通过"投诉/举报"进行投诉,工作人员会在5个工作日内联系你;一经查实,本站将立刻删除涉嫌侵权内容。