首页 > 代码库 > C# 将 Word 文件转换为 HTML

C# 将 Word 文件转换为 HTML

  1 using Microsoft.Office.Interop.Word;
  2 using System;
  3 using System.Collections.Generic;
  4 using System.IO;
  5 using System.Linq;
  6 using System.Text;
  7 using System.Web;
  8 using System.Web.UI;
  9 using System.Web.UI.WebControls;
 10 
 11 namespace Admin
 12 {
 13     public partial class TestDemo : System.Web.UI.Page
 14     {
 15         protected void Page_Load(object sender, EventArgs e)
 16         {
 17             string wordFileName = "~/uploads/TutorCV/111.docx";
 18             string htmlFileName = "~/uploads/TutorCV/";
 19             //GetPathByDocToHTML(Server.MapPath(wordFileName));
 20         }
 21         private string GetPathByDocToHTML(string strFile)
 22         {
 23             if (string.IsNullOrEmpty(strFile))
 24             {
 25                 return "0";//没有文件
 26             }
 27 
 28             //Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
 29             Microsoft.Office.Interop.Word._Application word = new Application();
 30             Type wordType = word.GetType();
 31             Microsoft.Office.Interop.Word.Documents docs = word.Documents;
 32 
 33             // 打开文件  
 34             Type docsType = docs.GetType();
 35 
 36             object fileName = strFile;
 37 
 38             Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
 39             System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });
 40 
 41             // 转换格式,另存为html  
 42             Type docType = doc.GetType();
 43             //给文件重新起名
 44             string filename = System.DateTime.Now.Year.ToString() + System.DateTime.Now.Month.ToString() + System.DateTime.Now.Day.ToString() +
 45             System.DateTime.Now.Hour.ToString() + System.DateTime.Now.Minute.ToString() + System.DateTime.Now.Second.ToString();
 46 
 47             string strFileFolder = "~/uploads/TutorCV/";
 48             DateTime dt = DateTime.Now;
 49             //以yyyymmdd形式生成子文件夹名
 50             string strFileSubFolder = dt.Year.ToString();
 51             strFileSubFolder += (dt.Month < 10) ? ("0" + dt.Month.ToString()) : dt.Month.ToString();
 52             strFileSubFolder += (dt.Day < 10) ? ("0" + dt.Day.ToString()) : dt.Day.ToString();
 53             string strFilePath = strFileFolder + strFileSubFolder + "/";
 54             // 判断指定目录下是否存在文件夹,如果不存在,则创建 
 55             if (!Directory.Exists(Server.MapPath(strFilePath)))
 56             {
 57                 // 创建up文件夹 
 58                 Directory.CreateDirectory(Server.MapPath(strFilePath));
 59             }
 60 
 61             //被转换的html文档保存的位置 
 62             // HttpContext.Current.Server.MapPath("html" + strFileSubFolder + filename + ".html")
 63             string ConfigPath = Server.MapPath(strFilePath + filename + ".html");
 64             object saveFileName = ConfigPath;
 65 
 66             /*下面是Microsoft Word 9 Object Library的写法,如果是10,可能写成: 
 67               * docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod, 
 68               * null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatFilteredHTML}); 
 69               * 其它格式: 
 70               * wdFormatHTML 
 71               * wdFormatDocument 
 72               * wdFormatDOSText 
 73               * wdFormatDOSTextLineBreaks 
 74               * wdFormatEncodedText 
 75               * wdFormatRTF 
 76               * wdFormatTemplate 
 77               * wdFormatText 
 78               * wdFormatTextLineBreaks 
 79               * wdFormatUnicodeText 
 80             */
 81             docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
 82             null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
 83 
 84             //docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
 85             //  null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML }); 
 86 
 87             //关闭文档  
 88             docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
 89             null, doc, new object[] { null, null, null });
 90 
 91             // 退出 Word  
 92             wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
 93             //转到新生成的页面  
 94             //return ("/" + filename + ".html");
 95 
 96             //转化HTML页面统一编码格式
 97             TransHTMLEncoding(ConfigPath);
 98 
 99             return (strFilePath + filename + ".html");
100         }
101         private void TransHTMLEncoding(string strFilePath)
102         {
103             try
104             {
105                 System.IO.StreamReader sr = new System.IO.StreamReader(strFilePath, Encoding.GetEncoding(0));
106                 string html = sr.ReadToEnd();
107                 sr.Close();
108                 html = System.Text.RegularExpressions.Regex.Replace(html, @"<meta[^>]*>", "<meta http-equiv=Content-Type content=‘text/html; charset=gb2312‘>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
109                 System.IO.StreamWriter sw = new System.IO.StreamWriter(strFilePath, false, Encoding.Default);
110 
111                 sw.Write(html);
112                 sw.Close();
113             }
114             catch (Exception ex)
115             {
116                 Page.RegisterStartupScript("alt", "<script>alert(‘" + ex.Message + "‘)</script>");
117             }
118         }
119 
120     }
121 }

 

C# 将 Word 文件转换为 HTML