C#:将 Word 文件转换为 HTML

时间:2022-11-11 22:02:46
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using Microsoft.Office.Interop.Word;

namespace Admin
{
    /// <summary>
    /// Demo page that converts a Word document to filtered HTML through
    /// Word COM automation and then normalises the charset declaration of
    /// the generated file.
    /// </summary>
    public partial class TestDemo : System.Web.UI.Page
    {
        /// <summary>
        /// Page load handler. The conversion is not triggered automatically.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // Example usage (left disabled, as in the original demo):
            // GetPathByDocToHTML(Server.MapPath("~/uploads/TutorCV/111.docx"));
        }

        /// <summary>
        /// Converts the Word document at <paramref name="strFile"/> to a
        /// filtered HTML file stored under
        /// <c>~/uploads/TutorCV/&lt;yyyyMMdd&gt;/</c>.
        /// </summary>
        /// <param name="strFile">Physical path of the Word document to convert.</param>
        /// <returns>
        /// The application-relative ("~/...") path of the generated HTML file,
        /// or <c>"0"</c> when no source file is available.
        /// </returns>
        private string GetPathByDocToHTML(string strFile)
        {
            if (string.IsNullOrEmpty(strFile) || !File.Exists(strFile))
            {
                return "0"; // no file
            }

            const string outputRootFolder = "~/uploads/TutorCV/";
            const System.Reflection.BindingFlags invokeMethod = System.Reflection.BindingFlags.InvokeMethod;

            // Take a single time snapshot and use zero-padded fields so the
            // generated names are unambiguous (unpadded "2022111" could mean
            // Jan 11 or Nov 1) and cannot straddle a second boundary.
            DateTime now = DateTime.Now;
            string subFolder = now.ToString("yyyyMMdd", CultureInfo.InvariantCulture);
            string htmlName = now.ToString("yyyyMMddHHmmssfff", CultureInfo.InvariantCulture) + ".html";
            string virtualFolder = outputRootFolder + subFolder + "/";

            // CreateDirectory is a no-op when the folder already exists.
            Directory.CreateDirectory(Server.MapPath(virtualFolder));

            // Physical location of the converted HTML document.
            string htmlPhysicalPath = Server.MapPath(virtualFolder + htmlName);

            Microsoft.Office.Interop.Word._Application word = null;
            Microsoft.Office.Interop.Word.Documents docs = null;
            Microsoft.Office.Interop.Word.Document doc = null;
            try
            {
                word = new Microsoft.Office.Interop.Word.Application();
                docs = word.Documents;

                // Late-bound Open(FileName, ConfirmConversions, ReadOnly).
                doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
                    "Open", invokeMethod, null, docs, new object[] { strFile, true, true });

                // Late-bound SaveAs(FileName, FileFormat). Other WdSaveFormat
                // values (wdFormatHTML, wdFormatRTF, wdFormatText, ...) select
                // other output formats.
                doc.GetType().InvokeMember(
                    "SaveAs", invokeMethod, null, doc,
                    new object[] { htmlPhysicalPath, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
            }
            finally
            {
                // Always shut Word down, even when Open/SaveAs failed;
                // otherwise every failed request leaves a WINWORD.EXE behind.
                try
                {
                    if (doc != null)
                    {
                        doc.GetType().InvokeMember(
                            "Close", invokeMethod, null, doc,
                            new object[] { Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges });
                    }
                }
                finally
                {
                    try
                    {
                        if (word != null)
                        {
                            word.GetType().InvokeMember(
                                "Quit", invokeMethod, null, word,
                                new object[] { Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges });
                        }
                    }
                    finally
                    {
                        ReleaseComObject(doc);
                        ReleaseComObject(docs);
                        ReleaseComObject(word);
                    }
                }
            }

            // Normalise the charset declaration of the generated page.
            TransHTMLEncoding(htmlPhysicalPath);

            return virtualFolder + htmlName;
        }

        /// <summary>
        /// Releases the runtime callable wrapper of a COM object, if any.
        /// </summary>
        /// <param name="comObject">Object to release; may be <c>null</c>.</param>
        private static void ReleaseComObject(object comObject)
        {
            if (comObject != null && Marshal.IsComObject(comObject))
            {
                Marshal.ReleaseComObject(comObject);
            }
        }

        /// <summary>
        /// Rewrites the HTML file so that every <c>&lt;meta&gt;</c> tag is
        /// replaced by a gb2312 Content-Type declaration and the file content
        /// is stored in that same encoding.
        /// </summary>
        /// <param name="strFilePath">Physical path of the HTML file to rewrite in place.</param>
        /// <remarks>
        /// Failures are not propagated; the error message is shown to the
        /// user through a client-side alert instead.
        /// </remarks>
        private void TransHTMLEncoding(string strFilePath)
        {
            const string charsetMeta = "<meta http-equiv=Content-Type content='text/html; charset=gb2312'>";

            try
            {
                string html;

                // NOTE(review): reads with the system ANSI code page, which
                // assumes Word saved the file in that encoding - confirm.
                using (StreamReader reader = new StreamReader(strFilePath, Encoding.GetEncoding(0)))
                {
                    html = reader.ReadToEnd();
                }

                html = Regex.Replace(html, @"<meta[^>]*>", charsetMeta, RegexOptions.IgnoreCase);

                // Write with the encoding the page declares, so the bytes
                // match the charset on servers whose default code page is
                // not gb2312.
                using (StreamWriter writer = new StreamWriter(strFilePath, false, Encoding.GetEncoding("gb2312")))
                {
                    writer.Write(html);
                }
            }
            catch (Exception ex)
            {
                // Escape the message so quotes, newlines or markup in it
                // cannot break out of the JavaScript string literal.
                string script = "alert('" + HttpUtility.JavaScriptStringEncode(ex.Message) + "');";
                ClientScript.RegisterStartupScript(GetType(), "alt", script, true);
            }
        }
    }
}