I'm writing a Word add-in in C# that replaces all formatting with XML tags. Now I want to convert the tables in a Word document to HTML with standard tags. The tables may differ in their row and column counts; that is, a table may contain merged cells (cells spanning several rows or several columns).
我正在编写一个带有C#的Word Addin,它将所有格式替换为xml标签,现在我希望将word转换为带有标准标签的html表,行数和列数可能不同,我的意思是表包含合并的单元格或列
For example:
-------------------------
| 1 | 2 | 3 | 4 |
| -------------------
| | 5 | 6 | 7 |
| -------------------
| | 8 |
| -------------------
| | 9 | 10 | 11 |
|------------------------
| 12 | 13 | 14 | 15 |
-------------------------
Here cell 1 spans four rows in a single column, and cell 8 spans three columns in a single row.
单元格1在一列中合并四行,单元格8在一行中合并三列
How can I convert it?
我怎么转换它?
3 个解决方案
#1
1
We worked on a similar project a while ago and hope the code below gives you a starting point. HTML part:
我们前一段时间遇到过类似的项目,希望下面的代码可以给你一个开始。 HTML部分
<span style="font-size:18px;"><div>
<input id="File1" type="file" runat="server"/>
<asp:Button ID="btnConvert" runat="server" Text="Convert" OnClick="btnConvert_Click" />
</div></span>
C# part:
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;
C#部分使用系统;使用System.Data;使用System.Configuration;使用System.Collections;使用System.Collections.Generic;使用System.Linq;使用System.Web;使用System.Web.Security;使用System.Web.UI;使用System.Web.UI.WebControls;使用System.Web.UI.WebControls.WebParts;使用System.Web.UI.HtmlControls;使用System.IO;
/// <summary>
/// Page load handler. It intentionally performs no work.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // No per-request initialisation is performed.
}
/// <summary>
/// Click handler for the Convert button: passes the <c>File1</c> upload control to
/// <c>wordToHtml</c> and writes the outcome to the response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void btnConvert_Click(object sender, EventArgs e)
{
    // Deliberately no try/catch: the previous "catch (Exception ex) { throw ex; }" only
    // reset the stack trace, and the previous "finally" reported success even when the
    // conversion had thrown. Exceptions now propagate untouched.
    string result = wordToHtml(File1);

    // wordToHtml signals failure through the sentinel strings "0" and "1".
    if (result == "0")
    {
        Response.Write("Upload failed.");
    }
    else if (result == "1")
    {
        Response.Write("Please select a Word document (.doc or .docx).");
    }
    else
    {
        Response.Write("Convert successfully!");
    }
}
///<summary>
///Uploads the posted Word document to the server and saves it as filtered HTML
///in the application's "html" folder.
///</summary>
///<param name="wordFilePath">File-input control holding the posted Word document.</param>
///<returns>
///"0" when the upload failed, "1" when the posted file is not a Word document,
///otherwise "/" followed by the generated file name and ".html".
///</returns>
public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)
{
    // Store and validate the upload BEFORE starting Word, so that the early returns
    // below cannot leave a Word process running.
    string filePath = uploadWord(wordFilePath);
    if (filePath == "0" || filePath == "1")
    {
        // "0" = upload failed, "1" = not a Word file (see uploadWord).
        return filePath;
    }

    // Read the clock once and zero-pad every component: the unpadded concatenation
    // used previously was ambiguous (e.g. month 1 + day 11 vs. month 11 + day 1).
    string filename = System.DateTime.Now.ToString(
        "yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

    // Create and use ONE output folder. Previously the existence check used "~\html"
    // (application root) while the file was saved under "html/" relative to the page.
    string htmlDirectory = Server.MapPath("~/html");
    Directory.CreateDirectory(htmlDirectory); // no-op when the folder already exists
    object saveFileName = Path.Combine(htmlDirectory, filename + ".html");

    Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
    Type wordType = word.GetType();
    try
    {
        Microsoft.Office.Interop.Word.Documents docs = word.Documents;
        Type docsType = docs.GetType();

        // NOTE(review): the two "true" values are positional arguments of Documents.Open,
        // presumably ConfirmConversions and ReadOnly. Confirm that ConfirmConversions = true
        // cannot raise a dialog when running unattended on the server.
        object fileName = filePath;
        Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
            System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });
        Type docType = doc.GetType();
        try
        {
            // Other WdSaveFormat members (wdFormatHTML, wdFormatRTF, wdFormatText, ...)
            // can be passed here to produce a different output format.
            docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
                null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
        }
        finally
        {
            // Close the document even when SaveAs throws.
            docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
                null, doc, new object[] { null, null, null });
        }
    }
    finally
    {
        // Always shut Word down, otherwise every failed request leaks a Word process.
        wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
    }

    // NOTE(review): the file is written to the "html" folder, but the returned value omits
    // that folder. Kept unchanged for existing callers; confirm what they expect.
    return ("/" + filename + ".html");
}
/// <summary>
/// Saves the posted Word document into the application's "wordTmp" folder.
/// </summary>
/// <param name="uploadFiles">File-input control holding the posted file.</param>
/// <returns>
/// The physical path of the saved file; "0" when nothing was posted or saving failed;
/// "1" when the file extension is neither .doc nor .docx.
/// </returns>
public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)
{
    // An empty file name means no file was selected. Previously this case reached
    // Substring(-1) and threw outside the try block instead of returning "0".
    if (uploadFiles == null
        || uploadFiles.PostedFile == null
        || string.IsNullOrEmpty(uploadFiles.PostedFile.FileName))
    {
        return "0";
    }

    try
    {
        System.Web.HttpPostedFile postedFile = uploadFiles.PostedFile;

        // Path.GetExtension returns "" when there is no extension (no exception), and the
        // comparison ignores case so ".DOC" / ".Docx" are accepted as well.
        string extendName = Path.GetExtension(postedFile.FileName);
        bool isWord = string.Equals(extendName, ".doc", StringComparison.OrdinalIgnoreCase)
            || string.Equals(extendName, ".docx", StringComparison.OrdinalIgnoreCase);
        if (!isWord)
        {
            return "1";
        }

        // NOTE(review): day-of-year + content length is not unique; two uploads of the same
        // size on the same day overwrite each other. Confirm whether that is acceptable.
        string newName = DateTime.Now.DayOfYear.ToString() + postedFile.ContentLength.ToString();

        // Create and use ONE upload folder. Previously the existence check used "~\wordTmp"
        // (application root) while the file was saved under "wordTmp/" relative to the page.
        string uploadDirectory = Server.MapPath("~/wordTmp");
        Directory.CreateDirectory(uploadDirectory); // no-op when the folder already exists

        string savedPath = Path.Combine(uploadDirectory, newName + extendName);
        postedFile.SaveAs(savedPath);
        return savedPath;
    }
    catch (Exception)
    {
        // Callers rely on the "0" sentinel for any upload failure.
        return "0";
    }
}
#2
0
Here is a simple trick: copy the Word table and paste it into Dreamweaver in Design mode. When you switch to Code mode you will see all the generated HTML tags; from there you can copy that markup and paste it into your C# file.
这是一个简单的技巧,只需在Dreamweaver中复制word表和粘贴 - >设计模式。当您在代码模式中看到时,您将从那里获得所有html标记,您只需将该代码复制并粘贴到c#文件中即可。
Enjoy.
#3
0
Thanks for the replies. I found a way to convert Word tables to HTML.
I wrote this code:
谢谢回复,我找到了一种将Word表格转换为HTML的方法。我写了这段代码:
/// <summary>
/// Marks up every table of the active Word document with literal tag text:
/// each paragraph inside a cell is wrapped in P tags, each cell in TD tags and each
/// row in TR tags; the table range is then passed to Common.Tagging2 with "Table"
/// and the table is converted to text. Any exception that reaches the outer catch
/// is shown in a message box.
/// </summary>
private static void ConvertTableToHTML()
{
try
{
// NOTE(review): ConvertToText is called on each table while this foreach is still
// enumerating the Tables collection - verify that no table is skipped.
foreach (Table tb in Common.WordApplication.ActiveDocument.Tables)
{
// Row and column indices start at 1.
for (int r = 1; r <= tb.Rows.Count; r++)
{
for (int c = 1; c <= tb.Columns.Count; c++)
{
try
{
Cell cell = tb.Cell(r, c);
// Tag the paragraphs first, then wrap the whole cell.
foreach (Paragraph paragraph in cell.Range.Paragraphs)
{
Tagging(paragraph.Range, "P");
}
Tagging(cell.Range, "TD");
}
catch (Exception e)
{
// NOTE(review): this matches the English message text; presumably it does not
// match when the message is localised - confirm.
if (e.Message.Contains("The requested member of the collection does not exist."))
{
//Most likely a part of a merged cell, so skip over.
}
else throw;
}
}
try
{
// Preferred path: wrap the whole row range in TR tags.
Row row = tb.Rows[r];
Tagging(row.Range, "TR");
}
catch (Exception ex)
{
// Fallback when the row could not be tagged as a whole (presumably because of
// merged cells - confirm): put "<TR>" before the first cell of this row that can
// be accessed and "</TR>" after the last one that can be accessed.
bool initialTrTagInserted = false;
int columnsIndex = 1;
int columnsCount = tb.Columns.Count;
// Scan left to right for the first accessible cell.
while (!initialTrTagInserted && columnsIndex <= columnsCount)
{
try
{
Cell cell = tb.Cell(r, columnsIndex);
cell.Range.InsertBefore("<TR>");
initialTrTagInserted = true;
}
catch (Exception e)
{
// Deliberately ignored: the loop moves on to the next column.
}
columnsIndex++;
}
columnsIndex = tb.Columns.Count;
bool endTrTagInserted = false;
// Scan right to left for the last accessible cell.
while (!endTrTagInserted && columnsIndex >= 1)
{
try
{
Cell cell = tb.Cell(r, columnsIndex);
cell.Range.InsertAfter("</TR>");
endTrTagInserted = true;
}
catch (Exception e)
{
// Deliberately ignored: the loop moves on to the previous column.
}
columnsIndex--;
}
}
}
// Common.Tagging2 is defined elsewhere; presumably it wraps the table range in
// Table tags - confirm.
Common.Tagging2(tb.Range, "Table");
// Convert the table to text with an empty separator string; the second argument
// (nestedTable) is passed as true.
object separator = "";
object nestedTable = true;
tb.ConvertToText(separator, nestedTable);
}
}
catch (Exception ex) { MessageBox.Show(ex.Message); }
}
/// <summary>
/// Wraps a range in an opening and a closing tag by inserting literal text
/// before and after it.
/// </summary>
/// <param name="range">Range to wrap.</param>
/// <param name="TagName">Tag name without angle brackets, e.g. "TD".</param>
/// <exception cref="ArgumentNullException"><paramref name="range"/> is null.</exception>
public static void Tagging(Range range, string TagName)
{
    if (range == null)
    {
        throw new ArgumentNullException("range");
    }

    // Exceptions raised by the insert calls propagate unchanged. The previous
    // "throw new Exception(ex.Message)" wrapper kept only the message and discarded the
    // original exception type and stack trace. Callers that inspect Message still see
    // the same text.
    range.InsertBefore("<" + TagName + ">");
    range.InsertAfter("</" + TagName + ">");
}
#1
1
We worked on a similar project a while ago and hope the code below gives you a starting point. HTML part:
我们前一段时间遇到过类似的项目,希望下面的代码可以给你一个开始。 HTML部分
<span style="font-size:18px;"><div>
<input id="File1" type="file" runat="server"/>
<asp:Button ID="btnConvert" runat="server" Text="Convert" OnClick="btnConvert_Click" />
</div></span>
C# part:
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;
C#部分使用系统;使用System.Data;使用System.Configuration;使用System.Collections;使用System.Collections.Generic;使用System.Linq;使用System.Web;使用System.Web.Security;使用System.Web.UI;使用System.Web.UI.WebControls;使用System.Web.UI.WebControls.WebParts;使用System.Web.UI.HtmlControls;使用System.IO;
/// <summary>
/// Page load handler. It intentionally performs no work.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // No per-request initialisation is performed.
}
/// <summary>
/// Click handler for the Convert button: passes the <c>File1</c> upload control to
/// <c>wordToHtml</c> and writes the outcome to the response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void btnConvert_Click(object sender, EventArgs e)
{
    // Deliberately no try/catch: the previous "catch (Exception ex) { throw ex; }" only
    // reset the stack trace, and the previous "finally" reported success even when the
    // conversion had thrown. Exceptions now propagate untouched.
    string result = wordToHtml(File1);

    // wordToHtml signals failure through the sentinel strings "0" and "1".
    if (result == "0")
    {
        Response.Write("Upload failed.");
    }
    else if (result == "1")
    {
        Response.Write("Please select a Word document (.doc or .docx).");
    }
    else
    {
        Response.Write("Convert successfully!");
    }
}
///<summary>
///Uploads the posted Word document to the server and saves it as filtered HTML
///in the application's "html" folder.
///</summary>
///<param name="wordFilePath">File-input control holding the posted Word document.</param>
///<returns>
///"0" when the upload failed, "1" when the posted file is not a Word document,
///otherwise "/" followed by the generated file name and ".html".
///</returns>
public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)
{
    // Store and validate the upload BEFORE starting Word, so that the early returns
    // below cannot leave a Word process running.
    string filePath = uploadWord(wordFilePath);
    if (filePath == "0" || filePath == "1")
    {
        // "0" = upload failed, "1" = not a Word file (see uploadWord).
        return filePath;
    }

    // Read the clock once and zero-pad every component: the unpadded concatenation
    // used previously was ambiguous (e.g. month 1 + day 11 vs. month 11 + day 1).
    string filename = System.DateTime.Now.ToString(
        "yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

    // Create and use ONE output folder. Previously the existence check used "~\html"
    // (application root) while the file was saved under "html/" relative to the page.
    string htmlDirectory = Server.MapPath("~/html");
    Directory.CreateDirectory(htmlDirectory); // no-op when the folder already exists
    object saveFileName = Path.Combine(htmlDirectory, filename + ".html");

    Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
    Type wordType = word.GetType();
    try
    {
        Microsoft.Office.Interop.Word.Documents docs = word.Documents;
        Type docsType = docs.GetType();

        // NOTE(review): the two "true" values are positional arguments of Documents.Open,
        // presumably ConfirmConversions and ReadOnly. Confirm that ConfirmConversions = true
        // cannot raise a dialog when running unattended on the server.
        object fileName = filePath;
        Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
            System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });
        Type docType = doc.GetType();
        try
        {
            // Other WdSaveFormat members (wdFormatHTML, wdFormatRTF, wdFormatText, ...)
            // can be passed here to produce a different output format.
            docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
                null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
        }
        finally
        {
            // Close the document even when SaveAs throws.
            docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
                null, doc, new object[] { null, null, null });
        }
    }
    finally
    {
        // Always shut Word down, otherwise every failed request leaks a Word process.
        wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
    }

    // NOTE(review): the file is written to the "html" folder, but the returned value omits
    // that folder. Kept unchanged for existing callers; confirm what they expect.
    return ("/" + filename + ".html");
}
/// <summary>
/// Saves the posted Word document into the application's "wordTmp" folder.
/// </summary>
/// <param name="uploadFiles">File-input control holding the posted file.</param>
/// <returns>
/// The physical path of the saved file; "0" when nothing was posted or saving failed;
/// "1" when the file extension is neither .doc nor .docx.
/// </returns>
public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)
{
    // An empty file name means no file was selected. Previously this case reached
    // Substring(-1) and threw outside the try block instead of returning "0".
    if (uploadFiles == null
        || uploadFiles.PostedFile == null
        || string.IsNullOrEmpty(uploadFiles.PostedFile.FileName))
    {
        return "0";
    }

    try
    {
        System.Web.HttpPostedFile postedFile = uploadFiles.PostedFile;

        // Path.GetExtension returns "" when there is no extension (no exception), and the
        // comparison ignores case so ".DOC" / ".Docx" are accepted as well.
        string extendName = Path.GetExtension(postedFile.FileName);
        bool isWord = string.Equals(extendName, ".doc", StringComparison.OrdinalIgnoreCase)
            || string.Equals(extendName, ".docx", StringComparison.OrdinalIgnoreCase);
        if (!isWord)
        {
            return "1";
        }

        // NOTE(review): day-of-year + content length is not unique; two uploads of the same
        // size on the same day overwrite each other. Confirm whether that is acceptable.
        string newName = DateTime.Now.DayOfYear.ToString() + postedFile.ContentLength.ToString();

        // Create and use ONE upload folder. Previously the existence check used "~\wordTmp"
        // (application root) while the file was saved under "wordTmp/" relative to the page.
        string uploadDirectory = Server.MapPath("~/wordTmp");
        Directory.CreateDirectory(uploadDirectory); // no-op when the folder already exists

        string savedPath = Path.Combine(uploadDirectory, newName + extendName);
        postedFile.SaveAs(savedPath);
        return savedPath;
    }
    catch (Exception)
    {
        // Callers rely on the "0" sentinel for any upload failure.
        return "0";
    }
}
#2
0
Here is a simple trick: copy the Word table and paste it into Dreamweaver in Design mode. When you switch to Code mode you will see all the generated HTML tags; from there you can copy that markup and paste it into your C# file.
这是一个简单的技巧,只需在Dreamweaver中复制word表和粘贴 - >设计模式。当您在代码模式中看到时,您将从那里获得所有html标记,您只需将该代码复制并粘贴到c#文件中即可。
Enjoy.
#3
0
Thanks for the replies. I found a way to convert Word tables to HTML.
I wrote this code:
谢谢回复,我找到了一种将Word表格转换为HTML的方法。我写了这段代码:
/// <summary>
/// Marks up every table of the active Word document with literal tag text:
/// each paragraph inside a cell is wrapped in P tags, each cell in TD tags and each
/// row in TR tags; the table range is then passed to Common.Tagging2 with "Table"
/// and the table is converted to text. Any exception that reaches the outer catch
/// is shown in a message box.
/// </summary>
private static void ConvertTableToHTML()
{
try
{
// NOTE(review): ConvertToText is called on each table while this foreach is still
// enumerating the Tables collection - verify that no table is skipped.
foreach (Table tb in Common.WordApplication.ActiveDocument.Tables)
{
// Row and column indices start at 1.
for (int r = 1; r <= tb.Rows.Count; r++)
{
for (int c = 1; c <= tb.Columns.Count; c++)
{
try
{
Cell cell = tb.Cell(r, c);
// Tag the paragraphs first, then wrap the whole cell.
foreach (Paragraph paragraph in cell.Range.Paragraphs)
{
Tagging(paragraph.Range, "P");
}
Tagging(cell.Range, "TD");
}
catch (Exception e)
{
// NOTE(review): this matches the English message text; presumably it does not
// match when the message is localised - confirm.
if (e.Message.Contains("The requested member of the collection does not exist."))
{
//Most likely a part of a merged cell, so skip over.
}
else throw;
}
}
try
{
// Preferred path: wrap the whole row range in TR tags.
Row row = tb.Rows[r];
Tagging(row.Range, "TR");
}
catch (Exception ex)
{
// Fallback when the row could not be tagged as a whole (presumably because of
// merged cells - confirm): put "<TR>" before the first cell of this row that can
// be accessed and "</TR>" after the last one that can be accessed.
bool initialTrTagInserted = false;
int columnsIndex = 1;
int columnsCount = tb.Columns.Count;
// Scan left to right for the first accessible cell.
while (!initialTrTagInserted && columnsIndex <= columnsCount)
{
try
{
Cell cell = tb.Cell(r, columnsIndex);
cell.Range.InsertBefore("<TR>");
initialTrTagInserted = true;
}
catch (Exception e)
{
// Deliberately ignored: the loop moves on to the next column.
}
columnsIndex++;
}
columnsIndex = tb.Columns.Count;
bool endTrTagInserted = false;
// Scan right to left for the last accessible cell.
while (!endTrTagInserted && columnsIndex >= 1)
{
try
{
Cell cell = tb.Cell(r, columnsIndex);
cell.Range.InsertAfter("</TR>");
endTrTagInserted = true;
}
catch (Exception e)
{
// Deliberately ignored: the loop moves on to the previous column.
}
columnsIndex--;
}
}
}
// Common.Tagging2 is defined elsewhere; presumably it wraps the table range in
// Table tags - confirm.
Common.Tagging2(tb.Range, "Table");
// Convert the table to text with an empty separator string; the second argument
// (nestedTable) is passed as true.
object separator = "";
object nestedTable = true;
tb.ConvertToText(separator, nestedTable);
}
}
catch (Exception ex) { MessageBox.Show(ex.Message); }
}
/// <summary>
/// Wraps a range in an opening and a closing tag by inserting literal text
/// before and after it.
/// </summary>
/// <param name="range">Range to wrap.</param>
/// <param name="TagName">Tag name without angle brackets, e.g. "TD".</param>
/// <exception cref="ArgumentNullException"><paramref name="range"/> is null.</exception>
public static void Tagging(Range range, string TagName)
{
    if (range == null)
    {
        throw new ArgumentNullException("range");
    }

    // Exceptions raised by the insert calls propagate unchanged. The previous
    // "throw new Exception(ex.Message)" wrapper kept only the message and discarded the
    // original exception type and stack trace. Callers that inspect Message still see
    // the same text.
    range.InsertBefore("<" + TagName + ">");
    range.InsertAfter("</" + TagName + ">");
}