这是我开博客后的第一篇分享,用文字记录一下代码。
C# 使用 Aspose.Words 读取 Word 文档里的文本域
using Aspose.Words;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
namespace ContractDocumentMaster {
    /// <summary>
    /// Console utility that opens a Word document with Aspose.Words, prints a short preview
    /// of the top-level nodes in the first section's body, and counts the merge-field
    /// placeholders (text of the form «name») found in that body.
    /// Originally written against Aspose.Words 6.5.0.0 (the author notes a cracked build was used).
    /// </summary>
    public class Program {
        /// <summary>Directory that holds the default template document.</summary>
        public static string DocPath = @"E:\work_api\";

        /// <summary>File name of the default template document inside <see cref="DocPath"/>.</summary>
        public static string DocName = "contractTemplate.doc";

        /// <summary>
        /// Number of leading body nodes for which a preview line is printed.
        /// Statistics are always gathered over every body node; only the preview is capped.
        /// </summary>
        private const int MaxPreviewNodes = 31;

        /// <summary>
        /// Matches one placeholder: «, then any run of characters that are neither whitespace
        /// nor », then ». Group 1 is the field name. The pattern has no ^ or $ anchors, so
        /// RegexOptions.Multiline would not change what it matches and is omitted.
        /// </summary>
        private static readonly Regex MergeFieldPattern = new Regex(@"«([^\s»]*)»");

        /// <summary>
        /// Entry point. Scans the document given as the first command-line argument, or the
        /// default <see cref="DocPath"/> + <see cref="DocName"/> when no argument is supplied,
        /// then waits for a key press. <see cref="TestRegExp"/> and
        /// <see cref="PrintAllAsposeWordNodeType"/> are manual diagnostics that can be called
        /// from here while debugging.
        /// </summary>
        /// <param name="args">Optional: args[0] is the path of the document to scan.</param>
        public static void Main(string[] args) {
            Console.WriteLine("Program running...wait...\n\n");
            if(args != null && args.Length > 0 && !string.IsNullOrWhiteSpace(args[0])) {
                Run(args[0]);
            } else {
                Run();
            }
            Console.WriteLine("\n\nProgram Finished. Press any key to exit....");
            Console.ReadKey();
        }

        /// <summary>
        /// Manual check of the placeholder pattern: prints every field name found in a
        /// fixed sample string.
        /// </summary>
        public static void TestRegExp() {
            var sample = "«工程名称»---------«工程__名称»";
            foreach(Match match in MergeFieldPattern.Matches(sample)) {
                Console.WriteLine("Match : " + match.Groups[1].Value);
            }
        }

        /// <summary>
        /// Scans the default document located at <see cref="DocPath"/> / <see cref="DocName"/>.
        /// </summary>
        public static void Run() {
            // Path.Combine tolerates DocPath with or without a trailing separator.
            Run(System.IO.Path.Combine(DocPath, DocName));
        }

        /// <summary>
        /// Loads <paramref name="documentPath"/>, walks the direct children of the first
        /// section's body and prints: a preview of the first <see cref="MaxPreviewNodes"/>
        /// nodes, the number of paragraphs without runs, a histogram of node type names and
        /// a histogram of merge-field placeholder names.
        /// </summary>
        /// <param name="documentPath">Path of the Word document to open.</param>
        public static void Run(string documentPath) {
            Aspose.Words.Document doc = new Aspose.Words.Document(documentPath);
            var nodeTypeCollection = new Dictionary<string, int>();
            var mergedFieldCollection = new Dictionary<string, int>();
            var emptyLine = 0;
            var index = 0;

            foreach(Node node in doc.FirstSection.Body.ChildNodes) {
                // Node.ToString() is used as the type label; the Replace below assumes it
                // yields a type name prefixed with "Aspose.Words." — NOTE(review): confirm
                // for the Aspose.Words version in use.
                var nt = node.ToString().Trim();
                Increment(nodeTypeCollection, nt);

                var showNodeType = true;
                var value = string.Empty;
                var pg = node as Paragraph;
                if(pg != null) {
                    if(pg.Runs != null && pg.Runs.Count > 0) {
                        // Preview shows only the first run of the paragraph.
                        value = pg.Runs[0].Text.Trim();
                    } else {
                        // A paragraph without any run is counted as an empty line and not previewed.
                        showNodeType = false;
                        emptyLine++;
                    }
                    CollectMergeField(pg, mergedFieldCollection);
                } else {
                    var table = node as Aspose.Words.Tables.Table;
                    if(table != null) {
                        // Paragraphs nested in table cells are not direct body children, so
                        // scan the table's whole text to pick up placeholders inside cells.
                        CollectMergeFieldFromText(table.GetText(), mergedFieldCollection);
                    }
                }

                // Only the console preview is limited; the counters above cover every node.
                if(showNodeType && index < MaxPreviewNodes) {
                    Console.WriteLine(nt.Replace("Aspose.Words.", ""));
                    Console.WriteLine("\t\t" + value);
                }
                index++;
            }

            Console.WriteLine("\n\n");
            Console.WriteLine("Total Empty Line :" + emptyLine);
            Console.WriteLine("Total Node Type :" + nodeTypeCollection.Count);
            PrintCounts(nodeTypeCollection);
            Console.WriteLine("\n\n");
            Console.WriteLine("Total Field Node Type :" + mergedFieldCollection.Count);
            PrintCounts(mergedFieldCollection);
        }

        /// <summary>
        /// Counts the «name» placeholders in the text of <paramref name="node"/> when it is a
        /// <c>Paragraph</c>; any other node (or null) is ignored.
        /// </summary>
        /// <param name="node">Node to inspect.</param>
        /// <param name="dic">Field name → occurrence count; updated in place.</param>
        public static void CollectMergeField(Node node, Dictionary<string, int> dic) {
            var pg = node as Paragraph;
            if(pg == null) {
                return;
            }
            CollectMergeFieldFromText(pg.GetText(), dic);
        }

        /// <summary>
        /// Counts every «name» placeholder found in <paramref name="text"/>.
        /// Null or empty text contributes nothing.
        /// </summary>
        /// <param name="text">Raw text to scan.</param>
        /// <param name="dic">Field name → occurrence count; updated in place.</param>
        public static void CollectMergeFieldFromText(string text, Dictionary<string, int> dic) {
            if(string.IsNullOrEmpty(text)) {
                return;
            }
            foreach(Match match in MergeFieldPattern.Matches(text)) {
                Increment(dic, match.Groups[1].Value);
            }
        }

        /// <summary>
        /// Writes <paramref name="o"/> to the console behind a "########" marker so it stands
        /// out in the output. A null argument prints just the marker.
        /// </summary>
        /// <param name="o">Value to print; may be null.</param>
        public static void HightLinePrint(object o) {
            // String concatenation renders null as an empty string instead of throwing.
            Console.WriteLine("########\t\t" + o);
        }

        /// <summary>
        /// Prints every member of the Aspose.Words <c>NodeType</c> enumeration, one per line.
        /// </summary>
        public static void PrintAllAsposeWordNodeType() {
            foreach(var nodeType in Enum.GetValues(typeof(NodeType))) {
                Console.WriteLine(nodeType);
            }
        }

        /// <summary>Adds one to the counter stored under <paramref name="key"/>, starting from zero.</summary>
        private static void Increment(Dictionary<string, int> counts, string key) {
            int current;
            counts.TryGetValue(key, out current); // leaves current at 0 when the key is absent
            counts[key] = current + 1;
        }

        /// <summary>Prints each entry as "ordinal : key ==> count", numbering from 1.</summary>
        private static void PrintCounts(Dictionary<string, int> counts) {
            var ordinal = 1;
            foreach(var entry in counts) {
                Console.WriteLine(ordinal + " : " + entry.Key + " ==> " + entry.Value);
                ordinal++;
            }
        }
    }
}