1.开始
DocumentFormat.OpenXml是ms官方给一个操作office三大件新版文件格式(.xlsx,.docx,.pptx)的组件;特色是它定义了OpenXml所包含的所有对象(たぶん),能做到精确微调文件内容格式;因此它没有EppPlus那么容易上手,性能也很看使用者的水平。。
DocumentFormat.OpenXml的语法很接近直接操作xml,所以使用它来操作Excel,得先熟悉Excel的xml文档结构:
↑已经忘记从哪里找来的了; WorkbookPart包含4个重要子节点:
- WorkSheetPart:表格数据内容就在这里面,结构最复杂的部分,Workheet的子节点除了Colmns、SheetData还有合并单元格集合MergeCells(图中缺失);
- WorkSheet:存放表单id及命名(sheet1, Sheet2...),这里有excel的坑,如果包含多个Sheet直接Sheets.First()有可能获取到最后一张Sheet,最好根据Name来搜索;
- WorkbootStylePart:存放样式;
- SharedStringTablePart(上图中缺失):共享字符串集合,字符串默认会存在里面,然后Cell引用其数组下标,这也是类似保存1w行"一二三亖"的.xlsx比.txt小的原因
了解了这些想用DocumentFormat.OpenXml从零开始new一个样式还看得过去的Excel文档依然很麻烦(相信用EppPlus、NPOI也一样),而且OpenXml的同级节点还强调先后顺序,错了用excel打开就会报错,相信没人想这么做;正确的姿势应该是加载模板写入数据另存为,模板中把要用到的样式都定义好,然后用代码拷贝对应单元格的样式。
先定义一些扩展方法,不然用起来会很累:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet; namespace EOpenXml
{
public static class OpenXmlExcelExtentions
{
public static Sheet GetSheet(this WorkbookPart workbookPart, string sheetName)
{
return workbookPart.Workbook
.GetFirstChild<Sheets>()
.Elements<Sheet>().Where(s => s.Name == sheetName).FirstOrDefault();
} /// <summary>
/// Given a worksheet and a row index, return the row.
/// </summary>
/// <param name="sheetData"></param>
/// <param name="rowIndex"></param>
/// <returns></returns>
public static Row GetRow(this SheetData sheetData, uint rowIndex)
{
return sheetData.
Elements<Row>().Where(r => r.RowIndex == rowIndex).FirstOrDefault();
}
public static Cell GetCell(this SheetData sheetData, string columnName, uint rowIndex)
{
Row row = GetRow(sheetData, rowIndex); if (row == null)
return null; return row.Elements<Cell>().Where(c => string.Compare
(c.CellReference.Value, columnName +
rowIndex, true) == ).FirstOrDefault();
} // https://msdn.microsoft.com/en-us/library/office/cc861607.aspx
// Given a column name, a row index, and a WorksheetPart, inserts a cell into the worksheet.
// If the cell already exists, returns it.
public static Cell GetOrCreateCell(this SheetData sheetData, string columnName, uint rowIndex)
{
string cellReference = columnName + rowIndex; // If the worksheet does not contain a row with the specified row index, insert one.
Row row;
if (sheetData.Elements<Row>().Where(r => r.RowIndex == rowIndex).Count() != )
{
row = sheetData.Elements<Row>().Where(r => r.RowIndex == rowIndex).First();
}
else
{
row = new Row() { RowIndex = rowIndex };
sheetData.Append(row);
} return row.GetOrCreateCell(cellReference);
}
public static Cell GetOrCreateCell(this Row row, string cellReference)
{
// If there is not a cell with the specified column name, insert one.
if (row.Elements<Cell>().Where(c => c?.CellReference?.Value == cellReference).Count() > )
{
return row.Elements<Cell>().Where(c => c.CellReference.Value == cellReference).First();
}
else
{
// Cells must be in sequential order according to CellReference. Determine where to insert the new cell.
Cell refCell = null;
foreach (Cell cell in row.Elements<Cell>())
{
if (cell.CellReference.Value.Length == cellReference.Length)
{
if (string.Compare(cell.CellReference.Value, cellReference, true) > )
{
refCell = cell;
break;
}
}
} Cell newCell = new Cell() { CellReference = cellReference };
row.InsertBefore(newCell, refCell);
return newCell;
}
} public static string GetValue(this Cell cell, SharedStringTablePart shareStringPart)
{
if (cell == null)
return null;
string cellvalue = cell.InnerText;
if (cell.DataType != null)
{
if (cell.DataType == CellValues.SharedString)
{
int id = -;
if (Int32.TryParse(cellvalue, out id))
{
SharedStringItem item = GetItem(shareStringPart, id);
if (item.Text != null)
{
//code to take the string value
cellvalue = item.Text.Text;
}
else if (item.InnerText != null)
{
cellvalue = item.InnerText;
}
else if (item.InnerXml != null)
{
cellvalue = item.InnerXml;
}
}
}
}
return cellvalue;
}
public static string GetValue(this Cell cell, string[] shareStringPartValues)
{
if (cell == null)
return null;
string cellvalue = cell.InnerText;
if (cell.DataType != null)
{
if (cell.DataType == CellValues.SharedString)
{
int id = -;
if (Int32.TryParse(cellvalue, out id))
{
cellvalue = shareStringPartValues[id];
}
}
}
return cellvalue;
} public static Cell SetValue(this Cell cell, object value = null, SharedStringTablePart shareStringPart = null, int shareStringItemIndex = -, uint styleIndex = )
{
if (value == null)
{
cell.CellValue = new CellValue();
if (shareStringItemIndex != -)
{
cell.CellValue = new CellValue(shareStringItemIndex.ToString());
cell.DataType = new EnumValue<CellValues>(CellValues.SharedString);
}
}
else if (value is string str)
{
if (shareStringPart == null)
{
cell.CellValue = new CellValue(str);
cell.DataType = new EnumValue<CellValues>(CellValues.String);
}
else
{
// Insert the text into the SharedStringTablePart.
int index = shareStringPart.GetOrInsertItem(str, false);
// Set the value of cell
cell.CellValue = new CellValue(index.ToString());
cell.DataType = new EnumValue<CellValues>(CellValues.SharedString);
}
}
else if (value is int || value is short || value is long ||
value is float || value is double || value is uint ||
value is ulong || value is ushort || value is decimal)
{
cell.CellValue = new CellValue(value.ToString());
cell.DataType = new EnumValue<CellValues>(CellValues.Number);
}
else if (value is DateTime date)
{
cell.CellValue = new CellValue(date.ToString("yyyy-MM-dd")); // ISO 861
cell.DataType = new EnumValue<CellValues>(CellValues.Date);
}
else if (value is XmlDocument xd)
{
if (shareStringPart == null)
{
throw new Exception("Param [shareStringPart] can't be null when value type is XmlDocument.");
}
else
{
int index = shareStringPart.GetOrInsertItem(xd.OuterXml, true);
// Set the value of cell
cell.CellValue = new CellValue(index.ToString());
cell.DataType = new EnumValue<CellValues>(CellValues.SharedString);
}
} if (styleIndex != )
cell.StyleIndex = styleIndex; return cell;
} // https://msdn.microsoft.com/en-us/library/office/gg278314.aspx
// Given text and a SharedStringTablePart, creates a SharedStringItem with the specified text
// and inserts it into the SharedStringTablePart. If the item already exists, returns its index.
public static int GetOrInsertItem(this SharedStringTablePart shareStringPart, string content, bool isXml)
{
// If the part does not contain a SharedStringTable, create one.
if (shareStringPart.SharedStringTable == null)
{
shareStringPart.SharedStringTable = new SharedStringTable();
} int i = ; // Iterate through all the items in the SharedStringTable. If the text already exists, return its index.
foreach (SharedStringItem item in shareStringPart.SharedStringTable.Elements<SharedStringItem>())
{
if ((!isXml && item.InnerText == content) || (isXml && item.OuterXml == content))
{
return i;
} i++;
} // The text does not exist in the part. Create the SharedStringItem and return its index.
if (isXml)
shareStringPart.SharedStringTable.AppendChild(new SharedStringItem(content));
else
shareStringPart.SharedStringTable.AppendChild(new SharedStringItem(new Text(content)));
shareStringPart.SharedStringTable.Save(); return i;
}
private static SharedStringItem GetItem(this SharedStringTablePart shareStringPart, int id)
{
return shareStringPart.SharedStringTable.Elements<SharedStringItem>().ElementAt(id);
} /// <summary>
/// https://docs.microsoft.com/en-us/office/open-xml/how-to-merge-two-adjacent-cells-in-a-spreadsheet
/// </summary>
/// <param name="worksheet"></param>
/// <returns></returns>
public static MergeCells GetOrCreateMergeCells(this Worksheet worksheet)
{
MergeCells mergeCells;
if (worksheet.Elements<MergeCells>().Count() > )
{
mergeCells = worksheet.Elements<MergeCells>().First();
}
else
{
mergeCells = new MergeCells(); // Insert a MergeCells object into the specified position.
if (worksheet.Elements<CustomSheetView>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<CustomSheetView>().First());
}
else if (worksheet.Elements<DataConsolidate>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<DataConsolidate>().First());
}
else if (worksheet.Elements<SortState>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<SortState>().First());
}
else if (worksheet.Elements<AutoFilter>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<AutoFilter>().First());
}
else if (worksheet.Elements<Scenarios>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<Scenarios>().First());
}
else if (worksheet.Elements<ProtectedRanges>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<ProtectedRanges>().First());
}
else if (worksheet.Elements<SheetProtection>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<SheetProtection>().First());
}
else if (worksheet.Elements<SheetCalculationProperties>().Count() > )
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<SheetCalculationProperties>().First());
}
else
{
worksheet.InsertAfter(mergeCells, worksheet.Elements<SheetData>().First());
}
worksheet.Save();
}
return mergeCells;
} /// <summary>
/// Given the names of two adjacent cells, merges the two cells.
/// Create the merged cell and append it to the MergeCells collection.
/// When two cells are merged, only the content from one cell is preserved:
/// the upper-left cell for left-to-right languages or the upper-right cell for right-to-left languages.
/// </summary>
/// <param name="mergeCells"></param>
/// <param name="cell1Name"></param>
/// <param name="cell2Name"></param>
public static void MergeTwoCells(this MergeCells mergeCells, string cell1Name, string cell2Name)
{
MergeCell mergeCell = new MergeCell() { Reference = new StringValue(cell1Name + ":" + cell2Name) };
mergeCells.Append(mergeCell);
} public static IEnumerable<string> GetItemValues(this SharedStringTablePart shareStringPart)
{
foreach (var item in shareStringPart.SharedStringTable.Elements<SharedStringItem>())
{
if (item.Text != null)
{
//code to take the string value
yield return item.Text.Text;
}
else if (item.InnerText != null)
{
yield return item.InnerText;
}
else if (item.InnerXml != null)
{
yield return item.InnerXml;
}
else
{
yield return null;
}
};
}
public static XmlDocument GetCellAssociatedSharedStringItemXmlDocument(this SheetData sheetData, string columnName, uint rowIndex, SharedStringTablePart shareStringPart)
{
Cell cell = GetCell(sheetData, columnName, rowIndex);
if (cell == null)
return null;
if (cell.DataType == CellValues.SharedString)
{
int id = -;
if (Int32.TryParse(cell.InnerText, out id))
{
SharedStringItem ssi = shareStringPart.GetItem(id);
var doc = new XmlDocument();
doc.LoadXml(ssi.OuterXml);
return doc;
}
}
return null;
}
}
}
2.插入数据
private static void GenerateExcel()
{
using (MemoryStream mem = new MemoryStream())
{
using (var temp = File.OpenRead(@"E:\template.xlsx"))
{
temp.CopyTo(mem);
} using (SpreadsheetDocument doc = SpreadsheetDocument.Open(mem, true))
{
WorkbookPart wbPart = doc.WorkbookPart;
Worksheet worksheet = wbPart.WorksheetParts.First().Worksheet;
//statement to get the sheetdata which contains the rows and cell in table
SheetData sheetData = worksheet.GetFirstChild<SheetData>(); SharedStringTablePart shareStringPart;
if (wbPart.GetPartsOfType<SharedStringTablePart>().Any())
shareStringPart = wbPart.GetPartsOfType<SharedStringTablePart>().First();
else
shareStringPart = wbPart.AddNewPart<SharedStringTablePart>();
//假设模板第一行是Title不用动,把要用到的样式都定义在了第二行的单元格里
var secondRow = sheetData.GetRow();
uint[] lineStyles = secondRow.Elements<Cell>().Select(c => c.StyleIndex.Value).ToArray();
sheetData.RemoveChild(secondRow);
//从第二行开始循环插入4列1000数据
uint currentRowIndex = ;
for (int i = ;i<;i++)
{
Row row = new Row();
row.RowIndex = currentRowIndex;//设置行号
row.AppendChild(new Cell().SetValue(, shareStringPart, styleIndex: lineStyles[]));
row.AppendChild(new Cell().SetValue(DateTime.Now, shareStringPart, styleIndex: lineStyles[]));
row.AppendChild(new Cell().SetValue(3.1415926535, shareStringPart, styleIndex: lineStyles[]));
row.AppendChild(new Cell().SetValue("通商宽衣", shareStringPart, styleIndex: lineStyles[]));//这里慢
sheetData.AppendChild(row);
currentRowIndex++;
}
wbPart.Workbook.Save();
doc.SaveAs($@"E:\Temp_{DateTime.Now.ToString("yyMMddHHmm")}.xlsx");
doc.Close();
}
mem.Close();
}
}
以上就生成了一个Excel打开不会报任何格式错误提示的标准.xlsx文件;但有需要优化的地方:在每次插入字符串的时候会去循环共享字符集,调用shareStringPart.GetItemValues().ToArray()可以将集合全部存到数组或Dictionary<string,int>里面会快很多,如果清楚集合内容就不用去判断重复了,当然也可以简单粗暴的保存为CellValues.InlineString,这样在重复字符串比较多的时候两种方式所生成的文件大小会有很大差异。
3.快速遍历
public static void Read()
{
using (var sd = SpreadsheetDocument.Open(@"E:\temp.xlsx", false))
{
WorkbookPart wbPart = sd.WorkbookPart;
SharedStringTablePart shareStringPart;
if (wbPart.GetPartsOfType<SharedStringTablePart>().Count() > )
shareStringPart = wbPart.GetPartsOfType<SharedStringTablePart>().First();
else
shareStringPart = wbPart.AddNewPart<SharedStringTablePart>();
string[] shareStringItemValues = shareStringPart.GetItemValues().ToArray(); WorksheetPart worksheetPart = wbPart.WorksheetParts.First();
uint dataRowStart = ;//数据开始行
OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);
while (reader.Read())
{
if (reader.ElementType == typeof(Worksheet))
{
reader.ReadFirstChild();
} if (reader.ElementType == typeof(Row))
{
Row r = (Row)reader.LoadCurrentElement();
if (r.RowIndex < dataRowStart)
continue;
foreach (Cell c in r.Elements<Cell>())
{
if (c.CellReference != null && c.CellReference.HasValue)
{
string cv = c.GetValue(shareStringItemValues);
Console.WriteLine(cv);
if (c.CellReference.Value == "B" + r.RowIndex)
Console.WriteLine("刚读取的是B列");
}
}
}
}
sd.Close();
}
}
4.总结
DocumentFormat.OpenXml不友好但操作透明,使用前最好先自行封装下,习惯之后相信能用的很爽。
以上です。