VBA 格式化输出XML(UTF-8无BOM编码)

时间:2023-01-06 13:21:20

VBA可以使用MSXML2.Document来创建XML Dom树并输出到文件,先看个简单的例子:

Function CreateXml(xmlFile As String)
Dim xDoc As Object
Dim rootNode As Object
Dim header As Object
Dim newNode As Object
Dim tNode As Object

Set xDoc = CreateObject("MSXML2.DOMDocument")
Set rootNode = xDoc.createElement("BookList")
Set xDoc.DocumentElement = rootNode
'xDoc.Load xmlFile
Set header = xDoc.createProcessingInstruction("xml", "version='1.0' encoding='Unicode'")
xDoc.InsertBefore header, xDoc.ChildNodes(0)

Set newNode = xDoc.createElement("book")
Set tNode = xDoc.DocumentElement.appendChild(newNode)
tNode.setAttribute "type", "program"

Set newNode = xDoc.createElement("name")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("Thinking in Java"))

Set newNode = xDoc.createElement("author")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("Bruce Eckel"))

Set newNode = xDoc.createElement("book")
Set tNode = xDoc.DocumentElement.appendChild(newNode)
tNode.setAttribute "type", "literature"

Set newNode = xDoc.createElement("name")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("边城"))

Set newNode = xDoc.createElement("author")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("沈从文"))

Set newNode = Nothing
Set tNode = Nothing

xDoc.save xmlFile

End Function

在宏工程中调用一下这个函数工程,就可以生成一个xml文件,但是生成的xml文件所有内容都显示在一行上了,有没有方法进行换行及缩进,让xml文件看起来更整齐美观呢?方法是有的,借助Msxml2.SAXXMLReader和Msxml2.MXXMLWriter就可以实现这个效果,看代码:

'格式化xml,带换行缩进
Function PrettyPrintXml(xmldoc) As String
Dim reader As Object
Dim writer As Object
Set reader = CreateObject("Msxml2.SAXXMLReader.6.0")
Set writer = CreateObject("Msxml2.MXXMLWriter.6.0")
writer.indent = True
writer.omitXMLDeclaration = True
reader.contentHandler = writer
reader.Parse (xmldoc)
PrettyPrintXml = writer.Output

End Function

然后将前面的xDoc.save xmlFile改一下:

'xDoc.save xmlFile
Dim xmlStr As String
xmlStr = PrettyPrintXml(xDoc)
WriteUtf8WithoutBom xmlFile, xmlStr
Open xmlFile For Output As #1
Print #1, xmlStr
Close #1

这样就可以格式化输出xml文件了。还有一个问题,我们想要指定xml文件的编码格式,如UTF-8,GB2312等,我通常习惯保存成UTF-8格式,那么该如何设置呢?查找资料,可以用ADODB.stream来搞。

Function WriteWithUtf8(filename As String, content As String)
Dim stream As New ADODB.stream
stream.Open
stream.Type = adTypeText
stream.Charset = "utf-8"
stream.WriteText content
stream.SaveToFile filename, adSaveCreateOverWrite

stream.Flush
stream.Close

End Function

细心点的话会发现用上面的方法实际上输出的文件格式是带BOM的UTF-8,它跟UTF-8无BOM的区别在哪呢?用UltraEdit工具来看十六进制码,会发现前者在开头多了三个字节:0xEF,0xBB,0xBF,想保存成UTF-8无BOM,把这三个字节去掉不就行了,实现如下:

' utf8无BOM编码格式
Function WriteUtf8WithoutBom(filename As String, content As String)
Dim stream As New ADODB.stream
stream.Open
stream.Type = adTypeText
stream.Charset = "utf-8"
stream.WriteText "<?xml version=" & Chr(34) & "1.0" & Chr(34) & _
" encoding=" & Chr(34) & "UTF-8" & Chr(34) & "?>" & vbCrLf
stream.WriteText content

'移除前三个字节(0xEF,0xBB,0xBF)
stream.Position = 3

Dim newStream As New ADODB.stream
newStream.Type = adTypeBinary
newStream.Mode = adModeReadWrite
newStream.Open

stream.CopyTo newStream
stream.Flush
stream.Close

newStream.SaveToFile filename, adSaveCreateOverWrite
newStream.Flush
newStream.Close

End Function

注意需要引用两个库:Microsoft ADO Ext. 6.0 for DDL and Security,Microsoft ActiveX Data Objects 2.7 Library

最后附上完整代码:

Sub 按钮2_Click()
Dim xmlFile As String
xmlFile = "D:\test\books.xml"
CreateXml xmlFile
End Sub

Function CreateXml(xmlFile As String)
Dim xDoc As Object
Dim rootNode As Object
Dim header As Object
Dim newNode As Object
Dim tNode As Object

Set xDoc = CreateObject("MSXML2.DOMDocument")
Set rootNode = xDoc.createElement("BookList")
Set xDoc.DocumentElement = rootNode
'xDoc.Load xmlFile
Set header = xDoc.createProcessingInstruction("xml", "version='1.0' encoding='Unicode'")
xDoc.InsertBefore header, xDoc.ChildNodes(0)

Set newNode = xDoc.createElement("book")
Set tNode = xDoc.DocumentElement.appendChild(newNode)
tNode.setAttribute "type", "program"

Set newNode = xDoc.createElement("name")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("Thinking in Java"))

Set newNode = xDoc.createElement("author")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(0).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("Bruce Eckel"))

Set newNode = xDoc.createElement("book")
Set tNode = xDoc.DocumentElement.appendChild(newNode)
tNode.setAttribute "type", "literature"

Set newNode = xDoc.createElement("name")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("边城"))

Set newNode = xDoc.createElement("author")
Set tNode = xDoc.DocumentElement.ChildNodes.Item(1).appendChild(newNode)
tNode.appendChild (xDoc.createTextNode("沈从文"))

Set newNode = Nothing
Set tNode = Nothing

Dim xmlStr As String
xmlStr = PrettyPrintXml(xDoc)
WriteUtf8WithoutBom xmlFile, xmlStr

Set rootNode = Nothing
Set xDoc = Nothing

MsgBox xmlFile & "输出完成"

End Function

'格式化xml,带换行缩进
Function PrettyPrintXml(xmldoc) As String
Dim reader As Object
Dim writer As Object
Set reader = CreateObject("Msxml2.SAXXMLReader.6.0")
Set writer = CreateObject("Msxml2.MXXMLWriter.6.0")
writer.indent = True
writer.omitXMLDeclaration = True
reader.contentHandler = writer
reader.Parse (xmldoc)
PrettyPrintXml = writer.Output
End Function

' utf8无BOM编码格式
Function WriteUtf8WithoutBom(filename As String, content As String)
Dim stream As New ADODB.stream
stream.Open
stream.Type = adTypeText
stream.Charset = "utf-8"
stream.WriteText "<?xml version=" & Chr(34) & "1.0" & Chr(34) & _
" encoding=" & Chr(34) & "UTF-8" & Chr(34) & "?>" & vbCrLf
stream.WriteText content

'移除前三个字节(0xEF,0xBB,0xBF)
stream.Position = 3

Dim newStream As New ADODB.stream
newStream.Type = adTypeBinary
newStream.Mode = adModeReadWrite
newStream.Open

stream.CopyTo newStream
stream.Flush
stream.Close

newStream.SaveToFile filename, adSaveCreateOverWrite
newStream.Flush
newStream.Close
End Function