// 在Swift中使用libxml2 (Using libxml2 in Swift)
//
// 时间: 2021-02-08 20:15:55
//
//  main.swift
//  C150805_libxml2r2
//  http://git.oschina.net/yao_yu/Swift2015/tree/master/C150805_libxml2r2?dir=1&filepath=C150805_libxml2r2&oid=f80a7498226526b991e7913298c15cd38480aea5&sha=c073af33d0534a10098bb8fcc0706c2fd489dc3f
//
//  Created by yao_yu on 15/8/5.
//  Copyright © 2015年 yao_yu. All rights reserved.
//

import Foundation

/* ---------- 扩展 ---------- */

/// Adds a convenience initializer that loads a string from a URL given as text.
extension NSString{
    /// Creates a string from the contents of the resource at `urlString`.
    ///
    /// - Parameter urlString: Text handed to `NSURL(string:)` to build the URL.
    /// - Parameter encoding: Encoding passed through to `init(contentsOfURL:encoding:)`.
    ///
    /// NOTE(review): `url!` force-unwraps the result of `NSURL(string:)`; if that
    /// result is nil the process traps here instead of this failable initializer
    /// producing nil.
    /// NOTE(review): the `catch` block is empty, so an error thrown while loading
    /// is discarded without being reported.
    convenience init?(urlString:String, encoding:NSStringEncoding) {
        let url = NSURL(string: urlString)
        do {
            try self.init(contentsOfURL: url!, encoding: encoding)
        } catch {}
    }
}

extension String {
    /// Creates a Swift string from a NUL-terminated libxml2 `xmlChar` buffer.
    ///
    /// The bytes are copied; the caller keeps ownership of `char`.
    ///
    /// - Parameter char: Pointer to the C string, or nil.
    /// - Returns: nil when `char` is nil or `String.fromCString` cannot decode
    ///   the bytes. Previously a nil pointer produced an empty string and
    ///   undecodable bytes trapped on a force unwrap, so the initializer could
    ///   never actually fail.
    init?(XMLChar char: UnsafePointer<xmlChar>) {
        guard char != nil else { return nil }
        // xmlChar is an unsigned byte; reinterpret it as CChar for fromCString.
        guard let decoded = String.fromCString(UnsafePointer<CChar>(char)) else {
            return nil
        }
        self = decoded
    }
}

/* ---------- XML节点 ---------- */

/// Pairs a libxml2 node pointer with the document pointer it was obtained from.
///
/// Only the raw pointers are stored; this class defines no `deinit`, so it
/// never releases the node or the document.
class XMLNode {
    var xmlDoc:xmlDocPtr = nil
    var xmlNode:xmlNodePtr = nil

    /// Wraps `node` and remembers `document` for later XPath queries.
    init(node:xmlNodePtr, document:xmlDocPtr) {
        xmlDoc = document
        xmlNode = node
    }

    /// Wraps the root element reported by `xmlDocGetRootElement(document)`.
    convenience init(document:xmlDocPtr) {
        let root = xmlDocGetRootElement(document)
        self.init(node: root, document: document)
    }

    /// Returns the value of the attribute named `key`, or nil when absent.
    subscript(key:String) -> String? {
        return attributes[key]
    }

    /// Content of the node as returned by `XMLNodeGetContent`, computed on
    /// first access. (`XMLNodeGetString(self.xmlDoc, xmlNode: self.xmlNode)`
    /// is the alternative the original author left disabled.)
    lazy var rawContent:String? = {
        return XMLNodeGetContent(self.xmlNode)
    }()

    /// Attributes of the node as a name-to-value dictionary, computed on
    /// first access.
    lazy var attributes: [String: String] = {
        return XMLNodeGetAttributes(self.xmlNode)
    }()

    /// Child nodes returned by `XMLNodeGetChildren`, wrapped as `XMLNode`
    /// and computed on first access.
    lazy var children:[XMLNode] = {
        let rawChildren = XMLNodeGetChildren(self.xmlNode)
        return self.xmlNodes2XMLNodes(rawChildren)
    }()

    /// Wraps every raw pointer in `nodes`, sharing this node's document.
    private func xmlNodes2XMLNodes(nodes:[xmlNodePtr]) -> [XMLNode] {
        // A plain loop is kept on purpose: the author reported that
        //   nodes.map{[unowned self] in XMLNode(node:$0, document:self.xmlDoc)}
        // makes the compiler fail with "Command failed due to signal: Abort trap: 6".
        var wrapped: [XMLNode] = []
        for rawNode in nodes {
            wrapped += [XMLNode(node: rawNode, document: xmlDoc)]
        }
        return wrapped
    }
}

extension XMLNode {
    /// Evaluates `xpath` through `XMLFindXPath` on this node's document and
    /// wraps each resulting node.
    ///
    /// Only the document pointer is handed to `XMLFindXPath`; this node itself
    /// is not passed along.
    func xPath(xpath: String) -> [XMLNode] {
        let matches = XMLFindXPath(xmlDoc, xPath: xpath)
        return xmlNodes2XMLNodes(matches)
    }
}

/* ---------- libxml2读取工具函数 ---------- */

/// Returns the text built by `xmlNodeListGetString(doc, xmlNode, 1)`.
///
/// - Parameter doc: Document the node list belongs to.
/// - Parameter xmlNode: First node of the list to read.
/// - Returns: The decoded text, or nil when libxml2 returns no buffer.
func XMLNodeGetString(doc:xmlDocPtr, xmlNode:xmlNodePtr) -> String? {
    let contentChars = xmlNodeListGetString(doc, xmlNode, 1)
    guard contentChars != nil else { return nil }
    // The buffer comes from libxml2's allocator, so it must be released with
    // xmlFree rather than free(); defer keeps the release on every exit path.
    defer { xmlFree(contentChars) }

    let contentString = String(XMLChar: contentChars)
    assert(contentString != nil, "XMLNodeGetString: 值转换不成功")
    return contentString
}

/// Returns the text produced by `xmlNodeGetContent` for `xmlNode`.
///
/// - Parameter xmlNode: Node whose content is read.
/// - Returns: The decoded text, or nil when libxml2 returns no buffer.
func XMLNodeGetContent(xmlNode:xmlNodePtr) -> String? {
    let contentChars = xmlNodeGetContent(xmlNode)
    guard contentChars != nil else { return nil }
    // The buffer comes from libxml2's allocator, so it must be released with
    // xmlFree rather than free(); defer keeps the release on every exit path.
    defer { xmlFree(contentChars) }

    let contentString = String(XMLChar: contentChars)
    assert(contentString != nil, "XMLNodeGetContent: 值转换不成功")
    return contentString
}

/// Collects the direct children of `xmlNode` that are not text nodes.
///
/// - Parameter xmlNode: Parent node; a nil pointer yields an empty array
///   instead of dereferencing it.
/// - Returns: The child pointers in sibling order, skipping every child for
///   which `xmlNodeIsText` reports a text node.
func XMLNodeGetChildren(xmlNode: xmlNodePtr) -> [xmlNodePtr] {
    var children = [xmlNodePtr]()
    // The root of an empty or unparsable document is nil; reading `.memory`
    // through it would crash.
    guard xmlNode != nil else { return children }

    var child = xmlNode.memory.children
    while child != nil {
        if xmlNodeIsText(child) == 0 {
            children.append(child)
        }
        child = child.memory.next
    }
    return children
}

/// Builds a name-to-value dictionary from the attribute list of `xmlNode`.
///
/// - Parameter xmlNode: Node whose `properties` list is walked; a nil pointer
///   yields an empty dictionary instead of dereferencing it.
/// - Returns: One entry per attribute whose name converts to a `String`.
///   An attribute whose content cannot be read maps to the empty string.
func XMLNodeGetAttributes(xmlNode: xmlNodePtr) -> [String: String] {
    var result = [String: String]()
    guard xmlNode != nil else { return result }

    var attribute: xmlAttrPtr = xmlNode.memory.properties
    while attribute != nil {
        if let key = String(XMLChar: attribute.memory.name) {
            // The attribute value is read from its child node list.
            result[key] = XMLNodeGetContent(attribute.memory.children) ?? ""
        } else {
            // The name could not be converted; report it and move on.
            print((">>>>>>>>>>>>>>>>>>>>>>>>错误:", String(XMLChar: attribute.memory.name)))
        }
        attribute = attribute.memory.next
    }
    return result
}

/// Looks up a single attribute of `xmlNode` by name.
///
/// - Parameter xmlNode: Node whose `properties` list is searched; a nil
///   pointer yields nil instead of dereferencing it.
/// - Parameter key: Attribute name to match exactly.
/// - Returns: The content of the first attribute named `key`, or nil when no
///   attribute matches or its content cannot be read.
func XMLNodeGetAttribute(xmlNode: xmlNodePtr, key: String) -> String? {
    guard xmlNode != nil else { return nil }

    var attribute: xmlAttrPtr = xmlNode.memory.properties
    while attribute != nil {
        if key == String(XMLChar: attribute.memory.name) {
            return XMLNodeGetContent(attribute.memory.children)
        }
        attribute = attribute.memory.next
    }
    return nil
}

/// Evaluates an XPath expression against `xmlDoc` and returns the matched nodes.
///
/// - Parameter xmlDoc: Document used to create the XPath context.
/// - Parameter xPath: XPath expression text.
/// - Returns: The node pointers of the resulting node set, or an empty array
///   when the context cannot be created, evaluation yields no object, or the
///   node set is missing or empty. The pointers still refer to nodes of
///   `xmlDoc`; only the XPath context and result object are released here.
func XMLFindXPath(xmlDoc:xmlDocPtr, xPath: String) -> [xmlNodePtr] {
    let xPathContext = xmlXPathNewContext(xmlDoc)
    guard xPathContext != nil else { return [] }
    defer { xmlXPathFreeContext(xPathContext) }

    xPathContext.memory.node = nil

    // withCString keeps the UTF-8 buffer alive for the whole libxml2 call.
    // The previous code took a pointer into a temporary array (and
    // force-unwrapped the conversion), which is only guaranteed valid while
    // the pointer is being constructed.
    let xPathObject = xPath.withCString {
        xmlXPathEvalExpression(UnsafePointer<xmlChar>($0), xPathContext)
    }
    guard xPathObject != nil else { return [] }
    defer { xmlXPathFreeObject(xPathObject) }

    let nodeSet = xPathObject.memory.nodesetval
    // A non-positive count would make the range below trap, so reject it too.
    guard nodeSet != nil && nodeSet.memory.nodeNr > 0 && nodeSet.memory.nodeTab != nil else {
        return []
    }

    var resultNodes = [xmlNodePtr]()
    for i in 0 ..< Int(nodeSet.memory.nodeNr) {
        resultNodes.append(nodeSet.memory.nodeTab[i])
    }
    return resultNodes
}

/// Parses `data` with libxml2 as XML or, by default, as HTML.
///
/// - Parameter data: Raw document bytes; nil or empty data yields nil.
/// - Parameter encoding: Currently not used by the body; the parsers are
///   called with a nil encoding name. Kept so existing callers still compile.
/// - Parameter isXML: true to use `xmlReadMemory` with `XML_PARSE_RECOVER`;
///   false to use `htmlReadMemory` with the recover/no-warning/no-error options.
/// - Returns: The parsed document, or nil when there is nothing to parse, the
///   data is too large for a `CInt` length, or the parser returns a NULL
///   document. Previously a NULL result was wrapped in a non-nil Optional, so
///   `if let` at the call site accepted a failed parse.
///   The body never frees the returned document.
func XMLReadNSData(data:NSData?, encoding:NSStringEncoding = NSUTF8StringEncoding, isXML:Bool = false) -> xmlDocPtr?  {
    // CInt(data.length) traps when the length exceeds CInt.max, so check first.
    guard let data = data where data.length > 0 && data.length <= Int(CInt.max) else {
        return nil
    }

    let cBuffer = UnsafePointer<CChar>(data.bytes)
    let cSize = CInt(data.length)

    let document: xmlDocPtr
    if isXML {
        let options = CInt(XML_PARSE_RECOVER.rawValue)
        document = xmlReadMemory(cBuffer, cSize, nil, nil, options)
    } else {
        let options = CInt(HTML_PARSE_RECOVER.rawValue | HTML_PARSE_NOWARNING.rawValue | HTML_PARSE_NOERROR.rawValue)
        document = htmlReadMemory(cBuffer, cSize, nil, nil, options)
    }

    // Turn a NULL document pointer into a real Optional nil.
    if document == nil {
        return nil
    }
    return document
}

/// `NSStringEncoding` value obtained by converting the CoreFoundation
/// `GB_18030_2000` encoding constant.
let GB18030_2000_Encoding = CFStringConvertEncodingToNSStringEncoding(CFStringEncoding(CFStringEncodings.GB_18030_2000.rawValue))

/* ---------- 测试代码 ---------- */

/// Small stopwatch that remembers when it was created and a label to print.
class CElapseTime {
    var startTime:NSDate
    var prompt:String
    var unsed:Bool = false

    /// Starts timing now and stores `prompt` as the label.
    init(prompt:String) {
        self.prompt = prompt
        self.startTime = NSDate()
    }

    /// The label followed by the seconds elapsed since `startTime`.
    var newprompt:String {
        let elapsed = NSDate().timeIntervalSinceDate(startTime)
        return "\(prompt)耗时:\(elapsed)"
    }
}

/// Demo driver: reads a local sample file, parses it as HTML, runs `//div`
/// and reports how long reading and parsing took.
///
/// The URL is only printed as a banner; the content comes from the
/// hard-coded sample file path below.
func testParseSina() {
    let sURL = "http://www.baidu.com"
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\(sURL)")

    let readTimer = CElapseTime(prompt: "读取网页")
    //let sContent = NSString(urlString:sURL, encoding: NSUTF8StringEncoding)
    var sContent:NSString? = nil
    do {
        sContent = try NSString(contentsOfFile: "/Volumes/Data/Document/Test/sample.txt", encoding: NSUTF8StringEncoding)
    } catch {
        // Report the failure instead of silently continuing with no content.
        print("testParseSina: failed to read sample file: \(error)")
    }
    print(readTimer.newprompt)
    let sTimer1 = readTimer.newprompt
    let parseTimer = CElapseTime(prompt: "数据解析")

    // Skip a NULL document: the parser may hand one back on failure.
    if let doc = XMLReadNSData(sContent?.dataUsingEncoding(NSUTF8StringEncoding)) where doc != nil {
        // The parsed document was never released before; free it once the
        // wrappers below are no longer used.
        defer { xmlFreeDoc(doc) }

        let rootNode = XMLNode(document: doc)
        let findNodes = rootNode.xPath("//div")
        for childNode in findNodes {
            autoreleasepool{
                // Touch both lazy properties so their cost is part of the timing.
                let _ = (childNode.attributes, childNode.rawContent)
            }
        }
        print(findNodes.count)
    }
    print(sTimer1)
    print(parseTimer.newprompt)
}

// Script entry point: run the demo once at top level.
testParseSina()