//
//  main.swift
//  C150805_libxml2r2
//  http://git.oschina.net/yao_yu/Swift2015/tree/master/C150805_libxml2r2?dir=1&filepath=C150805_libxml2r2&oid=f80a7498226526b991e7913298c15cd38480aea5&sha=c073af33d0534a10098bb8fcc0706c2fd489dc3f
//
//  Created by yao_yu on 15/8/5.
//  Copyright © 2015年 yao_yu. All rights reserved.
//

import Foundation

/* ---------- Extensions ---------- */

extension NSString {
    /// Loads the text found at `urlString`, decoding it with `encoding`.
    ///
    /// - Parameter urlString: Textual form of the URL to read.
    /// - Parameter encoding: Encoding used to decode the downloaded bytes.
    /// - Returns: nil when `urlString` is not a valid URL or when reading fails.
    convenience init?(urlString:String, encoding:NSStringEncoding) {
        // A malformed URL is a normal failure for a failable initializer, not a crash.
        guard let url = NSURL(string: urlString) else {
            return nil
        }
        do {
            try self.init(contentsOfURL: url, encoding: encoding)
        } catch {
            return nil
        }
    }
}

extension String {
    /// Builds a Swift string from a NUL-terminated libxml2 character buffer.
    ///
    /// - Parameter char: Pointer to the buffer. A nil pointer produces the empty string.
    /// - Returns: nil when the buffer cannot be decoded by `String.fromCString`.
    init?(XMLChar char: UnsafePointer<xmlChar>) {
        self.init()
        if char == nil {
            return
        }
        // Fail the initializer instead of force-unwrapping: undecodable bytes coming
        // from a document must not bring the process down.
        guard let decoded = String.fromCString(UnsafePointer<CChar>(char)) else {
            return nil
        }
        self = decoded
    }
}

/* ---------- XML node ---------- */

/// Thin object wrapper around a libxml2 node pointer and the document it belongs to.
///
/// The wrapper only stores the two pointers; nothing in this class frees them, so the
/// code that created the document stays responsible for releasing it.
class XMLNode {
    var xmlDoc:xmlDocPtr = nil
    var xmlNode:xmlNodePtr = nil

    /// Wraps `node`, remembering `document` as its owning document.
    init(node:xmlNodePtr, document:xmlDocPtr) {
        self.xmlNode = node
        self.xmlDoc = document
    }

    /// Wraps the root element of `document`.
    convenience init(document:xmlDocPtr) {
        self.init(node:xmlDocGetRootElement(document), document:document)
    }

    /// Content of the node as returned by `XMLNodeGetContent`, computed on first access.
    lazy var rawContent:String? = {
        return XMLNodeGetContent(self.xmlNode)
        // Alternative implementation:
        // return XMLNodeGetString(self.xmlDoc, xmlNode: self.xmlNode)
    }()

    /// Child nodes as returned by `XMLNodeGetChildren`, computed on first access.
    lazy var children:[XMLNode] = {
        return self.xmlNodes2XMLNodes(XMLNodeGetChildren(self.xmlNode))
    }()

    /// Attribute name/value pairs of the node, computed on first access.
    lazy var attributes: [String: String] = {
        return XMLNodeGetAttributes(self.xmlNode)
    }()

    /// Looks up the attribute named `key`; nil when the node has no such attribute.
    subscript(key:String) -> String? {
        return attributes[key]
    }

    /// Wraps every raw node pointer in an `XMLNode` that shares this node's document.
    private func xmlNodes2XMLNodes(nodes:[xmlNodePtr]) -> [XMLNode] {
        var xmlNodes = [XMLNode]()
        for node in nodes {
            xmlNodes.append(XMLNode(node: node, document: xmlDoc))
        }
        return xmlNodes
        // The code below triggers: Command failed due to signal: Abort trap: 6
        //return nodes.map{[unowned self] in XMLNode(node:$0, document:self.xmlDoc)}
    }
}

extension XMLNode {
    /// Evaluates `xpath` with `XMLFindXPath` against this node's document and wraps the matches.
    func xPath(xpath: String) -> [XMLNode] {
        return xmlNodes2XMLNodes(XMLFindXPath(self.xmlDoc, xPath: xpath))
    }
}

/* ---------- libxml2 reading helpers ---------- */

/// Returns the string produced by `xmlNodeListGetString` for `xmlNode`.
///
/// - Returns: nil when libxml2 yields no string or the bytes cannot be decoded.
func XMLNodeGetString(doc:xmlDocPtr, xmlNode:xmlNodePtr) -> String? {
    let contentChars = xmlNodeListGetString(doc, xmlNode, 1)
    if contentChars == nil {
        return nil
    }
    // The buffer was allocated for us; release it on every exit path.
    defer { free(contentChars) }
    let contentString = String(XMLChar: contentChars)
    assert(contentString != nil, "XMLNodeGetString: 值转换不成功")
    return contentString
}

/// Returns the string produced by `xmlNodeGetContent` for `xmlNode`.
///
/// - Returns: nil when libxml2 yields no content or the bytes cannot be decoded.
func XMLNodeGetContent(xmlNode:xmlNodePtr) -> String? {
    let contentChars = xmlNodeGetContent(xmlNode)
    if contentChars == nil {
        return nil
    }
    // The buffer was allocated for us; release it on every exit path.
    defer { free(contentChars) }
    let contentString = String(XMLChar: contentChars)
    assert(contentString != nil, "XMLNodeGetContent: 值转换不成功")
    return contentString
}

/// Collects the direct children of `xmlNode` for which `xmlNodeIsText` reports 0.
///
/// - Returns: The matching children in document order; empty when `xmlNode` is nil.
func XMLNodeGetChildren(xmlNode: xmlNodePtr) -> [xmlNodePtr] {
    var children = [xmlNodePtr]()
    // A nil node (e.g. the root of an empty document) has nothing to walk.
    if xmlNode == nil {
        return children
    }
    var childNodePointer = xmlNode.memory.children
    while childNodePointer != nil {
        if xmlNodeIsText(childNodePointer) == 0 {
            children.append(childNodePointer)
        }
        childNodePointer = childNodePointer.memory.next
    }
    return children
}

/// Reads every attribute of `xmlNode` into a dictionary.
///
/// An attribute whose value cannot be read is stored with an empty string. An attribute
/// whose name cannot be decoded is reported on standard output and skipped.
///
/// - Returns: Attribute names mapped to their values; empty when `xmlNode` is nil.
func XMLNodeGetAttributes(xmlNode: xmlNodePtr) -> [String: String] {
    var result:[String: String] = [String: String]()
    if xmlNode == nil {
        return result
    }
    var attribute: xmlAttrPtr = xmlNode.memory.properties
    while attribute != nil {
        if let key:String = String(XMLChar: attribute.memory.name) {
            result[key] = XMLNodeGetContent(attribute.memory.children) ?? ""
        } else {
            print((">>>>>>>>>>>>>>>>>>>>>>>>错误:", String(XMLChar: attribute.memory.name)))
        }
        attribute = attribute.memory.next
    }
    return result
}

/// Returns the value of the first attribute of `xmlNode` whose name equals `key`.
///
/// - Returns: nil when `xmlNode` is nil, no attribute matches, or the value cannot be read.
func XMLNodeGetAttribute(xmlNode: xmlNodePtr, key: String) -> String? {
    if xmlNode == nil {
        return nil
    }
    var attribute: xmlAttrPtr = xmlNode.memory.properties
    while attribute != nil {
        if key == String(XMLChar: attribute.memory.name) {
            return XMLNodeGetContent(attribute.memory.children)
        }
        attribute = attribute.memory.next
    }
    return nil
}

/// Evaluates the XPath expression `xPath` against `xmlDoc`.
///
/// - Returns: The matched node pointers; empty when the context cannot be created,
///   the evaluation fails, or nothing matches.
func XMLFindXPath(xmlDoc:xmlDocPtr, xPath: String) -> [xmlNodePtr] {
    let xPathContext = xmlXPathNewContext(xmlDoc)
    if xPathContext == nil {
        return []
    }
    defer { xmlXPathFreeContext(xPathContext) }
    xPathContext.memory.node = nil

    // withCString keeps the C buffer alive for the whole evaluation. Converting a
    // temporary array to a pointer only guarantees validity during the conversion call.
    let xPathObject = xPath.withCString {
        xmlXPathEvalExpression(UnsafePointer<xmlChar>($0), xPathContext)
    }
    if xPathObject == nil {
        return []
    }
    defer { xmlXPathFreeObject(xPathObject) }

    let nodeSet = xPathObject.memory.nodesetval
    if nodeSet == nil || nodeSet.memory.nodeNr == 0 || nodeSet.memory.nodeTab == nil {
        return []
    }

    // Copy the pointers out before the result object (and its table) is released.
    var resultNodes = [xmlNodePtr]()
    for i in 0 ..< Int(nodeSet.memory.nodeNr) {
        resultNodes.append(nodeSet.memory.nodeTab[i])
    }
    return resultNodes
}

/// Parses `data` with libxml2 as XML (`isXML == true`) or as HTML (the default).
///
/// - Parameter data: Bytes to parse; nil yields nil.
/// - Parameter encoding: Currently not forwarded to libxml2; both readers are called
///   with a nil encoding argument.
/// - Parameter isXML: Selects `xmlReadMemory` instead of `htmlReadMemory`.
/// - Returns: The parsed document, or nil when there is no data, the data is too large
///   for libxml2's `CInt` length, or the parser returns no document. The caller is
///   responsible for releasing the document with `xmlFreeDoc`.
func XMLReadNSData(data:NSData?, encoding:NSStringEncoding = NSUTF8StringEncoding, isXML:Bool = false) -> xmlDocPtr? {
    // CInt(data.length) would trap on overflow, so oversized input is rejected up front.
    guard let data = data where data.length <= Int(CInt.max) else {
        return nil
    }
    let cBuffer = UnsafePointer<CChar>(data.bytes)
    let cSize = CInt(data.length)
    // Unfinished attempt at turning `encoding` into a C string for libxml2:
    //    let cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding)
    //    let cfEncodingAsString:CFStringRef = CFStringConvertEncodingToIANACharSetName(cfEncoding)
    //    let cEncoding:UnsafePointer<CChar> = CFStringGetCStringPtr(cfEncodingAsString, CFStringEncoding(0))

    let doc: xmlDocPtr
    if isXML {
        let options = CInt(XML_PARSE_RECOVER.rawValue)
        doc = xmlReadMemory(cBuffer, cSize, nil, nil, options)
    } else {
        let options = CInt(HTML_PARSE_RECOVER.rawValue | HTML_PARSE_NOWARNING.rawValue | HTML_PARSE_NOERROR.rawValue)
        doc = htmlReadMemory(cBuffer, cSize, nil, nil, options)
    }
    // Report a NULL document as "no value" so `if let` at the call site really means
    // that parsing produced a document.
    if doc == nil {
        return nil
    }
    return doc
}

/// NSString encoding constant for GB 18030-2000.
let GB18030_2000_Encoding = CFStringConvertEncodingToNSStringEncoding(CFStringEncoding(CFStringEncodings.GB_18030_2000.rawValue))

/* ---------- Test code ---------- */

/// Small stopwatch: remembers when it was created and formats the elapsed time on demand.
class CElapseTime {
    var startTime:NSDate
    var prompt:String
    var unsed:Bool = false

    /// Starts timing now, labelling the measurement with `prompt`.
    init(prompt:String) {
        self.startTime = NSDate()
        self.prompt = prompt
    }

    /// The prompt followed by the number of seconds elapsed since creation.
    var newprompt:String {
        return "\(prompt)耗时:\(NSDate().timeIntervalSinceDate(startTime))"
    }
}

/// Reads a local sample file, parses it as HTML, touches the attributes and content of
/// every `div`, and prints how long reading and parsing took.
func testParseSina() {
    let sURL = "http://www.baidu.com"
    let encoding = NSUTF8StringEncoding
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\(sURL)")

    var timer = CElapseTime(prompt: "读取网页")
    //let sContent = NSString(urlString:sURL, encoding: encoding)
    var sContent:NSString? = nil
    do {
        try sContent = NSString(contentsOfFile: "/Volumes/Data/Document/Test/sample.txt", encoding: encoding)
    } catch {
        // Keep going (the parse step handles missing data) but say why nothing was read.
        print("Failed to read sample file: \(error)")
    }
    print(timer.newprompt)
    let sTimer1 = timer.newprompt

    timer = CElapseTime(prompt: "数据解析")
    if let doc = XMLReadNSData(sContent?.dataUsingEncoding(encoding)) {
        // The document belongs to this scope; release it once all node wrappers are done.
        defer { xmlFreeDoc(doc) }

        let rootNode = XMLNode(document: doc)
        let findNodes = rootNode.xPath("//div")
        for childNode in findNodes {
            autoreleasepool {
                let _ = (childNode.attributes, childNode.rawContent)
            }
            // if let content = childNode.rawContent {
            //     print(content)
            // }
        }
        print(findNodes.count)
    }
    print(sTimer1)
    print(timer.newprompt)
}

testParseSina()