在 Swift 3 中使用 LibXML2 下载时从服务器解析大 XML
Parsing large XML from server while downloading with LibXML2 in Swift 3
我对 Swift 3 中 LibXML2 的 SAX 解析器有疑问。
我想要 iOS 中 Android 中的 XMLPullParser
之类的东西。从服务器下载 XML 并在下载时解析流。
我的 XML 看起来像这样:
<?xml version="1.0" encoding="UTF-8" ?>
<ResultList id="12345678-0" platforms="A;B;C;D;E">
<Book id="1111111111" author="Author A" title="Title A" price="9.95" ... />
<Book id="1111111112" author="Author B" title="Title B" price="2.00" ... />
<Book id="1111111113" author="Author C" title="Title C" price="5.00" ... />
<ResultInfo bookcount="3" />
</ResultList>
因此所有数据都存储在属性而不是子节点中。
我主要根据这些示例class自己制作了以下内容:
XMLPerformance, XMLPerformance-Swift and iOS-XML-Streaming
import Foundation
class LibXMLParser: NSObject, URLSessionDataDelegate {
var url: URL?
var delegate: LibXMLParserDelegate?
var done = false
var context: xmlParserCtxtPtr?
var simpleSAXHandlerStruct: xmlSAXHandler = {
var handler = xmlSAXHandler()
handler.initialized = XML_SAX2_MAGIC
handler.startElementNs = startElementSAX
handler.endElementNs = endElementSAX
handler.characters = charactersFoundSAX
//handler.error = errorEncounteredSAX
return handler
}()
init(url: URL) {
super.init()
self.url = url
}
func parse() {
self.done = false
let session = URLSession(configuration: .default, delegate: self, delegateQueue: OperationQueue.main)
let dataTask = session.dataTask(with: URLRequest(url: url!))
dataTask.resume()
self.context = xmlCreatePushParserCtxt(&simpleSAXHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil)
self.delegate?.parserDidStartDocument()
repeat {
RunLoop.current.run(mode: .defaultRunLoopMode, before: Date.distantFuture)
} while !self.done
xmlFreeParserCtxt(self.context)
self.delegate?.parserDidEndDocument()
}
func urlSession(_ session: URLSession, dataTask: URLSessionDataTask, didReceive data: Data) {
print("Did receive data")
data.withUnsafeBytes { (bytes: UnsafePointer<CChar>) -> Void in
xmlParseChunk(self.context, bytes, CInt(data.count), 0)
}
}
func urlSessionDidFinishEvents(forBackgroundURLSession session: URLSession) {
xmlParseChunk(self.context, nil, 0, 1)
self.done = true
}
func urlSession(_ session: URLSession, didBecomeInvalidWithError error: Error?) {
self.done = true
//self.delegate?.parserErrorOccurred(error)
}
func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
self.done = true
//self.delegate?.parserErrorOccurred(error)
}
}
private func startElementSAX(_ ctx: UnsafeMutableRawPointer?, name: UnsafePointer<xmlChar>?, prefix: UnsafePointer<xmlChar>?, URI: UnsafePointer<xmlChar>?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?) {
let parser = Unmanaged<LibXMLParser>.fromOpaque(ctx!).takeUnretainedValue()
parser.delegate?.parserDidStartElement(String(cString: name!), nb_attributes: nb_attributes, attributes: attributes)
}
private func endElementSAX(_ ctx: UnsafeMutableRawPointer?, name: UnsafePointer<xmlChar>?,
prefix: UnsafePointer<xmlChar>?,
URI: UnsafePointer<xmlChar>?) {
let parser = Unmanaged<LibXMLParser>.fromOpaque(ctx!).takeUnretainedValue()
parser.delegate?.parserDidEndElement(String(cString: name!))
}
private func charactersFoundSAX(_ ctx: UnsafeMutableRawPointer?, ch: UnsafePointer<xmlChar>?, len: CInt) {
let parser = Unmanaged<LibXMLParser>.fromOpaque(ctx!).takeUnretainedValue()
parser.delegate?.parserFoundCharacters(String(cString: ch!))
}
我用 URL
初始化这个 class。当我调用 parse()
时,它会创建一个 URLSession
和一个 URLSessionDataTask
,并带有对自身的委托来覆盖方法 didReceive data: Data
。
之后我创建一个 xmlParserCtxtPtr
并循环直到 dataTask 完成。
当它接收到数据时,我使用 xmlParseChunk
方法对其进行解析,然后 startElementSAX
调用我从 ViewController class 设置的委托。 (我只需要元素名称,属性数量和属性。)
到目前为止一切顺利。
在我的 ViewController (UITableViewController) 中,我有以下代码:
func downloadBooksLibXML() {
print("Downloading…")
UIApplication.shared.isNetworkActivityIndicatorVisible = true
DispatchQueue.global().async {
print("Setting up parser")
let parser = LibXMLParser(url: URL(string: self.baseUrl + self.parameters!)!)
parser.delegate = self
parser.parse()
}
}
func parserDidStartDocument() {
}
func parserDidEndDocument() {
DispatchQueue.main.sync {
UIApplication.shared.isNetworkActivityIndicatorVisible = false
self.isDone = true
print("Finished")
}
}
func parserDidStartElement(_ elementName: String, nb_attributes: CInt, attributes: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?) {
print(elementName)
switch elementName {
case "Book":
DispatchQueue.main.async {
let book = self.buildBook(nb_attributes: nb_attributes, attributes: attributes)
self.books.append(book)
self.tableView.beginUpdates()
self.tableView.insertRows(at: [IndexPath(row: self.books.count - 1, section: 0)], with: .automatic)
self.tableView.endUpdates()
self.navigationItem.title = String(format: NSLocalizedString("books_found", comment: "Books found"), "\(self.books.count)")
}
case "ResultList":
break
case "ResultInfo":
break
default:
break
}
}
func buildBook(nb_attributes: CInt, attributes: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?) -> Book {
let fields = 5 /* (localname/prefix/URI/value/end) */
let book = Book()
for i in 0..<Int(nb_attributes) {
if let localname = attributes?[i * fields + 0],
//let prefix = attributes?[i * fields + 1],
//let URI = attributes?[i * fields + 2],
let value_start = attributes?[i * fields + 3],
let value_end = attributes?[i * fields + 4] {
let localnameString = String(cString: localname)
let string_start = String(cString: value_start)
let string_end = String(cString: value_end)
let diff = string_start.characters.count - string_end.characters.count
if diff > 0 {
let value = string_start.substring(to: string_start.index(string_start.startIndex, offsetBy: diff))
book.setValue(value, forKey: localnameString)
}
}
}
return book
}
func parserDidEndElement(_ elementName: String) {
}
func parserFoundCharacters(_ string: String) {
}
func parserErrorOccurred(_ parseError: Error?) {
}
------
更新
nwellnhof 的回答解决了获取属性值的问题。我已经将上面的代码更新为更好的代码。它现在不再遍历所有属性。
现在我的新问题:
我创建了方法 buildBook
来获取 XML 属性的 Book
对象。
我主要将这里的方法 What is the right way to get attribute value in libXML sax parser (C++)? 翻译成 Swift 并使用 setValue(value: Any?, forKey: String)
来设置我的书对象的属性。
但现在我的问题是它没有更新 tableView。
我已经尝试使用 DispatchQueue.global().sync
在后台线程中同步执行 buildBook
方法,并使用 DispatchQueue.main.async
在异步主线程中执行 tableView 更新。但随后它在 tableView.endUpdates()
处崩溃,尽管它在主线程中。
------
非常感谢任何帮助。
似乎是一个简单的差一错误。要在 C 中迭代属性数组,我会这样写:
for (int i = 0; i < nb_attributes; i++)
但是您使用的 closed range operator 包括上限:
for i in 0...Int(nb_attributes)
所以你应该改用 half-open range operator:
for i in 0..<Int(nb_attributes)
顺便说一下,libxml2 也有一个 pull parser interface 仿照 C# 的 XmlTextReader
,它比 SAX 解析器更容易使用。
我对 Swift 3 中 LibXML2 的 SAX 解析器有疑问。
我想要 iOS 中 Android 中的 XMLPullParser
之类的东西。从服务器下载 XML 并在下载时解析流。
我的 XML 看起来像这样:
<?xml version="1.0" encoding="UTF-8" ?>
<ResultList id="12345678-0" platforms="A;B;C;D;E">
<Book id="1111111111" author="Author A" title="Title A" price="9.95" ... />
<Book id="1111111112" author="Author B" title="Title B" price="2.00" ... />
<Book id="1111111113" author="Author C" title="Title C" price="5.00" ... />
<ResultInfo bookcount="3" />
</ResultList>
因此所有数据都存储在属性而不是子节点中。
我主要根据这些示例class自己制作了以下内容:
XMLPerformance, XMLPerformance-Swift and iOS-XML-Streaming
import Foundation
class LibXMLParser: NSObject, URLSessionDataDelegate {
var url: URL?
var delegate: LibXMLParserDelegate?
var done = false
var context: xmlParserCtxtPtr?
var simpleSAXHandlerStruct: xmlSAXHandler = {
var handler = xmlSAXHandler()
handler.initialized = XML_SAX2_MAGIC
handler.startElementNs = startElementSAX
handler.endElementNs = endElementSAX
handler.characters = charactersFoundSAX
//handler.error = errorEncounteredSAX
return handler
}()
init(url: URL) {
super.init()
self.url = url
}
func parse() {
self.done = false
let session = URLSession(configuration: .default, delegate: self, delegateQueue: OperationQueue.main)
let dataTask = session.dataTask(with: URLRequest(url: url!))
dataTask.resume()
self.context = xmlCreatePushParserCtxt(&simpleSAXHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil)
self.delegate?.parserDidStartDocument()
repeat {
RunLoop.current.run(mode: .defaultRunLoopMode, before: Date.distantFuture)
} while !self.done
xmlFreeParserCtxt(self.context)
self.delegate?.parserDidEndDocument()
}
func urlSession(_ session: URLSession, dataTask: URLSessionDataTask, didReceive data: Data) {
print("Did receive data")
data.withUnsafeBytes { (bytes: UnsafePointer<CChar>) -> Void in
xmlParseChunk(self.context, bytes, CInt(data.count), 0)
}
}
func urlSessionDidFinishEvents(forBackgroundURLSession session: URLSession) {
xmlParseChunk(self.context, nil, 0, 1)
self.done = true
}
func urlSession(_ session: URLSession, didBecomeInvalidWithError error: Error?) {
self.done = true
//self.delegate?.parserErrorOccurred(error)
}
func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
self.done = true
//self.delegate?.parserErrorOccurred(error)
}
}
private func startElementSAX(_ ctx: UnsafeMutableRawPointer?, name: UnsafePointer<xmlChar>?, prefix: UnsafePointer<xmlChar>?, URI: UnsafePointer<xmlChar>?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?) {
let parser = Unmanaged<LibXMLParser>.fromOpaque(ctx!).takeUnretainedValue()
parser.delegate?.parserDidStartElement(String(cString: name!), nb_attributes: nb_attributes, attributes: attributes)
}
private func endElementSAX(_ ctx: UnsafeMutableRawPointer?, name: UnsafePointer<xmlChar>?,
prefix: UnsafePointer<xmlChar>?,
URI: UnsafePointer<xmlChar>?) {
let parser = Unmanaged<LibXMLParser>.fromOpaque(ctx!).takeUnretainedValue()
parser.delegate?.parserDidEndElement(String(cString: name!))
}
private func charactersFoundSAX(_ ctx: UnsafeMutableRawPointer?, ch: UnsafePointer<xmlChar>?, len: CInt) {
let parser = Unmanaged<LibXMLParser>.fromOpaque(ctx!).takeUnretainedValue()
parser.delegate?.parserFoundCharacters(String(cString: ch!))
}
我用 URL
初始化这个 class。当我调用 parse()
时,它会创建一个 URLSession
和一个 URLSessionDataTask
,并带有对自身的委托来覆盖方法 didReceive data: Data
。
之后我创建一个 xmlParserCtxtPtr
并循环直到 dataTask 完成。
当它接收到数据时,我使用 xmlParseChunk
方法对其进行解析,然后 startElementSAX
调用我从 ViewController class 设置的委托。 (我只需要元素名称,属性数量和属性。)
到目前为止一切顺利。
在我的 ViewController (UITableViewController) 中,我有以下代码:
func downloadBooksLibXML() {
print("Downloading…")
UIApplication.shared.isNetworkActivityIndicatorVisible = true
DispatchQueue.global().async {
print("Setting up parser")
let parser = LibXMLParser(url: URL(string: self.baseUrl + self.parameters!)!)
parser.delegate = self
parser.parse()
}
}
func parserDidStartDocument() {
}
func parserDidEndDocument() {
DispatchQueue.main.sync {
UIApplication.shared.isNetworkActivityIndicatorVisible = false
self.isDone = true
print("Finished")
}
}
func parserDidStartElement(_ elementName: String, nb_attributes: CInt, attributes: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?) {
print(elementName)
switch elementName {
case "Book":
DispatchQueue.main.async {
let book = self.buildBook(nb_attributes: nb_attributes, attributes: attributes)
self.books.append(book)
self.tableView.beginUpdates()
self.tableView.insertRows(at: [IndexPath(row: self.books.count - 1, section: 0)], with: .automatic)
self.tableView.endUpdates()
self.navigationItem.title = String(format: NSLocalizedString("books_found", comment: "Books found"), "\(self.books.count)")
}
case "ResultList":
break
case "ResultInfo":
break
default:
break
}
}
func buildBook(nb_attributes: CInt, attributes: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?) -> Book {
let fields = 5 /* (localname/prefix/URI/value/end) */
let book = Book()
for i in 0..<Int(nb_attributes) {
if let localname = attributes?[i * fields + 0],
//let prefix = attributes?[i * fields + 1],
//let URI = attributes?[i * fields + 2],
let value_start = attributes?[i * fields + 3],
let value_end = attributes?[i * fields + 4] {
let localnameString = String(cString: localname)
let string_start = String(cString: value_start)
let string_end = String(cString: value_end)
let diff = string_start.characters.count - string_end.characters.count
if diff > 0 {
let value = string_start.substring(to: string_start.index(string_start.startIndex, offsetBy: diff))
book.setValue(value, forKey: localnameString)
}
}
}
return book
}
func parserDidEndElement(_ elementName: String) {
}
func parserFoundCharacters(_ string: String) {
}
func parserErrorOccurred(_ parseError: Error?) {
}
------
更新
nwellnhof 的回答解决了获取属性值的问题。我已经将上面的代码更新为更好的代码。它现在不再遍历所有属性。 现在我的新问题:
我创建了方法 buildBook
来获取 XML 属性的 Book
对象。
我主要将这里的方法 What is the right way to get attribute value in libXML sax parser (C++)? 翻译成 Swift 并使用 setValue(value: Any?, forKey: String)
来设置我的书对象的属性。
但现在我的问题是它没有更新 tableView。
我已经尝试使用 DispatchQueue.global().sync
在后台线程中同步执行 buildBook
方法,并使用 DispatchQueue.main.async
在异步主线程中执行 tableView 更新。但随后它在 tableView.endUpdates()
处崩溃,尽管它在主线程中。
------
非常感谢任何帮助。
似乎是一个简单的差一错误。要在 C 中迭代属性数组,我会这样写:
for (int i = 0; i < nb_attributes; i++)
但是您使用的 closed range operator 包括上限:
for i in 0...Int(nb_attributes)
所以你应该改用 half-open range operator:
for i in 0..<Int(nb_attributes)
顺便说一下,libxml2 也有一个 pull parser interface 仿照 C# 的 XmlTextReader
,它比 SAX 解析器更容易使用。