XML 正在解析(性能问题)

XML Parsing (Performance Problems)

首先,让我声明我受困于我正在使用的当前堆栈,目前无法过渡到其他堆栈。与其让我知道我应该使用其他东西,不如让我知道我不能...所以让我们跳过它。

我的用户主要是以下几类:

基础知识:

非基础知识:

<root>
    <*page*>
        <row>
            <*column*></*column*>
        </row>
    </*page*>
</root>

示例输出:

<root>
    <tab_awesome>
        <row>
            <col1>Value 1</col1>
            <col2>Value 2</col2>
        </row>
    </tab_awesome>
</root>

然后我使用这个 XML 并当前使用 MSXML 的 "Load" 方法从 Recordset 加载字符串值。加载并进行一些验证后,我会将 XML 转换为导出到 [N] 个选项卡等的标准字典结构。

{
    "page_count": 1,
    "page_names": ["tab_awesome"],
    "pages": {
        "tab_awesome": {
            "page_name": "tab_awesome",
            "row_count": 1,
            "column_count": 2,
            "column_names": ["col1", "col2"]
            "data": [["Value 1", "Value 2"]]
        }
    }
}

问题:

我的想法:

我无法使用 API 调用 windows 和 VBA。

hmm...可能的一件事是 return 每个 Tab 作为 Recordset 中的另一行(仍然是字符串 XML)。对于每一行,创建一个 Excel 的新进程,parse/load 该选项卡的信息,将 XML 发送回主进程,关闭 Excel 的那些辅助实例...

嗯...我很幸运能够解决这个问题。使用我拥有的机器,我能够将处理时间缩短到 18 秒。


    Option Explicit

    Implements IVBSAXContentHandler

    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ' Defined Events
    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Public Event NoRecordsReturned()
    Public Event returnedRecordset(ByRef records As Dictionary)
    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ' Enumerations & Types
    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Private Enum enumLevel
        eRoot = 0
        ePage = 1
        eRow = 2
        eColumn = 3
    End Enum

    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ' Class Variables
    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Private saxReader As New SAXXMLReader60
    Private Container As Dictionary

    Private DEPTH As Long
    Private page_name As String
    Private page_idx As Long
    Private row_idx As Long
    Private column_idx As Long
    Private column_value As String

    Private page_data() As String
    Private page_data_len As Long
    Private lst_first_row_headers As Object
    Private lst_first_row_values As Object
    Private lst_page_names As Object

    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ' Class: Initialization & Termination
    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Private Sub Class_Initialize()
        Call Build_Main_Container

        Set lst_first_row_values = CreateObject("System.Collections.ArrayList")
        Set lst_first_row_headers = CreateObject("System.Collections.ArrayList")
        Set lst_page_names = CreateObject("System.Collections.ArrayList")
    End Sub
    ' ------------------
    Private Sub Class_Terminate()
        Set lst_first_row_values = Nothing
        Set lst_first_row_headers = Nothing
        Set lst_page_names = Nothing
        Set Container = Nothing
    End Sub

    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ' Class Properties
    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Public Property Let parse(ByRef strRecordset As String)
        Set saxReader.contentHandler = Me
        saxReader.parse strRecordset
    End Property

    Private Property Set IVBSAXContentHandler_documentLocator(ByVal RHS As MSXML2.IVBSAXLocator)
    End Property

    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ' Private Subroutines
    ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Private Sub IVBSAXContentHandler_endPrefixMapping(strPrefix As String)
    End Sub
    Private Sub IVBSAXContentHandler_ignorableWhitespace(strChars As String)
    End Sub
    Private Sub IVBSAXContentHandler_processingInstruction(strTarget As String, strData As String)
    End Sub
    Private Sub IVBSAXContentHandler_skippedEntity(strName As String)
    End Sub
    Private Sub IVBSAXContentHandler_startPrefixMapping(strPrefix As String, strURI As String)
    End Sub

    ' // Start of useful Subs & Funcs
    Private Sub IVBSAXContentHandler_startDocument()
        DEPTH = 0
        page_idx = 0
        row_idx = 0
        column_idx = 0
    End Sub

    Private Sub IVBSAXContentHandler_endDocument()
        If Container("page_count") = 0 Then
            RaiseEvent NoRecordsReturned
        Else
            ' // Finished and sending data back to consumer
            RaiseEvent returnedRecordset(Container)
        End If
    End Sub

    ' //////////////////////////////////////////////
    Private Sub IVBSAXContentHandler_startElement(strNamespaceURI As String, strLocalName As String, strQName As String, ByVal oAttributes As MSXML2.IVBSAXAttributes)
        Select Case DEPTH
            'Case eRoot
            ' // Not in use
            Case ePage
                page_name = strLocalName
            'Case eRow
            ' // Not in use
            Case eColumn
                If Not oAttributes.Length = 0 Then
                    If IsNil(oAttributes) Then
                        Call IVBSAXContentHandler_characters(vbNullString)
                    End If
                End If
        End Select
        ' // This goes @ the bottom
        DEPTH = DEPTH + 1
    End Sub

    ' //////////////////////////////////////////////
    Private Sub IVBSAXContentHandler_endElement(strNamespaceURI As String, strLocalName As String, strQName As String)
        ' // This goes @ the top \ '
        DEPTH = DEPTH - 1
        ' // ------------------- \ '
        Select Case DEPTH
            'Case eRoot
            ' // Not in use
            Case ePage
                Call pageEnd(page_name, row_idx - 1)
                page_idx = page_idx + 1
                ' // Reset: New Page
                row_idx = 0
                page_name = Empty
            Case eRow
                Call rowEnd(row_idx, column_idx - 1)
                row_idx = row_idx + 1
                ' // Reset: New Row
                column_idx = 0
                column_value = Empty
            Case eColumn
                Call columnData(row_idx, column_idx, strLocalName, column_value)
                column_idx = column_idx + 1
        End Select
    End Sub
    ' //////////////////////////////////////////////
    Private Sub IVBSAXContentHandler_characters(strChars As String)
        column_value = strChars
    End Sub

    Private Sub pageEnd(ByRef name As String, ByRef last_row_idx As Long)
        lst_page_names.Add name
        Call ProcessPage(name, last_row_idx)
    End Sub

    Private Sub rowEnd(ByRef row_idx As Long, ByRef last_column_idx As Long)
        If row_idx = 0 Then
            Call CreatePageArray(last_column_idx)
        End If
    End Sub

    Private Sub columnData(ByRef row_idx As Long, ByRef column_idx As Long, ByRef tag As String, ByRef value As String)
        If row_idx  0 Then
            Call ArrayAppend(row_idx, column_idx, value)
            Exit Sub
        Else
        ' // First row: need to add column values to ArrayList
            lst_first_row_headers.Add tag
            lst_first_row_values.Add value
            Exit Sub
        End If
    End Sub

    Private Sub ArrayAppend(ByRef row_idx As Long, ByRef col_idx As Long, ByRef val As String)
        If row_idx