读取大 xml 文件

Reading big xml file

我需要在 Microsoft Visual FoxPro 9 SP2 桌面应用程序中读取大 xml 文件。我已创建了包含所需列的结果游标,以及用于将 xml 转换为 XMLToCursor 可读格式的 convert.xsl 文件。

尝试过

* Three MSXML DOM documents: the input XML, the XSL stylesheet and the transformation result
source = CreateObject('MSXML.Domdocument')
stylesheet = CreateObject('MSXML.Domdocument')
resultDoc = CreateObject('MSXML.Domdocument')
resultDoc.validateOnParse = .t.
stylesheet.load('convert.xsl')
source.load( 'bigxml.xml' )
* Apply the stylesheet to the source document, writing the output into resultDoc
source.transformNodeToObject(stylesheet, @resultDoc)

* The fatal exception (Exception code=E06D7363) is reported on the line below,
* where the transformed document's .xml text is passed to XMLToCursor for the 'result' cursor
IF XMLToCursor(resultDoc.xml, 'result', 8192 )=0

但是得到了

Fatal exception 

Exception code=E06D7363 

IF XMLToCursor(resultDoc.xml, 'result', 8192 )=0

应用程序终止。

如何将大 xml 文件转换为游标?

XSL 是:

<?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?>
<!-- Flattens each /ettevotjad/ettevotja record into one Document-Ariregister element inside a VFPData root. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="xml" version="1.0" encoding="UTF-8" standalone="yes"/>

  <!-- Wrap the whole output in a VFPData root element; presumably XMLToCursor, which consumes the result, needs this root. -->
  <xsl:template match="/">
    <xsl:element name="VFPData">
      <xsl:apply-templates/>
    </xsl:element>
  </xsl:template>

  <!-- Denormalize each ettevotja record: the address sub-elements are emitted next to the top-level fields. -->
  <!-- NOTE(review): output element names are at most 10 characters, presumably shortened to fit VFP field names; confirm. -->
  <xsl:template match="/ettevotjad/ettevotja">

    <xsl:element name="Document-Ariregister">

      <xsl:element name="nimi">
        <xsl:value-of select="nimi"/>
      </xsl:element>

      <xsl:element name="ariregistr">
        <xsl:value-of select="ariregistri_kood"/>
      </xsl:element>

      <xsl:element name="kmkr_nr">
        <xsl:value-of select="kmkr_nr"/>
      </xsl:element>

      <xsl:element name="ettevotja_">
        <xsl:value-of select="ettevotja_staatus"/>
      </xsl:element>

      <xsl:element name="asukoht_et">
        <xsl:value-of select="ettevotja_aadress/asukoht_ettevotja_aadressis"/>
      </xsl:element>

      <xsl:element name="asukoha_e2">
        <xsl:value-of select="ettevotja_aadress/asukoha_ehak_tekstina"/>
      </xsl:element>

      <xsl:element name="indeks_ett">
        <xsl:value-of select="ettevotja_aadress/indeks_ettevotja_aadressis"/>
      </xsl:element>
  
    </xsl:element>
  </xsl:template>

  <!-- Suppress text nodes reached through apply-templates so that stray text is not copied to the output. -->
  <xsl:template match="text()">
  </xsl:template>

  <!-- Descend into every other element so that the templates above are reached. -->
  <xsl:template match="*">
    <xsl:apply-templates/>
  </xsl:template>

</xsl:stylesheet>

这里是使用 msxml 6.0 的解决方案,不需要 xsl 文件:

* requires msxml.6.0! ( https://www.microsoft.com/es-es/download/details.aspx?id=3988 )
*
* Loads an XML file with MSXML 6.0 and imports the child elements of the node
* addressed by xPath into the cursor "xmlImport", passing them to XMLTOCURSOR()
* in batches of m.nsteps elements.
*
* Parameters:
*   fileName - XML file to load; the sample path below is used when it is not passed
*   xPath    - XPath of the parent node whose children are imported; '/ettevotjad' when not passed
Parameters fileName,xPath

* Use the sample values only as defaults, so that arguments passed by the caller are honored
If Vartype(m.fileName) # 'C' Or Empty(m.fileName)
    fileName = "D:\Data\Xml\ettevotja_rekvisiidid_2021-01-27.xml"
Endif
If Vartype(m.xPath) # 'C' Or Empty(m.xPath)
    xPath = '/ettevotjad'
Endif

* Strip trailing slashes because '/*[...]' is appended to the path below
xPath = RTRIM(m.xPath,1,'/')

Private All

oxml = Createobject('msxml2.domDocument.6.0')

Wait 'loading document...' Window Nowait

With oXml As msxml.DOMDocument
    .Load(Fullpath(m.fileName))
    Wait Clear
    * Stop with the parser's own message when the document could not be loaded
    If .parseError.errorCode # 0
        Messagebox(.parseError.reason,16)
        Return
    Endif
Endwith


Close Tables All

nsteps = 500
tini = Seconds()
Set Escape On

x =  1

DO WHILE .t.

    * Select the next batch of up to m.nsteps child elements by position
    subset = oXml.selectNodes(Textmerge(m.xPath+'/*[ position() >= <<m.x>> and position() < <<m.x+m.nsteps>> ]'))

    IF subset.length = 0
        EXIT
    ENDIF

    cXml = ''

    * Concatenate the XML text of every element in the batch
    For Y = 1 To subset.Length
        m.cXml = m.cXml + subset.Item(m.y-1).XML
    ENDFOR

    x = m.x + subset.Length

    * MAX() keeps the rate calculation from dividing by zero when no measurable time has elapsed
    Wait Textmerge('<<m.x-1>> records  << (m.x-1) / MAX(SECONDS() - m.tini, 0.001) >> records/sec ') Window nowait

    * Wrap the batch in a single root element so it forms one XML document
    cXml = '<xml encoding="windows-1252">'+m.cXml+'</xml>'

    * The first batch creates the cursor; later batches add 8192 so rows go into the existing cursor
    Xmltocursor(m.cXml,'xmlImport',Iif(Used('xmlImport'),4+8192,0))


ENDDO


* Nothing to browse when the path matched no elements and no cursor was created
If Used('xmlImport')
    Browse Normal Font 'consolas,8'
Endif



我喜欢 Marco 的解决方案,因为至少它在 VFP 中。但是,有这样大的 XML 文件,我也会关心性能。我尝试使用 Marco 的代码,它花了 80 多秒才能完成,如果您真的希望这些字段像在 XSL 中那样分开,可能需要一些工作。

我尝试用 C# 从 XML 中读取数据,按照您的 XSL 进行转换,并写入 SQLite 数据库。整个过程耗时 7.2 秒,而且地址字段已经拆分成单独的列。如果您无论如何都想用的话,下面是 C# 代码:

// LINQPad entry point: creates the SQLite database on first run, imports the XML file
// and dumps the elapsed time.
void Main()
{
    var databasePath = @"d:\Andrus\bigdata.sqlite";
    var connectionString = $"Data Source={databasePath}";

    var timer = Stopwatch.StartNew();

    // The table only has to be created when the database file does not exist yet.
    bool databaseExists = File.Exists(databasePath);
    if (!databaseExists)
    {
        CreateDatabase(connectionString);
    }

    InsertData(connectionString, @"d:\Andrus\bigxml.xml");

    timer.Stop();
    timer.Dump("Duration");
}

/// <summary>
/// Streams every <c>ettevotja</c> element of <paramref name="xmlFile"/> into the MyData table,
/// reusing one insert command inside a single transaction.
/// </summary>
/// <param name="connectionString">Connection string of the target SQLite database.</param>
/// <param name="xmlFile">Path of the XML file; its root element must be <c>ettevotjad</c>.</param>
void InsertData(string connectionString, string xmlFile)
{
    using (SQLiteConnection conn = new SQLiteConnection(connectionString))
    using (SQLiteCommand sql = new SQLiteCommand(@"insert into MyData 
        (nimi, ariregistr, asukoht_et, asukoha_e2, indeks_ett, kmkr_nr)
        values
        (?,?,?,?,?,?)", conn))
    {
        // The statement uses positional (?) markers, so parameters are added in column order.
        // The values given here are placeholders that are overwritten for every row.
        sql.Parameters.AddWithValue("nimi", "");
        sql.Parameters.AddWithValue("ariregistr", 0);
        sql.Parameters.AddWithValue("asukoht_et", "");
        sql.Parameters.AddWithValue("asukoha_e2", "");
        sql.Parameters.AddWithValue("indeks_ett", "");
        sql.Parameters.AddWithValue("kmkr_nr", "");

        conn.Open();

        // Disposing the transaction without Commit rolls it back, so a failure part-way
        // through the import does not leave the transaction open.
        using (SQLiteTransaction transaction = conn.BeginTransaction())
        {
            XmlReaderSettings settings = new XmlReaderSettings();
            settings.IgnoreWhitespace = true;
            using (XmlReader r = XmlReader.Create(xmlFile, settings))
            {
                r.MoveToContent();
                r.ReadStartElement("ettevotjad");
                while (r.Name == "ettevotja")
                {
                    // ReadFrom consumes the whole element and leaves the reader on the next node.
                    XElement x = (XElement)XNode.ReadFrom(r);

                    // A record may lack the address element; the null-conditional lookups then
                    // yield null values instead of throwing NullReferenceException.
                    XElement address = x.Element("ettevotja_aadress");

                    string nimi = (string)x.Element("nimi");
                    int? ariregistr = (int?)x.Element("ariregistri_kood");
                    string asukoht_et = (string)address?.Element("asukoht_ettevotja_aadressis");
                    string asukoha_e2 = (string)address?.Element("asukoha_ehak_tekstina");
                    string indeks_ett = (string)address?.Element("indeks_ettevotja_aadressis");
                    string kmkr_nr = (string)x.Element("kmkr_nr");

                    sql.Parameters["nimi"].Value = nimi;
                    sql.Parameters["ariregistr"].Value = ariregistr;
                    sql.Parameters["asukoht_et"].Value = asukoht_et;
                    sql.Parameters["asukoha_e2"].Value = asukoha_e2;
                    sql.Parameters["indeks_ett"].Value = indeks_ett;
                    sql.Parameters["kmkr_nr"].Value = kmkr_nr;

                    sql.ExecuteNonQuery();
                }
                r.ReadEndElement();
            }
            transaction.Commit();
        }
    }
}

/// <summary>
/// Creates the MyData table in the SQLite database identified by <paramref name="connectionString"/>.
/// </summary>
/// <param name="connectionString">Connection string of the database to initialize.</param>
void CreateDatabase(string connectionString)
{
    const string createTableSql = @"create table MyData (
            nimi text,
            ariregistr number null,
            asukoht_et text,
            asukoha_e2 text,
            indeks_ett text,
            kmkr_nr text
            )";

    using (var connection = new SQLiteConnection(connectionString))
    using (var command = new SQLiteCommand(createTableSql, connection))
    {
        connection.Open();
        command.ExecuteNonQuery();
        connection.Close();
    }
}

PS:代码直接在 LINQPad 中运行。