读取大 xml 文件
Reading big xml file
我需要在 Microsoft Visual FoxPro 9 SP2 桌面应用程序中读取大 xml 文件。
已创建包含所需列的结果游标,以及用于将 xml 转换为 XMLToCursor 可读格式的 convert.xsl 文件。
尝试过:
* Transforms bigxml.xml with convert.xsl through MSXML and hands the result to XMLToCursor.
* Three DOM documents are used: the input XML, the stylesheet, and the transform output.
source = CreateObject('MSXML.Domdocument')
stylesheet = CreateObject('MSXML.Domdocument')
resultDoc = CreateObject('MSXML.Domdocument')
resultDoc.validateOnParse = .t.
stylesheet.load('convert.xsl')
source.load( 'bigxml.xml' )
* The transformed tree is written into resultDoc, which is passed by reference (@).
source.transformNodeToObject(stylesheet, @resultDoc)
* The fatal exception (code E06D7363) is reported on the next line.
* resultDoc.xml is passed as the XML argument and 8192 as the flags argument;
* the body of this IF is not part of the snippet.
IF XMLToCursor(resultDoc.xml, 'result', 8192 )=0
但是得到了
Fatal exception
Exception code=E06D7363
出错的代码行是:
IF XMLToCursor(resultDoc.xml, 'result', 8192 )=0
应用程序终止。
如何将大 xml 文件转换为游标?
XSL 是:
<?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Flattens every /ettevotjad/ettevotja record into one Document-Ariregister row element
     whose children carry shortened field names. -->
<xsl:output method="xml" version="1.0" encoding="UTF-8" standalone="yes"/>
<!-- Wrap the whole output in a single VFPData root element. The calling code passes the
     result to VFP's XMLToCursor; presumably that is why this root name is used (confirm). -->
<xsl:template match="/">
<xsl:element name="VFPData">
<xsl:apply-templates/>
</xsl:element>
</xsl:template>
<!-- Denormalize: emit one row per ettevotja, copying the selected fields and lifting the
     ettevotja_aadress sub-fields up to the same level. Output names are at most 10
     characters long (presumably to fit VFP field-name limits; confirm). -->
<xsl:template match="/ettevotjad/ettevotja">
<xsl:element name="Document-Ariregister">
<xsl:element name="nimi">
<xsl:value-of select="nimi"/>
</xsl:element>
<xsl:element name="ariregistr">
<xsl:value-of select="ariregistri_kood"/>
</xsl:element>
<xsl:element name="kmkr_nr">
<xsl:value-of select="kmkr_nr"/>
</xsl:element>
<xsl:element name="ettevotja_">
<xsl:value-of select="ettevotja_staatus"/>
</xsl:element>
<xsl:element name="asukoht_et">
<xsl:value-of select="ettevotja_aadress/asukoht_ettevotja_aadressis"/>
</xsl:element>
<xsl:element name="asukoha_e2">
<xsl:value-of select="ettevotja_aadress/asukoha_ehak_tekstina"/>
</xsl:element>
<xsl:element name="indeks_ett">
<xsl:value-of select="ettevotja_aadress/indeks_ettevotja_aadressis"/>
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Omit text nodes that are not explicitly selected above (empty template). -->
<xsl:template match="text()">
</xsl:template>
<!-- For every other element, just descend into its children. -->
<xsl:template match="*">
<xsl:apply-templates/>
</xsl:template>
</xsl:stylesheet>
这里是使用 msxml 6.0 的解决方案,不需要 xsl 文件:
* Loads an XML file with MSXML 6.0 and imports the child elements of the xPath node
* into the cursor 'xmlImport', nsteps nodes per XMLToCursor call.
* requires msxml.6.0! ( https://www.microsoft.com/es-es/download/details.aspx?id=3988 )
*
Parameters fileName,xPath
* NOTE(review): both parameters are overwritten immediately with hard-coded values.
fileName = "D:\Data\Xml\ettevotja_rekvisiidid_2021-01-27.xml"
xPath = '/ettevotjad'
* Strip a trailing '/' so that '/*[...]' can be appended to the path below.
xPath = RTRIM(m.xPath,1,'/')
Private All
oxml = Createobject('msxml2.domDocument.6.0')
Wait 'loading document...' Window Nowait
With oXml As msxml.DOMDocument
.Load(Fullpath(m.fileName))
Wait Clear
* Show the parser's reason and stop if the document did not load cleanly.
If .parseError.errorCode # 0
Messagebox(.parseError.reason,16)
Return
Endif
Endwith
Close Tables All
* Batch size: number of child nodes converted per XMLToCursor call.
nsteps = 500
* Start time, used only for the records/sec progress message.
tini = Seconds()
Set Escape On
* x is the 1-based position of the first node of the next batch.
x = 1
DO WHILE .t.
* Select the children at positions [x, x+nsteps) under the xPath node.
subset = oXml.selectNodes(Textmerge(m.xPath+'/*[ position() >= <<m.x>> and position() < <<m.x+m.nsteps>> ]'))
* An empty selection means every node has been processed.
IF subset.length = 0
EXIT
ENDIF
* Concatenate the serialized XML of each node in this batch (Item() is 0-based).
cXml = ''
For Y = 1 To subset.Length
m.cXml = m.cXml + subset.Item(m.y-1).XML
ENDFOR
x = m.x + subset.Length
Wait Textmerge('<<m.x-1>> records << (m.x-1) / (SECONDS() - m.tini) >> records/sec ') Window nowait
* Wrap the batch in a single <xml> root element so it forms one document.
cXml = '<xml encoding="windows-1252">'+m.cXml+'</xml>'
* Flags are 0 while 'xmlImport' does not exist yet and 4+8192 once it is in use
* (8192 presumably appends to the existing cursor; confirm in the XMLTOCURSOR docs).
Xmltocursor(m.cXml,'xmlImport',Iif(Used('xmlImport'),4+8192,0))
ENDDO
Browse Normal Font 'consolas,8'
我喜欢 Marco 的解决方案,因为至少它是在 VFP 中实现的。但是,面对这么大的 XML 文件,我也会关心性能。我试用了 Marco 的代码,它花了 80 多秒才完成;而且如果您确实希望像 XSL 中那样把这些字段拆分开,可能还需要再做一些工作。
我尝试用 C# 从 XML 中读取数据,按照您的 XSL 进行转换,并写入 SQLite 数据库。整个过程花了 7.2 秒,并且地址字段已经拆分好了。
下面是 C# 代码,如果您仍想使用的话:
// LINQPad entry point: creates the SQLite file's table on first run, imports the XML
// file, then dumps the elapsed time.
void Main()
{
    string dataFile = @"d:\Andrus\bigdata.sqlite";
    string constr = "Data Source=" + dataFile;

    Stopwatch sw = Stopwatch.StartNew();

    // The table is only created when the database file does not exist yet.
    bool databaseMissing = !File.Exists(dataFile);
    if (databaseMissing)
    {
        CreateDatabase(constr);
    }

    InsertData(constr, @"d:\Andrus\bigxml.xml");

    sw.Stop();
    sw.Dump("Duration");
}
/// <summary>
/// Streams <paramref name="xmlFile"/> and inserts one MyData row per ettevotja element,
/// all inside a single transaction.
/// </summary>
/// <param name="connectionString">SQLite connection string; the database must already contain the MyData table.</param>
/// <param name="xmlFile">Path of the XML file whose root element is ettevotjad.</param>
void InsertData(string connectionString, string xmlFile)
{
    using (SQLiteConnection conn = new SQLiteConnection(connectionString))
    using (SQLiteCommand sql = new SQLiteCommand(@"insert into MyData
(nimi, ariregistr, asukoht_et, asukoha_e2, indeks_ett, kmkr_nr)
values
(?,?,?,?,?,?)", conn))
    {
        // The statement uses '?' placeholders, so the parameters are added in the same
        // order as the column list. The initial values are placeholders only.
        sql.Parameters.AddWithValue("nimi", "");
        sql.Parameters.AddWithValue("ariregistr", 0);
        sql.Parameters.AddWithValue("asukoht_et", "");
        sql.Parameters.AddWithValue("asukoha_e2", "");
        sql.Parameters.AddWithValue("indeks_ett", "");
        sql.Parameters.AddWithValue("kmkr_nr", "");

        conn.Open();

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.IgnoreWhitespace = true;

        // Disposing the transaction without Commit (e.g. when an exception escapes the
        // loop) rolls the partial import back deterministically.
        using (SQLiteTransaction transaction = conn.BeginTransaction())
        {
            using (XmlReader r = XmlReader.Create(xmlFile, settings))
            {
                r.MoveToContent();
                r.ReadStartElement("ettevotjad");

                while (r.Name == "ettevotja")
                {
                    // Materialize only the current record; ReadFrom advances the reader
                    // past it, so the whole file is never held in memory.
                    XElement x = (XElement)XNode.ReadFrom(r);

                    // A record without an address element yields null address fields
                    // instead of throwing a NullReferenceException.
                    XElement address = x.Element("ettevotja_aadress");

                    string nimi = (string)x.Element("nimi");
                    int? ariregistr = (int?)x.Element("ariregistri_kood");
                    string asukoht_et = (string)address?.Element("asukoht_ettevotja_aadressis");
                    string asukoha_e2 = (string)address?.Element("asukoha_ehak_tekstina");
                    string indeks_ett = (string)address?.Element("indeks_ettevotja_aadressis");
                    string kmkr_nr = (string)x.Element("kmkr_nr");

                    sql.Parameters["nimi"].Value = nimi;
                    sql.Parameters["ariregistr"].Value = ariregistr;
                    sql.Parameters["asukoht_et"].Value = asukoht_et;
                    sql.Parameters["asukoha_e2"].Value = asukoha_e2;
                    sql.Parameters["indeks_ett"].Value = indeks_ett;
                    sql.Parameters["kmkr_nr"].Value = kmkr_nr;
                    sql.ExecuteNonQuery();
                }

                r.ReadEndElement();
            }

            transaction.Commit();
        }
    }
}
/// <summary>
/// Opens the database named by <paramref name="connectionString"/> and creates the
/// MyData table in it.
/// </summary>
/// <param name="connectionString">SQLite connection string of the target database.</param>
void CreateDatabase(string connectionString)
{
    const string createTableSql = @"create table MyData (
nimi text,
ariregistr number null,
asukoht_et text,
asukoha_e2 text,
indeks_ett text,
kmkr_nr text
)";

    using (SQLiteConnection conn = new SQLiteConnection(connectionString))
    using (SQLiteCommand sql = new SQLiteCommand(createTableSql, conn))
    {
        conn.Open();
        sql.ExecuteNonQuery();
        conn.Close();
    }
}
PS:代码可直接在 LINQPad 中运行。
我需要在 Microsoft Visual FoxPro 9 SP2 桌面应用程序中读取大 xml 文件。已创建包含所需列的结果游标,以及用于将 xml 转换为 XMLToCursor 可读格式的 convert.xsl 文件。
尝试过:
* Transforms bigxml.xml with convert.xsl through MSXML and hands the result to XMLToCursor.
* Three DOM documents are used: the input XML, the stylesheet, and the transform output.
source = CreateObject('MSXML.Domdocument')
stylesheet = CreateObject('MSXML.Domdocument')
resultDoc = CreateObject('MSXML.Domdocument')
resultDoc.validateOnParse = .t.
stylesheet.load('convert.xsl')
source.load( 'bigxml.xml' )
* The transformed tree is written into resultDoc, which is passed by reference (@).
source.transformNodeToObject(stylesheet, @resultDoc)
* The fatal exception (code E06D7363) is reported on the next line.
* resultDoc.xml is passed as the XML argument and 8192 as the flags argument;
* the body of this IF is not part of the snippet.
IF XMLToCursor(resultDoc.xml, 'result', 8192 )=0
但是得到了
Fatal exception
Exception code=E06D7363
出错的代码行是:
IF XMLToCursor(resultDoc.xml, 'result', 8192 )=0
应用程序终止。
如何将大 xml 文件转换为游标?
XSL 是:
<?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Flattens every /ettevotjad/ettevotja record into one Document-Ariregister row element
     whose children carry shortened field names. -->
<xsl:output method="xml" version="1.0" encoding="UTF-8" standalone="yes"/>
<!-- Wrap the whole output in a single VFPData root element. The calling code passes the
     result to VFP's XMLToCursor; presumably that is why this root name is used (confirm). -->
<xsl:template match="/">
<xsl:element name="VFPData">
<xsl:apply-templates/>
</xsl:element>
</xsl:template>
<!-- Denormalize: emit one row per ettevotja, copying the selected fields and lifting the
     ettevotja_aadress sub-fields up to the same level. Output names are at most 10
     characters long (presumably to fit VFP field-name limits; confirm). -->
<xsl:template match="/ettevotjad/ettevotja">
<xsl:element name="Document-Ariregister">
<xsl:element name="nimi">
<xsl:value-of select="nimi"/>
</xsl:element>
<xsl:element name="ariregistr">
<xsl:value-of select="ariregistri_kood"/>
</xsl:element>
<xsl:element name="kmkr_nr">
<xsl:value-of select="kmkr_nr"/>
</xsl:element>
<xsl:element name="ettevotja_">
<xsl:value-of select="ettevotja_staatus"/>
</xsl:element>
<xsl:element name="asukoht_et">
<xsl:value-of select="ettevotja_aadress/asukoht_ettevotja_aadressis"/>
</xsl:element>
<xsl:element name="asukoha_e2">
<xsl:value-of select="ettevotja_aadress/asukoha_ehak_tekstina"/>
</xsl:element>
<xsl:element name="indeks_ett">
<xsl:value-of select="ettevotja_aadress/indeks_ettevotja_aadressis"/>
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Omit text nodes that are not explicitly selected above (empty template). -->
<xsl:template match="text()">
</xsl:template>
<!-- For every other element, just descend into its children. -->
<xsl:template match="*">
<xsl:apply-templates/>
</xsl:template>
</xsl:stylesheet>
这里是使用 msxml 6.0 的解决方案,不需要 xsl 文件:
* Loads an XML file with MSXML 6.0 and imports the child elements of the xPath node
* into the cursor 'xmlImport', nsteps nodes per XMLToCursor call.
* requires msxml.6.0! ( https://www.microsoft.com/es-es/download/details.aspx?id=3988 )
*
Parameters fileName,xPath
* NOTE(review): both parameters are overwritten immediately with hard-coded values.
fileName = "D:\Data\Xml\ettevotja_rekvisiidid_2021-01-27.xml"
xPath = '/ettevotjad'
* Strip a trailing '/' so that '/*[...]' can be appended to the path below.
xPath = RTRIM(m.xPath,1,'/')
Private All
oxml = Createobject('msxml2.domDocument.6.0')
Wait 'loading document...' Window Nowait
With oXml As msxml.DOMDocument
.Load(Fullpath(m.fileName))
Wait Clear
* Show the parser's reason and stop if the document did not load cleanly.
If .parseError.errorCode # 0
Messagebox(.parseError.reason,16)
Return
Endif
Endwith
Close Tables All
* Batch size: number of child nodes converted per XMLToCursor call.
nsteps = 500
* Start time, used only for the records/sec progress message.
tini = Seconds()
Set Escape On
* x is the 1-based position of the first node of the next batch.
x = 1
DO WHILE .t.
* Select the children at positions [x, x+nsteps) under the xPath node.
subset = oXml.selectNodes(Textmerge(m.xPath+'/*[ position() >= <<m.x>> and position() < <<m.x+m.nsteps>> ]'))
* An empty selection means every node has been processed.
IF subset.length = 0
EXIT
ENDIF
* Concatenate the serialized XML of each node in this batch (Item() is 0-based).
cXml = ''
For Y = 1 To subset.Length
m.cXml = m.cXml + subset.Item(m.y-1).XML
ENDFOR
x = m.x + subset.Length
Wait Textmerge('<<m.x-1>> records << (m.x-1) / (SECONDS() - m.tini) >> records/sec ') Window nowait
* Wrap the batch in a single <xml> root element so it forms one document.
cXml = '<xml encoding="windows-1252">'+m.cXml+'</xml>'
* Flags are 0 while 'xmlImport' does not exist yet and 4+8192 once it is in use
* (8192 presumably appends to the existing cursor; confirm in the XMLTOCURSOR docs).
Xmltocursor(m.cXml,'xmlImport',Iif(Used('xmlImport'),4+8192,0))
ENDDO
Browse Normal Font 'consolas,8'
我喜欢 Marco 的解决方案,因为至少它是在 VFP 中实现的。但是,面对这么大的 XML 文件,我也会关心性能。我试用了 Marco 的代码,它花了 80 多秒才完成;而且如果您确实希望像 XSL 中那样把这些字段拆分开,可能还需要再做一些工作。
我尝试用 C# 从 XML 中读取数据,按照您的 XSL 进行转换,并写入 SQLite 数据库。整个过程花了 7.2 秒,并且地址字段已经拆分好了。下面是 C# 代码,如果您仍想使用的话:
// LINQPad entry point: creates the SQLite file's table on first run, imports the XML
// file, then dumps the elapsed time.
void Main()
{
    string dataFile = @"d:\Andrus\bigdata.sqlite";
    string constr = "Data Source=" + dataFile;

    Stopwatch sw = Stopwatch.StartNew();

    // The table is only created when the database file does not exist yet.
    bool databaseMissing = !File.Exists(dataFile);
    if (databaseMissing)
    {
        CreateDatabase(constr);
    }

    InsertData(constr, @"d:\Andrus\bigxml.xml");

    sw.Stop();
    sw.Dump("Duration");
}
/// <summary>
/// Streams <paramref name="xmlFile"/> and inserts one MyData row per ettevotja element,
/// all inside a single transaction.
/// </summary>
/// <param name="connectionString">SQLite connection string; the database must already contain the MyData table.</param>
/// <param name="xmlFile">Path of the XML file whose root element is ettevotjad.</param>
void InsertData(string connectionString, string xmlFile)
{
    using (SQLiteConnection conn = new SQLiteConnection(connectionString))
    using (SQLiteCommand sql = new SQLiteCommand(@"insert into MyData
(nimi, ariregistr, asukoht_et, asukoha_e2, indeks_ett, kmkr_nr)
values
(?,?,?,?,?,?)", conn))
    {
        // The statement uses '?' placeholders, so the parameters are added in the same
        // order as the column list. The initial values are placeholders only.
        sql.Parameters.AddWithValue("nimi", "");
        sql.Parameters.AddWithValue("ariregistr", 0);
        sql.Parameters.AddWithValue("asukoht_et", "");
        sql.Parameters.AddWithValue("asukoha_e2", "");
        sql.Parameters.AddWithValue("indeks_ett", "");
        sql.Parameters.AddWithValue("kmkr_nr", "");

        conn.Open();

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.IgnoreWhitespace = true;

        // Disposing the transaction without Commit (e.g. when an exception escapes the
        // loop) rolls the partial import back deterministically.
        using (SQLiteTransaction transaction = conn.BeginTransaction())
        {
            using (XmlReader r = XmlReader.Create(xmlFile, settings))
            {
                r.MoveToContent();
                r.ReadStartElement("ettevotjad");

                while (r.Name == "ettevotja")
                {
                    // Materialize only the current record; ReadFrom advances the reader
                    // past it, so the whole file is never held in memory.
                    XElement x = (XElement)XNode.ReadFrom(r);

                    // A record without an address element yields null address fields
                    // instead of throwing a NullReferenceException.
                    XElement address = x.Element("ettevotja_aadress");

                    string nimi = (string)x.Element("nimi");
                    int? ariregistr = (int?)x.Element("ariregistri_kood");
                    string asukoht_et = (string)address?.Element("asukoht_ettevotja_aadressis");
                    string asukoha_e2 = (string)address?.Element("asukoha_ehak_tekstina");
                    string indeks_ett = (string)address?.Element("indeks_ettevotja_aadressis");
                    string kmkr_nr = (string)x.Element("kmkr_nr");

                    sql.Parameters["nimi"].Value = nimi;
                    sql.Parameters["ariregistr"].Value = ariregistr;
                    sql.Parameters["asukoht_et"].Value = asukoht_et;
                    sql.Parameters["asukoha_e2"].Value = asukoha_e2;
                    sql.Parameters["indeks_ett"].Value = indeks_ett;
                    sql.Parameters["kmkr_nr"].Value = kmkr_nr;
                    sql.ExecuteNonQuery();
                }

                r.ReadEndElement();
            }

            transaction.Commit();
        }
    }
}
/// <summary>
/// Opens the database named by <paramref name="connectionString"/> and creates the
/// MyData table in it.
/// </summary>
/// <param name="connectionString">SQLite connection string of the target database.</param>
void CreateDatabase(string connectionString)
{
    const string createTableSql = @"create table MyData (
nimi text,
ariregistr number null,
asukoht_et text,
asukoha_e2 text,
indeks_ett text,
kmkr_nr text
)";

    using (SQLiteConnection conn = new SQLiteConnection(connectionString))
    using (SQLiteCommand sql = new SQLiteCommand(createTableSql, conn))
    {
        conn.Open();
        sql.ExecuteNonQuery();
        conn.Close();
    }
}
PS:代码可直接在 LINQPad 中运行。