(C#) 如何在不加载或重写整个文件的情况下修改现有 XML 文件中的属性值?
(C#) How to modify attribute's value in the existing XML file without loading or rewriting the whole file?
我正在使用 XmlWriter 和 Linq2Xml 制作一些巨大的 XML 文件(几 GB)。
此文件的类型:
<Table recCount="" recLength="">
<Rec recId="1">..</Rec>
<Rec recId="2">..</Rec>
..
<Rec recId="n">..</Rec>
</Table>
在写完所有内部 Rec 节点之前,我无法知道 Table 的 recCount 和 recLength 属性的值,所以我必须在最后才为这些属性写入值。
现在我的做法是:先将所有内部 Rec 节点写入临时文件,计算出 Table 的属性值,然后按照上面所示的结构将所有内容写入结果文件(即从包含所有 Rec 节点的临时文件中复制全部内容)。
我想知道是否有一种方法可以修改这些属性的值,而无需将内容写入另一个文件(就像我现在这样做),也无需将整个文档加载到内存中(由于这些文件的大小,这显然不可能)?
您可以尝试将 xml 文件加载到数据集中,因为这样会更容易计算您的属性。此外,内存管理由 DataSet 层完成。为什么不试一试,让我们也知道结果。
我认为 FileStream Class 会对您有所帮助。查看 Read 和 Write 方法。
大量注释代码。基本思想是在第一遍中我们写:
<?xml version="1.0" encoding="utf-8"?>
<Table recCount="$1" recLength="$2">
<!--Reserved space:++++++++++++++++-->
<Rec...
然后我们回到文件开头重写前三行:
<?xml version="1.0" encoding="utf-8"?>
<Table recCount="1000" recLength="150">
<!--Reserved space:#############-->
这里重要的"技巧"是:你无法向文件中"插入"内容,只能覆盖已有内容。所以我们预先为这些数字"保留"一些空间(即 Reserved space:############# 这条注释)。
我们可以通过多种方式做到这一点……例如,在第一遍中我们可以写:
<Table recCount="          " recLength="          ">
然后(xml-合法但丑陋):
<Table recCount="1000      " recLength="150       ">
或者我们可以在 Table 的 > 之后追加空格:
<Table recCount="" recLength="">
(在 > 之后有 20 个空格)
然后:
<Table recCount="1000" recLength="150">
(现在 > 之后有 13 个空格)
或者我们可以简单地在新行上添加 space 而没有 <!-- -->
...
代码:
// Two-pass write. The first pass streams the whole document with short
// placeholder tokens in the Table attributes, followed by a padding comment.
// The second pass overwrites only the first few lines in place with the real
// values, shrinking the padding so the total length stays the same.
//
// The two placeholders MUST be distinct and non-empty: string.Replace
// substitutes every occurrence of its search text, so identical (or empty)
// placeholders would put the recCount value into both attributes.
const string recCountToken = "$1";
const string recLengthToken = "$2";
int maxRecCountLength = 10; // int.MaxValue.ToString().Length
int maxRecLengthLength = 10; // int.MaxValue.ToString().Length
int tokenLength = recCountToken.Length + recLengthToken.Length; // 4 == "$1" + "$2"
// Note that the reserved space will be in the form ++++++++++++++++
// It covers the largest possible growth of the <Table ...> line: up to
// 10 + 10 digits, minus the 4 placeholder characters those digits replace.
string reservedSpace = new string('+', maxRecCountLength + maxRecLengthLength - tokenLength);
// You have to manually open the FileStream
using (var fs = new FileStream("out.xml", FileMode.Create))
// and add a StreamWriter on top of it (leaveOpen: true, so that fs stays
// usable while we read it back below)
using (var sw = new StreamWriter(fs, Encoding.UTF8, 4096, true))
{
    // Here you write on your StreamWriter however you want.
    // Note that recCount and recLength have a placeholder $1 and $2.
    int recCount = 0;
    int maxRecLength = 0;
    using (var xw = XmlWriter.Create(sw))
    {
        xw.WriteWhitespace("\r\n");
        xw.WriteStartElement("Table");
        xw.WriteAttributeString("recCount", recCountToken);
        xw.WriteAttributeString("recLength", recLengthToken);
        // You have to add some white space that will be
        // partially replaced by the recCount and recLength value
        xw.WriteWhitespace("\r\n");
        xw.WriteComment("Reserved space:" + reservedSpace);
        // <--------- BEGIN YOUR CODE
        for (int i = 0; i < 100; i++)
        {
            xw.WriteWhitespace("\r\n");
            xw.WriteStartElement("Rec");
            string str = string.Format("Some number: {0}", i);
            if (str.Length > maxRecLength)
            {
                maxRecLength = str.Length;
            }
            xw.WriteValue(str);
            recCount++;
            xw.WriteEndElement();
        }
        // <--------- END YOUR CODE
        xw.WriteWhitespace("\r\n");
        xw.WriteEndElement();
    }
    // Push everything buffered in the StreamWriter to the FileStream
    // before reading the file back through the same stream.
    sw.Flush();
    // Now we read the first lines to modify them (normally we will
    // read three lines, the xml header, the <Table element and the
    // <!--Reserved space: comment)
    fs.Position = 0;
    var lines = new List<string>();
    using (var sr = new StreamReader(fs, sw.Encoding, false, 4096, true))
    {
        while (true)
        {
            string str = sr.ReadLine();
            if (str == null)
            {
                // ReadLine returns null at end of stream; without this
                // guard the StartsWith call below would throw a
                // NullReferenceException.
                throw new InvalidDataException("The <Table> start tag was not found.");
            }
            lines.Add(str);
            if (str.StartsWith("<Table", StringComparison.Ordinal))
            {
                // We read the next line, the comment line
                str = sr.ReadLine();
                if (str == null)
                {
                    throw new InvalidDataException("The reserved-space comment line was not found.");
                }
                lines.Add(str);
                break;
            }
        }
    }
    string strCount = XmlConvert.ToString(recCount);
    string strMaxRecLength = XmlConvert.ToString(maxRecLength);
    // We do some replaces for the tokens
    int tableIndex = lines.Count - 2;
    int commentIndex = lines.Count - 1;
    int oldLen = lines[tableIndex].Length;
    lines[tableIndex] = lines[tableIndex]
        .Replace("=\"" + recCountToken + "\"", "=\"" + strCount + "\"")
        .Replace("=\"" + recLengthToken + "\"", "=\"" + strMaxRecLength + "\"");
    int newLen = lines[tableIndex].Length;
    // Shrink the padding by exactly the number of characters the Table
    // line grew, so the rewritten header has the same length as before
    // and nothing after it is disturbed.
    int padding = reservedSpace.Length - (newLen - oldLen);
    lines[commentIndex] = lines[commentIndex].Replace(":" + reservedSpace, ":" + new string('#', padding));
    // We move back to just after the UTF8/UTF16 preamble
    fs.Position = sw.Encoding.GetPreamble().Length;
    // And we rewrite the lines
    foreach (string str in lines)
    {
        sw.Write(str);
        sw.Write("\r\n");
    }
}
较慢的 .NET 3.5 方式
在 .NET 3.5 中,StreamReader/StreamWriter 在释放时总会关闭底层的 FileStream,所以我不得不多次重新打开文件。这种方式会稍慢一些。
// Two-pass write for .NET 3.5, re-opening the file for each phase:
//   1. write the whole document with placeholder tokens and a padding comment,
//   2. read back the first few lines,
//   3. overwrite those lines in place with the real attribute values.
//
// The two placeholders MUST be distinct and non-empty: string.Replace
// substitutes every occurrence of its search text, so identical (or empty)
// placeholders would put the recCount value into both attributes.
const string recCountToken = "$1";
const string recLengthToken = "$2";
int maxRecCountLength = 10; // int.MaxValue.ToString().Length
int maxRecLengthLength = 10; // int.MaxValue.ToString().Length
int tokenLength = recCountToken.Length + recLengthToken.Length; // 4 == "$1" + "$2"
// Note that the reserved space will be in the form ++++++++++++++++
// It covers the largest possible growth of the <Table ...> line: up to
// 10 + 10 digits, minus the 4 placeholder characters those digits replace.
string reservedSpace = new string('+', maxRecCountLength + maxRecLengthLength - tokenLength);
string fileName = "out.xml";
int recCount = 0;
int maxRecLength = 0;
using (var sw = new StreamWriter(fileName))
{
    // Here you write on your StreamWriter however you want.
    // Note that recCount and recLength have a placeholder $1 and $2.
    using (var xw = XmlWriter.Create(sw))
    {
        xw.WriteWhitespace("\r\n");
        xw.WriteStartElement("Table");
        xw.WriteAttributeString("recCount", recCountToken);
        xw.WriteAttributeString("recLength", recLengthToken);
        // You have to add some white space that will be
        // partially replaced by the recCount and recLength value
        xw.WriteWhitespace("\r\n");
        xw.WriteComment("Reserved space:" + reservedSpace);
        // <--------- BEGIN YOUR CODE
        for (int i = 0; i < 100; i++)
        {
            xw.WriteWhitespace("\r\n");
            xw.WriteStartElement("Rec");
            string str = string.Format("Some number: {0}", i);
            if (str.Length > maxRecLength)
            {
                maxRecLength = str.Length;
            }
            xw.WriteValue(str);
            recCount++;
            xw.WriteEndElement();
        }
        // <--------- END YOUR CODE
        xw.WriteWhitespace("\r\n");
        xw.WriteEndElement();
    }
}
var lines = new List<string>();
using (var sr = new StreamReader(fileName))
{
    // Now we read the first lines to modify them (normally we will
    // read three lines, the xml header, the <Table element and the
    // <!--Reserved space: comment)
    while (true)
    {
        string str = sr.ReadLine();
        if (str == null)
        {
            // ReadLine returns null at end of stream; without this guard
            // the StartsWith call below would throw a NullReferenceException.
            throw new InvalidDataException("The <Table> start tag was not found.");
        }
        lines.Add(str);
        if (str.StartsWith("<Table", StringComparison.Ordinal))
        {
            // We read the next line, the comment line
            str = sr.ReadLine();
            if (str == null)
            {
                throw new InvalidDataException("The reserved-space comment line was not found.");
            }
            lines.Add(str);
            break;
        }
    }
}
// We have to use the Stream overload of StreamWriter because
// we want to modify the text! (File.OpenWrite opens the existing file
// without truncating it.)
using (var fs = File.OpenWrite(fileName))
using (var sw = new StreamWriter(fs))
{
    string strCount = XmlConvert.ToString(recCount);
    string strMaxRecLength = XmlConvert.ToString(maxRecLength);
    // We do some replaces for the tokens
    int tableIndex = lines.Count - 2;
    int commentIndex = lines.Count - 1;
    int oldLen = lines[tableIndex].Length;
    lines[tableIndex] = lines[tableIndex]
        .Replace("=\"" + recCountToken + "\"", "=\"" + strCount + "\"")
        .Replace("=\"" + recLengthToken + "\"", "=\"" + strMaxRecLength + "\"");
    int newLen = lines[tableIndex].Length;
    // Shrink the padding by exactly the number of characters the Table
    // line grew, so the rewritten header has the same length as before
    // and nothing after it is disturbed.
    int padding = reservedSpace.Length - (newLen - oldLen);
    lines[commentIndex] = lines[commentIndex].Replace(":" + reservedSpace, ":" + new string('#', padding));
    // We move back to just after the UTF8/UTF16 preamble
    sw.BaseStream.Position = sw.Encoding.GetPreamble().Length;
    // And we rewrite the lines
    foreach (string str in lines)
    {
        sw.Write(str);
        sw.Write("\r\n");
    }
}
尝试使用以下方法。
您可以将默认值设置为外部 xml 架构中的属性。
创建 xml 文档时,您不会创建这些属性。在这里:
// Sample values; in real code these are computed while the records are written.
int count = 5;
int length = 42;
var writerSettings = new XmlWriterSettings();
writerSettings.Indent = true;
// Writes data.xml: a <Table> root WITHOUT recCount/recLength attributes,
// containing `count` <Rec> children.
using (var writer = XmlWriter.Create("data.xml", writerSettings))
{
    writer.WriteStartElement("Table");
    int recId = 1;
    while (recId <= count)
    {
        writer.WriteStartElement("Rec");
        writer.WriteAttributeString("recId", recId.ToString());
        writer.WriteString("..");
        writer.WriteEndElement();
        recId++;
    }
    // <Table> is deliberately left open here; closing the writer
    // closes any elements that are still open.
}
因此,xml 看起来像这样:
<?xml version="1.0" encoding="utf-8"?>
<Table>
<Rec recId="1">..</Rec>
<Rec recId="2">..</Rec>
<Rec recId="3">..</Rec>
<Rec recId="4">..</Rec>
<Rec recId="5">..</Rec>
</Table>
现在为此文档创建一个 xml 架构,它将为所需的属性指定默认值。
// Writes data.xsd: declares the Table element, allows any child content,
// and gives recCount / recLength default values taken from count / length.
const string xsdPrefix = "xs";
string xsdNamespace = "http://www.w3.org/2001/XMLSchema";
using (XmlWriter schemaWriter = XmlWriter.Create("data.xsd", writerSettings))
{
    // <xs:schema> / <xs:element name="Table"> / <xs:complexType>
    schemaWriter.WriteStartElement(xsdPrefix, "schema", xsdNamespace);
    schemaWriter.WriteStartElement(xsdPrefix, "element", xsdNamespace);
    schemaWriter.WriteAttributeString("name", "Table");
    schemaWriter.WriteStartElement(xsdPrefix, "complexType", xsdNamespace);

    // <xs:sequence><xs:any processContents="skip" maxOccurs="unbounded" /></xs:sequence>
    schemaWriter.WriteStartElement(xsdPrefix, "sequence", xsdNamespace);
    schemaWriter.WriteStartElement(xsdPrefix, "any", xsdNamespace);
    schemaWriter.WriteAttributeString("processContents", "skip");
    schemaWriter.WriteAttributeString("maxOccurs", "unbounded");
    schemaWriter.WriteEndElement(); // xs:any
    schemaWriter.WriteEndElement(); // xs:sequence

    // <xs:attribute name="recCount" default="..." />
    schemaWriter.WriteStartElement(xsdPrefix, "attribute", xsdNamespace);
    schemaWriter.WriteAttributeString("name", "recCount");
    schemaWriter.WriteAttributeString("default", count.ToString()); // <--
    schemaWriter.WriteEndElement();

    // <xs:attribute name="recLength" default="..." />
    schemaWriter.WriteStartElement(xsdPrefix, "attribute", xsdNamespace);
    schemaWriter.WriteAttributeString("name", "recLength");
    schemaWriter.WriteAttributeString("default", length.ToString()); // <--
    schemaWriter.WriteEndElement();

    // complexType, element and schema are left open; closing the writer
    // closes any elements that are still open.
}
或者更容易创建如下模式:
// Builds the same data.xsd with LINQ to XML, one named piece at a time.
XNamespace xs = "http://www.w3.org/2001/XMLSchema";

// <xs:sequence><xs:any processContents="skip" maxOccurs="unbounded" /></xs:sequence>
var anyContent = new XElement(xs + "any",
    new XAttribute("processContents", "skip"),
    new XAttribute("maxOccurs", "unbounded"));
var sequence = new XElement(xs + "sequence", anyContent);

// Attribute declarations whose defaults carry the computed values.
var recCountAttribute = new XElement(xs + "attribute",
    new XAttribute("name", "recCount"),
    new XAttribute("default", count)); // <--
var recLengthAttribute = new XElement(xs + "attribute",
    new XAttribute("name", "recLength"),
    new XAttribute("default", length)); // <--

var complexType = new XElement(xs + "complexType", sequence, recCountAttribute, recLengthAttribute);
var tableElement = new XElement(xs + "element", new XAttribute("name", "Table"), complexType);
var schema = new XElement(xs + "schema", tableElement);
schema.Save("data.xsd");
请注意变量 count 和 length 的用法——这里应填入您的实际数据。
生成的架构如下所示:
<?xml version="1.0" encoding="utf-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Table">
<xs:complexType>
<xs:sequence>
<xs:any processContents="skip" maxOccurs="unbounded" />
</xs:sequence>
<xs:attribute name="recCount" default="5" />
<xs:attribute name="recLength" default="42" />
</xs:complexType>
</xs:element>
</xs:schema>
现在,当阅读 xml 文档时,您必须添加此架构 - 默认属性值将从中获取。
// Reads data.xml through a schema-validating reader so that the default
// attribute values declared in data.xsd show up on the loaded element,
// then prints the result to the console.
var readerSettings = new XmlReaderSettings { ValidationType = ValidationType.Schema }; // <--
readerSettings.Schemas.Add("", "data.xsd"); // <--
XElement xml;
using (XmlReader reader = XmlReader.Create("data.xml", readerSettings)) // <--
{
    xml = XElement.Load(reader);
}
xml.Save(Console.Out);
Console.WriteLine();
结果:
<Table recCount="5" recLength="42">
<Rec recId="1">..</Rec>
<Rec recId="2">..</Rec>
<Rec recId="3">..</Rec>
<Rec recId="4">..</Rec>
<Rec recId="5">..</Rec>
</Table>
我正在使用 XmlWriter 和 Linq2Xml 制作一些巨大的 XML 文件(几 GB)。 此文件的类型:
<Table recCount="" recLength="">
<Rec recId="1">..</Rec>
<Rec recId="2">..</Rec>
..
<Rec recId="n">..</Rec>
</Table>
在写完所有内部 Rec 节点之前,我无法知道 Table 的 recCount 和 recLength 属性的值,所以我必须在最后才为这些属性写入值。
现在我的做法是:先将所有内部 Rec 节点写入临时文件,计算出 Table 的属性值,然后按照上面所示的结构将所有内容写入结果文件(即从包含所有 Rec 节点的临时文件中复制全部内容)。
我想知道是否有一种方法可以修改这些属性的值,而无需将内容写入另一个文件(就像我现在这样做),也无需将整个文档加载到内存中(由于这些文件的大小,这显然不可能)?
您可以尝试将 xml 文件加载到数据集中,因为这样会更容易计算您的属性。此外,内存管理由 DataSet 层完成。为什么不试一试,让我们也知道结果。
我认为 FileStream Class 会对您有所帮助。查看 Read 和 Write 方法。
大量注释代码。基本思想是在第一遍中我们写:
<?xml version="1.0" encoding="utf-8"?>
<Table recCount="$1" recLength="$2">
<!--Reserved space:++++++++++++++++-->
<Rec...
然后我们回到文件开头重写前三行:
<?xml version="1.0" encoding="utf-8"?>
<Table recCount="1000" recLength="150">
<!--Reserved space:#############-->
这里重要的"技巧"是:你无法向文件中"插入"内容,只能覆盖已有内容。所以我们预先为这些数字"保留"一些空间(即 Reserved space:############# 这条注释)。
我们可以通过多种方式做到这一点……例如,在第一遍中我们可以写:
<Table recCount="          " recLength="          ">
然后(xml-合法但丑陋):
<Table recCount="1000      " recLength="150       ">
或者我们可以在 Table 的 > 之后追加空格:
<Table recCount="" recLength="">
(在 > 之后有 20 个空格)
然后:
<Table recCount="1000" recLength="150">
(现在 > 之后有 13 个空格)
或者我们可以简单地在新行上添加 space 而没有 <!-- -->
...
代码:
// Two-pass write. The first pass streams the whole document with short
// placeholder tokens in the Table attributes, followed by a padding comment.
// The second pass overwrites only the first few lines in place with the real
// values, shrinking the padding so the total length stays the same.
//
// The two placeholders MUST be distinct and non-empty: string.Replace
// substitutes every occurrence of its search text, so identical (or empty)
// placeholders would put the recCount value into both attributes.
const string recCountToken = "$1";
const string recLengthToken = "$2";
int maxRecCountLength = 10; // int.MaxValue.ToString().Length
int maxRecLengthLength = 10; // int.MaxValue.ToString().Length
int tokenLength = recCountToken.Length + recLengthToken.Length; // 4 == "$1" + "$2"
// Note that the reserved space will be in the form ++++++++++++++++
// It covers the largest possible growth of the <Table ...> line: up to
// 10 + 10 digits, minus the 4 placeholder characters those digits replace.
string reservedSpace = new string('+', maxRecCountLength + maxRecLengthLength - tokenLength);
// You have to manually open the FileStream
using (var fs = new FileStream("out.xml", FileMode.Create))
// and add a StreamWriter on top of it (leaveOpen: true, so that fs stays
// usable while we read it back below)
using (var sw = new StreamWriter(fs, Encoding.UTF8, 4096, true))
{
    // Here you write on your StreamWriter however you want.
    // Note that recCount and recLength have a placeholder $1 and $2.
    int recCount = 0;
    int maxRecLength = 0;
    using (var xw = XmlWriter.Create(sw))
    {
        xw.WriteWhitespace("\r\n");
        xw.WriteStartElement("Table");
        xw.WriteAttributeString("recCount", recCountToken);
        xw.WriteAttributeString("recLength", recLengthToken);
        // You have to add some white space that will be
        // partially replaced by the recCount and recLength value
        xw.WriteWhitespace("\r\n");
        xw.WriteComment("Reserved space:" + reservedSpace);
        // <--------- BEGIN YOUR CODE
        for (int i = 0; i < 100; i++)
        {
            xw.WriteWhitespace("\r\n");
            xw.WriteStartElement("Rec");
            string str = string.Format("Some number: {0}", i);
            if (str.Length > maxRecLength)
            {
                maxRecLength = str.Length;
            }
            xw.WriteValue(str);
            recCount++;
            xw.WriteEndElement();
        }
        // <--------- END YOUR CODE
        xw.WriteWhitespace("\r\n");
        xw.WriteEndElement();
    }
    // Push everything buffered in the StreamWriter to the FileStream
    // before reading the file back through the same stream.
    sw.Flush();
    // Now we read the first lines to modify them (normally we will
    // read three lines, the xml header, the <Table element and the
    // <!--Reserved space: comment)
    fs.Position = 0;
    var lines = new List<string>();
    using (var sr = new StreamReader(fs, sw.Encoding, false, 4096, true))
    {
        while (true)
        {
            string str = sr.ReadLine();
            if (str == null)
            {
                // ReadLine returns null at end of stream; without this
                // guard the StartsWith call below would throw a
                // NullReferenceException.
                throw new InvalidDataException("The <Table> start tag was not found.");
            }
            lines.Add(str);
            if (str.StartsWith("<Table", StringComparison.Ordinal))
            {
                // We read the next line, the comment line
                str = sr.ReadLine();
                if (str == null)
                {
                    throw new InvalidDataException("The reserved-space comment line was not found.");
                }
                lines.Add(str);
                break;
            }
        }
    }
    string strCount = XmlConvert.ToString(recCount);
    string strMaxRecLength = XmlConvert.ToString(maxRecLength);
    // We do some replaces for the tokens
    int tableIndex = lines.Count - 2;
    int commentIndex = lines.Count - 1;
    int oldLen = lines[tableIndex].Length;
    lines[tableIndex] = lines[tableIndex]
        .Replace("=\"" + recCountToken + "\"", "=\"" + strCount + "\"")
        .Replace("=\"" + recLengthToken + "\"", "=\"" + strMaxRecLength + "\"");
    int newLen = lines[tableIndex].Length;
    // Shrink the padding by exactly the number of characters the Table
    // line grew, so the rewritten header has the same length as before
    // and nothing after it is disturbed.
    int padding = reservedSpace.Length - (newLen - oldLen);
    lines[commentIndex] = lines[commentIndex].Replace(":" + reservedSpace, ":" + new string('#', padding));
    // We move back to just after the UTF8/UTF16 preamble
    fs.Position = sw.Encoding.GetPreamble().Length;
    // And we rewrite the lines
    foreach (string str in lines)
    {
        sw.Write(str);
        sw.Write("\r\n");
    }
}
较慢的 .NET 3.5 方式
在 .NET 3.5 中,StreamReader/StreamWriter 在释放时总会关闭底层的 FileStream,所以我不得不多次重新打开文件。这种方式会稍慢一些。
// Two-pass write for .NET 3.5, re-opening the file for each phase:
//   1. write the whole document with placeholder tokens and a padding comment,
//   2. read back the first few lines,
//   3. overwrite those lines in place with the real attribute values.
//
// The two placeholders MUST be distinct and non-empty: string.Replace
// substitutes every occurrence of its search text, so identical (or empty)
// placeholders would put the recCount value into both attributes.
const string recCountToken = "$1";
const string recLengthToken = "$2";
int maxRecCountLength = 10; // int.MaxValue.ToString().Length
int maxRecLengthLength = 10; // int.MaxValue.ToString().Length
int tokenLength = recCountToken.Length + recLengthToken.Length; // 4 == "$1" + "$2"
// Note that the reserved space will be in the form ++++++++++++++++
// It covers the largest possible growth of the <Table ...> line: up to
// 10 + 10 digits, minus the 4 placeholder characters those digits replace.
string reservedSpace = new string('+', maxRecCountLength + maxRecLengthLength - tokenLength);
string fileName = "out.xml";
int recCount = 0;
int maxRecLength = 0;
using (var sw = new StreamWriter(fileName))
{
    // Here you write on your StreamWriter however you want.
    // Note that recCount and recLength have a placeholder $1 and $2.
    using (var xw = XmlWriter.Create(sw))
    {
        xw.WriteWhitespace("\r\n");
        xw.WriteStartElement("Table");
        xw.WriteAttributeString("recCount", recCountToken);
        xw.WriteAttributeString("recLength", recLengthToken);
        // You have to add some white space that will be
        // partially replaced by the recCount and recLength value
        xw.WriteWhitespace("\r\n");
        xw.WriteComment("Reserved space:" + reservedSpace);
        // <--------- BEGIN YOUR CODE
        for (int i = 0; i < 100; i++)
        {
            xw.WriteWhitespace("\r\n");
            xw.WriteStartElement("Rec");
            string str = string.Format("Some number: {0}", i);
            if (str.Length > maxRecLength)
            {
                maxRecLength = str.Length;
            }
            xw.WriteValue(str);
            recCount++;
            xw.WriteEndElement();
        }
        // <--------- END YOUR CODE
        xw.WriteWhitespace("\r\n");
        xw.WriteEndElement();
    }
}
var lines = new List<string>();
using (var sr = new StreamReader(fileName))
{
    // Now we read the first lines to modify them (normally we will
    // read three lines, the xml header, the <Table element and the
    // <!--Reserved space: comment)
    while (true)
    {
        string str = sr.ReadLine();
        if (str == null)
        {
            // ReadLine returns null at end of stream; without this guard
            // the StartsWith call below would throw a NullReferenceException.
            throw new InvalidDataException("The <Table> start tag was not found.");
        }
        lines.Add(str);
        if (str.StartsWith("<Table", StringComparison.Ordinal))
        {
            // We read the next line, the comment line
            str = sr.ReadLine();
            if (str == null)
            {
                throw new InvalidDataException("The reserved-space comment line was not found.");
            }
            lines.Add(str);
            break;
        }
    }
}
// We have to use the Stream overload of StreamWriter because
// we want to modify the text! (File.OpenWrite opens the existing file
// without truncating it.)
using (var fs = File.OpenWrite(fileName))
using (var sw = new StreamWriter(fs))
{
    string strCount = XmlConvert.ToString(recCount);
    string strMaxRecLength = XmlConvert.ToString(maxRecLength);
    // We do some replaces for the tokens
    int tableIndex = lines.Count - 2;
    int commentIndex = lines.Count - 1;
    int oldLen = lines[tableIndex].Length;
    lines[tableIndex] = lines[tableIndex]
        .Replace("=\"" + recCountToken + "\"", "=\"" + strCount + "\"")
        .Replace("=\"" + recLengthToken + "\"", "=\"" + strMaxRecLength + "\"");
    int newLen = lines[tableIndex].Length;
    // Shrink the padding by exactly the number of characters the Table
    // line grew, so the rewritten header has the same length as before
    // and nothing after it is disturbed.
    int padding = reservedSpace.Length - (newLen - oldLen);
    lines[commentIndex] = lines[commentIndex].Replace(":" + reservedSpace, ":" + new string('#', padding));
    // We move back to just after the UTF8/UTF16 preamble
    sw.BaseStream.Position = sw.Encoding.GetPreamble().Length;
    // And we rewrite the lines
    foreach (string str in lines)
    {
        sw.Write(str);
        sw.Write("\r\n");
    }
}
尝试使用以下方法。
您可以将默认值设置为外部 xml 架构中的属性。
创建 xml 文档时,您不会创建这些属性。在这里:
// Sample values; in real code these are computed while the records are written.
int count = 5;
int length = 42;
var writerSettings = new XmlWriterSettings();
writerSettings.Indent = true;
// Writes data.xml: a <Table> root WITHOUT recCount/recLength attributes,
// containing `count` <Rec> children.
using (var writer = XmlWriter.Create("data.xml", writerSettings))
{
    writer.WriteStartElement("Table");
    int recId = 1;
    while (recId <= count)
    {
        writer.WriteStartElement("Rec");
        writer.WriteAttributeString("recId", recId.ToString());
        writer.WriteString("..");
        writer.WriteEndElement();
        recId++;
    }
    // <Table> is deliberately left open here; closing the writer
    // closes any elements that are still open.
}
因此,xml 看起来像这样:
<?xml version="1.0" encoding="utf-8"?>
<Table>
<Rec recId="1">..</Rec>
<Rec recId="2">..</Rec>
<Rec recId="3">..</Rec>
<Rec recId="4">..</Rec>
<Rec recId="5">..</Rec>
</Table>
现在为此文档创建一个 xml 架构,它将为所需的属性指定默认值。
// Writes data.xsd: declares the Table element, allows any child content,
// and gives recCount / recLength default values taken from count / length.
const string xsdPrefix = "xs";
string xsdNamespace = "http://www.w3.org/2001/XMLSchema";
using (XmlWriter schemaWriter = XmlWriter.Create("data.xsd", writerSettings))
{
    // <xs:schema> / <xs:element name="Table"> / <xs:complexType>
    schemaWriter.WriteStartElement(xsdPrefix, "schema", xsdNamespace);
    schemaWriter.WriteStartElement(xsdPrefix, "element", xsdNamespace);
    schemaWriter.WriteAttributeString("name", "Table");
    schemaWriter.WriteStartElement(xsdPrefix, "complexType", xsdNamespace);

    // <xs:sequence><xs:any processContents="skip" maxOccurs="unbounded" /></xs:sequence>
    schemaWriter.WriteStartElement(xsdPrefix, "sequence", xsdNamespace);
    schemaWriter.WriteStartElement(xsdPrefix, "any", xsdNamespace);
    schemaWriter.WriteAttributeString("processContents", "skip");
    schemaWriter.WriteAttributeString("maxOccurs", "unbounded");
    schemaWriter.WriteEndElement(); // xs:any
    schemaWriter.WriteEndElement(); // xs:sequence

    // <xs:attribute name="recCount" default="..." />
    schemaWriter.WriteStartElement(xsdPrefix, "attribute", xsdNamespace);
    schemaWriter.WriteAttributeString("name", "recCount");
    schemaWriter.WriteAttributeString("default", count.ToString()); // <--
    schemaWriter.WriteEndElement();

    // <xs:attribute name="recLength" default="..." />
    schemaWriter.WriteStartElement(xsdPrefix, "attribute", xsdNamespace);
    schemaWriter.WriteAttributeString("name", "recLength");
    schemaWriter.WriteAttributeString("default", length.ToString()); // <--
    schemaWriter.WriteEndElement();

    // complexType, element and schema are left open; closing the writer
    // closes any elements that are still open.
}
或者更容易创建如下模式:
// Builds the same data.xsd with LINQ to XML, one named piece at a time.
XNamespace xs = "http://www.w3.org/2001/XMLSchema";

// <xs:sequence><xs:any processContents="skip" maxOccurs="unbounded" /></xs:sequence>
var anyContent = new XElement(xs + "any",
    new XAttribute("processContents", "skip"),
    new XAttribute("maxOccurs", "unbounded"));
var sequence = new XElement(xs + "sequence", anyContent);

// Attribute declarations whose defaults carry the computed values.
var recCountAttribute = new XElement(xs + "attribute",
    new XAttribute("name", "recCount"),
    new XAttribute("default", count)); // <--
var recLengthAttribute = new XElement(xs + "attribute",
    new XAttribute("name", "recLength"),
    new XAttribute("default", length)); // <--

var complexType = new XElement(xs + "complexType", sequence, recCountAttribute, recLengthAttribute);
var tableElement = new XElement(xs + "element", new XAttribute("name", "Table"), complexType);
var schema = new XElement(xs + "schema", tableElement);
schema.Save("data.xsd");
请注意变量 count 和 length 的用法——这里应填入您的实际数据。
生成的架构如下所示:
<?xml version="1.0" encoding="utf-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Table">
<xs:complexType>
<xs:sequence>
<xs:any processContents="skip" maxOccurs="unbounded" />
</xs:sequence>
<xs:attribute name="recCount" default="5" />
<xs:attribute name="recLength" default="42" />
</xs:complexType>
</xs:element>
</xs:schema>
现在,当阅读 xml 文档时,您必须添加此架构 - 默认属性值将从中获取。
// Reads data.xml through a schema-validating reader so that the default
// attribute values declared in data.xsd show up on the loaded element,
// then prints the result to the console.
var readerSettings = new XmlReaderSettings { ValidationType = ValidationType.Schema }; // <--
readerSettings.Schemas.Add("", "data.xsd"); // <--
XElement xml;
using (XmlReader reader = XmlReader.Create("data.xml", readerSettings)) // <--
{
    xml = XElement.Load(reader);
}
xml.Save(Console.Out);
Console.WriteLine();
结果:
<Table recCount="5" recLength="42">
<Rec recId="1">..</Rec>
<Rec recId="2">..</Rec>
<Rec recId="3">..</Rec>
<Rec recId="4">..</Rec>
<Rec recId="5">..</Rec>
</Table>