在使用 XmlReader 读取 XLSX 文件的同时保存对文件的修改
Update XLSX file changes whilst reading the file with XmlReader
我们有一段代码,它将 Excel XLSX 文档加载到内存中,对其进行一些修改,然后再保存回去。
// Loads the whole package part into an in-memory DOM, lets ProcessNode modify it and,
// when this.fileModified is set (presumably by ProcessNode - confirm), writes the
// document back over the part. The surrounding text reports that this approach throws
// OutOfMemoryException on large workbooks because the entire part is materialised.
XmlDocument doc = new XmlDocument();
using (Stream input = pp.GetStream())
{
    // Dispose the read stream as soon as the DOM is built so it is no longer open
    // when the same part is reopened for writing below.
    doc.Load(input);
}

XmlNode rootNode = doc.DocumentElement;
if (rootNode == null) return;

ProcessNode(rootNode);

if (this.fileModified)
{
    // The part is reopened with FileMode.Create so the saved document replaces the
    // previous content; disposing the stream makes sure the written data is flushed.
    using (Stream output = pp.GetStream(FileMode.Create, FileAccess.Write))
    {
        doc.Save(output);
    }
}
这对于小文件运行良好,但对于一些较大的 Excel 文件会抛出 OutOfMemory 异常。所以我们决定换一种方式,使用 XmlReader 类,避免一次性把整个文件加载到内存中。
// Streams every matching XML part through an XmlReader instead of loading it whole.
// Only <hyperlinks> and <row> elements are materialised, one at a time, as small DOM
// fragments that are handed to ProcessNode. This version does not write the modified
// fragments anywhere.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

    // XmlReader.Create(Stream) leaves the stream open when the reader is disposed
    // (XmlReaderSettings.CloseInput defaults to false), so the stream is owned here.
    using (Stream partStream = pp.GetStream())
    using (XmlReader reader = XmlReader.Create(partStream))
    {
        reader.MoveToContent();
        while (!reader.EOF)
        {
            bool isTargetElement = reader.NodeType == XmlNodeType.Element
                && (reader.Name == "hyperlinks" || reader.Name == "row");
            if (!isTargetElement)
            {
                reader.Read();
                continue;
            }

            // ReadNode consumes the whole element and leaves the reader positioned on
            // the node that follows it, so no extra Read() is issued on this path.
            XmlDocument doc = new XmlDocument();
            XmlNode rootNode = doc.ReadNode(reader);
            if (rootNode != null)
            {
                doc.AppendChild(rootNode);
                ProcessNode(rootNode);
            }
        }
    }
}
这会逐个节点读取文件,并处理我们需要的节点(同时修改其中的一些值)。但是,我不确定如何将这些修改写回原始 Excel 文件。
我尝试将 XmlWriter 与 XmlReader 一起使用,但无法正常工作。有什么想法吗?
更新:
我尝试了 @dbc 在评论区给出的建议,但对我来说似乎太慢了。处理大文件时它也许不会再抛出 OutOfMemory 异常,但处理过程耗时极长,似乎永远无法完成。
// Streams every matching XML part through an XmlReader and, when the package is open
// for read/write, mirrors it into an XmlWriter backed by a StringBuilder. <hyperlinks>
// and <row> elements are loaded as small DOM fragments, passed to ProcessNode and then
// written out; every other node is copied via WriteShallowNode.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

    StringBuilder strBuilder = new StringBuilder();

    // XmlReader.Create(Stream) leaves the stream open when the reader is disposed
    // (XmlReaderSettings.CloseInput defaults to false), so the stream is owned here.
    // A null resource is legal in a using statement; writer stays null when read-only.
    using (Stream partStream = pp.GetStream())
    using (XmlReader reader = XmlReader.Create(partStream))
    using (XmlWriter writer = this.Package.FileOpenAccess == FileAccess.ReadWrite ? XmlWriter.Create(strBuilder) : null)
    {
        reader.MoveToContent();
        while (!reader.EOF)
        {
            bool isTargetElement = reader.NodeType == XmlNodeType.Element
                && (reader.Name == "hyperlinks" || reader.Name == "row");
            if (!isTargetElement)
            {
                // NOTE(review): writer may be null here; whether WriteShallowNode
                // tolerates that is not visible in this snippet - confirm.
                WriteShallowNode(writer, reader); // helper taken from the StackOverflow answers suggested by @dbc
                reader.Read();
                continue;
            }

            // ReadNode consumes the whole element and leaves the reader on the next node.
            XmlDocument doc = new XmlDocument();
            XmlNode rootNode = doc.ReadNode(reader);
            if (rootNode != null)
            {
                doc.AppendChild(rootNode);
                ProcessNode(rootNode);

                // Serialise straight into the writer instead of WriteRaw(OuterXml):
                // this avoids building a full string per element and lets the writer
                // track namespaces that are already in scope.
                if (writer != null)
                {
                    rootNode.WriteTo(writer);
                }
            }
        }

        writer?.Flush();
    }
}
注 1:我目前使用 StringBuilder 进行测试,但最终计划改用临时文件。
注 2:我尝试过每处理 100 个元素就刷新一次 XmlWriter,但仍然很慢。
有什么想法吗?
试试下面的代码。我长期用这种方式处理那些会导致内存不足的大型 XML 文件。
// Walks a large XML file one <row> element at a time so that only a single row is
// held in memory. "File Stream" looks like a placeholder for the real input URI, and
// readerSettings is defined outside this snippet.
using (XmlReader reader = XmlReader.Create("File Stream", readerSettings))
{
    while (!reader.EOF)
    {
        // Skip ahead unless the reader is already positioned on a <row> element
        // (which is the case when two rows are directly adjacent).
        if (reader.Name != "row")
        {
            reader.ReadToFollowing("row");
        }

        // ReadToFollowing runs to end-of-file when no further <row> exists.
        if (!reader.EOF)
        {
            // ReadFrom consumes the whole element and advances the reader past it.
            XElement row = (XElement)XElement.ReadFrom(reader);
        }
    }
}
在 @dbc 的帮助下,我做了一些修改,现在它可以正常工作了。
// Streams every matching XML part through an XmlReader and, when the package is open
// for read/write, mirrors it into a temporary file. <hyperlinks> and <row> elements are
// loaded as small DOM fragments, passed to ProcessNode and written out; every other
// node is copied via WriteShallowNode. When this.packageChanged is set, the temporary
// file replaces the part's content. Failures other than OutOfMemoryException are
// logged and the loop moves on to the next part.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    // Declared outside the try block so that the finally block can see it.
    string tempFilePath = null;
    try
    {
        if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

        tempFilePath = GetTempFilePath();

        // XmlReader.Create(Stream) leaves the stream open when the reader is disposed
        // (XmlReaderSettings.CloseInput defaults to false), so the stream is owned here.
        // A null resource is legal in a using statement; writer stays null when read-only.
        using (Stream partStream = pp.GetStream())
        using (XmlReader reader = XmlReader.Create(partStream))
        using (XmlWriter writer = this.Package.FileOpenAccess == FileAccess.ReadWrite ? XmlWriter.Create(tempFilePath) : null)
        {
            while (!reader.EOF)
            {
                bool isTargetElement = reader.NodeType == XmlNodeType.Element
                    && (reader.Name == "hyperlinks" || reader.Name == "row");
                if (!isTargetElement)
                {
                    // NOTE(review): writer may be null here; whether WriteShallowNode
                    // tolerates that is not visible in this snippet - confirm.
                    WriteShallowNode(writer, reader); // helper taken from the StackOverflow answers suggested by @dbc
                    reader.Read();
                    continue;
                }

                // ReadNode consumes the whole element and leaves the reader on the next node.
                XmlDocument doc = new XmlDocument();
                XmlNode rootNode = doc.ReadNode(reader);
                if (rootNode != null)
                {
                    ProcessNode(rootNode);
                    if (writer != null)
                    {
                        rootNode.WriteTo(writer);
                    }
                }
            }
        }

        // The reader, writer and part stream are closed at this point, so the temporary
        // file is complete and the part can be reopened for writing.
        if (this.packageChanged) // is being set in ProcessNode method
        {
            this.packageChanged = false;
            using (FileStream tempFile = File.OpenRead(tempFilePath))
            using (Stream target = pp.GetStream(FileMode.Create, FileAccess.Write))
            {
                tempFile.CopyTo(target);
            }
        }
    }
    catch (OutOfMemoryException)
    {
        throw;
    }
    catch (Exception ex)
    {
        Log.Exception(ex, @"Failed to process a file."); // our inner log method
    }
    finally
    {
        if (!string.IsNullOrWhiteSpace(tempFilePath))
        {
            // Best-effort cleanup: File.Delete does not throw when the file is missing,
            // and a failed delete must not abort processing of the remaining parts.
            try
            {
                File.Delete(tempFilePath);
            }
            catch (IOException ex)
            {
                Log.Exception(ex, @"Failed to delete a temporary file.");
            }
            catch (UnauthorizedAccessException ex)
            {
                Log.Exception(ex, @"Failed to delete a temporary file.");
            }
        }
    }
}
我们有一段代码,它将 Excel XLSX 文档加载到内存中,对其进行一些修改,然后再保存回去。
// Loads the whole package part into an in-memory DOM, lets ProcessNode modify it and,
// when this.fileModified is set (presumably by ProcessNode - confirm), writes the
// document back over the part. The surrounding text reports that this approach throws
// OutOfMemoryException on large workbooks because the entire part is materialised.
XmlDocument doc = new XmlDocument();
using (Stream input = pp.GetStream())
{
    // Dispose the read stream as soon as the DOM is built so it is no longer open
    // when the same part is reopened for writing below.
    doc.Load(input);
}

XmlNode rootNode = doc.DocumentElement;
if (rootNode == null) return;

ProcessNode(rootNode);

if (this.fileModified)
{
    // The part is reopened with FileMode.Create so the saved document replaces the
    // previous content; disposing the stream makes sure the written data is flushed.
    using (Stream output = pp.GetStream(FileMode.Create, FileAccess.Write))
    {
        doc.Save(output);
    }
}
这对于小文件运行良好,但对于一些较大的 Excel 文件会抛出 OutOfMemory 异常。所以我们决定换一种方式,使用 XmlReader 类,避免一次性把整个文件加载到内存中。
// Streams every matching XML part through an XmlReader instead of loading it whole.
// Only <hyperlinks> and <row> elements are materialised, one at a time, as small DOM
// fragments that are handed to ProcessNode. This version does not write the modified
// fragments anywhere.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

    // XmlReader.Create(Stream) leaves the stream open when the reader is disposed
    // (XmlReaderSettings.CloseInput defaults to false), so the stream is owned here.
    using (Stream partStream = pp.GetStream())
    using (XmlReader reader = XmlReader.Create(partStream))
    {
        reader.MoveToContent();
        while (!reader.EOF)
        {
            bool isTargetElement = reader.NodeType == XmlNodeType.Element
                && (reader.Name == "hyperlinks" || reader.Name == "row");
            if (!isTargetElement)
            {
                reader.Read();
                continue;
            }

            // ReadNode consumes the whole element and leaves the reader positioned on
            // the node that follows it, so no extra Read() is issued on this path.
            XmlDocument doc = new XmlDocument();
            XmlNode rootNode = doc.ReadNode(reader);
            if (rootNode != null)
            {
                doc.AppendChild(rootNode);
                ProcessNode(rootNode);
            }
        }
    }
}
这会逐个节点读取文件,并处理我们需要的节点(同时修改其中的一些值)。但是,我不确定如何将这些修改写回原始 Excel 文件。
我尝试将 XmlWriter 与 XmlReader 一起使用,但无法正常工作。有什么想法吗?
更新:
我尝试了 @dbc 在评论区给出的建议,但对我来说似乎太慢了。处理大文件时它也许不会再抛出 OutOfMemory 异常,但处理过程耗时极长,似乎永远无法完成。
// Streams every matching XML part through an XmlReader and, when the package is open
// for read/write, mirrors it into an XmlWriter backed by a StringBuilder. <hyperlinks>
// and <row> elements are loaded as small DOM fragments, passed to ProcessNode and then
// written out; every other node is copied via WriteShallowNode.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

    StringBuilder strBuilder = new StringBuilder();

    // XmlReader.Create(Stream) leaves the stream open when the reader is disposed
    // (XmlReaderSettings.CloseInput defaults to false), so the stream is owned here.
    // A null resource is legal in a using statement; writer stays null when read-only.
    using (Stream partStream = pp.GetStream())
    using (XmlReader reader = XmlReader.Create(partStream))
    using (XmlWriter writer = this.Package.FileOpenAccess == FileAccess.ReadWrite ? XmlWriter.Create(strBuilder) : null)
    {
        reader.MoveToContent();
        while (!reader.EOF)
        {
            bool isTargetElement = reader.NodeType == XmlNodeType.Element
                && (reader.Name == "hyperlinks" || reader.Name == "row");
            if (!isTargetElement)
            {
                // NOTE(review): writer may be null here; whether WriteShallowNode
                // tolerates that is not visible in this snippet - confirm.
                WriteShallowNode(writer, reader); // helper taken from the StackOverflow answers suggested by @dbc
                reader.Read();
                continue;
            }

            // ReadNode consumes the whole element and leaves the reader on the next node.
            XmlDocument doc = new XmlDocument();
            XmlNode rootNode = doc.ReadNode(reader);
            if (rootNode != null)
            {
                doc.AppendChild(rootNode);
                ProcessNode(rootNode);

                // Serialise straight into the writer instead of WriteRaw(OuterXml):
                // this avoids building a full string per element and lets the writer
                // track namespaces that are already in scope.
                if (writer != null)
                {
                    rootNode.WriteTo(writer);
                }
            }
        }

        writer?.Flush();
    }
}
注 1:我目前使用 StringBuilder 进行测试,但最终计划改用临时文件。
注 2:我尝试过每处理 100 个元素就刷新一次 XmlWriter,但仍然很慢。
有什么想法吗?
试试下面的代码。我长期用这种方式处理那些会导致内存不足的大型 XML 文件。
// Walks a large XML file one <row> element at a time so that only a single row is
// held in memory. "File Stream" looks like a placeholder for the real input URI, and
// readerSettings is defined outside this snippet.
using (XmlReader reader = XmlReader.Create("File Stream", readerSettings))
{
    while (!reader.EOF)
    {
        // Skip ahead unless the reader is already positioned on a <row> element
        // (which is the case when two rows are directly adjacent).
        if (reader.Name != "row")
        {
            reader.ReadToFollowing("row");
        }

        // ReadToFollowing runs to end-of-file when no further <row> exists.
        if (!reader.EOF)
        {
            // ReadFrom consumes the whole element and advances the reader past it.
            XElement row = (XElement)XElement.ReadFrom(reader);
        }
    }
}
在 @dbc 的帮助下,我做了一些修改,现在它可以正常工作了。
// Streams every matching XML part through an XmlReader and, when the package is open
// for read/write, mirrors it into a temporary file. <hyperlinks> and <row> elements are
// loaded as small DOM fragments, passed to ProcessNode and written out; every other
// node is copied via WriteShallowNode. When this.packageChanged is set, the temporary
// file replaces the part's content. Failures other than OutOfMemoryException are
// logged and the loop moves on to the next part.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    // Declared outside the try block so that the finally block can see it.
    string tempFilePath = null;
    try
    {
        if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

        tempFilePath = GetTempFilePath();

        // XmlReader.Create(Stream) leaves the stream open when the reader is disposed
        // (XmlReaderSettings.CloseInput defaults to false), so the stream is owned here.
        // A null resource is legal in a using statement; writer stays null when read-only.
        using (Stream partStream = pp.GetStream())
        using (XmlReader reader = XmlReader.Create(partStream))
        using (XmlWriter writer = this.Package.FileOpenAccess == FileAccess.ReadWrite ? XmlWriter.Create(tempFilePath) : null)
        {
            while (!reader.EOF)
            {
                bool isTargetElement = reader.NodeType == XmlNodeType.Element
                    && (reader.Name == "hyperlinks" || reader.Name == "row");
                if (!isTargetElement)
                {
                    // NOTE(review): writer may be null here; whether WriteShallowNode
                    // tolerates that is not visible in this snippet - confirm.
                    WriteShallowNode(writer, reader); // helper taken from the StackOverflow answers suggested by @dbc
                    reader.Read();
                    continue;
                }

                // ReadNode consumes the whole element and leaves the reader on the next node.
                XmlDocument doc = new XmlDocument();
                XmlNode rootNode = doc.ReadNode(reader);
                if (rootNode != null)
                {
                    ProcessNode(rootNode);
                    if (writer != null)
                    {
                        rootNode.WriteTo(writer);
                    }
                }
            }
        }

        // The reader, writer and part stream are closed at this point, so the temporary
        // file is complete and the part can be reopened for writing.
        if (this.packageChanged) // is being set in ProcessNode method
        {
            this.packageChanged = false;
            using (FileStream tempFile = File.OpenRead(tempFilePath))
            using (Stream target = pp.GetStream(FileMode.Create, FileAccess.Write))
            {
                tempFile.CopyTo(target);
            }
        }
    }
    catch (OutOfMemoryException)
    {
        throw;
    }
    catch (Exception ex)
    {
        Log.Exception(ex, @"Failed to process a file."); // our inner log method
    }
    finally
    {
        if (!string.IsNullOrWhiteSpace(tempFilePath))
        {
            // Best-effort cleanup: File.Delete does not throw when the file is missing,
            // and a failed delete must not abort processing of the remaining parts.
            try
            {
                File.Delete(tempFilePath);
            }
            catch (IOException ex)
            {
                Log.Exception(ex, @"Failed to delete a temporary file.");
            }
            catch (UnauthorizedAccessException ex)
            {
                Log.Exception(ex, @"Failed to delete a temporary file.");
            }
        }
    }
}