在使用 XmlReader 读取 XLSX 文件的同时保存对该文件的更改

Update XLSX file changes whilst reading the file with XmlReader

我们有一段代码,它会将 Excel XLSX 文档加载到内存中,对其进行一些修改,然后再保存回去。

// Loads the whole package part into an XmlDocument, passes its root element
// to ProcessNode, and writes the document back when this.fileModified is set.
XmlDocument doc = new XmlDocument();

// The stream returned by GetStream() is not closed by XmlDocument.Load, so
// dispose it here; this also releases the part before it is reopened for writing.
using (Stream input = pp.GetStream())
{
    doc.Load(input);
}

XmlNode rootNode = doc.DocumentElement;

if (rootNode == null) return;
ProcessNode(rootNode);

if (this.fileModified)
{
    // FileMode.Create replaces the existing part content; disposing the stream
    // makes sure the saved XML is flushed into the package.
    using (Stream output = pp.GetStream(FileMode.Create, FileAccess.Write))
    {
        doc.Save(output);
    }
}

这对于小文件运行良好,但对于一些较大的 Excel 文件会抛出 OutOfMemory 异常。所以我们决定换一种方式,改用 XmlReader 类,避免一次性把整个文件加载到内存中。

// Streams every matching package part through an XmlReader and loads only the
// <hyperlinks> and <row> elements into a DOM, one element at a time, so the
// whole part is never held in memory.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

    // XmlReader.Create(Stream) leaves the stream open when the reader is
    // disposed (XmlReaderSettings.CloseInput defaults to false), so the part
    // stream gets its own using.
    using (Stream partStream = pp.GetStream())
    using (XmlReader reader = XmlReader.Create(partStream))
    {
        reader.MoveToContent();
        while (!reader.EOF)
        {
            bool isTargetElement = reader.NodeType == XmlNodeType.Element
                                   && (reader.Name == "hyperlinks" || reader.Name == "row");
            if (!isTargetElement)
            {
                reader.Read();
                continue;
            }

            // ReadNode consumes the whole element and positions the reader on
            // the node after it, so no extra Read() is needed on this path.
            XmlDocument doc = new XmlDocument();
            XmlNode rootNode = doc.ReadNode(reader);
            if (rootNode != null)
            {
                doc.AppendChild(rootNode);
                ProcessNode(rootNode);  // how can I save updated changes back to the file?
            }
        }
    }
}

这会逐个节点读取文件,并处理我们需要的节点(同时更改其中的一些值)。但是,我不确定如何将这些更改写回原始 Excel 文件。我尝试将 XmlWriter 与 XmlReader 一起使用,但无法正常工作。有什么想法吗?

更新:

我尝试了 @dbc 在评论中给出的建议,但对我来说似乎太慢了。对于大文件,它或许不会再抛出 OutOfMemory 异常,但处理过程会没完没了地持续下去。

// Streams every matching package part through an XmlReader and echoes it to an
// XmlWriter, replacing each <hyperlinks> and <row> element with the version
// that ProcessNode has edited.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

    // The rewritten part is collected in this StringBuilder, i.e. in memory.
    StringBuilder strBuilder = new StringBuilder();

    // XmlReader.Create(Stream) leaves the stream open when the reader is
    // disposed (XmlReaderSettings.CloseInput defaults to false), so the part
    // stream gets its own using. The writer is null when the package is not
    // opened for read/write; a using statement accepts a null resource.
    using (Stream partStream = pp.GetStream())
    using (XmlReader reader = XmlReader.Create(partStream))
    using (XmlWriter writer = this.Package.FileOpenAccess == FileAccess.ReadWrite ? XmlWriter.Create(strBuilder) : null)
    {
        reader.MoveToContent();
        while (!reader.EOF)
        {
            bool isTargetElement = reader.NodeType == XmlNodeType.Element
                                   && (reader.Name == "hyperlinks" || reader.Name == "row");
            if (!isTargetElement)
            {
                WriteShallowNode(writer, reader); // Helper taken from the Stack Overflow answers suggested by @dbc
                reader.Read();
                continue;
            }

            // ReadNode consumes the whole element and positions the reader on
            // the node after it, so no extra Read() is needed on this path.
            XmlDocument doc = new XmlDocument();
            XmlNode rootNode = doc.ReadNode(reader);
            if (rootNode != null)
            {
                doc.AppendChild(rootNode);
                ProcessNode(rootNode);

                // NOTE(review): OuterXml builds a string for every element before
                // it is written; rootNode.WriteTo(writer) would stream the node
                // straight into the writer instead. Confirm whether this is the
                // source of the slowdown.
                writer?.WriteRaw(rootNode.OuterXml);
            }
        }

        writer?.Flush();
    }
}

注 1:我目前使用 StringBuilder 只是为了测试,最终计划改用临时文件。注 2:我尝试过每处理 100 个元素就刷新一次 XmlWriter,但仍然很慢。

有什么想法吗?

试试下面的代码。我长期用这种方式处理那些会导致内存不足的大型 XML 文件。

           // Walks the document with an XmlReader and loads one <row> element at
           // a time as an XElement. "File Stream" is the input URI passed to
           // XmlReader.Create (presumably a placeholder for the real input);
           // readerSettings is defined outside this snippet.
           using (XmlReader reader = XmlReader.Create("File Stream", readerSettings))
           {
               while (!reader.EOF)
               {
                   // XElement.ReadFrom leaves the reader on the node after the
                   // element it consumed. If that node is already the next <row>,
                   // calling ReadToFollowing would advance past it, so only
                   // search when the reader is not on a "row".
                   if (reader.Name != "row")
                   {
                       reader.ReadToFollowing("row");
                   }

                   // ReadToFollowing runs to the end of the input when no further
                   // <row> exists, so re-check EOF before reading.
                   if (!reader.EOF)
                   {
                       XElement row = (XElement)XElement.ReadFrom(reader);
                   }
               }
           }

我在@dbc 的帮助下做了一些修改,现在它可以正常工作了。

// Rewrites every matching package part by streaming it through an XmlReader
// into a temporary file: <hyperlinks> and <row> elements are loaded one at a
// time, edited by ProcessNode and written out, and every other node is copied
// by WriteShallowNode. The temporary file replaces the part content only when
// this.packageChanged is set.
PackagePartCollection ppc = this.Package.GetParts();
foreach (PackagePart pp in ppc)
{
    // Declared outside the try block so the finally block can see it; it stays
    // null for parts that are skipped before a temp file path is obtained.
    string tempFilePath = null;

    try
    {
        if (!this.xmlContentTypesXlsx.Contains(pp.ContentType)) continue;

        tempFilePath = GetTempFilePath();

        // XmlReader.Create(Stream) leaves the stream open when the reader is
        // disposed (XmlReaderSettings.CloseInput defaults to false), so the part
        // stream gets its own using. The writer is null when the package is not
        // opened for read/write; a using statement accepts a null resource.
        using (Stream partStream = pp.GetStream())
        using (XmlReader reader = XmlReader.Create(partStream))
        using (XmlWriter writer = this.Package.FileOpenAccess == FileAccess.ReadWrite ? XmlWriter.Create(tempFilePath) : null)
        {
            while (!reader.EOF)
            {
                bool isTargetElement = reader.NodeType == XmlNodeType.Element
                                       && (reader.Name == "hyperlinks" || reader.Name == "row");
                if (!isTargetElement)
                {
                    WriteShallowNode(writer, reader); // Helper taken from the Stack Overflow answers suggested by @dbc
                    reader.Read();
                    continue;
                }

                // ReadNode consumes the whole element and positions the reader
                // on the node after it, so no extra Read() is needed here.
                XmlDocument doc = new XmlDocument();
                XmlNode rootNode = doc.ReadNode(reader);
                if (rootNode != null)
                {
                    ProcessNode(rootNode);
                    if (writer != null)
                    {
                        rootNode.WriteTo(writer);
                    }
                }
            }
        }

        // The reader and writer are disposed at this point, so the temp file is
        // complete and the part can be reopened for writing.
        if (this.packageChanged) // is being set in ProcessNode method
        {
            this.packageChanged = false;

            // Dispose the destination stream as well so the copied bytes are
            // flushed into the package.
            using (FileStream tempFile = File.OpenRead(tempFilePath))
            using (Stream partOutput = pp.GetStream(FileMode.Create, FileAccess.Write))
            {
                tempFile.CopyTo(partOutput);
            }
        }
    }
    catch (OutOfMemoryException)
    {
        throw;
    }
    catch (Exception ex)
    {
        Log.Exception(ex, @"Failed to process a file."); // our inner log method
    }
    finally
    {
        if (!string.IsNullOrWhiteSpace(tempFilePath))
        {
            // Best-effort cleanup: File.Delete does nothing when the file does
            // not exist, and a failure here must not escape the finally block
            // and mask an exception that is already propagating.
            try
            {
                File.Delete(tempFilePath);
            }
            catch (Exception ex)
            {
                Log.Exception(ex, @"Failed to delete a temp file.");
            }
        }
    }
}