在 C# 中使用 Xml 文档取消注释 XML 的片段
Uncommenting fragment of XML with Xml Document in C#
取消注释 XML 中某些节点的正文的最简单方法是什么?元素有唯一的名字,文件结构如下:
somefile.xml
<?xml version="1.0"?>
<name1>
<irrelevant1>
<irrelevant2>
<!--
<irrelevant3 />
-->
</irrelevant2>
</irrelevant1>
<name2>
<name3>
<!--
<name4 field="The" />
<name4 field="Owls" />
<name4 field="Are />
<name4 field="Not" />
<name4 field="What" />
<name4 field="They" />
<name4 field="Seem />
-->
</name3>
</name2>
</name1>
目标应该如下所示,删除了注释:
uncommented.xml
<?xml version="1.0"?>
<name1>
<irrelevant1>
<irrelevant2>
<!--
<irrelevant3 />
-->
</irrelevant2>
</irrelevant1>
<name2>
<name3>
<name4 field="The" />
<name4 field="Owls" />
<name4 field="Are />
<name4 field="Not" />
<name4 field="What" />
<name4 field="They" />
<name4 field="Seem />
</name3>
</name2>
</name1>
我的解析方法:
XmlDocument xdoc = new XmlDocument();
xdoc.Load(@"C:\somefile.xml");
XmlNodeList nl = xdoc.GetElementsByTagName("name2");
XmlNode xn = nl[0];
string xn_content = xn.InnerXml;
xn_content = Regex.Replace(xn_content, "<!--|-->", String.Empty);
XmlDocument doc = new XmlDocument();
doc.LoadXml(xn_content);
XmlNode newNode = doc.DocumentElement;
// this import doesn't really help
xdoc.ImportNode(newNode, true);
xn.RemoveAll();
xn.AppendChild(newNode);
xdoc.Save(@"C:\uncommented.xml");
带有 ArgumentException 的结果:
{"The node to be inserted is from a different document context."}
您的直接问题是您调用了 XmlDocument.ImportNode()
但没有使用返回的节点。你需要做 newNode = xDoc.ImportNode(newNode, true);
.
但是,更简洁的方法是完全避免 Regex
解析。相反,下降 XmlNode
层次结构,挑选出 XmlComment
nodes you wish to uncomment, load their InnerText
into an XmlDocumentFragment
,然后将其新创建的子节点添加到评论的父节点:
public static class XmlNodeExtensions
{
public static XmlDocument Document(this XmlNode node)
{
for (; node != null; node = node.ParentNode)
{
var doc = node as XmlDocument;
if (doc != null)
return doc;
}
return null;
}
public static IEnumerable<XmlNode> AncestorsAndSelf(this XmlNode node)
{
for (; node != null; node = node.ParentNode)
yield return node;
}
public static IEnumerable<XmlNode> DescendantsAndSelf(this XmlNode root)
{
if (root == null)
yield break;
yield return root;
foreach (var child in root.ChildNodes.Cast<XmlNode>())
foreach (var subChild in child.DescendantsAndSelf())
yield return subChild;
}
public static void UncommentXmlNodes(IEnumerable<XmlComment> comments)
{
foreach (var comment in comments.ToList())
UncommentXmlNode(comment);
}
public static void UncommentXmlNode(XmlComment comment)
{
if (comment == null)
throw new NullReferenceException();
var doc = comment.Document();
if (doc == null)
throw new InvalidOperationException();
var parent = comment.ParentNode;
var innerText = comment.InnerText;
XmlDocumentFragment docFrag = doc.CreateDocumentFragment();
//Set the contents of the document fragment.
docFrag.InnerXml = innerText;
XmlNode insertAfter = comment;
foreach (var child in docFrag.ChildNodes.OfType<XmlElement>().ToList())
{
insertAfter = parent.InsertAfter(child, insertAfter);
}
parent.RemoveChild(comment);
}
}
然后这样称呼它:
string xml = @"<?xml version=""1.0""?>
<name1>
<irrelevant1>
<irrelevant2>
<!--
<irrelevant3 />
-->
</irrelevant2>
</irrelevant1>
<name2>
<name3>
<!--
<name4 field=""The"" />
<name4 field=""Owls"" />
<name4 field=""Are"" />
<name4 field=""Not"" />
<name4 field=""What"" />
<name4 field=""They"" />
<name4 field=""Seem"" />
-->
</name3>
</name2>
</name1>
";
var xmlDoc = new XmlDocument();
xmlDoc.LoadXml(xml);
Debug.WriteLine(xmlDoc.ToXml());
XmlNodeExtensions.UncommentXmlNodes(xmlDoc.DocumentElement.DescendantsAndSelf().OfType<XmlComment>().Where(c => c.ParentNode.Name == "name3"));
Debug.WriteLine(xmlDoc.ToXml());
请注意,您的评论 XML 无效。 <name4 field="Are />
应该是 <name4 field="Are"/>
而 <name4 field="Seem />
应该是 <name4 field="Seem"/>
。我在测试用例中为您修复了这个问题,因为我认为这是一个打字错误。
取消注释 XML 中某些节点的正文的最简单方法是什么?元素有唯一的名字,文件结构如下:
somefile.xml
<?xml version="1.0"?>
<name1>
<irrelevant1>
<irrelevant2>
<!--
<irrelevant3 />
-->
</irrelevant2>
</irrelevant1>
<name2>
<name3>
<!--
<name4 field="The" />
<name4 field="Owls" />
<name4 field="Are />
<name4 field="Not" />
<name4 field="What" />
<name4 field="They" />
<name4 field="Seem />
-->
</name3>
</name2>
</name1>
目标应该如下所示,删除了注释:
uncommented.xml
<?xml version="1.0"?>
<name1>
<irrelevant1>
<irrelevant2>
<!--
<irrelevant3 />
-->
</irrelevant2>
</irrelevant1>
<name2>
<name3>
<name4 field="The" />
<name4 field="Owls" />
<name4 field="Are />
<name4 field="Not" />
<name4 field="What" />
<name4 field="They" />
<name4 field="Seem />
</name3>
</name2>
</name1>
我的解析方法:
XmlDocument xdoc = new XmlDocument();
xdoc.Load(@"C:\somefile.xml");
XmlNodeList nl = xdoc.GetElementsByTagName("name2");
XmlNode xn = nl[0];
string xn_content = xn.InnerXml;
xn_content = Regex.Replace(xn_content, "<!--|-->", String.Empty);
XmlDocument doc = new XmlDocument();
doc.LoadXml(xn_content);
XmlNode newNode = doc.DocumentElement;
// this import doesn't really help
xdoc.ImportNode(newNode, true);
xn.RemoveAll();
xn.AppendChild(newNode);
xdoc.Save(@"C:\uncommented.xml");
带有 ArgumentException 的结果:
{"The node to be inserted is from a different document context."}
您的直接问题是您调用了 XmlDocument.ImportNode()
但没有使用返回的节点。你需要做 newNode = xDoc.ImportNode(newNode, true);
.
但是,更简洁的方法是完全避免 Regex
解析。相反,下降 XmlNode
层次结构,挑选出 XmlComment
nodes you wish to uncomment, load their InnerText
into an XmlDocumentFragment
,然后将其新创建的子节点添加到评论的父节点:
public static class XmlNodeExtensions
{
public static XmlDocument Document(this XmlNode node)
{
for (; node != null; node = node.ParentNode)
{
var doc = node as XmlDocument;
if (doc != null)
return doc;
}
return null;
}
public static IEnumerable<XmlNode> AncestorsAndSelf(this XmlNode node)
{
for (; node != null; node = node.ParentNode)
yield return node;
}
public static IEnumerable<XmlNode> DescendantsAndSelf(this XmlNode root)
{
if (root == null)
yield break;
yield return root;
foreach (var child in root.ChildNodes.Cast<XmlNode>())
foreach (var subChild in child.DescendantsAndSelf())
yield return subChild;
}
public static void UncommentXmlNodes(IEnumerable<XmlComment> comments)
{
foreach (var comment in comments.ToList())
UncommentXmlNode(comment);
}
public static void UncommentXmlNode(XmlComment comment)
{
if (comment == null)
throw new NullReferenceException();
var doc = comment.Document();
if (doc == null)
throw new InvalidOperationException();
var parent = comment.ParentNode;
var innerText = comment.InnerText;
XmlDocumentFragment docFrag = doc.CreateDocumentFragment();
//Set the contents of the document fragment.
docFrag.InnerXml = innerText;
XmlNode insertAfter = comment;
foreach (var child in docFrag.ChildNodes.OfType<XmlElement>().ToList())
{
insertAfter = parent.InsertAfter(child, insertAfter);
}
parent.RemoveChild(comment);
}
}
然后这样称呼它:
string xml = @"<?xml version=""1.0""?>
<name1>
<irrelevant1>
<irrelevant2>
<!--
<irrelevant3 />
-->
</irrelevant2>
</irrelevant1>
<name2>
<name3>
<!--
<name4 field=""The"" />
<name4 field=""Owls"" />
<name4 field=""Are"" />
<name4 field=""Not"" />
<name4 field=""What"" />
<name4 field=""They"" />
<name4 field=""Seem"" />
-->
</name3>
</name2>
</name1>
";
var xmlDoc = new XmlDocument();
xmlDoc.LoadXml(xml);
Debug.WriteLine(xmlDoc.ToXml());
XmlNodeExtensions.UncommentXmlNodes(xmlDoc.DocumentElement.DescendantsAndSelf().OfType<XmlComment>().Where(c => c.ParentNode.Name == "name3"));
Debug.WriteLine(xmlDoc.ToXml());
请注意,您的评论 XML 无效。 <name4 field="Are />
应该是 <name4 field="Are"/>
而 <name4 field="Seem />
应该是 <name4 field="Seem"/>
。我在测试用例中为您修复了这个问题,因为我认为这是一个打字错误。