从 XML 文件中删除样式表

Removing the stylesheet from an XML file

我需要从 input.xml 文件中删除样式表行:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="class.xsl"?>
<class>
    <student>Jack</student>
    <student>Harry</student>
    <student>Rebecca</student>
    <teacher>Mr. Bean</teacher>
</class>

预期结果:

<?xml version="1.0" encoding="UTF-8"?>
<class>
    <student>Jack</student>
    <student>Harry</student>
    <student>Rebecca</student>
    <teacher>Mr. Bean</teacher>
</class>

我想我可以删除以 <?xml-stylesheet 开头的整行,但我更愿意找到一个优雅的解决方案...

我已经将 XML 加载到 IXMLDocument 中,但我没有找到任何删除 <?xml-stylesheet type="text/xsl" href="class.xsl"?> 行的函数:

uses
  XMLDoc,
  XMLIntf;

// Button handler from the question: loads input.xml into an IXMLDocument and
// saves it back out as output.xml. The "..." marks the spot where the
// xml-stylesheet processing instruction is supposed to be removed.
procedure TForm1.Button1Click(Sender: TObject);
var
  Doc : IXMLDocument;
begin
  // Create an empty document, then populate it from the input file.
  Doc := NewXMLDocument();
  Doc.LoadFromFile('.\input.xml');

  ...

  // Write the (modified) document to the output file.
  Doc.SaveToFile('.\output.xml');
end;

您可以使用 ChildNodes.Delete 方法删除节点。在此示例中,我使用 OmniXML 作为 DOM 供应商(vendor),以避免必须初始化 COM:

program SO60488378;

{$APPTYPE CONSOLE}


{$R *.res}

uses
  Xml.XmlDom,
  Xml.omnixmldom,
  XMLDoc,
  XMLIntf,
  System.SysUtils;

// Console demo: parse a small XML document held in a string, drop the
// xml-stylesheet processing instruction and print the formatted result.
// The sample is built so that the stylesheet instruction is the second
// top-level node (index 1), right after the XML declaration.

const
  CRLF = #13#10;

var
  SourceXml : String;
  XmlDoc    : IXMLDocument;

begin
  // Sample input, one source line per XML line, each terminated by CR/LF.
  SourceXml :=
    '<?xml version="1.0" encoding="UTF-8"?>' + CRLF +
    '<?xml-stylesheet type="text/xsl" href="class.xsl"?>' + CRLF +
    '<class>' + CRLF +
    '    <student>Jack</student>' + CRLF +
    '    <student>Harry</student>' + CRLF +
    '     <student>Rebecca</student>' + CRLF +
    '    <teacher>Mr. Bean</teacher>' + CRLF +
    '</class>' + CRLF;

  try
    // Select OmniXML as the DOM vendor before the document is created.
    DefaultDOMVendor := sOmniXmlVendor;

    XmlDoc := NewXMLDocument();
    XmlDoc.LoadFromXML(SourceXml);

    // Index 1 is the second top-level node, which in this sample input
    // is the xml-stylesheet processing instruction.
    XmlDoc.ChildNodes.Delete(1);

    Writeln(FormatXMLData(XmlDoc.XML.Text));
  except
    // Report any failure on the console instead of letting it escape.
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;

  // Keep the console window open until Enter is pressed.
  Readln;
end.

输出:

<?xml version="1.0"?>
<class>
  <student>Jack</student>
  <student>Harry</student>
  <student>Rebecca</student>
  <teacher>Mr. Bean</teacher>
</class>

编辑: 按照@PeterWolf 的建议添加解决方案:

   // Parse the XML text into the document.
   // NOTE: Index and Node are not declared in this snippet; they must be
   // declared in the enclosing var section.
   Doc.LoadFromXML(XMLInput);
   // Walk the document's top-level nodes and delete the first
   // xml-stylesheet processing instruction that is found.
   // To delete ALL stylesheet nodes instead, iterate in reverse (downto)
   // and remove the Break statement.
   for Index := 0 to Doc.ChildNodes.Count-1 do
    begin
     Node := Doc.ChildNodes[Index];
     // Match on both node type and name so that other processing
     // instructions are left untouched.
     if (Node.NodeType = ntProcessingInstr) and (Node.NodeName = 'xml-stylesheet') then
      begin
       Doc.ChildNodes.Delete(Index);
       // Leave the loop right after deleting: the node list has changed.
       Break;
      end;
    end;
   // Print the formatted result.
   Writeln(FormatXMLData(Doc.XML.Text));

RegEx 助你一臂之力。

// Removes every xml-stylesheet processing instruction from the XML text in
// memoXML using a regular expression, then writes the result back.
//
// Sender: the control that triggered the event (unused).
procedure TForm1.btnDoItClick(Sender: TObject);
const
  // One xml-stylesheet processing instruction plus the line break that
  // follows it:
  //   <\?xml-stylesheet\b  start of the instruction (exact target name)
  //   [\s\S]*?\?>          lazily up to the FIRST closing "?>", so other
  //                        instructions on the same line are not swallowed
  //                        and an instruction spanning lines still matches
  //   [ \t]*(?:\r?\n)?     trailing blanks and one CRLF/LF line break
  // The previous pattern '[\n*](?=<\?xml-stylesheet)(.*)\?>' required a
  // preceding LF (so it missed an instruction at the very start of the text)
  // and, on CRLF text, removed that LF but left a stray CR behind.
  StylesheetPI = '<\?xml-stylesheet\b[\s\S]*?\?>[ \t]*(?:\r?\n)?';
var
  sXML : String;
begin
  // Uses System.RegularExpressions;
  sXML := memoXML.Text; // or load it from file
  sXML := TRegEx.Replace(sXML, StylesheetPI, '');
  memoXML.Text := sXML; // or save it to file
end;