在 TXT/xml 文件中查找多行,如果符合条件则删除
Find multiple lines in TXT/xml file and remove if criteria met
想知道是否可以制作一个简单的脚本来检查是否满足多个条件并对文件进行必要的修改。
继续举例说明我所拥有的和我想要实现的目标。
我有一个 xml 文件,其中每条记录有 4 行需要关注 - 编号、年份、型号和制造商(man)。
如果<man>
是福特或道奇,我不希望进行任何修改。但是如果 <man>
不是那个,那么我想检查 <year>
或 <model>
是否是 "NA",并删除值为 "NA" 的行。
<?xml version="1.0" encoding="UTF-8"?>
<CarStuff>
<fileName>CarExpor201217.xml</fileName>
<numberCars>5</numberCars>
<ref>2017XY</ref>
<carExo id="CAR0001_01">
<dealVen id="CAR0001_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0001_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>NA</year> - Line must be removed
<model>NA</model> - Line must be removed
<man>Acura</man>
</soldCar>
</carExo>
<carExo id="CAR0002_01">
<dealVen id="CAR0002_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0002_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>NA</year> - Line must be kept
<model>NA</model> - Line must be kept
<man>Ford</man>
</soldCar>
</carExo>
<carExo id="CAR0003_01">
<dealVen id="CAR0003_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0003_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year> - Line must be kept
<model>NA</model> - Line must be removed
<man>Bugati</man>
</soldCar>
</carExo>
<carExo id="CAR0004_01">
<dealVen id="CAR0004_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0004_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year> - Line must be kept
<model>NA</model> - Line must be kept
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0005_03">
<amount>1811.10</amount>
<lotNumber>2</lotNumber>
<year>NA</year> - Line must be kept
<model>Charger</model> - Line must be kept
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0005_03">
<amount>1811.10</amount>
<lotNumber>3</lotNumber>
<year>NA</year> - Line must be removed
<model>Dot</model> - Line must be kept
<man>Datsun</man>
</soldCar>
</carExo>
</CarStuff>
感谢所有评论和想法。
听起来您需要删除所有包含 >NA< 的行。
这其实不算编程问题(因此属于离题),不过这里给出一个使用 Notepad++ 的快速解答:
Ctrl + H 打开查找替换对话框。
在 Find what:
文本框中包含您的正则表达式:.*>NA<.*\r?\n
(其中 \r
是可选的,以防文件没有使用 Windows 行结尾)。
将 Replace with:
文本框留空。
确保选中“搜索模式”区域中的 Regular Expression
单选按钮。
点击 Replace All
瞧!所有包含 >NA<
的行都已删除。
(答案改编自this)。
通过 XMLDom 解决
您可以使用 XMLDom 和 XPath 在所谓的 NodeList 中搜索不包含 Dodge 或 Ford 字符串的 <man>
标签,并检查所有兄弟标签是否包含 "NA" 以便删除它们。下面的代码使用 后期绑定 。顺便说一句,您在 OP 中的 xml 格式不正确(关闭标记 </carStuf>
而不是 </carStuff>
- 我添加了一个小的解析错误例程来在加载时检查它。
代码
Option Explicit
Sub checkNA()
    ' Purpose: load an xml file, and for every <soldCar> whose <man> is neither
    '          "Ford" nor "Dodge" remove all child elements whose text is "NA",
    '          then save the document back to the same file.
    ' Uses late binding (CreateObject), so no project reference to MSXML is needed.
    ' On a load/parse failure a message box with the parser details is shown
    ' and the file is left untouched.
    Dim xDoc As Object                      ' xml document
    Dim noli As Object, noli2 As Object     ' node lists (each name needs its own "As Object")
    Dim no As Object, no2 As Object         ' nodes
    Dim noMan As Object                     ' node <man> to check if no Dodge or Ford
    Dim s As String
    Dim sFile As String                     ' xml file name
    sFile = ThisWorkbook.Path & "\xml\na_test.xml"    ' <<< change to your xml file name

    ' late binding xml
    Set xDoc = CreateObject("MSXML2.DOMDocument.6.0")
    xDoc.async = False: xDoc.validateOnParse = False
    xDoc.setProperty "SelectionLanguage", "XPath"

    ' load xml
    If xDoc.Load(sFile) Then
        Debug.Print "Loaded successfully"
    Else
        ' report the parse error and stop without saving anything
        Dim xPE As Object
        Dim strErrText As String
        Set xPE = xDoc.parseError
        With xPE
            strErrText = "Load error " & .ErrorCode & " xml file " & vbCrLf & _
                         Replace(.URL, "file:///", "") & vbCrLf & vbCrLf & _
                         .reason & _
                         "Source Text: " & .srcText & vbCrLf & vbCrLf & _
                         "Line No.: " & .Line & vbCrLf & _
                         "Line Pos.: " & .linepos & vbCrLf & _
                         "File Pos.: " & .filepos & vbCrLf & vbCrLf
        End With
        MsgBox strErrText, vbExclamation
        Set xPE = Nothing
        Exit Sub
    End If

    ' check items
    s = "carExo/soldCar"
    Set noli = xDoc.DocumentElement.SelectNodes(s)
    For Each no In noli
        Set noMan = no.SelectSingleNode("man")
        If Not noMan Is Nothing Then
            ' Exact match on the manufacturer name. A substring test such as
            ' InStr("Ford.Dodge.", name & ".") would also accept "ord", "odge"
            ' or an empty name and wrongly skip those cars.
            Select Case noMan.Text
                Case "Ford", "Dodge"
                    ' keep this car unchanged
                    ' Debug.Print "keep", noMan.Text
                Case Else
                    Debug.Print "delete", noMan.Text
                    ' delete all subtags containing "NA" as text
                    Set noli2 = no.SelectNodes("*")
                    For Each no2 In noli2
                        If no2.Text = "NA" Then
                            ' delete item
                            Debug.Print , no2.nodename & "=" & no2.Text
                            no2.ParentNode.RemoveChild no2
                        End If
                    Next no2
            End Select
        End If
    Next no

    ' save
    ' Debug.Print xDoc.XML
    xDoc.Save sFile

    ' close
    Set xDoc = Nothing
End Sub
编辑 12/29 - 附录
我使用一些额外的 XPath 添加了 ' check items
部分的第二个可行版本。这种替代方法简单地避免了正常代码中的两个 If
条件,因为它缩小了两个节点列表中找到的节点的范围。
' check items (XPath variant)
' The XPath predicates pre-filter the node lists, so no text comparison is needed in the loops:
'   (1) only <soldCar> elements whose <man> is neither Ford nor Dodge
'   (2) only child elements whose text equals "NA"
s = "carExo/soldCar[man!='Ford'][man!='Dodge']"          ' << (1) added condition to XPath
Set noli = xDoc.DocumentElement.SelectNodes(s)
For Each no In noli
    Set noMan = no.SelectSingleNode("man")
    If noMan Is Nothing Then
        ' no <man> element: leave this car untouched
    Else
        Debug.Print "delete", noMan.Text
        ' every node in this list is a direct child of the current <soldCar>
        Set noli2 = no.SelectNodes("*[.='NA']")          ' << (2)added condition to XPath
        For Each no2 In noli2
            Debug.Print , no2.nodename & "=" & no2.Text
            no.RemoveChild no2
        Next no2
    End If
Next no
提示
当然,条条大路通罗马,请参阅下面 @Parfait 的 XSLT 方法。
只需使用 XSLT,这种专用语言旨在通过根据各种标准删除节点来完全满足您转换原始 XML 文件的需要。
具体来说,先运行恒等变换(Identity Transform)按原样复制 XML,然后根据型号/年份/制造商的条件排除节点。
XSLT (另存为.xsl,一个特殊的.xml文件)
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Identity transform: copies every attribute and node unchanged
       unless a more specific template below matches. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- For a soldCar whose man is neither Ford nor Dodge, suppress the year
       and model children whose value is 'NA' (an empty template emits nothing).
       Only those nodes are dropped: soldCar itself still goes through the
       identity template, so its id attribute, the order of its children and
       any other child elements are preserved. -->
  <xsl:template match="soldCar[man != 'Ford' and man != 'Dodge']/year[. = 'NA']
                     | soldCar[man != 'Ford' and man != 'Dodge']/model[. = 'NA']"/>
</xsl:stylesheet>
VBA
Public Sub RunXSLT()
    ' Purpose: apply the XSLT stylesheet to the source XML file and save the
    '          transformed document as a new file.
    ' Requires a project reference to MS XML, v6.0 (early binding).
    ' If either file fails to load, the parser's reason is shown and nothing is written.
    Dim xmlDoc As New MSXML2.DOMDocument60, xslDoc As New MSXML2.DOMDocument60
    Dim newDoc As New MSXML2.DOMDocument60

    ' Load synchronously so the transform never starts on a partially loaded document
    xmlDoc.async = False
    xslDoc.async = False

    ' LOAD XML SOURCE
    If Not xmlDoc.Load("C:\Path\To\Input.xml") Then
        MsgBox "XML load error: " & xmlDoc.parseError.reason, vbExclamation
        Exit Sub
    End If

    ' LOAD XSL SOURCE
    If Not xslDoc.Load("C:\Path\To\XSLT\Script.xsl") Then
        MsgBox "XSL load error: " & xslDoc.parseError.reason, vbExclamation
        Exit Sub
    End If

    ' TRANSFORM SOURCE
    xmlDoc.transformNodeToObject xslDoc, newDoc
    newDoc.Save "C:\Path\To\Output.xml"

    ' RELEASE DOM OBJECTS
    Set xmlDoc = Nothing: Set xslDoc = Nothing: Set newDoc = Nothing
End Sub
输出
<?xml version="1.0" encoding="utf-8"?>
<CarStuff>
<fileName>CarExpor201217.xml</fileName>
<numberCars>5</numberCars>
<ref>2017XY</ref>
<carExo id="CAR0001_01">
<dealVen id="CAR0001_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar>
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<man>Acura</man>
</soldCar>
</carExo>
<carExo id="CAR0002_01">
<dealVen id="CAR0002_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0002_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>NA</year>
<model>NA</model>
<man>Ford</man>
</soldCar>
</carExo>
<carExo id="CAR0003_01">
<dealVen id="CAR0003_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar>
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year>
<man>Bugati</man>
</soldCar>
</carExo>
<carExo id="CAR0004_01">
<dealVen id="CAR0004_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0004_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year>
<model>NA</model>
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0005_03">
<amount>1811.10</amount>
<lotNumber>2</lotNumber>
<year>NA</year>
<model>Charger</model>
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar>
<amount>1811.10</amount>
<lotNumber>3</lotNumber>
<model>Dot</model>
<man>Datsun</man>
</soldCar>
</carExo>
</CarStuff>
想知道是否可以制作一个简单的脚本来检查是否满足多个条件并对文件进行必要的修改。
继续举例说明我所拥有的和我想要实现的目标。
我有一个 xml 文件,其中每条记录有 4 行需要关注 - 编号、年份、型号和制造商(man)。
如果<man>
是福特或道奇,我不希望进行任何修改。但是如果 <man>
不是那个,那么我想检查 <year>
或 <model>
是否是 "NA",并删除值为 "NA" 的行。
<?xml version="1.0" encoding="UTF-8"?>
<CarStuff>
<fileName>CarExpor201217.xml</fileName>
<numberCars>5</numberCars>
<ref>2017XY</ref>
<carExo id="CAR0001_01">
<dealVen id="CAR0001_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0001_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>NA</year> - Line must be removed
<model>NA</model> - Line must be removed
<man>Acura</man>
</soldCar>
</carExo>
<carExo id="CAR0002_01">
<dealVen id="CAR0002_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0002_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>NA</year> - Line must be kept
<model>NA</model> - Line must be kept
<man>Ford</man>
</soldCar>
</carExo>
<carExo id="CAR0003_01">
<dealVen id="CAR0003_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0003_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year> - Line must be kept
<model>NA</model> - Line must be removed
<man>Bugati</man>
</soldCar>
</carExo>
<carExo id="CAR0004_01">
<dealVen id="CAR0004_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0004_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year> - Line must be kept
<model>NA</model> - Line must be kept
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0005_03">
<amount>1811.10</amount>
<lotNumber>2</lotNumber>
<year>NA</year> - Line must be kept
<model>Charger</model> - Line must be kept
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0005_03">
<amount>1811.10</amount>
<lotNumber>3</lotNumber>
<year>NA</year> - Line must be removed
<model>Dot</model> - Line must be kept
<man>Datsun</man>
</soldCar>
</carExo>
</CarStuff>
感谢所有评论和想法。
听起来您需要删除所有包含 >NA< 的行。
这其实不算编程问题(因此属于离题),不过这里给出一个使用 Notepad++ 的快速解答:
Ctrl + H 打开查找替换对话框。
在
Find what:
文本框中包含您的正则表达式:.*>NA<.*\r?\n
(其中\r
是可选的,以防文件没有使用 Windows 行结尾)。将
Replace with:
文本框留空。确保选中“搜索模式”区域中的
Regular Expression
单选按钮。点击
Replace All
瞧!所有包含>NA<
的行都已删除。
(答案改编自this)。
通过 XMLDom 解决
您可以使用 XMLDom 和 XPath 在所谓的 NodeList 中搜索不包含 Dodge 或 Ford 字符串的 <man>
标签,并检查所有兄弟标签是否包含 "NA" 以便删除它们。下面的代码使用 后期绑定 。顺便说一句,您在 OP 中的 xml 格式不正确(关闭标记 </carStuf>
而不是 </carStuff>
- 我添加了一个小的解析错误例程来在加载时检查它。
代码
Option Explicit
Sub checkNA()
    ' Purpose: load an xml file, and for every <soldCar> whose <man> is neither
    '          "Ford" nor "Dodge" remove all child elements whose text is "NA",
    '          then save the document back to the same file.
    ' Uses late binding (CreateObject), so no project reference to MSXML is needed.
    ' On a load/parse failure a message box with the parser details is shown
    ' and the file is left untouched.
    Dim xDoc As Object                      ' xml document
    Dim noli As Object, noli2 As Object     ' node lists (each name needs its own "As Object")
    Dim no As Object, no2 As Object         ' nodes
    Dim noMan As Object                     ' node <man> to check if no Dodge or Ford
    Dim s As String
    Dim sFile As String                     ' xml file name
    sFile = ThisWorkbook.Path & "\xml\na_test.xml"    ' <<< change to your xml file name

    ' late binding xml
    Set xDoc = CreateObject("MSXML2.DOMDocument.6.0")
    xDoc.async = False: xDoc.validateOnParse = False
    xDoc.setProperty "SelectionLanguage", "XPath"

    ' load xml
    If xDoc.Load(sFile) Then
        Debug.Print "Loaded successfully"
    Else
        ' report the parse error and stop without saving anything
        Dim xPE As Object
        Dim strErrText As String
        Set xPE = xDoc.parseError
        With xPE
            strErrText = "Load error " & .ErrorCode & " xml file " & vbCrLf & _
                         Replace(.URL, "file:///", "") & vbCrLf & vbCrLf & _
                         .reason & _
                         "Source Text: " & .srcText & vbCrLf & vbCrLf & _
                         "Line No.: " & .Line & vbCrLf & _
                         "Line Pos.: " & .linepos & vbCrLf & _
                         "File Pos.: " & .filepos & vbCrLf & vbCrLf
        End With
        MsgBox strErrText, vbExclamation
        Set xPE = Nothing
        Exit Sub
    End If

    ' check items
    s = "carExo/soldCar"
    Set noli = xDoc.DocumentElement.SelectNodes(s)
    For Each no In noli
        Set noMan = no.SelectSingleNode("man")
        If Not noMan Is Nothing Then
            ' Exact match on the manufacturer name. A substring test such as
            ' InStr("Ford.Dodge.", name & ".") would also accept "ord", "odge"
            ' or an empty name and wrongly skip those cars.
            Select Case noMan.Text
                Case "Ford", "Dodge"
                    ' keep this car unchanged
                    ' Debug.Print "keep", noMan.Text
                Case Else
                    Debug.Print "delete", noMan.Text
                    ' delete all subtags containing "NA" as text
                    Set noli2 = no.SelectNodes("*")
                    For Each no2 In noli2
                        If no2.Text = "NA" Then
                            ' delete item
                            Debug.Print , no2.nodename & "=" & no2.Text
                            no2.ParentNode.RemoveChild no2
                        End If
                    Next no2
            End Select
        End If
    Next no

    ' save
    ' Debug.Print xDoc.XML
    xDoc.Save sFile

    ' close
    Set xDoc = Nothing
End Sub
编辑 12/29 - 附录
我使用一些额外的 XPath 添加了 ' check items
部分的第二个可行版本。这种替代方法简单地避免了正常代码中的两个 If
条件,因为它缩小了两个节点列表中找到的节点的范围。
' check items (XPath variant)
' The XPath predicates pre-filter the node lists, so no text comparison is needed in the loops:
'   (1) only <soldCar> elements whose <man> is neither Ford nor Dodge
'   (2) only child elements whose text equals "NA"
s = "carExo/soldCar[man!='Ford'][man!='Dodge']"          ' << (1) added condition to XPath
Set noli = xDoc.DocumentElement.SelectNodes(s)
For Each no In noli
    Set noMan = no.SelectSingleNode("man")
    If noMan Is Nothing Then
        ' no <man> element: leave this car untouched
    Else
        Debug.Print "delete", noMan.Text
        ' every node in this list is a direct child of the current <soldCar>
        Set noli2 = no.SelectNodes("*[.='NA']")          ' << (2)added condition to XPath
        For Each no2 In noli2
            Debug.Print , no2.nodename & "=" & no2.Text
            no.RemoveChild no2
        Next no2
    End If
Next no
提示
当然,条条大路通罗马,请参阅下面 @Parfait 的 XSLT 方法。
只需使用 XSLT,这种专用语言旨在通过根据各种标准删除节点来完全满足您转换原始 XML 文件的需要。
具体来说,先运行恒等变换(Identity Transform)按原样复制 XML,然后根据型号/年份/制造商的条件排除节点。
XSLT (另存为.xsl,一个特殊的.xml文件)
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Identity transform: copies every attribute and node unchanged
       unless a more specific template below matches. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- For a soldCar whose man is neither Ford nor Dodge, suppress the year
       and model children whose value is 'NA' (an empty template emits nothing).
       Only those nodes are dropped: soldCar itself still goes through the
       identity template, so its id attribute, the order of its children and
       any other child elements are preserved. -->
  <xsl:template match="soldCar[man != 'Ford' and man != 'Dodge']/year[. = 'NA']
                     | soldCar[man != 'Ford' and man != 'Dodge']/model[. = 'NA']"/>
</xsl:stylesheet>
VBA
Public Sub RunXSLT()
    ' Purpose: apply the XSLT stylesheet to the source XML file and save the
    '          transformed document as a new file.
    ' Requires a project reference to MS XML, v6.0 (early binding).
    ' If either file fails to load, the parser's reason is shown and nothing is written.
    Dim xmlDoc As New MSXML2.DOMDocument60, xslDoc As New MSXML2.DOMDocument60
    Dim newDoc As New MSXML2.DOMDocument60

    ' Load synchronously so the transform never starts on a partially loaded document
    xmlDoc.async = False
    xslDoc.async = False

    ' LOAD XML SOURCE
    If Not xmlDoc.Load("C:\Path\To\Input.xml") Then
        MsgBox "XML load error: " & xmlDoc.parseError.reason, vbExclamation
        Exit Sub
    End If

    ' LOAD XSL SOURCE
    If Not xslDoc.Load("C:\Path\To\XSLT\Script.xsl") Then
        MsgBox "XSL load error: " & xslDoc.parseError.reason, vbExclamation
        Exit Sub
    End If

    ' TRANSFORM SOURCE
    xmlDoc.transformNodeToObject xslDoc, newDoc
    newDoc.Save "C:\Path\To\Output.xml"

    ' RELEASE DOM OBJECTS
    Set xmlDoc = Nothing: Set xslDoc = Nothing: Set newDoc = Nothing
End Sub
输出
<?xml version="1.0" encoding="utf-8"?>
<CarStuff>
<fileName>CarExpor201217.xml</fileName>
<numberCars>5</numberCars>
<ref>2017XY</ref>
<carExo id="CAR0001_01">
<dealVen id="CAR0001_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar>
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<man>Acura</man>
</soldCar>
</carExo>
<carExo id="CAR0002_01">
<dealVen id="CAR0002_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0002_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>NA</year>
<model>NA</model>
<man>Ford</man>
</soldCar>
</carExo>
<carExo id="CAR0003_01">
<dealVen id="CAR0003_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar>
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year>
<man>Bugati</man>
</soldCar>
</carExo>
<carExo id="CAR0004_01">
<dealVen id="CAR0004_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0004_03">
<amount>1811.10</amount>
<lotNumber>1</lotNumber>
<year>1997</year>
<model>NA</model>
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar id="CAR0005_03">
<amount>1811.10</amount>
<lotNumber>2</lotNumber>
<year>NA</year>
<model>Charger</model>
<man>Dodge</man>
</soldCar>
</carExo>
<carExo id="CAR0005_01">
<dealVen id="CAR0005_02">
<name>John</name>
<surname>Smith</surname>
</dealVen>
<soldCar>
<amount>1811.10</amount>
<lotNumber>3</lotNumber>
<model>Dot</model>
<man>Datsun</man>
</soldCar>
</carExo>
</CarStuff>