Select 关键词之间的文字
Select text between key words
这是 Select block of text and merge into new document
的后续问题
我有一个 SGM 文档,其中添加了注释,并且在我的 sgm 文件中有注释。我需要提取 start/stop 注释之间的字符串,以便将它们放入临时文件中进行修改。现在它正在选择所有内容,包括 start/stop 评论和 start/stop 评论之外的数据。
Dim DirFolder As String = txtDirectory.Text
Dim Directory As New IO.DirectoryInfo(DirFolder)
Dim allFiles As IO.FileInfo() = Directory.GetFiles("*.sgm")
Dim singleFile As IO.FileInfo
Dim Prefix As String
Dim newMasterFilePath As String
Dim masterFileName As String
Dim newMasterFileName As String
Dim startMark As String = "<!--#start#-->"
Dim stopMark As String = "<!--#stop#-->"
searchDir = txtDirectory.Text
Prefix = txtBxUnique.Text
For Each singleFile In allFiles
If File.Exists(singleFile.FullName) Then
Dim fileName = singleFile.FullName
Debug.Print("file name : " & fileName)
' A backup first
Dim backup As String = fileName & ".bak"
File.Copy(fileName, backup, True)
' Load lines from the source file in memory
Dim lines() As String = File.ReadAllLines(backup)
' Now re-create the source file and start writing lines inside a block
' Evaluate all the lines in the file.
' Set insideBlock to false
Dim insideBlock As Boolean = False
Using sw As StreamWriter = File.CreateText(backup)
For Each line As String In lines
If line = startMark Then
' start writing at the line below
insideBlock = True
' Evaluate if the next line is <!Stop>
ElseIf line = stopMark Then
' Stop writing
insideBlock = False
ElseIf insideBlock = True Then
' Write the current line in the block
sw.WriteLine(line)
End If
Next
End Using
End If
Next
这是要测试的示例文本。
<chapter id="Chapter_Overview"> <?Pub Lcl _divid="500" _parentid="0">
<title>Learning how to gather data</title>
<!--#start#-->
<section>
<title>ALTERNATE MISSION EQUIPMENT</title>
<para0 verdate="18 Jan 2019" verstatus="ver">
<title>
<applicabil applicref="xxx">
</applicabil>Three-Button Trackball Mouse</title>
<para>This is the example to grab all text between start and stop comments.
</para></para0>
</section>
<!--#stop#-->
注意事项:开始和结束注释总是换行,一个文档可以有多个 start/stop 部分
我想也许可以在此使用正则表达式
(<section>[\w+\w]+.*?<\/section>)\R(<\?Pub _gtinsert.*>\R<pgbrk pgnum.*?>\R<\?Pub /_gtinsert>)*
或者可以使用 IndexOf 和 LastIndexOf,但我无法正常工作。
可以读取整个文件,使用{"<!--#start#-->", "<!--#stop#-->"}
的字符串数组拆分成一个数组,拆分成这个
- 元素 0:
"<!--#start#-->"
之前的文本
- 元素 1:
"<!--#start#-->"
和 "<!--#stop#-->"
之间的文本
- 元素 2:
"<!--#stop#-->"
之后的文本
并取出元素 1。然后将其写入备份。
Dim text = File.ReadAllText(backup).Split({startMark, stopMark}, StringSplitOptions.RemoveEmptyEntries)(1)
Using sw As StreamWriter = File.CreateText(backup)
sw.Write(text)
End Using
编辑地址评论
我确实使原始代码紧凑了一点。它可以扩展为以下内容,允许您添加一些验证
Dim text = File.ReadAllText(backup)
Dim split = text.Split({startMark, stopMark}, StringSplitOptions.RemoveEmptyEntries)
If split.Count() <> 3 Then Throw New Exception("File didn't contain one or more delimiters.")
text = split(1)
Using sw As StreamWriter = File.CreateText(backup)
sw.Write(text)
End Using
这是 Select block of text and merge into new document
的后续问题我有一个 SGM 文档,其中添加了注释,并且在我的 sgm 文件中有注释。我需要提取 start/stop 注释之间的字符串,以便将它们放入临时文件中进行修改。现在它正在选择所有内容,包括 start/stop 评论和 start/stop 评论之外的数据。
Dim DirFolder As String = txtDirectory.Text
Dim Directory As New IO.DirectoryInfo(DirFolder)
Dim allFiles As IO.FileInfo() = Directory.GetFiles("*.sgm")
Dim singleFile As IO.FileInfo
Dim Prefix As String
Dim newMasterFilePath As String
Dim masterFileName As String
Dim newMasterFileName As String
Dim startMark As String = "<!--#start#-->"
Dim stopMark As String = "<!--#stop#-->"
searchDir = txtDirectory.Text
Prefix = txtBxUnique.Text
For Each singleFile In allFiles
If File.Exists(singleFile.FullName) Then
Dim fileName = singleFile.FullName
Debug.Print("file name : " & fileName)
' A backup first
Dim backup As String = fileName & ".bak"
File.Copy(fileName, backup, True)
' Load lines from the source file in memory
Dim lines() As String = File.ReadAllLines(backup)
' Now re-create the source file and start writing lines inside a block
' Evaluate all the lines in the file.
' Set insideBlock to false
Dim insideBlock As Boolean = False
Using sw As StreamWriter = File.CreateText(backup)
For Each line As String In lines
If line = startMark Then
' start writing at the line below
insideBlock = True
' Evaluate if the next line is <!Stop>
ElseIf line = stopMark Then
' Stop writing
insideBlock = False
ElseIf insideBlock = True Then
' Write the current line in the block
sw.WriteLine(line)
End If
Next
End Using
End If
Next
这是要测试的示例文本。
<chapter id="Chapter_Overview"> <?Pub Lcl _divid="500" _parentid="0">
<title>Learning how to gather data</title>
<!--#start#-->
<section>
<title>ALTERNATE MISSION EQUIPMENT</title>
<para0 verdate="18 Jan 2019" verstatus="ver">
<title>
<applicabil applicref="xxx">
</applicabil>Three-Button Trackball Mouse</title>
<para>This is the example to grab all text between start and stop comments.
</para></para0>
</section>
<!--#stop#-->
注意事项:开始和结束注释总是换行,一个文档可以有多个 start/stop 部分
我想也许可以在此使用正则表达式
(<section>[\w+\w]+.*?<\/section>)\R(<\?Pub _gtinsert.*>\R<pgbrk pgnum.*?>\R<\?Pub /_gtinsert>)*
或者可以使用 IndexOf 和 LastIndexOf,但我无法正常工作。
可以读取整个文件,使用{"<!--#start#-->", "<!--#stop#-->"}
的字符串数组拆分成一个数组,拆分成这个
- 元素 0:
"<!--#start#-->"
之前的文本
- 元素 1:
"<!--#start#-->"
和"<!--#stop#-->"
之间的文本
- 元素 2:
"<!--#stop#-->"
之后的文本
并取出元素 1。然后将其写入备份。
Dim text = File.ReadAllText(backup).Split({startMark, stopMark}, StringSplitOptions.RemoveEmptyEntries)(1)
Using sw As StreamWriter = File.CreateText(backup)
sw.Write(text)
End Using
编辑地址评论
我确实使原始代码紧凑了一点。它可以扩展为以下内容,允许您添加一些验证
Dim text = File.ReadAllText(backup)
Dim split = text.Split({startMark, stopMark}, StringSplitOptions.RemoveEmptyEntries)
If split.Count() <> 3 Then Throw New Exception("File didn't contain one or more delimiters.")
text = split(1)
Using sw As StreamWriter = File.CreateText(backup)
sw.Write(text)
End Using