VBA 用于从 Teradata SQL 文本文件中删除注释的正则表达式
VBA Regex to remove comments from Teradata SQL text files
我正在尝试编写 VBA 正则表达式以从 Teradata SQL 文本文件中删除注释。
有两种注释:
1 --
The occurrence of two dashes, '--' denotes the remainder of the line
as comments.
2./* xxx */
Everything between '/*' and '*/' is comments. Comments of this type
can be spread over 1+ lines.
复杂之处在于单引号中的文本,例如 '-- 这些破折号位于单引号内,因此并不表示后面是注释'。
我是正则表达式(Regex)的新手,事实证明我自己解决不了这个问题。我一直在沿着负向先行断言(negative lookahead)的思路思考。
有人可以帮忙吗?
我目前的进展:
Type 1:
\-\-[\S \t]*$
Type 2:
/\*[\s\S]*?\*/
下面是一种基于解析器递归调用的算法。解析器有几种模式:三种注释解析模式(多行注释、整行注释、行尾注释)、引号文本解析模式以及普通模式。普通模式可以切换到其他任意一种模式,而其他模式结束后只能回到普通模式。因此,例如注释中的引号字符以及引号文本中的任何注释标记都会被忽略。要搜索的字符取决于当前模式。源文本按块逐段解析:一旦找到目标字符,就相应地切换模式,当前块处理完毕,并通过下一次递归调用开始处理下一块。调用栈保存中间结果。源文本处理到末尾后开始回溯,每一层解析器把自己的块与下层返回的结果拼接后返回,最终得到去除注释后的完整代码。
代码如下:
Option Explicit
' Entry point: loads the SQL source file, strips its comments with Parse,
' and writes the cleaned text to the result file.
Sub RemoveComments()
    ' File format flag handed to the file helpers:
    ' -2 - System default, -1 - Unicode, 0 - ASCII
    Const FILE_FORMAT = 0
    Const SOURCE_PATH As String = "C:\Users\DELL\Desktop\tmp\source.sql"
    Const RESULT_PATH As String = "C:\Users\DELL\Desktop\tmp\result.sql"

    Dim strRawSql As String
    Dim strCleanSql As String

    strRawSql = ReadTextFile(SOURCE_PATH, FILE_FORMAT)

    ' Mode 0 = start parsing in normal (non-comment, non-quoted) text.
    Parse strRawSql, strCleanSql, 0

    WriteTextFile strCleanSql, RESULT_PATH, FILE_FORMAT
End Sub
' Removes SQL comments from strSrc and returns the cleaned text in strRes.
'
' Two comment forms are removed:
'   --  ...        from the two dashes to the end of the line
'   /*  ...  */    block comment, may span several lines
' Comment markers inside single-quoted literals are left untouched; a doubled
' quote ('') inside a literal is treated as an escaped quote character.
'
' Parameters:
'   strSrc  - text to process.
'   strRes  - receives the processed text (any previous content is discarded).
'   lngMode - state the parser starts in. Top-level callers pass 0.
'               0  normal text
'               1  inside a block comment
'               2  inside a whole-line comment (line break is removed with it)
'               3  inside a trailing line comment (line break is kept)
'               4  inside a quoted literal
'             The argument is read only; it is not modified.
'
' Raises error 5 when lngMode is outside 0..4.
'
' Behaviour at end of input:
'   - an unclosed block comment or a line comment on the last line is dropped;
'   - an unterminated quoted literal is kept verbatim (it is not a comment).
'
' Implementation notes:
'   - The source is tokenised once with a global regex and then walked
'     iteratively, so "^" only ever matches at real line starts and the call
'     stack does not grow with the number of comments/literals.
'   - CR LF, lone LF and lone CR are all accepted as line breaks.
Sub Parse(strSrc As String, strRes As String, lngMode As Long)
    Static objRegExp As Object
    Dim objMatches As Object
    Dim lngState As Long    ' current parser mode (see lngMode values)
    Dim lngLen As Long      ' length of strSrc
    Dim lngCur As Long      ' 1-based index of the first character not yet consumed
    Dim lngIdx As Long      ' index of the next candidate token in objMatches
    Dim lngTok As Long      ' 1-based start of the token being handled
    Dim lngHit As Long      ' 1-based position found by the current search
    Dim strChar As String

    If objRegExp Is Nothing Then ' initialize regexp once
        Set objRegExp = CreateObject("VBScript.RegExp")
        With objRegExp
            .Global = True      ' collect every token in a single pass
            .MultiLine = True   ' let ^ match at each line start
            .IgnoreCase = True
            ' 1: block comment opener, 2: whole-line comment, 3: trailing comment, 4: quote
            .Pattern = "(\/\*)|(^[ \t]*--)|(--)|(\')"
        End With
    End If

    strRes = ""
    If strSrc = "" Then Exit Sub ' nothing to process
    If lngMode < 0 Or lngMode > 4 Then
        Err.Raise 5, "Parse", "Unknown parser mode: " & lngMode
    End If

    Set objMatches = objRegExp.Execute(strSrc)
    lngLen = Len(strSrc)
    lngState = lngMode
    lngCur = 1
    lngIdx = 0

    Do While lngCur <= lngLen
        Select Case lngState

            Case 0 ' normal text: copy up to the next token, then switch mode
                ' Tokens that start before lngCur lie inside a comment or
                ' literal that has already been consumed - ignore them.
                Do While lngIdx < objMatches.Count
                    If objMatches(lngIdx).FirstIndex + 1 >= lngCur Then Exit Do
                    lngIdx = lngIdx + 1
                Loop
                If lngIdx >= objMatches.Count Then
                    strRes = strRes & Mid$(strSrc, lngCur) ' no more tokens: keep the tail
                    Exit Do
                End If
                With objMatches(lngIdx)
                    lngTok = .FirstIndex + 1 ' FirstIndex is 0-based
                    strRes = strRes & Mid$(strSrc, lngCur, lngTok - lngCur)
                    Select Case True
                        Case .SubMatches(0) <> ""
                            lngState = 1            ' start block comment
                            lngCur = lngTok + 2     ' search for */ after the opener, so "/*/" stays open
                        Case .SubMatches(1) <> ""
                            lngState = 2            ' start whole-line comment
                            lngCur = lngTok + .Length
                        Case .SubMatches(2) <> ""
                            lngState = 3            ' start trailing line comment
                            lngCur = lngTok + 2
                        Case Else
                            lngState = 4            ' start quoted literal
                            strRes = strRes & "'"   ' keep the opening quote
                            lngCur = lngTok + 1
                    End Select
                End With
                lngIdx = lngIdx + 1

            Case 1 ' block comment: skip through the closing */
                lngHit = InStr(lngCur, strSrc, "*/")
                If lngHit = 0 Then Exit Do ' comment unclosed: drop the rest
                lngCur = lngHit + 2
                lngState = 0

            Case 2, 3 ' line comment: skip to the end of the line
                lngHit = lngCur
                Do While lngHit <= lngLen
                    strChar = Mid$(strSrc, lngHit, 1)
                    If strChar = vbCr Or strChar = vbLf Then Exit Do
                    lngHit = lngHit + 1
                Loop
                If lngHit > lngLen Then Exit Do ' comment runs to the end of the source
                If lngState = 2 Then
                    ' whole-line comment: remove its line break as well
                    If Mid$(strSrc, lngHit, 2) = vbCrLf Then
                        lngCur = lngHit + 2
                    Else
                        lngCur = lngHit + 1
                    End If
                Else
                    ' trailing comment: the line break belongs to the code line
                    lngCur = lngHit
                End If
                lngState = 0

            Case 4 ' quoted literal: copy verbatim through the closing quote
                lngHit = InStr(lngCur, strSrc, "'")
                If lngHit = 0 Then
                    strRes = strRes & Mid$(strSrc, lngCur) ' unterminated literal: keep it
                    Exit Do
                End If
                If Mid$(strSrc, lngHit, 2) = "''" Then
                    ' escaped quote char '' - copy both and stay inside the literal
                    strRes = strRes & Mid$(strSrc, lngCur, lngHit + 2 - lngCur)
                    lngCur = lngHit + 2
                Else
                    strRes = strRes & Mid$(strSrc, lngCur, lngHit + 1 - lngCur)
                    lngCur = lngHit + 1
                    lngState = 0
                End If

        End Select
    Loop
End Sub
我正在尝试编写 VBA 正则表达式以从 Teradata SQL 文本文件中删除注释。
有两种注释:
1 -- The occurrence of two dashes, '--' denotes the remainder of the line as comments.
2./* xxx */ Everything between '/*' and '*/' is comments. Comments of this type can be spread over 1+ lines.
复杂之处在于单引号中的文本,例如 '-- 这些破折号位于单引号内,因此并不表示后面是注释'。
我是正则表达式(Regex)的新手,事实证明我自己解决不了这个问题。我一直在沿着负向先行断言(negative lookahead)的思路思考。
有人可以帮忙吗?
我目前的进展:
Type 1:
\-\-[\S \t]*$
Type 2:
/\*[\s\S]*?\*/
下面是一种基于解析器递归调用的算法。解析器有几种模式:三种注释解析模式(多行注释、整行注释、行尾注释)、引号文本解析模式以及普通模式。普通模式可以切换到其他任意一种模式,而其他模式结束后只能回到普通模式。因此,例如注释中的引号字符以及引号文本中的任何注释标记都会被忽略。要搜索的字符取决于当前模式。源文本按块逐段解析:一旦找到目标字符,就相应地切换模式,当前块处理完毕,并通过下一次递归调用开始处理下一块。调用栈保存中间结果。源文本处理到末尾后开始回溯,每一层解析器把自己的块与下层返回的结果拼接后返回,最终得到去除注释后的完整代码。
代码如下:
Option Explicit
' Entry point: loads the SQL source file, strips its comments with Parse,
' and writes the cleaned text to the result file.
Sub RemoveComments()
    ' File format flag handed to the file helpers:
    ' -2 - System default, -1 - Unicode, 0 - ASCII
    Const FILE_FORMAT = 0
    Const SOURCE_PATH As String = "C:\Users\DELL\Desktop\tmp\source.sql"
    Const RESULT_PATH As String = "C:\Users\DELL\Desktop\tmp\result.sql"

    Dim strRawSql As String
    Dim strCleanSql As String

    strRawSql = ReadTextFile(SOURCE_PATH, FILE_FORMAT)

    ' Mode 0 = start parsing in normal (non-comment, non-quoted) text.
    Parse strRawSql, strCleanSql, 0

    WriteTextFile strCleanSql, RESULT_PATH, FILE_FORMAT
End Sub
' Removes SQL comments from strSrc and returns the cleaned text in strRes.
'
' Two comment forms are removed:
'   --  ...        from the two dashes to the end of the line
'   /*  ...  */    block comment, may span several lines
' Comment markers inside single-quoted literals are left untouched; a doubled
' quote ('') inside a literal is treated as an escaped quote character.
'
' Parameters:
'   strSrc  - text to process.
'   strRes  - receives the processed text (any previous content is discarded).
'   lngMode - state the parser starts in. Top-level callers pass 0.
'               0  normal text
'               1  inside a block comment
'               2  inside a whole-line comment (line break is removed with it)
'               3  inside a trailing line comment (line break is kept)
'               4  inside a quoted literal
'             The argument is read only; it is not modified.
'
' Raises error 5 when lngMode is outside 0..4.
'
' Behaviour at end of input:
'   - an unclosed block comment or a line comment on the last line is dropped;
'   - an unterminated quoted literal is kept verbatim (it is not a comment).
'
' Implementation notes:
'   - The source is tokenised once with a global regex and then walked
'     iteratively, so "^" only ever matches at real line starts and the call
'     stack does not grow with the number of comments/literals.
'   - CR LF, lone LF and lone CR are all accepted as line breaks.
Sub Parse(strSrc As String, strRes As String, lngMode As Long)
    Static objRegExp As Object
    Dim objMatches As Object
    Dim lngState As Long    ' current parser mode (see lngMode values)
    Dim lngLen As Long      ' length of strSrc
    Dim lngCur As Long      ' 1-based index of the first character not yet consumed
    Dim lngIdx As Long      ' index of the next candidate token in objMatches
    Dim lngTok As Long      ' 1-based start of the token being handled
    Dim lngHit As Long      ' 1-based position found by the current search
    Dim strChar As String

    If objRegExp Is Nothing Then ' initialize regexp once
        Set objRegExp = CreateObject("VBScript.RegExp")
        With objRegExp
            .Global = True      ' collect every token in a single pass
            .MultiLine = True   ' let ^ match at each line start
            .IgnoreCase = True
            ' 1: block comment opener, 2: whole-line comment, 3: trailing comment, 4: quote
            .Pattern = "(\/\*)|(^[ \t]*--)|(--)|(\')"
        End With
    End If

    strRes = ""
    If strSrc = "" Then Exit Sub ' nothing to process
    If lngMode < 0 Or lngMode > 4 Then
        Err.Raise 5, "Parse", "Unknown parser mode: " & lngMode
    End If

    Set objMatches = objRegExp.Execute(strSrc)
    lngLen = Len(strSrc)
    lngState = lngMode
    lngCur = 1
    lngIdx = 0

    Do While lngCur <= lngLen
        Select Case lngState

            Case 0 ' normal text: copy up to the next token, then switch mode
                ' Tokens that start before lngCur lie inside a comment or
                ' literal that has already been consumed - ignore them.
                Do While lngIdx < objMatches.Count
                    If objMatches(lngIdx).FirstIndex + 1 >= lngCur Then Exit Do
                    lngIdx = lngIdx + 1
                Loop
                If lngIdx >= objMatches.Count Then
                    strRes = strRes & Mid$(strSrc, lngCur) ' no more tokens: keep the tail
                    Exit Do
                End If
                With objMatches(lngIdx)
                    lngTok = .FirstIndex + 1 ' FirstIndex is 0-based
                    strRes = strRes & Mid$(strSrc, lngCur, lngTok - lngCur)
                    Select Case True
                        Case .SubMatches(0) <> ""
                            lngState = 1            ' start block comment
                            lngCur = lngTok + 2     ' search for */ after the opener, so "/*/" stays open
                        Case .SubMatches(1) <> ""
                            lngState = 2            ' start whole-line comment
                            lngCur = lngTok + .Length
                        Case .SubMatches(2) <> ""
                            lngState = 3            ' start trailing line comment
                            lngCur = lngTok + 2
                        Case Else
                            lngState = 4            ' start quoted literal
                            strRes = strRes & "'"   ' keep the opening quote
                            lngCur = lngTok + 1
                    End Select
                End With
                lngIdx = lngIdx + 1

            Case 1 ' block comment: skip through the closing */
                lngHit = InStr(lngCur, strSrc, "*/")
                If lngHit = 0 Then Exit Do ' comment unclosed: drop the rest
                lngCur = lngHit + 2
                lngState = 0

            Case 2, 3 ' line comment: skip to the end of the line
                lngHit = lngCur
                Do While lngHit <= lngLen
                    strChar = Mid$(strSrc, lngHit, 1)
                    If strChar = vbCr Or strChar = vbLf Then Exit Do
                    lngHit = lngHit + 1
                Loop
                If lngHit > lngLen Then Exit Do ' comment runs to the end of the source
                If lngState = 2 Then
                    ' whole-line comment: remove its line break as well
                    If Mid$(strSrc, lngHit, 2) = vbCrLf Then
                        lngCur = lngHit + 2
                    Else
                        lngCur = lngHit + 1
                    End If
                Else
                    ' trailing comment: the line break belongs to the code line
                    lngCur = lngHit
                End If
                lngState = 0

            Case 4 ' quoted literal: copy verbatim through the closing quote
                lngHit = InStr(lngCur, strSrc, "'")
                If lngHit = 0 Then
                    strRes = strRes & Mid$(strSrc, lngCur) ' unterminated literal: keep it
                    Exit Do
                End If
                If Mid$(strSrc, lngHit, 2) = "''" Then
                    ' escaped quote char '' - copy both and stay inside the literal
                    strRes = strRes & Mid$(strSrc, lngCur, lngHit + 2 - lngCur)
                    lngCur = lngHit + 2
                Else
                    strRes = strRes & Mid$(strSrc, lngCur, lngHit + 1 - lngCur)
                    lngCur = lngHit + 1
                    lngState = 0
                End If

        End Select
    Loop
End Sub