VBA 用于从 Teradata SQL 文本文件中删除注释的正则表达式

VBA Regex to remove comments from Teradata SQL text files

我正在尝试编写 VBA 正则表达式以从 Teradata SQL 文本文件中删除注释。

注释有两种:

1. -- Two consecutive dashes, '--', mark the remainder of the line as a comment.

2. /* xxx */ Everything between '/*' and '*/' is a comment. Comments of this type can span one or more lines.

复杂之处在于单引号中的文本,例如 '-- 这两个破折号位于单引号内,因此并不表示注释的开始'

我是 Regex 的新手,事实证明我自己无法解决这个问题。我一直在沿着负向前瞻(negative lookahead)的思路思考。

有人可以帮忙吗?

我目前的进展:

Type 1:

\-\-[\S \t]*$

Type 2:

/\*[\s\S]*?\*/

下面是一种基于递归调用解析器的算法。解析器有几种模式:三种注释子类型的解析模式(多行注释、整行注释、行尾注释)、引号文本解析模式和普通模式。普通模式可以切换到其他任何一种模式,而其他模式结束后只能切换回普通模式。因此,例如注释中的引号字符以及引号文本中的任何注释字符都会被忽略。要搜索的字符取决于当前模式。源文本被逐块解析:一旦找到目标字符,就相应地切换模式,当前块结束,下一块由下一次递归调用开始处理。调用栈保存中间结果。源文本处理完毕后开始回溯,每一层被调用的解析器把自己的块与后续结果拼接后返回,最终得到完整的代码。

代码如下:

Option Explicit

' Macro entry point: loads a SQL script from disk, strips its comments with
' Parse, and saves the cleaned text to a second file.
' ReadTextFile / WriteTextFile are helpers that are not part of this excerpt;
' per the original note their last argument selects the text format:
' -2 = system default, -1 = Unicode, 0 = ASCII.
Sub RemoveComments()

    Const SRC_PATH As String = "C:\Users\DELL\Desktop\tmp\source.sql"
    Const DST_PATH As String = "C:\Users\DELL\Desktop\tmp\result.sql"

    Dim strSql As String   ' raw script as read from SRC_PATH
    Dim strClean As String ' script with comments removed

    ' Read the source as ASCII (format 0)
    strSql = ReadTextFile(SRC_PATH, 0)

    ' Start the parser in normal mode (0)
    Parse strSql, strClean, 0

    ' Write the result as ASCII (format 0)
    WriteTextFile strClean, DST_PATH, 0

End Sub

' Removes Teradata SQL comments from strSrc and returns the cleaned text in strRes.
'
' The source is consumed chunk by chunk. Each call handles one chunk in the
' current mode, then calls itself on the remainder in the next mode and finally
' prepends its own kept text to the result of that call.
'
' Parameters:
'   strSrc       - text still to be processed (not modified).
'   strRes       - receives the processed text (overwritten).
'   lngMode      - parser mode for the start of strSrc; pass 0 from outside.
'                  It is passed ByRef and is changed as the parser switches modes.
'                    0 - normal SQL text
'                    1 - inside a /* ... */ comment
'                    2 - whole-line "--" comment (the line break is removed too)
'                    3 - trailing "--" comment (the line break is kept)
'                    4 - inside a single-quoted literal ('' is an escaped quote)
'   blnLineStart - True when strSrc begins at a real line start. Used internally
'                  by the recursion; external callers can omit it.
'
' Notes:
'   * An unclosed /* comment is dropped up to the end of the source.
'   * An unclosed quoted literal is kept verbatim (it is SQL text, not a comment).
'   * CRLF, LF and CR are all accepted as line breaks.
'   * One recursion level is used per chunk, so inputs with a very large number
'     of comments/literals may exhaust the VBA call stack.
Sub Parse(strSrc As String, strRes As String, lngMode As Long, Optional blnLineStart As Boolean = True)

    Static objRegExp As Object
    Dim objMatches As Object
    Dim strBeg As String              ' text of this chunk that is kept
    Dim strRet As String              ' processed remainder returned by the recursive call
    Dim lngPos As Long                ' 1-based index in strSrc where the next chunk starts
    Dim blnNextLineStart As Boolean   ' does the next chunk begin at a real line start?

    If objRegExp Is Nothing Then ' initialize regexp once
        Set objRegExp = CreateObject("VBScript.RegExp")
        With objRegExp
            .Global = False
            .MultiLine = True
            .IgnoreCase = True
        End With
    End If
    strRes = ""
    If strSrc = "" Then Exit Sub ' source completed
    strBeg = "" ' preceding chunk is empty by default
    blnNextLineStart = False
    Select Case lngMode
        Case 0 ' processing normal
            objRegExp.Pattern = "(\/\*)|(^[ \t]*--)|(--)|(\')"
            Set objMatches = objRegExp.Execute(strSrc)
            If objMatches.Count = 0 Then
                strRes = strSrc
                Exit Sub ' source completed, nothing left to strip
            End If
            With objMatches(0)
                lngPos = .FirstIndex ' 0-based offset of the match
                If .SubMatches(0) <> "" Then
                    lngMode = 1 ' start multiline comment
                    strBeg = Left$(strSrc, lngPos)
                    ' continue after the opener so that "/*/" is not taken for "/*" + "*/"
                    lngPos = lngPos + 3
                ElseIf .SubMatches(1) <> "" Then
                    If lngPos > 0 Or blnLineStart Then
                        lngMode = 2 ' start whole line comment
                    Else
                        ' "^" matched only because this chunk begins mid-line (right after
                        ' a closed literal or "*/"): this is a trailing comment, so keep
                        ' the blanks in front of "--" and keep the line break
                        lngMode = 3
                        lngPos = lngPos + .Length - 2
                    End If
                    strBeg = Left$(strSrc, lngPos)
                    lngPos = lngPos + 1
                ElseIf .SubMatches(2) <> "" Then
                    lngMode = 3 ' start singleline comment
                    strBeg = Left$(strSrc, lngPos)
                    lngPos = lngPos + 1
                Else
                    lngMode = 4 ' start text in quotes
                    strBeg = Left$(strSrc, lngPos + 1) ' keep the opening quote char
                    lngPos = lngPos + 2
                End If
            End With
        Case 1 ' processing multiline comment
            lngMode = 0 ' start normal
            lngPos = InStr(strSrc, "*/")
            If lngPos = 0 Then Exit Sub ' source completed, comment unclosed
            lngPos = lngPos + 2 ' skip comment closing chars
        Case 2 ' processing whole line comment
            lngMode = 0 ' start normal
            lngPos = ParseLineBreakPos(strSrc)
            If lngPos = 0 Then Exit Sub ' source completed
            If Mid$(strSrc, lngPos, 2) = vbCrLf Then
                lngPos = lngPos + 2 ' skip CR+LF pair
            Else
                lngPos = lngPos + 1 ' skip lone LF or CR
            End If
            blnNextLineStart = True
        Case 3 ' processing singleline comment
            lngMode = 0 ' start normal
            lngPos = ParseLineBreakPos(strSrc) ' the line break itself is kept
            If lngPos = 0 Then Exit Sub ' source completed
        Case 4 ' processing text within quotes
            lngPos = InStr(strSrc, "'")
            If lngPos = 0 Then
                strRes = strSrc ' unclosed literal: keep the text, it is not a comment
                Exit Sub
            End If
            If Mid$(strSrc, lngPos, 2) = "''" Then ' escaped quote char ''
                strBeg = Left$(strSrc, lngPos + 1) ' store preceding chunk with escaped quote char
                lngPos = lngPos + 2 ' shift next from escaped quote char
            Else
                lngMode = 0 ' start normal
                strBeg = Left$(strSrc, lngPos) ' store preceding chunk with quote char
                lngPos = lngPos + 1 ' shift next from quote char
            End If
    End Select
    Parse Mid$(strSrc, lngPos), strRet, lngMode, blnNextLineStart ' recursive parser call
    strRes = strBeg & strRet ' concatenate preceding chunk with processed and return result

End Sub

' Returns the 1-based position of the first line-break character (CR or LF)
' in strText, or 0 when the text contains none.
Private Function ParseLineBreakPos(strText As String) As Long

    Dim lngCr As Long
    Dim lngLf As Long

    lngCr = InStr(strText, vbCr)
    lngLf = InStr(strText, vbLf)
    If lngCr = 0 Or (lngLf > 0 And lngLf < lngCr) Then
        ParseLineBreakPos = lngLf
    Else
        ParseLineBreakPos = lngCr
    End If

End Function