VBA 字符串规范化(通过 WinAPI)

VBA String Normalization (via WinAPI)

我刚开始尝试在 VBA 中编写代码以使用 WinAPI 函数。 WinAPI Normalize() 函数使用什么编码? UTF-16 是我所期望的,但以下内容不起作用。字符数似乎计算不正确,然后尝试实际创建规范化字符串只会使 Access 崩溃。

'normFormEnum
'not random numbers, but from ...
'https://msdn.microsoft.com/en-us/library/windows/desktop/dd319094(v=vs.85).aspx
'for use in calling the Win API Function NormalizeString()
Public Enum normFormEnum
    normFOther = 0
    normFC = 1      'the W3C (Internet) required normalization format
    normFD = 2
    normFKC = 5
    normFKD = 6
End Enum

'https://msdn.microsoft.com/en-us/library/windows/desktop/dd319093(v=vs.85).aspx
Private Declare Function NormalizeString Lib "Normaliz" ( _
    ByVal normForm As normFormEnum, _
    ByVal lpSrcString As LongPtr, _
    ByVal cwSrcLength As Long, _
    ByRef lpDstString As LongPtr, _
    ByVal cwDstLength As Long _
    ) As Long

Public Function stringNormalize( _
    ByVal theString As String, _
    Optional ByVal normForm As normFormEnum = normFC _
    ) As String

    Dim nChars As Long
    Dim newString As String

    nChars = NormalizeString(normForm, StrPtr(theString), Len(theString), 0&, 0)

    'prefill the string buffer so it can be altered shortly...
    newString = String(nChars, " ")

Debug.Print nChars
'prints nChars, showing that it 3x the amount of characters.

'The following will crash the application....

'    NormalizeString normForm, StrPtr(theString), Len(theString), StrPtr(newString), nChars

    stringNormalize = newString

End Function

函数 NormalizeString returns 当 cwDstLength 为 0 时的估计大小(以字节为单位),但您将其用作字符数。

所以从第一次调用的结果中取出一半,并用第二次调用的结果截断缓冲区:

Private Declare PtrSafe Function NormalizeString Lib "Normaliz" ( _
  ByVal normForm As Long, _
  ByVal lpSrcString As LongPtr, _
  ByVal cwSrcLength As Long, _
  ByVal lpDstString As LongPtr, _
  ByVal cwDstLength As Long _
) As Long

Public Enum NormalizationForm
  NormOther = 0
  NormC = 1
  NormD = 2
  NormKC = 5
  NormKD = 6
End Enum

Public Function NormalizeStr(source As String, ByVal normForm As NormalizationForm) As String
  Dim buffer As String, size As Long, i As Long

  For i = 1 To 5
    size = NormalizeString(normForm, StrPtr(source), Len(source), StrPtr(buffer), Len(buffer))

    If size >= 0 And size < Len(buffer) Then
      NormalizeStr = Left$(buffer, size)
      Exit Function
    End If

    buffer = String$(Abs(size) + 1, 0)
  Next

  Err.Raise 9, , "NormalizeString failed"
End Function

Public Sub Usage()
  Debug.Print NormalizeStr(ChrW(196), NormD)
  Debug.Print NormalizeStr("A" & ChrW(776), NormC)
End Sub