VBA 将 excel 分析保存为 UTF-8 文本文件的宏
VBA Macro to save excel analysis as UTF-8 textfile
我有一个包含一些外来字符的 XLS 文件。
我尝试了以下方法,但数据看起来很奇怪:
--- removed code above
' Export the workbook as a text file next to the original, swapping the
' ".xlsx" extension for ".txt". The file format requested is xlUnicodeText.
wb.SaveAs fPath & Replace(fName, ".xlsx", ".txt"), FileFormat:=xlUnicodeText, CreateBackup:=False
wb.Saved = True
wb.Close True
' NOTE(review): wb was already closed on the previous line, so this closes
' whichever workbook is active at this point - confirm that is intended.
ActiveWorkbook.Close
' Input is the text file written above; output gets a "-UTF8.txt" suffix.
tFileToOpenPath = fPath & Replace(fName, ".xlsx", ".txt")
tFileToSavePath = fPath & Replace(fName, ".xlsx", "-UTF8.txt")
Dim oStream
Set oStream = CreateObject("ADODB.Stream") 'Create the Stream object (late bound)
With oStream
.Type = 2 'Stream type 2: text/string data
' NOTE(review): this single charset is used both when loading the source file
' and when saving the result. The source was saved as xlUnicodeText above,
' not as UTF-8, so the load interprets the file with a mismatched charset.
.CharSet = "utf-8" 'Charset applied to this stream's text data
.Open 'Open the stream
.LoadFromFile tFileToOpenPath 'Load the contents of the source file into the stream
.SaveToFile tFileToSavePath, 2 'Write the stream to the target path (option value 2)
End With
Set oStream = Nothing 'Release the object reference (no explicit .Close is called)
输出的数据是这样的...我很纳闷
S a l e s
生成的 .txt 文件本身没问题,只有 -UTF8.txt 这个文件看起来很奇怪。
谢谢!
事情是这样的:wb.SaveAs 使用 xlUnicodeText 格式保存您的工作簿,而这种格式实际上是 UTF-16LE(当人们只说 "Unicode" 时,通常指的就是它)。
您的流对象打开了这个 UTF-16 文件,却按 UTF-8 来读取它。在 UTF-16 中,每个字符至少占用两个字节,简单拉丁字符的第二个字节是空字节;而在 UTF-8 中,这些字符只占用一个字节。因此按 UTF-8 读取时,大多数字符后面都会多出一个空字节(在您的情况下显示为空格;Notepad++ 会将其显示为 NUL)。
这似乎适用于转换:
Public Sub convert_UnicodeToUTF8(parF1 As String, parF2 As String)
    ' Re-encodes a text file: loads parF1 through a stream whose charset is
    ' "Unicode" and writes the same text to parF2 through a stream whose
    ' charset is "utf-8". An existing parF2 is overwritten.
    '
    '   parF1 - path of the source text file
    '   parF2 - path of the file to create or overwrite
    Const TEXT_STREAM = 2           ' stream Type value for text data
    Const OVERWRITE_EXISTING = 2    ' SaveToFile option: create or overwrite

    Dim reader, writer
    Set reader = CreateObject("ADODB.Stream")
    Set writer = CreateObject("ADODB.Stream")

    ' Prepare the destination first so the source can be copied straight into it.
    With writer
        .Type = TEXT_STREAM
        .Charset = "utf-8"
        .Open
    End With

    ' Load the source with its own charset; the charset of the two streams
    ' is deliberately different, which is what performs the conversion.
    reader.Type = TEXT_STREAM
    reader.Charset = "Unicode"      ' per the original author, also the default value
    reader.Open
    reader.LoadFromFile parF1
    reader.CopyTo writer
    reader.Close

    writer.SaveToFile parF2, OVERWRITE_EXISTING
    writer.Close

    Set reader = Nothing
    Set writer = Nothing
End Sub
Note: the CharSet property of the destination Stream object can differ from that of the source Stream object.
我有一个包含一些外来字符的 XLS 文件。 我尝试了以下方法,但数据看起来很奇怪:
--- removed code above
' Export the workbook as a text file next to the original, swapping the
' ".xlsx" extension for ".txt". The file format requested is xlUnicodeText.
wb.SaveAs fPath & Replace(fName, ".xlsx", ".txt"), FileFormat:=xlUnicodeText, CreateBackup:=False
wb.Saved = True
wb.Close True
' NOTE(review): wb was already closed on the previous line, so this closes
' whichever workbook is active at this point - confirm that is intended.
ActiveWorkbook.Close
' Input is the text file written above; output gets a "-UTF8.txt" suffix.
tFileToOpenPath = fPath & Replace(fName, ".xlsx", ".txt")
tFileToSavePath = fPath & Replace(fName, ".xlsx", "-UTF8.txt")
Dim oStream
Set oStream = CreateObject("ADODB.Stream") 'Create the Stream object (late bound)
With oStream
.Type = 2 'Stream type 2: text/string data
' NOTE(review): this single charset is used both when loading the source file
' and when saving the result. The source was saved as xlUnicodeText above,
' not as UTF-8, so the load interprets the file with a mismatched charset.
.CharSet = "utf-8" 'Charset applied to this stream's text data
.Open 'Open the stream
.LoadFromFile tFileToOpenPath 'Load the contents of the source file into the stream
.SaveToFile tFileToSavePath, 2 'Write the stream to the target path (option value 2)
End With
Set oStream = Nothing 'Release the object reference (no explicit .Close is called)
输出的数据是这样的...我很纳闷
S a l e s
生成的 .txt 文件本身没问题,只有 -UTF8.txt 这个文件看起来很奇怪。
谢谢!
事情是这样的:wb.SaveAs 使用 xlUnicodeText 格式保存您的工作簿,而这种格式实际上是 UTF-16LE(当人们只说 "Unicode" 时,通常指的就是它)。
您的流对象打开了这个 UTF-16 文件,却按 UTF-8 来读取它。在 UTF-16 中,每个字符至少占用两个字节,简单拉丁字符的第二个字节是空字节;而在 UTF-8 中,这些字符只占用一个字节。因此按 UTF-8 读取时,大多数字符后面都会多出一个空字节(在您的情况下显示为空格;Notepad++ 会将其显示为 NUL)。
这似乎适用于转换:
Public Sub convert_UnicodeToUTF8(parF1 As String, parF2 As String)
    ' Re-encodes a text file: loads parF1 through a stream whose charset is
    ' "Unicode" and writes the same text to parF2 through a stream whose
    ' charset is "utf-8". An existing parF2 is overwritten.
    '
    '   parF1 - path of the source text file
    '   parF2 - path of the file to create or overwrite
    Const TEXT_STREAM = 2           ' stream Type value for text data
    Const OVERWRITE_EXISTING = 2    ' SaveToFile option: create or overwrite

    Dim reader, writer
    Set reader = CreateObject("ADODB.Stream")
    Set writer = CreateObject("ADODB.Stream")

    ' Prepare the destination first so the source can be copied straight into it.
    With writer
        .Type = TEXT_STREAM
        .Charset = "utf-8"
        .Open
    End With

    ' Load the source with its own charset; the charset of the two streams
    ' is deliberately different, which is what performs the conversion.
    reader.Type = TEXT_STREAM
    reader.Charset = "Unicode"      ' per the original author, also the default value
    reader.Open
    reader.LoadFromFile parF1
    reader.CopyTo writer
    reader.Close

    writer.SaveToFile parF2, OVERWRITE_EXISTING
    writer.Close

    Set reader = Nothing
    Set writer = Nothing
End Sub
Note: the CharSet property of the destination Stream object can differ from that of the source Stream object.