WideCharToMultiByte 在 Wine 中不起作用
WideCharToMultiByte doesn't work in Wine
我正在尝试使用 WideCharToMultiByte 将 std::wstring 转换为 utf8 std::string。这是我的代码:
// Body of a conversion routine (its signature is not shown here): turns the
// wide string `utf16` into a UTF-8 std::string via WideCharToMultiByte and
// prints diagnostic output at every step.
// NOTE(review): the string below is initialized from a narrow literal; a wide
// literal (L"...") is presumably intended -- confirm.
const std::wstring & utf16("lorem ipsum"); // input
// Nothing to convert for an empty input.
if (utf16.empty()) {
return "";
}
cout << "wstring -> string, input: , size: " << utf16.size() << endl;
// Dump every input element as an integer value.
for (size_t i = 0; i < utf16.size(); ++i) {
cout << i << ": " << static_cast<int>(utf16[i]) << endl;
}
// Echo the input element by element through the wide stream.
for (size_t i = 0; i < utf16.size(); ++i) {
wcout << static_cast<wchar_t>(utf16[i]);
}
cout << endl;
std::string res;
int required_size = 0;
// First call: no output buffer (nullptr, size 0); the return value is kept as
// the required output size, and 0 is treated as a failure.
if ((required_size = WideCharToMultiByte(
CP_UTF8,
0,
utf16.c_str(),
utf16.size(),
nullptr,
0,
nullptr,
nullptr
)) == 0) {
throw std::invalid_argument("Cannot convert.");
}
cout << "required size: " << required_size << endl;
// Size the result to what the first call reported.
res.resize(required_size);
// Second call: same input, this time writing into the buffer owned by `res`.
if (WideCharToMultiByte(
CP_UTF8,
0,
utf16.c_str(),
utf16.size(),
&res[0],
res.size(),
nullptr,
nullptr
) == 0) {
throw std::invalid_argument("Cannot convert.");
}
cout << "Result: " << res << ", size: " << res.size() << endl;
// Dump every output byte; the uint8_t cast makes each one print as an
// unsigned value.
for (size_t i = 0; i < res.size(); ++i) {
cout << i << ": " << (int)static_cast<uint8_t>(res[i]) << endl;
}
// NOTE(review): exit(1) ends the program here, so the return below is never
// reached -- presumably a debugging leftover; confirm.
exit(1);
return res;
代码运行正常,没有异常,也没有报错,只是结果不对。以下是运行代码后的输出:
wstring -> string, input: , size: 11
0: 108
1: 111
2: 114
3: 101
4: 109
5: 32
6: 105
7: 112
8: 115
9: 117
10: 109
lorem ipsum
required size: 11
Result: lorem , size: 11
0: 108
1: 0
2: 111
3: 0
4: 114
5: 0
6: 101
7: 0
8: 109
9: 0
10: 32
我不明白为什么会有空字节。我做错了什么?
根据评论总结:
就 WideCharToMultiByte 的调用逻辑和参数而言,您的代码是正确的;唯一实际存在的问题是 utf16 的初始化——它需要使用宽字符串字面量(例如 L"lorem ipsum")来初始化。该代码在 VC++ 2015 RTM 和 Update 1 中给出了预期结果,因此这是您所使用的 WideCharToMultiByte 仿真层(Wine)中的错误。
话虽如此,在 C++11 及更高版本中,如果可能,您应该优先选择可移植的解决方案:将 std::wstring_convert 与 std::codecvt_utf8_utf16 结合使用:
#include <cstddef>
#include <string>
#include <locale>
#include <codecvt>
#include <iostream>
/// @brief Converts a UTF-16 wide string to UTF-8 and traces the conversion on std::wcout.
///
/// Prints the input string and its size, every input code unit as an integer,
/// the converted text and its size, and finally every output byte as an integer.
///
/// @param utf16 Wide string to convert.
/// @return The bytes produced by std::wstring_convert::to_bytes for @p utf16.
/// @throws std::range_error if to_bytes cannot convert the input.
std::string test(std::wstring const& utf16)
{
    std::wcout << L"wstring -> string, input: " << utf16 << L", size: " << utf16.size() << L'\n';

    // Dump each input code unit as a number.
    for (std::size_t i{}; i != utf16.size(); ++i)
        std::wcout << i << L": " << static_cast<int>(utf16[i]) << L'\n';

    // Echo the input one code unit at a time.
    for (std::size_t i{}; i != utf16.size(); ++i)
        std::wcout << utf16[i];
    std::wcout << L'\n';

    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> cvt;
    std::string res = cvt.to_bytes(utf16);

    std::wcout << L"Result: " << res.c_str() << L", size: " << res.size() << L'\n';

    // Go through unsigned char first: plain char may be signed, in which case
    // UTF-8 lead/continuation bytes (>= 0x80) would be printed as negative numbers.
    for (std::size_t i{}; i != res.size(); ++i)
        std::wcout << i << L": " << static_cast<int>(static_cast<unsigned char>(res[i])) << L'\n';

    return res;
}
/// Entry point: runs the conversion demo once on a fixed ASCII sample.
int main()
{
    std::wstring const sample{L"lorem ipsum"};
    test(sample);
    return 0;
}
我正在尝试使用 WideCharToMultiByte 将 std::wstring 转换为 utf8 std::string。这是我的代码:
// Body of a conversion routine (its signature is not shown here): turns the
// wide string `utf16` into a UTF-8 std::string via WideCharToMultiByte and
// prints diagnostic output at every step.
// NOTE(review): the string below is initialized from a narrow literal; a wide
// literal (L"...") is presumably intended -- confirm.
const std::wstring & utf16("lorem ipsum"); // input
// Nothing to convert for an empty input.
if (utf16.empty()) {
return "";
}
cout << "wstring -> string, input: , size: " << utf16.size() << endl;
// Dump every input element as an integer value.
for (size_t i = 0; i < utf16.size(); ++i) {
cout << i << ": " << static_cast<int>(utf16[i]) << endl;
}
// Echo the input element by element through the wide stream.
for (size_t i = 0; i < utf16.size(); ++i) {
wcout << static_cast<wchar_t>(utf16[i]);
}
cout << endl;
std::string res;
int required_size = 0;
// First call: no output buffer (nullptr, size 0); the return value is kept as
// the required output size, and 0 is treated as a failure.
if ((required_size = WideCharToMultiByte(
CP_UTF8,
0,
utf16.c_str(),
utf16.size(),
nullptr,
0,
nullptr,
nullptr
)) == 0) {
throw std::invalid_argument("Cannot convert.");
}
cout << "required size: " << required_size << endl;
// Size the result to what the first call reported.
res.resize(required_size);
// Second call: same input, this time writing into the buffer owned by `res`.
if (WideCharToMultiByte(
CP_UTF8,
0,
utf16.c_str(),
utf16.size(),
&res[0],
res.size(),
nullptr,
nullptr
) == 0) {
throw std::invalid_argument("Cannot convert.");
}
cout << "Result: " << res << ", size: " << res.size() << endl;
// Dump every output byte; the uint8_t cast makes each one print as an
// unsigned value.
for (size_t i = 0; i < res.size(); ++i) {
cout << i << ": " << (int)static_cast<uint8_t>(res[i]) << endl;
}
// NOTE(review): exit(1) ends the program here, so the return below is never
// reached -- presumably a debugging leftover; confirm.
exit(1);
return res;
代码运行正常,没有异常,也没有报错,只是结果不对。以下是运行代码后的输出:
wstring -> string, input: , size: 11
0: 108
1: 111
2: 114
3: 101
4: 109
5: 32
6: 105
7: 112
8: 115
9: 117
10: 109
lorem ipsum
required size: 11
Result: lorem , size: 11
0: 108
1: 0
2: 111
3: 0
4: 114
5: 0
6: 101
7: 0
8: 109
9: 0
10: 32
我不明白为什么会有空字节。我做错了什么?
根据评论总结:
就 WideCharToMultiByte 的调用逻辑和参数而言,您的代码是正确的;唯一实际存在的问题是 utf16 的初始化——它需要使用宽字符串字面量(例如 L"lorem ipsum")来初始化。该代码在 VC++ 2015 RTM 和 Update 1 中给出了预期结果,因此这是您所使用的 WideCharToMultiByte 仿真层(Wine)中的错误。
话虽如此,在 C++11 及更高版本中,如果可能,您应该优先选择可移植的解决方案:将 std::wstring_convert 与 std::codecvt_utf8_utf16 结合使用:
#include <cstddef>
#include <string>
#include <locale>
#include <codecvt>
#include <iostream>
/// @brief Converts a UTF-16 wide string to UTF-8 and traces the conversion on std::wcout.
///
/// Prints the input string and its size, every input code unit as an integer,
/// the converted text and its size, and finally every output byte as an integer.
///
/// @param utf16 Wide string to convert.
/// @return The bytes produced by std::wstring_convert::to_bytes for @p utf16.
/// @throws std::range_error if to_bytes cannot convert the input.
std::string test(std::wstring const& utf16)
{
    std::wcout << L"wstring -> string, input: " << utf16 << L", size: " << utf16.size() << L'\n';

    // Dump each input code unit as a number.
    for (std::size_t i{}; i != utf16.size(); ++i)
        std::wcout << i << L": " << static_cast<int>(utf16[i]) << L'\n';

    // Echo the input one code unit at a time.
    for (std::size_t i{}; i != utf16.size(); ++i)
        std::wcout << utf16[i];
    std::wcout << L'\n';

    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> cvt;
    std::string res = cvt.to_bytes(utf16);

    std::wcout << L"Result: " << res.c_str() << L", size: " << res.size() << L'\n';

    // Go through unsigned char first: plain char may be signed, in which case
    // UTF-8 lead/continuation bytes (>= 0x80) would be printed as negative numbers.
    for (std::size_t i{}; i != res.size(); ++i)
        std::wcout << i << L": " << static_cast<int>(static_cast<unsigned char>(res[i])) << L'\n';

    return res;
}
/// Entry point: runs the conversion demo once on a fixed ASCII sample.
int main()
{
    std::wstring const sample{L"lorem ipsum"};
    test(sample);
    return 0;
}