C++:从字符串生成 Unicode 字符
c++ making a unicode char from a string
我有这样的字符串
// Input: the character's code written as four hexadecimal digits.
string s = "0081";
我需要像这样制作一个单字符的字符串
// Desired result: a string holding the single character U+0081.
string c = "\u0081";
如何从长度为 4 的原始字符串中生成长度为 1 的字符串?
编辑:
是我弄错了:“\u0081”并不是一个 char(1 字节),而是一个占 2 字节的字符/字符串?
所以我输入的是二进制 1000 0001,即 0x81,这就是我的字符串“0081”的来源。
直接从 0x81 这个值得到字符串 c = "\u0081"(不管它实际表示什么字符)会不会更容易?
感谢大家的帮助
给你:
// Parse the hexadecimal text "1081" into an integer. std::hex is set on the
// stream, so the extraction below reads the digits as base 16.
unsigned int x = 0;
std::stringstream ss;
ss << std::hex << "1081";
ss >> x;
// The parsed value, converted to a wide character, must equal the character
// written directly with the matching universal-character-name.
wchar_t wc1 = x;
wchar_t wc2 = L'\u1081';
assert(wc1 == wc2);
// Build a wide string of length 1 from the parsed character
// (the original referred to an undeclared name `wc` here).
std::wstring ws(1, wc1);
这是整个过程,基于我在其他地方的评论中链接到的一些代码。
string s = "0081";
long codepoint = strtol(s.c_str(), NULL, 16);
string c = CodepointToUTF8(codepoint);
// Encodes a code point as a UTF-8 byte sequence and returns it as a string.
//
// Values up to 0x7f produce one byte, up to 0x7ff two bytes, up to 0xffff
// three bytes, and anything larger four bytes. No validation is performed on
// the input.
std::string CodepointToUTF8(long codepoint)
{
    // One-byte case: the value is stored unchanged.
    if (codepoint <= 0x7f)
        return std::string(1, static_cast<char>(codepoint));

    // Choose the lead-byte marker, the mask for the payload bits carried by
    // the lead byte, and the number of continuation bytes that follow it.
    long leadMarker;
    long leadMask;
    int continuationCount;
    if (codepoint <= 0x7ff)
    {
        leadMarker = 0xc0;
        leadMask = 0x1f;
        continuationCount = 1;
    }
    else if (codepoint <= 0xffff)
    {
        leadMarker = 0xe0;
        leadMask = 0x0f;
        continuationCount = 2;
    }
    else
    {
        leadMarker = 0xf0;
        leadMask = 0x07;
        continuationCount = 3;
    }

    std::string encoded;
    const int leadShift = 6 * continuationCount;
    encoded += static_cast<char>(leadMarker | ((codepoint >> leadShift) & leadMask));

    // Each continuation byte carries six payload bits, most significant first.
    for (int shift = leadShift - 6; shift >= 0; shift -= 6)
        encoded += static_cast<char>(0x80 | ((codepoint >> shift) & 0x3f));

    return encoded;
}
请注意,此代码不做任何错误检查,因此如果传入无效的代码点(例如代理区 0xD800–0xDFFF 内的值,或大于 0x10FFFF 的值),得到的将是无效的 UTF-8 字符串。
我有这样的字符串
// Input: the character's code written as four hexadecimal digits.
string s = "0081";
我需要像这样制作一个单字符的字符串
// Desired result: a string holding the single character U+0081.
string c = "\u0081";
如何从长度为 4 的原始字符串中生成长度为 1 的字符串?
编辑: 是我弄错了:“\u0081”并不是一个 char(1 字节),而是一个占 2 字节的字符/字符串? 所以我输入的是二进制 1000 0001,即 0x81,这就是我的字符串“0081”的来源。 直接从 0x81 这个值得到字符串 c = "\u0081"(不管它实际表示什么字符)会不会更容易? 感谢大家的帮助
给你:
// Parse the hexadecimal text "1081" into an integer. std::hex is set on the
// stream, so the extraction below reads the digits as base 16.
unsigned int x = 0;
std::stringstream ss;
ss << std::hex << "1081";
ss >> x;
// The parsed value, converted to a wide character, must equal the character
// written directly with the matching universal-character-name.
wchar_t wc1 = x;
wchar_t wc2 = L'\u1081';
assert(wc1 == wc2);
// Build a wide string of length 1 from the parsed character
// (the original referred to an undeclared name `wc` here).
std::wstring ws(1, wc1);
这是整个过程,基于我在其他地方的评论中链接到的一些代码。
string s = "0081";
long codepoint = strtol(s.c_str(), NULL, 16);
string c = CodepointToUTF8(codepoint);
// Encodes a code point as a UTF-8 byte sequence and returns it as a string.
//
// Values up to 0x7f produce one byte, up to 0x7ff two bytes, up to 0xffff
// three bytes, and anything larger four bytes. No validation is performed on
// the input.
std::string CodepointToUTF8(long codepoint)
{
    // One-byte case: the value is stored unchanged.
    if (codepoint <= 0x7f)
        return std::string(1, static_cast<char>(codepoint));

    // Choose the lead-byte marker, the mask for the payload bits carried by
    // the lead byte, and the number of continuation bytes that follow it.
    long leadMarker;
    long leadMask;
    int continuationCount;
    if (codepoint <= 0x7ff)
    {
        leadMarker = 0xc0;
        leadMask = 0x1f;
        continuationCount = 1;
    }
    else if (codepoint <= 0xffff)
    {
        leadMarker = 0xe0;
        leadMask = 0x0f;
        continuationCount = 2;
    }
    else
    {
        leadMarker = 0xf0;
        leadMask = 0x07;
        continuationCount = 3;
    }

    std::string encoded;
    const int leadShift = 6 * continuationCount;
    encoded += static_cast<char>(leadMarker | ((codepoint >> leadShift) & leadMask));

    // Each continuation byte carries six payload bits, most significant first.
    for (int shift = leadShift - 6; shift >= 0; shift -= 6)
        encoded += static_cast<char>(0x80 | ((codepoint >> shift) & 0x3f));

    return encoded;
}
请注意,此代码不做任何错误检查,因此如果传入无效的代码点(例如代理区 0xD800–0xDFFF 内的值,或大于 0x10FFFF 的值),得到的将是无效的 UTF-8 字符串。