JavaScript: 如何将多字节字符串数组转换为32位整型数组?
JavaScript: How to convert multi-byte string array to 32bits int array?
我有一个包含 UTF-32(但可能较高的 16 位将始终为 0)代码点的字符串。每个标记是长字符串中每个字符的代码点的 4 个字节中的一个。
请注意,字节在转换为字符串之前会被解释为带符号的整数,我对此无法控制。
// Provided: the bytes of the UTF-32 data, each printed as a signed integer
// and joined with commas.
// Declared with `var` so the snippet also runs in strict mode / ES modules,
// where assigning to an undeclared name throws a ReferenceError.
var intEncodedBytesString = "0,0,0,-31,0,0,0,-15,0,0,0,-31"; // 3 chars: áñá
// Wanted: one number per character, holding its code point.
var actualCodePoints = [225, 241, 225];
我需要将 intEncodedBytesString 转换为 actualCodePoints 数组。
到目前为止,我想到了这个:
// Builds a string from the comma-separated signed byte values held in
// intEncodedBytesString (defined outside this snippet), four values per character.
// Split the input into one string token per byte.
var intEncodedBytesStringArray = intEncodedBytesString.toString().split(',');
var i, str = '';
// Four tokens make up one character.
var charAmount = intEncodedBytesStringArray.length / 4;
for (i = 0; i < charAmount; i++) {
var codePoint = 0;
for (var j = 0; j < 4; j++) {
// Parse token j of character i as a base-10 integer.
var num = parseInt(intEncodedBytesStringArray[i * 4 + j], 10);
if (num != 0) {
if (num < 0) {
// Offset a negative value by a power of two that depends on the position j.
// NOTE(review): for j = 0 the shift count is 32, and 1 << 32 evaluates to 1
// in JavaScript, so a negative first byte is not converted as intended.
// NOTE(review): the offset presumably should be 256 for every position,
// since each token is a single byte - confirm.
num = (1 << (8 * (4 - j))) + num;
}
// Shift the value into place (the first token is the most significant byte)
// and add it to the running code point.
codePoint += (num << (8 * (3 - j)));
}
}
// Append the character for this code point to the result string.
str += String.fromCodePoint(codePoint);
}
是否有更好、更简单和/或更高效的方法?
我已经看到许多答案和代码片段来处理类似的事情,但没有解决我的输入字节是有符号整数字符串的问题:S
编辑:此代码不适用于最高代码点,因为 1<<32 是 1 而不是 2^32。
因为这是非常简单的 UTF-32,所以确实有更简单的方法:按四个字节一组进行处理。此外,处理可能出现的负数的简单方法是 (value + 256) % 256。
所以:
var intEncodedBytesString = "0,0,0,-31,0,0,0,-15,0,0,0,-31"; // three characters
var actualCodePoints = [];
// Parse the comma-separated tokens into numbers.
var bytes = intEncodedBytesString.split(",").map(Number);

// Maps a byte that was printed as a signed integer back to 0..255
// (for example -31 becomes 225); values already in 0..255 are unchanged.
function toUnsignedByte(value) {
  return (value + 256) % 256;
}

// Every group of four bytes forms one code point, most significant byte first.
for (var offset = 0; offset < bytes.length; offset += 4) {
  var highest = toUnsignedByte(bytes[offset]) << 24;
  var high = toUnsignedByte(bytes[offset + 1]) << 16;
  var low = toUnsignedByte(bytes[offset + 2]) << 8;
  var lowest = toUnsignedByte(bytes[offset + 3]);
  actualCodePoints.push(highest + high + low + lowest);
}
带有详细注释说明的示例:
// Input: the bytes of the text, printed as signed integers and comma-separated.
var intEncodedBytesString = "0,0,0,-31,0,0,0,-15,0,0,0,-31"; // three characters
// Output: one number per decoded code point.
var actualCodePoints = [];
// Split on the commas, then run every token through Number to get numeric bytes.
var bytes = intEncodedBytesString.split(",").map(Number);

// Reads the byte at `index` as an unsigned value:
// (x + 256) % 256 turns, for instance, -31 into 225 and leaves 0..255 as is.
function unsignedByteAt(index) {
  return (bytes[index] + 256) % 256;
}

// Walk the bytes four at a time; each group is one code point.
var i, cp;
for (i = 0; i < bytes.length; i += 4) {
  // First byte goes 24 bits up, the second 16, the third 8, and the last
  // stays where it is; their sum is the code point.
  cp = unsignedByteAt(i) << 24;
  cp += unsignedByteAt(i + 1) << 16;
  cp += unsignedByteAt(i + 2) << 8;
  cp += unsignedByteAt(i + 3);
  actualCodePoints.push(cp);
}

// Print the numeric result.
console.log(actualCodePoints);

// Print the text as well when the engine has String.fromCodePoint (ES2015+).
if (!String.fromCodePoint) {
  console.log("(Your browser doesn't support String.fromCodePoint)");
} else {
  // apply() passes the array elements as individual arguments; on an ES2015+
  // engine this could be written `String.fromCodePoint(...actualCodePoints)`.
  var str = String.fromCodePoint.apply(String, actualCodePoints);
  console.log(str);
}
我有一个包含 UTF-32(但可能较高的 16 位将始终为 0)代码点的字符串。每个标记是长字符串中每个字符的代码点的 4 个字节中的一个。 请注意,字节在转换为字符串之前会被解释为带符号的整数,我对此无法控制。
// Provided: the bytes of the UTF-32 data, each printed as a signed integer
// and joined with commas.
// Declared with `var` so the snippet also runs in strict mode / ES modules,
// where assigning to an undeclared name throws a ReferenceError.
var intEncodedBytesString = "0,0,0,-31,0,0,0,-15,0,0,0,-31"; // 3 chars: áñá
// Wanted: one number per character, holding its code point.
var actualCodePoints = [225, 241, 225];
我需要将 intEncodedBytesString 转换为 actualCodePoints 数组。 到目前为止,我想到了这个:
// Builds a string from the comma-separated signed byte values held in
// intEncodedBytesString (defined outside this snippet), four values per character.
// Split the input into one string token per byte.
var intEncodedBytesStringArray = intEncodedBytesString.toString().split(',');
var i, str = '';
// Four tokens make up one character.
var charAmount = intEncodedBytesStringArray.length / 4;
for (i = 0; i < charAmount; i++) {
var codePoint = 0;
for (var j = 0; j < 4; j++) {
// Parse token j of character i as a base-10 integer.
var num = parseInt(intEncodedBytesStringArray[i * 4 + j], 10);
if (num != 0) {
if (num < 0) {
// Offset a negative value by a power of two that depends on the position j.
// NOTE(review): for j = 0 the shift count is 32, and 1 << 32 evaluates to 1
// in JavaScript, so a negative first byte is not converted as intended.
// NOTE(review): the offset presumably should be 256 for every position,
// since each token is a single byte - confirm.
num = (1 << (8 * (4 - j))) + num;
}
// Shift the value into place (the first token is the most significant byte)
// and add it to the running code point.
codePoint += (num << (8 * (3 - j)));
}
}
// Append the character for this code point to the result string.
str += String.fromCodePoint(codePoint);
}
是否有更好、更简单和/或更高效的方法?
我已经看到许多答案和代码片段来处理类似的事情,但没有解决我的输入字节是有符号整数字符串的问题:S
编辑:此代码不适用于最高代码点,因为 1<<32 是 1 而不是 2^32。
因为这是非常简单的 UTF-32,所以确实有更简单的方法:按四个字节一组进行处理。此外,处理可能出现的负数的简单方法是 (value + 256) % 256。
所以:
var intEncodedBytesString = "0,0,0,-31,0,0,0,-15,0,0,0,-31"; // three characters
var actualCodePoints = [];
// Parse the comma-separated tokens into numbers.
var bytes = intEncodedBytesString.split(",").map(Number);

// Maps a byte that was printed as a signed integer back to 0..255
// (for example -31 becomes 225); values already in 0..255 are unchanged.
function toUnsignedByte(value) {
  return (value + 256) % 256;
}

// Every group of four bytes forms one code point, most significant byte first.
for (var offset = 0; offset < bytes.length; offset += 4) {
  var highest = toUnsignedByte(bytes[offset]) << 24;
  var high = toUnsignedByte(bytes[offset + 1]) << 16;
  var low = toUnsignedByte(bytes[offset + 2]) << 8;
  var lowest = toUnsignedByte(bytes[offset + 3]);
  actualCodePoints.push(highest + high + low + lowest);
}
带有详细注释说明的示例:
// Input: the bytes of the text, printed as signed integers and comma-separated.
var intEncodedBytesString = "0,0,0,-31,0,0,0,-15,0,0,0,-31"; // three characters
// Output: one number per decoded code point.
var actualCodePoints = [];
// Split on the commas, then run every token through Number to get numeric bytes.
var bytes = intEncodedBytesString.split(",").map(Number);

// Reads the byte at `index` as an unsigned value:
// (x + 256) % 256 turns, for instance, -31 into 225 and leaves 0..255 as is.
function unsignedByteAt(index) {
  return (bytes[index] + 256) % 256;
}

// Walk the bytes four at a time; each group is one code point.
var i, cp;
for (i = 0; i < bytes.length; i += 4) {
  // First byte goes 24 bits up, the second 16, the third 8, and the last
  // stays where it is; their sum is the code point.
  cp = unsignedByteAt(i) << 24;
  cp += unsignedByteAt(i + 1) << 16;
  cp += unsignedByteAt(i + 2) << 8;
  cp += unsignedByteAt(i + 3);
  actualCodePoints.push(cp);
}

// Print the numeric result.
console.log(actualCodePoints);

// Print the text as well when the engine has String.fromCodePoint (ES2015+).
if (!String.fromCodePoint) {
  console.log("(Your browser doesn't support String.fromCodePoint)");
} else {
  // apply() passes the array elements as individual arguments; on an ES2015+
  // engine this could be written `String.fromCodePoint(...actualCodePoints)`.
  var str = String.fromCodePoint.apply(String, actualCodePoints);
  console.log(str);
}