更正错误编码的字符串(ASCII 字符变回 UTF-8)
Correcting incorrectly encoded string (ASCII characters back to UTF-8)
这是我从 Android 的 WiFi 配置文件(wpa_supplicant.conf)中提取的示例 WiFi ssid。
我正在尝试显示文件中的所有 ssid,大多数都可以,因为它们是用引号引起来的普通字符串,例如,
network={
ssid="Linksys"
...
}
然而,有些条目只是想要与众不同和特殊,例如,
network={
ssid=e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab
...
}
现在的问题是:如何将它转换回可读的字符串(最好是用 JS)?我怀疑是编码有误(不过它在设备本机上显示是正确的)。
显然,该字符串是以十六进制表示的 UTF-8 字节序列。通过一些字符串操作将其转回二进制后,我就能把它解码回可读的形式。
/**
 * Decodes a hex string whose bytes are UTF-8 (for example an unquoted
 * `ssid=` value from wpa_supplicant.conf) into a JavaScript string.
 *
 * Malformed UTF-8 (stray continuation bytes, truncated or overlong
 * sequences, encoded surrogates, values above U+10FFFF) is replaced with
 * U+FFFD instead of producing arbitrary characters.
 *
 * @param {string} txt - Hex digits, two per byte, upper or lower case.
 * @returns {string} The decoded text; "" for empty input.
 * @throws {TypeError} If the length is odd or a non-hex character is present.
 */
function HextoUTF8(txt) {
    var REPLACEMENT = "\uFFFD";
    // Smallest code point that legitimately needs a sequence of the given
    // length (index = length); anything lower is an overlong encoding.
    var MIN_CODE_POINT = [0, 0, 0x80, 0x800, 0x10000];

    // Parses the hex digits into an array of byte values (0-255).
    function hexToBytes(str) {
        if (str.length % 2) throw TypeError("Not a valid length");
        if (/[^0-9a-fA-F]/.test(str)) throw TypeError("Not a valid hex string");
        var bytes = [];
        for (var i = 0; i < str.length; i += 2) {
            bytes.push(parseInt(str.slice(i, i + 2), 16));
        }
        return bytes;
    }

    // Returns the UTF-8 sequence length announced by a lead byte, or 0 when
    // the byte cannot start a sequence (10xxxxxx or 11111xxx).
    function sequenceLength(lead) {
        if (lead < 0x80) return 1;
        if ((lead & 0xE0) === 0xC0) return 2;
        if ((lead & 0xF0) === 0xE0) return 3;
        if ((lead & 0xF8) === 0xF0) return 4;
        return 0;
    }

    // Converts a code point to a string, emitting a surrogate pair above
    // U+FFFF because String.fromCharCode only keeps the low 16 bits.
    function codePointToString(codePoint) {
        if (codePoint < 0x10000) return String.fromCharCode(codePoint);
        var offset = codePoint - 0x10000;
        return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    }

    // Decodes an array of UTF-8 byte values into a string.
    function bytesToString(bytes) {
        var result = "";
        var i = 0;
        while (i < bytes.length) {
            var lead = bytes[i];
            var length = sequenceLength(lead);
            if (length === 0) {
                result += REPLACEMENT;
                i++;
                continue;
            }
            // The lead byte carries 7, 5, 4 or 3 payload bits for lengths 1-4.
            var codePoint = length === 1 ? lead : lead & (0xFF >> (length + 1));
            var consumed = 1;
            // Stop at the first byte that is not a continuation (10xxxxxx) so
            // it gets re-examined as the lead byte of the next sequence.
            while (consumed < length &&
                   i + consumed < bytes.length &&
                   (bytes[i + consumed] & 0xC0) === 0x80) {
                codePoint = (codePoint << 6) | (bytes[i + consumed] & 0x3F);
                consumed++;
            }
            i += consumed;
            var isTruncated = consumed < length;
            var isOverlong = codePoint < MIN_CODE_POINT[length];
            var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
            if (isTruncated || isOverlong || isSurrogate || codePoint > 0x10FFFF) {
                result += REPLACEMENT;
            } else {
                result += codePointToString(codePoint);
            }
        }
        return result;
    }

    return bytesToString(hexToBytes(String(txt)));
}
理想情况下我应该使用位运算来实现,但无论如何,它是有效的:
> HextoUTF8("e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab");
> "♪ 淡定有錢剩 ♫"
这是我从 Android 的 WiFi 配置文件(wpa_supplicant.conf)中提取的示例 WiFi ssid。
我正在尝试显示文件中的所有 ssid,大多数都可以,因为它们是用引号引起来的普通字符串,例如,
network={
ssid="Linksys"
...
}
然而,有些条目只是想要与众不同和特殊,例如,
network={
ssid=e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab
...
}
现在的问题是:如何将它转换回可读的字符串(最好是用 JS)?我怀疑是编码有误(不过它在设备本机上显示是正确的)。
显然,该字符串是以十六进制表示的 UTF-8 字节序列。通过一些字符串操作将其转回二进制后,我就能把它解码回可读的形式。
/**
 * Decodes a hex string whose bytes are UTF-8 (for example an unquoted
 * `ssid=` value from wpa_supplicant.conf) into a JavaScript string.
 *
 * Malformed UTF-8 (stray continuation bytes, truncated or overlong
 * sequences, encoded surrogates, values above U+10FFFF) is replaced with
 * U+FFFD instead of producing arbitrary characters.
 *
 * @param {string} txt - Hex digits, two per byte, upper or lower case.
 * @returns {string} The decoded text; "" for empty input.
 * @throws {TypeError} If the length is odd or a non-hex character is present.
 */
function HextoUTF8(txt) {
    var REPLACEMENT = "\uFFFD";
    // Smallest code point that legitimately needs a sequence of the given
    // length (index = length); anything lower is an overlong encoding.
    var MIN_CODE_POINT = [0, 0, 0x80, 0x800, 0x10000];

    // Parses the hex digits into an array of byte values (0-255).
    function hexToBytes(str) {
        if (str.length % 2) throw TypeError("Not a valid length");
        if (/[^0-9a-fA-F]/.test(str)) throw TypeError("Not a valid hex string");
        var bytes = [];
        for (var i = 0; i < str.length; i += 2) {
            bytes.push(parseInt(str.slice(i, i + 2), 16));
        }
        return bytes;
    }

    // Returns the UTF-8 sequence length announced by a lead byte, or 0 when
    // the byte cannot start a sequence (10xxxxxx or 11111xxx).
    function sequenceLength(lead) {
        if (lead < 0x80) return 1;
        if ((lead & 0xE0) === 0xC0) return 2;
        if ((lead & 0xF0) === 0xE0) return 3;
        if ((lead & 0xF8) === 0xF0) return 4;
        return 0;
    }

    // Converts a code point to a string, emitting a surrogate pair above
    // U+FFFF because String.fromCharCode only keeps the low 16 bits.
    function codePointToString(codePoint) {
        if (codePoint < 0x10000) return String.fromCharCode(codePoint);
        var offset = codePoint - 0x10000;
        return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    }

    // Decodes an array of UTF-8 byte values into a string.
    function bytesToString(bytes) {
        var result = "";
        var i = 0;
        while (i < bytes.length) {
            var lead = bytes[i];
            var length = sequenceLength(lead);
            if (length === 0) {
                result += REPLACEMENT;
                i++;
                continue;
            }
            // The lead byte carries 7, 5, 4 or 3 payload bits for lengths 1-4.
            var codePoint = length === 1 ? lead : lead & (0xFF >> (length + 1));
            var consumed = 1;
            // Stop at the first byte that is not a continuation (10xxxxxx) so
            // it gets re-examined as the lead byte of the next sequence.
            while (consumed < length &&
                   i + consumed < bytes.length &&
                   (bytes[i + consumed] & 0xC0) === 0x80) {
                codePoint = (codePoint << 6) | (bytes[i + consumed] & 0x3F);
                consumed++;
            }
            i += consumed;
            var isTruncated = consumed < length;
            var isOverlong = codePoint < MIN_CODE_POINT[length];
            var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
            if (isTruncated || isOverlong || isSurrogate || codePoint > 0x10FFFF) {
                result += REPLACEMENT;
            } else {
                result += codePointToString(codePoint);
            }
        }
        return result;
    }

    return bytesToString(hexToBytes(String(txt)));
}
理想情况下我应该使用位运算来实现,但无论如何,它是有效的:
> HextoUTF8("e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab");
> "♪ 淡定有錢剩 ♫"