Python urllib.unquote_plus node.js 等效
Python urllib.unquote_plus node.js equivalent
我正在尝试解码通过 HTTP GET 传来的一些数据。这些数据是由 Python 进行 URL 编码的,我想用 Node.js 对其进行解码。
Python:
data = "0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3"
print urllib.unquote_plus(data)
输出:
0 -�͘9+�� (��Ɇr0�� ��v
�u�8�z�ێ-�TP��!�� �����[�=7#�S��`���]̕��ۣ�oVzC#�B���N9���x
|�XFB��
Node.js:
var token = '0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3';
console.log(unescape(_.replace(token, '+', '%20')))
其中 `_` 是 Lodash 库,参见 https://lodash.com/docs/4.16.6#replace
输出:
0 -ûÍ9+Àå¢ (ÙÉr0+ñãv
ÙuÄ8ö£z¾Û-ÄTPÕÁ!ûë ãúÕ[ó=7#çS·`Ûã]̸ÚÛ£oVzC#üBN9¾®x
|ùXFBó
这两个解码输出看起来很相似,但仍有细微差别,我不明白为什么。用另一种算法解密之后,两者的结果大体相同,但 JS 版本会出现类似 '{"duration": 600m�B�}PO�UQ��:...}' 这样的意外字符,我不知道 JS 实现哪里出了问题。
不知道以前有没有做过(应该有,可能是我没搜到)。
您需要的似乎是这样一个函数:它把百分号编码(percent-encoding)的字符串解码为字节缓冲区(Buffer),而不是像常规 URL 解码函数那样尝试解码为字符串。
/**
 * Decode a percent-encoded string into raw bytes, treating '+' as a space
 * (the byte-level counterpart of Python's urllib.unquote_plus).
 *
 * The result is a Buffer rather than a string, so decoded byte values
 * >= 0x80 are kept exactly as encoded.
 *
 * @param {string} data - Percent-encoded input.
 * @returns {Buffer} The decoded bytes.
 * @throws {Error} If a '%' is not followed by exactly two hexadecimal digits.
 */
function bufferUrlDecode(data) {
  // Buffer.from() replaces the deprecated `new Buffer(string, encoding)`.
  var buf = Buffer.from(data, 'ascii');
  var pos = 0;          // write cursor; never passes the read cursor, so decoding in place is safe
  var inEscape = false; // true while the two hex digits after a '%' are being consumed
  var high = null;      // high nibble (already shifted) once the first hex digit has been read
  var i, b, nibble;

  for (i = 0; i < buf.length; i++) {
    b = buf[i];
    if (inEscape) {
      // Explicit range checks: plain arithmetic would map ':'..'@' (58-64)
      // onto 3..9 and wrongly accept them as hex digits.
      if (b >= 48 && b <= 57) {
        nibble = b - 48; // '0'-'9'
      } else if (b >= 65 && b <= 70) {
        nibble = b - 55; // 'A'-'F'
      } else if (b >= 97 && b <= 102) {
        nibble = b - 87; // 'a'-'f'
      } else {
        throw new Error('invalid encoding at position ' + i);
      }
      if (high === null) {
        high = nibble << 4;
      } else {
        buf[pos++] = high + nibble;
        inEscape = false;
        high = null;
      }
    } else if (b === 43 /* '+' becomes space */) {
      buf[pos++] = 32;
    } else if (b === 37 /* '%' starts an escape sequence */) {
      inEscape = true;
    } else {
      buf[pos++] = b;
    }
  }

  // An escape still open here means the input ended after '%' or '%X'.
  if (inEscape) throw new Error('invalid encoding at position ' + data.length);
  return buf.slice(0, pos);
}
用法:
// Sample input: the percent-encoded token from the question.
var encoded = "0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3";
// Decode to raw bytes, then print the Buffer itself followed by its hex dump.
var decoded = bufferUrlDecode(encoded);
console.log(decoded);
console.log(decoded.toString('hex'));
输出:
<Buffer 30 0e 09 2d fb cd 98 39 2b c0 e5 a2 20 28 ... >
300e092dfbcd98392bc0e5a22028 etc...
我正在尝试解码通过 HTTP GET 传来的一些数据。这些数据是由 Python 进行 URL 编码的,我想用 Node.js 对其进行解码。
Python:
data = "0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3"
print urllib.unquote_plus(data)
输出:
0 -�͘9+�� (��Ɇr0�� ��v
�u�8�z�ێ-�TP��!�� �����[�=7#�S��`���]̕��ۣ�oVzC#�B���N9���x
|�XFB��
Node.js:
var token = '0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3';
console.log(unescape(_.replace(token, '+', '%20')))
其中 `_` 是 Lodash 库,参见 https://lodash.com/docs/4.16.6#replace
输出:
0 -ûÍ9+Àå¢ (ÙÉr0+ñãv
ÙuÄ8ö£z¾Û-ÄTPÕÁ!ûë ãúÕ[ó=7#çS·`Ûã]̸ÚÛ£oVzC#üBN9¾®x
|ùXFBó
这两个解码输出看起来很相似,但仍有细微差别,我不明白为什么。用另一种算法解密之后,两者的结果大体相同,但 JS 版本会出现类似 '{"duration": 600m�B�}PO�UQ��:...}' 这样的意外字符,我不知道 JS 实现哪里出了问题。
不知道以前有没有做过(应该有,可能是我没搜到)。
您需要的似乎是这样一个函数:它把百分号编码(percent-encoding)的字符串解码为字节缓冲区(Buffer),而不是像常规 URL 解码函数那样尝试解码为字符串。
/**
 * Decode a percent-encoded string into raw bytes, treating '+' as a space
 * (the byte-level counterpart of Python's urllib.unquote_plus).
 *
 * The result is a Buffer rather than a string, so decoded byte values
 * >= 0x80 are kept exactly as encoded.
 *
 * @param {string} data - Percent-encoded input.
 * @returns {Buffer} The decoded bytes.
 * @throws {Error} If a '%' is not followed by exactly two hexadecimal digits.
 */
function bufferUrlDecode(data) {
  // Buffer.from() replaces the deprecated `new Buffer(string, encoding)`.
  var buf = Buffer.from(data, 'ascii');
  var pos = 0;          // write cursor; never passes the read cursor, so decoding in place is safe
  var inEscape = false; // true while the two hex digits after a '%' are being consumed
  var high = null;      // high nibble (already shifted) once the first hex digit has been read
  var i, b, nibble;

  for (i = 0; i < buf.length; i++) {
    b = buf[i];
    if (inEscape) {
      // Explicit range checks: plain arithmetic would map ':'..'@' (58-64)
      // onto 3..9 and wrongly accept them as hex digits.
      if (b >= 48 && b <= 57) {
        nibble = b - 48; // '0'-'9'
      } else if (b >= 65 && b <= 70) {
        nibble = b - 55; // 'A'-'F'
      } else if (b >= 97 && b <= 102) {
        nibble = b - 87; // 'a'-'f'
      } else {
        throw new Error('invalid encoding at position ' + i);
      }
      if (high === null) {
        high = nibble << 4;
      } else {
        buf[pos++] = high + nibble;
        inEscape = false;
        high = null;
      }
    } else if (b === 43 /* '+' becomes space */) {
      buf[pos++] = 32;
    } else if (b === 37 /* '%' starts an escape sequence */) {
      inEscape = true;
    } else {
      buf[pos++] = b;
    }
  }

  // An escape still open here means the input ended after '%' or '%X'.
  if (inEscape) throw new Error('invalid encoding at position ' + data.length);
  return buf.slice(0, pos);
}
用法:
// Sample input: the percent-encoded token from the question.
var encoded = "0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3";
// Decode to raw bytes, then print the Buffer itself followed by its hex dump.
var decoded = bufferUrlDecode(encoded);
console.log(decoded);
console.log(decoded.toString('hex'));
输出:
<Buffer 30 0e 09 2d fb cd 98 39 2b c0 e5 a2 20 28 ... >
300e092dfbcd98392bc0e5a22028 etc...