使用 zlib 解压缩文本字符串
Decompress text string with zlib
我需要解码以 zlib 格式存储的十六进制字符串。一个例子是:
1800000013000000eAFjYoAAZiDFCMQgGgQAAJwACg==
其中 18000000 和 13000000 是 uncompressed/compressed 数据的大小(在本例中为 24 和 19)。
我也知道字符串的其余部分包含
020000000000000003000000010000000300000000000000
问题出在哪里?按照任何教程(例如 https://panthema.net/2007/0328-ZLibString.html)压缩该字符串,我得到的是
x?302@?P??
十六进制可以写成
783f3330324053f503f103ff5
这与我预期的压缩字符串无关,所以我没有找到解压缩原始字符串的方法(这是我的最终目标)
提前感谢您的提示!
PS:我正在使用来自 https://github.com/systemed/intersector/blob/master/helpers.cpp 的代码。
看起来字符串已经用 base64 编码了(谢谢 @zdenek 和 @Mark-Adler),我设法用下面的代码对它进行了解码:
// Destination buffer for the decoded bytes. FromBase64Simple takes the
// destination pointer by value and writes through it, so res must point at
// real storage before the call; an uninitialized pointer here is undefined
// behavior. The caller owns the buffer and must delete[] it when done.
BYTE *res = new BYTE[sizeCompressed];
// NOTE(review): std::string has no len() member; if actualData is a
// std::string this should be size() -- confirm against the real type.
int resSize = FromBase64Simple((BYTE*)actualData.c_str(),actualData.len(),res,sizeCompressed);
您可以在 https://github.com/kengonakajima/luvit-base64/blob/master/base64.c 阅读其实现。
但这不是问题所在,因为我可以用下面的代码转储结果:
// Copy the decoded bytes into a char array and log every byte as decimal and hex.
char* resChar = new char[resSize];
for(int i = 0;i<resSize;i++)
{
    int asciiCode = (BYTE)res[i];
    resChar[i]=char(asciiCode);
    // A byte prints as at most two hex digits, and itoa also writes the
    // terminating NUL, so the buffer needs three chars (two overflowed it).
    char buffer [3];
    itoa (asciiCode,buffer,16);
    qDebug()<<"["<<i<<"]\t"<<asciiCode<<"\t"<<buffer;
}
我得到了十进制和十六进制的每个字节的结果,都可以。十六进制看起来像:
78 01 63 62 80 00 66 20 c5 08 c4 20 1a 04 00 00 9c 00 0a
但是 resChar 是 "x?cb?",与 @Mark-Adler 所说的值 "x?302@?P??" 无关(显然 '?' 代表的是不可打印的字符)。我真的认为这就是问题所在,但我的数据似乎与这张表 https://www.asciitable.com/ 相符,而 Mark 的结果并不相符;另外,这个网站 https://conv.darkbyte.ru/ 返回的结果也与我的算法相同。
我尝试用上面提到的实现解压该字符串,但失败了(也试过 https://gist.github.com/arq5x/5315739),解压得到的只是一个空字符串 ""。
这里我们使用最小的可重现案例:
#include <string>
// Maps an ASCII byte to its 6-bit base64 value. '=' maps to the sentinel 99
// (end padding); every byte outside the base64 alphabet maps to 0.
// The explicit size of 256 guarantees one entry for every possible byte
// value, so indexing with any unsigned char is in bounds. Entries after 'z'
// are omitted from the initializer and are therefore zero-initialized.
static const unsigned char LookupDigits[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // gap: ctrl chars
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // gap: ctrl chars
    0,0,0,0,0,0,0,0,0,0,0,           // gap: spc,!"#$%&'()*
    62,                              // +
    0, 0, 0,                         // gap: ,-.
    63,                              // /
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // 0-9
    0, 0, 0,                         // gap: :;<
    99,                              // = (end padding)
    0, 0, 0,                         // gap: >?@
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,
    17,18,19,20,21,22,23,24,25,      // A-Z
    0, 0, 0, 0, 0, 0,                // gap: [ \ ] ^ _ `
    26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,
    43,44,45,46,47,48,49,50,51       // a-z
};

// Decodes base64 text into raw bytes.
//   pSrc    - base64 characters (need not be NUL-terminated)
//   nLenSrc - number of characters available at pSrc
//   pDst    - destination buffer
//   nLenDst - capacity of pDst in bytes
// Returns the number of bytes written, or 0 if pDst is too small.
// Decoding stops after the first group that contains '=' padding. A trailing
// group of fewer than 4 characters is ignored so the source is never read
// past nLenSrc. Characters outside the alphabet are not rejected; they
// decode as the value 0.
int FromBase64Simple(const unsigned char* pSrc, int nLenSrc, unsigned char* pDst, int nLenDst)
{
    int nLenOut = 0;
    // Written as a subtraction so the bound check cannot overflow int.
    for (int j = 0; nLenSrc - j >= 4; j += 4) {
        const unsigned char s1 = LookupDigits[*pSrc++];
        const unsigned char s2 = LookupDigits[*pSrc++];
        const unsigned char s3 = LookupDigits[*pSrc++];
        const unsigned char s4 = LookupDigits[*pSrc++];

        // Bytes this group yields: '=' in the 3rd position -> 1,
        // '=' in the 4th position -> 2, otherwise 3.
        const int nGroup = (s3 == 99) ? 1 : (s4 == 99) ? 2 : 3;
        // Check the capacity BEFORE writing so pDst is never overrun.
        if (nGroup > nLenDst - nLenOut) {
            return(0); // error, buffer too small
        }

        const unsigned char d1 = ((s1 & 0x3f) << 2) | ((s2 & 0x30) >> 4);
        const unsigned char d2 = ((s2 & 0x0f) << 4) | ((s3 & 0x3c) >> 2);
        const unsigned char d3 = ((s3 & 0x03) << 6) | ((s4 & 0x3f) >> 0);

        *pDst++ = d1; nLenOut++;
        if (s3 == 99) break; // end padding found
        *pDst++ = d2; nLenOut++;
        if (s4 == 99) break; // end padding found
        *pDst++ = d3; nLenOut++;
    }
    return(nLenOut);
}
// Decodes the sample base64 payload and dumps every decoded byte as decimal
// and hex, then prints the buffer as text.
int main()
{
    const std::string inputData = "eAFjYoAAZiDFCMQgGgQAAJwACg==";
    //19 is hardcoded since I know its size prior to this call
    unsigned char res[19];
    const int resSize = FromBase64Simple(reinterpret_cast<const unsigned char*>(inputData.c_str()),
                                         static_cast<int>(inputData.size()),
                                         res, static_cast<int>(sizeof(res)));
    for (int i = 0; i < resSize; i++)
    {
        const int asciiCode = res[i];
        printf("[%i]\t%i\t%x\n", i, asciiCode, asciiCode);
    }
    // res holds binary data and is not NUL-terminated, so bound the read with
    // an explicit precision instead of letting %s scan past the array.
    // Output still stops at the first zero byte inside the data.
    printf("\n\nres: %.*s", resSize, reinterpret_cast<const char*>(res));
    getchar();
    return 0;
}
"eAFjYoAAZiDFCMQgGgQAAJwACg==" 是 Base64 编码的。您需要先将其解码为二进制文件以获得可以解压缩的内容。以十六进制表示的二进制是:
78 01 63 62 80 00 66 20 c5 08 c4 20 1a 04 00 00 9c 00 0a
这是一个有效的 zlib 流,解压缩到这个,以十六进制表示:
02 00 00 00 00 00 00 00 03 00 00 00 01 00 00 00 03 00 00 00 00 00 00 00
您的压缩结果 "x?302@?P??" 其实是二进制数据,无法直接打印。那些问号在原始数据中并不是真正的问号,而是其他一些不可打印的字节,所以不要把它当作文本打印。您把打印出来的结果再转换成十六进制的做法是不正确的,因为得到的十六进制里包含了问号 (3f)。
这对我来说很好用。我使用了您链接的解压缩功能和您提供的 base64 功能。我删除了错误检查并重新格式化了一些内容以使其更短。
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <zlib.h>
#pragma comment(lib, "zdll.lib")
// Maps an ASCII byte to its 6-bit base64 value. '=' maps to the sentinel 99
// (end padding); every byte outside the base64 alphabet maps to 0.
// The explicit size of 256 guarantees one entry for every possible byte
// value, so indexing with any unsigned char is in bounds. Entries after 'z'
// are omitted from the initializer and are therefore zero-initialized.
static const unsigned char LookupDigits[256] = {
    // 0x00-0x1F: control characters
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    // 0x20-0x39: space..'*', '+', ",-.", '/', '0'-'9'
    0,0,0,0,0,0,0,0,0,0,0,62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,
    // 0x3A-0x5A: ":;<", '=', ">?@", 'A'-'Z'
    0,0,0,99,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
    20,21,22,23,24,25,
    // 0x5B-0x7A: six punctuation characters, then 'a'-'z'
    0,0,0,0,0,0,26,27,28,29,30,31,32,33,34,35,36,
    37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
};

// Decodes base64 text into raw bytes.
//   pSrc    - base64 characters (need not be NUL-terminated)
//   nLenSrc - number of characters available at pSrc
//   pDst    - destination buffer
//   nLenDst - capacity of pDst in bytes
// Returns the number of bytes written, or 0 if pDst is too small.
// Decoding stops after the first group that contains '=' padding. A trailing
// group of fewer than 4 characters is ignored so the source is never read
// past nLenSrc. Characters outside the alphabet are not rejected; they
// decode as the value 0.
int FromBase64Simple(const unsigned char* pSrc, int nLenSrc, unsigned char* pDst, int nLenDst)
{
    int nLenOut = 0;
    // Written as a subtraction so the bound check cannot overflow int.
    for (int j = 0; nLenSrc - j >= 4; j += 4)
    {
        const unsigned char s1 = LookupDigits[*pSrc++];
        const unsigned char s2 = LookupDigits[*pSrc++];
        const unsigned char s3 = LookupDigits[*pSrc++];
        const unsigned char s4 = LookupDigits[*pSrc++];

        // Bytes this group yields: '=' in the 3rd position -> 1,
        // '=' in the 4th position -> 2, otherwise 3.
        const int nGroup = (s3 == 99) ? 1 : (s4 == 99) ? 2 : 3;
        // Check the capacity BEFORE writing so pDst is never overrun.
        if (nGroup > nLenDst - nLenOut)
        {
            return(0); // error, buffer too small
        }

        const unsigned char d1 = ((s1 & 0x3f) << 2) | ((s2 & 0x30) >> 4);
        const unsigned char d2 = ((s2 & 0x0f) << 4) | ((s3 & 0x3c) >> 2);
        const unsigned char d3 = ((s3 & 0x03) << 6) | ((s4 & 0x3f) >> 0);

        *pDst++ = d1; nLenOut++;
        if (s3 == 99) break; // end padding found
        *pDst++ = d2; nLenOut++;
        if (s4 == 99) break; // end padding found
        *pDst++ = d3; nLenOut++;
    }
    return(nLenOut);
}
std::string decompress_string(const std::string& str)
{
z_stream zs; // z_stream is zlib's control structure
memset(&zs, 0, sizeof(zs));
inflateInit(&zs);
zs.next_in = (Bytef*)str.data();
zs.avail_in = str.size();
int ret;
char outbuffer[32768];
std::string outstring;
do
{
zs.next_out = reinterpret_cast<Bytef*>(outbuffer);
zs.avail_out = sizeof(outbuffer);
ret = inflate(&zs, 0);
if (outstring.size() < zs.total_out)
{
outstring.append(outbuffer, zs.total_out - outstring.size());
}
}
while (ret == Z_OK);
inflateEnd(&zs);
return outstring;
}
// Base64-decodes the sample payload, inflates it and prints the decompressed
// bytes as a contiguous lowercase hex string.
int main()
{
    const std::string inputData = "eAFjYoAAZiDFCMQgGgQAAJwACg==";
    //19 is hardcoded since I know its size prior to this call
    std::string res(19, '\0');
    const int resSize = FromBase64Simple(
        reinterpret_cast<const unsigned char*>(inputData.c_str()),
        static_cast<int>(inputData.size()),
        reinterpret_cast<unsigned char*>(&res[0]),
        static_cast<int>(res.size()));
    // Keep only the bytes that were actually decoded.
    res.resize(resSize);
    const std::string d = decompress_string(res);
    // Iterate as unsigned char so bytes >= 0x80 print as two hex digits
    // rather than a sign-extended value.
    for (const unsigned char c : d)
    {
        printf("%02x", static_cast<unsigned>(c));
    }
    printf("\n");
    getchar();
    return 0;
}
输出:020000000000000003000000010000000300000000000000
我需要解码以 zlib 格式存储的十六进制字符串。一个例子是:
1800000013000000eAFjYoAAZiDFCMQgGgQAAJwACg==
其中 18000000 和 13000000 是 uncompressed/compressed 数据的大小(在本例中为 24 和 19)。
我也知道字符串的其余部分包含
020000000000000003000000010000000300000000000000
问题出在哪里?按照任何教程(例如 https://panthema.net/2007/0328-ZLibString.html)压缩该字符串,我得到的是
x?302@?P??
十六进制可以写成
783f3330324053f503f103ff5
这与我预期的压缩字符串无关,所以我没有找到解压缩原始字符串的方法(这是我的最终目标)
提前感谢您的提示!
PS:我正在使用来自 https://github.com/systemed/intersector/blob/master/helpers.cpp 的代码。
看起来字符串已经用 base64 编码了(谢谢 @zdenek 和 @Mark-Adler),我设法用下面的代码对它进行了解码:
BYTE *res;
int resSize = FromBase64Simple((BYTE*)actualData.c_str(),actualData.len(),res,sizeCompressed);
您可以在 https://github.com/kengonakajima/luvit-base64/blob/master/base64.c 阅读其实现。
但这不是问题所在,因为我可以用下面的代码转储结果:
char* resChar = new char[resSize];
// Copy the decoded bytes into resChar and log every byte as decimal and hex.
for(int i = 0;i<resSize;i++)
{
    int asciiCode = (BYTE)res[i];
    resChar[i]=char(asciiCode);
    // A byte prints as at most two hex digits, and itoa also writes the
    // terminating NUL, so the buffer needs three chars (two overflowed it).
    char buffer [3];
    itoa (asciiCode,buffer,16);
    qDebug()<<"["<<i<<"]\t"<<asciiCode<<"\t"<<buffer;
}
我得到了十进制和十六进制的每个字节的结果,都可以。十六进制看起来像:
78 01 63 62 80 00 66 20 c5 08 c4 20 1a 04 00 00 9c 00 0a
但是 resChar 是 "x?cb?",与 @Mark-Adler 所说的值 "x?302@?P??" 无关(显然 '?' 代表的是不可打印的字符)。我真的认为这就是问题所在,但我的数据似乎与这张表 https://www.asciitable.com/ 相符,而 Mark 的结果并不相符;另外,这个网站 https://conv.darkbyte.ru/ 返回的结果也与我的算法相同。
我尝试用上面提到的实现解压该字符串,但失败了(也试过 https://gist.github.com/arq5x/5315739),解压得到的只是一个空字符串 ""。
这里我们使用最小的可重现案例:
#include <string>
// Maps an ASCII byte to its 6-bit base64 value. '=' maps to the sentinel 99
// (end padding); every byte outside the base64 alphabet maps to 0.
// The explicit size of 256 guarantees one entry for every possible byte
// value, so indexing with any unsigned char is in bounds. Entries after 'z'
// are omitted from the initializer and are therefore zero-initialized.
static const unsigned char LookupDigits[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // gap: ctrl chars
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // gap: ctrl chars
    0,0,0,0,0,0,0,0,0,0,0,           // gap: spc,!"#$%&'()*
    62,                              // +
    0, 0, 0,                         // gap: ,-.
    63,                              // /
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // 0-9
    0, 0, 0,                         // gap: :;<
    99,                              // = (end padding)
    0, 0, 0,                         // gap: >?@
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,
    17,18,19,20,21,22,23,24,25,      // A-Z
    0, 0, 0, 0, 0, 0,                // gap: [ \ ] ^ _ `
    26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,
    43,44,45,46,47,48,49,50,51       // a-z
};

// Decodes base64 text into raw bytes.
//   pSrc    - base64 characters (need not be NUL-terminated)
//   nLenSrc - number of characters available at pSrc
//   pDst    - destination buffer
//   nLenDst - capacity of pDst in bytes
// Returns the number of bytes written, or 0 if pDst is too small.
// Decoding stops after the first group that contains '=' padding. A trailing
// group of fewer than 4 characters is ignored so the source is never read
// past nLenSrc. Characters outside the alphabet are not rejected; they
// decode as the value 0.
int FromBase64Simple(const unsigned char* pSrc, int nLenSrc, unsigned char* pDst, int nLenDst)
{
    int nLenOut = 0;
    // Written as a subtraction so the bound check cannot overflow int.
    for (int j = 0; nLenSrc - j >= 4; j += 4) {
        const unsigned char s1 = LookupDigits[*pSrc++];
        const unsigned char s2 = LookupDigits[*pSrc++];
        const unsigned char s3 = LookupDigits[*pSrc++];
        const unsigned char s4 = LookupDigits[*pSrc++];

        // Bytes this group yields: '=' in the 3rd position -> 1,
        // '=' in the 4th position -> 2, otherwise 3.
        const int nGroup = (s3 == 99) ? 1 : (s4 == 99) ? 2 : 3;
        // Check the capacity BEFORE writing so pDst is never overrun.
        if (nGroup > nLenDst - nLenOut) {
            return(0); // error, buffer too small
        }

        const unsigned char d1 = ((s1 & 0x3f) << 2) | ((s2 & 0x30) >> 4);
        const unsigned char d2 = ((s2 & 0x0f) << 4) | ((s3 & 0x3c) >> 2);
        const unsigned char d3 = ((s3 & 0x03) << 6) | ((s4 & 0x3f) >> 0);

        *pDst++ = d1; nLenOut++;
        if (s3 == 99) break; // end padding found
        *pDst++ = d2; nLenOut++;
        if (s4 == 99) break; // end padding found
        *pDst++ = d3; nLenOut++;
    }
    return(nLenOut);
}
// Decodes the sample base64 payload and dumps every decoded byte as decimal
// and hex, then prints the buffer as text.
int main()
{
    const std::string inputData = "eAFjYoAAZiDFCMQgGgQAAJwACg==";
    //19 is hardcoded since I know its size prior to this call
    unsigned char res[19];
    const int resSize = FromBase64Simple(reinterpret_cast<const unsigned char*>(inputData.c_str()),
                                         static_cast<int>(inputData.size()),
                                         res, static_cast<int>(sizeof(res)));
    for (int i = 0; i < resSize; i++)
    {
        const int asciiCode = res[i];
        printf("[%i]\t%i\t%x\n", i, asciiCode, asciiCode);
    }
    // res holds binary data and is not NUL-terminated, so bound the read with
    // an explicit precision instead of letting %s scan past the array.
    // Output still stops at the first zero byte inside the data.
    printf("\n\nres: %.*s", resSize, reinterpret_cast<const char*>(res));
    getchar();
    return 0;
}
"eAFjYoAAZiDFCMQgGgQAAJwACg==" 是 Base64 编码的。您需要先将其解码为二进制文件以获得可以解压缩的内容。以十六进制表示的二进制是:
78 01 63 62 80 00 66 20 c5 08 c4 20 1a 04 00 00 9c 00 0a
这是一个有效的 zlib 流,解压缩到这个,以十六进制表示:
02 00 00 00 00 00 00 00 03 00 00 00 01 00 00 00 03 00 00 00 00 00 00 00
您的压缩结果 "x?302@?P??" 其实是二进制数据,无法直接打印。那些问号在原始数据中并不是真正的问号,而是其他一些不可打印的字节,所以不要把它当作文本打印。您把打印出来的结果再转换成十六进制的做法是不正确的,因为得到的十六进制里包含了问号 (3f)。
这对我来说很好用。我使用了您链接的解压缩功能和您提供的 base64 功能。我删除了错误检查并重新格式化了一些内容以使其更短。
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <zlib.h>
#pragma comment(lib, "zdll.lib")
// Maps an ASCII byte to its 6-bit base64 value. '=' maps to the sentinel 99
// (end padding); every byte outside the base64 alphabet maps to 0.
// The explicit size of 256 guarantees one entry for every possible byte
// value, so indexing with any unsigned char is in bounds. Entries after 'z'
// are omitted from the initializer and are therefore zero-initialized.
static const unsigned char LookupDigits[256] = {
    // 0x00-0x1F: control characters
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    // 0x20-0x39: space..'*', '+', ",-.", '/', '0'-'9'
    0,0,0,0,0,0,0,0,0,0,0,62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,
    // 0x3A-0x5A: ":;<", '=', ">?@", 'A'-'Z'
    0,0,0,99,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
    20,21,22,23,24,25,
    // 0x5B-0x7A: six punctuation characters, then 'a'-'z'
    0,0,0,0,0,0,26,27,28,29,30,31,32,33,34,35,36,
    37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
};

// Decodes base64 text into raw bytes.
//   pSrc    - base64 characters (need not be NUL-terminated)
//   nLenSrc - number of characters available at pSrc
//   pDst    - destination buffer
//   nLenDst - capacity of pDst in bytes
// Returns the number of bytes written, or 0 if pDst is too small.
// Decoding stops after the first group that contains '=' padding. A trailing
// group of fewer than 4 characters is ignored so the source is never read
// past nLenSrc. Characters outside the alphabet are not rejected; they
// decode as the value 0.
int FromBase64Simple(const unsigned char* pSrc, int nLenSrc, unsigned char* pDst, int nLenDst)
{
    int nLenOut = 0;
    // Written as a subtraction so the bound check cannot overflow int.
    for (int j = 0; nLenSrc - j >= 4; j += 4)
    {
        const unsigned char s1 = LookupDigits[*pSrc++];
        const unsigned char s2 = LookupDigits[*pSrc++];
        const unsigned char s3 = LookupDigits[*pSrc++];
        const unsigned char s4 = LookupDigits[*pSrc++];

        // Bytes this group yields: '=' in the 3rd position -> 1,
        // '=' in the 4th position -> 2, otherwise 3.
        const int nGroup = (s3 == 99) ? 1 : (s4 == 99) ? 2 : 3;
        // Check the capacity BEFORE writing so pDst is never overrun.
        if (nGroup > nLenDst - nLenOut)
        {
            return(0); // error, buffer too small
        }

        const unsigned char d1 = ((s1 & 0x3f) << 2) | ((s2 & 0x30) >> 4);
        const unsigned char d2 = ((s2 & 0x0f) << 4) | ((s3 & 0x3c) >> 2);
        const unsigned char d3 = ((s3 & 0x03) << 6) | ((s4 & 0x3f) >> 0);

        *pDst++ = d1; nLenOut++;
        if (s3 == 99) break; // end padding found
        *pDst++ = d2; nLenOut++;
        if (s4 == 99) break; // end padding found
        *pDst++ = d3; nLenOut++;
    }
    return(nLenOut);
}
std::string decompress_string(const std::string& str)
{
z_stream zs; // z_stream is zlib's control structure
memset(&zs, 0, sizeof(zs));
inflateInit(&zs);
zs.next_in = (Bytef*)str.data();
zs.avail_in = str.size();
int ret;
char outbuffer[32768];
std::string outstring;
do
{
zs.next_out = reinterpret_cast<Bytef*>(outbuffer);
zs.avail_out = sizeof(outbuffer);
ret = inflate(&zs, 0);
if (outstring.size() < zs.total_out)
{
outstring.append(outbuffer, zs.total_out - outstring.size());
}
}
while (ret == Z_OK);
inflateEnd(&zs);
return outstring;
}
// Base64-decodes the sample payload, inflates it and prints the decompressed
// bytes as a contiguous lowercase hex string.
int main()
{
    const std::string inputData = "eAFjYoAAZiDFCMQgGgQAAJwACg==";
    //19 is hardcoded since I know its size prior to this call
    std::string res(19, '\0');
    const int resSize = FromBase64Simple(
        reinterpret_cast<const unsigned char*>(inputData.c_str()),
        static_cast<int>(inputData.size()),
        reinterpret_cast<unsigned char*>(&res[0]),
        static_cast<int>(res.size()));
    // Keep only the bytes that were actually decoded.
    res.resize(resSize);
    const std::string d = decompress_string(res);
    // Iterate as unsigned char so bytes >= 0x80 print as two hex digits
    // rather than a sign-extended value.
    for (const unsigned char c : d)
    {
        printf("%02x", static_cast<unsigned>(c));
    }
    printf("\n");
    getchar();
    return 0;
}
输出:020000000000000003000000010000000300000000000000