strtok_r 造成的内存损坏
Memory corruption by strtok_r
我正在尝试嵌套使用 strtok_r。我有一个 gzip 压缩的 csv 文件,正在尝试用 zpipe.c 读取它。
我注意到,在打印 out 变量时,只要我第二次调用 strtok_r,输出末尾就会多出一些奇怪的字符。在取消注释第二行 strtok_r 之前一切正常;取消注释之后,printf(out) 语句输出的末尾就开始出现垃圾值。
更让我困惑的是,我打印 out 变量的位置远在任何与第二个 strtok_r 相关的代码之前。它怎么会被尚未发生的事情破坏?这非常令人困惑。
#include "system.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include "zlib.h"
/* Timestamp storage filled in by gettimeofday() inside inf(). */
struct timeval begTime;

/*
 * SET_BINARY_MODE(file): on platforms that distinguish text and binary
 * streams, switch the given stdio stream to binary mode; elsewhere the macro
 * expands to nothing.
 *
 * The whole condition must stay on one logical line: a line break after the
 * last "||" leaves the #if expression incomplete and puts a stray
 * defined(...) token at file scope.
 */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# include <io.h>
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
# define SET_BINARY_MODE(file)
#endif

/* Size in bytes of the input and output buffers used by inf(). */
#define CHUNK 16384
/*
 * Inflate the compressed stream read from `source` and split each chunk of
 * decompressed output into newline-delimited tokens with strtok_r().
 *
 * source: stream the compressed bytes are read from with fread().
 * dest:   only inspected with ferror(); nothing in this function writes to it.
 *
 * Returns Z_OK when inflate() reported Z_STREAM_END; Z_DATA_ERROR when the
 * read loop ends before that, or when inflate() reports Z_NEED_DICT or
 * Z_DATA_ERROR; Z_MEM_ERROR as reported by inflate(); Z_ERRNO when ferror()
 * is set on either stream; or the non-Z_OK code returned by inflateInit2().
 *
 * NOTE(review): as written, the braces in this function do not balance: the
 * second "switch (ret) { case Z_NEED_DICT: do {" sequence below repeats the
 * lines just above it and is never closed. This looks like a paste or
 * extraction artifact; confirm against the original zpipe.c.
 */
int inf(FILE *source, FILE *dest)
{
int ret;
/* number of decompressed bytes produced by the latest inflate() call;
   computed below but not read anywhere in this function */
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
/* raw decompressed bytes; NOTE(review): nothing in this function stores a
   '\0' after the data, yet it is later used as a C string */
unsigned char out[CHUNK];
/* allocate inflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
/* 15|32 is the windowBits argument; per the zlib manual, adding 32 asks
   inflate to auto-detect a zlib or gzip header */
ret = inflateInit2(&strm, 15|32);
if (ret != Z_OK)
return ret;
/* outer loop: read one CHUNK of compressed input per iteration */
do {
memset(in, 0, CHUNK);
strm.avail_in = fread(in, 1, CHUNK, source);
printf("read %d\n", strm.avail_in);
if (ferror(source)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
/* no more input: leave the read loop */
if (strm.avail_in == 0)
break;
strm.next_in = in;
/* inner loop: call inflate() until it no longer fills the whole output buffer */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
/* NOTE(review): the next five lines duplicate the five lines above and
   open a second do/switch pair; see the note in the function header */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
have = CHUNK - strm.avail_out;
/* NOTE(review): %s keeps reading until it finds a '\0'; out holds `have`
   bytes of data with no terminator, so this can print bytes past the data */
printf("out, %s\n", out);
char *cylv_line;
char *savePtr = (char*)out;
/* first tokenizer call: strtok_r() overwrites the delimiter that ends the
   token with '\0', so out is modified from here on */
cylv_line = strtok_r(savePtr, "\n", &savePtr); // size of buffer always be greater than used
int line_num =0;
printf("before contract cm\n");
/* NOTE(review): gettimeofday() is declared in <sys/time.h>, which is not
   among the visible includes; presumably "system.h" provides it (confirm) */
gettimeofday(&begTime, NULL);
/* sumlen is never used after this initialization */
int sumlen = 0;
/* NOTE(review): with the strtok_r() call below commented out, cylv_line
   never changes inside the loop, so the loop cannot end once entered */
while(cylv_line != NULL)
{
//all works fine till i uncomment this line. Then i start seeing garbage values at the end of printf(out) statement
//cylv_line = strtok_r(savePtr, "\n", &savePtr);
line_num++;
}
printf("odone, %s\n", out);
if (ferror(dest)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
/* done when inflate() says it's done */
} while (ret != Z_STREAM_END);
(void)inflateEnd(&strm);
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}
/*
 * Print a description of the error code `ret` on stderr, prefixed with
 * "zpipe: ". For Z_ERRNO the message depends on which of stdin/stdout has
 * its error indicator set; a code that is not listed prints only the prefix.
 */
void zerr(int ret)
{
    const char *message = NULL;

    fputs("zpipe: ", stderr);

    if (ret == Z_ERRNO) {
        /* Both indicators are checked, so zero, one or two lines may follow. */
        if (ferror(stdin))
            fputs("error reading stdin\n", stderr);
        if (ferror(stdout))
            fputs("error writing stdout\n", stderr);
        return;
    }

    if (ret == Z_STREAM_ERROR)
        message = "invalid compression level\n";
    else if (ret == Z_DATA_ERROR)
        message = "invalid or incomplete deflate data\n";
    else if (ret == Z_MEM_ERROR)
        message = "out of memory\n";
    else if (ret == Z_VERSION_ERROR)
        message = "zlib version mismatch!\n";

    if (message != NULL)
        fputs(message, stderr);
}
int main(int argc, char **argv)
{
int ret;
ret = inf(fopen(argv[1],"r"), stdout);
if (ret != Z_OK)
zerr(ret);
return ret;
}
下面的输出中,我用粗体标出了运行嵌套 strtok_r 时多出来的垃圾字符:
39,1489116595360920,1,19,39,N,1173603600662,1000000000000369,1225,B,75000,2000
40,1489116595360937,1,19,40,N,1173603600662,1000000000000370,1223,B,75000,2000
41,1489116595360952,1,19,41,N,1173603600662,1000000000000371,1269,B,665050000,10
42,1489116595360964,1,19,42,N,1173603600662,1000000000000372,1269,B,665225000,100
43,1489116595360977,1,19,43,N,1173603600662,1000000000000373,1269,S,670475000,10
44,1489116595361024,1,19,44,N,1173603600662,1000000000000374,1269,S,669950000,10
45,1489116595361072,1,19,45,N,1173603600662,1000000000000375,1237,B,75000,2000
46,1489116595361078,1,19,46,N,1173603600662,1000000000000376,1269,B,667250000,10
47,1489116595361082,1,19,47,N,1173603600662,1000000000000377,1269,B,667525000,5
48,1489116595361085,1,19,48,N,1173603600662,1000000000000378,1327,B,75000,2000
49,1489116595361087,1,19,49,N,1173603600662,1000000000000379,1279,B,100000,2500
50,1489116595361095,1,19,50,N,1173603600662,1000000000000380,1269,B,665225000,200
51,1489116595361110,1,19,51,N,11736036006ÑgÂ"ü^?
before contract cm
odone, 1,1489116595360464,1,19,1,N,1173603600658,1000000000000333,1269,B,667125000,99
out, 62,1000000000000381,1008,B,25000,1373
52,1489116595361125,1,19,52,N,1173603600662,1000000000000382,1269,B,663225000,200
53,1489116595361139,1,19,53,N,1173603600662,1000000000000383,1387,S,150000,2000
你使用 strtok_r 的方式有误。在随后的调用中,第一个参数应该是 NULL,而不是保存的状态(savePtr);解析位置只通过第三个参数在各次调用之间传递。
所以你应该这样做
cylv_line = strtok_r(out, "\n", &savePtr); // Initial call
...
while (cylv_line != NULL)
{
...
cylv_line = strtok_r(NULL, "\n", &savePtr); // Subsequent calls pass NULL
...
}
实际的问题是:你把解压后的数据当成字符串来用,却没有添加字符串终止符。
更改 out 的定义,为终止符留出空间:
char out[CHUNK + 1]; // +1 for the string terminator
然后在解压(inflate)之后添加终止符:
out[have] = '\0'; // Terminate as a C string
如果没有终止符,字符串函数(如 strtok 和 strtok_r)会越过数据的末尾继续读取,从而导致未定义的行为。
我正在尝试嵌套使用 strtok_r。我有一个 gzip 压缩的 csv 文件,正在尝试用 zpipe.c 读取它。
我注意到,在打印 out 变量时,只要我第二次调用 strtok_r,输出末尾就会多出一些奇怪的字符。在取消注释第二行 strtok_r 之前一切正常;取消注释之后,printf(out) 语句输出的末尾就开始出现垃圾值。
更让我困惑的是,我打印 out 变量的位置远在任何与第二个 strtok_r 相关的代码之前。它怎么会被尚未发生的事情破坏?这非常令人困惑。
#include "system.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include "zlib.h"
/* Timestamp storage filled in by gettimeofday() inside inf(). */
struct timeval begTime;

/*
 * SET_BINARY_MODE(file): on platforms that distinguish text and binary
 * streams, switch the given stdio stream to binary mode; elsewhere the macro
 * expands to nothing.
 *
 * The whole condition must stay on one logical line: a line break after the
 * last "||" leaves the #if expression incomplete and puts a stray
 * defined(...) token at file scope.
 */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# include <io.h>
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
# define SET_BINARY_MODE(file)
#endif

/* Size in bytes of the input and output buffers used by inf(). */
#define CHUNK 16384
/*
 * Inflate the compressed stream read from `source` and split each chunk of
 * decompressed output into newline-delimited tokens with strtok_r().
 *
 * source: stream the compressed bytes are read from with fread().
 * dest:   only inspected with ferror(); nothing in this function writes to it.
 *
 * Returns Z_OK when inflate() reported Z_STREAM_END; Z_DATA_ERROR when the
 * read loop ends before that, or when inflate() reports Z_NEED_DICT or
 * Z_DATA_ERROR; Z_MEM_ERROR as reported by inflate(); Z_ERRNO when ferror()
 * is set on either stream; or the non-Z_OK code returned by inflateInit2().
 *
 * NOTE(review): as written, the braces in this function do not balance: the
 * second "switch (ret) { case Z_NEED_DICT: do {" sequence below repeats the
 * lines just above it and is never closed. This looks like a paste or
 * extraction artifact; confirm against the original zpipe.c.
 */
int inf(FILE *source, FILE *dest)
{
int ret;
/* number of decompressed bytes produced by the latest inflate() call;
   computed below but not read anywhere in this function */
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
/* raw decompressed bytes; NOTE(review): nothing in this function stores a
   '\0' after the data, yet it is later used as a C string */
unsigned char out[CHUNK];
/* allocate inflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
/* 15|32 is the windowBits argument; per the zlib manual, adding 32 asks
   inflate to auto-detect a zlib or gzip header */
ret = inflateInit2(&strm, 15|32);
if (ret != Z_OK)
return ret;
/* outer loop: read one CHUNK of compressed input per iteration */
do {
memset(in, 0, CHUNK);
strm.avail_in = fread(in, 1, CHUNK, source);
printf("read %d\n", strm.avail_in);
if (ferror(source)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
/* no more input: leave the read loop */
if (strm.avail_in == 0)
break;
strm.next_in = in;
/* inner loop: call inflate() until it no longer fills the whole output buffer */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
/* NOTE(review): the next five lines duplicate the five lines above and
   open a second do/switch pair; see the note in the function header */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
have = CHUNK - strm.avail_out;
/* NOTE(review): %s keeps reading until it finds a '\0'; out holds `have`
   bytes of data with no terminator, so this can print bytes past the data */
printf("out, %s\n", out);
char *cylv_line;
char *savePtr = (char*)out;
/* first tokenizer call: strtok_r() overwrites the delimiter that ends the
   token with '\0', so out is modified from here on */
cylv_line = strtok_r(savePtr, "\n", &savePtr); // size of buffer always be greater than used
int line_num =0;
printf("before contract cm\n");
/* NOTE(review): gettimeofday() is declared in <sys/time.h>, which is not
   among the visible includes; presumably "system.h" provides it (confirm) */
gettimeofday(&begTime, NULL);
/* sumlen is never used after this initialization */
int sumlen = 0;
/* NOTE(review): with the strtok_r() call below commented out, cylv_line
   never changes inside the loop, so the loop cannot end once entered */
while(cylv_line != NULL)
{
//all works fine till i uncomment this line. Then i start seeing garbage values at the end of printf(out) statement
//cylv_line = strtok_r(savePtr, "\n", &savePtr);
line_num++;
}
printf("odone, %s\n", out);
if (ferror(dest)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
/* done when inflate() says it's done */
} while (ret != Z_STREAM_END);
(void)inflateEnd(&strm);
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}
/*
 * Print a description of the error code `ret` on stderr, prefixed with
 * "zpipe: ". For Z_ERRNO the message depends on which of stdin/stdout has
 * its error indicator set; a code that is not listed prints only the prefix.
 */
void zerr(int ret)
{
    const char *message = NULL;

    fputs("zpipe: ", stderr);

    if (ret == Z_ERRNO) {
        /* Both indicators are checked, so zero, one or two lines may follow. */
        if (ferror(stdin))
            fputs("error reading stdin\n", stderr);
        if (ferror(stdout))
            fputs("error writing stdout\n", stderr);
        return;
    }

    if (ret == Z_STREAM_ERROR)
        message = "invalid compression level\n";
    else if (ret == Z_DATA_ERROR)
        message = "invalid or incomplete deflate data\n";
    else if (ret == Z_MEM_ERROR)
        message = "out of memory\n";
    else if (ret == Z_VERSION_ERROR)
        message = "zlib version mismatch!\n";

    if (message != NULL)
        fputs(message, stderr);
}
int main(int argc, char **argv)
{
int ret;
ret = inf(fopen(argv[1],"r"), stdout);
if (ret != Z_OK)
zerr(ret);
return ret;
}
下面的输出中,我用粗体标出了运行嵌套 strtok_r 时多出来的垃圾字符:
39,1489116595360920,1,19,39,N,1173603600662,1000000000000369,1225,B,75000,2000
40,1489116595360937,1,19,40,N,1173603600662,1000000000000370,1223,B,75000,2000
41,1489116595360952,1,19,41,N,1173603600662,1000000000000371,1269,B,665050000,10
42,1489116595360964,1,19,42,N,1173603600662,1000000000000372,1269,B,665225000,100
43,1489116595360977,1,19,43,N,1173603600662,1000000000000373,1269,S,670475000,10
44,1489116595361024,1,19,44,N,1173603600662,1000000000000374,1269,S,669950000,10
45,1489116595361072,1,19,45,N,1173603600662,1000000000000375,1237,B,75000,2000
46,1489116595361078,1,19,46,N,1173603600662,1000000000000376,1269,B,667250000,10
47,1489116595361082,1,19,47,N,1173603600662,1000000000000377,1269,B,667525000,5
48,1489116595361085,1,19,48,N,1173603600662,1000000000000378,1327,B,75000,2000
49,1489116595361087,1,19,49,N,1173603600662,1000000000000379,1279,B,100000,2500
50,1489116595361095,1,19,50,N,1173603600662,1000000000000380,1269,B,665225000,200
51,1489116595361110,1,19,51,N,11736036006ÑgÂ"ü^?
before contract cm
odone, 1,1489116595360464,1,19,1,N,1173603600658,1000000000000333,1269,B,667125000,99
out, 62,1000000000000381,1008,B,25000,1373
52,1489116595361125,1,19,52,N,1173603600662,1000000000000382,1269,B,663225000,200
53,1489116595361139,1,19,53,N,1173603600662,1000000000000383,1387,S,150000,2000
你使用 strtok_r 的方式有误。在随后的调用中,第一个参数应该是 NULL,而不是保存的状态(savePtr);解析位置只通过第三个参数在各次调用之间传递。
所以你应该这样做
cylv_line = strtok_r(out, "\n", &savePtr); // Initial call
...
while (cylv_line != NULL)
{
...
cylv_line = strtok_r(NULL, "\n", &savePtr); // Subsequent calls pass NULL
...
}
实际的问题是:你把解压后的数据当成字符串来用,却没有添加字符串终止符。
更改 out 的定义,为终止符留出空间:
char out[CHUNK + 1]; // +1 for the string terminator
然后在解压(inflate)之后添加终止符:
out[have] = '\0'; // Terminate as a C string
如果没有终止符,字符串函数(如 strtok 和 strtok_r)会越过数据的末尾继续读取,从而导致未定义的行为。