在 C 中,解析由多个空格分隔的整数组成的字符串
In C, parsing a string of multiple whitespace separated integers
我正在尝试使用 C 将一个包含多行、每行由空格分隔的整数的文件,解析为"动态 int 数组的动态数组"。每一行对应数组的数组中的一个数组。行数以及每行的元素个数都不是固定的。
到目前为止我所做的是使用 fgets 将每一行抓取为一个字符串。
但是,我不知道如何解析一串由空格分隔的整数。
我想我可以使用 sscanf(因为 fscanf 可用于解析由空格分隔的整数组成的整个文件)。但是,sscanf 似乎具有不同的功能。 sscanf 只解析字符串中的第一个数字。我的猜测是,因为该行是字符串而不是流。
我四处寻找从字符串中创建流的方法,但它在 C 中似乎不可用(我无法使用非标准库)。
char* line;
char lineBuffer[BUFFER_SIZE];
FILE *filePtr;
int value;
...
// Read the file one line at a time; fgets() keeps the trailing newline in lineBuffer.
while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
printf("%s\n", lineBuffer);
// NOTE: sscanf() is handed the start of lineBuffer on every iteration and nothing
// advances the read position, so each pass converts the same first integer again.
while(sscanf(lineBuffer, "%d ", &value) > 0) {
printf("%d\n", value);
}
}
有没有我可以用来解析字符串的东西。如果没有,整个系统是否有替代方案?我不想使用 REGEX。
使用 strtol():如果匹配成功,它会通过第二个参数给出一个指向匹配结束位置的指针;再用一个字符指针保存当前的解析位置:
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    printf("%s\n", lineBuffer);
    /* Walk the line: after each call strtol() stores in `next` the position
       just past the number it converted, which becomes the new start. */
    for (char *cursor = lineBuffer; cursor < lineBuffer + BUFFER_SIZE; ) {
        char *next;
        long int number = strtol(cursor, &next, 10);
        /* next == cursor means nothing was converted (end of line or garbage).
           The docs also suggest checking the errno value. */
        if (next == cursor && number == 0L) {
            break;
        }
        printf("%ld\n", number);
        cursor = next;
    }
}
使用 strtok() 函数,以 " "(空格)作为分隔符,并把它放在一个循环中,该循环在 strtok() 返回 NULL 时终止;这样可以依次取得每个标记(token),然后打印出每个标记中的数字:
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    printf("%s\n", lineBuffer);
    /* Split on every whitespace character, not only ' ': with " " alone a
       tab-separated pair such as "1\t2" stays one token and sscanf() would
       silently drop everything after the first number.
       strtok() writes '\0' into the line, so the line is printed first. */
    static const char delimiters[] = " \t\r\n\v\f";
    for (char *token = strtok(line, delimiters);
         token != NULL;
         token = strtok(NULL, delimiters)) {
        /* Tokens that do not start with an integer are skipped. */
        if (sscanf(token, "%d", &value) == 1) {
            printf("%d\n", value);
        }
    }
}
只需对输入行做一次循环,并利用 atol() 本来就会在下一个空白分隔符处停止这一特点即可。下面的第一个版本仅适用于正整数 ;) 但它速度很快,您无需阅读大量的 strtok 和 sscanf 文档,而且即使整数之间散落着 "noise"(杂乱字符),它也依然健壮。
为了使其也适用于负整数,将 isdigit() 替换为 !isspace() 就可以了。
// Prints every run of decimal digits found in a fixed sample string, one number per line.
// Non-digit characters between the numbers are ignored, so a leading '-' is not honoured.
void bla()
{
    const char * input = " 1 3 4 6 ";
    size_t len = strlen(input);
    for (size_t i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (isdigit((unsigned char)input[i]))
        {
            // atol() returns long, so the matching conversion is %ld, not %d.
            printf("%ld\n", atol(&input[i]));
            // Skip the remaining digits of the number that was just printed.
            while (i < len && isdigit((unsigned char)input[i]))
                ++i;
        }
    }
}
// Positive and negative ints version: every whitespace-delimited token of a fixed
// sample string is converted with atol() and printed on its own line.
void bla1()
{
    const char * input = " 10 -3 42 6 ";
    size_t len = strlen(input);
    for (size_t i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (!isspace((unsigned char)input[i]))
        {
            // atol() returns long, so the matching conversion is %ld, not %d.
            printf("%ld\n", atol(&input[i]));
            // Skip the rest of the token that was just converted.
            while (i < len && !isspace((unsigned char)input[i]))
                ++i;
        }
    }
    /* Output:
    10
    -3
    42
    6
    */
}
你的问题的下一部分是(隐含地),如何处理动态数组来存储你解析的 int 值。这是一个基于上面代码的解决方案。 chunkSize 对于输入设置得太小,所以我可以测试 realloc 代码部分是否也有效。
// One parsed row of integers.
typedef struct DataRow_tag
{
    int32_t *data;   // heap-allocated values; the caller free()s this after a successful read
    size_t length;   // number of valid entries in data
} DataRow_t;

// Parses a zero-terminated row of whitespace-separated decimal integers into *result.
//
// Every whitespace-delimited token yields exactly one value: its leading decimal
// integer, or 0 when the token does not start with a number (the same result
// atol() gives for such a token).
//
// Returns a "bool" in C-style as int. The original author deliberately avoids
// stdbool.h: when C code and C++ code are built by different compilers,
// sizeof(C++ bool) != sizeof(C bool) can happen across library/DLL boundaries.
//   1: true  -> success. result->data is a heap array holding result->length values
//               (length may be 0 for an empty or all-whitespace row). The caller
//               takes ownership of result->data and has to free() it when done.
//   0: false -> fail (NULL row, NULL or non-clean result, out of memory, or a value
//               that does not fit into int32_t). If result was clean on entry it is
//               left with data == NULL and length == 0; otherwise it is not touched.
int
ReadRowWithUnknownNumberOfColumnsOfInt32
    ( const char * row // Zero terminated string containing 1 row worth of data.
    , DataRow_t *result // Pointer to the place the data will be stored at.
    )
{
    size_t capacity = 10; // Initial element capacity; doubled whenever the array is full.

    // This function does not clean up the caller's old data: it wants a clean result structure.
    assert(NULL != result && NULL == result->data);
    if (NULL == row || NULL == result || NULL != result->data)
        return 0;

    result->length = 0;
    result->data = (int32_t*)malloc(capacity * sizeof(int32_t));
    if (NULL == result->data)
        return 0;

    int success = 1;
    const char *pInput = row;
    for (;;)
    {
        // Skip separators. <ctype.h> functions require an unsigned char value.
        while (isspace((unsigned char)*pInput))
            ++pInput;
        if ('\0' == *pInput)
            break; // end of row: every token, including the last one, has been handled

        // long long is at least 64 bits wide, so anything that overflows int32_t
        // (including strtoll's own clamped overflow result) fails the range test.
        long long value = strtoll(pInput, NULL, 10);
        if (value < INT32_MIN || value > INT32_MAX)
        {
            success = 0;
            break;
        }

        if (result->length == capacity)
        {   // Buffer is full: double it. realloc() returns NULL on failure and leaves
            // the old block valid, so the result goes to a temporary first.
            if (capacity > SIZE_MAX / (2 * sizeof(int32_t)))
            {
                success = 0;
                break;
            }
            int32_t *temp = (int32_t*)realloc(result->data, 2 * capacity * sizeof(int32_t));
            if (NULL == temp)
            {
                success = 0;
                break;
            }
            result->data = temp;
            capacity *= 2;
        }
        result->data[result->length++] = (int32_t)value;

        // Skip the rest of the current token.
        while ('\0' != *pInput && !isspace((unsigned char)*pInput))
            ++pInput;
    }

    if (!success)
    {   // Single cleanup point, so the buffer is freed exactly once.
        free(result->data);
        result->data = NULL;
        result->length = 0;
    }
    return success;
}
// Demo: parses a fixed row of 24 integers (more than the parser's initial
// chunk size, so the realloc path runs too) and prints them on one line.
void Bla2()
{
    const char * sample = "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13";
    DataRow_t parsed = { 0 };

    // Nothing to print and nothing to free when parsing fails.
    if (!ReadRowWithUnknownNumberOfColumnsOfInt32(sample, &parsed))
        return;

    size_t index = 0;
    while (index < parsed.length)
    {
        printf("%d ", parsed.data[index]);
        ++index;
    }
    printf("\n");

    // The caller owns the buffer after a successful read.
    free(parsed.data);
    parsed.data = NULL;
    parsed.length = 0;
}
通过 fgets() 读取一行是很好的第一步。
有两种方法:strtol()(错误处理更好)和 sscanf()。
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    char *endptr;
    // Convert one integer per iteration; strtol() leaves endptr just past it.
    while (1) {
        errno = 0; // strtol() only sets errno on failure, so clear it first
        long num = strtol(line, &endptr, 10);
        if (line == endptr) break;  // no conversion
        if (errno) break;  // out of range or other error
#if LONG_MIN < INT_MIN || LONG_MAX > INT_MAX
        // long and int may have different ranges
        if (num < INT_MIN || num > INT_MAX) {
            errno = ERANGE;
            break;  // out of range
        }
#endif
        int value = (int) num;
        printf("%d\n", value);
        line = endptr;
    }
    // Anything other than trailing white-space before the terminator is unexpected.
    while (isspace((unsigned char) *endptr)) endptr++;
    if (*endptr != '\0') Handle_ExtraGarbageAtEndOfLine();
}
" sscanf 只解析字符串中的第一个数字。"并非如此。使用 sscanf()
和 "%n"
记录扫描停止的位置。
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    int n;
    while (1) {
        n = 0; // stays 0 when the scan fails before reaching %n
        int value;
        // "%d %n": read an int, skip following white-space, then store in n
        // the number of characters consumed so far.
        if (sscanf(line, "%d %n", &value, &n) != 1) break;
        printf("%d\n", value);
        line += n; // continue right after what was just scanned
    }
    if (line[n] != '\0') Handle_ExtraGarbageAtEndOfLine();
}
你应该使用:
/* Request BUFFER_SIZE + 1 bytes. sizeof(BUFFER_SIZE + 1) would only be the size
   of the expression's type (typically sizeof(int)), not the buffer length.
   Check the result for NULL before use. */
lineBuffer = (char *)malloc(BUFFER_SIZE + 1);
而不是:
char lineBuffer[BUFFER_SIZE];
你的堆栈会感谢你!
我正在尝试使用 C 将一个包含多行、每行由空格分隔的整数的文件,解析为"动态 int 数组的动态数组"。每一行对应数组的数组中的一个数组。行数以及每行的元素个数都不是固定的。
到目前为止我所做的是使用 fgets 将每一行抓取为一个字符串。
但是,我不知道如何解析一串由空格分隔的整数。
我想我可以使用 sscanf(因为 fscanf 可用于解析由空格分隔的整数组成的整个文件)。但是,sscanf 似乎具有不同的功能。 sscanf 只解析字符串中的第一个数字。我的猜测是,因为该行是字符串而不是流。
我四处寻找从字符串中创建流的方法,但它在 C 中似乎不可用(我无法使用非标准库)。
char* line;
char lineBuffer[BUFFER_SIZE];
FILE *filePtr;
int value;
...
// Read the file one line at a time; fgets() keeps the trailing newline in lineBuffer.
while((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
printf("%s\n", lineBuffer);
// NOTE: sscanf() is handed the start of lineBuffer on every iteration and nothing
// advances the read position, so each pass converts the same first integer again.
while(sscanf(lineBuffer, "%d ", &value) > 0) {
printf("%d\n", value);
}
}
有没有我可以用来解析字符串的东西。如果没有,整个系统是否有替代方案?我不想使用 REGEX。
使用 strtol():如果匹配成功,它会通过第二个参数给出一个指向匹配结束位置的指针;再用一个字符指针保存当前的解析位置:
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    printf("%s\n", lineBuffer);
    /* Walk the line: after each call strtol() stores in `next` the position
       just past the number it converted, which becomes the new start. */
    for (char *cursor = lineBuffer; cursor < lineBuffer + BUFFER_SIZE; ) {
        char *next;
        long int number = strtol(cursor, &next, 10);
        /* next == cursor means nothing was converted (end of line or garbage).
           The docs also suggest checking the errno value. */
        if (next == cursor && number == 0L) {
            break;
        }
        printf("%ld\n", number);
        cursor = next;
    }
}
使用 strtok() 函数,以 " "(空格)作为分隔符,并把它放在一个循环中,该循环在 strtok() 返回 NULL 时终止;这样可以依次取得每个标记(token),然后打印出每个标记中的数字:
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    printf("%s\n", lineBuffer);
    /* Split on every whitespace character, not only ' ': with " " alone a
       tab-separated pair such as "1\t2" stays one token and sscanf() would
       silently drop everything after the first number.
       strtok() writes '\0' into the line, so the line is printed first. */
    static const char delimiters[] = " \t\r\n\v\f";
    for (char *token = strtok(line, delimiters);
         token != NULL;
         token = strtok(NULL, delimiters)) {
        /* Tokens that do not start with an integer are skipped. */
        if (sscanf(token, "%d", &value) == 1) {
            printf("%d\n", value);
        }
    }
}
只需对输入行做一次循环,并利用 atol() 本来就会在下一个空白分隔符处停止这一特点即可。下面的第一个版本仅适用于正整数 ;) 但它速度很快,您无需阅读大量的 strtok 和 sscanf 文档,而且即使整数之间散落着 "noise"(杂乱字符),它也依然健壮。
为了使其也适用于负整数,将 isdigit() 替换为 !isspace() 就可以了。
// Prints every run of decimal digits found in a fixed sample string, one number per line.
// Non-digit characters between the numbers are ignored, so a leading '-' is not honoured.
void bla()
{
    const char * input = " 1 3 4 6 ";
    size_t len = strlen(input);
    for (size_t i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (isdigit((unsigned char)input[i]))
        {
            // atol() returns long, so the matching conversion is %ld, not %d.
            printf("%ld\n", atol(&input[i]));
            // Skip the remaining digits of the number that was just printed.
            while (i < len && isdigit((unsigned char)input[i]))
                ++i;
        }
    }
}
// Positive and negative ints version: every whitespace-delimited token of a fixed
// sample string is converted with atol() and printed on its own line.
void bla1()
{
    const char * input = " 10 -3 42 6 ";
    size_t len = strlen(input);
    for (size_t i = 0; i < len; ++i)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        if (!isspace((unsigned char)input[i]))
        {
            // atol() returns long, so the matching conversion is %ld, not %d.
            printf("%ld\n", atol(&input[i]));
            // Skip the rest of the token that was just converted.
            while (i < len && !isspace((unsigned char)input[i]))
                ++i;
        }
    }
    /* Output:
    10
    -3
    42
    6
    */
}
你的问题的下一部分是(隐含地),如何处理动态数组来存储你解析的 int 值。这是一个基于上面代码的解决方案。 chunkSize 对于输入设置得太小,所以我可以测试 realloc 代码部分是否也有效。
// One parsed row of integers.
typedef struct DataRow_tag
{
    int32_t *data;   // heap-allocated values; the caller free()s this after a successful read
    size_t length;   // number of valid entries in data
} DataRow_t;

// Parses a zero-terminated row of whitespace-separated decimal integers into *result.
//
// Every whitespace-delimited token yields exactly one value: its leading decimal
// integer, or 0 when the token does not start with a number (the same result
// atol() gives for such a token).
//
// Returns a "bool" in C-style as int. The original author deliberately avoids
// stdbool.h: when C code and C++ code are built by different compilers,
// sizeof(C++ bool) != sizeof(C bool) can happen across library/DLL boundaries.
//   1: true  -> success. result->data is a heap array holding result->length values
//               (length may be 0 for an empty or all-whitespace row). The caller
//               takes ownership of result->data and has to free() it when done.
//   0: false -> fail (NULL row, NULL or non-clean result, out of memory, or a value
//               that does not fit into int32_t). If result was clean on entry it is
//               left with data == NULL and length == 0; otherwise it is not touched.
int
ReadRowWithUnknownNumberOfColumnsOfInt32
    ( const char * row // Zero terminated string containing 1 row worth of data.
    , DataRow_t *result // Pointer to the place the data will be stored at.
    )
{
    size_t capacity = 10; // Initial element capacity; doubled whenever the array is full.

    // This function does not clean up the caller's old data: it wants a clean result structure.
    assert(NULL != result && NULL == result->data);
    if (NULL == row || NULL == result || NULL != result->data)
        return 0;

    result->length = 0;
    result->data = (int32_t*)malloc(capacity * sizeof(int32_t));
    if (NULL == result->data)
        return 0;

    int success = 1;
    const char *pInput = row;
    for (;;)
    {
        // Skip separators. <ctype.h> functions require an unsigned char value.
        while (isspace((unsigned char)*pInput))
            ++pInput;
        if ('\0' == *pInput)
            break; // end of row: every token, including the last one, has been handled

        // long long is at least 64 bits wide, so anything that overflows int32_t
        // (including strtoll's own clamped overflow result) fails the range test.
        long long value = strtoll(pInput, NULL, 10);
        if (value < INT32_MIN || value > INT32_MAX)
        {
            success = 0;
            break;
        }

        if (result->length == capacity)
        {   // Buffer is full: double it. realloc() returns NULL on failure and leaves
            // the old block valid, so the result goes to a temporary first.
            if (capacity > SIZE_MAX / (2 * sizeof(int32_t)))
            {
                success = 0;
                break;
            }
            int32_t *temp = (int32_t*)realloc(result->data, 2 * capacity * sizeof(int32_t));
            if (NULL == temp)
            {
                success = 0;
                break;
            }
            result->data = temp;
            capacity *= 2;
        }
        result->data[result->length++] = (int32_t)value;

        // Skip the rest of the current token.
        while ('\0' != *pInput && !isspace((unsigned char)*pInput))
            ++pInput;
    }

    if (!success)
    {   // Single cleanup point, so the buffer is freed exactly once.
        free(result->data);
        result->data = NULL;
        result->length = 0;
    }
    return success;
}
// Demo: parses a fixed row of 24 integers (more than the parser's initial
// chunk size, so the realloc path runs too) and prints them on one line.
void Bla2()
{
    const char * sample = "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13";
    DataRow_t parsed = { 0 };

    // Nothing to print and nothing to free when parsing fails.
    if (!ReadRowWithUnknownNumberOfColumnsOfInt32(sample, &parsed))
        return;

    size_t index = 0;
    while (index < parsed.length)
    {
        printf("%d ", parsed.data[index]);
        ++index;
    }
    printf("\n");

    // The caller owns the buffer after a successful read.
    free(parsed.data);
    parsed.data = NULL;
    parsed.length = 0;
}
通过 fgets() 读取一行是很好的第一步。
有两种方法:strtol()(错误处理更好)和 sscanf()。
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    char *endptr;
    // Convert one integer per iteration; strtol() leaves endptr just past it.
    while (1) {
        errno = 0; // strtol() only sets errno on failure, so clear it first
        long num = strtol(line, &endptr, 10);
        if (line == endptr) break;  // no conversion
        if (errno) break;  // out of range or other error
#if LONG_MIN < INT_MIN || LONG_MAX > INT_MAX
        // long and int may have different ranges
        if (num < INT_MIN || num > INT_MAX) {
            errno = ERANGE;
            break;  // out of range
        }
#endif
        int value = (int) num;
        printf("%d\n", value);
        line = endptr;
    }
    // Anything other than trailing white-space before the terminator is unexpected.
    while (isspace((unsigned char) *endptr)) endptr++;
    if (*endptr != '\0') Handle_ExtraGarbageAtEndOfLine();
}
" sscanf 只解析字符串中的第一个数字。"并非如此。使用 sscanf()
和 "%n"
记录扫描停止的位置。
while ((line = fgets(lineBuffer, BUFFER_SIZE, filePtr)) != NULL) {
    int n;
    while (1) {
        n = 0; // stays 0 when the scan fails before reaching %n
        int value;
        // "%d %n": read an int, skip following white-space, then store in n
        // the number of characters consumed so far.
        if (sscanf(line, "%d %n", &value, &n) != 1) break;
        printf("%d\n", value);
        line += n; // continue right after what was just scanned
    }
    if (line[n] != '\0') Handle_ExtraGarbageAtEndOfLine();
}
你应该使用:
/* Request BUFFER_SIZE + 1 bytes. sizeof(BUFFER_SIZE + 1) would only be the size
   of the expression's type (typically sizeof(int)), not the buffer length.
   Check the result for NULL before use. */
lineBuffer = (char *)malloc(BUFFER_SIZE + 1);
而不是:
char lineBuffer[BUFFER_SIZE];
你的堆栈会感谢你!