如何修改字符串拆分函数以忽略连续的分隔符?
How do you modify a string splitting function to ignore consecutive delimiters?
我正在使用另一个 Stack Overflow 问题的回答中发布的一个函数。然而,发布该回答的用户指出:它不能处理连续的分隔符(it does not handle consecutive delimiters)。
我想知道如何修改它以便它可以处理连续的分隔符?当我有一个额外的分隔符时,我想基本上忽略它。
例如说我有这样的东西:
h2,3 d3,4 j3,3 y4,1 g4,3
我想在每个空格处将其拆分为一个字符串数组,但是如您所见,在某些情况下会有多个连续的空格。我只是想忽略这些额外的分隔符。
编辑:为了清楚起见,这是我在上面链接的答案中使用的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/*
 * Split a_str into tokens separated by a_delim.
 *
 * Runs of consecutive delimiters, as well as leading and trailing
 * delimiters, are treated as a single separator, so no empty tokens
 * are ever produced.  An empty string, or a string made only of
 * delimiters, yields an array containing just the terminating NULL.
 *
 * Parameters:
 *   a_str   - NUL-terminated string to split; it is only read, never
 *             modified.
 *   a_delim - the delimiter character.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings.  The caller must free() every element and then the array.
 * Returns NULL if a_str is NULL or if an allocation fails (nothing is
 * leaked in that case).
 */
char** str_split(char* a_str, const char a_delim)
{
    char** result = NULL;
    size_t count = 0;
    size_t idx = 0;
    const char* p;

    if (a_str == NULL)
    {
        return NULL;
    }

    /* Count tokens: one starts at every non-delimiter character that
       is either the first character or directly follows a delimiter. */
    for (p = a_str; *p != '\0'; p++)
    {
        if (*p != a_delim && (p == a_str || p[-1] == a_delim))
        {
            count++;
        }
    }

    /* One extra slot for the terminating NULL so the caller knows
       where the list of returned strings ends. */
    result = malloc(sizeof *result * (count + 1));
    if (result == NULL)
    {
        return NULL;
    }

    p = a_str;
    while (*p != '\0')
    {
        const char* start;
        size_t len;

        /* Skip the whole run of delimiters in front of the next token. */
        while (*p != '\0' && *p == a_delim)
        {
            p++;
        }
        if (*p == '\0')
        {
            break;
        }

        start = p;
        while (*p != '\0' && *p != a_delim)
        {
            p++;
        }
        len = (size_t)(p - start);

        result[idx] = malloc(len + 1);
        if (result[idx] == NULL)
        {
            /* Undo the partial result so nothing leaks. */
            while (idx > 0)
            {
                free(result[--idx]);
            }
            free(result);
            return NULL;
        }
        memcpy(result[idx], start, len);
        result[idx][len] = '\0';
        idx++;
    }

    result[idx] = NULL;
    return result;
}
/*
 * Demo driver: prints a comma-separated list of month names, splits it
 * with str_split(), prints every token and releases each token and the
 * token array.
 */
int main()
{
    char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char** tokens;
    char** cursor;

    printf("months=[%s]\n\n", months);

    tokens = str_split(months, ',');
    if (!tokens)
    {
        /* Nothing to print or free when the split returned no array. */
        return 0;
    }

    /* Walk the array up to its NULL terminator, freeing as we go. */
    for (cursor = tokens; *cursor; cursor++)
    {
        printf("month=[%s]\n", *cursor);
        free(*cursor);
    }
    printf("\n");
    free(tokens);

    return 0;
}
我认为 strtok() 能够满足您的要求。man page 中写道:
A sequence of two or more contiguous delimiter bytes in the parsed string is considered to be a single delimiter.
您链接的那个 SO 问题中得票很高的答案写道:“请注意它不处理连续的分隔符,例如 "JAN,,,FEB,MAR"”,但该贡献者的这一说法并没有得到证实。
函数 strsep() 会把连续的分隔符视为其间包含空字段,而函数 strtok() 会忽略分隔符集合中字符(以任意组合)的连续多次出现。使用 MSVC,我编写了下面这个程序:
#include<stdio.h>
#include<string.h>
/*
 * Demo: tokenise a month list whose fields are separated by arbitrary
 * runs of commas, spaces and tabs, printing one line per token.
 * strtok() is driven with the whole separator set on every call.
 */
int main(void)
{
    char months[]= "JAN, ,\t , ,FEB,MAR";
    char seps[] = ", \t\r\n";
    char *tok;

    /* The first call passes the buffer; later calls pass NULL to continue. */
    for (tok = strtok(months, seps); tok != NULL; tok = strtok(NULL, seps)) {
        printf ("Month is: %s\n", tok);
    }
    return 0;
}
输出:
Month is: JAN
Month is: FEB
Month is: MAR
对于您的具体示例(我怀疑其中可能还包含制表符),代码如下:
#include<stdio.h>
#include<string.h>
/*
 * Demo: split the sample input on spaces and tabs with strtok() and
 * print one line per token.
 */
int main(void)
{
    char stuff[]= "h2,3 d3,4 j3,3 y4,1 g4,3";
    char seps[] = " \t";
    char *tok;

    /* The first call passes the buffer; later calls pass NULL to continue. */
    for (tok = strtok(stuff, seps); tok != NULL; tok = strtok(NULL, seps)) {
        printf ("Stuff is: %s\n", tok);
    }
    return 0;
}
输出:
Stuff is: h2,3
Stuff is: d3,4
Stuff is: j3,3
Stuff is: y4,1
Stuff is: g4,3
这应该可以解决问题:
/*
 * Split str on delimiter, treating a run of consecutive delimiters as
 * one separator.
 *
 * The input is copied once into a single heap buffer; each delimiter
 * run is cut by overwriting its first character with '\0', and the
 * returned array holds pointers into that buffer.
 *
 * A leading delimiter run produces an empty first token and a trailing
 * run produces an empty last token; an empty input produces a single
 * empty token.
 *
 * Parameters:
 *   str       - NUL-terminated string to split (not modified).
 *   delimiter - the delimiter character.
 *
 * Returns a NULL-terminated array of token pointers.  ret[0] is the
 * start of the shared buffer, so the caller releases everything with
 * free(ret[0]); free(ret);.  Returns NULL if str is NULL or if an
 * allocation fails.
 */
char** str_split(const char *str, char delimiter)
{
    size_t len, i, n;
    char* buf;
    char** ret;

    if (str == NULL)
        return NULL;

    len = strlen(str);
    buf = malloc(len + 1);
    if (buf == NULL)
        return NULL;
    memcpy(buf, str, len + 1);

    /* First pass: count tokens (one more than the number of delimiter runs). */
    n = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            /* buf[len] is '\0', so this look-ahead never leaves the buffer. */
            while (buf[i + 1] == delimiter) i++;
            n++;
        }

    ret = malloc(sizeof *ret * (n + 1));
    if (ret == NULL)
    {
        free(buf);
        return NULL;
    }
    ret[n] = NULL;
    ret[0] = buf;

    /* Second pass: terminate each token and record where the next one starts. */
    n = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            buf[i] = '\0';
            while (buf[i + 1] == delimiter) i++;
            ret[n++] = &buf[i + 1];
        }
    return ret;
}
如果您不希望它跳过(合并)连续的分隔符,请删除这两行 while (buf[i + 1] == delimiter) i++;。
我正在使用另一个 Stack Overflow 问题的回答中发布的一个函数。然而,发布该回答的用户指出:它不能处理连续的分隔符(it does not handle consecutive delimiters)。
我想知道如何修改它以便它可以处理连续的分隔符?当我有一个额外的分隔符时,我想基本上忽略它。
例如说我有这样的东西:
h2,3 d3,4 j3,3 y4,1 g4,3
我想在每个空格处将其拆分为一个字符串数组,但是如您所见,在某些情况下会有多个连续的空格。我只是想忽略这些额外的分隔符。
编辑:为了清楚起见,这是我在上面链接的答案中使用的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/*
 * Split a_str into tokens separated by a_delim.
 *
 * Runs of consecutive delimiters, as well as leading and trailing
 * delimiters, are treated as a single separator, so no empty tokens
 * are ever produced.  An empty string, or a string made only of
 * delimiters, yields an array containing just the terminating NULL.
 *
 * Parameters:
 *   a_str   - NUL-terminated string to split; it is only read, never
 *             modified.
 *   a_delim - the delimiter character.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings.  The caller must free() every element and then the array.
 * Returns NULL if a_str is NULL or if an allocation fails (nothing is
 * leaked in that case).
 */
char** str_split(char* a_str, const char a_delim)
{
    char** result = NULL;
    size_t count = 0;
    size_t idx = 0;
    const char* p;

    if (a_str == NULL)
    {
        return NULL;
    }

    /* Count tokens: one starts at every non-delimiter character that
       is either the first character or directly follows a delimiter. */
    for (p = a_str; *p != '\0'; p++)
    {
        if (*p != a_delim && (p == a_str || p[-1] == a_delim))
        {
            count++;
        }
    }

    /* One extra slot for the terminating NULL so the caller knows
       where the list of returned strings ends. */
    result = malloc(sizeof *result * (count + 1));
    if (result == NULL)
    {
        return NULL;
    }

    p = a_str;
    while (*p != '\0')
    {
        const char* start;
        size_t len;

        /* Skip the whole run of delimiters in front of the next token. */
        while (*p != '\0' && *p == a_delim)
        {
            p++;
        }
        if (*p == '\0')
        {
            break;
        }

        start = p;
        while (*p != '\0' && *p != a_delim)
        {
            p++;
        }
        len = (size_t)(p - start);

        result[idx] = malloc(len + 1);
        if (result[idx] == NULL)
        {
            /* Undo the partial result so nothing leaks. */
            while (idx > 0)
            {
                free(result[--idx]);
            }
            free(result);
            return NULL;
        }
        memcpy(result[idx], start, len);
        result[idx][len] = '\0';
        idx++;
    }

    result[idx] = NULL;
    return result;
}
/*
 * Demo driver: prints a comma-separated list of month names, splits it
 * with str_split(), prints every token and releases each token and the
 * token array.
 */
int main()
{
    char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char** tokens;
    char** cursor;

    printf("months=[%s]\n\n", months);

    tokens = str_split(months, ',');
    if (!tokens)
    {
        /* Nothing to print or free when the split returned no array. */
        return 0;
    }

    /* Walk the array up to its NULL terminator, freeing as we go. */
    for (cursor = tokens; *cursor; cursor++)
    {
        printf("month=[%s]\n", *cursor);
        free(*cursor);
    }
    printf("\n");
    free(tokens);

    return 0;
}
我认为 strtok() 能够满足您的要求。man page 中写道:
A sequence of two or more contiguous delimiter bytes in the parsed string is considered to be a single delimiter.
您链接的那个 SO 问题中得票很高的答案写道:“请注意它不处理连续的分隔符,例如 "JAN,,,FEB,MAR"”,但该贡献者的这一说法并没有得到证实。
函数 strsep() 会把连续的分隔符视为其间包含空字段,而函数 strtok() 会忽略分隔符集合中字符(以任意组合)的连续多次出现。使用 MSVC,我编写了下面这个程序:
#include<stdio.h>
#include<string.h>
/*
 * Demo: tokenise a month list whose fields are separated by arbitrary
 * runs of commas, spaces and tabs, printing one line per token.
 * strtok() is driven with the whole separator set on every call.
 */
int main(void)
{
    char months[]= "JAN, ,\t , ,FEB,MAR";
    char seps[] = ", \t\r\n";
    char *tok;

    /* The first call passes the buffer; later calls pass NULL to continue. */
    for (tok = strtok(months, seps); tok != NULL; tok = strtok(NULL, seps)) {
        printf ("Month is: %s\n", tok);
    }
    return 0;
}
输出:
Month is: JAN
Month is: FEB
Month is: MAR
对于您的具体示例(我怀疑其中可能还包含制表符),代码如下:
#include<stdio.h>
#include<string.h>
/*
 * Demo: split the sample input on spaces and tabs with strtok() and
 * print one line per token.
 */
int main(void)
{
    char stuff[]= "h2,3 d3,4 j3,3 y4,1 g4,3";
    char seps[] = " \t";
    char *tok;

    /* The first call passes the buffer; later calls pass NULL to continue. */
    for (tok = strtok(stuff, seps); tok != NULL; tok = strtok(NULL, seps)) {
        printf ("Stuff is: %s\n", tok);
    }
    return 0;
}
输出:
Stuff is: h2,3
Stuff is: d3,4
Stuff is: j3,3
Stuff is: y4,1
Stuff is: g4,3
这应该可以解决问题:
/*
 * Split str on delimiter, treating a run of consecutive delimiters as
 * one separator.
 *
 * The input is copied once into a single heap buffer; each delimiter
 * run is cut by overwriting its first character with '\0', and the
 * returned array holds pointers into that buffer.
 *
 * A leading delimiter run produces an empty first token and a trailing
 * run produces an empty last token; an empty input produces a single
 * empty token.
 *
 * Parameters:
 *   str       - NUL-terminated string to split (not modified).
 *   delimiter - the delimiter character.
 *
 * Returns a NULL-terminated array of token pointers.  ret[0] is the
 * start of the shared buffer, so the caller releases everything with
 * free(ret[0]); free(ret);.  Returns NULL if str is NULL or if an
 * allocation fails.
 */
char** str_split(const char *str, char delimiter)
{
    size_t len, i, n;
    char* buf;
    char** ret;

    if (str == NULL)
        return NULL;

    len = strlen(str);
    buf = malloc(len + 1);
    if (buf == NULL)
        return NULL;
    memcpy(buf, str, len + 1);

    /* First pass: count tokens (one more than the number of delimiter runs). */
    n = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            /* buf[len] is '\0', so this look-ahead never leaves the buffer. */
            while (buf[i + 1] == delimiter) i++;
            n++;
        }

    ret = malloc(sizeof *ret * (n + 1));
    if (ret == NULL)
    {
        free(buf);
        return NULL;
    }
    ret[n] = NULL;
    ret[0] = buf;

    /* Second pass: terminate each token and record where the next one starts. */
    n = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            buf[i] = '\0';
            while (buf[i + 1] == delimiter) i++;
            ret[n++] = &buf[i + 1];
        }
    return ret;
}
如果您不希望它跳过(合并)连续的分隔符,请删除这两行 while (buf[i + 1] == delimiter) i++;。