实现定界符多于一个字符的`strtok`
Implementing `strtok` whose delimiter has more than one character
代码片段:
/* Demonstration of the problem: strtok() treats every character of its
 * second argument as an independent delimiter, so "::" splits on any ':'. */
char str[] = "String1::String2:String3:String4::String5";
char *deli = "::";
/* First call passes the buffer; strtok() overwrites delimiters in str. */
char *token = strtok(str,deli);
while(token != NULL)
{
printf("Token= \"%s\"\n", token);
/* Passing NULL continues tokenizing the same buffer. */
token=strtok(NULL,deli);
}
以上代码片段产生输出:
Token="String1"
Token="String2"
Token="String3"
Token="String4"
Token="String5"
但我希望输出为:
Token="String1"
Token="String2:String3:String4"
Token="String5"
我知道之所以得不到预期的输出,是因为 strtok 会把
第二个参数中的每个字符都单独视为分隔符。
为了获得预期的输出,我编写了一个程序,该程序使用 strstr
(以及一些辅助逻辑)将给定的字符串拆分为标记。程序如下:
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/*
 * Print every non-empty token of `str`, where tokens are separated by the
 * complete string `deli` (not by its individual characters).
 *
 * Each token is written to stdout as  Token = "<text>".  Empty tokens
 * (leading, trailing or adjacent delimiters) are skipped and not counted.
 * The input string is not modified.
 *
 * Returns the number of tokens printed, or -1 if `str` or `deli` is NULL.
 * An empty delimiter never matches, so the whole string is one token.
 */
int myStrtok(char* str,char* deli)
{
    if (str == NULL || deli == NULL)
        return -1;

    /* The delimiter length is loop-invariant; compute it once. */
    const size_t deli_len = strlen(deli);
    const char *output = str;
    int tokens = 0;

    /* strstr() matches "" at the current position without advancing, which
     * would spin forever; only search when the delimiter is non-empty. */
    if (deli_len > 0) {
        const char *token;
        while ((token = strstr(output, deli)) != NULL) {
            if (token != output) {
                printf("Token = \"");
                fwrite(output, 1, (size_t)(token - output), stdout);
                printf("\"\n");
                tokens++;
            }
            /* Resume right after the delimiter that was just found. */
            output = token + deli_len;
        }
    }

    /* Whatever follows the last delimiter is the final token. */
    if (*output != '\0') {
        printf("Token = \"%s\"", output);
        tokens++;
    }
    printf("\n\n");
    return tokens;
}
/* Demo driver: show the original string, tokenize it on "::" with
 * myStrtok() and report how many tokens were found. */
int main(void)
{
    char input[] = "One:1:Two::Three::::";
    char *separator = "::";

    printf("Original string=\"%s\"\n\n", input);

    const int count = myStrtok(input, separator);
    if (count != -1) {
        printf("Number of tokens=%d\n", count);
    } else {
        /* myStrtok() signals a NULL argument with -1. */
        printf("The string or the delimeter is NULL\n");
    }

    return EXIT_SUCCESS;
}
以上程序产生了预期的输出。
我想知道是否有更简单的方法可以做到这一点。有吗?
下面是一个采用 strtok 原型并模仿其用法、
以整个字符串作为分隔符的函数:
/*
 * strtok()-style tokenizer whose delimiter is the complete string `delim`
 * rather than a set of single characters.
 *
 * Pass the buffer in `str` on the first call and NULL on later calls to keep
 * scanning the same buffer. The buffer is modified: the first character of
 * each delimiter found is overwritten with '\0'.
 *
 * Returns a pointer to the next token, or NULL when no tokens remain or
 * `delim` is NULL. Unlike strtok(), empty tokens are returned for leading,
 * trailing and adjacent delimiters. An empty delimiter never matches, so the
 * remaining text is returned as the last token.
 *
 * The resume position is kept in a static variable, so the function is not
 * reentrant.
 */
char *strtokm(char *str, const char *delim)
{
    static char *next; /* where the following call resumes; NULL when done */
    char *tok;
    char *m;

    if (delim == NULL) return NULL;

    tok = (str) ? str : next;
    if (tok == NULL) return NULL;

    /* strstr() matches "" at tok itself, which would return empty tokens
     * forever; treat an empty delimiter as "not found". */
    m = (*delim != '\0') ? strstr(tok, delim) : NULL;
    if (m) {
        next = m + strlen(delim);
        *m = '\0'; /* terminate the token in place */
    } else {
        next = NULL; /* this is the last token */
    }
    return tok;
}
如果您不要求用法与 strtok
相同,我会选择下面的做法:
// Split `str` in place on every occurrence of the complete string `delimiter`.
//
// "String1::String2:String3:String4::String5" with delimiter "::" will produce
// "String1\0\0String2:String3:String4\0\0String5"
// and the returned array contains a pointer to the first S, the second S and
// the last S, followed by a terminating NULL entry.
//
// Adjacent or leading delimiters yield empty words; a trailing delimiter does
// not add a final empty word. An empty delimiter never matches, so the result
// is the single word `str`.
//
// Returns a malloc()ed array the caller must free() (the words themselves
// point into `str`), or NULL if an argument is NULL or allocation fails.
char **strToWordArray(char *str, const char *delimiter)
{
    if (str == NULL || delimiter == NULL)
        return NULL;

    const size_t len = strlen(delimiter);

    // Count the words with the same left-to-right, non-overlapping scan used
    // for splitting below, so the array can never be too small.
    size_t nwords = 1;
    if (len > 0) {
        for (const char *p = strstr(str, delimiter); p != NULL;
             p = strstr(p + len, delimiter))
            nwords++;
    }

    char **words = malloc(sizeof(*words) * (nwords + 1));
    if (words == NULL)
        return NULL;

    size_t w = 0;
    words[w++] = str;
    if (len > 0) {
        char *hit;
        while ((hit = strstr(str, delimiter)) != NULL) {
            // Wipe the whole delimiter so the previous word is terminated.
            for (size_t i = 0; i < len; i++)
                hit[i] = '\0';
            // Continue right after the delimiter; the next search starts
            // here, so back-to-back delimiters are not skipped.
            str = hit + len;
            if (*str != '\0')
                words[w++] = str;
        }
    }
    words[w] = NULL;
    return words;
}
从 strsep 派生的代码 https://code.woboq.org/userspace/glibc/string/strsep.c.html
/*
 * strsep()-style tokenizer whose delimiter is the complete string `delim`.
 *
 * *stringp points at the text still to be scanned. On return it points just
 * past the delimiter that ended the token, or is NULL if the returned token
 * was the last one. The buffer is modified: the first character of the
 * delimiter found is overwritten with '\0'.
 *
 * Returns the token (possibly empty), or NULL when `stringp` is NULL or
 * *stringp is NULL. A NULL or empty `delim` never matches, so the remaining
 * text is returned as the last token.
 */
char *strsepm( char **stringp, const char *delim ) {
    char *begin, *end;

    if ( stringp == NULL ) return NULL;
    begin = *stringp;
    if ( begin == NULL ) return NULL;

    /* Find the end of the token. strstr() matches "" at begin itself, which
       would yield empty tokens forever, so an empty delimiter is "not found". */
    end = ( delim != NULL && *delim != '\0' ) ? strstr( begin , delim ) : NULL;
    if ( end != NULL ) {
        /* Terminate the token and set *STRINGP past the delimiter. */
        *end = '\0';
        *stringp = end + strlen( delim );
    } else {
        /* No more delimiters; this is the last token. */
        *stringp = NULL;
    }
    return begin;
}
/* Demo driver: split a sample sentence on "&&" with strsepm() and print
 * each token wrapped in single quotes, one per line. */
int main( int argc , char *argv [] ) {
    const char *separator = "&&";
    char text [ 256 ];
    char *cursor;
    char *piece;

    /* Command-line arguments are not used. */
    (void) argc;
    (void) argv;

    strcpy( text , " && Hello && Bernd && waht's && going && on &&");
    cursor = text;

    /* strsepm() returns NULL once the cursor has been exhausted. */
    for ( ;; ) {
        piece = strsepm( &cursor , separator );
        if ( piece == NULL ) {
            break;
        }
        printf( "\'%s\'\n" , piece );
    }
}
结果:
' '
' Hello '
' Bernd '
' waht's '
' going '
' on '
''
代码片段:
/* Demonstration of the problem: strtok() treats every character of its
 * second argument as an independent delimiter, so "::" splits on any ':'. */
char str[] = "String1::String2:String3:String4::String5";
char *deli = "::";
/* First call passes the buffer; strtok() overwrites delimiters in str. */
char *token = strtok(str,deli);
while(token != NULL)
{
printf("Token= \"%s\"\n", token);
/* Passing NULL continues tokenizing the same buffer. */
token=strtok(NULL,deli);
}
以上代码片段产生输出:
Token="String1"
Token="String2"
Token="String3"
Token="String4"
Token="String5"
但我希望输出为:
Token="String1"
Token="String2:String3:String4"
Token="String5"
我知道之所以得不到预期的输出,是因为 strtok 会把
第二个参数中的每个字符都单独视为分隔符。
为了获得预期的输出,我编写了一个程序,该程序使用 strstr
(以及一些辅助逻辑)将给定的字符串拆分为标记。程序如下:
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/*
 * Print every non-empty token of `str`, where tokens are separated by the
 * complete string `deli` (not by its individual characters).
 *
 * Each token is written to stdout as  Token = "<text>".  Empty tokens
 * (leading, trailing or adjacent delimiters) are skipped and not counted.
 * The input string is not modified.
 *
 * Returns the number of tokens printed, or -1 if `str` or `deli` is NULL.
 * An empty delimiter never matches, so the whole string is one token.
 */
int myStrtok(char* str,char* deli)
{
    if (str == NULL || deli == NULL)
        return -1;

    /* The delimiter length is loop-invariant; compute it once. */
    const size_t deli_len = strlen(deli);
    const char *output = str;
    int tokens = 0;

    /* strstr() matches "" at the current position without advancing, which
     * would spin forever; only search when the delimiter is non-empty. */
    if (deli_len > 0) {
        const char *token;
        while ((token = strstr(output, deli)) != NULL) {
            if (token != output) {
                printf("Token = \"");
                fwrite(output, 1, (size_t)(token - output), stdout);
                printf("\"\n");
                tokens++;
            }
            /* Resume right after the delimiter that was just found. */
            output = token + deli_len;
        }
    }

    /* Whatever follows the last delimiter is the final token. */
    if (*output != '\0') {
        printf("Token = \"%s\"", output);
        tokens++;
    }
    printf("\n\n");
    return tokens;
}
/* Demo driver: show the original string, tokenize it on "::" with
 * myStrtok() and report how many tokens were found. */
int main(void)
{
    char input[] = "One:1:Two::Three::::";
    char *separator = "::";

    printf("Original string=\"%s\"\n\n", input);

    const int count = myStrtok(input, separator);
    if (count != -1) {
        printf("Number of tokens=%d\n", count);
    } else {
        /* myStrtok() signals a NULL argument with -1. */
        printf("The string or the delimeter is NULL\n");
    }

    return EXIT_SUCCESS;
}
以上程序产生了预期的输出。
我想知道是否有更简单的方法可以做到这一点。有吗?
下面是一个采用 strtok 原型并模仿其用法、
以整个字符串作为分隔符的函数:
/*
 * strtok()-style tokenizer whose delimiter is the complete string `delim`
 * rather than a set of single characters.
 *
 * Pass the buffer in `str` on the first call and NULL on later calls to keep
 * scanning the same buffer. The buffer is modified: the first character of
 * each delimiter found is overwritten with '\0'.
 *
 * Returns a pointer to the next token, or NULL when no tokens remain or
 * `delim` is NULL. Unlike strtok(), empty tokens are returned for leading,
 * trailing and adjacent delimiters. An empty delimiter never matches, so the
 * remaining text is returned as the last token.
 *
 * The resume position is kept in a static variable, so the function is not
 * reentrant.
 */
char *strtokm(char *str, const char *delim)
{
    static char *next; /* where the following call resumes; NULL when done */
    char *tok;
    char *m;

    if (delim == NULL) return NULL;

    tok = (str) ? str : next;
    if (tok == NULL) return NULL;

    /* strstr() matches "" at tok itself, which would return empty tokens
     * forever; treat an empty delimiter as "not found". */
    m = (*delim != '\0') ? strstr(tok, delim) : NULL;
    if (m) {
        next = m + strlen(delim);
        *m = '\0'; /* terminate the token in place */
    } else {
        next = NULL; /* this is the last token */
    }
    return tok;
}
如果您不要求用法与 strtok
相同,我会选择下面的做法:
// Split `str` in place on every occurrence of the complete string `delimiter`.
//
// "String1::String2:String3:String4::String5" with delimiter "::" will produce
// "String1\0\0String2:String3:String4\0\0String5"
// and the returned array contains a pointer to the first S, the second S and
// the last S, followed by a terminating NULL entry.
//
// Adjacent or leading delimiters yield empty words; a trailing delimiter does
// not add a final empty word. An empty delimiter never matches, so the result
// is the single word `str`.
//
// Returns a malloc()ed array the caller must free() (the words themselves
// point into `str`), or NULL if an argument is NULL or allocation fails.
char **strToWordArray(char *str, const char *delimiter)
{
    if (str == NULL || delimiter == NULL)
        return NULL;

    const size_t len = strlen(delimiter);

    // Count the words with the same left-to-right, non-overlapping scan used
    // for splitting below, so the array can never be too small.
    size_t nwords = 1;
    if (len > 0) {
        for (const char *p = strstr(str, delimiter); p != NULL;
             p = strstr(p + len, delimiter))
            nwords++;
    }

    char **words = malloc(sizeof(*words) * (nwords + 1));
    if (words == NULL)
        return NULL;

    size_t w = 0;
    words[w++] = str;
    if (len > 0) {
        char *hit;
        while ((hit = strstr(str, delimiter)) != NULL) {
            // Wipe the whole delimiter so the previous word is terminated.
            for (size_t i = 0; i < len; i++)
                hit[i] = '\0';
            // Continue right after the delimiter; the next search starts
            // here, so back-to-back delimiters are not skipped.
            str = hit + len;
            if (*str != '\0')
                words[w++] = str;
        }
    }
    words[w] = NULL;
    return words;
}
从 strsep 派生的代码 https://code.woboq.org/userspace/glibc/string/strsep.c.html
/*
 * strsep()-style tokenizer whose delimiter is the complete string `delim`.
 *
 * *stringp points at the text still to be scanned. On return it points just
 * past the delimiter that ended the token, or is NULL if the returned token
 * was the last one. The buffer is modified: the first character of the
 * delimiter found is overwritten with '\0'.
 *
 * Returns the token (possibly empty), or NULL when `stringp` is NULL or
 * *stringp is NULL. A NULL or empty `delim` never matches, so the remaining
 * text is returned as the last token.
 */
char *strsepm( char **stringp, const char *delim ) {
    char *begin, *end;

    if ( stringp == NULL ) return NULL;
    begin = *stringp;
    if ( begin == NULL ) return NULL;

    /* Find the end of the token. strstr() matches "" at begin itself, which
       would yield empty tokens forever, so an empty delimiter is "not found". */
    end = ( delim != NULL && *delim != '\0' ) ? strstr( begin , delim ) : NULL;
    if ( end != NULL ) {
        /* Terminate the token and set *STRINGP past the delimiter. */
        *end = '\0';
        *stringp = end + strlen( delim );
    } else {
        /* No more delimiters; this is the last token. */
        *stringp = NULL;
    }
    return begin;
}
/* Demo driver: split a sample sentence on "&&" with strsepm() and print
 * each token wrapped in single quotes, one per line. */
int main( int argc , char *argv [] ) {
    const char *separator = "&&";
    char text [ 256 ];
    char *cursor;
    char *piece;

    /* Command-line arguments are not used. */
    (void) argc;
    (void) argv;

    strcpy( text , " && Hello && Bernd && waht's && going && on &&");
    cursor = text;

    /* strsepm() returns NULL once the cursor has been exhausted. */
    for ( ;; ) {
        piece = strsepm( &cursor , separator );
        if ( piece == NULL ) {
            break;
        }
        printf( "\'%s\'\n" , piece );
    }
}
结果:
' '
' Hello '
' Bernd '
' waht's '
' going '
' on '
''