实现定界符多于一个字符的`strtok`

Implementing `strtok` whose delimiter has more than one character

代码片段:

/* The input must be a writable array: strtok overwrites delimiters in place. */
char str[] = "String1::String2:String3:String4::String5";
/* strtok treats this as a SET of delimiter characters, so a lone ':' splits too. */
char *deli = "::";
char *token = strtok(str,deli);

while(token != NULL)
{
  printf("Token= \"%s\"\n", token);
  /* Passing NULL asks strtok to continue scanning the same string. */
  token=strtok(NULL,deli);
}

以上代码片段产生输出:

Token= "String1"
Token= "String2"
Token= "String3"
Token= "String4"
Token= "String5"

但我希望输出为:

Token= "String1"
Token= "String2:String3:String4"
Token= "String5"

我知道我没有得到预期的输出,因为 strtok 第二个参数中的每个字符都被单独视为分隔符。

为了获得预期的输出,我编写了一个程序,它使用 strstr(以及其他一些函数)将给定的字符串拆分为标记。这是程序:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/*
 * Print every non-empty token of str, where tokens are separated by the
 * whole string deli (not by its individual characters, as strtok would do).
 *
 * Each token is written to stdout as  Token = "..."  and the listing is
 * closed with a blank line.  str is only read, never modified.
 *
 * An empty delimiter is treated as "no delimiter": the entire string is
 * reported as a single token.
 *
 * Returns the number of tokens printed, or -1 if str or deli is NULL.
 */
int myStrtok(char* str,char* deli)
{
    if(str==NULL || deli==NULL)
        return -1;

    /* Loop-invariant, so measure the delimiter only once. */
    const size_t deliLen=strlen(deli);
    int tokens=0;
    char *token;
    char *output=str;

    /*
     * strstr() matches an empty needle at the current position, which would
     * never advance output; the deliLen check prevents that endless loop.
     */
    while(deliLen>0 && (token=strstr(output,deli))!=NULL)
    {
        /* Adjacent delimiters enclose an empty token: skip it silently. */
        if(output != token)
        {
            printf("Token = \"");
            while(output != token)
            {
                putchar(*output);
                output++;
            }
            printf("\"\n");
            tokens++;
        }

        /* Resume scanning right after the delimiter just found. */
        output=token+deliLen;
    }

    /* Whatever follows the last delimiter is the final token. */
    if(*output!='\0')
    {
        printf("Token = \"%s\"",output);
        tokens++;
    }
    printf("\n\n");
    return tokens;
}

/* Demo driver: tokenizes a sample string on "::" and reports the token count. */
int main(void)
{
    char text[]="One:1:Two::Three::::";
    char *separator="::";

    printf("Original string=\"%s\"\n\n",text);

    /* myStrtok signals a NULL argument with -1. */
    const int count=myStrtok(text,separator);
    if(count!=-1)
        printf("Number of tokens=%d\n", count);
    else
        printf("The string or the delimeter is NULL\n");

    return EXIT_SUCCESS;
}

以上程序产生了预期的输出。

我想知道是否有更容易/更简单的方法可以做到这一点。有吗?

下面是一个以整个字符串作为分隔符的函数,它沿用 strtok 的原型并模仿其用法:

/*
 * strtok-like tokenizer whose delimiter is the whole string delim rather
 * than a set of single characters.
 *
 * Pass the string to split on the first call and NULL on later calls to
 * continue with the same string.  The input is modified in place: the first
 * character of each delimiter found is overwritten with '\0'.
 *
 * Unlike strtok, empty tokens (adjacent, leading or trailing delimiters)
 * are returned as empty strings.  An empty delimiter never matches, so the
 * remaining text is returned as one final token.
 *
 * Returns the next token, or NULL when delim is NULL or no input is left.
 * The position is kept in a static variable, so the function is not
 * reentrant.
 */
char *strtokm(char *str, const char *delim)
{
    static char *next;  /* resume point for the following call; NULL when done */
    char *tok;
    char *m;

    if (delim == NULL) return NULL;

    tok = (str) ? str : next;
    if (tok == NULL) return NULL;

    /*
     * strstr() reports an empty needle at tok itself; accepting that match
     * would truncate the token and never advance.
     */
    m = (*delim != '\0') ? strstr(tok, delim) : NULL;

    if (m) {
        next = m + strlen(delim);
        *m = '\0';
    } else {
        next = NULL;
    }

    return tok;
}

如果您不关心与 strtok 相同的用法,我会选择:

// Split str in place on every occurrence of the whole string delimiter.
//
// "String1::String2:String3:String4::String5" with delimiter "::" becomes
// "String1\0\0String2:String3:String4\0\0String5"
// and the returned array points at the first S, the second S and the last S.
//
// The first entry always points at the start of str. Adjacent delimiters
// yield an empty word between them; a delimiter at the very end of the
// string does not add a trailing empty word. An empty delimiter never
// matches, so the whole string is the only word.
//
// Returns a malloc'ed, NULL-terminated array of pointers into str (the
// caller frees the array only, not the words), or NULL if an argument is
// NULL or the allocation fails.
char **strToWordArray(char *str, const char *delimiter)
{
  if (str == NULL || delimiter == NULL)
    return NULL;

  const size_t len = strlen(delimiter);

  // Upper bound on the word count: one more than the number of delimiters.
  size_t maxWords = 1;
  if (len > 0)
  {
    for (const char *p = strstr(str, delimiter); p != NULL; p = strstr(p + len, delimiter))
      maxWords++;
  }

  // One extra slot for the terminating NULL entry.
  char **words = malloc(sizeof(*words) * (maxWords + 1));
  if (words == NULL)
    return NULL;

  size_t w = 0;
  words[w++] = str;

  if (len > 0)
  {
    char *hit;
    while ((hit = strstr(str, delimiter)) != NULL)
    {
      // Blank out the whole delimiter so the preceding word is terminated.
      for (size_t i = 0; i < len; i++)
        hit[i] = '\0';

      // Continue right after the delimiter; a following delimiter is found
      // by the next strstr call instead of being skipped.
      str = hit + len;
      if (*str != '\0')
        words[w++] = str;
    }
  }

  words[w] = NULL;
  return words;
}

从 strsep 派生的代码 https://code.woboq.org/userspace/glibc/string/strsep.c.html

/*
 * strsep-like tokenizer whose delimiter is the whole string delim rather
 * than a set of single characters.
 *
 * *stringp is the current position in a writable string.  The token that
 * starts there is NUL-terminated in place and returned, and *stringp is
 * moved just past the delimiter.  When no delimiter is left, the rest of
 * the string is returned and *stringp is set to NULL, so the next call
 * returns NULL.  Empty tokens are returned as empty strings.
 *
 * A NULL or empty delim never matches: the remaining text is the last token.
 * Returns NULL if stringp or *stringp is NULL.
 */
char *strsepm( char **stringp, const char *delim ) {

    char *begin, *end;

    if  ( stringp == NULL ) return NULL;

    begin = *stringp;

    if  ( begin == NULL ) return NULL;

    /*
     * Find the end of the token.  strstr() matches an empty needle at begin
     * itself, which would never advance *stringp, so skip the search then.
     */
    end = ( delim != NULL && *delim != '\0' ) ? strstr( begin , delim ) : NULL;

    if ( end != NULL ) {

        /* Terminate the token and set *STRINGP past the delimiter.  */
        *end = '\0';

        *stringp = end + strlen( delim );

    } else {

        /* No more delimiters; this is the last token.  */
        *stringp = NULL;
    }

    return begin;
}

/* Demo driver: splits a sample sentence on "&&" and prints each token in quotes. */
int main( int argc , char *argv [] ) {

    const char      *delimiter = "&&";
    char            buffer [ 256 ];
    char            *cursor;
    char            *token;

    strcpy( buffer , " && Hello && Bernd && waht's && going && on &&");

    /* strsepm advances cursor through buffer and sets it to NULL at the end. */
    cursor = buffer;

    for ( token = strsepm( &cursor , delimiter ) ;
          token != NULL ;
          token = strsepm( &cursor , delimiter ) ) {

        printf( "\'%s\'\n" , token );
    }
}

结果:

' '   
' Hello '    
' Bernd '    
' waht's '    
' going '    
' on '    
''