如何在 C 中使用 strtok 将 C 字符串拆分两次?

How can I split a C-string twice with strtok in C?

假设我们有一个 C 字符串

text = "0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#<...>#20,25,30"

目标是先用 # 拆分该字符串,然后再用 , 拆分,因为我需要 # 之间的每一组值,而每一组中有三个由 , 分隔的独立数值。

字符串 text 包含 17 组元素,每组有 3 个以 , 分隔的数字,各组之间共有 16 个 # 分隔符。

我确实尝试用这段代码解决这个问题。

/* Question's attempt: split on DELIMITER in the outer step, then split each
 * piece on ',' with a second, nested strtok sequence.
 * NOTE(review): DELIMITER and text are defined outside this snippet;
 * DELIMITER is presumably "#" -- confirm. */
char *min_max_bias_char;
float min_max_bias_float[3*17]; /* 3 values per each analog input channel */
for(uint8_t i = 0; i <= 16; i++) {
    /* Outer split: the first call passes the string, later calls pass NULL. */
    if(i == 0)
        min_max_bias_char = strtok(text, DELIMITER);
    else
        min_max_bias_char = strtok(NULL, DELIMITER);
    /* Inner split: passing a non-NULL string starts a new strtok sequence.
     * strtok remembers only one position, so the outer strtok(NULL, DELIMITER)
     * on the next iteration no longer continues in text. */
    min_max_bias_float[0 + i*3] = atoff(strtok(min_max_bias_char, ",")); /* Min value */
    min_max_bias_float[1 + i*3] = atoff(strtok(NULL, ","));              /* Max value */
    min_max_bias_float[2 + i*3] = atoff(strtok(NULL, ","));              /* Bias value */
}

我首先根据 # 拆分文本字符串 text,然后我获取 min_max_bias_char 的第一个索引并在分隔符 , 上拆分它。

结果并不理想,因为一旦调用了 strtok(min_max_bias_char, ","),strtok 就丢失了外层 min_max_bias_char = strtok(NULL, DELIMITER); 语句所依赖的拆分位置。

现在我得到了数组 min_max_bias_float,其中保存的值为 {0.4,0.1,-4.1,100,200,300,-32.13,23.41,100,<...>,20,25,30}

这是输出。 那么我该如何解决这个问题呢? 我正在尝试将字符串拆分两次。

您不需要嵌套使用 strtok()。只需交替使用分隔符即可:主循环每执行一次,先用逗号 , 拆分两次,再用井号 # 拆分一次。

/* One strtok sequence over the whole string: in every record the first two
 * fields end at ',' and the third ends at DELIMITER. */
char *curptr = text;
for(uint8_t rec = 0; rec < 17; rec++) {
    const int base = rec * 3;

    min_max_bias_float[base]     = atoff(strtok(curptr, ","));
    curptr = NULL; /* only the very first call receives the string itself */
    min_max_bias_float[base + 1] = atoff(strtok(NULL, ","));
    min_max_bias_float[base + 2] = atoff(strtok(NULL, DELIMITER));
}

多亏了 strtok_r,下面的代码可以正常工作:
/* Collect */
char *min_max_bias_char;
char *text_pointer = text;
float min_max_bias_float[3*17]; /* 3 values per each analog input channel */
for(uint8_t i = 0; i <= 16; i++) {
    if(i == 0)
        min_max_bias_char = strtok_r(text, DELIMITER, &text_pointer);
    else
        min_max_bias_char = strtok_r(NULL, DELIMITER, &text_pointer);
    min_max_bias_float[0 + i*3] = atoff(strtok(min_max_bias_char, ",")); /* Min value */
    min_max_bias_float[1 + i*3] = atoff(strtok(NULL, ","));              /* Max value */
    min_max_bias_float[2 + i*3] = atoff(strtok(NULL, ","));              /* Bias value */
}

strtok 可以同时接受多个分隔符,而且您的数据结构似乎并不关心当前的分隔符是 ',' 还是 '#'(换句话说,您并不是在构建一个需要嵌套循环的二维结构),因此只需提供一个包含这两种分隔符的字符串,并在循环中调用 strtok 即可。

这是一个您可以适应您的环境的最小示例:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Minimal example: split on both '#' and ',' with one strtok sequence and
 * print every value with two decimals. */
int main(void) {
    char text[] = "0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30";
    char delimiters[] = "#,";
    int size = 3 * 4; // or 3 * 17;
    float res[size];

    /* Only the first strtok call is given the string; NULL continues it. */
    char *source = text;
    for (int k = 0; k < size; k++) {
        res[k] = atof(strtok(source, delimiters));
        source = NULL;
    }

    int k = 0;
    while (k < size) {
        printf("%.2f ", res[k]);
        k++;
    }

    puts("");
    return 0;
}

输出:

0.40 0.10 -4.10 100.00 200.00 300.00 -32.13 23.41 100.00 20.00 25.00 30.00

在上面的代码中,最好检查 strtok 的返回值(没有更多标记时它会返回 NULL)。

如果想避免使用 strtok(这样做有充分的理由),可以使用 strtok_r,或者用循环手写:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hand-written tokenizer: copies each run of characters between delimiters
 * into a small buffer, converts it with atof and prints all values.
 * Returns 1 if a token is too long for the buffer or there are more tokens
 * than res can hold, 0 otherwise. */
int main(void) {
    char delimiters[] = "#,";
    char text[] = "0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30";
    int size = 3 * 4; // or 3 * 17;
    float res[size];
    int res_size = 0;
    int last_index = 0; // start of the token currently being scanned

    // Scan up to and including the terminating '\0': the last number is
    // followed by the end of the string, not by a delimiter, and would
    // otherwise never be converted.
    for (int i = 0, len = strlen(text); i <= len; i++) {
        int at_end = (i == len);

        if (!at_end && !strchr(delimiters, text[i])) {
            continue; // still inside a token
        }
        if (at_end && i == last_index) {
            break; // empty text, or text ended with a delimiter
        }
        if (i - last_index >= 32 || res_size >= size) {
            fprintf(stderr, "buffer size exceeded\n");
            return 1;
        }

        // buf is zero-filled and the token is shorter than buf, so the copy
        // is always NUL-terminated.
        char buf[32] = {0};
        strncpy(buf, text + last_index, i - last_index);
        res[res_size++] = atof(buf);
        last_index = i + 1;
    }

    for (int i = 0; i < res_size; i++) {
        printf("%.2f ", res[i]);
    }

    puts("");
    return 0;
}

您在评论中有有用的提示,并且已经有有用的答案。

无论如何,我会指点你使用状态机。这是表达此类问题的一种常见且可能简单的方法。

在此示例中,它是最小的,只有 2 个状态。

在一些讨论之后,下面给出了一个完整的 C 程序 :)

关于数据

如果我理解正确的话,您有多个字段(在本例中为 3 个 double),它们由 , 分隔并组成一个组。每个组由 # 包围,或者至少以 # 结尾。组的数量不固定。

如果有一个函数可以获取一行,解析它并以一些有用且随时可用的方式获取值,那就太好了。所以一开始我会查看数据

本组

/* One group: three double values. */
typedef struct
{
    double field[3];
} Group;

结果集

// Growable array of Group values.
typedef struct
{
    unsigned n_groups; // number of groups currently stored in g
    unsigned n_incr; // how many groups the array grows by when it is full
    int      n_size; // number of groups g has room for. NOTE(review): the original comment says negative values are error codes -- no visible code stores one, confirm
    Group*   g; // the groups, as one heap-allocated array

}   Set;

Set 包含一个 Group 数组。每个 Group 有 3 个 double。由于组数未知,该数组需要动态创建。数组以 n_incr 个组为一块进行分配和扩展,当前已分配的容量保存在 n_size 中。这是相当常见的做法。

这样做也很方便,因为您可以轻松地遍历结果,或者把它们保存起来以备将来使用。下面是在屏幕上显示一个集合的代码:

/*
 * Print the number of groups in `set`, followed by one line per group with
 * its 1-based position and its three fields (two decimals each).
 * Does nothing when `set` is NULL.
 */
void        print_set(Set* set)
{
    if (set == NULL) return;
    // n_groups and the loop index are unsigned, so they are printed with %u
    printf("set: %u groups:\n", set->n_groups);
    for (unsigned i = 0; i < set->n_groups; i += 1)
    {
        const Group* grp = &set->g[i];
        printf("%3u: %.2f, %.2f, %.2f\n", 1 + i,
            grp->field[0],
            grp->field[1],
            grp->field[2]);
    }
}

这表明,对于行

        "0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30", 

解析后:

set: 4 groups:
  1: 0.40, 0.10, -4.10
  2: 100.00, 200.00, 300.00
  3: -32.13, 23.41, 100.00
  4: 20.00, 25.00, 30.00

从字符串中获取结果集的函数

    // Parses the string into groups stored in the Set; the definition
    // returns 0 on success and a negative code on error.
    int         parse(const char*,Set*); // parse string into set

像上面那样传入一个字符串和一个 Set,解析出的数值就会保存在该集合中;解析成功时返回值为 0。

辅助函数

为了方便起见,既然是例子,程序就用到了这些函数

    // Helper functions used by the example program.
    Set*        build_set(unsigned);   // allocate an empty Set; the argument is the block size in groups
    Set*        free_set(Set*);        // free the Set and its array; always returns NULL
    Set*        insert(Group*, Set*);  // insert group into set
    int         parse(const char*,Set*); // parse string into set
    void        print_set(Set*);       // print every group of the Set

这些函数的作用(我相信)都很直观。build_set() 的参数是初始分配的块的大小(以组为单位),也是之后每次扩展(如有需要)时增加的大小。free_set() 按正确的顺序释放内存,insert() 把一个组插入结果集,print_set() 在屏幕上显示结果集,parse() 则是实际的解析器。

main() 进行测试

示例代码获取一个字符串数组并使用上面的函数解析它们:

/*
 * Test driver: for each test string, builds a Set with blocks of 10 groups,
 * parses the string into it, prints the result and frees the Set.
 * Returns 1 if a Set cannot be built, 0 otherwise.
 */
int         main(void)
{
    // a few tests
    const char* test[] = {
        "0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30",
        "#0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30#",
        "1.1,-2.2,3.3",
        "#1,2,3,4#", NULL};

    // parse all tests

    for (int i = 0; test[i] != NULL; i += 1)
    {
        printf("About to parse \"%s\"\n", test[i]);
        Set* values = build_set(10);
        if (values == NULL)
        {   // nothing to parse into
            fprintf(stderr, "build_set() failed\n");
            return 1;
        }
        int res = parse(test[i], values);
        // n_groups is unsigned, hence %u
        printf("\nparse() returned %d, found %u groups\n",
               res, values->n_groups);
        print_set(values);
        values = free_set(values);  // free_set() returns NULL
        printf("\n\tAnswer set free()'d\n\n");
    }  // for()
    return 0;
}

逻辑很简单:对于每一行:

  • 使用 10 组的块构建一个集合
  • 使用实际函数解析字符串
  • 显示结果集
  • 释放内存并使指针失效

您可以编辑数组 test[] 来尝试其他输入,只需保留末尾的 NULL 即可。实际上,测试中的字符串来自您的代码,最后还有一个包含 4 个 double 的无效行。

测试输出

About to parse "0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30"

parse() returned 0, found 4 groups
set: 4 groups:
  1: 0.40, 0.10, -4.10
  2: 100.00, 200.00, 300.00
  3: -32.13, 23.41, 100.00
  4: 20.00, 25.00, 30.00

        Answer set free()'d

About to parse "#0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30#"

parse() returned 0, found 4 groups
set: 4 groups:
  1: 0.40, 0.10, -4.10
  2: 100.00, 200.00, 300.00
  3: -32.13, 23.41, 100.00
  4: 20.00, 25.00, 30.00

        Answer set free()'d

About to parse "1.1,-2.2,3.3"

parse() returned 0, found 1 groups
set: 1 groups:
  1: 1.10, -2.20, 3.30

        Answer set free()'d

About to parse "#1,2,3,4#"

parse() returned -4, found 0 groups
set: 0 groups:

        Answer set free()'d

完整的程序

// States of the parser in parse().
// ST_INIT: initial state, used for the first character of the text.
#define ST_INIT 0
// ST_INFIELD: state used for every character after the first one.
#define ST_INFIELD 1

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One group: three double values. */
typedef struct
{
    double field[3];
} Group;

// Growable array of Group values.
typedef struct
{
    unsigned n_groups; // number of groups currently stored in g
    unsigned n_incr; // how many groups the array grows by when it is full
    int      n_size; // number of groups g has room for. NOTE(review): the original comment says negative values are error codes -- no visible code stores one, confirm
    Group*   g; // the groups, as one heap-allocated array

}   Set;

// Helper functions used by the example program.
Set*        build_set(unsigned);   // allocate an empty Set; the argument is the block size in groups
Set*        free_set(Set*);        // free the Set and its array; always returns NULL
Set*        insert(Group*, Set*);  // insert group into set
int         parse(const char*,Set*); // parse string into set
void        print_set(Set*);       // print every group of the Set

/*
 * Test driver: for each test string, builds a Set with blocks of 10 groups,
 * parses the string into it, prints the result and frees the Set.
 * Returns 1 if a Set cannot be built, 0 otherwise.
 */
int         main(void)
{
    // a few tests
    const char* test[] = {
        "0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30",
        "#0.4,0.1,-4.1#100,200,300#-32.13,23.41,100#20,25,30#",
        "1.1,-2.2,3.3",
        "#1,2,3,4#", NULL};

    // parse all tests

    for (int i = 0; test[i] != NULL; i += 1)
    {
        printf("About to parse \"%s\"\n", test[i]);
        Set* values = build_set(10);
        if (values == NULL)
        {   // nothing to parse into
            fprintf(stderr, "build_set() failed\n");
            return 1;
        }
        int res = parse(test[i], values);
        // n_groups is unsigned, hence %u
        printf("\nparse() returned %d, found %u groups\n",
               res, values->n_groups);
        print_set(values);
        values = free_set(values);  // free_set() returns NULL
        printf("\n\tAnswer set free()'d\n\n");
    }  // for()
    return 0;
}


/*
 * Allocate an empty Set.
 *
 * block: number of groups allocated now and added on each later extension.
 *        A value of 0 is treated as 1, so the Set always has room to grow.
 *
 * Returns the new Set, or NULL if memory cannot be allocated.
 * The caller releases the Set with free_set().
 */
Set*        build_set(unsigned block)
{
    if (block == 0) block = 1;  // a zero-sized block could never grow

    Set* set = (Set*)malloc(sizeof(Set));
    if (set == NULL) return NULL;

    set->g = (Group*)malloc(block * sizeof(Group));
    if (set->g == NULL)
    {   // do not leak the header when the array cannot be allocated
        free(set);
        return NULL;
    }
    set->n_groups = 0;
    set->n_incr   = block;
    set->n_size   = block;
    return set;
}


/*
 * Release a Set: first its array of groups, then the Set itself.
 * Accepts NULL. Always returns NULL, so callers can write
 * `s = free_set(s);` to clear their pointer.
 */
Set*        free_set(Set* set)
{
    if (set != NULL)
    {
        free(set->g);  // the array must go before the struct that points to it
        free(set);
    }
    return NULL;
}


/*
 * Append a copy of group `g` to set `s`, extending the array by one block
 * of n_incr groups when it is full.
 *
 * Returns `s` on success. Returns NULL if an argument is NULL or the array
 * cannot be extended; in that case `s` is left unchanged.
 */
Set*        insert(Group* g, Set* s)
{
    if (g == NULL || s == NULL) return NULL;

    // check for need of extension
    if (s->n_groups >= (unsigned)s->n_size)
    {  // Set is full: adds 1 block
        // With a zero increment the array would not grow and the store
        // below would write past its end, so grow by at least one group.
        unsigned step = (s->n_incr > 0) ? s->n_incr : 1;
        unsigned sz   = (unsigned)s->n_size + step;
        Group*   temp = (Group*)realloc(s->g, sz * sizeof(Group));
        if (temp == NULL) return NULL;  // s->g is still valid
        s->g      = temp;  // extended
        s->n_size = sz;
    }
    s->g[s->n_groups] = *g;  // copies all three fields
    s->n_groups += 1;
    return s;
}


/*
 * Parse `text` into `set`.
 *
 * The text is a sequence of groups separated by '#'; each group holds three
 * fields separated by ','. A single leading '#' and a trailing '#' are
 * accepted. Every field is converted with atof() and each complete group is
 * appended to `set` with insert().
 *
 * Returns:
 *     0  success
 *    -1  text or set is NULL
 *    -2  text is empty
 *   -30  text starts with ','
 *    -3  text ended inside a group with fewer than 3 fields
 *    -4  a ',' follows the third field of a group
 *    -5  a '#' ends a group with fewer than 3 fields
 *    -6  a field does not fit in the internal buffer
 *    -7  insert() failed
 *
 * Groups inserted before an error is detected stay in `set`.
 */
int         parse(const char* text, Set* set)
{
    if (text == NULL || set == NULL) return -1;
    char     line[30];          // characters of the field being read
    char     state = ST_INIT;
    unsigned ix    = 0;         // position in text
    unsigned i_f   = 0;         // inside field: # of chars stored in line
    unsigned n_f   = 0;         // # of fields already stored in grp (0..2)
    Group    grp;
    while (1)
    {
        switch (state)
        {
            case ST_INIT:  // first character only
                switch (text[ix])
                {
                    case 0:
                        return -2;  // empty
                    case ',':
                        return -30;
                    case '#':  // start at #
                        state = ST_INFIELD;
                        break;
                    default:
                        line[i_f++] = text[ix];
                        state       = ST_INFIELD;
                        break;
                }  // switch()
                ix += 1;
                break;  // the next character is handled in ST_INFIELD

            case ST_INFIELD:
            default:
                switch (text[ix])
                {
                    case 0:  // end of text: should have 0 or 3 fields
                        // NOTE: an empty last field (text ending in '#' or
                        // ',') is treated as a normal end.
                        if (i_f == 0) return 0;  // normal end
                        if (n_f != 2) return -3;
                        line[i_f]      = 0;  // terminate string
                        grp.field[n_f] = atof(line);
                        if (insert(&grp, set) == NULL) return -7;
                        return 0;
                    case ',':                    // end of field
                        if (n_f > 1) return -4;  // misplaced
                        line[i_f]      = 0;
                        grp.field[n_f] = atof(line);
                        n_f += 1;  // at most 2 here: the third field
                        i_f = 0;   // ends at '#' or at end of text
                        break;
                    case '#':                     // group terminator #
                        if (n_f != 2) return -5;  // must have 3 fields
                        line[i_f]      = 0;       // terminate string
                        grp.field[n_f] = atof(line);
                        n_f = 0;
                        i_f = 0;
                        if (insert(&grp, set) == NULL) return -7;
                        break;
                    default:
                        // keep one byte free for the terminating 0
                        if (i_f >= sizeof line - 1) return -6;
                        line[i_f++] = text[ix];
                        break;
                }  // switch()
                ix += 1;
                break;
        }  // switch()
    }  // while()
    return 0;
}


/*
 * Print the number of groups in `set`, followed by one line per group with
 * its 1-based position and its three fields (two decimals each).
 * Does nothing when `set` is NULL.
 */
void        print_set(Set* set)
{
    if (set == NULL) return;
    // n_groups and the loop index are unsigned, so they are printed with %u
    printf("set: %u groups:\n", set->n_groups);
    for (unsigned i = 0; i < set->n_groups; i += 1)
    {
        const Group* grp = &set->g[i];
        printf("%3u: %.2f, %.2f, %.2f\n", 1 + i,
            grp->field[0],
            grp->field[1],
            grp->field[2]);
    }
}

/*

how-can-i-split-a-c-string-twice-with-strtok-in-c
*/