二叉树的标记化问题

Question

我正在尝试对文本文件进行标记，然后将标记放入二叉树中，其中具有较低值的标记位于树的左侧分支，具有较高值的标记位于右侧和重复的值有一个更新的计数。我遇到的问题是，每当 "fgets" 从文本文件中获取新的字符串行时，它会将二叉树的顶部更改为新字符串的第一个标记。它基本上创建了一个新的二叉树，而不是继续我想要的原始二叉树。我认为问题出在我标记文本文件的方式中。

函数"insert"完成二叉树的所有计算。函数 "addNode" 将文本文件的第一个标记添加到二叉树的顶部。

示例文本文件：

二七八十

九五零

八个

十一个

    int main(void)
    {
        char buffer[100];
        char *del = " ";
        char *token;
        struct node* root = NULL;
        int i = 0;

        while (fgets(buffer, sizeof(buffer), stdin) != NULL)
        {
            token = strtok(buffer, del);

            if(root == NULL)
            {   
                printf("add: %s\n", token);
                root = addNode(token);    
            }

            else
            {
                insert(root, token);
            }


            while(token != NULL)
            {   
                token = strtok(NULL, del);

                if(token != NULL)
                    insert(root, token);
            }

        }

    }

void insert(struct node* root, char *token)
{


    if(strcmp(token, root->word) < 0)
    {
        printf("%s\n", root->word);
        if(root->left == NULL)
        {
            root->left = addNode(token);
            printf("left add: %s\n", token);

        }

        else
        {
            printf("going left\n");
            insert(root->left, token);
        }
    }

    else if(strcmp(token, root->word) > 0)
    {
        if(root->right == NULL)
        {
            root->right = addNode(token);
            printf("right add: %s\n", token);
        }
        else
        {
            printf("going right\n");
            insert(root->right, token);
        }
    }
    else
    {
        printf("updating count: %s\n", token);
        root->count = root->count + 1;
    }
}

struct node* addNode(char *token)
{
    struct node* temp = malloc(sizeof(struct node));

    temp->word = token;
    temp->left = NULL;
    temp->right = NULL;

    return temp;
}

Answer 1

这里的问题是变量 root 作为值而不是引用传递给函数 insert。作为一个值，它不能被修改。

修复：

 void insert(struct node **_root, char *token)
 {
      struct node *root = *_root; /* ADDED */
      if(strcmp(token, root->word) < 0)
      {
         printf("%s\n", root->word);
         if(root->left == NULL)
         {
            root->left = addNode(token);
            printf("left add: %s\n", token);
         }
         else
         {
            printf("going left\n");
            insert(&root->left, token);    // MODIFIED
         }
     }
    else if(strcmp(token, root->word) > 0)
    {
         if(root->right == NULL)
         {
             root->right = addNode(token);
             printf("right add: %s\n", token);
         } 
         else
         {
             printf("going right\n");
             insert(&root->right, token);    // MODIFIED
         }
     }
     else
     { 
         printf("updating count: %s\n", token);
         root->count = root->count + 1;
     }
     *_root = root;
 }

 insert(&root, token);

Answer 2

可能编写 insert 函数的最简单方法（以多次覆盖子指针为代价）是：

    struct node* insert(struct node* nd, const char* token)
    {
        int cmp;

        if(nd == NULL)
        {
            printf("add: %s\n", token);
            return addNode(token);
        }

        cmp = strcmp(token, nd->word);
        if(cmp < 0)
            nd->left = insert(nd->left, token);
        else if(cmp > 0)
            nd->right = insert(nd->right, token);
        else
        {
            printf("updating count: %s\n", token);
            nd->count ++;
        }

        return nd;
    }

然后在 main:

    while (fgets(buffer, sizeof(buffer), stdin) != NULL)
    {
        for( token = strtok(buffer, del); token != NULL;
            token = strtok(NULL, del))
        {   
            root = insert(root, token);
        }
    }

Answer 3

程序正确率为 90%。这是您的问题：

strtok() 使用一些静态缓冲区。它总是 return 指向同一内存区域的指针。这就是为什么你觉得你的顶级节点每次都在变化。只需复制已解析的字符串即可。

你忘了初始化每个节点的计数！你迟早运行会遇到麻烦，除非你也解决这个问题。

struct node* addNode(char *token)
{
    struct node* temp = malloc(sizeof(struct node));

    temp->word = strdup(token);
    temp->left = NULL;
    temp->right = NULL;
    temp->count = 0;

    return temp;
}

二叉树的标记化问题

Trouble tokenizing for binary tree

c

binary-tree

tokenize