N-ary 树函数与 stdin (makenode, insert)

N-ary tree functions with stdin (makenode, insert)

我尝试编写以下函数的代码

typedef struct node{
    char *name;
    int n;  //number of kids
    struct node **kids;
    int dtype; // COMPOSITE or BASIC
    union data{
        double price; //for BASIC
        char *quantity; //for COMPOSITE
    }data;
}node;

void create_kid(node **parent){
    if(*parent == NULL){
        (*parent) = malloc(sizeof(node));
        (*parent)->n = 0;
        (*parent)->kids = NULL;
    }else{
        (*parent)->n += 1;
        (*parent)->kids = realloc((*parent)->kids, ((*parent)->n)*(sizeof(node)));
        (*parent)->kids[(*parent)->n - 1] = malloc(sizeof(node));
        (*parent)->kids[(*parent)->n - 1]->n = 0;
        (*parent)->kids[(*parent)->n - 1]->kids = NULL;
    }
}

void insert_c(node *node, char *str){
        node->name = malloc(MAX);
        node->name = str;
        node->dtype = COMPOSITE;
}

void insert_b(node *node, char *str){
        node->name = malloc(MAX);
        node->name = str;
        node->dtype = BASIC;
}

根据这样的输入生成这样的 n 叉树

BIKE(2*WHEEL(RIM[60.0 ],
        2*AXLE,
        SPOKE[120.],
        HUB(2*GEAR[25.],AXLE(5*BOLT[0.1], 7 * NUT[.15]))),
    FRAME(REARFRAME [175.00],
        1*FRONTFRAME (FORK[22.5] ,AXLE, 2 *HANDLE[10.])))

我知道 n 叉树的另一种表示形式,其中包括 *siblings 和 *firstkid,但我相信这种 **kids 表示形式更适合我的情况。我想问几个问题

首先,函数有什么问题吗?它们适合构建n叉树吗?

其次,即使我得到了正确的函数,我也无法从输入构建树。例如,根据输入,如果我一次读取一个单词,函数调用必须是这样的:

create_kid(&tree);
insert_c(tree, "BIKE");
create_kid(&tree);
insert_c(tree->kids[0], "WHEEL");
create_kid(&(tree->kids[0]));
insert_c(tree->kids[0]->kids[0], "RIM");
.
.
.
create_kid(&tree);
insert_c(tree->kids[1], "FRAME");
.
.

如果按照这种方式形成树,我需要从较高的深度到较低的深度,例如从NUTFRAME。所以我相信一定有更简单的方法,也许是递归方法。有什么方法可以用递归来实现吗?

小事优先:

如果您的 AXLE 节点的同一实例位于 "tree" 中的不同位置,则您的 n 元树不再是 n 元树。这是一个图表。如果你的 Axle,然后还包含一辆自行车,它甚至是一个循环图。

在实施所有这些时,您会注意到不同之处。您将需要引用计数或图表中的某些内容来决定何时真正释放节点。在真正的 n 叉树中,您不需要它,因为每个节点在树中只出现一次。

接下来的管理事项:

您决定使用指针数组作为数据结构来包含节点的子节点。因此,为了降低代码的复杂性,第一步实际创建一个(动态)指针数组是个好主意。好处:您的树代码没有动态数组代码的负担,您可以进行单元测试,甚至可以将您的动态指针数组重新用于其他项目。

我敲了一个示例指针数组实现(并没有真正深入测试)来说明原理。

为了使此处显示的代码更加“用户友好 - 首先是我使用的包含,以消除它:

#include <crtdbg.h> // Windows specific - helps find memory leaks.
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

现在到指针数组的实现:

typedef struct PointerArray_tag
{
    void**data;
    size_t capacity;
    size_t size;
} PointerArray_t;

void PointerArrayInitialize(PointerArray_t *array)
{
    array->data = NULL;
    array->capacity = 0;
    array->size = 0;
}

int PointerArrayReserve(PointerArray_t *array, size_t capacity)
{
    if (array->capacity < capacity)
    {
        void **newData = (void**)realloc(array->data, capacity * sizeof(void*));
        if (NULL != newData)
        {
            array->data = newData;
            array->capacity = capacity;
            return 1; // 1 = "true" in C and indicates success.
        }
        return 0; // 0 = "false" in C and indicates failure...
    }
    else
    {
        return 1;
    }
}
typedef void (*PointerArrayElementFree_t)(void *element);

int PointerArrayResize(PointerArray_t *array, size_t newSize, void* value, PointerArrayElementFree_t elementFree)
{
    if (NULL == array) return 0;
    if (NULL == elementFree) return 0;

    if (newSize < array->size)
    {
        for (size_t index = newSize; index < array->size; index++)
        {
            elementFree(array->data[index]);
        }
        array->size = newSize;
        return 1;
    }
    else if (newSize > array->size)
    {
        size_t oldSize = array->size;
        if (PointerArrayReserve(array, newSize))
        {
            for (size_t index = oldSize; index < newSize; index++)
            {
                array->data[index] = value;
            }
            array->size = newSize;
            return 1;
        }
        return 0; // Reserve() failed, so this function also failed.
    }
    else
        return 1; // oldSize == newSize - nothing to do.
}

int PointerArrayInitializeWithCapacity(PointerArray_t *array, size_t initialCapacity)
{
    PointerArrayInitialize(array);
    return PointerArrayReserve(array, initialCapacity);
}

int PointerArrayPushBack(PointerArray_t *array, void*element)
{
    if (PointerArrayReserve(array, array->size + 1))
    {
        array->data[array->size] = element;
        array->size++;
        return 1;
    }
    return 0;
}

int PointerArrayClear(PointerArray_t *array, PointerArrayElementFree_t elementFree)
{
    if (NULL == elementFree) return 0;
    if (NULL == array) return 0;

    for (size_t index = 0; index < array->size; index++)
    {
        elementFree(array->data[index]);
        array->data[index] = NULL;
    }
    array->size = 0;
    return 1;
}

int PointerArrayUninitialize(PointerArray_t *array, PointerArrayElementFree_t elementFree)
{
    if (PointerArrayClear(array, elementFree))
    {
        free(array->data);
        array->data = NULL;
        array->capacity = 0;
        return 1;
    }
    return 0;
}

没什么特别的。具有常用操作的空指针动态数组。

接下来,虽然不是 100% 适合你的问题(我觉得这会花费我更多的时间来完成这封信),但 n-ary 树实现。如上所述,尚未准备好用作图表。

此外,我冒昧地假设了一种语法形式:

<Material> ::= <Count> <Name> <Size>
            |  <Count> <Name>

解析这个产生式,解析器首先会找到一个数字(例如 2 个轮子),然后是 material(轮子)的名称,然后是可选的测量值(例如 Nuts[0.15])。

当应用程序使用 AST 并且过于复杂时,更接近您的问题的替代语法稍后将更难处理:

<Material> ::= <Count> <Name> <Size>
            | <Count> <Name>
            | <Name>
            | <Name> <Size>

这暗示如果您对输入文本的语法有影响,您可以对其进行一些改进。

所以,这里是具有上述约束的 ComponentTree 代码。

enum NodeContentType
{
    EMPTY = 0
,   NUMBER = 1
,   NAME = 2
};

typedef struct NodeContent_tag
{
    NodeContentType type;
    union
    {
        float number;
        char *name;
    };
} NodeContent_t;

typedef struct ComponentNode_tag
{
    NodeContent_t content;
    ComponentNode_tag * parent;
    PointerArray_t children;
} ComponentNode_t;

void ComponentNodeFree(ComponentNode_t *node)
{
    // Clean up anything heap based in NodeContent_t.
    switch (node->content.type)
    {
    case NAME:
        free(node->content.name);
        node->content.name = NULL;
        node->content.type = EMPTY;
        break;
    default:
        // nothing to do.
        break;
    }

    // Free all the children below this node recursively.
    PointerArrayUninitialize(&node->children, (PointerArrayElementFree_t)ComponentNodeFree);
    free(node);
}
typedef struct ComponentTree_tag
{
    ComponentNode_t root;
} ComponentTree_t;

void ComponentTreeInitialize(ComponentTree_t *tree)
{
    tree->root.content.type = EMPTY;
    PointerArrayInitialize(&tree->root.children);
    tree->root.parent = NULL;
}

ComponentNode_t *ComponentTreeRoot(ComponentTree_t *tree)
{
    return &tree->root;
}

void ComponentTreeClear(ComponentTree_t *tree)
{
    if (NULL != tree)
    {
        PointerArrayClear(&tree->root.children,(PointerArrayElementFree_t)ComponentNodeFree);
    }
}

void ComponentTreeUninitialize(ComponentTree_t *tree)
{
    if (NULL != tree)
    {
        PointerArrayUninitialize(&tree->root.children, (PointerArrayElementFree_t)ComponentNodeFree);
    }
}

ComponentNode_t *ComponentTreeCreateEmptyNode()
{
    ComponentNode_t *node = (ComponentNode_t*)malloc(sizeof(ComponentNode_t));
    if (NULL != node)
    {
        node->content.type = EMPTY;
        node->parent = NULL;
        PointerArrayInitialize(&node->children);
    }
    return node;
}

ComponentNode_t *ComponentTreeCreateNameNode(const char *name)
{
    ComponentNode_t *node = (ComponentNode_t*)malloc(sizeof(ComponentNode_t));
    if (NULL != node)
    {
        node->content.type = NAME;
        node->content.name = _strdup(name);
        assert(NULL != node->content.name); 
        if (NULL == node->content.name)
        {
            free(node);
            return NULL;
        }
        node->parent = NULL;
        PointerArrayInitialize(&node->children);
    }
    return node;
}

ComponentNode_t *ComponentTreeCreateNumberNode(float number)
{
    ComponentNode_t *node = (ComponentNode_t*)malloc(sizeof(ComponentNode_t));
    if (NULL != node)
    {
        node->content.type = NUMBER;
        node->content.number = number;
        node->parent = NULL;
        PointerArrayInitialize(&node->children);
    }
    return node;
}


int ComponentTreeJoin(ComponentTree_t *tree, ComponentNode_t *where, ComponentNode_t *child)
{
    if (NULL == child) return 0;
    if (NULL != child->parent) return 0; // is already in a tree.
    if (NULL == tree) return 0;
    if (NULL == where)
    {
        where = &tree->root;
    }
    child->parent = where;
    return PointerArrayPushBack(&where->children, child);
}

ComponentTree_t *tree 参数存在于 ComponentTreeJoin() 中,乍一看可能有点滑稽。但是, CompoenentTreeCreateXXXNode() 函数也应该有它。为什么?因为很可能以后有人想要像 ComponentTree_t *ComponentTreeFromNode(ComponentNode_t *node)ComponentNode_t *ComponentTreeGetRoot(ComponentNode_t *node) 这样的函数。

最后一点,树的手动组装可能看起来像这样(不完整,这个答案已经很长了):

int _tmain(int argc, _TCHAR* argv[])
{
    ComponentTree_t bikeTree;
    ComponentTreeInitialize(&bikeTree);
    ComponentNode_t * nutSize = ComponentTreeCreateNumberNode(0.15f);
    ComponentNode_t * nut = ComponentTreeCreateNameNode("NUT");
    ComponentTreeJoin(&bikeTree, nut, nutSize);
    ComponentNode_t *nutCount = ComponentTreeCreateNumberNode(7.0f);
    ComponentTreeJoin(&bikeTree, nutCount, nut);
    ComponentNode_t * boltSize = ComponentTreeCreateNumberNode(0.1f);
    ComponentNode_t * bolt = ComponentTreeCreateNameNode("BOLT");
    ComponentTreeJoin(&bikeTree, bolt, boltSize);
    ComponentNode_t * boltCount = ComponentTreeCreateNumberNode(5.0f);
    ComponentTreeJoin(&bikeTree, boltCount, bolt);
    ComponentNode_t * axle = ComponentTreeCreateNameNode("AXLE");
    ComponentTreeJoin(&bikeTree, axle, nutCount);
    ComponentTreeJoin(&bikeTree, axle, boltCount);
    ComponentNode_t *axleCount = ComponentTreeCreateNumberNode(1.0f);
    ComponentTreeJoin(&bikeTree, axleCount, axle);
    ComponentNode_t *gearSize = ComponentTreeCreateNumberNode(25.0f);
    ComponentNode_t *gear = ComponentTreeCreateNameNode("GEAR");
    ComponentTreeJoin(&bikeTree, gear, gearSize);
    ComponentNode_t *gearCount = ComponentTreeCreateNumberNode(2.0f);
    ComponentTreeJoin(&bikeTree, gearCount, gear);
    ComponentNode_t *hub = ComponentTreeCreateNameNode("HUB");
    ComponentTreeJoin(&bikeTree, hub, gearCount);
    ComponentTreeJoin(&bikeTree, hub, axleCount);
    ComponentNode_t *hubCount = ComponentTreeCreateNumberNode(1.0f);
    ComponentTreeJoin(&bikeTree, hubCount, hub);
    ComponentNode_t * spokeSize = ComponentTreeCreateNumberNode(120.0f);
    ComponentNode_t * spoke = ComponentTreeCreateNameNode("SPOKE");
    ComponentTreeJoin(&bikeTree, spoke, spokeSize);
    ComponentNode_t * spokeCount = ComponentTreeCreateNumberNode(1.0f);
    ComponentTreeJoin(&bikeTree, spokeCount, spoke);
    ComponentNode_t * axle1 = ComponentTreeCreateNameNode("AXLE");
    ComponentNode_t * axle1Count = ComponentTreeCreateNumberNode(2.0f);
    ComponentTreeJoin(&bikeTree, axle1Count, axle1);
    // ...
    ComponentNode_t * wheel = ComponentTreeCreateNameNode("WHEEL");
    ComponentTreeJoin(&bikeTree, wheel, axle1Count);
    ComponentTreeJoin(&bikeTree, wheel, spokeCount);
    ComponentTreeJoin(&bikeTree, wheel, hubCount);
    ComponentNode_t * wheelCount = ComponentTreeCreateNumberNode(2.0f);
    ComponentTreeJoin(&bikeTree, wheelCount, wheel);
    ComponentNode_t *bike = ComponentTreeCreateNameNode("BIKE");
    ComponentTreeJoin(&bikeTree, bike, wheelCount);
    ComponentNode_t *bikeCount = ComponentTreeCreateNumberNode(1.0f);
    ComponentTreeJoin(&bikeTree, bikeCount, bike);
    // .. frame branch omitted .. 
    ComponentTreeJoin(&bikeTree, NULL, bikeCount);
    ComponentTreeUninitialize(&bikeTree);

    _CrtDumpMemoryLeaks();
    return 0;
}

总结:

  • 如果它真的应该是图,而不是 n 叉树,那是有人选错了术语。
  • 混合到树实现中的动态数组代码会降低代码的可测试性和可读性。
  • 如果使用像 kids-list 这样的动态指针数组,或者单向链表不是真正的基础。列表可能更适合,因为示例输入中的大多数节点无论如何都只有 1 个子节点,而且差异很小。数组的当前实现也没有节省堆操作的数量(但它可以,如果默认容量没有被编程为 0 而可能是 1 或 2 或其他合适的值)。
  • main() 代码显示了树的构造方式与遍历输入文本的解析器生成的顺序不完全相同。 (我有时从右到左而不是 100% 从左到右)。
  • 此处显示的代码并未将 material 项目的数量视为节点的成员,但它在 material 名称节点之上生成了一个数字节点。这可能但不一定是有益的,因为它完全取决于语法的定义方式和解析器的类型等。这种情况经常发生。 AST 最终的样子也由解析器做出的决定驱动。