使用 Bison 的递归规则和使用它存储值时遇到问题

Troubles using Bison's recursive rules, and storing values using it

我正在尝试为 Newick 文件格式树制作一个 flex+bison 扫描器和解析器,以便对它们进行操作。实现的语法解释基于简化(标签和长度总是相同的类型,由 flex 返回)this example.

这本质上是一种文件格式的解析器,它表示具有一系列(递归)子树 and/or 叶子的树。 主树将始终以 ; 结束,并且所述树和其中的所有子树将包含一系列介于 )之间的节点,名称和长度在最右边的 parenthesis 的右边,由 name:length 指定,它们是可选的(您可以避免指定它们,将其中之一(name:length)或两者都放在 名称:长度).

如果任何节点缺少名称或长度,将应用默认值。 (例如:'missingName''1'

一个例子是 (child1:4, child2:6)root:6; , ((child1Of1:2, child2Of1:9)child1:5, child2:6)root:6;

上述语法的实现如下(注意:我翻译了我自己的代码,因为它是用我的语言编写的,为了清楚起见,删除了很多附属内容):

    struct node {
        char* name; /*the node's assigned name, either from the file or from default values*/
        float length; /*node's length*/
    } dataOfNode;
}

 

%start tree
 
%token<dataOfNode> OP CP COMMA SEMICOLON COLON DISTANCE NAME

%type<dataOfNode> tree subtrees recursive_subtrees subtree leaf


%%

tree:   subtrees NAME COLON DISTANCE SEMICOLON          {} // with name and distance    
        | subtrees NAME SEMICOLON                       {} // without distance
        | subtrees COLON DISTANCE SEMICOLON             {} // without name
        | subtrees SEMICOLON                            {} // without name nor distance
        ;
    
    
subtrees:   OP recursive_subtrees CP                    {}
            ;   
        
            
recursive_subtrees: subtree                             {} // just one subtree, or the last one of the list         
                    | recursive_subtrees COMMA subtree  {} // (subtree, subtree, subtree...)        
    

subtree:    subtrees NAME COLON DISTANCE    {   $$.NAME= .name; $$.length = .length; $$.lengthAcum = $$.lengthAcum + .length;
                                                } // group of subtrees, same as the main tree but without ";" at the end, with name and distance                                        
            
            | subtrees NAME                 {   $$.name= .name; $$.length = 1.0;}             // without distance                                 
            
            | subtrees COLON DISTANCE       {   $$.name= "missingName"; $$.length = .length;} // without name                             
           
            | subtrees                      {   $$.name= "missingName"; $$.length = 1.0;}       // without name nor distance                            
            
            | leaf                          {   $$.name= .name; $$.length = .length;}       // a leaf
                    
                    
                    
leaf:   NAME COLON DISTANCE {   $$.name= $$.name; $$.length = .length;}       // with name and distance
        | NAME              {   $$.name= .name; $$.length = 1.0;}             // without distance
        | COLON DISTANCE    {   $$.name= "missingName"; $$.length = .length;} // without name
        |                   {   $$.name= "missingName"; $$.length = 1.0;}       // without name nor distance
        ;


%%

现在,假设我要区分每个子树和叶子的parent是谁,这样我就可以累加一个parent子树的长度与“最长”的长度" child,递归。

不知道是不是我选错了这个设计,但是我过不去 为每个子树(和叶子,也被认为是子树)分配名称和长度,以及 我不认为我理解递归性是如何工作的,以便在匹配过程中识别 parents。

这主要是定义要保存树的数据结构,并在规则的操作中“自下而上”地构建该数据结构。 “自下而上”部分是野牛解析器工作方式的重要含义——它们是“自下而上”的,从语法的叶子识别结构,然后将它们组装成更高的非终端(并最终进入起始非-terminal,这将是最后一个操作 运行)。您还可以通过没有那么多冗余规则来简化事情。最后,IMO 最好对单个字符标记使用字符文字而不是名称。所以你可能会得到:

%{
struct tree {
    struct tree  *next;     /* each tree is potentially part of a list of (sub)trees */
    struct tree  *subtree;  /* and may contain a subtress */
    const char   *name;
    double       length;
};

struct tree *new_leaf(const char *name, double length);   /* malloc a new leaf "tree" */
void append_tree(struct tree **list, struct tree *t);  /* append a tree on to a list of trees */
%}

%union {
    const char   *name;
    double       value;
    struct tree  *tree;
}

%type<tree> subtrees recursive_subtrees subtree leaf
%token<name> NAME
%token<value> DISTANCE

%%

tree: subtrees leaf ';'  { ->subtree = ;  print_tree(); } 
    ;
    
subtrees: '(' recursive_subtrees ')'  { $$ = ; }
        ;   
            
recursive_subtrees: subtree                         { $$ = ; } // just one subtree, or the last one of the list
                  | recursive_subtrees ',' subtree  { append_tree(&($$=)->next, ); } // (subtree, subtree, subtree...)
                  ;

subtree: subtrees leaf  { ($$=)->subtree = ; }
       | leaf           { $$ = ; }
       ;
                    
leaf: NAME ':' DISTANCE { $$ = new_leaf(, );}              // with name and distance
    | NAME              { $$ = new_leaf(, 1.0);}             // without distance
    | ':' DISTANCE      { $$ = new_leaf("missingName", ; }   // without name
    |                   { $$ = new_leaf("missingName", 1.0); } // without name nor distance
    ;

%%

struct tree *new_leaf(const char *name, double length) {
    struct tree *rv = malloc(sizeof(struct tree));
    rv->subtree = rv->next = NULL;
    rv->name = name;
    rv->length = length;
    return rv;
}

void append_tree(struct tree **list, struct tree *t) {
    assert(t->next == NULL);  // must not be part of a list yet
    while (*list) list = &(*list)->next;
    *list = t;
}