使用 Bison 的递归规则和使用它存储值时遇到问题

Question

我正在尝试为 Newick 文件格式树制作一个 flex+bison 扫描器和解析器，以便对它们进行操作。实现的语法解释基于简化（标签和长度总是相同的类型，由 flex 返回）this example.

这本质上是一种文件格式的解析器，它表示具有一系列（递归）子树 and/or 叶子的树。主树将始终以 ; 结束，并且所述树和其中的所有子树将包含一系列介于（和 ）之间的节点，名称和长度在最右边的 parenthesis 的右边，由 name 和 :length 指定，它们是可选的（您可以避免指定它们，将其中之一（name 或 :length）或两者都放在 名称:长度).

如果任何节点缺少名称或长度，将应用默认值。（例如：'missingName' 和 '1'）

一个例子是 (child1:4, child2:6)root:6; , ((child1Of1:2, child2Of1:9)child1:5, child2:6)root:6;

上述语法的实现如下（注意：我翻译了我自己的代码，因为它是用我的语言编写的，为了清楚起见，删除了很多附属内容）：

    struct node {
        char* name; /*the node's assigned name, either from the file or from default values*/
        float length; /*node's length*/
    } dataOfNode;
}

 

%start tree
 
%token<dataOfNode> OP CP COMMA SEMICOLON COLON DISTANCE NAME

%type<dataOfNode> tree subtrees recursive_subtrees subtree leaf


%%

tree:   subtrees NAME COLON DISTANCE SEMICOLON          {} // with name and distance    
        | subtrees NAME SEMICOLON                       {} // without distance
        | subtrees COLON DISTANCE SEMICOLON             {} // without name
        | subtrees SEMICOLON                            {} // without name nor distance
        ;
    
    
subtrees:   OP recursive_subtrees CP                    {}
            ;   
        
            
recursive_subtrees: subtree                             {} // just one subtree, or the last one of the list         
                    | recursive_subtrees COMMA subtree  {} // (subtree, subtree, subtree...)        
    

subtree:    subtrees NAME COLON DISTANCE    {   $$.NAME= .name; $$.length = .length; $$.lengthAcum = $$.lengthAcum + .length;
                                                } // group of subtrees, same as the main tree but without ";" at the end, with name and distance                                        
            
            | subtrees NAME                 {   $$.name= .name; $$.length = 1.0;}             // without distance                                 
            
            | subtrees COLON DISTANCE       {   $$.name= "missingName"; $$.length = .length;} // without name                             
           
            | subtrees                      {   $$.name= "missingName"; $$.length = 1.0;}       // without name nor distance                            
            
            | leaf                          {   $$.name= .name; $$.length = .length;}       // a leaf
                    
                    
                    
leaf:   NAME COLON DISTANCE {   $$.name= $$.name; $$.length = .length;}       // with name and distance
        | NAME              {   $$.name= .name; $$.length = 1.0;}             // without distance
        | COLON DISTANCE    {   $$.name= "missingName"; $$.length = .length;} // without name
        |                   {   $$.name= "missingName"; $$.length = 1.0;}       // without name nor distance
        ;


%%

现在，假设我要区分每个子树和叶子的parent是谁，这样我就可以累加一个parent子树的长度与“最长”的长度" child，递归。

不知道是不是我选错了这个设计，但是我过不去为每个子树（和叶子，也被认为是子树）分配名称和长度，以及我不认为我理解递归性是如何工作的，以便在匹配过程中识别 parents。

Answer 1

这主要是定义要保存树的数据结构，并在规则的操作中“自下而上”地构建该数据结构。 “自下而上”部分是野牛解析器工作方式的重要含义——它们是“自下而上”的，从语法的叶子识别结构，然后将它们组装成更高的非终端（并最终进入起始非-terminal，这将是最后一个操作运行)。您还可以通过没有那么多冗余规则来简化事情。最后，IMO 最好对单个字符标记使用字符文字而不是名称。所以你可能会得到：

%{
struct tree {
    struct tree  *next;     /* each tree is potentially part of a list of (sub)trees */
    struct tree  *subtree;  /* and may contain a subtress */
    const char   *name;
    double       length;
};

struct tree *new_leaf(const char *name, double length);   /* malloc a new leaf "tree" */
void append_tree(struct tree **list, struct tree *t);  /* append a tree on to a list of trees */
%}

%union {
    const char   *name;
    double       value;
    struct tree  *tree;
}

%type<tree> subtrees recursive_subtrees subtree leaf
%token<name> NAME
%token<value> DISTANCE

%%

tree: subtrees leaf ';'  { ->subtree = ;  print_tree(); } 
    ;
    
subtrees: '(' recursive_subtrees ')'  { $$ = ; }
        ;   
            
recursive_subtrees: subtree                         { $$ = ; } // just one subtree, or the last one of the list
                  | recursive_subtrees ',' subtree  { append_tree(&($$=)->next, ); } // (subtree, subtree, subtree...)
                  ;

subtree: subtrees leaf  { ($$=)->subtree = ; }
       | leaf           { $$ = ; }
       ;
                    
leaf: NAME ':' DISTANCE { $$ = new_leaf(, );}              // with name and distance
    | NAME              { $$ = new_leaf(, 1.0);}             // without distance
    | ':' DISTANCE      { $$ = new_leaf("missingName", ; }   // without name
    |                   { $$ = new_leaf("missingName", 1.0); } // without name nor distance
    ;

%%

struct tree *new_leaf(const char *name, double length) {
    struct tree *rv = malloc(sizeof(struct tree));
    rv->subtree = rv->next = NULL;
    rv->name = name;
    rv->length = length;
    return rv;
}

void append_tree(struct tree **list, struct tree *t) {
    assert(t->next == NULL);  // must not be part of a list yet
    while (*list) list = &(*list)->next;
    *list = t;
}

使用 Bison 的递归规则和使用它存储值时遇到问题

Troubles using Bison's recursive rules, and storing values using it

grammar

bison

context-free-grammar