使用 Bison 的递归规则和使用它存储值时遇到问题
Troubles using Bison's recursive rules, and storing values using it
我正在尝试为 Newick 文件格式树制作一个 flex+bison 扫描器和解析器,以便对它们进行操作。实现的语法解释基于简化(标签和长度总是相同的类型,由 flex 返回)this example.
这本质上是一种文件格式的解析器,它表示具有一系列(递归)子树 and/or 叶子的树。
主树将始终以 ; 结束,并且所述树和其中的所有子树将包含一系列介于 ( 和 )之间的节点,名称和长度在最右边的 parenthesis 的右边,由 name 和 :length 指定,它们是可选的(您可以避免指定它们,将其中之一(name 或 :length)或两者都放在 名称:长度).
如果任何节点缺少名称或长度,将应用默认值。 (例如:'missingName' 和 '1')
一个例子是 (child1:4, child2:6)root:6; , ((child1Of1:2, child2Of1:9)child1:5, child2:6)root:6;
上述语法的实现如下(注意:我翻译了我自己的代码,因为它是用我的语言编写的,为了清楚起见,删除了很多附属内容):
struct node {
char* name; /*the node's assigned name, either from the file or from default values*/
float length; /*node's length*/
} dataOfNode;
}
%start tree
%token<dataOfNode> OP CP COMMA SEMICOLON COLON DISTANCE NAME
%type<dataOfNode> tree subtrees recursive_subtrees subtree leaf
%%
tree: subtrees NAME COLON DISTANCE SEMICOLON {} // with name and distance
| subtrees NAME SEMICOLON {} // without distance
| subtrees COLON DISTANCE SEMICOLON {} // without name
| subtrees SEMICOLON {} // without name nor distance
;
subtrees: OP recursive_subtrees CP {}
;
recursive_subtrees: subtree {} // just one subtree, or the last one of the list
| recursive_subtrees COMMA subtree {} // (subtree, subtree, subtree...)
subtree: subtrees NAME COLON DISTANCE { $$.NAME= .name; $$.length = .length; $$.lengthAcum = $$.lengthAcum + .length;
} // group of subtrees, same as the main tree but without ";" at the end, with name and distance
| subtrees NAME { $$.name= .name; $$.length = 1.0;} // without distance
| subtrees COLON DISTANCE { $$.name= "missingName"; $$.length = .length;} // without name
| subtrees { $$.name= "missingName"; $$.length = 1.0;} // without name nor distance
| leaf { $$.name= .name; $$.length = .length;} // a leaf
leaf: NAME COLON DISTANCE { $$.name= $$.name; $$.length = .length;} // with name and distance
| NAME { $$.name= .name; $$.length = 1.0;} // without distance
| COLON DISTANCE { $$.name= "missingName"; $$.length = .length;} // without name
| { $$.name= "missingName"; $$.length = 1.0;} // without name nor distance
;
%%
现在,假设我要区分每个子树和叶子的parent是谁,这样我就可以累加一个parent子树的长度与“最长”的长度" child,递归。
不知道是不是我选错了这个设计,但是我过不去
为每个子树(和叶子,也被认为是子树)分配名称和长度,以及
我不认为我理解递归性是如何工作的,以便在匹配过程中识别 parents。
这主要是定义要保存树的数据结构,并在规则的操作中“自下而上”地构建该数据结构。 “自下而上”部分是野牛解析器工作方式的重要含义——它们是“自下而上”的,从语法的叶子识别结构,然后将它们组装成更高的非终端(并最终进入起始非-terminal,这将是最后一个操作 运行)。您还可以通过没有那么多冗余规则来简化事情。最后,IMO 最好对单个字符标记使用字符文字而不是名称。所以你可能会得到:
%{
struct tree {
struct tree *next; /* each tree is potentially part of a list of (sub)trees */
struct tree *subtree; /* and may contain a subtress */
const char *name;
double length;
};
struct tree *new_leaf(const char *name, double length); /* malloc a new leaf "tree" */
void append_tree(struct tree **list, struct tree *t); /* append a tree on to a list of trees */
%}
%union {
const char *name;
double value;
struct tree *tree;
}
%type<tree> subtrees recursive_subtrees subtree leaf
%token<name> NAME
%token<value> DISTANCE
%%
tree: subtrees leaf ';' { ->subtree = ; print_tree(); }
;
subtrees: '(' recursive_subtrees ')' { $$ = ; }
;
recursive_subtrees: subtree { $$ = ; } // just one subtree, or the last one of the list
| recursive_subtrees ',' subtree { append_tree(&($$=)->next, ); } // (subtree, subtree, subtree...)
;
subtree: subtrees leaf { ($$=)->subtree = ; }
| leaf { $$ = ; }
;
leaf: NAME ':' DISTANCE { $$ = new_leaf(, );} // with name and distance
| NAME { $$ = new_leaf(, 1.0);} // without distance
| ':' DISTANCE { $$ = new_leaf("missingName", ; } // without name
| { $$ = new_leaf("missingName", 1.0); } // without name nor distance
;
%%
struct tree *new_leaf(const char *name, double length) {
struct tree *rv = malloc(sizeof(struct tree));
rv->subtree = rv->next = NULL;
rv->name = name;
rv->length = length;
return rv;
}
void append_tree(struct tree **list, struct tree *t) {
assert(t->next == NULL); // must not be part of a list yet
while (*list) list = &(*list)->next;
*list = t;
}
我正在尝试为 Newick 文件格式树制作一个 flex+bison 扫描器和解析器,以便对它们进行操作。实现的语法解释基于简化(标签和长度总是相同的类型,由 flex 返回)this example.
这本质上是一种文件格式的解析器,它表示具有一系列(递归)子树 and/or 叶子的树。 主树将始终以 ; 结束,并且所述树和其中的所有子树将包含一系列介于 ( 和 )之间的节点,名称和长度在最右边的 parenthesis 的右边,由 name 和 :length 指定,它们是可选的(您可以避免指定它们,将其中之一(name 或 :length)或两者都放在 名称:长度).
如果任何节点缺少名称或长度,将应用默认值。 (例如:'missingName' 和 '1')
一个例子是 (child1:4, child2:6)root:6; , ((child1Of1:2, child2Of1:9)child1:5, child2:6)root:6;
上述语法的实现如下(注意:我翻译了我自己的代码,因为它是用我的语言编写的,为了清楚起见,删除了很多附属内容):
struct node {
char* name; /*the node's assigned name, either from the file or from default values*/
float length; /*node's length*/
} dataOfNode;
}
%start tree
%token<dataOfNode> OP CP COMMA SEMICOLON COLON DISTANCE NAME
%type<dataOfNode> tree subtrees recursive_subtrees subtree leaf
%%
tree: subtrees NAME COLON DISTANCE SEMICOLON {} // with name and distance
| subtrees NAME SEMICOLON {} // without distance
| subtrees COLON DISTANCE SEMICOLON {} // without name
| subtrees SEMICOLON {} // without name nor distance
;
subtrees: OP recursive_subtrees CP {}
;
recursive_subtrees: subtree {} // just one subtree, or the last one of the list
| recursive_subtrees COMMA subtree {} // (subtree, subtree, subtree...)
subtree: subtrees NAME COLON DISTANCE { $$.NAME= .name; $$.length = .length; $$.lengthAcum = $$.lengthAcum + .length;
} // group of subtrees, same as the main tree but without ";" at the end, with name and distance
| subtrees NAME { $$.name= .name; $$.length = 1.0;} // without distance
| subtrees COLON DISTANCE { $$.name= "missingName"; $$.length = .length;} // without name
| subtrees { $$.name= "missingName"; $$.length = 1.0;} // without name nor distance
| leaf { $$.name= .name; $$.length = .length;} // a leaf
leaf: NAME COLON DISTANCE { $$.name= $$.name; $$.length = .length;} // with name and distance
| NAME { $$.name= .name; $$.length = 1.0;} // without distance
| COLON DISTANCE { $$.name= "missingName"; $$.length = .length;} // without name
| { $$.name= "missingName"; $$.length = 1.0;} // without name nor distance
;
%%
现在,假设我要区分每个子树和叶子的parent是谁,这样我就可以累加一个parent子树的长度与“最长”的长度" child,递归。
不知道是不是我选错了这个设计,但是我过不去 为每个子树(和叶子,也被认为是子树)分配名称和长度,以及 我不认为我理解递归性是如何工作的,以便在匹配过程中识别 parents。
这主要是定义要保存树的数据结构,并在规则的操作中“自下而上”地构建该数据结构。 “自下而上”部分是野牛解析器工作方式的重要含义——它们是“自下而上”的,从语法的叶子识别结构,然后将它们组装成更高的非终端(并最终进入起始非-terminal,这将是最后一个操作 运行)。您还可以通过没有那么多冗余规则来简化事情。最后,IMO 最好对单个字符标记使用字符文字而不是名称。所以你可能会得到:
%{
struct tree {
struct tree *next; /* each tree is potentially part of a list of (sub)trees */
struct tree *subtree; /* and may contain a subtress */
const char *name;
double length;
};
struct tree *new_leaf(const char *name, double length); /* malloc a new leaf "tree" */
void append_tree(struct tree **list, struct tree *t); /* append a tree on to a list of trees */
%}
%union {
const char *name;
double value;
struct tree *tree;
}
%type<tree> subtrees recursive_subtrees subtree leaf
%token<name> NAME
%token<value> DISTANCE
%%
tree: subtrees leaf ';' { ->subtree = ; print_tree(); }
;
subtrees: '(' recursive_subtrees ')' { $$ = ; }
;
recursive_subtrees: subtree { $$ = ; } // just one subtree, or the last one of the list
| recursive_subtrees ',' subtree { append_tree(&($$=)->next, ); } // (subtree, subtree, subtree...)
;
subtree: subtrees leaf { ($$=)->subtree = ; }
| leaf { $$ = ; }
;
leaf: NAME ':' DISTANCE { $$ = new_leaf(, );} // with name and distance
| NAME { $$ = new_leaf(, 1.0);} // without distance
| ':' DISTANCE { $$ = new_leaf("missingName", ; } // without name
| { $$ = new_leaf("missingName", 1.0); } // without name nor distance
;
%%
struct tree *new_leaf(const char *name, double length) {
struct tree *rv = malloc(sizeof(struct tree));
rv->subtree = rv->next = NULL;
rv->name = name;
rv->length = length;
return rv;
}
void append_tree(struct tree **list, struct tree *t) {
assert(t->next == NULL); // must not be part of a list yet
while (*list) list = &(*list)->next;
*list = t;
}