如何使用 Struct 创建带树的表达式解析器
How to create an Expression Parser with Tree using Struct
我有一个问题我不明白。我想创建一个表达式解析器。所以,首先我为那个解析器创建了一棵树。就是这样。
/* Node kinds stored in Tree.operation: literal kinds and binary operators. */
enum {
    integer   = 0,
    plus      = 1,
    minus     = 2,
    multi     = 3,
    divis     = 4,
    string    = 5,
    character = 6
};
struct Tree {
int operation;
struct Tree *left;
struct Tree *right;
char *value;
};
/*
 * Allocate a tree node and fill in its fields.
 *
 * operation:  node kind (integer, plus, minus, ...).
 * left/right: child subtrees, or NULL.
 * value:      literal text; the pointer is stored as-is, not copied.
 *
 * Returns the new node, or NULL if the allocation fails. The caller owns
 * the node and must check for NULL before using it.
 */
struct Tree *make_node(int operation, struct Tree *left, struct Tree *right, char *value) {
    struct Tree *n = malloc(sizeof *n);
    if (n == NULL) {
        /* Stop here: writing through a NULL pointer is undefined behavior. */
        fprintf(stderr, "Unable to malloc \'make_node()\'\n");
        return NULL;
    }
    n->operation = operation;
    n->left = left;
    n->right = right;
    n->value = value;
    return n;
}
/*
 * Print the tree in post-order: left subtree, right subtree, then the node.
 * A NULL subtree prints nothing. Always returns 0.
 */
int print_post_order_data(struct Tree *n) {
    if (n == NULL) {
        return 0;
    }
    print_post_order_data(n->left);
    print_post_order_data(n->right);
    /* Operator nodes carry a NULL value, and passing NULL to %s is undefined. */
    printf("Operation => %d \t Value => %s\n", n->operation,
           n->value != NULL ? n->value : "(null)");
    /* Every path of a non-void function must return a value. */
    return 0;
}
/* Build an expression tree by hand and print it in post-order. */
int main(void) {
    struct Tree *m;
    // Expression is ( 2 + 3 * 5 - 8 / 3 )
    m = make_node(plus, NULL, NULL, NULL);
    m->left = make_node(minus, NULL, NULL, NULL);
    m->right = make_node(integer, NULL, NULL, "2");
    m->left->left = make_node(multi, NULL, NULL, NULL);
    m->left->right = make_node(divis, NULL, NULL, NULL);
    m->left->left->left = make_node(integer, NULL, NULL, "3");
    m->left->left->right = make_node(integer, NULL, NULL, "5");
    m->left->right->left = make_node(integer, NULL, NULL, "8");
    m->left->right->right = make_node(integer, NULL, NULL, "3");
    /* The root is held in m; this function declares no variable named n. */
    print_post_order_data(m);
    return 0;
}
你可以看到我已经手动创建了表达树。表达式为 2 + 3 * 5 - 8 / 3.
假设这个程序可以将 2 识别为数字、将 + 识别为加号等,那么如何为此编写解析器?也就是说,如何创建如上所述的节点?可以给我代码或伪代码吗?
以下是更多信息
e.g. => 1 + 2 * 3
The tree is,
      +
     / \
    /   \
   *     1
  / \
 /   \
2     3
1 + 2 * 3 => 1 + ( 2 * 3 )
So manually I can create tree like this.
/* Hand-built tree: '+' at the root, the '*' subtree on the left, the literal 1 on the right. */
struct Tree *n;
n = make_node(plus, NULL, NULL, NULL); /* root: + */
n -> left = make_node(multi, NULL, NULL, NULL); /* left operand: * */
n -> right = make_node(integer, NULL, NULL, "1"); /* right operand: 1 */
n -> left -> left = make_node(integer, NULL, NULL, "2"); /* operands of * */
n -> left -> right = make_node(integer, NULL, NULL, "3");
我试过创建这样的解析器。
addictive_expression() {
multiplicative_expression()
while(1) {
multiplicative_expression()
....
}
}
multiplicative_expression() {
primary_expression()
while(+ || * || /) {
primary_expression()
....
}
}
primary_expression() {
switch(current token) {
case integer:
....
....
}
}
虽然我试着这样做,但我很难弄清楚如何将树连接到它。
编辑 1
- 我想在不使用 Bison 等工具的情况下创建解析器
- 为此所需的词法分析器已经制作完成
编辑 2 :
// This is the Source of Struct Tree
/* Expression tree node. The trailing declarator also defines an object named Tree of this type. */
struct TREE {
int operation; /* node kind */
struct TREE *left; /* left child */
struct TREE *right; /* right child */
char *value; /* text attached to the node */
} Tree; /* NOTE(review): this declares a variable, not a typedef -- confirm that is intended */
/*
 * Allocate a TREE node and populate it from the arguments.
 * The value pointer is stored directly; the text is not copied.
 * An allocation failure is reported through fatal().
 */
struct TREE *create_new_node(int operation, struct TREE *left, struct TREE *right, char value[MAX_LENG]) {
    struct TREE *node = malloc(sizeof *node);

    if (!node) {
        fatal("Unable to Malloc New Structure TREE in \'create_new_node()\' Function in tree.c File");
    }

    node->value = value;
    node->right = right;
    node->left = left;
    node->operation = operation;
    return node;
}
// This is the Source of Parser
/* Declared here because the definition appears after its first use. */
int addictive_expression(void);

/*
 * Parser entry point: load the first token, then parse an additive
 * expression. Returns the result of addictive_expression().
 */
int expression(void) {
    next_token(); // This Function will get the next Token
    /* The function is declared int, so it must return a value. */
    return addictive_expression();
}
int addictive_expression(void) {
int token_type;
multiplicative_expression();
token_type = Token.current_token; // Token.current_token is the Current Token
if(token_type == END_FILE) {
return 0;
}
while(1) {
next_token();
multiplicative_expression();
token_type = Token.current_token;
if(token_type == END_FILE) { // End File is a Enum
return 0;
}
}
return 0;
}
/*
 * multiplicative := primary { (O_MLTI | O_DIVS | O_MUDL) primary }
 *
 * Stops at END_FILE or at the first token that is not one of the three
 * operator tokens. Always returns 0.
 */
int multiplicative_expression(void) {
    primary_expression();

    for (;;) {
        int kind = Token.current_token;

        if (kind == END_FILE) {
            return 0;
        }
        /* O_MLTI, O_DIVS and O_MUDL are the operator tokens handled at this level. */
        if (kind != O_MLTI && kind != O_DIVS && kind != O_MUDL) {
            break;
        }
        next_token();
        primary_expression();
    }
    return 0;
}
/*
 * Consume one primary token. INTEGER and the five operator tokens are
 * accepted and skipped; anything else is reported through error_d() without
 * being consumed. Always returns 0.
 *
 * NOTE(review): operator tokens are accepted as primaries here -- confirm
 * that this is intended.
 */
int primary_expression(void) {
    switch (Token.current_token) {
    default:
        error_d("Syntax Error in Primary Expression", Token.current_token); // Custom Error Message
        return 0;
    case INTEGER:
    case O_PLUS:
    case O_MNUS:
    case O_MLTI:
    case O_DIVS:
    case O_MUDL:
        break;
    }
    next_token();
    return 0;
}
/*
struct Tree *m;
m = make_node(plus, NULL, NULL, NULL);
m -> left = make_node(minus, NULL, NULL, NULL);
m -> right = make_node(integer, NULL, NULL, "2");
m -> left -> left = make_node(multi, NULL, NULL, NULL);
m -> left -> right = make_node(divis, NULL, NULL, NULL);
m -> left -> left -> left = make_node(integer, NULL, NULL, "3");
m -> left -> left -> right = make_node(integer, NULL, NULL, "5");
m -> left -> right -> left = make_node(integer, NULL, NULL, "8");
m -> left -> right -> right = make_node(integer, NULL, NULL, "3");
*/
我很难弄清楚如何将树连接到它。你能给我一个解决方案吗?
语法中的每个非终结符都由一个函数实现,该函数返回它所解析出的子表达式,在您的例子中用 struct Tree * 表示。
这允许在递归调用时构建树。
所以基本上你的 addictive_expression 应该看起来像这样:
/*
 * additive := multiplicative { (PLUS | MINUS) multiplicative }
 *
 * Each operator wraps the tree built so far as its left operand, so a chain
 * of operators nests to the left. Returns the root of the parsed subtree.
 */
static struct Tree *addictive_expression() {
    struct Tree *lhs = multiplicative_expression();

    while (token->type == OPERATOR && (token->op == PLUS || token->op == MINUS)) {
        const Operator which = token->op;

        token = next_token();
        struct Tree *rhs = multiplicative_expression();

        /* The switch spells out one node construction per operator. */
        switch (which) {
        case PLUS:
            lhs = create_new_node(OPERATOR, PLUS, lhs, rhs, NULL);
            break;
        case MINUS:
            lhs = create_new_node(OPERATOR, MINUS, lhs, rhs, NULL);
            break;
        default:
            break;
        }
    }
    return lhs;
}
它是如何工作的:
它调用 multiplicative_expression,后者又调用其他函数来得到一个表达式。在递归下降(recursive descent)解析的简单形式中,每个优先级都有一个单独的函数。由于 PLUS 和 MINUS 是左结合(left-associative)运算符,因此它们在循环中处理。如果连续出现相同优先级的运算,则在创建新节点时将前一个节点作为左子表达式。
为了便于理解,我在其中加入了带有 PLUS 和 MINUS 两个 case 分支的 switch 语句,但如您所见,可以像下面这样简化:
/*
 * multiplicative := value { (MULT | DIV) value }
 *
 * The operator read from the token is passed straight to create_new_node(),
 * so no per-operator switch is needed. Returns the root of the subtree.
 */
static struct Tree *multiplicative_expression() {
    struct Tree *lhs = value_expression();

    for (;;) {
        if (token->type != OPERATOR) {
            break;
        }
        if (token->op != MULT && token->op != DIV) {
            break;
        }
        Operator which = token->op;
        token = next_token();
        struct Tree *rhs = value_expression();
        lhs = create_new_node(OPERATOR, which, lhs, rhs, NULL);
    }
    return lhs;
}
这里只是使用运算符来创建一个新节点。
数据结构
注意:类型和运算符是分开的。
/* Category of a token or tree node. */
typedef enum {
    NONE     = 0,
    END      = 1,
    NUMERIC  = 2,
    OPERATOR = 3
} Type;
/* Binary operators; INVALID is the zero value. */
typedef enum {
    INVALID = 0,
    PLUS    = 1,
    MINUS   = 2,
    MULT    = 3,
    DIV     = 4
} Operator;
/* A lexer token as consumed by the parser. */
typedef struct {
Type type; /* token category */
Operator op; /* which operator; the parser reads it when type is OPERATOR */
char *value; /* token text; the parser reads it for NUMERIC tokens */
} Token;
那么树结构是:
/* Expression tree node produced by the parser. */
struct Tree {
Type type; /* NUMERIC for literals, OPERATOR for binary operations */
Operator op; /* operator of an OPERATOR node */
struct Tree *left; /* left operand; NULL for literals */
struct Tree *right; /* right operand; NULL for literals */
char *value; /* literal text for NUMERIC nodes; NULL for operator nodes */
};
完整示例
一个小而完整的示例(函数名称基于问题中的示例片段)可能如下所示,它具有两个优先级:
- * 和 /
- + 和 -
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "parser.h"
#include "lexer.h"
static Token *token;
/*
 * Print msg followed by a newline on stderr and terminate the program with
 * exit status 1. Does not return.
 *
 * msg is const-qualified because it is only read and callers pass string
 * literals.
 */
static void fatal(const char *msg) {
    fprintf(stderr, "%s\n", msg);
    exit(1);
}
/*
 * Allocate a tree node and fill it from the arguments.
 * The value pointer is stored as given; the node does not copy the text.
 * An allocation failure is reported through fatal().
 */
static struct Tree *create_new_node(Type type,
                                    Operator operation,
                                    struct Tree *left,
                                    struct Tree *right,
                                    char *value) {
    struct Tree *node = malloc(sizeof *node);

    if (!node) {
        fatal("Unable to Malloc New Structure Tree in \'create_new_node()\' Function in tree.c File");
    }

    node->value = value;
    node->right = right;
    node->left = left;
    node->op = operation;
    node->type = type;
    return node;
}
static struct Tree *value_expression() {
if (token->type == NUMERIC) {
struct Tree *result = create_new_node(NUMERIC, NONE, NULL, NULL, strdup(token->value));
token = next_token();
return result;
}
fatal("can't determine value for token");
}
/*
 * multiplicative := value { (MULT | DIV) value }
 *
 * Each operator takes the tree built so far as its left operand, so chains
 * nest to the left. Returns the root of the parsed subtree.
 */
static struct Tree *multiplicative_expression() {
    struct Tree *lhs = value_expression();

    while (token->type == OPERATOR && (token->op == MULT || token->op == DIV)) {
        /* Remember the operator before the lexer moves on. */
        const Operator which = token->op;

        token = next_token();
        lhs = create_new_node(OPERATOR, which, lhs, value_expression(), NULL);
    }
    return lhs;
}
/*
 * additive := multiplicative { (PLUS | MINUS) multiplicative }
 *
 * Operands come from the next-higher precedence level, so '*' and '/' bind
 * tighter than '+' and '-'. Returns the root of the parsed subtree.
 */
static struct Tree *addictive_expression() {
    struct Tree *lhs = multiplicative_expression();

    while (token->type == OPERATOR && (token->op == PLUS || token->op == MINUS)) {
        /* Remember the operator before the lexer moves on. */
        const Operator which = token->op;

        token = next_token();
        lhs = create_new_node(OPERATOR, which, lhs, multiplicative_expression(), NULL);
    }
    return lhs;
}
/*
 * Parser entry point: fetch the first token, parse one additive expression,
 * then hand the token that ended the expression back to the lexer.
 * Returns the root of the parsed tree.
 */
struct Tree *expression() {
    struct Tree *root;

    token = next_token();
    root = addictive_expression();
    putback_token(token);
    return root;
}
树输出
#include <stdio.h>
#include <stdlib.h>
#include "lexer.h"
#include "parser.h"
void test_parser();

/* Run the parser test and report success. */
int main(void) {
    test_parser();
    return EXIT_SUCCESS;
}
/*
 * Print a tree node and its children, one node per line, in pre-order.
 *
 * Each line is prefixed with one " | " per nesting level. An operator node
 * prints its symbol, then both operands one level deeper, then an empty
 * line. A NUMERIC node prints its text. NONE and END nodes, and an INVALID
 * operator, print a message on stderr and exit with status 1.
 */
void print_expr(struct Tree *expr, int level) {
    const char *symbol = NULL;
    int remaining = level;

    while (remaining-- > 0) {
        printf(" | ");
    }

    switch (expr->type) {
    case NUMERIC:
        printf("%s\n", expr->value);
        return;
    case NONE:
        fprintf(stderr, "unexpected NONE\n");
        exit(1);
    case END:
        fprintf(stderr, "unexpected END\n");
        exit(1);
    case OPERATOR:
        break;
    default:
        return;
    }

    /* All four operators print the same way; only the symbol differs. */
    switch (expr->op) {
    case INVALID:
        fprintf(stderr, "invalid op\n");
        exit(1);
    case PLUS:
        symbol = "+";
        break;
    case MINUS:
        symbol = "-";
        break;
    case MULT:
        symbol = "*";
        break;
    case DIV:
        symbol = "/";
        break;
    default:
        return;
    }

    printf("%s\n", symbol);
    print_expr(expr->left, level + 1);
    print_expr(expr->right, level + 1);
    printf("\n");
}
/* Point the lexer at ../input.txt, parse one expression and print its tree. */
void test_parser() {
    struct Tree *root;

    setup_lexer("../input.txt");
    root = expression();
    print_expr(root, 0);
}
结果
对于输入 2 + 3 * 5 - 8 / 3,上面的小测试程序向调试控制台输出以下内容:
-
| +
| | 2
| | *
| | | 3
| | | 5
| /
| | 8
| | 3
看起来像正确的语法树!
我有一个问题我不明白。我想创建一个表达式解析器。所以,首先我为那个解析器创建了一棵树。就是这样。
/* Node kinds stored in Tree.operation: literal kinds and binary operators. */
enum {
    integer   = 0,
    plus      = 1,
    minus     = 2,
    multi     = 3,
    divis     = 4,
    string    = 5,
    character = 6
};
struct Tree {
int operation;
struct Tree *left;
struct Tree *right;
char *value;
};
/*
 * Allocate a tree node and fill in its fields.
 *
 * operation:  node kind (integer, plus, minus, ...).
 * left/right: child subtrees, or NULL.
 * value:      literal text; the pointer is stored as-is, not copied.
 *
 * Returns the new node, or NULL if the allocation fails. The caller owns
 * the node and must check for NULL before using it.
 */
struct Tree *make_node(int operation, struct Tree *left, struct Tree *right, char *value) {
    struct Tree *n = malloc(sizeof *n);
    if (n == NULL) {
        /* Stop here: writing through a NULL pointer is undefined behavior. */
        fprintf(stderr, "Unable to malloc \'make_node()\'\n");
        return NULL;
    }
    n->operation = operation;
    n->left = left;
    n->right = right;
    n->value = value;
    return n;
}
/*
 * Print the tree in post-order: left subtree, right subtree, then the node.
 * A NULL subtree prints nothing. Always returns 0.
 */
int print_post_order_data(struct Tree *n) {
    if (n == NULL) {
        return 0;
    }
    print_post_order_data(n->left);
    print_post_order_data(n->right);
    /* Operator nodes carry a NULL value, and passing NULL to %s is undefined. */
    printf("Operation => %d \t Value => %s\n", n->operation,
           n->value != NULL ? n->value : "(null)");
    /* Every path of a non-void function must return a value. */
    return 0;
}
/* Build an expression tree by hand and print it in post-order. */
int main(void) {
    struct Tree *m;
    // Expression is ( 2 + 3 * 5 - 8 / 3 )
    m = make_node(plus, NULL, NULL, NULL);
    m->left = make_node(minus, NULL, NULL, NULL);
    m->right = make_node(integer, NULL, NULL, "2");
    m->left->left = make_node(multi, NULL, NULL, NULL);
    m->left->right = make_node(divis, NULL, NULL, NULL);
    m->left->left->left = make_node(integer, NULL, NULL, "3");
    m->left->left->right = make_node(integer, NULL, NULL, "5");
    m->left->right->left = make_node(integer, NULL, NULL, "8");
    m->left->right->right = make_node(integer, NULL, NULL, "3");
    /* The root is held in m; this function declares no variable named n. */
    print_post_order_data(m);
    return 0;
}
你可以看到我已经手动创建了表达树。表达式为 2 + 3 * 5 - 8 / 3.
假设这个程序可以将 2 识别为数字、将 + 识别为加号等,那么如何为此编写解析器?也就是说,如何创建如上所述的节点?可以给我代码或伪代码吗?
以下是更多信息
e.g. => 1 + 2 * 3
The tree is,
      +
     / \
    /   \
   *     1
  / \
 /   \
2     3
1 + 2 * 3 => 1 + ( 2 * 3 )
So manually I can create tree like this.
/* Hand-built tree: '+' at the root, the '*' subtree on the left, the literal 1 on the right. */
struct Tree *n;
n = make_node(plus, NULL, NULL, NULL); /* root: + */
n -> left = make_node(multi, NULL, NULL, NULL); /* left operand: * */
n -> right = make_node(integer, NULL, NULL, "1"); /* right operand: 1 */
n -> left -> left = make_node(integer, NULL, NULL, "2"); /* operands of * */
n -> left -> right = make_node(integer, NULL, NULL, "3");
我试过创建这样的解析器。
addictive_expression() {
multiplicative_expression()
while(1) {
multiplicative_expression()
....
}
}
multiplicative_expression() {
primary_expression()
while(+ || * || /) {
primary_expression()
....
}
}
primary_expression() {
switch(current token) {
case integer:
....
....
}
}
虽然我试着这样做,但我很难弄清楚如何将树连接到它。
编辑 1
- 我想在不使用 Bison 等工具的情况下创建解析器
- 为此所需的词法分析器已经制作完成
编辑 2 :
// This is the Source of Struct Tree
/* Expression tree node. The trailing declarator also defines an object named Tree of this type. */
struct TREE {
int operation; /* node kind */
struct TREE *left; /* left child */
struct TREE *right; /* right child */
char *value; /* text attached to the node */
} Tree; /* NOTE(review): this declares a variable, not a typedef -- confirm that is intended */
/*
 * Allocate a TREE node and populate it from the arguments.
 * The value pointer is stored directly; the text is not copied.
 * An allocation failure is reported through fatal().
 */
struct TREE *create_new_node(int operation, struct TREE *left, struct TREE *right, char value[MAX_LENG]) {
    struct TREE *node = malloc(sizeof *node);

    if (!node) {
        fatal("Unable to Malloc New Structure TREE in \'create_new_node()\' Function in tree.c File");
    }

    node->value = value;
    node->right = right;
    node->left = left;
    node->operation = operation;
    return node;
}
// This is the Source of Parser
/* Declared here because the definition appears after its first use. */
int addictive_expression(void);

/*
 * Parser entry point: load the first token, then parse an additive
 * expression. Returns the result of addictive_expression().
 */
int expression(void) {
    next_token(); // This Function will get the next Token
    /* The function is declared int, so it must return a value. */
    return addictive_expression();
}
int addictive_expression(void) {
int token_type;
multiplicative_expression();
token_type = Token.current_token; // Token.current_token is the Current Token
if(token_type == END_FILE) {
return 0;
}
while(1) {
next_token();
multiplicative_expression();
token_type = Token.current_token;
if(token_type == END_FILE) { // End File is a Enum
return 0;
}
}
return 0;
}
/*
 * multiplicative := primary { (O_MLTI | O_DIVS | O_MUDL) primary }
 *
 * Stops at END_FILE or at the first token that is not one of the three
 * operator tokens. Always returns 0.
 */
int multiplicative_expression(void) {
    primary_expression();

    for (;;) {
        int kind = Token.current_token;

        if (kind == END_FILE) {
            return 0;
        }
        /* O_MLTI, O_DIVS and O_MUDL are the operator tokens handled at this level. */
        if (kind != O_MLTI && kind != O_DIVS && kind != O_MUDL) {
            break;
        }
        next_token();
        primary_expression();
    }
    return 0;
}
/*
 * Consume one primary token. INTEGER and the five operator tokens are
 * accepted and skipped; anything else is reported through error_d() without
 * being consumed. Always returns 0.
 *
 * NOTE(review): operator tokens are accepted as primaries here -- confirm
 * that this is intended.
 */
int primary_expression(void) {
    switch (Token.current_token) {
    default:
        error_d("Syntax Error in Primary Expression", Token.current_token); // Custom Error Message
        return 0;
    case INTEGER:
    case O_PLUS:
    case O_MNUS:
    case O_MLTI:
    case O_DIVS:
    case O_MUDL:
        break;
    }
    next_token();
    return 0;
}
/*
struct Tree *m;
m = make_node(plus, NULL, NULL, NULL);
m -> left = make_node(minus, NULL, NULL, NULL);
m -> right = make_node(integer, NULL, NULL, "2");
m -> left -> left = make_node(multi, NULL, NULL, NULL);
m -> left -> right = make_node(divis, NULL, NULL, NULL);
m -> left -> left -> left = make_node(integer, NULL, NULL, "3");
m -> left -> left -> right = make_node(integer, NULL, NULL, "5");
m -> left -> right -> left = make_node(integer, NULL, NULL, "8");
m -> left -> right -> right = make_node(integer, NULL, NULL, "3");
*/
我很难弄清楚如何将树连接到它。你能给我一个解决方案吗?
语法中的每个非终结符都由一个函数实现,该函数返回它所解析出的子表达式,在您的例子中用 struct Tree * 表示。
这允许在递归调用时构建树。
所以基本上你的 addictive_expression 应该看起来像这样:
/*
 * additive := multiplicative { (PLUS | MINUS) multiplicative }
 *
 * Each operator wraps the tree built so far as its left operand, so a chain
 * of operators nests to the left. Returns the root of the parsed subtree.
 */
static struct Tree *addictive_expression() {
    struct Tree *lhs = multiplicative_expression();

    while (token->type == OPERATOR && (token->op == PLUS || token->op == MINUS)) {
        const Operator which = token->op;

        token = next_token();
        struct Tree *rhs = multiplicative_expression();

        /* The switch spells out one node construction per operator. */
        switch (which) {
        case PLUS:
            lhs = create_new_node(OPERATOR, PLUS, lhs, rhs, NULL);
            break;
        case MINUS:
            lhs = create_new_node(OPERATOR, MINUS, lhs, rhs, NULL);
            break;
        default:
            break;
        }
    }
    return lhs;
}
它是如何工作的:
它调用 multiplicative_expression,后者又调用其他函数来得到一个表达式。在递归下降(recursive descent)解析的简单形式中,每个优先级都有一个单独的函数。由于 PLUS 和 MINUS 是左结合(left-associative)运算符,因此它们在循环中处理。如果连续出现相同优先级的运算,则在创建新节点时将前一个节点作为左子表达式。
为了便于理解,我在其中加入了带有 PLUS 和 MINUS 两个 case 分支的 switch 语句,但如您所见,可以像下面这样简化:
/*
 * multiplicative := value { (MULT | DIV) value }
 *
 * The operator read from the token is passed straight to create_new_node(),
 * so no per-operator switch is needed. Returns the root of the subtree.
 */
static struct Tree *multiplicative_expression() {
    struct Tree *lhs = value_expression();

    for (;;) {
        if (token->type != OPERATOR) {
            break;
        }
        if (token->op != MULT && token->op != DIV) {
            break;
        }
        Operator which = token->op;
        token = next_token();
        struct Tree *rhs = value_expression();
        lhs = create_new_node(OPERATOR, which, lhs, rhs, NULL);
    }
    return lhs;
}
这里只是使用运算符来创建一个新节点。
数据结构
注意:类型和运算符是分开的。
/* Category of a token or tree node. */
typedef enum {
    NONE     = 0,
    END      = 1,
    NUMERIC  = 2,
    OPERATOR = 3
} Type;
/* Binary operators; INVALID is the zero value. */
typedef enum {
    INVALID = 0,
    PLUS    = 1,
    MINUS   = 2,
    MULT    = 3,
    DIV     = 4
} Operator;
/* A lexer token as consumed by the parser. */
typedef struct {
Type type; /* token category */
Operator op; /* which operator; the parser reads it when type is OPERATOR */
char *value; /* token text; the parser reads it for NUMERIC tokens */
} Token;
那么树结构是:
/* Expression tree node produced by the parser. */
struct Tree {
Type type; /* NUMERIC for literals, OPERATOR for binary operations */
Operator op; /* operator of an OPERATOR node */
struct Tree *left; /* left operand; NULL for literals */
struct Tree *right; /* right operand; NULL for literals */
char *value; /* literal text for NUMERIC nodes; NULL for operator nodes */
};
完整示例
一个小而完整的示例(函数名称基于问题中的示例片段)可能如下所示,它具有两个优先级:
- * 和 /
- + 和 -
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "parser.h"
#include "lexer.h"
static Token *token;
/*
 * Print msg followed by a newline on stderr and terminate the program with
 * exit status 1. Does not return.
 *
 * msg is const-qualified because it is only read and callers pass string
 * literals.
 */
static void fatal(const char *msg) {
    fprintf(stderr, "%s\n", msg);
    exit(1);
}
/*
 * Allocate a tree node and fill it from the arguments.
 * The value pointer is stored as given; the node does not copy the text.
 * An allocation failure is reported through fatal().
 */
static struct Tree *create_new_node(Type type,
                                    Operator operation,
                                    struct Tree *left,
                                    struct Tree *right,
                                    char *value) {
    struct Tree *node = malloc(sizeof *node);

    if (!node) {
        fatal("Unable to Malloc New Structure Tree in \'create_new_node()\' Function in tree.c File");
    }

    node->value = value;
    node->right = right;
    node->left = left;
    node->op = operation;
    node->type = type;
    return node;
}
static struct Tree *value_expression() {
if (token->type == NUMERIC) {
struct Tree *result = create_new_node(NUMERIC, NONE, NULL, NULL, strdup(token->value));
token = next_token();
return result;
}
fatal("can't determine value for token");
}
/*
 * multiplicative := value { (MULT | DIV) value }
 *
 * Each operator takes the tree built so far as its left operand, so chains
 * nest to the left. Returns the root of the parsed subtree.
 */
static struct Tree *multiplicative_expression() {
    struct Tree *lhs = value_expression();

    while (token->type == OPERATOR && (token->op == MULT || token->op == DIV)) {
        /* Remember the operator before the lexer moves on. */
        const Operator which = token->op;

        token = next_token();
        lhs = create_new_node(OPERATOR, which, lhs, value_expression(), NULL);
    }
    return lhs;
}
/*
 * additive := multiplicative { (PLUS | MINUS) multiplicative }
 *
 * Operands come from the next-higher precedence level, so '*' and '/' bind
 * tighter than '+' and '-'. Returns the root of the parsed subtree.
 */
static struct Tree *addictive_expression() {
    struct Tree *lhs = multiplicative_expression();

    while (token->type == OPERATOR && (token->op == PLUS || token->op == MINUS)) {
        /* Remember the operator before the lexer moves on. */
        const Operator which = token->op;

        token = next_token();
        lhs = create_new_node(OPERATOR, which, lhs, multiplicative_expression(), NULL);
    }
    return lhs;
}
/*
 * Parser entry point: fetch the first token, parse one additive expression,
 * then hand the token that ended the expression back to the lexer.
 * Returns the root of the parsed tree.
 */
struct Tree *expression() {
    struct Tree *root;

    token = next_token();
    root = addictive_expression();
    putback_token(token);
    return root;
}
树输出
#include <stdio.h>
#include <stdlib.h>
#include "lexer.h"
#include "parser.h"
void test_parser();

/* Run the parser test and report success. */
int main(void) {
    test_parser();
    return EXIT_SUCCESS;
}
/*
 * Print a tree node and its children, one node per line, in pre-order.
 *
 * Each line is prefixed with one " | " per nesting level. An operator node
 * prints its symbol, then both operands one level deeper, then an empty
 * line. A NUMERIC node prints its text. NONE and END nodes, and an INVALID
 * operator, print a message on stderr and exit with status 1.
 */
void print_expr(struct Tree *expr, int level) {
    const char *symbol = NULL;
    int remaining = level;

    while (remaining-- > 0) {
        printf(" | ");
    }

    switch (expr->type) {
    case NUMERIC:
        printf("%s\n", expr->value);
        return;
    case NONE:
        fprintf(stderr, "unexpected NONE\n");
        exit(1);
    case END:
        fprintf(stderr, "unexpected END\n");
        exit(1);
    case OPERATOR:
        break;
    default:
        return;
    }

    /* All four operators print the same way; only the symbol differs. */
    switch (expr->op) {
    case INVALID:
        fprintf(stderr, "invalid op\n");
        exit(1);
    case PLUS:
        symbol = "+";
        break;
    case MINUS:
        symbol = "-";
        break;
    case MULT:
        symbol = "*";
        break;
    case DIV:
        symbol = "/";
        break;
    default:
        return;
    }

    printf("%s\n", symbol);
    print_expr(expr->left, level + 1);
    print_expr(expr->right, level + 1);
    printf("\n");
}
/* Point the lexer at ../input.txt, parse one expression and print its tree. */
void test_parser() {
    struct Tree *root;

    setup_lexer("../input.txt");
    root = expression();
    print_expr(root, 0);
}
结果
对于输入 2 + 3 * 5 - 8 / 3,上面的小测试程序向调试控制台输出以下内容:
-
| +
| | 2
| | *
| | | 3
| | | 5
| /
| | 8
| | 3
看起来像正确的语法树!