Ocaml:在递归函数中为 Ast 类型构建列表
Ocaml: build list for Ast type in recursive function
我有一个手写的 LL1 解析器。我的 AST 并没有尽可能地简化。语句部分如下所示:
type stmt_opt = StmtExpression of assignment | OptNil
[@@deriving show]
(*stmt_list -> stmt stmt_list | ε *)
type stmtlist =
| StmtList of stmt * stmtlist
| StmtlistNil
[@@deriving show]
and stmt =
| Assignment of assignment
| Return of stmt_opt
| Parentheses of stmtlist
| If of assignment * stmt
| For of assignment * assignment * assignment * stmt
| While of assignment * stmt
(*“lparen” formals_opt “rparen” “LBRACE” vdecl_list stmt_list “RBRACE”*)
[@@deriving show]
如您所见,我仍然掌握着很多不必要的信息。我想像这样构建我的声明:
type stmt =
Block of stmt list
| Expr of expr
| Return of expr
| If of expr * stmt * stmt
| For of expr * expr * expr * stmt
| While of expr * stmt
我有点不知所措,因为我真的是按照书本构建了我的 LL1 解析器(我相信这不会预料到很长的语法):每个非终结符都有一个解析方法,每个解析方法 returns 一个令牌列表和一个 ast。
我认为,为了构建像我的目标语句 AST 中那样的 Block 类型,我需要在我的递归 parseStmt 方法中构建一个语句列表。我已将我的解析器代码缩减为仅调用 parseStmtList 的解析器方法以及它们调用 parseStmtList
的特定实例
(*stmt_list = stmt stmt_list | epsilon*)
let rec parseStmtList tokenlist lst =
match tokenlist.head with
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
| _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in
let new_lst = lst::stmt in
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
(tokenlist_stmt_list, Ast.Block(stmt_lst))
(*stmt -> assignment SEMI
| RETURN stmt_opt SEMI
| LBRACE stmt_list RBRACE
| IF LPAREN assignment RPAREN stmt
| FOR LPAREN assignment SEMI assignment SEMI assignment RPAREN stmt
| WHILE LPAREN assignment RPAREN stmt
*)
and parseStmt tokenlist =
begin
match tokenlist.head with
| Lexer.ID identifier -> let (tokenlist_assignment, assignment) = parseAssignment tokenlist in
begin
match tokenlist_assignment.head with
| Lexer.Semicolon -> (next tokenlist_assignment, Ast.Assignment(assignment))
| _-> let err_msg = __LOC__ ^ "Syntax Error semicolon expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end
| Lexer.LeftBrace -> let tokenlist_leftbrace = next tokenlist in
let (tokenlist_expr, expr) = parseStmtList tokenlist_leftbrace [] in
begin
match tokenlist_expr.head with
| Lexer.RightBrace -> (next tokenlist_expr, Ast.Parentheses(expr))
| _-> let err_msg = __LOC__ ^ "Syntax Error right brace expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end
| _-> let err_msg = __LOC__ ^ "Syntax Error left brace expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end
但是,我收到错误消息:
Error: This expression has type 'a -> token_list * Ast.stmtlist
but an expression was expected of type 'b * 'c
对于 parseStmtList
中的 let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
行
tokenlist_stmt new_lst |> parseStmtList
此处您将 tokenlist_stmt
应用于参数 new_lst
,然后将 parseStmtList
应用于结果。但是 tokenlist_stmt
实际上不是一个函数,所以这是一个类型错误。
大概你的意图是用 tokenlist_stmt
和 new_lst
作为它的两个参数来调用 parseStmtList
。语法很简单:
parseStmtList tokenlist_stmt new_lst
此外 lst::stmt
也是类型错误,原因有二:
::
将列表作为它的右操作数,而不是左操作数,所以它应该是 stmt::lst
lst
实际上不是一个列表,它是一个 Ast.Block
因为那是 parseStmtList
returns.
一旦你解决了所有这些问题,你会注意到列表将是错误的(大概这就是你首先尝试 lst::stmt
的原因,但你不能附加到像这样的列表的结尾)。这是使用累加器构建列表时的常见问题。解决方案是在完成构建后反转列表,或者首先不使用累加器。
需要指出的重要一点是,所有这些问题在使用 Ast.stmtlist
时也会出现。也就是说,如果您的代码如下所示:
let new_lst = Ast.StmtList(lst, stmt) in
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
(tokenlist_stmt_list, Ast.Block(stmt_lst))
然后你会得到完全相同的错误。这让我觉得,你改变的代码比你需要的要多。由于您的旧代码可能有效,我假设它看起来像这样:
let rec parseStmtList tokenlist =
match tokenlist.head with
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
| _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in
let (tokenlist_stmt_list, stmt_list) = parseStmtList tokenlist_stmt in
(tokenlist_stmt_list, Ast.StmtList (stmt, stmt_lst))
然后在 parseStmt
你有:
let (tokenlist_stmtlist, stmtlist) = parseStmtList tokenlist_leftbrace in
begin
match tokenlist_expr.head with
| Lexer.RightBrace -> (next tokenlist_stmtlist, Ast.Block(stmtlist))
现在删除 Ast.stmtlist
后,您需要更改的只是您实际使用其构造函数的部分,并将这些部分替换为列表构造函数(::
和 []
)。所以 parseStmt
中的代码将保持完全不变, parseStmtList
中唯一的变化应该是替换行
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
与
| Lexer.RightBrace -> (tokenlist, [] )
和行
(tokenlist_stmt_list, Ast.StmtList (stmt, stmt_lst))
与
(tokenlist_stmt_list, stmt :: stmt_lst)
如果您的旧代码看起来与我上面提出的不同,您可能需要更改不同的行,但想法保持不变:将 Ast.StmtList
替换为 ::
和 Ast.StmtListNil
与 []
.
就是这样。这就是所有必要的改变。你太复杂了。
我有一个手写的 LL1 解析器。我的 AST 并没有尽可能地简化。语句部分如下所示:
type stmt_opt = StmtExpression of assignment | OptNil
[@@deriving show]
(*stmt_list -> stmt stmt_list | ε *)
type stmtlist =
| StmtList of stmt * stmtlist
| StmtlistNil
[@@deriving show]
and stmt =
| Assignment of assignment
| Return of stmt_opt
| Parentheses of stmtlist
| If of assignment * stmt
| For of assignment * assignment * assignment * stmt
| While of assignment * stmt
(*“lparen” formals_opt “rparen” “LBRACE” vdecl_list stmt_list “RBRACE”*)
[@@deriving show]
如您所见,我仍然掌握着很多不必要的信息。我想像这样构建我的声明:
type stmt =
Block of stmt list
| Expr of expr
| Return of expr
| If of expr * stmt * stmt
| For of expr * expr * expr * stmt
| While of expr * stmt
我有点不知所措,因为我真的是按照书本构建了我的 LL1 解析器(我相信这不会预料到很长的语法):每个非终结符都有一个解析方法,每个解析方法 returns 一个令牌列表和一个 ast。
我认为,为了构建像我的目标语句 AST 中那样的 Block 类型,我需要在我的递归 parseStmt 方法中构建一个语句列表。我已将我的解析器代码缩减为仅调用 parseStmtList 的解析器方法以及它们调用 parseStmtList
的特定实例(*stmt_list = stmt stmt_list | epsilon*)
let rec parseStmtList tokenlist lst =
match tokenlist.head with
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
| _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in
let new_lst = lst::stmt in
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
(tokenlist_stmt_list, Ast.Block(stmt_lst))
(*stmt -> assignment SEMI
| RETURN stmt_opt SEMI
| LBRACE stmt_list RBRACE
| IF LPAREN assignment RPAREN stmt
| FOR LPAREN assignment SEMI assignment SEMI assignment RPAREN stmt
| WHILE LPAREN assignment RPAREN stmt
*)
and parseStmt tokenlist =
begin
match tokenlist.head with
| Lexer.ID identifier -> let (tokenlist_assignment, assignment) = parseAssignment tokenlist in
begin
match tokenlist_assignment.head with
| Lexer.Semicolon -> (next tokenlist_assignment, Ast.Assignment(assignment))
| _-> let err_msg = __LOC__ ^ "Syntax Error semicolon expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end
| Lexer.LeftBrace -> let tokenlist_leftbrace = next tokenlist in
let (tokenlist_expr, expr) = parseStmtList tokenlist_leftbrace [] in
begin
match tokenlist_expr.head with
| Lexer.RightBrace -> (next tokenlist_expr, Ast.Parentheses(expr))
| _-> let err_msg = __LOC__ ^ "Syntax Error right brace expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end
| _-> let err_msg = __LOC__ ^ "Syntax Error left brace expected but received" ^ show_token_list tokenlist in
raise (Syntax_error err_msg)
end
但是,我收到错误消息:
Error: This expression has type 'a -> token_list * Ast.stmtlist
but an expression was expected of type 'b * 'c
对于 parseStmtList
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
行
tokenlist_stmt new_lst |> parseStmtList
此处您将 tokenlist_stmt
应用于参数 new_lst
,然后将 parseStmtList
应用于结果。但是 tokenlist_stmt
实际上不是一个函数,所以这是一个类型错误。
大概你的意图是用 tokenlist_stmt
和 new_lst
作为它的两个参数来调用 parseStmtList
。语法很简单:
parseStmtList tokenlist_stmt new_lst
此外 lst::stmt
也是类型错误,原因有二:
::
将列表作为它的右操作数,而不是左操作数,所以它应该是stmt::lst
lst
实际上不是一个列表,它是一个Ast.Block
因为那是parseStmtList
returns.
一旦你解决了所有这些问题,你会注意到列表将是错误的(大概这就是你首先尝试 lst::stmt
的原因,但你不能附加到像这样的列表的结尾)。这是使用累加器构建列表时的常见问题。解决方案是在完成构建后反转列表,或者首先不使用累加器。
需要指出的重要一点是,所有这些问题在使用 Ast.stmtlist
时也会出现。也就是说,如果您的代码如下所示:
let new_lst = Ast.StmtList(lst, stmt) in
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
(tokenlist_stmt_list, Ast.Block(stmt_lst))
然后你会得到完全相同的错误。这让我觉得,你改变的代码比你需要的要多。由于您的旧代码可能有效,我假设它看起来像这样:
let rec parseStmtList tokenlist =
match tokenlist.head with
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
| _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in
let (tokenlist_stmt_list, stmt_list) = parseStmtList tokenlist_stmt in
(tokenlist_stmt_list, Ast.StmtList (stmt, stmt_lst))
然后在 parseStmt
你有:
let (tokenlist_stmtlist, stmtlist) = parseStmtList tokenlist_leftbrace in
begin
match tokenlist_expr.head with
| Lexer.RightBrace -> (next tokenlist_stmtlist, Ast.Block(stmtlist))
现在删除 Ast.stmtlist
后,您需要更改的只是您实际使用其构造函数的部分,并将这些部分替换为列表构造函数(::
和 []
)。所以 parseStmt
中的代码将保持完全不变, parseStmtList
中唯一的变化应该是替换行
| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
与
| Lexer.RightBrace -> (tokenlist, [] )
和行
(tokenlist_stmt_list, Ast.StmtList (stmt, stmt_lst))
与
(tokenlist_stmt_list, stmt :: stmt_lst)
如果您的旧代码看起来与我上面提出的不同,您可能需要更改不同的行,但想法保持不变:将 Ast.StmtList
替换为 ::
和 Ast.StmtListNil
与 []
.
就是这样。这就是所有必要的改变。你太复杂了。