获取在解析器内部引发解析错误的输入字符串
Get the input string that raises a parsing error inside the parser
我有一个用 menhir 编写的前端,它负责解析表达式:把字符串转换成表达式 AST。前端的入口点 Parser_e.main 在我的 OCaml 代码中的多个位置被调用,所以我希望在前端内部而不是在外部捕获可能出现的错误。捕获错误时,我特别想显示的一条信息是前端无法解析的整个输入字符串。(词法分析器出错的情况很少,因为前端几乎可以读取任何内容。)
所以我尝试按照 this thread 中的做法,在出错时打印更多信息。在 parser_e.mly 中,我添加了:
(* Carries a message string. No raise site is visible in this snippet;
   presumably meant for lexing failures -- confirm against the lexer. *)
exception LexErr of string
(* Raised by [parse_error]; the payload is the location-prefixed message
   produced by [error]. *)
exception ParseErr of string
(* [error msg start finish] formats [msg] prefixed with the line number of
   [start] and the columns of [start] and [finish]; each column is counted
   from the beginning of that position's own line. *)
let error msg start finish =
  let column pos = pos.Lexing.pos_cnum - pos.Lexing.pos_bol in
  let first_col = column start in
  let last_col = column finish in
  Printf.sprintf "(line %d: char %d..%d): %s"
    start.Lexing.pos_lnum first_col last_col msg
(* [parse_error msg nterm] raises [ParseErr] whose payload is [msg]
   prefixed with the start and end positions of the [nterm]-th symbol on
   the right-hand side of the current rule.  Never returns. *)
let parse_error msg nterm =
  let last = rhs_end_pos nterm in
  let first = rhs_start_pos nterm in
  let report = error msg first last in
  raise (ParseErr report)
/* Grammar rule for an expression; each alternative's action yields the
   matching AST constructor. */
e_expression:
/* empty */ { EE_empty }
| INTEGER { EE_integer }
| DOUBLE { EE_double }
...
/* Error alternative: [parse_error] raises [ParseErr] with the position of
   right-hand-side symbol 1, so the [ERR ""] after it is never evaluated
   (presumably it is only there to give the action a value). */
| error { parse_error "e_expression" 1; ERR "" }
但这样得到的错误信息里仍然不包含输入字符串。有人知道我是否遗漏了某个可用的函数吗?
在错误的上下文中,您可以使用 Parsing.symbol_start_pos 和 Parsing.symbol_end_pos 函数,以两个位置的形式获取解析失败的词素所在的位置。遗憾的是,Parsing 模块并不能直接以字符串形式提供该词素;但如果输入存储在文件中,就可以手动把它提取出来,或者按编译器的风格打印错误信息,这样像样的 IDE 就能识别并高亮显示出错位置。下面是一个模块 Parser_error。它定义了函数 Parser_error.throw,该函数会引发 Parser_error.T 异常。该异常带有一条诊断消息以及失败词素的位置。模块还提供了几个便利函数,用于从文件中提取这个词素,或者生成文件位置消息。如果您的输入没有存储在文件中,可以使用 string_of_exn 函数:它接受字符串形式的输入和 Parser_error.T 异常的内容,并标出其中有问题的子字符串。这是一个使用此异常进行错误报告的解析器 example。
open Lexing
(** [T (message, start, finish)]: the parser failed with [message] on the
    part of the input delimited by the [start] and [finish] positions. *)
exception T of (string * position * position)
(** [throw msg] raises [T] carrying [msg] together with the start and end
    positions reported by [Parsing.symbol_start_pos] and
    [Parsing.symbol_end_pos].  It is meant to be called from the semantic
    action of a production rule.  As a side effect the most recently
    recorded raw backtrace is printed on [stderr]. *)
let throw my_unique_msg =
  (* Any exception raised by a position lookup is replaced by
     [dummy_pos]. *)
  let safe_pos lookup =
    match lookup () with
    | pos -> pos
    | exception _ -> dummy_pos
  in
  let () =
    Printexc.print_raw_backtrace stderr (Printexc.get_raw_backtrace ())
  in
  let first = safe_pos Parsing.symbol_start_pos in
  let last = safe_pos Parsing.symbol_end_pos in
  raise (T (my_unique_msg, first, last))
(** [fileposition err_s err_e] builds a one-line description of the lexeme
    delimited by the positions [err_s] and [err_e]: the file name and line
    number of [err_s], followed by the character range of the lexeme.  The
    layout is modelled on the location lines printed by compilers.

    Both ends of the range are columns counted from the beginning of the
    line that holds [err_s]. *)
let fileposition err_s err_e =
  (* [pos_cnum] is an offset from the start of the whole input, so it must
     be rebased on [pos_bol]; otherwise the reported range is wrong on every
     line but the first. *)
  let column pos = pos.pos_cnum - err_s.pos_bol in
  Printf.sprintf
    "\nFile \"%s\", line %d, at character %d-%d\n"
    err_s.pos_fname err_s.pos_lnum (column err_s) (column err_e)
(** [string_of_exn line (msg, err_s, err_e)] renders a parse error in a
    compiler-like layout: an optional file-position header (only when
    [err_s] names a file), then ["Parse error: msg"], then [line] itself,
    and finally a row of [^] characters underlining the failed lexeme.
    The underline starts at the column of [err_s] and is at least one
    character wide. *)
let string_of_exn line (msg,err_s,err_e) =
  let header =
    if err_s.pos_fname <> "" then fileposition err_s err_e else ""
  in
  let report = Printf.sprintf "Parse error: %s\n%s\n" msg line in
  (* Blank padding up to the column where the lexeme starts. *)
  let padding = String.make (max 0 (err_s.pos_cnum - err_s.pos_bol)) ' ' in
  (* One caret per character of the lexeme, never fewer than one. *)
  let carets = String.make (max 1 (err_e.pos_cnum - err_s.pos_cnum)) '^' in
  String.concat "" [header; report; padding; carets]
(** [extract_line err] reads the file named by [err.pos_fname] and returns
    line number [err.pos_lnum] of it (lines are counted from 1; a number
    below 1 is treated as 1).

    This is a best-effort helper: if the file cannot be opened the result
    is [""], and if the file is shorter than requested the result is the
    last line that could be read.  The channel is closed on every path. *)
let extract_line err =
  let line = ref "" in
  (try
     let ic = open_in err.pos_fname in
     (* Stop quietly at end of file or on a read error; [line] then keeps
        the last line read so far. *)
     (try
        for _i = 0 to max 0 (err.pos_lnum - 1) do
          line := input_line ic
        done
      with End_of_file | Sys_error _ -> ());
     (* Runs whether or not the loop finished, so the descriptor is not
        leaked when the requested line does not exist. *)
     close_in_noerr ic
   with Sys_error _ -> ());
  !line
(** [to_string payload] renders the payload of a [T] exception as a
    string: the offending line is fetched with [extract_line] from the
    file named by the start position, then formatted by
    [string_of_exn]. *)
let to_string ((_, err, _) as exn) =
  string_of_exn (extract_line err) exn
下面的示例展示了在没有文件、输入来自流或交互式(类似 shell 的)来源时应如何使用:
(* [parse_command line] parses the string [line] with [Parser.statement],
   using [Lexer.tokens] as the lexer.  Returns [`Ok result] on success and
   [`Fail message] when the parser raises [Parsing.Parse_error] or
   [Parser_error.T]; in the latter case the message is built by
   [Parser_error.string_of_exn] from [line] itself, so no file is
   involved. *)
let parse_command line =
  try
    let lbuf = Lexing.from_string line in
    (* Parentheses are required: a variant tag takes a single argument, so
       the whole parser call must be passed as one expression. *)
    `Ok (Parser.statement Lexer.tokens lbuf)
  with
  | Parsing.Parse_error -> `Fail "Parse error"
  | Parser_error.T exn -> `Fail (Parser_error.string_of_exn line exn)
我有一个用 menhir 编写的前端,它负责解析表达式:把字符串转换成表达式 AST。前端的入口点 Parser_e.main 在我的 OCaml 代码中的多个位置被调用,所以我希望在前端内部而不是在外部捕获可能出现的错误。捕获错误时,我特别想显示的一条信息是前端无法解析的整个输入字符串。(词法分析器出错的情况很少,因为前端几乎可以读取任何内容。)
所以我尝试按照 this thread 中的做法,在出错时打印更多信息。在 parser_e.mly 中,我添加了:
(* Carries a message string. No raise site is visible in this snippet;
   presumably meant for lexing failures -- confirm against the lexer. *)
exception LexErr of string
(* Raised by [parse_error]; the payload is the location-prefixed message
   produced by [error]. *)
exception ParseErr of string
(* [error msg start finish] formats [msg] prefixed with the line number of
   [start] and the columns of [start] and [finish]; each column is counted
   from the beginning of that position's own line. *)
let error msg start finish =
  let column pos = pos.Lexing.pos_cnum - pos.Lexing.pos_bol in
  let first_col = column start in
  let last_col = column finish in
  Printf.sprintf "(line %d: char %d..%d): %s"
    start.Lexing.pos_lnum first_col last_col msg
(* [parse_error msg nterm] raises [ParseErr] whose payload is [msg]
   prefixed with the start and end positions of the [nterm]-th symbol on
   the right-hand side of the current rule.  Never returns. *)
let parse_error msg nterm =
  let last = rhs_end_pos nterm in
  let first = rhs_start_pos nterm in
  let report = error msg first last in
  raise (ParseErr report)
/* Grammar rule for an expression; each alternative's action yields the
   matching AST constructor. */
e_expression:
/* empty */ { EE_empty }
| INTEGER { EE_integer }
| DOUBLE { EE_double }
...
/* Error alternative: [parse_error] raises [ParseErr] with the position of
   right-hand-side symbol 1, so the [ERR ""] after it is never evaluated
   (presumably it is only there to give the action a value). */
| error { parse_error "e_expression" 1; ERR "" }
但这样得到的错误信息里仍然不包含输入字符串。有人知道我是否遗漏了某个可用的函数吗?
在错误的上下文中,您可以使用 Parsing.symbol_start_pos 和 Parsing.symbol_end_pos 函数,以两个位置的形式获取解析失败的词素所在的位置。遗憾的是,Parsing 模块并不能直接以字符串形式提供该词素;但如果输入存储在文件中,就可以手动把它提取出来,或者按编译器的风格打印错误信息,这样像样的 IDE 就能识别并高亮显示出错位置。下面是一个模块 Parser_error。它定义了函数 Parser_error.throw,该函数会引发 Parser_error.T 异常。该异常带有一条诊断消息以及失败词素的位置。模块还提供了几个便利函数,用于从文件中提取这个词素,或者生成文件位置消息。如果您的输入没有存储在文件中,可以使用 string_of_exn 函数:它接受字符串形式的输入和 Parser_error.T 异常的内容,并标出其中有问题的子字符串。这是一个使用此异常进行错误报告的解析器 example。
open Lexing
(** [T (message, start, finish)]: the parser failed with [message] on the
    part of the input delimited by the [start] and [finish] positions. *)
exception T of (string * position * position)
(** [throw msg] raises [T] carrying [msg] together with the start and end
    positions reported by [Parsing.symbol_start_pos] and
    [Parsing.symbol_end_pos].  It is meant to be called from the semantic
    action of a production rule.  As a side effect the most recently
    recorded raw backtrace is printed on [stderr]. *)
let throw my_unique_msg =
  (* Any exception raised by a position lookup is replaced by
     [dummy_pos]. *)
  let safe_pos lookup =
    match lookup () with
    | pos -> pos
    | exception _ -> dummy_pos
  in
  let () =
    Printexc.print_raw_backtrace stderr (Printexc.get_raw_backtrace ())
  in
  let first = safe_pos Parsing.symbol_start_pos in
  let last = safe_pos Parsing.symbol_end_pos in
  raise (T (my_unique_msg, first, last))
(** [fileposition err_s err_e] builds a one-line description of the lexeme
    delimited by the positions [err_s] and [err_e]: the file name and line
    number of [err_s], followed by the character range of the lexeme.  The
    layout is modelled on the location lines printed by compilers.

    Both ends of the range are columns counted from the beginning of the
    line that holds [err_s]. *)
let fileposition err_s err_e =
  (* [pos_cnum] is an offset from the start of the whole input, so it must
     be rebased on [pos_bol]; otherwise the reported range is wrong on every
     line but the first. *)
  let column pos = pos.pos_cnum - err_s.pos_bol in
  Printf.sprintf
    "\nFile \"%s\", line %d, at character %d-%d\n"
    err_s.pos_fname err_s.pos_lnum (column err_s) (column err_e)
(** [string_of_exn line (msg, err_s, err_e)] renders a parse error in a
    compiler-like layout: an optional file-position header (only when
    [err_s] names a file), then ["Parse error: msg"], then [line] itself,
    and finally a row of [^] characters underlining the failed lexeme.
    The underline starts at the column of [err_s] and is at least one
    character wide. *)
let string_of_exn line (msg,err_s,err_e) =
  let header =
    if err_s.pos_fname <> "" then fileposition err_s err_e else ""
  in
  let report = Printf.sprintf "Parse error: %s\n%s\n" msg line in
  (* Blank padding up to the column where the lexeme starts. *)
  let padding = String.make (max 0 (err_s.pos_cnum - err_s.pos_bol)) ' ' in
  (* One caret per character of the lexeme, never fewer than one. *)
  let carets = String.make (max 1 (err_e.pos_cnum - err_s.pos_cnum)) '^' in
  String.concat "" [header; report; padding; carets]
(** [extract_line err] reads the file named by [err.pos_fname] and returns
    line number [err.pos_lnum] of it (lines are counted from 1; a number
    below 1 is treated as 1).

    This is a best-effort helper: if the file cannot be opened the result
    is [""], and if the file is shorter than requested the result is the
    last line that could be read.  The channel is closed on every path. *)
let extract_line err =
  let line = ref "" in
  (try
     let ic = open_in err.pos_fname in
     (* Stop quietly at end of file or on a read error; [line] then keeps
        the last line read so far. *)
     (try
        for _i = 0 to max 0 (err.pos_lnum - 1) do
          line := input_line ic
        done
      with End_of_file | Sys_error _ -> ());
     (* Runs whether or not the loop finished, so the descriptor is not
        leaked when the requested line does not exist. *)
     close_in_noerr ic
   with Sys_error _ -> ());
  !line
(** [to_string payload] renders the payload of a [T] exception as a
    string: the offending line is fetched with [extract_line] from the
    file named by the start position, then formatted by
    [string_of_exn]. *)
let to_string ((_, err, _) as exn) =
  string_of_exn (extract_line err) exn
下面的示例展示了在没有文件、输入来自流或交互式(类似 shell 的)来源时应如何使用:
(* [parse_command line] parses the string [line] with [Parser.statement],
   using [Lexer.tokens] as the lexer.  Returns [`Ok result] on success and
   [`Fail message] when the parser raises [Parsing.Parse_error] or
   [Parser_error.T]; in the latter case the message is built by
   [Parser_error.string_of_exn] from [line] itself, so no file is
   involved. *)
let parse_command line =
  try
    let lbuf = Lexing.from_string line in
    (* Parentheses are required: a variant tag takes a single argument, so
       the whole parser call must be passed as one expression. *)
    `Ok (Parser.statement Lexer.tokens lbuf)
  with
  | Parsing.Parse_error -> `Fail "Parse error"
  | Parser_error.T exn -> `Fail (Parser_error.string_of_exn line exn)