为什么这组解析器组合器会溢出堆栈?
Why is this set of parser combinators overflowing the stack?
TL;DR
我的 EBNF 语法规范的解析器组合器溢出了堆栈。为什么?我该如何解决?
背景
我正在尝试通过 scala 库中的组合器为 EBNF syntax 定义解析器。实际上,代码构建了语法的 AST,但我已经删除了这些位并内联了一个实用方法以生成 MVCE(如下)。
问题
所写的代码在 运行 时出现堆栈溢出(也在下方)。我无法理解的是它似乎在解析的跳过空白部分溢出了。我该如何解决这个错误?如果无法解析 EBNF 语法,那将是非常不幸的——我打算为此开发一些工具。
MVCE
package org.benknoble.ebnf
import scala.util.parsing.combinator._
class EbnfParserSimple extends RegexParsers {
def nonterminal: Parser[String] = """<[^>]+>""".r
def goesTo: Parser[String] = """::="""
def epsilon: Parser[String] = "ε"
def terminal: Parser[String] = "\"[^\"]+\"".r
def sequence: Parser[String] =
exp ~ exp ^^ { case left ~ right => left + right }
def alternation: Parser[String] =
exp ~ "|" ~ exp ^^ { case left ~ _ ~ right => left + "|" + right }
def opt: Parser[String] =
"[" ~ exp ~ "]" ^^ { case lb ~ e ~ rb => lb + e + rb }
def repetition: Parser[String] =
"{" ~ exp ~ "}" ^^ { case lb ~ e ~ rb => lb + e + rb }
def group: Parser[String] =
"(" ~ exp ~ ")" ^^ { case lb ~ e ~ rb => lb + e + rb }
def exp: Parser[String] =
(epsilon
| terminal
| nonterminal
| sequence
| alternation
| opt
| repetition
| group)
def rule: Parser[String] = nonterminal ~ goesTo ~ exp ~ ";" ^^ {
case nt ~ delim ~ e ~ semi => nt + delim + e + semi
}
def join[A](sep: String, list: Seq[A]): String = list match {
case h :: t => h.toString() + t.foldLeft("")(_.toString() + sep + _.toString())
case Nil => ""
}
def root: Parser[String] = phrase(
rep(rule) ^^ {
case rules =>
val joined = join(" ;\n", rules)
if (joined.isEmpty)
joined
else
joined + " ;"
}
)
}
object Main extends App {
val parser = new EbnfParserSimple()
val grammar = """<A> ::= ["a"|ε]"c" ; """
// <B> ::= <A>"b" ;
// <C> ::= {<B>}"$" ;
// <D> ::= "a""b""d" ;
// <E> ::= ("a"|"b")"c" ;
// """
val rule = parser.root
println(parser.parse(rule, grammar))
}
错误跟踪
可以找到完整日志 as a Gist。
[info] Loading project definition from /Users/Knoble/loner/project
[info] Loading settings for project root from build.sbt ...
[info] Set current project to loner (in build file:/Users/Knoble/loner/)
[info] Set current project to ebnf (in build file:/Users/Knoble/loner/)
[info] Running org.benknoble.ebnf.Main
[error] (run-main-0) java.lang.WhosebugError
[error] java.lang.WhosebugError
[error] at java.util.regex.Pattern.compile(Pattern.java:1673)
[error] at java.util.regex.Pattern.<init>(Pattern.java:1351)
[error] at java.util.regex.Pattern.compile(Pattern.java:1028)
[error] at scala.util.matching.Regex.<init>(Regex.scala:226)
[error] at scala.collection.immutable.StringLike.r(StringLike.scala:284)
[error] at scala.collection.immutable.StringLike.r$(StringLike.scala:284)
[error] at scala.collection.immutable.StringOps.r(StringOps.scala:33)
[error] at scala.collection.immutable.StringLike.r(StringLike.scala:273)
[error] at scala.collection.immutable.StringLike.r$(StringLike.scala:273)
[error] at scala.collection.immutable.StringOps.r(StringOps.scala:33)
[error] at org.benknoble.ebnf.EbnfParserSimple.terminal(EbnfParser_strings.scala:13)
[error] at org.benknoble.ebnf.EbnfParserSimple.$anonfun$exp(EbnfParser_strings.scala:32)
[error] at scala.util.parsing.combinator.Parsers$Parser.p$lzycompute(Parsers.scala:253)
[error] at scala.util.parsing.combinator.Parsers$Parser.p(Parsers.scala:253)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[...]
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$flatMap(Parsers.scala:239)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] Nonzero exit code: 1
[error] (Compile / run) Nonzero exit code: 1
[error] Total time: 1 s, completed 12 mars 2019 21:11:15
我最终能够通过 删除左递归来解决我的问题。下面找到工作代码。
我不得不仔细考虑这些转换:特别是 alternation.+ ^^ { _.reduce(_ + _) }
和 sequence.+ ^^ { _.reduce(_ + _) }
——将它们转换回 AST 生成器可能并不简单(因为它们的构造函数只需要左和正确的)。重复也让我有点困扰,但在不提取辅助函数的情况下,这是唯一要做的事情。
package org.benknoble.ebnf
import scala.util.parsing.combinator._
class EbnfParserSimple extends RegexParsers {
def epsilon: Parser[String] = "ε"
def terminal: Parser[String] = "\"[^\"]+\"".r
def nonterminal: Parser[String] = """<[^>]+>""".r
def opt: Parser[String] =
"[" ~ exp ~ "]" ^^ { case lb ~ e ~ rb => lb + e + rb }
def repetition: Parser[String] =
"{" ~ exp ~ "}" ^^ { case lb ~ e ~ rb => lb + e + rb }
def group: Parser[String] =
"(" ~ exp ~ ")" ^^ { case lb ~ e ~ rb => lb + e + rb }
def alternation: Parser[String] =
chainl1(epsilon
| terminal
| nonterminal
| opt
| repetition
| group,
"|" ^^^ { (lb: String, rb: String) => lb + "|" + rb })
// exp ~ "|" ~ exp ^^ { case left ~ _ ~ right => left + "|" + right }
def sequence: Parser[String] =
alternation.+ ^^ { _.reduce(_ + _) }
// alternation ~ alternation ^^ { case lb ~ rb => lb + rb }
// exp ~ exp ^^ { case left ~ right => left + right }
def exp: Parser[String] =
sequence.+ ^^ { _.reduce(_ + _) }
def goesTo: Parser[String] = """::="""
def rule: Parser[String] = nonterminal ~ goesTo ~ exp ~ ";" ^^ {
case nt ~ delim ~ e ~ semi => nt + delim + e + semi
}
def join[A](sep: String, list: Seq[A]): String = list match {
case h :: t => h.toString() + t.foldLeft("")(_.toString() + sep + _.toString())
case Nil => ""
}
def root: Parser[String] = phrase(
rep(rule) ^^ {
case rules =>
val joined = join(" ;\n", rules)
if (joined.isEmpty)
joined
else
joined + " ;"
}
)
}
object Main extends App {
val parser = new EbnfParserSimple()
val grammar = """<A> ::= ["a"|ε]"c" ; """
// <B> ::= <A>"b" ;
// <C> ::= {<B>}"$" ;
// <D> ::= "a""b""d" ;
// <E> ::= ("a"|"b")"c" ;
// """
val rule = parser.root
println(parser.parse(rule, grammar))
}
TL;DR
我的 EBNF 语法规范的解析器组合器溢出了堆栈。为什么?我该如何解决?
背景
我正在尝试通过 scala 库中的组合器为 EBNF syntax 定义解析器。实际上,代码构建了语法的 AST,但我已经删除了这些位并内联了一个实用方法以生成 MVCE(如下)。
问题
所写的代码在 运行 时出现堆栈溢出(也在下方)。我无法理解的是它似乎在解析的跳过空白部分溢出了。我该如何解决这个错误?如果无法解析 EBNF 语法,那将是非常不幸的——我打算为此开发一些工具。
MVCE
package org.benknoble.ebnf
import scala.util.parsing.combinator._
class EbnfParserSimple extends RegexParsers {
def nonterminal: Parser[String] = """<[^>]+>""".r
def goesTo: Parser[String] = """::="""
def epsilon: Parser[String] = "ε"
def terminal: Parser[String] = "\"[^\"]+\"".r
def sequence: Parser[String] =
exp ~ exp ^^ { case left ~ right => left + right }
def alternation: Parser[String] =
exp ~ "|" ~ exp ^^ { case left ~ _ ~ right => left + "|" + right }
def opt: Parser[String] =
"[" ~ exp ~ "]" ^^ { case lb ~ e ~ rb => lb + e + rb }
def repetition: Parser[String] =
"{" ~ exp ~ "}" ^^ { case lb ~ e ~ rb => lb + e + rb }
def group: Parser[String] =
"(" ~ exp ~ ")" ^^ { case lb ~ e ~ rb => lb + e + rb }
def exp: Parser[String] =
(epsilon
| terminal
| nonterminal
| sequence
| alternation
| opt
| repetition
| group)
def rule: Parser[String] = nonterminal ~ goesTo ~ exp ~ ";" ^^ {
case nt ~ delim ~ e ~ semi => nt + delim + e + semi
}
def join[A](sep: String, list: Seq[A]): String = list match {
case h :: t => h.toString() + t.foldLeft("")(_.toString() + sep + _.toString())
case Nil => ""
}
def root: Parser[String] = phrase(
rep(rule) ^^ {
case rules =>
val joined = join(" ;\n", rules)
if (joined.isEmpty)
joined
else
joined + " ;"
}
)
}
object Main extends App {
val parser = new EbnfParserSimple()
val grammar = """<A> ::= ["a"|ε]"c" ; """
// <B> ::= <A>"b" ;
// <C> ::= {<B>}"$" ;
// <D> ::= "a""b""d" ;
// <E> ::= ("a"|"b")"c" ;
// """
val rule = parser.root
println(parser.parse(rule, grammar))
}
错误跟踪
可以找到完整日志 as a Gist。
[info] Loading project definition from /Users/Knoble/loner/project
[info] Loading settings for project root from build.sbt ...
[info] Set current project to loner (in build file:/Users/Knoble/loner/)
[info] Set current project to ebnf (in build file:/Users/Knoble/loner/)
[info] Running org.benknoble.ebnf.Main
[error] (run-main-0) java.lang.WhosebugError
[error] java.lang.WhosebugError
[error] at java.util.regex.Pattern.compile(Pattern.java:1673)
[error] at java.util.regex.Pattern.<init>(Pattern.java:1351)
[error] at java.util.regex.Pattern.compile(Pattern.java:1028)
[error] at scala.util.matching.Regex.<init>(Regex.scala:226)
[error] at scala.collection.immutable.StringLike.r(StringLike.scala:284)
[error] at scala.collection.immutable.StringLike.r$(StringLike.scala:284)
[error] at scala.collection.immutable.StringOps.r(StringOps.scala:33)
[error] at scala.collection.immutable.StringLike.r(StringLike.scala:273)
[error] at scala.collection.immutable.StringLike.r$(StringLike.scala:273)
[error] at scala.collection.immutable.StringOps.r(StringOps.scala:33)
[error] at org.benknoble.ebnf.EbnfParserSimple.terminal(EbnfParser_strings.scala:13)
[error] at org.benknoble.ebnf.EbnfParserSimple.$anonfun$exp(EbnfParser_strings.scala:32)
[error] at scala.util.parsing.combinator.Parsers$Parser.p$lzycompute(Parsers.scala:253)
[error] at scala.util.parsing.combinator.Parsers$Parser.p(Parsers.scala:253)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[...]
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$append(Parsers.scala:254)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] at scala.util.parsing.combinator.Parsers$Parser.$anonfun$flatMap(Parsers.scala:239)
[error] at scala.util.parsing.combinator.Parsers$$anon.apply(Parsers.scala:222)
[error] Nonzero exit code: 1
[error] (Compile / run) Nonzero exit code: 1
[error] Total time: 1 s, completed 12 mars 2019 21:11:15
我最终能够通过
我不得不仔细考虑这些转换:特别是 alternation.+ ^^ { _.reduce(_ + _) }
和 sequence.+ ^^ { _.reduce(_ + _) }
——将它们转换回 AST 生成器可能并不简单(因为它们的构造函数只需要左和正确的)。重复也让我有点困扰,但在不提取辅助函数的情况下,这是唯一要做的事情。
package org.benknoble.ebnf
import scala.util.parsing.combinator._
class EbnfParserSimple extends RegexParsers {
def epsilon: Parser[String] = "ε"
def terminal: Parser[String] = "\"[^\"]+\"".r
def nonterminal: Parser[String] = """<[^>]+>""".r
def opt: Parser[String] =
"[" ~ exp ~ "]" ^^ { case lb ~ e ~ rb => lb + e + rb }
def repetition: Parser[String] =
"{" ~ exp ~ "}" ^^ { case lb ~ e ~ rb => lb + e + rb }
def group: Parser[String] =
"(" ~ exp ~ ")" ^^ { case lb ~ e ~ rb => lb + e + rb }
def alternation: Parser[String] =
chainl1(epsilon
| terminal
| nonterminal
| opt
| repetition
| group,
"|" ^^^ { (lb: String, rb: String) => lb + "|" + rb })
// exp ~ "|" ~ exp ^^ { case left ~ _ ~ right => left + "|" + right }
def sequence: Parser[String] =
alternation.+ ^^ { _.reduce(_ + _) }
// alternation ~ alternation ^^ { case lb ~ rb => lb + rb }
// exp ~ exp ^^ { case left ~ right => left + right }
def exp: Parser[String] =
sequence.+ ^^ { _.reduce(_ + _) }
def goesTo: Parser[String] = """::="""
def rule: Parser[String] = nonterminal ~ goesTo ~ exp ~ ";" ^^ {
case nt ~ delim ~ e ~ semi => nt + delim + e + semi
}
def join[A](sep: String, list: Seq[A]): String = list match {
case h :: t => h.toString() + t.foldLeft("")(_.toString() + sep + _.toString())
case Nil => ""
}
def root: Parser[String] = phrase(
rep(rule) ^^ {
case rules =>
val joined = join(" ;\n", rules)
if (joined.isEmpty)
joined
else
joined + " ;"
}
)
}
object Main extends App {
val parser = new EbnfParserSimple()
val grammar = """<A> ::= ["a"|ε]"c" ; """
// <B> ::= <A>"b" ;
// <C> ::= {<B>}"$" ;
// <D> ::= "a""b""d" ;
// <E> ::= ("a"|"b")"c" ;
// """
val rule = parser.root
println(parser.parse(rule, grammar))
}