在 Scala 中解析 case 语句

Parsing case statements in scala

在 scala 中解析 case 语句

CASE WHEN col1 <> 0 AND col2 <> 0 THEN 'COL1 & COL2 IS NOT ZERO' ELSE 'COL1 & COL2 IS ZERO' END

这里的挑战在于覆盖 case 语句可能出现的所有场景,例如它可以出现在函数调用内部。此外,case 语句、函数等还可以嵌套在另一个 case 语句中,这种情况也必须处理。

这个问题可以用 Scala 的 parser combinator(解析器组合子)解决。

首先定义映射表达式所需的类

/** Base type of every parsed expression node.
  *
  * Each node knows how to render itself back to text through [[asStr]].
  */
sealed trait Exp {
  /** Textual rendering of this expression. */
  def asStr: String

  // Delegates to asStr so that interpolating an Exp into a string yields its rendered form.
  override def toString: String = asStr
}
/** A binary operation `a op b`, optionally followed by a trailing token `c`.
  *
  * Rendered as the four parts separated by single spaces; a missing `c` renders as
  * an empty string, so the result then ends with a space.
  */
case class OperationExp(a: Exp, op: String, b: Exp, c: Option[String]) extends Exp {
  override def asStr = {
    val trailing = c.getOrElse("")
    s"$a $op $b $trailing"
  }
}
/** A `CASE ... END` expression.
  *
  * @param conditionValue (condition, result) pairs, one per `WHEN ... THEN ...` branch
  * @param elseValue      text rendered after `ELSE`
  * @param asAlias        optional expression rendered after `END`
  */
case class CaseConditions(conditionValue: List[(String, String)] , elseValue: String, asAlias: Option[Exp]) extends Exp {
  override def asStr = {
    val branches = conditionValue
      .map { case (condition, result) => s"WHEN $condition THEN $result" }
      .mkString(" ")
    // A missing alias renders as an empty string, leaving a trailing space after END.
    val alias = asAlias.getOrElse("")
    s"CASE $branches ELSE $elseValue END $alias"
  }
}

现在是解决方案

/** A binary operation `a op b`, optionally followed by a trailing token `c`.
  *
  * Rendered as the four parts separated by single spaces; a missing `c` renders as
  * an empty string, so the result then ends with a space.
  */
// NOTE(review): an identical definition of this class appears earlier in this file.
case class OperationExp(a: Exp, op: String, b: Exp, c: Option[String]) extends Exp {
  override def asStr = {
    val trailing = c.getOrElse("")
    s"$a $op $b $trailing"
  }
}

/** A `CASE ... END` expression.
  *
  * @param conditionValue (condition, result) pairs, one per `WHEN ... THEN ...` branch
  * @param elseValue      text rendered after `ELSE`
  * @param asAlias        optional expression rendered after `END`
  */
// NOTE(review): an identical definition of this class appears earlier in this file.
case class CaseConditions(conditionValue: List[(String, String)] , elseValue: String, asAlias: Option[Exp]) extends Exp {
  override def asStr = {
    val branches = conditionValue
      .map { case (condition, result) => s"WHEN $condition THEN $result" }
      .mkString(" ")
    // A missing alias renders as an empty string, leaving a trailing space after END.
    val alias = asAlias.getOrElse("")
    s"CASE $branches ELSE $elseValue END $alias"
  }
}
// Lexical building blocks used by the condition and CASE parsers.

// One token made of letters, digits and the listed punctuation characters.
// Written as a triple-quoted literal so the regex escapes (\|, \-, \+) reach the
// regex engine; in an ordinary "..." literal they are invalid string escapes and
// the file does not compile.
val identifiers: Parser[String]     = """[a-zA-Z0-9_~\|,'\-\+:.()]+""".r

// Either a run of symbolic comparison characters or one of the keyword operators.
// "IS NOT" is listed before "IS" so the longer alternative is tried first.
val operatorTokens: Parser[String]  = "[<>=!]+".r | ("IS NOT" | "IN" | "IS")

// Keyword that chains one condition to the next.
val conditionJoiner: Parser[String] = "AND" | "OR"

  // Words that delimit a CASE expression; tokens equal to one of these are rejected
  // by the identifier parser (comparison is exact and therefore case-sensitive).
  val excludeKeywords =
    "CASE" :: "WHEN" :: "THEN" :: "ELSE" :: "END" :: Nil


  /** Parses a single identifier token that is not one of [[excludeKeywords]].
    *
    * When the token produced by `identifiers` is a reserved word, the parse fails at
    * the original input position with the message "<token> encountered".
    */
  val identifierWithoutCaseKw: Parser[Exp] = {
    val nonKeywordToken: Parser[String] = Parser { in =>
      identifiers(in).filterWithError(
        token => !excludeKeywords.contains(token),
        token => s"$token encountered",
        in
      )
    }
    nonKeywordToken ^^ StrExp
  }

  // Any (possibly empty) run of characters that contains no parenthesis.
  val anyStrExp: Parser[Exp] = "[^()]*".r ^^ StrExp

  /** Parses `name(argument)`, where the argument is tried, in order, as a CASE
    * expression, a nested function call, or arbitrary parenthesis-free text.
    */
  val funcIdentifier: Parser[Exp] = {
    // Declared as defs so the recursive reference to funcIdentifier is only
    // evaluated when a parse actually reaches the argument position.
    def argument: Parser[Exp] = caseConditionExpresionParser | funcIdentifier | anyStrExp
    def parenthesised: Parser[Exp] = "(" ~> argument <~ ")"

    name ~ parenthesised ^^ { case func ~ param => FunCallExp(func, Seq(param)) }
  }

  // A function call is attempted first; otherwise a plain non-keyword identifier.
  val identifierOrFunctions: Parser[Exp] = funcIdentifier | identifierWithoutCaseKw

  /** Parses one comparison `lhs operator rhs`, optionally followed by a joiner
    * (`AND` / `OR`), and renders it as space-separated text. A missing joiner
    * renders as an empty string, so the result then ends with a space.
    */
  val conditionParser: Parser[String] = {
    val comparison =
      identifierOrFunctions ~ operatorTokens ~ identifierOrFunctions ~ opt(conditionJoiner)

    comparison ^^ { case lhs ~ operator ~ rhs ~ joiner =>
      val suffix = joiner.getOrElse("")
      s"$lhs $operator $rhs $suffix"
    }
  }

  /** Parses `CASE WHEN <conditions> THEN <value> ... ELSE <value> END [AS <alias>]`.
    *
    * Each THEN / ELSE value is a sequence of non-keyword tokens. The tokens are joined
    * with single spaces and re-parsed through `parsePipes`; when that succeeds its
    * rendering is used, otherwise the joined text is kept as is.
    *
    * @return a parser producing the [[CaseConditions]] for the matched expression
    */
  def caseConditionExpresionParser: Parser[CaseConditions] = {
    // Joins the tokens with one consistent separator, parses the text once, and falls
    // back to the raw text on failure. The ELSE branch previously probed and fell back
    // with an empty separator, which glued multi-token values together.
    def normalise(tokens: List[Exp]): String = {
      val text = tokens.mkString(" ")
      parsePipes(text).fold(_ => text, identity)
    }

    // One `WHEN <conditions> THEN <value>` branch as a (condition, result) pair.
    val whenClause: Parser[(String, String)] =
      ("WHEN" ~> rep(conditionParser)) ~ ("THEN" ~> rep(identifierWithoutCaseKw)) ^^ {
        case conditions ~ results => (conditions.mkString(" "), normalise(results))
      }

    ("CASE" ~> rep1(whenClause)) ~
      ("ELSE" ~> rep(identifierWithoutCaseKw) <~ "END") ~
      opt("AS" ~> identifierWithoutCaseKw) ^^ {
        case branches ~ elseTokens ~ asName =>
          CaseConditions(branches, normalise(elseTokens), asName)
      }
  }
// Entry-point parser used to obtain results: a CASE expression is tried first,
// then a function call.
  val caseExpression: Parser[Exp] = caseConditionExpresionParser | funcIdentifier

/** Parses `input` with `caseExpression` and returns its rendered form.
  *
  * @param input text to parse
  * @return `Right` with the parsed expression's `asStr` on success, or `Left` with a
  *         single `ParsingError` describing the failure
  */
def parsePipes(input: String): Either[Seq[ParsingError], String] =
  // NOTE(review): `parse` (unlike `parseAll`) does not require the whole input to be
  // consumed — confirm that accepting a matching prefix is intended.
  parse(caseExpression, input) match {
    case Success(parsed, _) => Right(parsed.asStr)
    // The messages interpolate `input`; they previously referenced an undefined `pipedStr`.
    case Failure(msg, next) => Left(Seq(ParsingError(s"Failed to parse $input: $msg, next: ${next.source}.")))
    case Error(msg, next)   => Left(Seq(ParsingError(s"Error in $input parse: $msg, next: ${next.source}.")))
  }