在 Apache Calcite 中将 SQL 带有聚合函数的查询转换为关系代数表达式 - 未找到函数签名的匹配项
Converting SQL Query with Aggregate Function to Relational Algebra Expression in Apache Calcite - No match found for function signature
我正在尝试使用 Apache Calcite SqlToRelConverter
.
将 SQL 查询转换为关系代数表达式
对于这个查询它工作正常(引号是为了确保小写):
queryToRelationalAlgebraRoot("SELECT \"country\" FROM \"mytable\"")
但是这个查询失败了:
queryToRelationalAlgebraRoot("SELECT \"country\", SUM(\"salary\") FROM \"mytable\" GROUP BY \"country\"")
出现此错误:
org.apache.calcite.sql.validate.SqlValidatorException: No match found for function signature SUM(<NUMERIC>)
SQL 验证器似乎没有注册总和或计数等聚合函数。
case class Income(id: Int, salary: Double, country: String)
class SparkDataFrameTable(df: DataFrame) extends AbstractTable {
def getRowType(typeFactory: RelDataTypeFactory): RelDataType = {
val typeList = df.schema.fields.map {
field => field.dataType match {
case t: StringType => typeFactory.createSqlType(SqlTypeName.VARCHAR)
case t: IntegerType => typeFactory.createSqlType(SqlTypeName.INTEGER)
case t: DoubleType => typeFactory.createSqlType(SqlTypeName.DOUBLE)
}
}.toList.asJava
val fieldNameList = df.schema.fieldNames.toList.asJava
typeFactory.createStructType(typeList, fieldNameList)
}
}
object RelationalAlgebra {
def queryToRelationalAlgebraRoot(query: String): RelRoot = {
val sqlParser = SqlParser.create(query)
val sqlParseTree = sqlParser.parseQuery()
val frameworkConfig = Frameworks.newConfigBuilder().build()
val planner = new PlannerImpl(frameworkConfig)
val rootSchema = CalciteSchema.createRootSchema(true, true)
// some sample data for testing
val inc1 = new Income(1, 100000, "USA")
val inc2 = new Income(2, 110000, "USA")
val inc3 = new Income(3, 80000, "Canada")
val spark = SparkSession.builder().master("local").getOrCreate()
import spark.implicits._
val df = Seq(inc1, inc2, inc3).toDF()
rootSchema.add("mytable", new SparkDataFrameTable(df))
val defaultSchema = List[String]().asJava
val calciteConnectionConfigProperties = new Properties()
val calciteConnectionConfigImpl = new CalciteConnectionConfigImpl(calciteConnectionConfigProperties)
val sqlTypeFactoryImpl = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT)
val calciteCatelogReader = new CalciteCatalogReader(rootSchema, defaultSchema, sqlTypeFactoryImpl, calciteConnectionConfigImpl)
val defaultValidator = SqlValidatorUtil.newValidator(new SqlStdOperatorTable(), calciteCatelogReader, sqlTypeFactoryImpl, SqlConformanceEnum.LENIENT)
val relExpressionOptimizationCluster = RelOptCluster.create(new VolcanoPlanner(), new RexBuilder(sqlTypeFactoryImpl))
val sqlToRelConfig = SqlToRelConverter.configBuilder().build()
val sqlToRelConverter = new SqlToRelConverter(planner, defaultValidator, calciteCatelogReader, relExpressionOptimizationCluster, StandardConvertletTable.INSTANCE, sqlToRelConfig)
sqlToRelConverter.convertQuery(sqlParseTree, true, true)
}
}
我不熟悉 api 但您的 SQL 需要按国家分组。如果一个工具要获取此输出并使用它,它可能会要求您也使用别名命名该列。
代码的问题是 new SqlStdOperatorTable()
创建了一个未初始化的验证器。使用 SqlStdOperatorTable
的正确方法是使用 SqlStdOperatorTable.instance()
.
我在向 dev@calcite.apache.org 邮件列表发送电子邮件后找到了解决方案。非常感谢 Yuzhao Chen 调查了我的问题并指出了我的代码的问题。
我正在尝试使用 Apache Calcite SqlToRelConverter
.
对于这个查询它工作正常(引号是为了确保小写):
queryToRelationalAlgebraRoot("SELECT \"country\" FROM \"mytable\"")
但是这个查询失败了:
queryToRelationalAlgebraRoot("SELECT \"country\", SUM(\"salary\") FROM \"mytable\" GROUP BY \"country\"")
出现此错误:
org.apache.calcite.sql.validate.SqlValidatorException: No match found for function signature SUM(<NUMERIC>)
SQL 验证器似乎没有注册总和或计数等聚合函数。
case class Income(id: Int, salary: Double, country: String)
class SparkDataFrameTable(df: DataFrame) extends AbstractTable {
def getRowType(typeFactory: RelDataTypeFactory): RelDataType = {
val typeList = df.schema.fields.map {
field => field.dataType match {
case t: StringType => typeFactory.createSqlType(SqlTypeName.VARCHAR)
case t: IntegerType => typeFactory.createSqlType(SqlTypeName.INTEGER)
case t: DoubleType => typeFactory.createSqlType(SqlTypeName.DOUBLE)
}
}.toList.asJava
val fieldNameList = df.schema.fieldNames.toList.asJava
typeFactory.createStructType(typeList, fieldNameList)
}
}
object RelationalAlgebra {
def queryToRelationalAlgebraRoot(query: String): RelRoot = {
val sqlParser = SqlParser.create(query)
val sqlParseTree = sqlParser.parseQuery()
val frameworkConfig = Frameworks.newConfigBuilder().build()
val planner = new PlannerImpl(frameworkConfig)
val rootSchema = CalciteSchema.createRootSchema(true, true)
// some sample data for testing
val inc1 = new Income(1, 100000, "USA")
val inc2 = new Income(2, 110000, "USA")
val inc3 = new Income(3, 80000, "Canada")
val spark = SparkSession.builder().master("local").getOrCreate()
import spark.implicits._
val df = Seq(inc1, inc2, inc3).toDF()
rootSchema.add("mytable", new SparkDataFrameTable(df))
val defaultSchema = List[String]().asJava
val calciteConnectionConfigProperties = new Properties()
val calciteConnectionConfigImpl = new CalciteConnectionConfigImpl(calciteConnectionConfigProperties)
val sqlTypeFactoryImpl = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT)
val calciteCatelogReader = new CalciteCatalogReader(rootSchema, defaultSchema, sqlTypeFactoryImpl, calciteConnectionConfigImpl)
val defaultValidator = SqlValidatorUtil.newValidator(new SqlStdOperatorTable(), calciteCatelogReader, sqlTypeFactoryImpl, SqlConformanceEnum.LENIENT)
val relExpressionOptimizationCluster = RelOptCluster.create(new VolcanoPlanner(), new RexBuilder(sqlTypeFactoryImpl))
val sqlToRelConfig = SqlToRelConverter.configBuilder().build()
val sqlToRelConverter = new SqlToRelConverter(planner, defaultValidator, calciteCatelogReader, relExpressionOptimizationCluster, StandardConvertletTable.INSTANCE, sqlToRelConfig)
sqlToRelConverter.convertQuery(sqlParseTree, true, true)
}
}
我不熟悉 api 但您的 SQL 需要按国家分组。如果一个工具要获取此输出并使用它,它可能会要求您也使用别名命名该列。
代码的问题是 new SqlStdOperatorTable()
创建了一个未初始化的验证器。使用 SqlStdOperatorTable
的正确方法是使用 SqlStdOperatorTable.instance()
.
我在向 dev@calcite.apache.org 邮件列表发送电子邮件后找到了解决方案。非常感谢 Yuzhao Chen 调查了我的问题并指出了我的代码的问题。