从 orc 文件中获取 table DDL 的最简单方法是什么?
What's the easiest way to get a table DDL from an ORC file?
使用 spark 我可以做的例子:
spark.read.orc("/path/to/file").printSchema
但我想得到类似于 Hive 中 show create table 的输出。这可能吗?
这应该可以处理大多数情况(如果需要,可以根据您的具体情况进行调整):
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{ArrayType, BooleanType, DoubleType, IntegerType, LongType, StringType, StructField}
/**
 * Prints a Hive-style `CREATE EXTERNAL TABLE` column list derived from the
 * schema Spark reads out of an ORC file.
 */
object Main {

  /**
   * Reads the ORC file's schema with a local Spark session and prints the
   * table header, one lower-cased `name type` line per top-level column, and
   * the closing parenthesis.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    try {
      val schema = spark.read.orc("/path/to/orc/orc_file.orc").schema
      // Join with ",\n" so the last column carries no trailing comma, which
      // would make the generated DDL invalid.
      val columns = schema.fields
        .map(field => s" ${field.name.toLowerCase} ${getType(field.dataType)}")
        .mkString(",\n")
      println("CREATE EXTERNAL TABLE name (")
      if (columns.nonEmpty) println(columns)
      println(")")
    } finally {
      // Release the local Spark session even if reading the file fails.
      spark.stop()
    }
  }

  /**
   * Renders a Spark type (or struct member) as a Hive DDL type string.
   *
   * @param typ a Spark `DataType`, a `StructField`, or a sequence of struct
   *            fields (a `StructType` is matched by the sequence case)
   * @return the type string, e.g. `bigint`, `array<int>`, `struct<a:int,b:string>`
   * @throws scala.MatchError if `typ` is none of the supported kinds
   */
  def getType(typ: Any): String = typ match {
    case StringType  => "string"
    case IntegerType => "int"
    case DoubleType  => "double"
    case LongType    => "bigint"
    case BooleanType => "boolean"
    case ArrayType(elementType, _) => s"array<${getType(elementType)}>"
    case StructField(name, dataType, _, _) => s"${name.toLowerCase}:${getType(dataType)}"
    // Must precede the generic DataType fallback so struct member names are
    // lower-cased like the top-level columns.
    case fields: Seq[_] => fields.map(f => getType(f)).mkString("struct<", ",", ">")
    // Any other Spark type (float, decimal, date, timestamp, map, ...): use
    // Spark's own catalog representation instead of failing with a MatchError.
    case other: org.apache.spark.sql.types.DataType => other.catalogString
  }
}
使用 spark 我可以做的例子:
spark.read.orc("/path/to/file").printSchema
但我想得到类似于 Hive 中 show create table 的输出。这可能吗?
这应该可以处理大多数情况(如果需要,可以根据您的具体情况进行调整):
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{ArrayType, BooleanType, DoubleType, IntegerType, LongType, StringType, StructField}
/**
 * Prints a Hive-style `CREATE EXTERNAL TABLE` column list derived from the
 * schema Spark reads out of an ORC file.
 */
object Main {

  /**
   * Reads the ORC file's schema with a local Spark session and prints the
   * table header, one lower-cased `name type` line per top-level column, and
   * the closing parenthesis.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    try {
      val schema = spark.read.orc("/path/to/orc/orc_file.orc").schema
      // Join with ",\n" so the last column carries no trailing comma, which
      // would make the generated DDL invalid.
      val columns = schema.fields
        .map(field => s" ${field.name.toLowerCase} ${getType(field.dataType)}")
        .mkString(",\n")
      println("CREATE EXTERNAL TABLE name (")
      if (columns.nonEmpty) println(columns)
      println(")")
    } finally {
      // Release the local Spark session even if reading the file fails.
      spark.stop()
    }
  }

  /**
   * Renders a Spark type (or struct member) as a Hive DDL type string.
   *
   * @param typ a Spark `DataType`, a `StructField`, or a sequence of struct
   *            fields (a `StructType` is matched by the sequence case)
   * @return the type string, e.g. `bigint`, `array<int>`, `struct<a:int,b:string>`
   * @throws scala.MatchError if `typ` is none of the supported kinds
   */
  def getType(typ: Any): String = typ match {
    case StringType  => "string"
    case IntegerType => "int"
    case DoubleType  => "double"
    case LongType    => "bigint"
    case BooleanType => "boolean"
    case ArrayType(elementType, _) => s"array<${getType(elementType)}>"
    case StructField(name, dataType, _, _) => s"${name.toLowerCase}:${getType(dataType)}"
    // Must precede the generic DataType fallback so struct member names are
    // lower-cased like the top-level columns.
    case fields: Seq[_] => fields.map(f => getType(f)).mkString("struct<", ",", ">")
    // Any other Spark type (float, decimal, date, timestamp, map, ...): use
    // Spark's own catalog representation instead of failing with a MatchError.
    case other: org.apache.spark.sql.types.DataType => other.catalogString
  }
}