从 orc 文件中获取 table DDL 的最简单方法是什么?

Whats the easiest way to get a table DDL from an orc file?

例如,使用 Spark 我可以这样做:

spark.read.orc("/path/to/file").printSchema

但我想在 hive 中得到类似于 show create table 的输出。可能吗?

这应该可以处理大多数情况(如果需要,可以根据您的具体情况进行调整):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{ArrayType, BooleanType, DoubleType, FloatType, IntegerType, LongType, MapType, StringType, StructField, StructType}

object Main {
  /**
   * Reads the schema of an ORC file via Spark and prints a Hive-style
   * `CREATE EXTERNAL TABLE` statement for it.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    val schema = spark.read.orc("/path/to/orc/orc_file.orc").schema
    // Build one line per column, then join with ",\n" so the LAST column
    // carries no trailing comma — a trailing comma makes the DDL invalid.
    val columns = schema.fields
      .map(f => "  " + f.name.toLowerCase + " " + getType(f.dataType))
      .mkString(",\n")
    println("CREATE EXTERNAL TABLE name (")
    println(columns)
    println(")")
  }

  /**
   * Maps a Spark SQL `DataType` (or a `StructField`, when rendering struct
   * members) to the corresponding Hive DDL type name.
   *
   * @param typ a Spark `DataType` or `StructField`
   * @return the Hive type string, e.g. `bigint`, `array<string>`
   * @throws IllegalArgumentException for types not handled here
   */
  def getType(typ: Any): String = {
    typ match {
      case StringType => "string"
      case IntegerType => "int"
      case DoubleType => "double"
      case FloatType => "float"
      case LongType => "bigint"
      case BooleanType => "boolean"
      case ArrayType(elementType, _) => "array<" + getType(elementType) + ">"
      case MapType(keyType, valueType, _) =>
        "map<" + getType(keyType) + "," + getType(valueType) + ">"
      // Match StructType directly instead of the type-erased `Seq[StructField]`
      // (the element type in `case seq: Seq[StructField]` is erased and unchecked).
      case StructType(fields) =>
        "struct<" + fields.map(getType).mkString(",") + ">"
      // Struct members render as `name:type`.
      case StructField(name, dataType, _, _) =>
        s"${name.toLowerCase}:${getType(dataType)}"
      // Explicit diagnostic instead of an opaque MatchError.
      case other =>
        throw new IllegalArgumentException(s"Unsupported type: $other")
    }
  }
}