如何将 JSON 架构（schema）中的字段名从驼峰式命名转换为小写

Question

我有一个 JSON 架构，其中的键采用驼峰式命名，我正在尝试将所有字段名转换为小写。我在处理 ArrayType 时遇到了问题。

 import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructField, StructType}
 import org.apache.spark.sql.types.{DataType, StructType}
 import spark.implicits._

 // Obtain a SparkSession with Hive support enabled.
 val spark: SparkSession = SparkSession.builder().enableHiveSupport().getOrCreate()
 // Schema of the DataFrame read from the JSON at "path"; it is never reassigned, so a val suffices.
 val sample_schema = spark.read.json("path").schema

 /**
  * Returns a copy of `schema` in which every field name is lower-cased,
  * descending into nested struct fields.
  *
  * Only fields whose data type is itself a StructType are descended into; any
  * other data type (arrays included) is carried over unchanged, so field names
  * inside array elements keep their original case.
  *
  * @param schema the schema whose field names should be lower-cased
  * @return a new StructType with lower-cased field names
  */
 def columnsToLowercase(schema: StructType): StructType = {
    // Lower-cases the fields of one struct level, recursing into struct-typed fields.
    def lowerFields(struct: StructType): Seq[StructField] =
       struct.fields.map { field =>
          val loweredType = field.dataType match {
             case nested: StructType => StructType(lowerFields(nested))
             case other              => other
          }
          StructField(field.name.toLowerCase, loweredType, field.nullable, field.metadata)
       }

    StructType(lowerFields(schema))
 }

 // Read the JSON at "path" again, this time with the schema returned by columnsToLowercase.
 val jsonDFrame: DataFrame = {
    val lowercasedSchema = columnsToLowercase(sample_schema)
    spark.read.schema(lowercasedSchema).json("path")
 }

示例架构：

root
 |-- id: string (nullable = true)
 |-- master: struct (nullable = true)
 |    |-- code: string (nullable = true)
 |    |-- provInfo: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- claimInfo: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- claimId: string (nullable = true)
 |    |    |    |-- demoInfo: struct (nullable = true)
 |    |    |    |    |-- family: struct (nullable = true)
 |    |    |    |    |    |-- outOrder: struct (nullable = true)
 |    |    |    |    |    |    |-- LocOut: boolean (nullable = true)
 |    |    |    |    |    |    |-- found: boolean (nullable = true)
 |    |-- claimAddr: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- address: string (nullable = true)
 |-- system: string (nullable = true)

Answer 1

您应该能够通过添加另一个 case 子句将嵌套在 ArrayType 中的字段小写。对于数组列，还需要检查其子元素类型：

/**
 * Returns a copy of `schema` in which every field name is lower-cased, at every
 * nesting level: inside structs, inside array elements (including arrays of
 * arrays) and inside map key/value types.
 *
 * Nullability, metadata, `containsNull` and `valueContainsNull` flags are
 * preserved exactly as they appear in the input schema.
 *
 * @param schema the schema whose field names should be lower-cased
 * @return a new StructType with the same shape and lower-cased field names
 */
def columnsToLowercase(schema: StructType): StructType = {
  // Local import keeps this snippet self-contained regardless of the file-level imports.
  import org.apache.spark.sql.types.{ArrayType, DataType, MapType}

  // Lower-cases the field names of one struct and recurses into each field's type.
  def lowerStruct(struct: StructType): StructType =
    StructType(struct.fields.map { field =>
      field.copy(name = field.name.toLowerCase, dataType = lowerType(field.dataType))
    })

  // Rebuilds container types around their lower-cased contents; leaf types pass through.
  def lowerType(dataType: DataType): DataType = dataType match {
    case struct: StructType =>
      lowerStruct(struct)
    case ArrayType(elementType, containsNull) =>
      // Keep the ArrayType wrapper and its original containsNull flag; only the
      // element type is rewritten, so array<string> stays array<string>.
      ArrayType(lowerType(elementType), containsNull)
    case MapType(keyType, valueType, valueContainsNull) =>
      MapType(lowerType(keyType), lowerType(valueType), valueContainsNull)
    case other =>
      other
  }

  lowerStruct(schema)
}

将其应用到您的架构上：

df.printSchema
//root
// |-- id: string (nullable = true)
// |-- master: struct (nullable = true)
// |    |-- provInfo: struct (nullable = true)
// |    |    |-- claimInfo: array (nullable = true)
// |    |    |    |-- element: struct (containsNull = true)
// |    |    |    |    |-- claimId: string (nullable = true)
// |    |    |-- demoInfo: struct (nullable = true)
// |    |    |    |-- family: struct (nullable = true)
// |    |    |    |    |-- outOrder: struct (nullable = true)
// |    |    |    |    |    |-- LocOut: boolean (nullable = false)
// |    |    |    |    |    |-- found: boolean (nullable = false)
// |    |-- claimAddr: array (nullable = true)
// |    |    |-- element: struct (containsNull = true)
// |    |    |    |-- address: string (nullable = true)
// |-- system: string (nullable = true)


columnsToLowercase(df.schema).printTreeString()
//root
// |-- id: string (nullable = true)
// |-- master: struct (nullable = true)
// |    |-- provinfo: struct (nullable = true)
// |    |    |-- claiminfo: array (nullable = true)
// |    |    |    |-- element: struct (containsNull = true)
// |    |    |    |    |-- claimid: string (nullable = true)
// |    |    |-- demoinfo: struct (nullable = true)
// |    |    |    |-- family: struct (nullable = true)
// |    |    |    |    |-- outorder: struct (nullable = true)
// |    |    |    |    |    |-- locout: boolean (nullable = false)
// |    |    |    |    |    |-- found: boolean (nullable = false)
// |    |-- claimaddr: array (nullable = true)
// |    |    |-- element: struct (containsNull = true)
// |    |    |    |-- address: string (nullable = true)
// |-- system: string (nullable = true)

如何将 JSON 模式从 Camel 大小写转换为小写

How to convert JSON Schema from Camel case to lower case

scala

dataframe

apache-spark

apache-spark-sql