从 ArrayType() 和 StructType() 创建 MapType

Creating a MapType from ArrayType() and StructType()

我有一个 JSON 看起来像这样:

 "mapping_field" : {
        "values" : {
            "key1" : {
                "id" : "key1", 
                "field1" : "value1", 
                "field2" : "value2", 
            }, 
            "key2" : {
                "id" : "key2", 
                "field1" : "value3", 
                "field2" : "value4", 
            }
        }, 
        "keys" : [
            "key1", 
            "key2"
        ]
}

我正在尝试将此结构映射到 Spark 模式。我已经创建了以下内容;但是它不起作用。我还尝试删除值字段映射中的 ArrayType

StructType("mapping_field",
    MapType(
        StructField("keys", ArrayType(StringType())),
        StructField("values", ArrayType(StructType([
            StructField("id",StringType()),
            StructField("field1",StringType()),
            StructField("field2",StringType())
        ])))
    )
)

此外,请注意“key1”和“key2”是使用唯一标识符动态生成的字段,并且键的数量可能多于两个。有人知道如何把这种结构映射成 Spark 模式吗?

以下是与该 JSON 匹配的结构类型(schema)定义:

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType, StringType}

val json = """ {
    "mapping_field" : {
            "values" : {
                "key1" : {
                    "id" : "key1",
                    "field1" : "value1",
                    "field2" : "value2"
                },
                "key2" : {
                    "id" : "key2",
                    "field1" : "value3",
                    "field2" : "value4"
                }
            },
            "keys" : [
                "key1",
                "key2"
            ]
    }
  }
  """


val struct = StructType(
  StructField("mapping_field", StructType(
    StructType(
      StructField("values", MapType(StringType, StructType(
        StructField("id", StringType, false) ::
        StructField("field1", StringType, false) ::
        StructField("field2", StringType, false) :: Nil)
      ), false) ::
      StructField("keys", ArrayType(StringType), false) :: Nil)
  ), false) :: Nil)

import spark.implicits._
val df = List(json)
    .toDF("json_col")
    .select(from_json($"json_col", struct))