Scala Databricks:读取一个 JSON 文件并返回两个 DataFrame
Scala Databricks: read a JSON file and return two DataFrame
我有以下代码:
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole JSON file as a single string. The file location now comes
  // from the `path` parameter (the original hard-coded it and also declared a
  // local `val path`, which shadowed the parameter and did not compile).
  val raw = sc.textFile(path).reduce((a, b) => s"$a$b")
  // Single-row DataFrame with the raw JSON in the default column "value".
  val df = Seq(raw).toDF()

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Return both frames so callers can do reader(path)(0) / reader(path)(1).
  // (The original declared DataFrame but returned nothing and ended in `*****}`.)
  Map(0 -> df_66, 1 -> df_67)
}
所以我有两个数据框 df_66 和 df_67。
基本上,我想定义一个函数来返回这些数据框,然后像 reader(path)(0) 这样逐个取用它们。
这是您要找的吗?
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole file as ONE raw text row in a column named "value".
  // (`spark.read.format("json").load(path)` would parse the JSON into columns,
  // so no "value" column would exist and from_json($"value", ...) would fail.)
  val df = spark.read.option("wholetext", "true").text(path)

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Expose both frames by index: reader(path)(0) / reader(path)(1).
  Map(
    0 -> df_66,
    1 -> df_67
  )
}
您可以像下面这样取回 DataFrame:
reader("some path")(0) // df_66
reader("some path")(1) // df_67
我有以下代码:
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole JSON file as a single string. The file location now comes
  // from the `path` parameter (the original hard-coded it and also declared a
  // local `val path`, which shadowed the parameter and did not compile).
  val raw = sc.textFile(path).reduce((a, b) => s"$a$b")
  // Single-row DataFrame with the raw JSON in the default column "value".
  val df = Seq(raw).toDF()

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Return both frames so callers can do reader(path)(0) / reader(path)(1).
  // (The original declared DataFrame but returned nothing and ended in `*****}`.)
  Map(0 -> df_66, 1 -> df_67)
}
所以我有两个数据框 df_66 和 df_67。基本上,我想定义一个函数来返回这些数据框,然后像 reader(path)(0) 这样逐个取用它们。
这是您要找的吗?
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole file as ONE raw text row in a column named "value".
  // (`spark.read.format("json").load(path)` would parse the JSON into columns,
  // so no "value" column would exist and from_json($"value", ...) would fail.)
  val df = spark.read.option("wholetext", "true").text(path)

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Expose both frames by index: reader(path)(0) / reader(path)(1).
  Map(
    0 -> df_66,
    1 -> df_67
  )
}
您可以像下面这样取回 DataFrame:
reader("some path")(0) // df_66
reader("some path")(1) // df_67