Scala Databricks:读取一个 JSON 文件并返回两个 DataFrame
Scala Databricks: read a JSON file and return two DataFrame
我有以下代码:
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole JSON file as a single string. The file location now comes
  // from the `path` parameter (the original hard-coded it and also declared a
  // local `val path`, which shadowed the parameter and did not compile).
  val raw = sc.textFile(path).reduce((a, b) => s"$a$b")
  // Single-row DataFrame with the raw JSON in the default column "value".
  val df = Seq(raw).toDF()

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Return both frames so callers can do reader(path)(0) / reader(path)(1).
  // (The original declared DataFrame but returned nothing and ended in `*****}`.)
  Map(0 -> df_66, 1 -> df_67)
}
所以我有两个数据框 df_66 和 df_67。
基本上,我想定义一个函数来返回这些数据框,然后像 reader(path)(0) 这样逐个取用它们。
这是您要找的吗?
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole file as ONE raw text row in a column named "value".
  // (`spark.read.format("json").load(path)` would parse the JSON into columns,
  // so no "value" column would exist and from_json($"value", ...) would fail.)
  val df = spark.read.option("wholetext", "true").text(path)

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Expose both frames by index: reader(path)(0) / reader(path)(1).
  Map(
    0 -> df_66,
    1 -> df_67
  )
}
您可以像下面这样取回 DataFrame:
reader("some path")(0) // df_66
reader("some path")(1) // df_67
我有以下代码:
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole JSON file as a single string. The file location now comes
  // from the `path` parameter (the original hard-coded it and also declared a
  // local `val path`, which shadowed the parameter and did not compile).
  val raw = sc.textFile(path).reduce((a, b) => s"$a$b")
  // Single-row DataFrame with the raw JSON in the default column "value".
  val df = Seq(raw).toDF()

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Return both frames so callers can do reader(path)(0) / reader(path)(1).
  // (The original declared DataFrame but returned nothing and ended in `*****}`.)
  Map(0 -> df_66, 1 -> df_67)
}
所以我有两个数据框 df_66 和 df_67。基本上,我想定义一个函数来返回这些数据框,然后像 reader(path)(0) 这样逐个取用它们。
这是您要找的吗?
def reader(path: String): Map[Int, DataFrame] = {
  // Read the whole file as ONE raw text row in a column named "value".
  // (`spark.read.format("json").load(path)` would parse the JSON into columns,
  // so no "value" column would exist and from_json($"value", ...) would fail.)
  val df = spark.read.option("wholetext", "true").text(path)

  // Schema of one element of the "Transaction" array.
  val schema_tran = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)
  val schema = new StructType().add("Transaction", ArrayType(schema_tran), true)

  // Parse the JSON and flatten the array: one row per transaction.
  val df_66 = df.select(from_json($"value", schema) as "struct")
    .select($"struct.*")
    .withColumn("Transaction", explode(col("Transaction")))
    .select($"Transaction.*")

  // Schema of one element of the "Devis" (currency/rate) array.
  val schema_devis = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)
  val schema_1 = new StructType().add("Devis", ArrayType(schema_devis), true)

  // One row per currency/rate pair.
  val df_67 = df.select(from_json($"value", schema_1) as "struct")
    .select($"struct.*")
    .withColumn("Devis", explode(col("Devis")))
    .select($"Devis.*")

  // Expose both frames by index: reader(path)(0) / reader(path)(1).
  Map(
    0 -> df_66,
    1 -> df_67
  )
}
您可以像下面这样取回 DataFrame:
reader("some path")(0) // df_66
reader("some path")(1) // df_67