在 Hive 中加载 SparkR 数据帧
loading SparkR data frame in Hive
我需要把在 SparkR 中创建的 DataFrame 保存为 Hive 中的表。
#created a dataframe df_test
df_test <- createDataFrame(sqlContext, data.frame(mon = c(1,2,3,4,5), year = c(2011,2012,2013,2014,2015)))
#initialized the Hive context
>sc <- sparkR.init()
>hiveContext <- sparkRHive.init(sc)
#used the saveAsTable fn to save dataframe "df_test" in hive table named "table_hive"
>saveAsTable(df_test, "table_hive")
16/08/24 23:08:36 ERROR RBackendHandler: saveAsTable on 13 failed
Error in invokeJava(isStatic = FALSE, objId$id, methodName, ...) :
java.lang.RuntimeException: Tables created with SQLContext must be TEMPORARY. Use a HiveContext instead.
at scala.sys.package$.error(package.scala:27)
at org.apache.spark.sql.execution.SparkStrategies$DDLStrategy$.apply(SparkStrategies.scala:392)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:371)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
at org.apache.spark.sql.execution
抛出上述错误。请帮忙。
仅仅在作用域内存在一个 HiveContext 是不够的。每个数据框都绑定到一个特定的 SQLContext / SparkSession 实例,而 df_test 显然是使用与 hiveContext 不同的上下文创建的。
让我们举例说明:
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 1.6.1
/_/
Spark context is available as sc, SQL context is available as sqlContext
> library(magrittr)
> createDataFrame(sqlContext, mtcars) %>% saveAsTable("foo")
16/08/24 20:22:13 ERROR RBackendHandler: saveAsTable on 22 failed
Error in invokeJava(isStatic = FALSE, objId$id, methodName, ...) :
java.lang.RuntimeException: Tables created with SQLContext must be TEMPORARY. Use a HiveContext instead.
at scala.sys.package$.error(package.scala:27)
at org.apache.spark.sql.execution.SparkStrategies$DDLStrategy$.apply(SparkStrategies.scala:392)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
at org.apache.spark.sql.execu
>
> hiveContext <- sparkRHive.init(sc)
> createDataFrame(hiveContext, mtcars) %>% saveAsTable("foo")
NULL
我需要把在 SparkR 中创建的 DataFrame 保存为 Hive 中的表。
#created a dataframe df_test
df_test <- createDataFrame(sqlContext, data.frame(mon = c(1,2,3,4,5), year = c(2011,2012,2013,2014,2015)))
#initialized the Hive context
>sc <- sparkR.init()
>hiveContext <- sparkRHive.init(sc)
#used the saveAsTable fn to save dataframe "df_test" in hive table named "table_hive"
>saveAsTable(df_test, "table_hive")
16/08/24 23:08:36 ERROR RBackendHandler: saveAsTable on 13 failed
Error in invokeJava(isStatic = FALSE, objId$id, methodName, ...) :
java.lang.RuntimeException: Tables created with SQLContext must be TEMPORARY. Use a HiveContext instead.
at scala.sys.package$.error(package.scala:27)
at org.apache.spark.sql.execution.SparkStrategies$DDLStrategy$.apply(SparkStrategies.scala:392)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:371)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
at org.apache.spark.sql.execution
抛出上述错误。请帮忙。
仅仅在作用域内存在一个 HiveContext 是不够的。每个数据框都绑定到一个特定的 SQLContext / SparkSession 实例,而 df_test 显然是使用与 hiveContext 不同的上下文创建的。
让我们举例说明:
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 1.6.1
/_/
Spark context is available as sc, SQL context is available as sqlContext
> library(magrittr)
> createDataFrame(sqlContext, mtcars) %>% saveAsTable("foo")
16/08/24 20:22:13 ERROR RBackendHandler: saveAsTable on 22 failed
Error in invokeJava(isStatic = FALSE, objId$id, methodName, ...) :
java.lang.RuntimeException: Tables created with SQLContext must be TEMPORARY. Use a HiveContext instead.
at scala.sys.package$.error(package.scala:27)
at org.apache.spark.sql.execution.SparkStrategies$DDLStrategy$.apply(SparkStrategies.scala:392)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon.hasNext(Iterator.scala:396)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
at org.apache.spark.sql.execu
>
> hiveContext <- sparkRHive.init(sc)
> createDataFrame(hiveContext, mtcars) %>% saveAsTable("foo")
NULL