The error of Spark job "java.lang.NoClassDefFoundError: org/apache/spark/sql/SQLContext" in Spark Job Server
The error of Spark job "java.lang.NoClassDefFoundError: org/apache/spark/sql/SQLContext" in Spark Job Server
我用 IntelliJ 创建了一个 Spark 作业,希望它能被 Spark Job Server 加载并运行。为此,我按照这个链接中的步骤进行了操作:http://github.com/ooyala/spark-jobserver
而我的spark版本是1.4.0.
这是我项目中的 Scala 代码:
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ArrayBuffer
//spark job server
import com.typesafe.config.{Config, ConfigFactory}
import scala.util.Try
import spark.jobserver.SparkJob
import spark.jobserver.SparkJobValidation
import spark.jobserver.SparkJobValid
import spark.jobserver.SparkJobInvalid
/**
 * Spark Job Server job that asks Hive for the largest number of rows recorded
 * for any single `idcard` in the `passenger` table of the `default` database.
 *
 * NOTE(review): the accompanying report shows a NoClassDefFoundError for
 * org.apache.spark.sql.SQLContext at runtime. That is a classpath issue on the
 * job server side and is not addressed by this class — confirm that the Spark
 * SQL / Hive classes are visible to the job server before deploying.
 */
class hiveSparkRest extends SparkJob {

  /** Value of the `input.string` config entry seen by the most recent `runJob` call. */
  var idCard: String = ""

  /**
   * Local smoke-test entry point: runs the job on a `local[4]` context.
   *
   * @param args optional; the first argument is used as `input.string`
   *             (empty string when absent)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[4]", "SmartApp")
    try {
      // runJob reads "input.string" unconditionally, so the config must define it;
      // an empty config would make runJob throw before any query runs.
      val settings = java.util.Collections.singletonMap[String, String](
        "input.string", args.headOption.getOrElse(""))
      val config = ConfigFactory.parseMap(settings)
      val results = runJob(sc, config)
      println("Result is " + results)
    } finally {
      // Release the local context even when the job fails.
      sc.stop()
    }
  }

  /**
   * Accepts the job only when the `input.string` config entry can be read.
   *
   * @param sc     Spark context supplied by the job server (not used here)
   * @param config job configuration to check
   * @return `SparkJobValid` if `input.string` is readable, otherwise `SparkJobInvalid`
   */
  override def validate(sc: SparkContext, config: Config): SparkJobValidation = {
    Try(config.getString("input.string"))
      .map(_ => SparkJobValid)
      .getOrElse(SparkJobInvalid("No input.string config param"))
  }

  /**
   * Stores `input.string` in `idCard` and runs the Hive query.
   *
   * @param sc     Spark context to build the Hive context on
   * @param config job configuration; must contain `input.string`
   * @return the result of `enterTimesMax`, which is `Unit`
   */
  override def runJob(sc: SparkContext, config: Config): Any = {
    idCard = config.getString("input.string")
    // Build the Hive context here: no `hiveContext` value exists in this scope otherwise.
    enterTimesMax(sc, new HiveContext(sc))
  }

  /**
   * Switches to the `default` database and executes the max-count-per-idcard query.
   *
   * @param sc          Spark context; kept for signature compatibility, not used
   * @param hiveContext Hive context the statements are executed on
   */
  def enterTimesMax(sc: SparkContext, hiveContext: HiveContext): Unit = {
    hiveContext.sql("use default")
    val sqlUrl = "select max(num) from (select idcard,count(1) as num from passenger group by idcard)as t"
    // collect() forces execution; the rows are discarded because the method returns Unit.
    hiveContext.sql(sqlUrl).collect()
  }
}
但是当我执行它时,curl 返回了 `curl: (52) Empty reply from server`(服务器返回空响应),并且 Spark Job Server 中出现了以下错误:
> job-server[ERROR] Uncaught error from thread [JobServer-akka.actor.default-dispatcher-12] shutting down JVM since 'akka.jvm-exit-on-fatal-error' is enabled for ActorSystem[JobServer]
job-server[ERROR] java.lang.NoClassDefFoundError: org/apache/spark/sql/SQLContext
job-server[ERROR] at java.lang.ClassLoader.defineClass1(Native Method)
job-server[ERROR] at java.lang.ClassLoader.defineClassCond(ClassLoader.java:631)
job-server[ERROR] at java.lang.ClassLoader.defineClass(ClassLoader.java:615)
job-server[ERROR] at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:141)
job-server[ERROR] at java.net.URLClassLoader.defineClass(URLClassLoader.java:283)
job-server[ERROR] at java.net.URLClassLoader.access$000(URLClassLoader.java:58)
job-server[ERROR] at java.net.URLClassLoader.run(URLClassLoader.java:197)
job-server[ERROR] at java.security.AccessController.doPrivileged(Native Method)
job-server[ERROR] at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
job-server[ERROR] at sql.hiveSparkRest.shadePassenger(hiveSparkRest.scala:62)
job-server[ERROR] at sql.hiveSparkRest.runJob(hiveSparkRest.scala:56)
job-server[ERROR] at spark.jobserver.JobManagerActor$$anonfun$spark$jobserver$JobManagerActor$$getJobFuture.apply(JobManagerActor.scala:222)
job-server[ERROR] at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1(Future.scala:24)
job-server[ERROR] at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
job-server[ERROR] at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:42)
job-server[ERROR] at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
job-server[ERROR] Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.SQLContext
job-server[ERROR] at java.net.URLClassLoader.run(URLClassLoader.java:202)
job-server[ERROR] at java.security.AccessController.doPrivileged(Native Method)
job-server[ERROR] at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
job-server[ERROR] ... 22 more
job-server ... finished with exit code 255
似乎 HiveContext 这个类是由 Spark 的 jar 文件 spark-assembly-1.4.0-hadoop1.0.4.jar 提供支持的。
我认为 ooyala 的仓库并不是主仓库。在目前仍在维护的仓库中,下面的链接展示了一个使用 HiveContext 的测试作业。要使用 SparkHiveJob 特质(trait),您需要 jobserver-extras 这个 jar。
我用 IntelliJ 创建了一个 Spark 作业,希望它能被 Spark Job Server 加载并运行。为此,我按照这个链接中的步骤进行了操作:http://github.com/ooyala/spark-jobserver 。我的 Spark 版本是 1.4.0。
这是我项目中的 Scala 代码:
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ArrayBuffer
//spark job server
import com.typesafe.config.{Config, ConfigFactory}
import scala.util.Try
import spark.jobserver.SparkJob
import spark.jobserver.SparkJobValidation
import spark.jobserver.SparkJobValid
import spark.jobserver.SparkJobInvalid
/**
 * Spark Job Server job that asks Hive for the largest number of rows recorded
 * for any single `idcard` in the `passenger` table of the `default` database.
 *
 * NOTE(review): the accompanying report shows a NoClassDefFoundError for
 * org.apache.spark.sql.SQLContext at runtime. That is a classpath issue on the
 * job server side and is not addressed by this class — confirm that the Spark
 * SQL / Hive classes are visible to the job server before deploying.
 */
class hiveSparkRest extends SparkJob {

  /** Value of the `input.string` config entry seen by the most recent `runJob` call. */
  var idCard: String = ""

  /**
   * Local smoke-test entry point: runs the job on a `local[4]` context.
   *
   * @param args optional; the first argument is used as `input.string`
   *             (empty string when absent)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[4]", "SmartApp")
    try {
      // runJob reads "input.string" unconditionally, so the config must define it;
      // an empty config would make runJob throw before any query runs.
      val settings = java.util.Collections.singletonMap[String, String](
        "input.string", args.headOption.getOrElse(""))
      val config = ConfigFactory.parseMap(settings)
      val results = runJob(sc, config)
      println("Result is " + results)
    } finally {
      // Release the local context even when the job fails.
      sc.stop()
    }
  }

  /**
   * Accepts the job only when the `input.string` config entry can be read.
   *
   * @param sc     Spark context supplied by the job server (not used here)
   * @param config job configuration to check
   * @return `SparkJobValid` if `input.string` is readable, otherwise `SparkJobInvalid`
   */
  override def validate(sc: SparkContext, config: Config): SparkJobValidation = {
    Try(config.getString("input.string"))
      .map(_ => SparkJobValid)
      .getOrElse(SparkJobInvalid("No input.string config param"))
  }

  /**
   * Stores `input.string` in `idCard` and runs the Hive query.
   *
   * @param sc     Spark context to build the Hive context on
   * @param config job configuration; must contain `input.string`
   * @return the result of `enterTimesMax`, which is `Unit`
   */
  override def runJob(sc: SparkContext, config: Config): Any = {
    idCard = config.getString("input.string")
    // Build the Hive context here: no `hiveContext` value exists in this scope otherwise.
    enterTimesMax(sc, new HiveContext(sc))
  }

  /**
   * Switches to the `default` database and executes the max-count-per-idcard query.
   *
   * @param sc          Spark context; kept for signature compatibility, not used
   * @param hiveContext Hive context the statements are executed on
   */
  def enterTimesMax(sc: SparkContext, hiveContext: HiveContext): Unit = {
    hiveContext.sql("use default")
    val sqlUrl = "select max(num) from (select idcard,count(1) as num from passenger group by idcard)as t"
    // collect() forces execution; the rows are discarded because the method returns Unit.
    hiveContext.sql(sqlUrl).collect()
  }
}
但是当我执行它时,curl 返回了 `curl: (52) Empty reply from server`(服务器返回空响应),并且 Spark Job Server 中出现了以下错误:
> job-server[ERROR] Uncaught error from thread [JobServer-akka.actor.default-dispatcher-12] shutting down JVM since 'akka.jvm-exit-on-fatal-error' is enabled for ActorSystem[JobServer]
job-server[ERROR] java.lang.NoClassDefFoundError: org/apache/spark/sql/SQLContext
job-server[ERROR] at java.lang.ClassLoader.defineClass1(Native Method)
job-server[ERROR] at java.lang.ClassLoader.defineClassCond(ClassLoader.java:631)
job-server[ERROR] at java.lang.ClassLoader.defineClass(ClassLoader.java:615)
job-server[ERROR] at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:141)
job-server[ERROR] at java.net.URLClassLoader.defineClass(URLClassLoader.java:283)
job-server[ERROR] at java.net.URLClassLoader.access$000(URLClassLoader.java:58)
job-server[ERROR] at java.net.URLClassLoader.run(URLClassLoader.java:197)
job-server[ERROR] at java.security.AccessController.doPrivileged(Native Method)
job-server[ERROR] at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
job-server[ERROR] at sql.hiveSparkRest.shadePassenger(hiveSparkRest.scala:62)
job-server[ERROR] at sql.hiveSparkRest.runJob(hiveSparkRest.scala:56)
job-server[ERROR] at spark.jobserver.JobManagerActor$$anonfun$spark$jobserver$JobManagerActor$$getJobFuture.apply(JobManagerActor.scala:222)
job-server[ERROR] at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1(Future.scala:24)
job-server[ERROR] at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
job-server[ERROR] at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:42)
job-server[ERROR] at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
job-server[ERROR] at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
job-server[ERROR] Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.SQLContext
job-server[ERROR] at java.net.URLClassLoader.run(URLClassLoader.java:202)
job-server[ERROR] at java.security.AccessController.doPrivileged(Native Method)
job-server[ERROR] at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
job-server[ERROR] at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
job-server[ERROR] ... 22 more
job-server ... finished with exit code 255
似乎 HiveContext 这个类是由 Spark 的 jar 文件 spark-assembly-1.4.0-hadoop1.0.4.jar 提供支持的。
我认为 ooyala 的仓库并不是主仓库。在目前仍在维护的仓库中,下面的链接展示了一个使用 HiveContext 的测试作业。要使用 SparkHiveJob 特质(trait),您需要 jobserver-extras 这个 jar。