Create a graph in Spark GraphX
I'm on Spark 2.3, using Scala and sbt, and I want to create a graph in GraphX.
Here is my code:
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql._
import org.apache.spark.sql.types._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.sql.Row
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import spark.implicits._
object ne {
  def main(args: Array[String]){
    val spark = SparkSession
      .builder
      .appName("Scala-Northern-E")
      .getOrCreate()
    val vertexArray = Array(
      (1L, ("Alice", 28)),
      (2L, ("Bob", 27)),
      (3L, ("Charlie", 65)),
      (4L, ("David", 42)),
      (5L, ("Ed", 55)),
      (6L, ("Fran", 50))
    )
    val edgeArray = Array(
      Edge(2L, 1L, 7),
      Edge(2L, 4L, 2),
      Edge(3L, 2L, 4),
      Edge(3L, 6L, 3),
      Edge(4L, 1L, 1),
      Edge(5L, 2L, 2),
      Edge(5L, 3L, 8),
      Edge(5L, 6L, 3)
    )
    val vertexRDD: RDD[(Long, (String, Int))] = spark.sparkContext.parallelize(vertexArray)
    val edgeRDD: RDD[Edge[Int]] = spark.saprkContext.parallelize(edgeArray)
  }
}
But I get these errors:
[error] /home/azade/data (3rd copy)/spark-ne.scala:10:8: not found: object spark
[error] import spark.implicits._
[error] ^
[error] /home/azade/data (3rd copy)/spark-ne.scala:42:37: value saprkContext is not a member of org.apache.spark.sql.SparkSession
[error] val edgeRDD: RDD[Edge[Int]] = spark.saprkContext.parallelize(edgeArray)
[error] ^
[error] two errors found
[error] (Compile / compileIncremental) Compilation failed
[error] Total time: 7 s, completed Jul 10, 2018 8:22:11 PM
Why do I get these errors, and what should I do? What do I need to import so that sc.parallelize works?
There are two separate problems here. First, import spark.implicits._ imports members of the SparkSession instance named spark, so it cannot sit at the top of the file where no such value exists yet; move it inside main, after the getOrCreate() call. Second, saprkContext is a typo for sparkContext. A plain sbt application has no sc (only the spark-shell predefines one), so wherever an example uses sc.parallelize, write spark.sparkContext.parallelize instead.
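A minimal sketch of the corrected program, keeping your vertex and edge data and dropping the unused ML imports. The Graph construction and the counts at the end are assumptions about what you intend to do next, not part of your original code:

import org.apache.spark.sql.SparkSession
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

object ne {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("Scala-Northern-E")
      .getOrCreate()

    // The implicits import must come after `spark` exists, because it
    // imports members of that specific SparkSession instance.
    import spark.implicits._

    val vertexArray = Array(
      (1L, ("Alice", 28)),
      (2L, ("Bob", 27)),
      (3L, ("Charlie", 65)),
      (4L, ("David", 42)),
      (5L, ("Ed", 55)),
      (6L, ("Fran", 50))
    )
    val edgeArray = Array(
      Edge(2L, 1L, 7),
      Edge(2L, 4L, 2),
      Edge(3L, 2L, 4),
      Edge(3L, 6L, 3),
      Edge(4L, 1L, 1),
      Edge(5L, 2L, 2),
      Edge(5L, 3L, 8),
      Edge(5L, 6L, 3)
    )

    // sparkContext, not saprkContext; VertexId is GraphX's alias for Long.
    val vertexRDD: RDD[(VertexId, (String, Int))] = spark.sparkContext.parallelize(vertexArray)
    val edgeRDD: RDD[Edge[Int]] = spark.sparkContext.parallelize(edgeArray)

    // Assumption: the next step is to assemble the GraphX graph itself.
    val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)
    println(s"vertices: ${graph.vertices.count()}, edges: ${graph.edges.count()}")

    spark.stop()
  }
}

Note that if you only work with RDDs and GraphX, as this program does, you can drop import spark.implicits._ entirely; it is only needed for Dataset/DataFrame conversions such as toDF.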