如何修复 R 中关于 sparklyr 模型的错误
How to fix error in R regarding sparklyr model
有人可以帮助我解决我在 R sparklyr 中遇到的错误吗?
kmeans_model <- iris_tbl %>%
select(Petal_Width, Petal_Length) %>%
ml_kmeans(centers = 3)
Error: java.lang.IllegalArgumentException: Field "features" does not
exist. Available fields: Petal_Width, Petal_Length
at org.apache.spark.sql.types.StructType$$anonfun$apply.apply(StructType.scala:274)
at org.apache.spark.sql.types.StructType$$anonfun$apply.apply(StructType.scala:274)
at scala.collection.MapLike$class.getOrElse(MapLike.scala:128)
at scala.collection.AbstractMap.getOrElse(Map.scala:59)
at org.apache.spark.sql.types.StructType.apply(StructType.scala:273)
at org.apache.spark.ml.util.SchemaUtils$.checkColumnTypes(SchemaUtils.scala:58)
at org.apache.spark.ml.util.SchemaUtils$.validateVectorCompatibleColumn(SchemaUtils.scala:119)
at org.apache.spark.ml.clustering.KMeansParams$class.validateAndTransformSchema(KMeans.scala:96)
at org.apache.spark.ml.clustering.KMeans.validateAndTransformSchema(KMeans.scala:285)
at org.apache.spark.ml.clustering.KMeans.transformSchema(KMeans.scala:382)
at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:74)
at org.apache.spark.ml.clustering.KMeans$$anonfun$fit.apply(KMeans.scala:341)
at org.apache.spark.ml.clustering.KMeans$$anonfun$fit.apply(KMeans.scala:340)
at org.apache.spark.ml.util.Instrumentation$$anonfun.apply(Instrumentation.scala:183)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
at org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:340)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at sparklyr.Invoke.invoke(invoke.scala:139)
at sparklyr.StreamHandler.handleMethodCall(stream.scala:123)
at sparklyr.StreamHandler.read(stream.scala:66)
at sparklyr.BackendHandler.channelRead0(handler.scala:51)
at sparklyr.BackendHandler.channelRead0(handler.scala:4)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:284)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1359)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:935)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:138)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:645)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
at io.netty.util.concurrent.SingleThreadEventExecutor.run(SingleThreadEventExecutor.java:858)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
at java.lang.Thread.run(Unknown Source)
Warning: Some components of ... were not used: centers
我已经尝试过其他写法,但它无法生成 3 个聚类,只能得到 2 个:
kmeans_model <- iris_tbl %>%
ml_kmeans(formula= ~ Petal_Width + Petal_Length, centers = 3)
#Warning: Some components of ... were not used: centers
print(kmeans_model)
#K-means clustering with 2 clusters
#
#Cluster centers:
# Petal_Width Petal_Length
#1 1.6818182 4.925253
#2 0.2627451 1.492157
#
#Within Set Sum of Squared Errors = 86.39022
你的错误的第一行非常直截了当:
Field "features" does not exist.
如果您查看 ?ml_kmeans 的文档,您会发现您需要指定一个公式(您的第二次尝试)或 features_col。快速说明:在 Spark 中,模型的特征预计会被矢量化(合并)到 data.frame 的单独一列中。
您的第二条 error/warning 消息也很直接:
Warning: Some components of ... were not used: centers
centers 不是 ml_kmeans 中的参数。你要用的是 k:
kmeans_model <- iris_tbl %>%
ml_kmeans(formula= ~ Petal_Width + Petal_Length, k = 3)
kmeans_model
# K-means clustering with 3 clusters
#
# Cluster centers:
# Petal_Width Petal_Length
# 1 1.359259 4.292593
# 2 0.246000 1.462000
# 3 2.047826 5.626087
#
# Within Set Sum of Squared Errors = 31.41289
要在不使用公式的情况下运行,你需要使用 ft_vector_assembler:
kmeans_model <- iris_tbl %>%
ft_vector_assembler(input_cols=c("Petal_Width","Petal_Length"), output_col="features") %>%
ml_kmeans(k = 3)
有人可以帮助我解决我在 R sparklyr 中遇到的错误吗?
kmeans_model <- iris_tbl %>%
select(Petal_Width, Petal_Length) %>%
ml_kmeans(centers = 3)
Error: java.lang.IllegalArgumentException: Field "features" does not exist. Available fields: Petal_Width, Petal_Length
at org.apache.spark.sql.types.StructType$$anonfun$apply.apply(StructType.scala:274) at org.apache.spark.sql.types.StructType$$anonfun$apply.apply(StructType.scala:274) at scala.collection.MapLike$class.getOrElse(MapLike.scala:128) at scala.collection.AbstractMap.getOrElse(Map.scala:59) at org.apache.spark.sql.types.StructType.apply(StructType.scala:273) at org.apache.spark.ml.util.SchemaUtils$.checkColumnTypes(SchemaUtils.scala:58) at org.apache.spark.ml.util.SchemaUtils$.validateVectorCompatibleColumn(SchemaUtils.scala:119) at org.apache.spark.ml.clustering.KMeansParams$class.validateAndTransformSchema(KMeans.scala:96) at org.apache.spark.ml.clustering.KMeans.validateAndTransformSchema(KMeans.scala:285) at org.apache.spark.ml.clustering.KMeans.transformSchema(KMeans.scala:382) at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:74) at org.apache.spark.ml.clustering.KMeans$$anonfun$fit.apply(KMeans.scala:341) at org.apache.spark.ml.clustering.KMeans$$anonfun$fit.apply(KMeans.scala:340) at org.apache.spark.ml.util.Instrumentation$$anonfun.apply(Instrumentation.scala:183) at scala.util.Try$.apply(Try.scala:192) at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183) at org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:340) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) at java.lang.reflect.Method.invoke(Unknown Source) at sparklyr.Invoke.invoke(invoke.scala:139) at sparklyr.StreamHandler.handleMethodCall(stream.scala:123) at sparklyr.StreamHandler.read(stream.scala:66) at sparklyr.BackendHandler.channelRead0(handler.scala:51) at sparklyr.BackendHandler.channelRead0(handler.scala:4) at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105) at 
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340) at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340) at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310) at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:284) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340) at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1359) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:935) at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:138) at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:645) at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580) at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497) 
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459) at io.netty.util.concurrent.SingleThreadEventExecutor.run(SingleThreadEventExecutor.java:858) at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138) at java.lang.Thread.run(Unknown Source)
Warning: Some components of ... were not used: centers
我已经尝试过其他写法,但它无法生成 3 个聚类,只能得到 2 个:
kmeans_model <- iris_tbl %>%
ml_kmeans(formula= ~ Petal_Width + Petal_Length, centers = 3)
#Warning: Some components of ... were not used: centers
print(kmeans_model)
#K-means clustering with 2 clusters
#
#Cluster centers:
# Petal_Width Petal_Length
#1 1.6818182 4.925253
#2 0.2627451 1.492157
#
#Within Set Sum of Squared Errors = 86.39022
你的错误的第一行非常直截了当:
Field "features" does not exist.
如果您查看 ?ml_kmeans 的文档,您会发现您需要指定一个公式(您的第二次尝试)或 features_col。快速说明:在 Spark 中,模型的特征预计会被矢量化(合并)到 data.frame 的单独一列中。
您的第二条 error/warning 消息也很直接:
Warning: Some components of ... were not used: centers
centers 不是 ml_kmeans 中的参数。你要用的是 k:
kmeans_model <- iris_tbl %>%
ml_kmeans(formula= ~ Petal_Width + Petal_Length, k = 3)
kmeans_model
# K-means clustering with 3 clusters
#
# Cluster centers:
# Petal_Width Petal_Length
# 1 1.359259 4.292593
# 2 0.246000 1.462000
# 3 2.047826 5.626087
#
# Within Set Sum of Squared Errors = 31.41289
要在不使用公式的情况下运行,你需要使用 ft_vector_assembler:
kmeans_model <- iris_tbl %>%
ft_vector_assembler(input_cols=c("Petal_Width","Petal_Length"), output_col="features") %>%
ml_kmeans(k = 3)