Prove that a runtimeClass satisfies a type Bound in Scala
I have a method that writes one of my classes, Foo, which is defined in Thrift, out as Parquet:
import Foo
import org.apache.spark.rdd.RDD
import org.apache.thrift.TBase
import org.apache.hadoop.mapreduce.Job
import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.parquet.hadoop.thrift.ParquetThriftOutputFormat
def writeThriftParquet(rdd: RDD[Foo], outputPath: String): Unit = {
  val job = Job.getInstance()

  ParquetThriftOutputFormat.setThriftClass(job, classOf[Foo])
  ParquetOutputFormat.setWriteSupportClass(job, classOf[Foo])

  rdd
    .map(x => (null, x))
    .saveAsNewAPIHadoopFile(
      outputPath,
      classOf[Void],
      classOf[Foo],
      classOf[ParquetThriftOutputFormat[Foo]],
      job.getConfiguration)
}
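For context, a call site might look like the following. This is purely a hypothetical sketch: the SparkContext, the sample Foo values, and the output path are my assumptions, not part of the original code.

import org.apache.spark.SparkContext

// Assumes an existing SparkContext and that Foo, being Thrift-generated,
// has a no-arg constructor.
val sc: SparkContext = ???
val foos: RDD[Foo] = sc.parallelize(Seq(new Foo(), new Foo()))
writeThriftParquet(foos, "/tmp/foo.parquet")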
This works fine, but I'd rather write a more general method. I tried the (relatively) simple:
def writeThriftParquetGeneral[A <: TBase[_, _]](rdd: RDD[A], outputPath: String): Unit = {
  val job = Job.getInstance()

  ParquetThriftOutputFormat.setThriftClass(job, classOf[A])
  ParquetOutputFormat.setWriteSupportClass(job, classOf[A])

  rdd
    .map(x => (null, x))
    .saveAsNewAPIHadoopFile(
      outputPath,
      classOf[Void],
      classOf[A],
      classOf[ParquetThriftOutputFormat[A]],
      job.getConfiguration)
}
but it fails with errors like:

class type required but A found
    ParquetThriftOutputFormat.setThriftClass(job, classOf[A])
class type required but A found
    ParquetOutputFormat.setWriteSupportClass(job, classOf[A])
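The error is pointing at classOf itself: it only works for a statically known class, never for an abstract type parameter, because A is erased at runtime. A minimal sketch of the distinction, assuming nothing beyond the standard library:

import scala.reflect.ClassTag

// Does not compile: A is erased, so there is no class literal for it.
//   def bad[A]: Class[_] = classOf[A]   // error: class type required but A found

// Compiles: the ClassTag captures the concrete class at each call site.
def good[A](implicit ct: ClassTag[A]): Class[_] = ct.runtimeClass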
To get around this, I reached for a ClassTag, but haven't gotten anything to compile yet:
import scala.reflect._

implicit val ct = ClassTag[Foo](classOf[Foo])

def writeThriftParquetGeneral[A <: TBase[_, _]](rdd: RDD[A], outputPath: String)(
    implicit tag: ClassTag[A]): Unit = {
  val job = Job.getInstance()

  // The problem line
  ParquetThriftOutputFormat.setThriftClass(job, tag.runtimeClass)

  // Seems OK from here
  ParquetOutputFormat.setWriteSupportClass(job, tag.runtimeClass)

  rdd
    .map(x => (null, x))
    .saveAsNewAPIHadoopFile(
      outputPath,
      classOf[Void],
      tag.runtimeClass,
      classOf[ParquetThriftOutputFormat[A]],
      job.getConfiguration)
}
This fails at the line ParquetThriftOutputFormat.setThriftClass(job, tag.runtimeClass):

[error] found   : Class[_] where type _
[error] required: Class[_ <: org.apache.thrift.TBase[_, _]]
I'm surprised the compiler (Scala 2.11) doesn't recognize that tag.runtimeClass must be classOf[A], and that A satisfies the type bound by definition.
ClassTag#runtimeClass returns only a Class[_]:
https://github.com/scala/scala/blob/2.13.x/src/library/scala/reflect/ClassTag.scala#L55

Class[_ <: TBase[_, _]] is a different existential type from Class[_] (in fact, a subtype of it):

implicitly[Class[_ <: TBase[_, _]] <:< Class[_]]
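To make the one-way relationship concrete, here is a minimal sketch (Foo stands in for the question's Thrift-generated class):

import org.apache.thrift.TBase

// Compiles: the narrowed existential is a subtype of the wide one.
implicitly[Class[_ <: TBase[_, _]] <:< Class[_]]

// Does not compile: an arbitrary Class[_] carries no evidence that it
// describes a TBase subclass.
//   implicitly[Class[_] <:< Class[_ <: TBase[_, _]]]

// asSubclass narrows at runtime with a checked cast, throwing
// ClassCastException if the class does not actually extend TBase.
val wide: Class[_] = classOf[Foo]
val narrowed: Class[_ <: TBase[_, _]] = wide.asSubclass(classOf[TBase[_, _]])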
Try replacing the problem line with:

ParquetThriftOutputFormat.setThriftClass(job, tag.runtimeClass.asSubclass(classOf[TBase[_, _]]))
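Putting it together, a sketch of the generic method with that fix applied (identical to the question's version except for the asSubclass call):

import scala.reflect._
import org.apache.spark.rdd.RDD
import org.apache.thrift.TBase
import org.apache.hadoop.mapreduce.Job
import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.parquet.hadoop.thrift.ParquetThriftOutputFormat

def writeThriftParquetGeneral[A <: TBase[_, _]](rdd: RDD[A], outputPath: String)(
    implicit tag: ClassTag[A]): Unit = {
  val job = Job.getInstance()

  // Narrow Class[_] to Class[_ <: TBase[_, _]]; the bound A <: TBase[_, _]
  // guarantees the runtime check always succeeds.
  val thriftClass = tag.runtimeClass.asSubclass(classOf[TBase[_, _]])
  ParquetThriftOutputFormat.setThriftClass(job, thriftClass)
  ParquetOutputFormat.setWriteSupportClass(job, tag.runtimeClass)

  rdd
    .map(x => (null, x))
    .saveAsNewAPIHadoopFile(
      outputPath,
      classOf[Void],
      tag.runtimeClass,
      classOf[ParquetThriftOutputFormat[A]],
      job.getConfiguration)
}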