使用 scalatest-embedded-kafka 集成测试 Flink 和 Kafka
Integration test Flink and Kafka with scalatest-embedded-kafka
我想对 Flink 和 Kafka 运行集成测试。流程是:从 Kafka 读取数据,用 Flink 做一些处理,然后把数据流(DataStream)写回 Kafka。
我想从头到尾测试整个流程。目前我使用的是 scalatest-embedded-kafka。
下面是一个示例,我已尽量保持简单:
import java.util.Properties
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer011, FlinkKafkaProducer011}
import org.scalatest.{Matchers, WordSpec}
import scala.collection.mutable.ListBuffer
/**
 * Shared fixtures for the `SimpleFlinkKafkaTest` spec: the embedded broker ports,
 * the Kafka client properties handed to the Flink connectors, and the sample
 * input / expected output strings.
 */
object SimpleFlinkKafkaTest {

  /** Sink that appends every record it receives to the shared `CollectSink.values` buffer. */
  class CollectSink extends SinkFunction[String] {
    override def invoke(string: String): Unit = {
      // A bare `synchronized` would lock on this sink instance only. The buffer lives in
      // the companion object and is shared by every instance, so lock on the buffer itself.
      CollectSink.values.synchronized {
        CollectSink.values += string
      }
    }
  }

  object CollectSink {
    /** Records collected by all `CollectSink` instances; guard access with `values.synchronized`. */
    val values: ListBuffer[String] = ListBuffer.empty[String]
  }

  val kafkaPort: Int = 9092
  val zooKeeperPort: Int = 2181

  /** Kafka client properties passed to both the Flink Kafka consumer and producer. */
  val props: Properties = new Properties()
  props.setProperty("bootstrap.servers", s"localhost:$kafkaPort")
  // NOTE(review): this points a schema-registry URL at the ZooKeeper port and nothing in this
  // file reads it. Kept only for backward compatibility; confirm whether it can be dropped.
  props.setProperty("schema.registry.url", s"localhost:$zooKeeperPort")
  // The test publishes its input record before the Flink job starts. Without a consumer group
  // and an "earliest" reset policy the consumer may start at the end of the topic and never
  // see that record.
  props.setProperty("group.id", "simple-flink-kafka-test")
  props.setProperty("auto.offset.reset", "earliest")

  val inputString: String = "mystring"
  val expectedString: String = "MYSTRING"
}
/**
 * End-to-end test of a Kafka -> Flink -> Kafka pipeline against an embedded broker:
 * a record is published to "input-topic", upper-cased by a Flink job, and read back
 * from "output-topic".
 */
class SimpleFlinkKafkaTest extends WordSpec with Matchers with EmbeddedKafka {

  "runs with embedded kafka" should {
    "work" in {
      // Local import so this block does not depend on the file's import header.
      import scala.concurrent.{blocking, ExecutionContext, Future}

      implicit val config: EmbeddedKafkaConfig = EmbeddedKafkaConfig(
        kafkaPort = SimpleFlinkKafkaTest.kafkaPort,
        zooKeeperPort = SimpleFlinkKafkaTest.zooKeeperPort
      )

      // `addSource` and `map` need an implicit TypeInformation. This file imports only
      // `StreamExecutionEnvironment`, not the whole `org.apache.flink.streaming.api.scala._`
      // package, so the instance has to be supplied by hand.
      implicit val typeInfo: TypeInformation[String] = TypeInformation.of(classOf[String])

      withRunningKafka {
        // Create both topics up front instead of relying on broker-side auto-creation.
        createCustomTopic("input-topic")
        createCustomTopic("output-topic")
        publishStringMessageToKafka("input-topic", SimpleFlinkKafkaTest.inputString)

        // The record above is published before the job starts, so the consumer must begin
        // at the earliest offset or it would wait for records that never arrive.
        val consumerProps = new Properties()
        consumerProps.setProperty("bootstrap.servers", s"localhost:${SimpleFlinkKafkaTest.kafkaPort}")
        consumerProps.setProperty("group.id", "simple-flink-kafka-test")
        consumerProps.setProperty("auto.offset.reset", "earliest")

        val env = StreamExecutionEnvironment.getExecutionEnvironment
        env.setParallelism(1)

        env
          .addSource(new FlinkKafkaConsumer011[String]("input-topic", new SimpleStringSchema(), consumerProps))
          .map(_.toUpperCase)
          .addSink(
            new FlinkKafkaProducer011[String]("output-topic", new SimpleStringSchema(), SimpleFlinkKafkaTest.props)
          )

        // A Kafka source is unbounded, so `env.execute()` does not return on its own. Calling
        // it on the test thread kept the assertion below from ever running. Run the job on
        // another thread instead.
        // NOTE(review): the job is not cancelled when the test finishes — confirm whether the
        // test JVM's shutdown is enough for this suite.
        Future(blocking(env.execute()))(ExecutionContext.global)

        consumeFirstStringMessageFrom("output-topic") shouldEqual SimpleFlinkKafkaTest.expectedString
      }
    }
  }
}
我遇到了错误,所以我添加了 implicit val typeInfo = TypeInformation.of(classOf[String])
行,但我真的不明白为什么我必须这样做。
目前这段代码不起作用:它可以运行,也不会报错中断,但一直不会结束,也不给出任何结果。
有人有什么想法吗?如果有更好的办法来测试这类管道,也欢迎指出。
谢谢!
编辑:添加了 env.execute(),
并更新了错误描述。
这是我想出的一个简单的解决方案。
想法是:
- 启动 Kafka 嵌入式服务器
- 创建你的测试主题(这里是输入和输出)
- 在 Future 中启动 Flink 作业以避免阻塞主线程
- 向输入主题发布消息
- 检查输出主题的结果
工作原型:
import java.util.Properties
import org.apache.flink.streaming.api.scala._
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.core.fs.FileSystem.WriteMode
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer011, FlinkKafkaProducer011}
import org.scalatest.{Matchers, WordSpec}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
/**
 * End-to-end test of a Kafka -> Flink -> Kafka pipeline: starts an embedded Kafka,
 * runs the Flink job in the background, publishes "Titi" to the "input" topic and
 * expects "TITI" on the "output" topic.
 */
class SimpleFlinkKafkaTest extends WordSpec with Matchers with EmbeddedKafka {

  "runs with embedded kafka on arbitrary available ports" should {
    "work" in {
      val userDefinedConfig = EmbeddedKafkaConfig(kafkaPort = 9092, zooKeeperPort = 2182)

      withRunningKafkaOnFoundPort(userDefinedConfig) { implicit actualConfig =>
        // Read the addresses from the config the embedded broker actually started with,
        // rather than repeating the port literals, so the clients always match the broker.
        val properties = new Properties()
        properties.setProperty("bootstrap.servers", s"localhost:${actualConfig.kafkaPort}")
        properties.setProperty("zookeeper.connect", s"localhost:${actualConfig.zooKeeperPort}")
        properties.setProperty("group.id", "test")
        properties.setProperty("auto.offset.reset", "earliest")

        createCustomTopic("input")
        createCustomTopic("output")

        // Build the environment inside the test body; code placed directly in the `should`
        // block runs when the suite is constructed, not when the test runs.
        val env = StreamExecutionEnvironment.getExecutionEnvironment
        env
          .addSource(new FlinkKafkaConsumer011[String]("input", new SimpleStringSchema(), properties))
          .map(_.toUpperCase)
          .addSink(new FlinkKafkaProducer011[String]("output", new SimpleStringSchema(), properties))

        // The Kafka source is unbounded, so run the job off the test thread. Otherwise
        // `execute()` would block and the publish/consume steps below would never run.
        // NOTE(review): the job is not cancelled when the test finishes — confirm this is
        // acceptable for the suite.
        Future(env.execute())

        publishStringMessageToKafka("input", "Titi")
        consumeFirstStringMessageFrom("output") shouldEqual "TITI"
      }
    }
  }
}
我想对 Flink 和 Kafka 运行集成测试。流程是:从 Kafka 读取数据,用 Flink 做一些处理,然后把数据流(DataStream)写回 Kafka。
我想从头到尾测试整个流程。目前我使用的是 scalatest-embedded-kafka。
下面是一个示例,我已尽量保持简单:
import java.util.Properties
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer011, FlinkKafkaProducer011}
import org.scalatest.{Matchers, WordSpec}
import scala.collection.mutable.ListBuffer
/**
 * Shared fixtures for the `SimpleFlinkKafkaTest` spec: the embedded broker ports,
 * the Kafka client properties handed to the Flink connectors, and the sample
 * input / expected output strings.
 */
object SimpleFlinkKafkaTest {

  /** Sink that appends every record it receives to the shared `CollectSink.values` buffer. */
  class CollectSink extends SinkFunction[String] {
    override def invoke(string: String): Unit = {
      // A bare `synchronized` would lock on this sink instance only. The buffer lives in
      // the companion object and is shared by every instance, so lock on the buffer itself.
      CollectSink.values.synchronized {
        CollectSink.values += string
      }
    }
  }

  object CollectSink {
    /** Records collected by all `CollectSink` instances; guard access with `values.synchronized`. */
    val values: ListBuffer[String] = ListBuffer.empty[String]
  }

  val kafkaPort: Int = 9092
  val zooKeeperPort: Int = 2181

  /** Kafka client properties passed to both the Flink Kafka consumer and producer. */
  val props: Properties = new Properties()
  props.setProperty("bootstrap.servers", s"localhost:$kafkaPort")
  // NOTE(review): this points a schema-registry URL at the ZooKeeper port and nothing in this
  // file reads it. Kept only for backward compatibility; confirm whether it can be dropped.
  props.setProperty("schema.registry.url", s"localhost:$zooKeeperPort")
  // The test publishes its input record before the Flink job starts. Without a consumer group
  // and an "earliest" reset policy the consumer may start at the end of the topic and never
  // see that record.
  props.setProperty("group.id", "simple-flink-kafka-test")
  props.setProperty("auto.offset.reset", "earliest")

  val inputString: String = "mystring"
  val expectedString: String = "MYSTRING"
}
/**
 * End-to-end test of a Kafka -> Flink -> Kafka pipeline against an embedded broker:
 * a record is published to "input-topic", upper-cased by a Flink job, and read back
 * from "output-topic".
 */
class SimpleFlinkKafkaTest extends WordSpec with Matchers with EmbeddedKafka {

  "runs with embedded kafka" should {
    "work" in {
      // Local import so this block does not depend on the file's import header.
      import scala.concurrent.{blocking, ExecutionContext, Future}

      implicit val config: EmbeddedKafkaConfig = EmbeddedKafkaConfig(
        kafkaPort = SimpleFlinkKafkaTest.kafkaPort,
        zooKeeperPort = SimpleFlinkKafkaTest.zooKeeperPort
      )

      // `addSource` and `map` need an implicit TypeInformation. This file imports only
      // `StreamExecutionEnvironment`, not the whole `org.apache.flink.streaming.api.scala._`
      // package, so the instance has to be supplied by hand.
      implicit val typeInfo: TypeInformation[String] = TypeInformation.of(classOf[String])

      withRunningKafka {
        // Create both topics up front instead of relying on broker-side auto-creation.
        createCustomTopic("input-topic")
        createCustomTopic("output-topic")
        publishStringMessageToKafka("input-topic", SimpleFlinkKafkaTest.inputString)

        // The record above is published before the job starts, so the consumer must begin
        // at the earliest offset or it would wait for records that never arrive.
        val consumerProps = new Properties()
        consumerProps.setProperty("bootstrap.servers", s"localhost:${SimpleFlinkKafkaTest.kafkaPort}")
        consumerProps.setProperty("group.id", "simple-flink-kafka-test")
        consumerProps.setProperty("auto.offset.reset", "earliest")

        val env = StreamExecutionEnvironment.getExecutionEnvironment
        env.setParallelism(1)

        env
          .addSource(new FlinkKafkaConsumer011[String]("input-topic", new SimpleStringSchema(), consumerProps))
          .map(_.toUpperCase)
          .addSink(
            new FlinkKafkaProducer011[String]("output-topic", new SimpleStringSchema(), SimpleFlinkKafkaTest.props)
          )

        // A Kafka source is unbounded, so `env.execute()` does not return on its own. Calling
        // it on the test thread kept the assertion below from ever running. Run the job on
        // another thread instead.
        // NOTE(review): the job is not cancelled when the test finishes — confirm whether the
        // test JVM's shutdown is enough for this suite.
        Future(blocking(env.execute()))(ExecutionContext.global)

        consumeFirstStringMessageFrom("output-topic") shouldEqual SimpleFlinkKafkaTest.expectedString
      }
    }
  }
}
我遇到了错误,所以我添加了 implicit val typeInfo = TypeInformation.of(classOf[String])
行,但我真的不明白为什么我必须这样做。
目前这段代码不起作用:它可以运行,也不会报错中断,但一直不会结束,也不给出任何结果。
有人有什么想法吗?如果有更好的办法来测试这类管道,也欢迎指出。
谢谢!
编辑:添加了 env.execute(),
并更新了错误描述。
这是我想出的一个简单的解决方案。
想法是:
- 启动 Kafka 嵌入式服务器
- 创建你的测试主题(这里是输入和输出)
- 在 Future 中启动 Flink 作业以避免阻塞主线程
- 向输入主题发布消息
- 检查输出主题的结果
工作原型:
import java.util.Properties
import org.apache.flink.streaming.api.scala._
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.core.fs.FileSystem.WriteMode
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer011, FlinkKafkaProducer011}
import org.scalatest.{Matchers, WordSpec}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
/**
 * End-to-end test of a Kafka -> Flink -> Kafka pipeline: starts an embedded Kafka,
 * runs the Flink job in the background, publishes "Titi" to the "input" topic and
 * expects "TITI" on the "output" topic.
 */
class SimpleFlinkKafkaTest extends WordSpec with Matchers with EmbeddedKafka {

  "runs with embedded kafka on arbitrary available ports" should {
    "work" in {
      val userDefinedConfig = EmbeddedKafkaConfig(kafkaPort = 9092, zooKeeperPort = 2182)

      withRunningKafkaOnFoundPort(userDefinedConfig) { implicit actualConfig =>
        // Read the addresses from the config the embedded broker actually started with,
        // rather than repeating the port literals, so the clients always match the broker.
        val properties = new Properties()
        properties.setProperty("bootstrap.servers", s"localhost:${actualConfig.kafkaPort}")
        properties.setProperty("zookeeper.connect", s"localhost:${actualConfig.zooKeeperPort}")
        properties.setProperty("group.id", "test")
        properties.setProperty("auto.offset.reset", "earliest")

        createCustomTopic("input")
        createCustomTopic("output")

        // Build the environment inside the test body; code placed directly in the `should`
        // block runs when the suite is constructed, not when the test runs.
        val env = StreamExecutionEnvironment.getExecutionEnvironment
        env
          .addSource(new FlinkKafkaConsumer011[String]("input", new SimpleStringSchema(), properties))
          .map(_.toUpperCase)
          .addSink(new FlinkKafkaProducer011[String]("output", new SimpleStringSchema(), properties))

        // The Kafka source is unbounded, so run the job off the test thread. Otherwise
        // `execute()` would block and the publish/consume steps below would never run.
        // NOTE(review): the job is not cancelled when the test finishes — confirm this is
        // acceptable for the suite.
        Future(env.execute())

        publishStringMessageToKafka("input", "Titi")
        consumeFirstStringMessageFrom("output") shouldEqual "TITI"
      }
    }
  }
}