在 scalacheck 中生成递归结构
Generating recursive structures in scalacheck
我正在尝试为名为 Row 的递归数据类型创建一个生成器。一个 Row 是由带名称的 Val 组成的列表,其中每个 Val 要么是原子的 Bin,要么是嵌套的 Row。
这是我的代码:
package com.dtci.data.anonymize.parquet
import java.nio.charset.StandardCharsets
import org.scalacheck.Gen
// Demo program from the question: builds ScalaCheck generators for the recursive
// Row/Val data type and prints the fields of one sampled Row.
// This is the failing version; it is kept unchanged because the stack trace that
// follows refers to it.
object TestApp extends App {
// A field value: either an atomic Bin or a nested Row.
sealed trait Val
// Atomic value wrapping a byte array.
case class Bin(bytes: Array[Byte]) extends Val
object Bin {
// Builds a Bin from the UTF-8 bytes of `str`.
def from_string(str: String): Bin = Bin(str.getBytes(StandardCharsets.UTF_8))
}
// A list of (field name, value) pairs; Row is itself a Val, which makes the type recursive.
case class Row(flds: List[(String, Val)]) extends Val
// Generator for Bin values built from alphabetic strings.
val gen_bin = Gen.alphaStr.map(Bin.from_string)
// Generator for field names (lower-case alphabetic strings).
val gen_field_name = Gen.alphaLowerStr
// Calls gen_val (a def declared further down), so Gen.oneOf(gen_bin, gen_row) is
// evaluated here, before the gen_row definition on the next line has run.
// NOTE(review): gen_row is presumably still null at this point, which would explain
// the NullPointerException — confirm by checking the field initialization order.
val gen_field = Gen.zip(gen_field_name, gen_val)
// Generator for a Row with at least one field.
val gen_row = Gen.nonEmptyListOf(gen_field).map(Row.apply)
// Picks between an atomic Bin and a nested Row; reads gen_row at the moment it is called.
def gen_val: Gen[Val] = Gen.oneOf(gen_bin, gen_row)
// Samples one Row and prints each field as "name --> value".
gen_row.sample.get.flds.foreach( fld => println(s"${fld._1} --> ${fld._2}"))
}
它因以下堆栈跟踪而崩溃:
Exception in thread "main" java.lang.NullPointerException
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$R.flatMap(Gen.scala:243)
at org.scalacheck.Gen$R.flatMap$(Gen.scala:240)
at org.scalacheck.Gen$R$$anon.flatMap(Gen.scala:228)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen.$anonfun$map(Gen.scala:79)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$R.flatMap(Gen.scala:243)
at org.scalacheck.Gen$R.flatMap$(Gen.scala:240)
at org.scalacheck.Gen$R$$anon.flatMap(Gen.scala:228)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen$.$anonfun$sequence(Gen.scala:492)
at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:168)
at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:164)
at scala.collection.immutable.List.foldLeft(List.scala:79)
at org.scalacheck.Gen$.$anonfun$sequence(Gen.scala:490)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen.$anonfun$map(Gen.scala:79)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$R.flatMap(Gen.scala:243)
at org.scalacheck.Gen$R.flatMap$(Gen.scala:240)
at org.scalacheck.Gen$R$$anon.flatMap(Gen.scala:228)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$.$anonfun$sized(Gen.scala:551)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen.$anonfun$map(Gen.scala:79)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen.sample(Gen.scala:154)
我的代码有什么问题,我自己诊断它的最佳方法是什么?
请注意,我已经看到一些评论指出 Gen.oneOf 是严格求值的,因此递归结构需要使用 Gen.lzy。但是,如果我在代码中把 gen_val 的定义包装在 Gen.lzy(...) 中,得到的就不再是当前的空指针异常,而是堆栈溢出。
首先,使用 object Main extends App 时要小心。我发现它的字段初始化语义不如普通的 main 方法(按语句逐行执行)那样直观:
// Skeleton of an explicit entry point: the program body goes inside main
// (`{...}` is a placeholder for that body) rather than in the object body.
object Main {
def main(args: Array[String]): Unit = {...}
}
这很可能就是 NullPointerException 的原因。
通常,可以通过仔细检查字段初始化顺序,并将部分(或全部)val 标记为 lazy 来解决此问题。
StackOverflowError 是因为生成的数据结构嵌套得太深。
通常,在处理任何类型的递归时,请始终考虑基本情况(即递归应在何时停止),并确保每一步最终都会到达基本情况。
在您的具体情况下,我们可以利用 Gen.sized 和 Gen.resize,它们用于控制所生成元素的“大小”(更多信息请查阅文档或自行搜索)。下面的代码在每一层嵌套的 Row 中把 size 减半;当 size 降到 0 时,gen_val 只生成 Bin,递归因此终止。
package com.dtci.data.anonymize.parquet
import java.nio.charset.StandardCharsets
import org.scalacheck.Gen
/** Demo: size-bounded ScalaCheck generators for the recursive Row/Val data type.
  *
  * Uses an explicit `main` instead of `extends App`, so the generator fields are
  * initialized by the ordinary object initializer, in declaration order, before
  * `main` runs.
  */
object Main {

  /** A field value: either an atomic [[Bin]] or a nested [[Row]]. */
  sealed trait Val

  /** Atomic value wrapping a byte array. */
  case class Bin(bytes: Array[Byte]) extends Val

  object Bin {
    /** Builds a [[Bin]] from the UTF-8 bytes of `str`. */
    def from_string(str: String): Bin = Bin(str.getBytes(StandardCharsets.UTF_8))
  }

  /** A list of (field name, value) pairs; being a [[Val]] itself makes the type recursive. */
  case class Row(flds: List[(String, Val)]) extends Val

  /** Generator for atomic values built from alphabetic strings. */
  val gen_bin: Gen[Bin] = Gen.alphaStr.map(Bin.from_string)

  /** Generator for field names (lower-case alphabetic strings). */
  val gen_field_name: Gen[String] = Gen.alphaLowerStr

  /** Generator for one named field.
    *
    * Calling `gen_val` here is safe: it only builds a `Gen.sized` wrapper, and the
    * function inside it reads `gen_row` when a value is generated, not when it is built.
    */
  val gen_field: Gen[(String, Val)] = Gen.zip(gen_field_name, gen_val)

  /** Generator for a non-empty Row; halves the size parameter at each nesting level. */
  val gen_row: Gen[Row] =
    Gen.sized(size => Gen.resize(size / 2, Gen.nonEmptyListOf(gen_field).map(Row.apply)))

  /** Generator for a field value.
    *
    * Base case: once the size parameter reaches 0 only atomic values are produced,
    * which stops the recursion.
    */
  def gen_val: Gen[Val] = Gen.sized { size =>
    if (size <= 0) gen_bin
    else Gen.oneOf(gen_bin, gen_row)
  }

  /** Samples one Row and prints each field as "name --> value". */
  def main(args: Array[String]): Unit =
    // `sample` returns an Option; print nothing if no value could be generated.
    gen_row.sample.foreach { row =>
      row.flds.foreach(fld => println(s"${fld._1} --> ${fld._2}"))
    }
}
我正在尝试为名为 Row 的递归数据类型创建一个生成器。一个 Row 是由带名称的 Val 组成的列表,其中每个 Val 要么是原子的 Bin,要么是嵌套的 Row。
这是我的代码:
package com.dtci.data.anonymize.parquet
import java.nio.charset.StandardCharsets
import org.scalacheck.Gen
// Demo program from the question: builds ScalaCheck generators for the recursive
// Row/Val data type and prints the fields of one sampled Row.
// This is the failing version; it is kept unchanged because the stack trace that
// follows refers to it.
object TestApp extends App {
// A field value: either an atomic Bin or a nested Row.
sealed trait Val
// Atomic value wrapping a byte array.
case class Bin(bytes: Array[Byte]) extends Val
object Bin {
// Builds a Bin from the UTF-8 bytes of `str`.
def from_string(str: String): Bin = Bin(str.getBytes(StandardCharsets.UTF_8))
}
// A list of (field name, value) pairs; Row is itself a Val, which makes the type recursive.
case class Row(flds: List[(String, Val)]) extends Val
// Generator for Bin values built from alphabetic strings.
val gen_bin = Gen.alphaStr.map(Bin.from_string)
// Generator for field names (lower-case alphabetic strings).
val gen_field_name = Gen.alphaLowerStr
// Calls gen_val (a def declared further down), so Gen.oneOf(gen_bin, gen_row) is
// evaluated here, before the gen_row definition on the next line has run.
// NOTE(review): gen_row is presumably still null at this point, which would explain
// the NullPointerException — confirm by checking the field initialization order.
val gen_field = Gen.zip(gen_field_name, gen_val)
// Generator for a Row with at least one field.
val gen_row = Gen.nonEmptyListOf(gen_field).map(Row.apply)
// Picks between an atomic Bin and a nested Row; reads gen_row at the moment it is called.
def gen_val: Gen[Val] = Gen.oneOf(gen_bin, gen_row)
// Samples one Row and prints each field as "name --> value".
gen_row.sample.get.flds.foreach( fld => println(s"${fld._1} --> ${fld._2}"))
}
它因以下堆栈跟踪而崩溃:
Exception in thread "main" java.lang.NullPointerException
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$R.flatMap(Gen.scala:243)
at org.scalacheck.Gen$R.flatMap$(Gen.scala:240)
at org.scalacheck.Gen$R$$anon.flatMap(Gen.scala:228)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen.$anonfun$map(Gen.scala:79)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$R.flatMap(Gen.scala:243)
at org.scalacheck.Gen$R.flatMap$(Gen.scala:240)
at org.scalacheck.Gen$R$$anon.flatMap(Gen.scala:228)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen$.$anonfun$sequence(Gen.scala:492)
at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:168)
at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:164)
at scala.collection.immutable.List.foldLeft(List.scala:79)
at org.scalacheck.Gen$.$anonfun$sequence(Gen.scala:490)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen.$anonfun$map(Gen.scala:79)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$R.flatMap(Gen.scala:243)
at org.scalacheck.Gen$R.flatMap$(Gen.scala:240)
at org.scalacheck.Gen$R$$anon.flatMap(Gen.scala:228)
at org.scalacheck.Gen.$anonfun$flatMap(Gen.scala:84)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$.$anonfun$sized(Gen.scala:551)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen$$anon.$anonfun$doApply(Gen.scala:110)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:109)
at org.scalacheck.Gen.$anonfun$map(Gen.scala:79)
at org.scalacheck.Gen$Parameters.useInitialSeed(Gen.scala:318)
at org.scalacheck.Gen$$anon.doApply(Gen.scala:255)
at org.scalacheck.Gen.sample(Gen.scala:154)
我的代码有什么问题,我自己诊断它的最佳方法是什么?
请注意,我已经看到一些评论指出 Gen.oneOf 是严格求值的,因此递归结构需要使用 Gen.lzy。但是,如果我在代码中把 gen_val 的定义包装在 Gen.lzy(...) 中,得到的就不再是当前的空指针异常,而是堆栈溢出。
首先,使用 object Main extends App 时要小心。我发现它的字段初始化语义不如普通的 main 方法(按语句逐行执行)那样直观:
// Skeleton of an explicit entry point: the program body goes inside main
// (`{...}` is a placeholder for that body) rather than in the object body.
object Main {
def main(args: Array[String]): Unit = {...}
}
这很可能就是 NullPointerException 的原因。
通常,可以通过仔细检查字段初始化顺序,并将部分(或全部)val 标记为 lazy 来解决此问题。
StackOverflowError 是因为生成的数据结构嵌套得太深。
通常,在处理任何类型的递归时,请始终考虑基本情况(即递归应在何时停止),并确保每一步最终都会到达基本情况。
在您的具体情况下,我们可以利用 Gen.sized 和 Gen.resize,它们用于控制所生成元素的“大小”(更多信息请查阅文档或自行搜索)。下面的代码在每一层嵌套的 Row 中把 size 减半;当 size 降到 0 时,gen_val 只生成 Bin,递归因此终止。
package com.dtci.data.anonymize.parquet
import java.nio.charset.StandardCharsets
import org.scalacheck.Gen
/** Demo: size-bounded ScalaCheck generators for the recursive Row/Val data type.
  *
  * Uses an explicit `main` instead of `extends App`, so the generator fields are
  * initialized by the ordinary object initializer, in declaration order, before
  * `main` runs.
  */
object Main {

  /** A field value: either an atomic [[Bin]] or a nested [[Row]]. */
  sealed trait Val

  /** Atomic value wrapping a byte array. */
  case class Bin(bytes: Array[Byte]) extends Val

  object Bin {
    /** Builds a [[Bin]] from the UTF-8 bytes of `str`. */
    def from_string(str: String): Bin = Bin(str.getBytes(StandardCharsets.UTF_8))
  }

  /** A list of (field name, value) pairs; being a [[Val]] itself makes the type recursive. */
  case class Row(flds: List[(String, Val)]) extends Val

  /** Generator for atomic values built from alphabetic strings. */
  val gen_bin: Gen[Bin] = Gen.alphaStr.map(Bin.from_string)

  /** Generator for field names (lower-case alphabetic strings). */
  val gen_field_name: Gen[String] = Gen.alphaLowerStr

  /** Generator for one named field.
    *
    * Calling `gen_val` here is safe: it only builds a `Gen.sized` wrapper, and the
    * function inside it reads `gen_row` when a value is generated, not when it is built.
    */
  val gen_field: Gen[(String, Val)] = Gen.zip(gen_field_name, gen_val)

  /** Generator for a non-empty Row; halves the size parameter at each nesting level. */
  val gen_row: Gen[Row] =
    Gen.sized(size => Gen.resize(size / 2, Gen.nonEmptyListOf(gen_field).map(Row.apply)))

  /** Generator for a field value.
    *
    * Base case: once the size parameter reaches 0 only atomic values are produced,
    * which stops the recursion.
    */
  def gen_val: Gen[Val] = Gen.sized { size =>
    if (size <= 0) gen_bin
    else Gen.oneOf(gen_bin, gen_row)
  }

  /** Samples one Row and prints each field as "name --> value". */
  def main(args: Array[String]): Unit =
    // `sample` returns an Option; print nothing if no value could be generated.
    gen_row.sample.foreach { row =>
      row.flds.foreach(fld => println(s"${fld._1} --> ${fld._2}"))
    }
}