均匀分布的比特序列
Uniformly distributed bit sequence
假设您有一个规则的数字生成器,它能够生成均匀分布的随机 32 位数字。并假设您正在寻找一种生成伪随机位序列的方法,其中 1(即设置的位)以预定义的概率出现在序列中。
生成这种序列的一种简单方法是 运行 每个位级别的数字生成器,但对于小概率(例如序列中 0.01 或 1% 的位,大多数位将为零)来说效率非常低。平均只有百分之一会被设置。另一方面,即使概率如此之低,也有机会遇到超出 8、16、32、64 位的长连续子序列。
问题是如何使用常规 PRNG 有效地生成这样的序列。
编辑
Peter O 建议的 javascript 中有理伯努利变量抽样的玩具实现:
// Based on
// https://arxiv.org/abs/1304.1916
// https://arxiv.org/pdf/1304.1916.pdf (page 21, figure 6)
class Xor128 {
constructor(x, y, z, w) {
this.x = x;
this.y = y;
this.z = z;
this.w = w;
}
prev() {
var t = this.w ^ this.z ^ (this.z >>> 19);
t ^= t >>> 8;
t ^= t >>> 16;
this.w = this.z;
this.z = this.y;
this.y = this.x;
t ^= t << 11;
t ^= t << 22;
this.x = t;
return this.w;
}
curr() {
return this.w;
}
next() {
var t = this.x ^ (this.x << 11);
this.x = this.y;
this.y = this.z;
this.z = this.w;
return this.w = this.w ^ (this.w >>> 19) ^ (t ^ (t >>> 8));
}
}
function* flip(xor128) {
while (true) {
var value = xor128.next();
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
}
}
function* bernoulli(flip, k, n) {
var b;
var v = k
for (const bit of flip) {
v <<= 1;
if (v >= n) {
v -= n;
b = 1;
} else {
b = 0;
}
if (bit === 1) {
yield b;
v = k;
}
}
}
var xor128 = new Xor128(1, 2, 3, 4);
var z = 0, o = 0;
var then = Date.now();
for (const value of bernoulli(flip(xor128), 5, 1000)) {
if (value === 0) {
z++;
} else {
o++;
}
if (Date.now() - then > 1000) {
console.log(`${z} ${o}`);
}
}
// Pieces of code to test out xor128:
//
// for (let index = 0; index < 100; index++) {
// console.log(xor128.curr())
// xor128.next();
// }
// console.log('-----------------------------------')
// for (let index = 0; index < 100; index++) {
// xor128.prev();
// console.log(xor128.curr())
// }
另一个编辑
下面的代码是用 C# 实现的,每秒产生 9120 万比特,打包成 UInt32 数据类型 (MacBookPro 2019 Core I9 2.4 Ghz)。我认为在 C 中有可能获得超过 1 亿位,而且感觉有可能进一步利用二进制算法并行生成所有 32 位随机数,一些循环展开或者 SIMD 不确定,无论如何这里是代码:
public class Bernoulli
{
public UInt32 X { get; set; }
public UInt32 Y { get; set; }
public UInt32 Z { get; set; }
public UInt32 W { get; set; }
public Bernoulli()
: this(Guid.NewGuid())
{
}
public Bernoulli(Guid guid)
{
var index = 0;
var bytes = guid.ToByteArray();
X = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
Y = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
Z = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
W = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
}
public Bernoulli(UInt32 x, UInt32 y, UInt32 z, UInt32 w)
{
X = x;
Y = y;
Z = z;
W = w;
}
UInt64 bits = 0;
UInt32 bitsCount = 0;
public UInt32 Next(UInt32 k, UInt32 n)
{
UInt32 b;
var c = 0;
var v = k;
var r = 0u;
// ------------------------
do
{
while (bitsCount <= 32)
{
b = X ^ (X << 11);
X = Y;
Y = Z;
Z = W;
bits <<= 32;
bits |= ((UInt64)(W = W ^ (W >> 19) ^ (b ^ (b >> 8))));
bitsCount += 32;
}
while (c < 32 && 0 < bitsCount)
{
v <<= 1;
// Two lines of code below is a two step optimization:
// First we optimize the following statement:
//
// if (v >= n)
// {
// v -= n;
// b = 1;
// }
// else
// {
// b = 0;
// }
//
// into the following:
//
// var b = v < n ? 0u : 1u;
// v -= b * n
//
// thus reducing branching, but we would like also to omit
// multiplication, which we can do through:
b = v < n ? 0u : 0xFFFFFFFFu;
v -= b & n;
if ((bits & 1) == 1)
{
r |= b & 1;
r <<= 1;
v = k;
c++;
}
bits >>= 1;
bitsCount--;
}
} while (c < 32);
return r;
}
}
这个问题可以重述为:
- 生成区间[0,N]内的随机整数。
- 如果整数为 0,则输出 1,否则为 0。
有多种方法可以generate random integers in a range from a random bit stream. Of these, J. Lumbroso showed an optimal way to solve this problem given a random bit stream ("Optimal Discrete Uniform Generation from Coin Flips, and Applications", 2013). (However, Appendix B of that paper also points out a solution to your direct problem: generating 0 or 1 with a given probability.) Other ways include those mentioned in "Efficiently Generating a Random Number in a Range" as well as a brand-new algorithm, the "Fast Loaded Dice Roller"。
另一种可能的方法。假设您想要 5% 的 1 位和 95% 的 0 位:
bitArray = [1, 1, 1, 1, 1, 0, ... 0, 0]; // 95 0s
bitArray.shuffle();
从打乱的 bitArray
中拉出你想要的位。如果需要,您可以使用 32 位 RNG 创建 shuffle()
方法。
假设您有一个规则的数字生成器,它能够生成均匀分布的随机 32 位数字。并假设您正在寻找一种生成伪随机位序列的方法,其中 1(即设置的位)以预定义的概率出现在序列中。
生成这种序列的一种简单方法是 运行 每个位级别的数字生成器,但对于小概率(例如序列中 0.01 或 1% 的位,大多数位将为零)来说效率非常低。平均只有百分之一会被设置。另一方面,即使概率如此之低,也有机会遇到超出 8、16、32、64 位的长连续子序列。
问题是如何使用常规 PRNG 有效地生成这样的序列。
编辑
Peter O 建议的 javascript 中有理伯努利变量抽样的玩具实现:
// Based on
// https://arxiv.org/abs/1304.1916
// https://arxiv.org/pdf/1304.1916.pdf (page 21, figure 6)
class Xor128 {
constructor(x, y, z, w) {
this.x = x;
this.y = y;
this.z = z;
this.w = w;
}
prev() {
var t = this.w ^ this.z ^ (this.z >>> 19);
t ^= t >>> 8;
t ^= t >>> 16;
this.w = this.z;
this.z = this.y;
this.y = this.x;
t ^= t << 11;
t ^= t << 22;
this.x = t;
return this.w;
}
curr() {
return this.w;
}
next() {
var t = this.x ^ (this.x << 11);
this.x = this.y;
this.y = this.z;
this.z = this.w;
return this.w = this.w ^ (this.w >>> 19) ^ (t ^ (t >>> 8));
}
}
function* flip(xor128) {
while (true) {
var value = xor128.next();
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
yield value & 1; value >>>= 1;
}
}
function* bernoulli(flip, k, n) {
var b;
var v = k
for (const bit of flip) {
v <<= 1;
if (v >= n) {
v -= n;
b = 1;
} else {
b = 0;
}
if (bit === 1) {
yield b;
v = k;
}
}
}
var xor128 = new Xor128(1, 2, 3, 4);
var z = 0, o = 0;
var then = Date.now();
for (const value of bernoulli(flip(xor128), 5, 1000)) {
if (value === 0) {
z++;
} else {
o++;
}
if (Date.now() - then > 1000) {
console.log(`${z} ${o}`);
}
}
// Pieces of code to test out xor128:
//
// for (let index = 0; index < 100; index++) {
// console.log(xor128.curr())
// xor128.next();
// }
// console.log('-----------------------------------')
// for (let index = 0; index < 100; index++) {
// xor128.prev();
// console.log(xor128.curr())
// }
另一个编辑
下面的代码是用 C# 实现的,每秒产生 9120 万比特,打包成 UInt32 数据类型 (MacBookPro 2019 Core I9 2.4 Ghz)。我认为在 C 中有可能获得超过 1 亿位,而且感觉有可能进一步利用二进制算法并行生成所有 32 位随机数,一些循环展开或者 SIMD 不确定,无论如何这里是代码:
public class Bernoulli
{
public UInt32 X { get; set; }
public UInt32 Y { get; set; }
public UInt32 Z { get; set; }
public UInt32 W { get; set; }
public Bernoulli()
: this(Guid.NewGuid())
{
}
public Bernoulli(Guid guid)
{
var index = 0;
var bytes = guid.ToByteArray();
X = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
Y = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
Z = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
W = (UInt32)((bytes[index++] << 24) | (bytes[index++] << 16) | (bytes[index++] << 8) | bytes[index++]);
}
public Bernoulli(UInt32 x, UInt32 y, UInt32 z, UInt32 w)
{
X = x;
Y = y;
Z = z;
W = w;
}
UInt64 bits = 0;
UInt32 bitsCount = 0;
public UInt32 Next(UInt32 k, UInt32 n)
{
UInt32 b;
var c = 0;
var v = k;
var r = 0u;
// ------------------------
do
{
while (bitsCount <= 32)
{
b = X ^ (X << 11);
X = Y;
Y = Z;
Z = W;
bits <<= 32;
bits |= ((UInt64)(W = W ^ (W >> 19) ^ (b ^ (b >> 8))));
bitsCount += 32;
}
while (c < 32 && 0 < bitsCount)
{
v <<= 1;
// Two lines of code below is a two step optimization:
// First we optimize the following statement:
//
// if (v >= n)
// {
// v -= n;
// b = 1;
// }
// else
// {
// b = 0;
// }
//
// into the following:
//
// var b = v < n ? 0u : 1u;
// v -= b * n
//
// thus reducing branching, but we would like also to omit
// multiplication, which we can do through:
b = v < n ? 0u : 0xFFFFFFFFu;
v -= b & n;
if ((bits & 1) == 1)
{
r |= b & 1;
r <<= 1;
v = k;
c++;
}
bits >>= 1;
bitsCount--;
}
} while (c < 32);
return r;
}
}
这个问题可以重述为:
- 生成区间[0,N]内的随机整数。
- 如果整数为 0,则输出 1,否则为 0。
有多种方法可以generate random integers in a range from a random bit stream. Of these, J. Lumbroso showed an optimal way to solve this problem given a random bit stream ("Optimal Discrete Uniform Generation from Coin Flips, and Applications", 2013). (However, Appendix B of that paper also points out a solution to your direct problem: generating 0 or 1 with a given probability.) Other ways include those mentioned in "Efficiently Generating a Random Number in a Range" as well as a brand-new algorithm, the "Fast Loaded Dice Roller"。
另一种可能的方法。假设您想要 5% 的 1 位和 95% 的 0 位:
bitArray = [1, 1, 1, 1, 1, 0, ... 0, 0]; // 95 0s
bitArray.shuffle();
从打乱的 bitArray
中拉出你想要的位。如果需要,您可以使用 32 位 RNG 创建 shuffle()
方法。