可以统一处理MIN和MAX_SAFE_INTEGER全范围的randomInt函数

randomInt function that can uniformly handle the full range of MIN and MAX_SAFE_INTEGER

要求和背景

我想要一个通用的 randomInt 函数,它可以处理一系列值,包括 Number.MIN_SAFE_INTEGER to Number.MAX_SAFE_INTEGER and that the values returned are uniformly distributed.

所以,我开始在 MDN 上查看 Math.random 页面。他们给出了一个例子,看起来是均匀分布的。

// Returns a random integer between min (included) and max (excluded)
// Using Math.round() will give you a non-uniform distribution!
function getRandomInt(min, max) {
  return Math.floor(Math.random() * (max - min)) + min;
}

但它带有以下注释。

Note that as numbers in JavaScript are IEEE 754 floating point numbers with round-to-nearest-even behavior, the ranges claimed for the functions below (excluding the one for Math.random() itself) aren't exact. If extremely large bounds are chosen (2^53 or higher), it's possible in extremely rare cases to calculate the usually-excluded upper bound.

我想使用范围 -(2^53 - 1) 和 2^53 - 1,所以我认为这条注释不适用。然后我注意到 max - min:对于我指定的较大范围,这将是一个问题:

示例 - 最大范围

Number.MAX_SAFE_INTEGER - Number.MIN_SAFE_INTEGER > Number.MAX_SAFE_INTEGER

解决方案 1 - 不是解决方案

Off I go to have a little play 并根据 MDN 示例和我的要求得出以下代码。

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

// Returns a random integer between min (included) and max (included)
// Using Math.round() will give you a non-uniform distribution!
function randomInt(min, max) {
    var tmp,
        val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = max - min + 1;
    if (tmp > Number.MAX_SAFE_INTEGER) {
        throw new RangeError('Difference of max and min is greater than Number.MAX_SAFE_INTEGER: ' + tmp);
    } else {
        val = Math.floor(Math.random() * tmp) + min;
    }
    
    return val;
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));

但是如您所见,这将在我需要的更大范围之前引发错误。

解决方案 2 - 解决了数学问题,但似乎打破了一致性

所以我有一个 fiddle 并提出以下内容。

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

// Returns a random integer between min (included) and max (included)
// Using Math.round() will give you a non-uniform distribution!
function randomInt(min, max) {
    var tmp,
        val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = max - min + 1;
    if (tmp > Number.MAX_SAFE_INTEGER) {
        if (Math.floor(Math.random() * 2)) {
            val = Math.floor(Math.random() * (max - 0 + 1)) + 0;
        } else {
            val = Math.floor(Math.random() * (0 - min + 1)) + min;
        }
    } else {
        val = Math.floor(Math.random() * tmp) + min;
    }
    
    return val;
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));

虽然我们不再抛出错误并且数学似乎在最大安全整数值范围内,但我不确定这究竟是如何影响原始 MDN 示例的均匀分布(如果它是均匀分布的)?

我的测试似乎表明这打破了均匀分布。

分布图

function getData() {
  var x = {},
    c = 1000000,
    min = -20,
    max = 20,
    q,
    i;

  for (i = 0; i < c; i += 1) {
    if (Math.floor(Math.random() * 2)) {
      q = Math.floor(Math.random() * (max - 0 + 1)) + 0;
    } else {
      q = Math.floor(Math.random() * (1 - min + 1)) + min;
    }

    if (!x[q]) {
      x[q] = 1;
    } else {
      x[q] += 1;
    }
  };

  return Object.keys(x).sort(function(x, y) {
    return x - y;
  }).map(function(key, index) {
    return {
      'q': +key,
      'p': (x[key] / c) * 100
    };
  });
}

var data = getData(),
  margin = {
    top: 20,
    right: 20,
    bottom: 30,
    left: 50
  },
  width = 960 - margin.left - margin.right,
  height = 500 - margin.top - margin.bottom,
  x = d3.scale.linear().range([0, width]),
  y = d3.scale.linear().range([height, 0]),
  xAxis = d3.svg.axis().scale(x).orient("bottom"),
  yAxis = d3.svg.axis().scale(y).orient("left"),
  line = d3.svg.line().x(function(d) {
    return x(d.q);
  }).y(function(d) {
    return y(d.p);
  }),
  svg = d3.select("body").append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

x.domain(d3.extent(data, function(d) {
  return d.q;
}));

y.domain(d3.extent(data, function(d) {
  return d.p;
}));

svg.append("g")
  .attr("class", "x axis")
  .attr("transform", "translate(0," + height + ")")
  .call(xAxis);

svg.append("g")
  .attr("class", "y axis")
  .call(yAxis);

svg.append("path")
  .datum(data)
  .attr("class", "line")
  .attr("d", line);
body {
  font: 10px sans-serif;
}
.axis path,
.axis line {
  fill: none;
  stroke: #000;
  shape-rendering: crispEdges;
}
.line {
  fill: none;
  stroke: steelblue;
  stroke-width: 1.5px;
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>

解决方案 3 - 不是解决方案

所以我按下并查看创建 Box-Muller Transform function for creating the random normally distributed range that I thought I required (but my mistake I wanted uniformly distributed). I did some reading and chose rejection sampling as the method to generate observations from a distribution. Found out how to calculate the deviation for a range without having to use Math.sqrt

If the value of x is negative, Math.sqrt() returns NaN

这是我想出来的。

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

var boxMullerRandom = (function () {
    var phase = 0,
        RAND_MAX,
        array,
        random,
        x1, x2, w, z;

    if (crypto && crypto.getRandomValues) {
        RAND_MAX = Math.pow(2, 32) - 1;
        array = new Uint32Array(1);
        random = function () {
            crypto.getRandomValues(array);

            return array[0] / RAND_MAX;
        };
    } else {
        random = Math.random;
    }

    return function () {
        if (!phase) {
            do {
                x1 = 2.0 * random() - 1.0;
                x2 = 2.0 * random() - 1.0;
                w = x1 * x1 + x2 * x2;
            } while (w >= 1.0);

            w = Math.sqrt((-2.0 * Math.log(w)) / w);
            z = x1 * w;
        } else {
            z = x2 * w;
        }

        phase ^= 1;

        return z;
    }
}());

function rejectionSample(stdev, mean, from, to) {
    var retVal;
    
    do {
        retVal = (boxMullerRandom() * stdev) + mean;
    } while (retVal < from || to < retVal);

    return retVal;
}

function randomInt(min, max) {
    var tmp,
        val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = {};
    tmp.mean = (min / 2) + (max / 2);
    tmp.variance = (Math.pow(min - tmp.mean, 2) + Math.pow(max - tmp.mean, 2)) / 2;
    tmp.deviation = Math.sqrt(tmp.variance);
    console.log(tmp);
    return Math.floor(rejectionSample(tmp.deviation, tmp.mean, min, max + 1));
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));

不确定我是否已正确完成所有操作(没有破坏正态分布),但在较小的整数范围内,我看到了生成的随机整数的正确范围。

但是当我使用范围的最大限制(或实际在这些限制之前)时仍然存在问题。数学仍然超出 Number.MAX_SAFE_INTEGER 值。上面的输出 console.log(tmp);

{mean: 0, variance: 8.112963841460666e+31, deviation: 9007199254740991} 

如您所见,计算出的 variance 并不安全。由于我对分布类型的混淆,这个算法可以忽略。

分布图

我已经包含了这个,这样你就可以看到我实际上非常接近将它作为正态分布工作,即使这不是我实际需要的。它可能会帮助希望执行此类分发的人。

Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function(inputArg) {
  var number = +inputArg,
    val = 0;

  if (number === number) {
    if (!number || number === Infinity || number === -Infinity) {
      val = number;
    } else {
      val = (number > 0 || -1) * Math.floor(Math.abs(number));
    }
  }

  return val;
};

function clampSafeInt(number) {
  return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

var boxMullerRandom = (function() {
  var phase = 0,
    RAND_MAX,
    array,
    random,
    x1, x2, w, z;

  if (crypto && crypto.getRandomValues) {
    RAND_MAX = Math.pow(2, 32) - 1;
    array = new Uint32Array(1);
    random = function() {
      crypto.getRandomValues(array);

      return array[0] / RAND_MAX;
    };
  } else {
    random = Math.random;
  }

  return function() {
    if (!phase) {
      do {
        x1 = 2.0 * random() - 1.0;
        x2 = 2.0 * random() - 1.0;
        w = x1 * x1 + x2 * x2;
      } while (w >= 1.0);

      w = Math.sqrt((-2.0 * Math.log(w)) / w);
      z = x1 * w;
    } else {
      z = x2 * w;
    }

    phase ^= 1;

    return z;
  }
}());

function rejectionSample(stdev, mean, from, to) {
  var retVal;

  do {
    retVal = (boxMullerRandom() * stdev) + mean;
  } while (retVal < from || to < retVal);

  return retVal;
}

function randomInt(min, max) {
  var tmp,
    val;

  if (arguments.length === 1) {
    max = min;
    min = 0;
  }

  min = clampSafeInt(min);
  max = clampSafeInt(max);
  if (min > max) {
    tmp = min;
    min = max;
    max = tmp;
  }

  tmp = {};
  tmp.mean = (min / 2) + (max / 2);
  tmp.variance = (Math.pow(min - tmp.mean, 2) + Math.pow(max - tmp.mean, 2)) / 2;
  tmp.deviation = Math.sqrt(tmp.variance);

  return Math.floor(rejectionSample(tmp.deviation, tmp.mean, min, max + 1));
}

function getData() {
  var x = {},
    c = 1000000,
    q,
    i;

  for (i = 0; i < c; i += 1) {
    q = randomInt(-9, 3);
    if (!x[q]) {
      x[q] = 1;
    } else {
      x[q] += 1;
    }
  };

  return Object.keys(x).sort(function(x, y) {
    return x - y;
  }).map(function(key) {
    return {
      'q': +key,
      'p': x[key] / c
    };
  });
}

var data = getData(),
  margin = {
    top: 20,
    right: 20,
    bottom: 30,
    left: 50
  },
  width = 960 - margin.left - margin.right,
  height = 500 - margin.top - margin.bottom,
  x = d3.scale.linear().range([0, width]),
  y = d3.scale.linear().range([height, 0]),
  xAxis = d3.svg.axis().scale(x).orient("bottom"),
  yAxis = d3.svg.axis().scale(y).orient("left"),
  line = d3.svg.line().x(function(d) {
    return x(d.q);
  }).y(function(d) {
    return y(d.p);
  }),
  svg = d3.select("body").append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

x.domain(d3.extent(data, function(d) {
  return d.q;
}));

y.domain(d3.extent(data, function(d) {
  return d.p;
}));

svg.append("g")
  .attr("class", "x axis")
  .attr("transform", "translate(0," + height + ")")
  .call(xAxis);

svg.append("g")
  .attr("class", "y axis")
  .call(yAxis);

svg.append("path")
  .datum(data)
  .attr("class", "line")
  .attr("d", line);
body {
  font: 10px sans-serif;
}
.axis path,
.axis line {
  fill: none;
  stroke: #000;
  shape-rendering: crispEdges;
}
.line {
  fill: none;
  stroke: steelblue;
  stroke-width: 1.5px;
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>

有什么解决办法?

所以,我错过了什么?是否有一些我忽略的简单方法?我必须使用大数字库作为解决方案吗?如何测试分布:我有一些我正在绘制的图表,这对于小范围来说很好,但更大的范围是不可能的?

请让我摆脱这件事的痛苦。 :)

为什么不调用 Math.random() 两次,每次使用 26 位?这在它可以产生良好均匀性的范围内是相当安全的。

function random53() {
    var hi, lo, sign = 0;

    while (true) {
        hi = Math.floor(67108864 * Math.random());
        lo = Math.floor(67108864 * Math.random());

        if (hi >= 33554432) {
            sign = -1;
            hi -= 33554432;

            // Gotta throw out negative zero!
            if (0 === hi && 0 === lo) { continue; }
        }
        return sign * (hi * 67108864) + lo;
    }
}

一个可行的解决方案,我采用的方法是使用 BigNumber 库。我仍然觉得必须有一个解决这个问题而不需要依赖BigNumber库的方法,但我还没有找到任何其他方法。

console.log(window);
Number.MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991;

Number.MIN_SAFE_INTEGER = Number.MIN_SAFE_INTEGER || -Number.MAX_SAFE_INTEGER;

Number.toInteger = Number.toInteger || function (inputArg) {
    var number = +inputArg,
        val = 0;

    if (number === number) {
        if (!number || number === Infinity || number === -Infinity) {
            val = number;
        } else {
            val = (number > 0 || -1) * Math.floor(Math.abs(number));
        }
    }

    return val;
};

function clampSafeInt(number) {
    return Math.min(Math.max(Number.toInteger(number), Number.MIN_SAFE_INTEGER), Number.MAX_SAFE_INTEGER);
}

// Returns a random integer between min (included) and max (included)
// Using Math.round() will give you a non-uniform distribution!
function randomInt(min, max) {
    var tmp,
    val;

    if (arguments.length === 1) {
        max = min;
        min = 0;
    }

    min = clampSafeInt(min);
    max = clampSafeInt(max);
    if (min > max) {
        tmp = min;
        min = max;
        max = tmp;
    }

    tmp = max - min + 1;
    if (tmp > Number.MAX_SAFE_INTEGER) {
        tmp = new Big(max).minus(min).plus(1);
        val = Math.floor(tmp.times(Math.random())) + min;
    } else {
        val = Math.floor(Math.random() * tmp) + min;
    }

    return val;
}

console.log(randomInt(Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER));
<script src="https://rawgithub.com/MikeMcl/big.js/master/big.min.js"></script>

对于遇到这个老问题的其他任何人:有一种方法可以通过完全避免将 Math.random() 的 return 值解释为浮点数来完全回避 OP 描述的数字边界问题类型:

const all64RandomBits = Float64Array.of(Math.random()).buffer

上面的行只是获取类型数组类型下的二进制缓冲区,该类型数组具有与 JavaScript Number 类型相同的 IEEE 754 编码:a Float64Array.

获得此 BufferArray 后,您可以通过选择适当的整数视图 and/or 根据需要应用按位 AND、OR 和移位来获取所需的任意数量的随机位。例如,想要将所有 64 个随机位转换为 2 个数字,每个数字的无符号整数值在 0 - UINT32_MAX 范围内?你只需要这个可爱的小单线:

const [x, y] = new Uint32Array(Float64Array.of(Math.random()).buffer)