生日悖论
Birthday paradox
我想模拟java中的生日悖论。出于某种原因,我的输出(概率)一直非常接近 1,例如模拟(10)->0,9268。一开始你可以看到我的模拟应该接近的概率。很长一段时间以来,我一直在寻找我的代码中的错误,所以我希望你们中的任何人都能帮助我。我查找了生日悖论的其他代码,但其中 none 似乎能够帮助我处理奇怪的输出。
p.s。你可以忽略//TODO,一旦我得到代码和运行,我就会修复它。
谢谢提前!
static final int DAYS_IN_YEAR = 365;
static final int NUMBER_OF_SIMULATIONS = 500;
LCG random;
HashSet<Integer> birthdaySet = new HashSet<Integer>();
BirthdayProblem2(){
birthdaySet.clear();
random = new LCG(); //random numbers between 0 and 1
}
int generateBirthday(){ //generates random birthday
return (int) Math.round(Math.random()*DAYS_IN_YEAR); //TODO use LGC
}
double iteration(int numberOfStudents){ //one iteration from the simulation
int sameBirthdays = 0;
for (int i = 0; i < numberOfStudents; i++){
int bd = generateBirthday();
if (birthdaySet.contains(bd)){
sameBirthdays++;
} else {
birthdaySet.add(bd);
}
}
return (double) sameBirthdays/numberOfStudents; //probability of two students having the same birthday
}
void simulation(int numberOfStudents){
double probabilitySum = 0;
for (int i = 0; i < NUMBER_OF_SIMULATIONS; i++){
probabilitySum += iteration(numberOfStudents);
}
System.out.printf("For n=%d -> P=%.4f \n", numberOfStudents, probabilitySum/NUMBER_OF_SIMULATIONS);
}
private void start() {
simulation(10); //should be about 0.1
simulation(20); //should be about 0.4
simulation(23); //should be about 0.5
simulation(35); //should be about 0.8
}
public static void main(String[] argv) {
new BirthdayProblem2().start();
}
}
您没有清除迭代之间的 birthdaySet
。将它从字段更改为局部变量将防止出现此类错误,因为为什么首先需要它作为字段?
换句话说,generateBirthday
应该使用 Random.nextInt(DAYS_IN_YEAR)
。 Random
的实例是作为字段的主要候选者。那 LCG random;
行到底是什么?
更新
方法iteration()
应该return 布尔值,用于判断是否有相同生日的学生。真值数 returned 除以调用数等于概率。
由于一年中的天数相对较少,布尔数组会比 Set
更快,所以这里更新代码:
private static final int DAYS_IN_YEAR = 365;
private static final int NUMBER_OF_SIMULATIONS = 500;
private Random rnd = new Random();
private int generateBirthday() {
return this.rnd.nextInt(DAYS_IN_YEAR);
}
private boolean iteration(int numberOfStudents) { //one iteration from the simulation
boolean[] present = new boolean[DAYS_IN_YEAR];
for (int i = 0; i < numberOfStudents; i++) {
int birthday = generateBirthday();
if (present[birthday])
return true;
present[birthday] = true;
}
return false;
}
void simulation(int numberOfStudents){
int count = 0;
for (int i = 0; i < NUMBER_OF_SIMULATIONS; i++)
if (iteration(numberOfStudents))
count++;
System.out.printf("For n=%d -> P=%.4f%n", numberOfStudents, (double)count/NUMBER_OF_SIMULATIONS);
}
示例输出
For n=10 -> P=0.1340
For n=20 -> P=0.4120
For n=23 -> P=0.5080
For n=35 -> P=0.8160
For n=10 -> P=0.1200
For n=20 -> P=0.4120
For n=23 -> P=0.5260
For n=35 -> P=0.8140
For n=10 -> P=0.1260
For n=20 -> P=0.3920
For n=23 -> P=0.5080
For n=35 -> P=0.7920
增加 NUMBER_OF_SIMULATIONS
到 5_000_000
:
For n=10 -> P=0.1167
For n=20 -> P=0.4113
For n=23 -> P=0.5075
For n=35 -> P=0.8145
For n=10 -> P=0.1168
For n=20 -> P=0.4113
For n=23 -> P=0.5072
For n=35 -> P=0.8142
正如 andreas 已经指出的那样 birthdaySet 应该在迭代之间被清除,否则该集合将包含来自先前模拟的数据
但是你的设计还有一个致命的缺陷,看看维基百科页面就知道了:
在概率论中,生日问题或生日悖论关注的是在一组 n 个随机选择的人中,其中一些人的生日相同的概率。
是至少有2个人生日相同的概率。这意味着只要 2 等于 1 就应该返回。
我在下面的代码中总结了它
import java.util.HashSet;
import java.util.Random;
public class BirthdayProblem2 {
static final int DAYS_IN_YEAR = 365;
static final int NUMBER_OF_SIMULATIONS = 500;
Random random;
HashSet<Integer> birthdaySet = new HashSet<Integer>();
BirthdayProblem2() { random = new Random(); }
int generateBirthday() { return random.nextInt(DAYS_IN_YEAR); }
double iteration(int numberOfStudents) { //one iteration from the simulation
birthdaySet.clear();
for (int i = 0; i < numberOfStudents; i++) {
int bd = generateBirthday();
if (birthdaySet.contains(bd)) {
return 1.0;
}
birthdaySet.add(bd);
}
return 0.0;
}
void simulation(int numberOfStudents) {
double probabilitySum = 0;
for (int i = 0; i < NUMBER_OF_SIMULATIONS; i++) {
probabilitySum += iteration(numberOfStudents);
}
System.out.printf("For n = %d -> P = %.4f \n", numberOfStudents, probabilitySum / NUMBER_OF_SIMULATIONS);
}
private void start() {
simulation(10); //should be about 0.1
simulation(20); //should be about 0.4
simulation(23); //should be about 0.5
simulation(35); //should be about 0.8
}
public static void main(String... whatever) { new BirthdayProblem2().start(); }
}
我想模拟java中的生日悖论。出于某种原因,我的输出(概率)一直非常接近 1,例如模拟(10)->0,9268。一开始你可以看到我的模拟应该接近的概率。很长一段时间以来,我一直在寻找我的代码中的错误,所以我希望你们中的任何人都能帮助我。我查找了生日悖论的其他代码,但其中 none 似乎能够帮助我处理奇怪的输出。 p.s。你可以忽略//TODO,一旦我得到代码和运行,我就会修复它。 谢谢提前!
static final int DAYS_IN_YEAR = 365;
static final int NUMBER_OF_SIMULATIONS = 500;
LCG random;
HashSet<Integer> birthdaySet = new HashSet<Integer>();
BirthdayProblem2(){
birthdaySet.clear();
random = new LCG(); //random numbers between 0 and 1
}
int generateBirthday(){ //generates random birthday
return (int) Math.round(Math.random()*DAYS_IN_YEAR); //TODO use LGC
}
double iteration(int numberOfStudents){ //one iteration from the simulation
int sameBirthdays = 0;
for (int i = 0; i < numberOfStudents; i++){
int bd = generateBirthday();
if (birthdaySet.contains(bd)){
sameBirthdays++;
} else {
birthdaySet.add(bd);
}
}
return (double) sameBirthdays/numberOfStudents; //probability of two students having the same birthday
}
void simulation(int numberOfStudents){
double probabilitySum = 0;
for (int i = 0; i < NUMBER_OF_SIMULATIONS; i++){
probabilitySum += iteration(numberOfStudents);
}
System.out.printf("For n=%d -> P=%.4f \n", numberOfStudents, probabilitySum/NUMBER_OF_SIMULATIONS);
}
private void start() {
simulation(10); //should be about 0.1
simulation(20); //should be about 0.4
simulation(23); //should be about 0.5
simulation(35); //should be about 0.8
}
public static void main(String[] argv) {
new BirthdayProblem2().start();
}
}
您没有清除迭代之间的 birthdaySet
。将它从字段更改为局部变量将防止出现此类错误,因为为什么首先需要它作为字段?
换句话说,generateBirthday
应该使用 Random.nextInt(DAYS_IN_YEAR)
。 Random
的实例是作为字段的主要候选者。那 LCG random;
行到底是什么?
更新
方法iteration()
应该return 布尔值,用于判断是否有相同生日的学生。真值数 returned 除以调用数等于概率。
由于一年中的天数相对较少,布尔数组会比 Set
更快,所以这里更新代码:
private static final int DAYS_IN_YEAR = 365;
private static final int NUMBER_OF_SIMULATIONS = 500;
private Random rnd = new Random();
private int generateBirthday() {
return this.rnd.nextInt(DAYS_IN_YEAR);
}
private boolean iteration(int numberOfStudents) { //one iteration from the simulation
boolean[] present = new boolean[DAYS_IN_YEAR];
for (int i = 0; i < numberOfStudents; i++) {
int birthday = generateBirthday();
if (present[birthday])
return true;
present[birthday] = true;
}
return false;
}
void simulation(int numberOfStudents){
int count = 0;
for (int i = 0; i < NUMBER_OF_SIMULATIONS; i++)
if (iteration(numberOfStudents))
count++;
System.out.printf("For n=%d -> P=%.4f%n", numberOfStudents, (double)count/NUMBER_OF_SIMULATIONS);
}
示例输出
For n=10 -> P=0.1340
For n=20 -> P=0.4120
For n=23 -> P=0.5080
For n=35 -> P=0.8160
For n=10 -> P=0.1200
For n=20 -> P=0.4120
For n=23 -> P=0.5260
For n=35 -> P=0.8140
For n=10 -> P=0.1260
For n=20 -> P=0.3920
For n=23 -> P=0.5080
For n=35 -> P=0.7920
增加 NUMBER_OF_SIMULATIONS
到 5_000_000
:
For n=10 -> P=0.1167
For n=20 -> P=0.4113
For n=23 -> P=0.5075
For n=35 -> P=0.8145
For n=10 -> P=0.1168
For n=20 -> P=0.4113
For n=23 -> P=0.5072
For n=35 -> P=0.8142
正如 andreas 已经指出的那样 birthdaySet 应该在迭代之间被清除,否则该集合将包含来自先前模拟的数据
但是你的设计还有一个致命的缺陷,看看维基百科页面就知道了:
在概率论中,生日问题或生日悖论关注的是在一组 n 个随机选择的人中,其中一些人的生日相同的概率。
是至少有2个人生日相同的概率。这意味着只要 2 等于 1 就应该返回。
我在下面的代码中总结了它
import java.util.HashSet;
import java.util.Random;
public class BirthdayProblem2 {
static final int DAYS_IN_YEAR = 365;
static final int NUMBER_OF_SIMULATIONS = 500;
Random random;
HashSet<Integer> birthdaySet = new HashSet<Integer>();
BirthdayProblem2() { random = new Random(); }
int generateBirthday() { return random.nextInt(DAYS_IN_YEAR); }
double iteration(int numberOfStudents) { //one iteration from the simulation
birthdaySet.clear();
for (int i = 0; i < numberOfStudents; i++) {
int bd = generateBirthday();
if (birthdaySet.contains(bd)) {
return 1.0;
}
birthdaySet.add(bd);
}
return 0.0;
}
void simulation(int numberOfStudents) {
double probabilitySum = 0;
for (int i = 0; i < NUMBER_OF_SIMULATIONS; i++) {
probabilitySum += iteration(numberOfStudents);
}
System.out.printf("For n = %d -> P = %.4f \n", numberOfStudents, probabilitySum / NUMBER_OF_SIMULATIONS);
}
private void start() {
simulation(10); //should be about 0.1
simulation(20); //should be about 0.4
simulation(23); //should be about 0.5
simulation(35); //should be about 0.8
}
public static void main(String... whatever) { new BirthdayProblem2().start(); }
}