我创建了一个类,用于在自助法(bootstrap)重采样之后返回置信区间,但得到的置信区间看起来异常狭窄。我做错了什么?
I created a class to return a confidence interval after bootstrapping, but my confidence interval looks oddly narrow. What did I do wrong?
我的意图是让代码对给定列表执行自助法(bootstrap)重采样:
每次抽取的样本量等于列表长度,重复 10,000 次,然后计算
95% 置信区间。
import numpy
from random import choice
class bootstrapping(object):
    """Bootstrap the mean of a list of numbers and report an interval.

    NOTE: ``CI`` deliberately keeps the formula from the question,
    ``mean -/+ 1.96 * std / sqrt(iteration)``. Dividing the spread of the
    bootstrap means by ``sqrt(iteration)`` shrinks the margin as more
    resamples are drawn, which is why the reported interval is so narrow.
    """

    def __init__(self, bslist=None, iteration=10000):
        """Store the sample and the number of bootstrap resamples.

        bslist: sequence of numbers to resample; ``None`` means empty.
        iteration: number of resamples drawn by ``CI``.
        """
        # Build a fresh list per instance instead of sharing one mutable
        # default object between every instance created without data.
        self.bslist = [] if bslist is None else bslist
        self.iteration = iteration

    def CI(self):
        """Return ``(lower, upper)`` around the mean of the bootstrap means.

        Raises:
            ValueError: if the sample is empty or ``iteration`` is below 1.
        """
        size = len(self.bslist)
        if size == 0:
            raise ValueError("bslist must contain at least one value")
        if self.iteration < 1:
            raise ValueError("iteration must be at least 1")

        listofmeans = []
        for _ in range(self.iteration):
            # Resample with replacement, same size as the original sample.
            bootstraplist = [choice(self.bslist) for _ in range(size)]
            listofmeans.append(sum(bootstraplist) / size)

        s = numpy.std(listofmeans)
        z = 1.96
        n = self.iteration ** 0.5
        centre = sum(listofmeans) / len(listofmeans)
        margin = z * s / n  # see class NOTE: this is what narrows the interval
        return centre - margin, centre + margin
# Run the bootstrap on the sample data with 10,000 resamples.
test = bootstrapping(
    bslist=[
        60, 33, 102, 53, 63, 33, 42, 19, 31, 86, 15, 50,
        45, 47, 26, 23, 30, 20, 18, 48, 22, 20, 17, 29, 43, 52, 29,
    ],
    iteration=10000,
)
test.CI()
我得到的置信区间 (37.897427638499948, 38.102572361500052)
窄得出奇。当我将相同的数字列表输入 Minitab 时,得到的 95%
置信区间是 (30.74, 47.48)。是不是我做错了什么?
要得到 95% 的置信区间,取 z = 1.96(近似值),并以自助均值的平均值为中心,
加上或减去 z*std,其中 std 是自助均值的标准差。自助均值的标准差本身就是
样本均值标准误的估计,因此无需再除以 n(代码中 n 是迭代次数的平方根)。
换句话说,使用 z*std 而不是 z*std/n:
import numpy as np
import random
random.seed(2017)
class Bootstrapping(object):
    """Normal-approximation bootstrap interval for the mean of a sample."""

    def __init__(self, bslist=None, iteration=10000):
        """Store the sample and the number of bootstrap resamples.

        bslist: sequence of numbers to resample; ``None`` means empty.
        iteration: number of resamples drawn by ``CI``.
        """
        # Build a fresh list per instance instead of sharing one mutable
        # default object between every instance created without data.
        self.bslist = [] if bslist is None else bslist
        self.iteration = iteration

    def CI(self):
        """Return ``(lower, upper)`` as mean -/+ 1.96 * std of the bootstrap means.

        Raises:
            ValueError: if the sample is empty or ``iteration`` is below 1.
        """
        size = len(self.bslist)
        if size == 0:
            raise ValueError("bslist must contain at least one value")
        if self.iteration < 1:
            raise ValueError("iteration must be at least 1")

        listofmeans = []
        for _ in range(self.iteration):
            # Resample with replacement, same size as the original sample.
            bootstraplist = [random.choice(self.bslist) for _ in range(size)]
            listofmeans.append(sum(bootstraplist) / size)

        mean = np.mean(listofmeans)
        # The spread of the bootstrap means is used directly as the margin's
        # scale; it is not divided by sqrt(iteration).
        std = np.std(listofmeans)
        z = 1.96
        err = z * std
        return mean - err, mean + err
# Run the bootstrap on the sample data with 10,000 resamples and show the result.
test = Bootstrapping(
    bslist=[
        60, 33, 102, 53, 63, 33, 42, 19, 31, 86, 15, 50,
        45, 47, 26, 23, 30, 20, 18, 48, 22, 20, 17, 29, 43, 52, 29,
    ],
    iteration=10000,
)
print(test.CI())
输出:
(31.309540089458281, 46.876348799430602)
或者,您也可以不借助 均值 ± 1.96*std 公式来计算置信区间:将 listofmeans 排序,
取第 5 和第 95 百分位数处的值,即可得到置信区间的经验估计(严格来说,第 5 和第 95 百分位数对应的是 90% 的区间;若要得到 95% 的区间,应取第 2.5 和第 97.5 百分位数):
import random
random.seed(2017)
class Bootstrapping(object):
    """Percentile bootstrap interval for the mean of a sample."""

    def __init__(self, bslist=None, iteration=10000):
        """Store the sample and the number of bootstrap resamples.

        bslist: sequence of numbers to resample; ``None`` means empty.
        iteration: number of resamples drawn by ``CI``.
        """
        # Build a fresh list per instance instead of sharing one mutable
        # default object between every instance created without data.
        self.bslist = [] if bslist is None else bslist
        self.iteration = iteration

    def CI(self, lower=0.05, upper=0.95):
        """Return the bootstrap means found at the ``lower``/``upper`` fractions.

        lower, upper: cut points as fractions of the sorted bootstrap means.
            The defaults (5% and 95%) leave 10% outside the interval, i.e. a
            90% interval; pass ``0.025`` and ``0.975`` for a 95% interval.

        Raises:
            ValueError: if the fractions are not ordered within [0, 1], the
                sample is empty, or ``iteration`` is below 1.
        """
        if not 0 <= lower <= upper <= 1:
            raise ValueError("lower and upper must satisfy 0 <= lower <= upper <= 1")
        size = len(self.bslist)
        if size == 0:
            raise ValueError("bslist must contain at least one value")
        if self.iteration < 1:
            raise ValueError("iteration must be at least 1")

        listofmeans = []
        for _ in range(self.iteration):
            # Resample with replacement, same size as the original sample.
            bootstraplist = [random.choice(self.bslist) for _ in range(size)]
            listofmeans.append(sum(bootstraplist) / size)
        listofmeans.sort()

        # round() can land one past the final index (e.g. iteration=10 gives
        # round(9.5) == 10), so clamp both positions to the last element.
        last = len(listofmeans) - 1
        a = min(round(self.iteration * lower), last)
        b = min(round(self.iteration * upper), last)
        return listofmeans[a], listofmeans[b]
# Run the bootstrap on the sample data with 10,000 resamples and show the result.
test = Bootstrapping(
    bslist=[
        60, 33, 102, 53, 63, 33, 42, 19, 31, 86, 15, 50,
        45, 47, 26, 23, 30, 20, 18, 48, 22, 20, 17, 29, 43, 52, 29,
    ],
    iteration=10000,
)
print(test.CI())
输出:
(32.888888888888886, 45.888888888888886)
我的意图是让代码对给定列表执行自助法(bootstrap)重采样:每次抽取的样本量等于列表长度,重复 10,000 次,然后计算 95% 置信区间。
import numpy
from random import choice
class bootstrapping(object):
    """Bootstrap the mean of a list of numbers and report an interval.

    NOTE: ``CI`` deliberately keeps the formula from the question,
    ``mean -/+ 1.96 * std / sqrt(iteration)``. Dividing the spread of the
    bootstrap means by ``sqrt(iteration)`` shrinks the margin as more
    resamples are drawn, which is why the reported interval is so narrow.
    """

    def __init__(self, bslist=None, iteration=10000):
        """Store the sample and the number of bootstrap resamples.

        bslist: sequence of numbers to resample; ``None`` means empty.
        iteration: number of resamples drawn by ``CI``.
        """
        # Build a fresh list per instance instead of sharing one mutable
        # default object between every instance created without data.
        self.bslist = [] if bslist is None else bslist
        self.iteration = iteration

    def CI(self):
        """Return ``(lower, upper)`` around the mean of the bootstrap means.

        Raises:
            ValueError: if the sample is empty or ``iteration`` is below 1.
        """
        size = len(self.bslist)
        if size == 0:
            raise ValueError("bslist must contain at least one value")
        if self.iteration < 1:
            raise ValueError("iteration must be at least 1")

        listofmeans = []
        for _ in range(self.iteration):
            # Resample with replacement, same size as the original sample.
            bootstraplist = [choice(self.bslist) for _ in range(size)]
            listofmeans.append(sum(bootstraplist) / size)

        s = numpy.std(listofmeans)
        z = 1.96
        n = self.iteration ** 0.5
        centre = sum(listofmeans) / len(listofmeans)
        margin = z * s / n  # see class NOTE: this is what narrows the interval
        return centre - margin, centre + margin
# Run the bootstrap on the sample data with 10,000 resamples.
test = bootstrapping(
    bslist=[
        60, 33, 102, 53, 63, 33, 42, 19, 31, 86, 15, 50,
        45, 47, 26, 23, 30, 20, 18, 48, 22, 20, 17, 29, 43, 52, 29,
    ],
    iteration=10000,
)
test.CI()
我得到的置信区间 (37.897427638499948, 38.102572361500052) 窄得出奇。当我将相同的数字列表输入 Minitab 时,得到的 95% 置信区间是 (30.74, 47.48)。是不是我做错了什么?
要得到 95% 的置信区间,取 z = 1.96(近似值),并以自助均值的平均值为中心,
加上或减去 z*std,其中 std 是自助均值的标准差。自助均值的标准差本身就是
样本均值标准误的估计,因此无需再除以 n(代码中 n 是迭代次数的平方根)。
换句话说,使用 z*std 而不是 z*std/n:
import numpy as np
import random
random.seed(2017)
class Bootstrapping(object):
    """Normal-approximation bootstrap interval for the mean of a sample."""

    def __init__(self, bslist=None, iteration=10000):
        """Store the sample and the number of bootstrap resamples.

        bslist: sequence of numbers to resample; ``None`` means empty.
        iteration: number of resamples drawn by ``CI``.
        """
        # Build a fresh list per instance instead of sharing one mutable
        # default object between every instance created without data.
        self.bslist = [] if bslist is None else bslist
        self.iteration = iteration

    def CI(self):
        """Return ``(lower, upper)`` as mean -/+ 1.96 * std of the bootstrap means.

        Raises:
            ValueError: if the sample is empty or ``iteration`` is below 1.
        """
        size = len(self.bslist)
        if size == 0:
            raise ValueError("bslist must contain at least one value")
        if self.iteration < 1:
            raise ValueError("iteration must be at least 1")

        listofmeans = []
        for _ in range(self.iteration):
            # Resample with replacement, same size as the original sample.
            bootstraplist = [random.choice(self.bslist) for _ in range(size)]
            listofmeans.append(sum(bootstraplist) / size)

        mean = np.mean(listofmeans)
        # The spread of the bootstrap means is used directly as the margin's
        # scale; it is not divided by sqrt(iteration).
        std = np.std(listofmeans)
        z = 1.96
        err = z * std
        return mean - err, mean + err
# Run the bootstrap on the sample data with 10,000 resamples and show the result.
test = Bootstrapping(
    bslist=[
        60, 33, 102, 53, 63, 33, 42, 19, 31, 86, 15, 50,
        45, 47, 26, 23, 30, 20, 18, 48, 22, 20, 17, 29, 43, 52, 29,
    ],
    iteration=10000,
)
print(test.CI())
输出:
(31.309540089458281, 46.876348799430602)
或者,您也可以不借助 均值 ± 1.96*std 公式来计算置信区间:将 listofmeans 排序,
取第 5 和第 95 百分位数处的值,即可得到置信区间的经验估计(严格来说,第 5 和第 95 百分位数对应的是 90% 的区间;若要得到 95% 的区间,应取第 2.5 和第 97.5 百分位数):
import random
random.seed(2017)
class Bootstrapping(object):
    """Percentile bootstrap interval for the mean of a sample."""

    def __init__(self, bslist=None, iteration=10000):
        """Store the sample and the number of bootstrap resamples.

        bslist: sequence of numbers to resample; ``None`` means empty.
        iteration: number of resamples drawn by ``CI``.
        """
        # Build a fresh list per instance instead of sharing one mutable
        # default object between every instance created without data.
        self.bslist = [] if bslist is None else bslist
        self.iteration = iteration

    def CI(self, lower=0.05, upper=0.95):
        """Return the bootstrap means found at the ``lower``/``upper`` fractions.

        lower, upper: cut points as fractions of the sorted bootstrap means.
            The defaults (5% and 95%) leave 10% outside the interval, i.e. a
            90% interval; pass ``0.025`` and ``0.975`` for a 95% interval.

        Raises:
            ValueError: if the fractions are not ordered within [0, 1], the
                sample is empty, or ``iteration`` is below 1.
        """
        if not 0 <= lower <= upper <= 1:
            raise ValueError("lower and upper must satisfy 0 <= lower <= upper <= 1")
        size = len(self.bslist)
        if size == 0:
            raise ValueError("bslist must contain at least one value")
        if self.iteration < 1:
            raise ValueError("iteration must be at least 1")

        listofmeans = []
        for _ in range(self.iteration):
            # Resample with replacement, same size as the original sample.
            bootstraplist = [random.choice(self.bslist) for _ in range(size)]
            listofmeans.append(sum(bootstraplist) / size)
        listofmeans.sort()

        # round() can land one past the final index (e.g. iteration=10 gives
        # round(9.5) == 10), so clamp both positions to the last element.
        last = len(listofmeans) - 1
        a = min(round(self.iteration * lower), last)
        b = min(round(self.iteration * upper), last)
        return listofmeans[a], listofmeans[b]
# Run the bootstrap on the sample data with 10,000 resamples and show the result.
test = Bootstrapping(
    bslist=[
        60, 33, 102, 53, 63, 33, 42, 19, 31, 86, 15, 50,
        45, 47, 26, 23, 30, 20, 18, 48, 22, 20, 17, 29, 43, 52, 29,
    ],
    iteration=10000,
)
print(test.CI())
输出:
(32.888888888888886, 45.888888888888886)