在 python 意外输出中进行引导时初始化 np.empty() 问题
Initializing np.empty() issues when doing bootstrapping in python unexpected output
我是 python 的初学者,正在学习一些统计知识!
面对一些函数的问题,这些函数有助于 bootstrap 数据数组 n 次并输出数组中每个 bootstrap 的平均值。
提前感谢您的帮助!
import numpy as np
np.random.seed(42)
rainfall = np.array([875.5, 648.2, 788.1, 940.3, 491.1, 743.5, 730.1, 686.5,
878.8, 865.6, 654.9, 831.5, 798.1, 681.8, 743.8, 689.1,
752.1, 837.2, 710.6, 749.2, 967.1, 701.2, 619., 747.6,
803.4, 645.6, 804.1, 787.4, 646.8, 997.1, 774., 734.5,
835., 840.7, 659.6, 828.3, 909.7, 856.9, 578.3, 904.2,
883.9, 740.1, 773.9, 741.4, 866.8, 871.1, 712.5, 919.2,
927.9, 809.4, 633.8, 626.8, 871.3, 774.3, 898.8, 789.6,
936.3, 765.4, 882.1, 681.1, 661.3, 847.9, 683.9, 985.7,
771.1, 736.6, 713.2, 774.5, 937.7, 694.5, 598.2, 983.8,
700.2, 901.3, 733.5, 964.4, 609.3, 1035.2, 718., 688.6,
736.8, 643.3, 1038.5, 969., 802.7, 876.6, 944.7, 786.6,
770.4, 808.6, 761.3, 774.2, 559.3, 674.2, 883.6, 823.9,
960.4, 877.8, 940.6, 831.8, 906.2, 866.5, 674.1, 998.1,
789.3, 915., 737.1, 763., 666.7, 824.5, 913.8, 905.1,
667.8, 747.4, 784.7, 925.4, 880.2, 1086.9, 764.4, 1050.1,
595.2, 855.2, 726.9, 785.2, 948.8, 970.6, 896., 618.4,
572.4, 1146.4, 728.2, 864.2, 793.])
def bootstrap_replicate_1d(data, func):
"""Generate bootstrap replicate of 1D data."""
bs_sample = np.random.choice(data, len(data))
return func(bs_sample)
print(bootstrap_replicate_1d(rainfall, np.mean))
Output: 816.7992481203007
Data output make sense as the mean rainfall data is around 800.
def draw_bs_reps(data, func, size=1):
"""Draw bootstrap replicates."""
# Initialize array of replicates: bs_replicates
bs_replicates = np.empty(size)
# Generate replicates
for i in range(size):
bootstrap_replicate_1d(data, func)
return bs_replicates
test = draw_bs_reps(rainfall, np.mean, 1)
Output: array([105804.1])
test = draw_bs_reps(rainfall, np.mean, 5)
Output: array([3.3 , 4. , 4.35 , 4.6 , 4.9775])
The result of the function seems to be erroneous as the mean deviates a lot, even worse when the size is increased.
bs_replicates 未启动但未重新分配到 for 循环中,draw_bs_reps 函数将被编辑为:
def draw_bs_reps(data, func, size=1):
"""Draw bootstrap replicates."""
# Initialize array of replicates: bs_replicates
bs_replicates = np.empty(size)
# Generate replicates
for i in range(size):
bs_replicates[i] = bootstrap_replicate_1d(data, func)
return bs_replicates
我是 python 的初学者,正在学习一些统计知识! 面对一些函数的问题,这些函数有助于 bootstrap 数据数组 n 次并输出数组中每个 bootstrap 的平均值。
提前感谢您的帮助!
import numpy as np
np.random.seed(42)
rainfall = np.array([875.5, 648.2, 788.1, 940.3, 491.1, 743.5, 730.1, 686.5,
878.8, 865.6, 654.9, 831.5, 798.1, 681.8, 743.8, 689.1,
752.1, 837.2, 710.6, 749.2, 967.1, 701.2, 619., 747.6,
803.4, 645.6, 804.1, 787.4, 646.8, 997.1, 774., 734.5,
835., 840.7, 659.6, 828.3, 909.7, 856.9, 578.3, 904.2,
883.9, 740.1, 773.9, 741.4, 866.8, 871.1, 712.5, 919.2,
927.9, 809.4, 633.8, 626.8, 871.3, 774.3, 898.8, 789.6,
936.3, 765.4, 882.1, 681.1, 661.3, 847.9, 683.9, 985.7,
771.1, 736.6, 713.2, 774.5, 937.7, 694.5, 598.2, 983.8,
700.2, 901.3, 733.5, 964.4, 609.3, 1035.2, 718., 688.6,
736.8, 643.3, 1038.5, 969., 802.7, 876.6, 944.7, 786.6,
770.4, 808.6, 761.3, 774.2, 559.3, 674.2, 883.6, 823.9,
960.4, 877.8, 940.6, 831.8, 906.2, 866.5, 674.1, 998.1,
789.3, 915., 737.1, 763., 666.7, 824.5, 913.8, 905.1,
667.8, 747.4, 784.7, 925.4, 880.2, 1086.9, 764.4, 1050.1,
595.2, 855.2, 726.9, 785.2, 948.8, 970.6, 896., 618.4,
572.4, 1146.4, 728.2, 864.2, 793.])
def bootstrap_replicate_1d(data, func):
"""Generate bootstrap replicate of 1D data."""
bs_sample = np.random.choice(data, len(data))
return func(bs_sample)
print(bootstrap_replicate_1d(rainfall, np.mean))
Output: 816.7992481203007
Data output make sense as the mean rainfall data is around 800.
def draw_bs_reps(data, func, size=1):
"""Draw bootstrap replicates."""
# Initialize array of replicates: bs_replicates
bs_replicates = np.empty(size)
# Generate replicates
for i in range(size):
bootstrap_replicate_1d(data, func)
return bs_replicates
test = draw_bs_reps(rainfall, np.mean, 1)
Output: array([105804.1])
test = draw_bs_reps(rainfall, np.mean, 5)
Output: array([3.3 , 4. , 4.35 , 4.6 , 4.9775])
The result of the function seems to be erroneous as the mean deviates a lot, even worse when the size is increased.
bs_replicates 未启动但未重新分配到 for 循环中,draw_bs_reps 函数将被编辑为:
def draw_bs_reps(data, func, size=1):
"""Draw bootstrap replicates."""
# Initialize array of replicates: bs_replicates
bs_replicates = np.empty(size)
# Generate replicates
for i in range(size):
bs_replicates[i] = bootstrap_replicate_1d(data, func)
return bs_replicates