绘制根据多峰分布确定的单峰分布
Plot unimodal distributions determined from a multimodal distribution
我使用 GaussianMixture 来分析多峰分布。通过 GaussianMixture 类的属性 means_ 和 covariances_,我可以访问均值和协方差。现在我该如何使用它们来绘制两个底层的单峰分布?
我想使用 scipy.stats.norm,但不知道应该选择什么作为 loc 和 scale 的参数。期望的输出类似于附图所示。
本题示例代码修改自答案。
import numpy as np
import matplotlib.pyplot as plt
from sklearn import mixture
from scipy.stats import norm
# Question's attempt: build a curve from the sum of two normal pdfs on a
# uniform grid, fit a GaussianMixture to (grid, curve) pairs, then try to
# plot the two underlying normal curves from the fitted attributes.
ls = np.linspace(0, 60, 1000)
# Sum of two normal pdfs (loc=0, scale=5 and loc=20, scale=10) evaluated on ls.
multimodal_norm = norm.pdf(ls, 0, 5) + norm.pdf(ls, 20, 10)
plt.plot(ls, multimodal_norm)
# concatenate ls and multimodal to form an array of samples
# the shape is [n_samples, n_features]
# we reshape them to create an additional axis and concatenate along it
samples = np.concatenate([ls.reshape((-1, 1)), multimodal_norm.reshape((-1,1))], axis=-1)
print(samples.shape)
# Two-component mixture with a full covariance matrix per component,
# fitted to the two-column (grid value, curve value) array built above.
gmix = mixture.GaussianMixture(n_components = 2, covariance_type = "full")
fitted = gmix.fit(samples)
print(fitted.means_)
print(fitted.covariances_)
# The idea is something like the following (not working):
# NOTE(review): both calls pass the whole means_ and covariances_ arrays
# (all components at once) as loc and scale, so the two lines are identical
# and no single component is selected. scipy's `scale` is also a standard
# deviation, not a covariance.
new_norm1 = norm.pdf(ls, fitted.means_, fitted.covariances_)
new_norm2 = norm.pdf(ls, fitted.means_, fitted.covariances_)
plt.plot(ls, new_norm1, label='Norm 1')
plt.plot(ls, new_norm2, label='Norm 2')
目前还不完全清楚您想要完成什么。您把均匀(uniform)网格本身与两个高斯函数的 pdf 之和在该网格上的取值拼接在一起,然后用 GaussianMixture 模型去拟合这个数组。高斯混合模型不是这样拟合的。通常,模型要拟合的是从某个分布(通常未知,但也可以是模拟的)中抽取的随机观测值。
假设您想将 GaussianMixture 模型拟合到从高斯混合分布中抽取的样本,并且假设您事先知道预期结果(大概是为了检验拟合效果)。下面的代码既模拟了正确的分布,也拟合了模型。它会打印拟合过程从样本中恢复出的参数——可以看到它们确实接近我们用来模拟样本的参数。最后,代码绘制拟合到数据的 GaussianMixture 分布的密度图。
import numpy as np
import matplotlib.pyplot as plt
from sklearn import mixture
from scipy.stats import norm
# Simulate samples from a two-component 1-D Gaussian mixture, fit a
# GaussianMixture to them, print the recovered parameters, and plot the
# fitted component densities together with the fitted mixture density.

# set simulation parameters: mean, standard deviation and mixing weight
mean1, std1, w1 = 0, 5, 0.5
mean2, std2, w2 = 20, 10, 1 - w1
# simulate constituents
n_samples = 100000
np.random.seed(2021)  # fixed seed so the printed results are reproducible
gauss_sample_1 = np.random.normal(loc=mean1, scale=std1, size=n_samples)
gauss_sample_2 = np.random.normal(loc=mean2, scale=std2, size=n_samples)
# 1 -> take the draw from component 1, 0 -> take it from component 2
binomial = np.random.binomial(n=1, p=w1, size=n_samples)
# simulate gaussian mixture; reshape to [n_samples, n_features] for sklearn
mutlimodal_samples = np.where(
    binomial == 1, gauss_sample_1, gauss_sample_2
).reshape(-1, 1)
# define and fit the mixture model
gmix = mixture.GaussianMixture(n_components=2, covariance_type="full")
fitted = gmix.fit(mutlimodal_samples)
# With one feature, means_ is (2, 1) and covariances_ is (2, 1, 1); each
# covariance entry is a variance, so its square root is the `scale`
# (standard deviation) that scipy.stats.norm expects.
fitted_means = fitted.means_.ravel()
fitted_stdevs = np.sqrt(fitted.covariances_.ravel())
fitted_weights = fitted.weights_
print('fitted means:', fitted_means[0], fitted_means[1])
print('fitted stdevs:', fitted_stdevs[0], fitted_stdevs[1])
print('fitted weights:', fitted_weights)
# Plot component pdfs and a joint pdf
ls = np.linspace(-50, 50, 1000)
new_norm1 = norm.pdf(ls, fitted_means[0], fitted_stdevs[0])
new_norm2 = norm.pdf(ls, fitted_means[1], fitted_stdevs[1])
# The mixture pdf is the weighted average of the component pdfs. Use the
# fitted weights, paired by index with the fitted components: the model
# may return the components in a different order than they were simulated,
# so the simulation weights w1/w2 do not necessarily line up with them.
multi_pdf = fitted_weights[0] * new_norm1 + fitted_weights[1] * new_norm2
plt.plot(ls, new_norm1, label='Norm pdf 1')
plt.plot(ls, new_norm2, label='Norm pdf 2')
plt.plot(ls, multi_pdf, label='multi-norm pdf')
plt.legend(loc='best')
plt.show()
结果是
fitted means: 22.358448018824642 0.8607494960575028
fitted stdevs: 8.770962351118127 5.58538485134623
fitted weights: [0.42517515 0.57482485]
正如我们所见,它们与模拟中使用的参数很接近(不考虑分量的顺序——模型当然无法恢复顺序,但这无关紧要):
mean1, std1, w1 = 0,5,0.5
mean2, std2, w2 = 20,10,1-w1
以及混合密度及其各分量的图。请注意,GaussianMixture 的 pdf 不是各分量 pdf 的简单相加,而是以 w1、1-w1 为权重的加权平均值,即 $p(x) = w_1\,\mathcal{N}(x;\mu_1,\sigma_1^2) + (1-w_1)\,\mathcal{N}(x;\mu_2,\sigma_2^2)$:
我使用 GaussianMixture 来分析多峰分布。通过 GaussianMixture 类的属性 means_ 和 covariances_,我可以访问均值和协方差。现在我该如何使用它们来绘制两个底层的单峰分布?
我想使用 scipy.stats.norm,但不知道应该选择什么作为 loc 和 scale 的参数。期望的输出类似于附图所示。
本题示例代码修改自答案
import numpy as np
import matplotlib.pyplot as plt
from sklearn import mixture
from scipy.stats import norm
# Question's attempt: build a curve from the sum of two normal pdfs on a
# uniform grid, fit a GaussianMixture to (grid, curve) pairs, then try to
# plot the two underlying normal curves from the fitted attributes.
ls = np.linspace(0, 60, 1000)
# Sum of two normal pdfs (loc=0, scale=5 and loc=20, scale=10) evaluated on ls.
multimodal_norm = norm.pdf(ls, 0, 5) + norm.pdf(ls, 20, 10)
plt.plot(ls, multimodal_norm)
# concatenate ls and multimodal to form an array of samples
# the shape is [n_samples, n_features]
# we reshape them to create an additional axis and concatenate along it
samples = np.concatenate([ls.reshape((-1, 1)), multimodal_norm.reshape((-1,1))], axis=-1)
print(samples.shape)
# Two-component mixture with a full covariance matrix per component,
# fitted to the two-column (grid value, curve value) array built above.
gmix = mixture.GaussianMixture(n_components = 2, covariance_type = "full")
fitted = gmix.fit(samples)
print(fitted.means_)
print(fitted.covariances_)
# The idea is something like the following (not working):
# NOTE(review): both calls pass the whole means_ and covariances_ arrays
# (all components at once) as loc and scale, so the two lines are identical
# and no single component is selected. scipy's `scale` is also a standard
# deviation, not a covariance.
new_norm1 = norm.pdf(ls, fitted.means_, fitted.covariances_)
new_norm2 = norm.pdf(ls, fitted.means_, fitted.covariances_)
plt.plot(ls, new_norm1, label='Norm 1')
plt.plot(ls, new_norm2, label='Norm 2')
目前还不完全清楚您想要完成什么。您把均匀(uniform)网格本身与两个高斯函数的 pdf 之和在该网格上的取值拼接在一起,然后用 GaussianMixture 模型去拟合这个数组。高斯混合模型不是这样拟合的。通常,模型要拟合的是从某个分布(通常未知,但也可以是模拟的)中抽取的随机观测值。
假设您想将 GaussianMixture 模型拟合到从高斯混合分布中抽取的样本,并且假设您事先知道预期结果(大概是为了检验拟合效果)。下面的代码既模拟了正确的分布,也拟合了模型。它会打印拟合过程从样本中恢复出的参数——可以看到它们确实接近我们用来模拟样本的参数。最后,代码绘制拟合到数据的 GaussianMixture 分布的密度图。
import numpy as np
import matplotlib.pyplot as plt
from sklearn import mixture
from scipy.stats import norm
# Simulate samples from a two-component 1-D Gaussian mixture, fit a
# GaussianMixture to them, print the recovered parameters, and plot the
# fitted component densities together with the fitted mixture density.

# set simulation parameters: mean, standard deviation and mixing weight
mean1, std1, w1 = 0, 5, 0.5
mean2, std2, w2 = 20, 10, 1 - w1
# simulate constituents
n_samples = 100000
np.random.seed(2021)  # fixed seed so the printed results are reproducible
gauss_sample_1 = np.random.normal(loc=mean1, scale=std1, size=n_samples)
gauss_sample_2 = np.random.normal(loc=mean2, scale=std2, size=n_samples)
# 1 -> take the draw from component 1, 0 -> take it from component 2
binomial = np.random.binomial(n=1, p=w1, size=n_samples)
# simulate gaussian mixture; reshape to [n_samples, n_features] for sklearn
mutlimodal_samples = np.where(
    binomial == 1, gauss_sample_1, gauss_sample_2
).reshape(-1, 1)
# define and fit the mixture model
gmix = mixture.GaussianMixture(n_components=2, covariance_type="full")
fitted = gmix.fit(mutlimodal_samples)
# With one feature, means_ is (2, 1) and covariances_ is (2, 1, 1); each
# covariance entry is a variance, so its square root is the `scale`
# (standard deviation) that scipy.stats.norm expects.
fitted_means = fitted.means_.ravel()
fitted_stdevs = np.sqrt(fitted.covariances_.ravel())
fitted_weights = fitted.weights_
print('fitted means:', fitted_means[0], fitted_means[1])
print('fitted stdevs:', fitted_stdevs[0], fitted_stdevs[1])
print('fitted weights:', fitted_weights)
# Plot component pdfs and a joint pdf
ls = np.linspace(-50, 50, 1000)
new_norm1 = norm.pdf(ls, fitted_means[0], fitted_stdevs[0])
new_norm2 = norm.pdf(ls, fitted_means[1], fitted_stdevs[1])
# The mixture pdf is the weighted average of the component pdfs. Use the
# fitted weights, paired by index with the fitted components: the model
# may return the components in a different order than they were simulated,
# so the simulation weights w1/w2 do not necessarily line up with them.
multi_pdf = fitted_weights[0] * new_norm1 + fitted_weights[1] * new_norm2
plt.plot(ls, new_norm1, label='Norm pdf 1')
plt.plot(ls, new_norm2, label='Norm pdf 2')
plt.plot(ls, multi_pdf, label='multi-norm pdf')
plt.legend(loc='best')
plt.show()
结果是
fitted means: 22.358448018824642 0.8607494960575028
fitted stdevs: 8.770962351118127 5.58538485134623
fitted weights: [0.42517515 0.57482485]
正如我们所见,它们与模拟中使用的参数很接近(不考虑分量的顺序——模型当然无法恢复顺序,但这无关紧要):
mean1, std1, w1 = 0,5,0.5
mean2, std2, w2 = 20,10,1-w1
以及混合密度及其各分量的图。请注意,GaussianMixture 的 pdf 不是各分量 pdf 的简单相加,而是权重为 w1
、1-w1
: