使用 Python 标准化双变量分布
Standardise bivariate distribution with Python
我正在生成下面的多元概率密度函数。这工作正常,但我希望标准化 Z 值,以便它得出 0 到 1 之间的值。
为了实现这一点,我想将分布值除以均值处的分布值,这样它在均值处总是为 1,而在其他地方较低。我知道这样所有值的总和将大于 1。
我目前是将 Z 除以 Z 的总和(sum),
但是当打印这些值时,它们仍然在我预期的标准化范围之外。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
# Grid resolution: number of sample points along each axis
N = 60
# Sample x over [-3, 3] and y over [-3, 4], expanded to 2-D coordinate grids
X, Y = np.meshgrid(np.linspace(-3, 3, N), np.linspace(-3, 4, N))
# Parameters of the Gaussian: mean vector first, then covariance matrix
mu = np.array([0., 1.])
Sigma = np.array([[1., -0.5],
                  [-0.5, 1.5]])
# Join the two grids along a new last axis so pos[i, j] holds the point (x, y)
pos = np.stack((X, Y), axis=-1)
def multivariate_gaussian(pos, mu, Sigma):
    """Return the multivariate Gaussian density evaluated on array pos.

    pos is an array constructed by packing the meshed arrays of variables
    x_1, x_2, x_3, ..., x_k into its _last_ dimension.

    Parameters
    ----------
    pos : array-like whose last axis has length k (one point per entry).
    mu : array-like of length k, the mean vector.
    Sigma : array-like of shape (k, k), the covariance matrix.

    Returns
    -------
    numpy.ndarray with the shape of ``pos`` minus its last axis, holding
    the density value at every point.
    """
    # Accept plain lists/tuples as well as arrays.
    pos = np.asarray(pos)
    mu = np.asarray(mu)
    Sigma = np.asarray(Sigma)

    n = mu.shape[0]
    Sigma_det = np.linalg.det(Sigma)
    Sigma_inv = np.linalg.inv(Sigma)
    # Normalisation constant sqrt((2*pi)^n * |Sigma|); called `norm` so it
    # is not confused with a grid size named N in the calling script.
    norm = np.sqrt((2 * np.pi)**n * Sigma_det)
    # Subtract the mean once and reuse it on both sides of the quadratic form.
    diff = pos - mu
    # This einsum call calculates (x-mu)T.Sigma-1.(x-mu) in a vectorized
    # way across all the input variables.
    fac = np.einsum('...k,kl,...l->...', diff, Sigma_inv, diff)
    return np.exp(-fac / 2) / norm
# The distribution on the variables X, Y packed into pos.
Z = multivariate_gaussian(pos, mu, Sigma)
#normalise Z so range is 0-1
# NOTE(review): the built-in sum() adds the rows of the 2-D array Z, giving
# one total per column, so this rescales each column to sum to 1; it does
# not scale the overall peak of Z to 1.
Z = Z/sum(Z)
print(Z)
# Create a surface plot and projected filled contour plot under it.
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib,
# so the 3D axes are requested through add_subplot() instead.
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, rstride=3, cstride=3, linewidth=1, antialiased=True,
                cmap=cm.magma)
# Filled contours drawn flat at z = -0.15, i.e. on the floor of the z-range
# set below.
cset = ax.contourf(X, Y, Z, zdir='z', offset=-0.15, cmap=cm.magma)
# Adjust the limits, ticks and view angle
ax.set_zlim(-0.15,1)
ax.view_init(27, -21)
plt.show()
如果要将 Z 标准化到 0 到 1 的范围,您不应将其除以总和,而应除以所有值中的最大值。这样可以确保新的最大值为 1:
# Divide by the largest value so that the peak of Z becomes exactly 1
Z = Z / Z.max()
# Summarise the rescaled values (count, mean, std, min, quartiles, max)
import pandas as pd
print(pd.Series(Z.ravel()).describe())
count 3.600000e+03
mean 1.605148e-01
std 2.351826e-01
min 6.184228e-08
25% 7.278911e-03
50% 4.492385e-02
75% 2.135538e-01
max 1.000000e+00
dtype: float64
由于这是一个高斯分布,而您只改变了缩放比例,因此最大值仍然位于均值对应的 x 和 y 处。但是请注意,缩放后的 Z 不再是概率密度函数(其积分不再为 1)。
我正在生成下面的多元概率密度函数。这工作正常,但我希望标准化 Z 值,以便它得出 0 到 1 之间的值。
为了实现这一点,我想将分布值除以均值处的分布值,这样它在均值处总是为 1,而在其他地方较低。我知道这样所有值的总和将大于 1。
我目前是将 Z 除以 Z 的总和(sum),
但是当打印这些值时,它们仍然在我预期的标准化范围之外。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
# Grid resolution: number of sample points along each axis
N = 60
# Sample x over [-3, 3] and y over [-3, 4], expanded to 2-D coordinate grids
X, Y = np.meshgrid(np.linspace(-3, 3, N), np.linspace(-3, 4, N))
# Parameters of the Gaussian: mean vector first, then covariance matrix
mu = np.array([0., 1.])
Sigma = np.array([[1., -0.5],
                  [-0.5, 1.5]])
# Join the two grids along a new last axis so pos[i, j] holds the point (x, y)
pos = np.stack((X, Y), axis=-1)
def multivariate_gaussian(pos, mu, Sigma):
    """Return the multivariate Gaussian density evaluated on array pos.

    pos is an array constructed by packing the meshed arrays of variables
    x_1, x_2, x_3, ..., x_k into its _last_ dimension.

    Parameters
    ----------
    pos : array-like whose last axis has length k (one point per entry).
    mu : array-like of length k, the mean vector.
    Sigma : array-like of shape (k, k), the covariance matrix.

    Returns
    -------
    numpy.ndarray with the shape of ``pos`` minus its last axis, holding
    the density value at every point.
    """
    # Accept plain lists/tuples as well as arrays.
    pos = np.asarray(pos)
    mu = np.asarray(mu)
    Sigma = np.asarray(Sigma)

    n = mu.shape[0]
    Sigma_det = np.linalg.det(Sigma)
    Sigma_inv = np.linalg.inv(Sigma)
    # Normalisation constant sqrt((2*pi)^n * |Sigma|); called `norm` so it
    # is not confused with a grid size named N in the calling script.
    norm = np.sqrt((2 * np.pi)**n * Sigma_det)
    # Subtract the mean once and reuse it on both sides of the quadratic form.
    diff = pos - mu
    # This einsum call calculates (x-mu)T.Sigma-1.(x-mu) in a vectorized
    # way across all the input variables.
    fac = np.einsum('...k,kl,...l->...', diff, Sigma_inv, diff)
    return np.exp(-fac / 2) / norm
# The distribution on the variables X, Y packed into pos.
Z = multivariate_gaussian(pos, mu, Sigma)
#normalise Z so range is 0-1
# NOTE(review): the built-in sum() adds the rows of the 2-D array Z, giving
# one total per column, so this rescales each column to sum to 1; it does
# not scale the overall peak of Z to 1.
Z = Z/sum(Z)
print(Z)
# Create a surface plot and projected filled contour plot under it.
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib,
# so the 3D axes are requested through add_subplot() instead.
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, rstride=3, cstride=3, linewidth=1, antialiased=True,
                cmap=cm.magma)
# Filled contours drawn flat at z = -0.15, i.e. on the floor of the z-range
# set below.
cset = ax.contourf(X, Y, Z, zdir='z', offset=-0.15, cmap=cm.magma)
# Adjust the limits, ticks and view angle
ax.set_zlim(-0.15,1)
ax.view_init(27, -21)
plt.show()
如果要将 Z 标准化到 0 到 1 的范围,您不应将其除以总和,而应除以所有值中的最大值。这样可以确保新的最大值为 1:
# Divide by the largest value so that the peak of Z becomes exactly 1
Z = Z / Z.max()
# Summarise the rescaled values (count, mean, std, min, quartiles, max)
import pandas as pd
print(pd.Series(Z.ravel()).describe())
count 3.600000e+03
mean 1.605148e-01
std 2.351826e-01
min 6.184228e-08
25% 7.278911e-03
50% 4.492385e-02
75% 2.135538e-01
max 1.000000e+00
dtype: float64
由于这是一个高斯分布,而您只改变了缩放比例,因此最大值仍然位于均值对应的 x 和 y 处。但是请注意,缩放后的 Z 不再是概率密度函数(其积分不再为 1)。