使用 matplotlib 绘制函数
Function plotting with matplotlib
我正在尝试对依赖于 T
和参数 xi
、mu
、sig
.
的方程建模
我已经推断出不同持续时间(1 小时、3 小时等)的参数和这些参数的分布(标准差)。在示例代码中,参数持续时间为 1 小时。
我需要创建一个 forloop 以使用 xi、mu 和 sig 数组创建 zp 云。 T 可以取的不同值是 [2, 5, 25, 50, 75, 100]
我还想在第 2 行显示带有标准偏差的误差条或不确定性。我使用 Metropolis Hastings 算法探索参数 space,在 3 个链中进行 15000 次迭代
xi = accepted[0:]
xi = array([[-2.00000000e-01, 1.00000000e-01, 1.00000000e-01],
[-2.06044711e-01, 1.51739593e-01, 1.36675069e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
...,
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[-3.14226453e-02, 1.44844410e+01, 5.00637147e+00]])
mu = accepted[1:]
mu = array([[-2.06044711e-01, 1.51739593e-01, 1.36675069e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
...,
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[-3.14226453e-02, 1.44844410e+01, 5.00637147e+00]])
sig = accepted [2:]
sig = array([[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
...,
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[-3.14226453e-02, 1.44844410e+01, 5.00637147e+00]])
spread = accepted[:,0].std(), accepted[:,1].std(), accepted[:,2].std()
(xi, mu, sig)
def zp(T, xi = accepted[0:], mu = accepted[1:], sig= accepted[2:]):
p = 1/T
yp = - np.log10(1 - p)
zp = np.ndarray(shape=(xi.size, T.size))
for i in range(xi.size):
if xi[i] == 0:
zp[i,:] = mu[i] - (sig[i]*(np.log10(yp)))
else:
zp[i,:] = mu[i] - ((sig[i]/xi[i])*(1-(yp**(-xi[i]))))
return zp
# get results
res = zp(T, xi, mu, sig)
因此,您有 (15000,3)
矩阵 accepted
,其中 xi=accepted[:,0]
、mu=accepted[:,1]
和 sig=accepted[:,2]
。
我将为 xi
、mu
和 sig
生成一些示例数据,只是为了向您展示绘图的结果。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# I will generate some sample data
# You'll have to use your own data
n = 15000
np.random.seed(1)
xi, mu, sig = (
np.random.normal(loc=-0.15153068743678966, scale=0.2254333661580348, size=(n)),
np.random.normal(loc=14.241861263759796, scale=2.6116567608814196, size=(n)),
np.random.normal(loc=5.5779131542307345, scale=0.9627764065564182, size=(n)),
)
您将 T
定义为
# define T steps
T = np.array([2, 5, 25, 50, 75, 100])
现在我们取参数的均值和标准差
xi_mean = xi.mean()
mu_mean = mu.mean()
sig_mean = sig.mean()
xi_std = xi.std()
mu_std = mu.std()
sig_std = sig.std()
并定义一个函数zp
# function zp
def zp(T, xi, mu, sig):
p = 1 / T
yp = - np.log10(1 - p)
# ravel results
_xi = xi.ravel()
_mu = mu.ravel()
_sig = sig.ravel()
res = np.ndarray(shape=(_xi.size, T.size))
for i in range(_xi.size):
if _xi[i] == 0:
res[i,:] = _mu[i] - (_sig[i]*(np.log10(yp)))
else:
res[i,:] = _mu[i] - ((_sig[i]/_xi[i])*(1-(yp**(-_xi[i]))))
return res
# get results
res = zp(T, xi, mu, sig)
我们可以定义一个包含所有结果的 DataFrame
# define results DataFrame
df = pd.DataFrame(res, columns=T)
print(df)
2 5 25 50 75 100
0 24.610952 34.489626 54.614356 65.349657 72.376143 77.735341
1 16.554362 20.033999 23.514591 24.524273 25.023313 25.342476
2 23.468215 28.276272 33.212243 34.678420 35.410825 35.882346
3 23.102447 26.089680 28.680803 29.339580 29.646593 29.835899
4 21.021596 30.494043 45.594905 52.182941 56.105955 58.925041
... ... ... ... ... ... ...
14995 22.964737 27.856439 33.039263 34.623438 35.425031 35.945247
14996 21.371429 29.078696 47.122467 57.868230 65.281555 71.127181
14997 18.534785 21.512996 24.424363 25.251344 25.656252 25.913699
14998 19.915343 28.939309 43.440076 49.808611 53.612702 56.351668
14999 20.835338 25.069838 29.829853 31.364291 32.159584 32.683499
[15000 rows x 6 columns]
现在我们用参数的均值+/-标准差
计算zp
zp_mean = zp(T, xi_mean, mu_mean, sig_mean).ravel()
zp_lo = zp(T, xi_mean-xi_std, mu_mean-mu_std, sig_mean-sig_std).ravel()
zp_hi = zp(T, xi_mean+xi_std, mu_mean+mu_std, sig_mean+sig_std).ravel()
我们终于可以绘制出 15000 条线和均值+/-std
fig, ax = plt.subplots(figsize=(12, 5))
ax.errorbar(
T, zp_mean,
yerr=[zp_mean-zp_lo, zp_hi-zp_mean],
color='k', zorder=999,
label='mean and std'
)
for i, col in enumerate(df.T.columns):
_df = df.T[col]
ax.plot(_df, lw=1, alpha=.01, color='r')
ax.set(
xlabel='duration',
ylabel='value',
# adjust this
ylim=(10, 150)
)
ax.legend()
plt.show()
您还可以选择 seaborn
的更快解决方案。
首先,熔化DataFrame
melt_df = df.melt(var_name='duration')
print(melt_df)
duration value
0 2 24.610952
1 2 16.554362
2 2 23.468215
3 2 23.102447
4 2 21.021596
... ... ...
89995 100 35.945247
89996 100 71.127181
89997 100 25.913699
89998 100 56.351668
89999 100 32.683499
[90000 rows x 2 columns]
然后用relplot
绘图并选择想要的置信区间(这里是99%,也可以是'sd'
)
import seaborn as sns
g = sns.relplot(
kind='line',
data=melt_df,
x='duration', y='value',
ci=99
)
g.axes.flat[0].set_title('Confidence Interval 99%')
plt.show()
我正在尝试对依赖于 T
和参数 xi
、mu
、sig
.
我已经推断出不同持续时间(1 小时、3 小时等)的参数和这些参数的分布(标准差)。在示例代码中,参数持续时间为 1 小时。
我需要创建一个 forloop 以使用 xi、mu 和 sig 数组创建 zp 云。 T 可以取的不同值是 [2, 5, 25, 50, 75, 100]
我还想在第 2 行显示带有标准偏差的误差条或不确定性。我使用 Metropolis Hastings 算法探索参数 space,在 3 个链中进行 15000 次迭代
xi = accepted[0:]
xi = array([[-2.00000000e-01, 1.00000000e-01, 1.00000000e-01],
[-2.06044711e-01, 1.51739593e-01, 1.36675069e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
...,
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[-3.14226453e-02, 1.44844410e+01, 5.00637147e+00]])
mu = accepted[1:]
mu = array([[-2.06044711e-01, 1.51739593e-01, 1.36675069e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
...,
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[-3.14226453e-02, 1.44844410e+01, 5.00637147e+00]])
sig = accepted [2:]
sig = array([[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
[-2.91747418e-01, 1.10818827e-01, 1.80040639e-01],
...,
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[ 1.45611857e-02, 1.46824099e+01, 5.16110127e+00],
[-3.14226453e-02, 1.44844410e+01, 5.00637147e+00]])
spread = accepted[:,0].std(), accepted[:,1].std(), accepted[:,2].std()
(xi, mu, sig)
def zp(T, xi = accepted[0:], mu = accepted[1:], sig= accepted[2:]):
p = 1/T
yp = - np.log10(1 - p)
zp = np.ndarray(shape=(xi.size, T.size))
for i in range(xi.size):
if xi[i] == 0:
zp[i,:] = mu[i] - (sig[i]*(np.log10(yp)))
else:
zp[i,:] = mu[i] - ((sig[i]/xi[i])*(1-(yp**(-xi[i]))))
return zp
# get results
res = zp(T, xi, mu, sig)
因此,您有 (15000,3)
矩阵 accepted
,其中 xi=accepted[:,0]
、mu=accepted[:,1]
和 sig=accepted[:,2]
。
我将为 xi
、mu
和 sig
生成一些示例数据,只是为了向您展示绘图的结果。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# I will generate some sample data
# You'll have to use your own data
n = 15000
np.random.seed(1)
xi, mu, sig = (
np.random.normal(loc=-0.15153068743678966, scale=0.2254333661580348, size=(n)),
np.random.normal(loc=14.241861263759796, scale=2.6116567608814196, size=(n)),
np.random.normal(loc=5.5779131542307345, scale=0.9627764065564182, size=(n)),
)
您将 T
定义为
# define T steps
T = np.array([2, 5, 25, 50, 75, 100])
现在我们取参数的均值和标准差
xi_mean = xi.mean()
mu_mean = mu.mean()
sig_mean = sig.mean()
xi_std = xi.std()
mu_std = mu.std()
sig_std = sig.std()
并定义一个函数zp
# function zp
def zp(T, xi, mu, sig):
p = 1 / T
yp = - np.log10(1 - p)
# ravel results
_xi = xi.ravel()
_mu = mu.ravel()
_sig = sig.ravel()
res = np.ndarray(shape=(_xi.size, T.size))
for i in range(_xi.size):
if _xi[i] == 0:
res[i,:] = _mu[i] - (_sig[i]*(np.log10(yp)))
else:
res[i,:] = _mu[i] - ((_sig[i]/_xi[i])*(1-(yp**(-_xi[i]))))
return res
# get results
res = zp(T, xi, mu, sig)
我们可以定义一个包含所有结果的 DataFrame
# define results DataFrame
df = pd.DataFrame(res, columns=T)
print(df)
2 5 25 50 75 100
0 24.610952 34.489626 54.614356 65.349657 72.376143 77.735341
1 16.554362 20.033999 23.514591 24.524273 25.023313 25.342476
2 23.468215 28.276272 33.212243 34.678420 35.410825 35.882346
3 23.102447 26.089680 28.680803 29.339580 29.646593 29.835899
4 21.021596 30.494043 45.594905 52.182941 56.105955 58.925041
... ... ... ... ... ... ...
14995 22.964737 27.856439 33.039263 34.623438 35.425031 35.945247
14996 21.371429 29.078696 47.122467 57.868230 65.281555 71.127181
14997 18.534785 21.512996 24.424363 25.251344 25.656252 25.913699
14998 19.915343 28.939309 43.440076 49.808611 53.612702 56.351668
14999 20.835338 25.069838 29.829853 31.364291 32.159584 32.683499
[15000 rows x 6 columns]
现在我们用参数的均值+/-标准差
计算zp
zp_mean = zp(T, xi_mean, mu_mean, sig_mean).ravel()
zp_lo = zp(T, xi_mean-xi_std, mu_mean-mu_std, sig_mean-sig_std).ravel()
zp_hi = zp(T, xi_mean+xi_std, mu_mean+mu_std, sig_mean+sig_std).ravel()
我们终于可以绘制出 15000 条线和均值+/-std
fig, ax = plt.subplots(figsize=(12, 5))
ax.errorbar(
T, zp_mean,
yerr=[zp_mean-zp_lo, zp_hi-zp_mean],
color='k', zorder=999,
label='mean and std'
)
for i, col in enumerate(df.T.columns):
_df = df.T[col]
ax.plot(_df, lw=1, alpha=.01, color='r')
ax.set(
xlabel='duration',
ylabel='value',
# adjust this
ylim=(10, 150)
)
ax.legend()
plt.show()
您还可以选择 seaborn
的更快解决方案。
首先,熔化DataFrame
melt_df = df.melt(var_name='duration')
print(melt_df)
duration value
0 2 24.610952
1 2 16.554362
2 2 23.468215
3 2 23.102447
4 2 21.021596
... ... ...
89995 100 35.945247
89996 100 71.127181
89997 100 25.913699
89998 100 56.351668
89999 100 32.683499
[90000 rows x 2 columns]
然后用relplot
绘图并选择想要的置信区间(这里是99%,也可以是'sd'
)
import seaborn as sns
g = sns.relplot(
kind='line',
data=melt_df,
x='duration', y='value',
ci=99
)
g.axes.flat[0].set_title('Confidence Interval 99%')
plt.show()