使用 matplotlib 绘制函数

Question

我正在尝试对依赖于 T 和参数 xi、mu、sig.

的方程建模

我已经推断出不同持续时间（1 小时、3 小时等）的参数和这些参数的分布（标准差）。在示例代码中，参数持续时间为 1 小时。

我需要创建一个 forloop 以使用 xi、mu 和 sig 数组创建 zp 云。 T 可以取的不同值是 [2, 5, 25, 50, 75, 100]

我还想在第 2 行显示带有标准偏差的误差条或不确定性。我使用 Metropolis Hastings 算法探索参数 space，在 3 个链中进行 15000 次迭代

xi = accepted[0:]
xi = array([[-2.00000000e-01,  1.00000000e-01,  1.00000000e-01],
   [-2.06044711e-01,  1.51739593e-01,  1.36675069e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   ...,
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [-3.14226453e-02,  1.44844410e+01,  5.00637147e+00]])
 
   mu = accepted[1:]
   mu = array([[-2.06044711e-01,  1.51739593e-01,  1.36675069e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   ...,
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [-3.14226453e-02,  1.44844410e+01,  5.00637147e+00]])

   sig = accepted [2:]

   sig = array([[-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   ...,
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [-3.14226453e-02,  1.44844410e+01,  5.00637147e+00]])

spread = accepted[:,0].std(), accepted[:,1].std(), accepted[:,2].std()
(xi, mu, sig)
def zp(T, xi = accepted[0:], mu = accepted[1:], sig= accepted[2:]):
    p = 1/T
    yp = - np.log10(1 - p)
    zp = np.ndarray(shape=(xi.size, T.size))
    for i in range(xi.size):
    if xi[i] == 0:
        zp[i,:] = mu[i] - (sig[i]*(np.log10(yp)))
    else:
        zp[i,:] = mu[i] - ((sig[i]/xi[i])*(1-(yp**(-xi[i]))))
    return zp
    # get results
    res = zp(T, xi, mu, sig)

Answer 1

因此，您有 (15000,3) 矩阵 accepted，其中 xi=accepted[:,0]、mu=accepted[:,1] 和 sig=accepted[:,2]。

我将为 xi、mu 和 sig 生成一些示例数据，只是为了向您展示绘图的结果。

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# I will generate some sample data
# You'll have to use your own data
n = 15000
np.random.seed(1)
xi, mu, sig = (
    np.random.normal(loc=-0.15153068743678966, scale=0.2254333661580348, size=(n)),
    np.random.normal(loc=14.241861263759796, scale=2.6116567608814196, size=(n)),
    np.random.normal(loc=5.5779131542307345, scale=0.9627764065564182, size=(n)),
)

您将 T 定义为

# define T steps
T =  np.array([2, 5, 25, 50, 75, 100])

现在我们取参数的均值和标准差

xi_mean = xi.mean()
mu_mean = mu.mean()
sig_mean = sig.mean()

xi_std = xi.std()
mu_std = mu.std()
sig_std = sig.std()

并定义一个函数zp

# function zp
def zp(T, xi, mu, sig):
    p = 1 / T
    yp = - np.log10(1 - p)
    
    # ravel results
    _xi = xi.ravel()
    _mu = mu.ravel()
    _sig = sig.ravel()
    
    res = np.ndarray(shape=(_xi.size, T.size))
    
    for i in range(_xi.size):
        if _xi[i] == 0:
            res[i,:] = _mu[i] - (_sig[i]*(np.log10(yp)))
        else:
            res[i,:] = _mu[i] - ((_sig[i]/_xi[i])*(1-(yp**(-_xi[i]))))
    return res

# get results
res = zp(T, xi, mu, sig)

我们可以定义一个包含所有结果的 DataFrame

# define results DataFrame
df = pd.DataFrame(res, columns=T)
print(df)

             2          5          25         50         75         100
0      24.610952  34.489626  54.614356  65.349657  72.376143  77.735341
1      16.554362  20.033999  23.514591  24.524273  25.023313  25.342476
2      23.468215  28.276272  33.212243  34.678420  35.410825  35.882346
3      23.102447  26.089680  28.680803  29.339580  29.646593  29.835899
4      21.021596  30.494043  45.594905  52.182941  56.105955  58.925041
...          ...        ...        ...        ...        ...        ...
14995  22.964737  27.856439  33.039263  34.623438  35.425031  35.945247
14996  21.371429  29.078696  47.122467  57.868230  65.281555  71.127181
14997  18.534785  21.512996  24.424363  25.251344  25.656252  25.913699
14998  19.915343  28.939309  43.440076  49.808611  53.612702  56.351668
14999  20.835338  25.069838  29.829853  31.364291  32.159584  32.683499

[15000 rows x 6 columns]

现在我们用参数的均值+/-标准差

计算zp

zp_mean = zp(T, xi_mean, mu_mean, sig_mean).ravel()
zp_lo = zp(T, xi_mean-xi_std, mu_mean-mu_std, sig_mean-sig_std).ravel()
zp_hi = zp(T, xi_mean+xi_std, mu_mean+mu_std, sig_mean+sig_std).ravel()

我们终于可以绘制出 15000 条线和均值+/-std

fig, ax = plt.subplots(figsize=(12, 5))

ax.errorbar(
    T, zp_mean,
    yerr=[zp_mean-zp_lo, zp_hi-zp_mean],
    color='k', zorder=999,
    label='mean and std'
)

for i, col in enumerate(df.T.columns):
    _df = df.T[col]
    ax.plot(_df, lw=1, alpha=.01, color='r')
    
ax.set(
    xlabel='duration',
    ylabel='value',
    # adjust this
    ylim=(10, 150)
)
ax.legend()
plt.show()

您还可以选择 seaborn 的更快解决方案。

首先，熔化DataFrame

melt_df = df.melt(var_name='duration')
print(melt_df)

       duration      value
0             2  24.610952
1             2  16.554362
2             2  23.468215
3             2  23.102447
4             2  21.021596
...         ...        ...
89995       100  35.945247
89996       100  71.127181
89997       100  25.913699
89998       100  56.351668
89999       100  32.683499

[90000 rows x 2 columns]

然后用relplot绘图并选择想要的置信区间（这里是99%，也可以是'sd'）

import seaborn as sns
g = sns.relplot(
    kind='line',
    data=melt_df,
    x='duration', y='value',
    ci=99
)
g.axes.flat[0].set_title('Confidence Interval 99%')
plt.show()

使用 matplotlib 绘制函数

Function plotting with matplotlib

python

matplotlib

data-analysis

errorbar