使用 Plotnine 拟合非线性曲线
Fit non-linear curve using Plotnine
这是为一些用户定义的指标生成线性曲线的工作示例。
如何让它拟合非线性曲线?
def plot(version, str_metric):
    """Build a train/test metric-vs-epoch plot with a linear (``lm``) smoother.

    Args:
        version: Text used as the plot title.
        str_metric: Name of the metric; it becomes the name of the y-axis
            column in the plotted data frame.

    Returns:
        The assembled plotnine ``ggplot`` object, so the caller can draw or
        save it.
    """
    # The first series is labelled 'SGD_Train' and the second 'SGD_Test'
    # when the data frame is assembled below.
    metric1 = [
        0.459, 0.5844, 0.6246, 0.6459, 0.6429, 0.6629, 0.664, 0.6722, 0.6832, 0.6931,
        0.7103, 0.7144, 0.7213, 0.716, 0.7182, 0.7419, 0.7418, 0.7594, 0.7639, 0.7854,
        0.7752, 0.7912, 0.8164, 0.8311, 0.8283, 0.8103, 0.869, 0.8708, 0.884, 0.9017,
        0.9248, 0.9233, 0.9432, 0.9306, 0.945, 0.959, 0.9675, 0.9649, 0.982, 0.9276,
        0.975, 0.9772, 0.9786, 0.988, 0.9825, 0.9901, 0.9934, 0.993, 0.9956, 0.9951,
    ]
    metric2 = [
        0.4541, 0.5807, 0.6138, 0.6339, 0.6322, 0.63, 0.6451, 0.6514, 0.6589, 0.6597,
        0.6759, 0.6817, 0.6866, 0.6803, 0.6819, 0.7009, 0.7039, 0.7162, 0.7197, 0.7237,
        0.7145, 0.7239, 0.7416, 0.7426, 0.7461, 0.7054, 0.7495, 0.7331, 0.7316, 0.7023,
        0.7232, 0.7359, 0.7257, 0.7178, 0.6918, 0.7118, 0.7183, 0.6939, 0.6855, 0.6874,
        0.6745, 0.6952, 0.6491, 0.6659, 0.6624, 0.6737, 0.6587, 0.653, 0.6485, 0.6854,
    ]
    epochs = list(range(len(metric1)))
    color_dict = {'SGD_Train': 'darkblue',
                  'SGD_Test': 'red'}
    # Long format: both series stacked, with 'Type' telling them apart.
    df = pd.DataFrame({'Epochs': np.hstack([epochs, epochs]),
                       str_metric: np.hstack([metric1, metric2]),
                       'Type': np.repeat(['SGD_Train', 'SGD_Test'], len(epochs))})
    # The stray pdb.set_trace() breakpoint that used to sit here was removed:
    # it halted every call and pdb was never imported.
    p = (
        ggplot(df, aes(x='Epochs', y=str_metric, color='Type'))
        # NOTE(review): span presumably only influences the 'loess' smoother
        # and is inert with method='lm' -- confirm against the plotnine docs.
        + geom_smooth(show_legend=True, method='lm',
                      span=0.10, se=True, level=0.80)
        + ggtitle(version)
        + scale_color_manual(values=color_dict)
        + theme(text=element_text(size=25),
                legend_box_margin=1,
                legend_title=element_text(size=23),
                legend_position=(0.77, 0.5),
                legend_text=element_text(size=20),
                legend_box_background=element_rect(fill='transparent'))
        + theme(plot_title=element_text(margin={'t': -15, 'b': -20}))
    )
    # Hand the plot back; previously it was built and then discarded.
    return p
P.S. 当数据点更多时,拟合非线性曲线很容易:我只需将 lm 更改为 loess 就能正常工作。但是这里的数据条目较少。另外,我无法使 stat_smooth 方法起作用。
我正在尝试如下所示:
stat_smooth(method='wls', method_args={'start': list(a=1, b=1), formula='y~a*x^b'}, se=True)
任何帮助将不胜感激。
公式接口是有限的,因为没有万无一失的方法在公式中使用非 aesthetic 变量(即未通过 aes 映射的变量),例如公式 y ~ a*x^b 中的 a 和 b。最好使用一个外部函数来计算这些项。例如,请参阅下面的 poly 函数(注意文档字符串中的警告)。
import pandas as pd
import numpy as np
from plotnine import *
# Plot title and the name given to the metric column of the data frame.
version = 'Title'
str_metric = 'Met'

# One value per epoch; metric1 is labelled 'SGD_Train' and metric2
# 'SGD_Test' when the data frame is assembled below.
metric1 = [
    0.459, 0.5844, 0.6246, 0.6459, 0.6429, 0.6629, 0.664, 0.6722, 0.6832, 0.6931,
    0.7103, 0.7144, 0.7213, 0.716, 0.7182, 0.7419, 0.7418, 0.7594, 0.7639, 0.7854,
    0.7752, 0.7912, 0.8164, 0.8311, 0.8283, 0.8103, 0.869, 0.8708, 0.884, 0.9017,
    0.9248, 0.9233, 0.9432, 0.9306, 0.945, 0.959, 0.9675, 0.9649, 0.982, 0.9276,
    0.975, 0.9772, 0.9786, 0.988, 0.9825, 0.9901, 0.9934, 0.993, 0.9956, 0.9951,
]
metric2 = [
    0.4541, 0.5807, 0.6138, 0.6339, 0.6322, 0.63, 0.6451, 0.6514, 0.6589, 0.6597,
    0.6759, 0.6817, 0.6866, 0.6803, 0.6819, 0.7009, 0.7039, 0.7162, 0.7197, 0.7237,
    0.7145, 0.7239, 0.7416, 0.7426, 0.7461, 0.7054, 0.7495, 0.7331, 0.7316, 0.7023,
    0.7232, 0.7359, 0.7257, 0.7178, 0.6918, 0.7118, 0.7183, 0.6939, 0.6855, 0.6874,
    0.6745, 0.6952, 0.6491, 0.6659, 0.6624, 0.6737, 0.6587, 0.653, 0.6485, 0.6854,
]

# Epoch indices 0..len(metric1)-1, shared by both series.
epochs = list(range(len(metric1)))
color_dict = {'SGD_Train': 'darkblue', 'SGD_Test': 'red'}

# Long format: both series stacked end to end, with 'Type' telling them apart.
df = pd.DataFrame({
    'Epochs': np.concatenate([epochs, epochs]),
    str_metric: np.concatenate([metric1, metric2]),
    'Type': np.repeat(['SGD_Train', 'SGD_Test'], len(epochs)),
})
def poly(x, degree=1):
    """
    Build the powers of *x* from 0 up to ``degree`` as DataFrame columns.

    These are non orthogonal factors, but it may not matter if
    we only need this for smoothing and not extrapolated
    predictions.

    Parameters
    ----------
    x : array-like
        Values to raise to each power.
    degree : int
        Highest power to include; must be non-negative.

    Returns
    -------
    pandas.DataFrame
        One column per power, in increasing order, named ``'x**0'``,
        ``'x'``, ``'x**2'``, ... The ``'x**0'`` column is all ones.

    Raises
    ------
    ValueError
        If ``degree`` is negative.
    """
    # A negative degree used to yield a silent empty frame; fail loudly.
    if degree < 0:
        raise ValueError(f'degree must be non-negative, got {degree}')
    d = {}
    for i in range(degree + 1):
        if i == 1:
            # The linear term is x itself, stored untouched.
            d['x'] = x
        else:
            d[f'x**{i}'] = np.power(x, i)
    return pd.DataFrame(d)
# Scatter the raw points and overlay, for each Type, an 'lm' fit whose
# formula uses poly(x, degree=2), i.e. a second-degree polynomial in x.
(
    ggplot(df, aes(x='Epochs', y=str_metric, color='Type'))
    + geom_point()
    # NOTE(review): span presumably only influences the 'loess' smoother
    # and is inert with method='lm' -- confirm against the plotnine docs.
    + stat_smooth(method='lm', formula='y ~ poly(x, degree=2)',
                  span=0.10, se=True, level=0.80)
    + ggtitle(version)
    + scale_color_manual(values=color_dict)
)
这是为一些用户定义的指标生成线性曲线的工作示例。
如何让它拟合非线性曲线?
def plot(version, str_metric):
    """Build a train/test metric-vs-epoch plot with a linear (``lm``) smoother.

    Args:
        version: Text used as the plot title.
        str_metric: Name of the metric; it becomes the name of the y-axis
            column in the plotted data frame.

    Returns:
        The assembled plotnine ``ggplot`` object, so the caller can draw or
        save it.
    """
    # The first series is labelled 'SGD_Train' and the second 'SGD_Test'
    # when the data frame is assembled below.
    metric1 = [
        0.459, 0.5844, 0.6246, 0.6459, 0.6429, 0.6629, 0.664, 0.6722, 0.6832, 0.6931,
        0.7103, 0.7144, 0.7213, 0.716, 0.7182, 0.7419, 0.7418, 0.7594, 0.7639, 0.7854,
        0.7752, 0.7912, 0.8164, 0.8311, 0.8283, 0.8103, 0.869, 0.8708, 0.884, 0.9017,
        0.9248, 0.9233, 0.9432, 0.9306, 0.945, 0.959, 0.9675, 0.9649, 0.982, 0.9276,
        0.975, 0.9772, 0.9786, 0.988, 0.9825, 0.9901, 0.9934, 0.993, 0.9956, 0.9951,
    ]
    metric2 = [
        0.4541, 0.5807, 0.6138, 0.6339, 0.6322, 0.63, 0.6451, 0.6514, 0.6589, 0.6597,
        0.6759, 0.6817, 0.6866, 0.6803, 0.6819, 0.7009, 0.7039, 0.7162, 0.7197, 0.7237,
        0.7145, 0.7239, 0.7416, 0.7426, 0.7461, 0.7054, 0.7495, 0.7331, 0.7316, 0.7023,
        0.7232, 0.7359, 0.7257, 0.7178, 0.6918, 0.7118, 0.7183, 0.6939, 0.6855, 0.6874,
        0.6745, 0.6952, 0.6491, 0.6659, 0.6624, 0.6737, 0.6587, 0.653, 0.6485, 0.6854,
    ]
    epochs = list(range(len(metric1)))
    color_dict = {'SGD_Train': 'darkblue',
                  'SGD_Test': 'red'}
    # Long format: both series stacked, with 'Type' telling them apart.
    df = pd.DataFrame({'Epochs': np.hstack([epochs, epochs]),
                       str_metric: np.hstack([metric1, metric2]),
                       'Type': np.repeat(['SGD_Train', 'SGD_Test'], len(epochs))})
    # The stray pdb.set_trace() breakpoint that used to sit here was removed:
    # it halted every call and pdb was never imported.
    p = (
        ggplot(df, aes(x='Epochs', y=str_metric, color='Type'))
        # NOTE(review): span presumably only influences the 'loess' smoother
        # and is inert with method='lm' -- confirm against the plotnine docs.
        + geom_smooth(show_legend=True, method='lm',
                      span=0.10, se=True, level=0.80)
        + ggtitle(version)
        + scale_color_manual(values=color_dict)
        + theme(text=element_text(size=25),
                legend_box_margin=1,
                legend_title=element_text(size=23),
                legend_position=(0.77, 0.5),
                legend_text=element_text(size=20),
                legend_box_background=element_rect(fill='transparent'))
        + theme(plot_title=element_text(margin={'t': -15, 'b': -20}))
    )
    # Hand the plot back; previously it was built and then discarded.
    return p
P.S. 当数据点更多时,拟合非线性曲线很容易:我只需将 lm 更改为 loess 就能正常工作。但是这里的数据条目较少。另外,我无法使 stat_smooth 方法起作用。
我正在尝试如下所示:
stat_smooth(method='wls', method_args={'start': list(a=1, b=1), formula='y~a*x^b'}, se=True)
任何帮助将不胜感激。
公式接口是有限的,因为没有万无一失的方法在公式中使用非 aesthetic 变量(即未通过 aes 映射的变量),例如公式 y ~ a*x^b 中的 a 和 b。最好使用一个外部函数来计算这些项。例如,请参阅下面的 poly 函数(注意文档字符串中的警告)。
import pandas as pd
import numpy as np
from plotnine import *
# Plot title and the name given to the metric column of the data frame.
version = 'Title'
str_metric = 'Met'

# One value per epoch; metric1 is labelled 'SGD_Train' and metric2
# 'SGD_Test' when the data frame is assembled below.
metric1 = [
    0.459, 0.5844, 0.6246, 0.6459, 0.6429, 0.6629, 0.664, 0.6722, 0.6832, 0.6931,
    0.7103, 0.7144, 0.7213, 0.716, 0.7182, 0.7419, 0.7418, 0.7594, 0.7639, 0.7854,
    0.7752, 0.7912, 0.8164, 0.8311, 0.8283, 0.8103, 0.869, 0.8708, 0.884, 0.9017,
    0.9248, 0.9233, 0.9432, 0.9306, 0.945, 0.959, 0.9675, 0.9649, 0.982, 0.9276,
    0.975, 0.9772, 0.9786, 0.988, 0.9825, 0.9901, 0.9934, 0.993, 0.9956, 0.9951,
]
metric2 = [
    0.4541, 0.5807, 0.6138, 0.6339, 0.6322, 0.63, 0.6451, 0.6514, 0.6589, 0.6597,
    0.6759, 0.6817, 0.6866, 0.6803, 0.6819, 0.7009, 0.7039, 0.7162, 0.7197, 0.7237,
    0.7145, 0.7239, 0.7416, 0.7426, 0.7461, 0.7054, 0.7495, 0.7331, 0.7316, 0.7023,
    0.7232, 0.7359, 0.7257, 0.7178, 0.6918, 0.7118, 0.7183, 0.6939, 0.6855, 0.6874,
    0.6745, 0.6952, 0.6491, 0.6659, 0.6624, 0.6737, 0.6587, 0.653, 0.6485, 0.6854,
]

# Epoch indices 0..len(metric1)-1, shared by both series.
epochs = list(range(len(metric1)))
color_dict = {'SGD_Train': 'darkblue', 'SGD_Test': 'red'}

# Long format: both series stacked end to end, with 'Type' telling them apart.
df = pd.DataFrame({
    'Epochs': np.concatenate([epochs, epochs]),
    str_metric: np.concatenate([metric1, metric2]),
    'Type': np.repeat(['SGD_Train', 'SGD_Test'], len(epochs)),
})
def poly(x, degree=1):
    """
    Build the powers of *x* from 0 up to ``degree`` as DataFrame columns.

    These are non orthogonal factors, but it may not matter if
    we only need this for smoothing and not extrapolated
    predictions.

    Parameters
    ----------
    x : array-like
        Values to raise to each power.
    degree : int
        Highest power to include; must be non-negative.

    Returns
    -------
    pandas.DataFrame
        One column per power, in increasing order, named ``'x**0'``,
        ``'x'``, ``'x**2'``, ... The ``'x**0'`` column is all ones.

    Raises
    ------
    ValueError
        If ``degree`` is negative.
    """
    # A negative degree used to yield a silent empty frame; fail loudly.
    if degree < 0:
        raise ValueError(f'degree must be non-negative, got {degree}')
    d = {}
    for i in range(degree + 1):
        if i == 1:
            # The linear term is x itself, stored untouched.
            d['x'] = x
        else:
            d[f'x**{i}'] = np.power(x, i)
    return pd.DataFrame(d)
# Scatter the raw points and overlay, for each Type, an 'lm' fit whose
# formula uses poly(x, degree=2), i.e. a second-degree polynomial in x.
(
    ggplot(df, aes(x='Epochs', y=str_metric, color='Type'))
    + geom_point()
    # NOTE(review): span presumably only influences the 'loess' smoother
    # and is inert with method='lm' -- confirm against the plotnine docs.
    + stat_smooth(method='lm', formula='y ~ poly(x, degree=2)',
                  span=0.10, se=True, level=0.80)
    + ggtitle(version)
    + scale_color_manual(values=color_dict)
)