如何绘制不连接所有点的样条曲线
How to draw spline that is not connecting all points
我正在尝试用样条曲线绘制大量数据,它应该看起来像这样。
但是当我尝试用 plotly 来做时,样条曲线坚持通过所有的点,就像这样
其中第一张图片只有数据点,第二张图片是 plotly 画出的样条曲线。
我尝试的代码是
# Question snippet: plots the raw data as markers plus a second trace that
# uses plotly's built-in 'spline' line shape.
# NOTE(review): dates_arr and data_points are placeholders not defined here.
# `dates` is a plain list, which has no strftime method — presumably the real
# code uses a pandas DatetimeIndex; confirm.
dates = [dates_arr]
x = dates.strftime("%Y-%m-%d")
y = [data_points]
# marker-only trace holding the raw observations
xy_data = go.Scatter(x=x, y=y, mode='markers', marker=dict(size=4),
name='AAPL')
# Second trace over the very same x/y values. As the answer in this document
# explains, line_shape='spline' only controls how the line between the data
# points is drawn, so the curve still passes through every point.
mov_avg = go.Scatter(x=x, y=y, name="spline",text=
["spline"],hoverinfo='text+name',line_shape='spline', line_smoothing = 1.3)
data = [xy_data, mov_avg]
py.iplot(data, filename='Spline fit')
#################################
# same traces passed to py.plot under a different filename
first_plot_url = py.plot(data, filename='apple stock moving average',
auto_open=True, )
有人知道吗?
在您的第一张图片中,样条曲线是对所有数据点的近似(拟合)。而在您的代码段中,spline
只是一个属性,用来设置相邻数据点之间连线的绘制形状。这是两件完全不同的事情。
要实现您想要的效果,您应该仔细查看用户 np8 和 Matthew Drury 在其他 SO 帖子和 GitHub 上的贡献,
同时也应该进一步了解不同样条的计算方式。下图估计了一条自然三次样条(natural cubic spline),
它由下面名为“片段 2: 全部内容”的代码示例生成。这段代码相当长,但主要是因为其中包含了来自
“Python natural smoothing splines”一帖的函数 get_natural_cubic_spline_model。绘图部分只需遵循以下逻辑:
片段 1: 只关注绘图部分
# scatter trace holding the raw observations
points = go.Scatter(x=x, y=y, mode='markers', name='iris')

# line trace following the estimated spline values
line = go.Scatter(
    x=df_spline['x'],
    y=df_spline['y_est'],
    mode='lines',
    name='spline',
)

# collect both traces, build the figure and render it
data = [points, line]
fig = go.Figure(data=data)
fig.show()
图形:
片段 2: 全部内容
# imports
import plotly.express as px
import plotly.graph_objs as go
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
# sample data set: px.data.iris() gives the iris data as a pandas DataFrame
iris = px.data.iris()
x, y = iris['sepal_length'], iris['sepal_width']

# natural cubic spline helpers, taken from "Python natural smoothing splines"
def get_natural_cubic_spline_model(x, y, minval=None, maxval=None, n_knots=None, knots=None):
    """
    Get a natural cubic spline model for the data.

    For the knots, give (a) `knots` (as an array) or (b) minval, maxval and
    n_knots.

    If the knots are not directly specified, the resulting knots are equally
    spaced within the *interior* of (max, min). That is, the endpoints are
    *not* included as knots.

    Parameters
    ----------
    x: np.array of float
        The input data
    y: np.array of float
        The output data
    minval: float
        Minimum of interval containing the knots.
    maxval: float
        Maximum of the interval containing the knots.
    n_knots: positive integer
        The number of knots to create.
    knots: array or list of floats
        The knots.

    Returns
    --------
    model: a model object
        The returned model will have following method:
        - predict(x):
            x is a numpy array. This will return the predicted y-values.
    """
    # `knots` may be a NumPy array, whose truth value is ambiguous and raises
    # ValueError in a bare `if knots:`. Test explicitly instead; an empty
    # sequence still falls through to the minval/maxval/n_knots branch.
    if knots is not None and len(knots) > 0:
        spline = NaturalCubicSpline(knots=knots)
    else:
        spline = NaturalCubicSpline(max=maxval, min=minval, n_knots=n_knots)

    # Basis expansion followed by an ordinary least-squares fit on the basis.
    p = Pipeline([
        ('nat_cubic', spline),
        ('regression', LinearRegression(fit_intercept=True))
    ])

    p.fit(x, y)

    return p
class AbstractSpline(BaseEstimator, TransformerMixin):
    """Base class for all spline basis expansions.

    The knots are either passed directly through ``knots`` or generated as
    equally spaced points in the interior of ``(min, max)``. When neither
    ``knots`` nor ``n_knots`` is given, the number of knots is obtained from
    ``self._compute_n_knots(n_params)``, which this class does not define
    itself, so a subclass has to supply it.
    """

    def __init__(self, max=None, min=None, n_knots=None, n_params=None, knots=None):
        if knots is None:
            if not n_knots:
                n_knots = self._compute_n_knots(n_params)
            # Create n_knots + 2 evenly spaced points and drop both ends so
            # that only interior points remain as knots.
            knots = np.linspace(min, max, num=(n_knots + 2))[1:-1]
        # The former `max, min = np.max(knots), np.min(knots)` rebinding was
        # removed: the values were never read and only shadowed the builtins.
        self.knots = np.asarray(knots)

    @property
    def n_knots(self):
        """Number of knots held by this expansion."""
        return len(self.knots)

    def fit(self, *args, **kwargs):
        """Do nothing and return self; the knots are fixed at construction."""
        return self
class NaturalCubicSpline(AbstractSpline):
    """Natural cubic basis expansion of an array.

    The generated features can be used to fit a piecewise cubic function
    under the constraint that the fitted curve is linear *outside* the range
    of the knots. The fitted curve is continuously differentiable to the
    second order at all of the knots.

    The transformer can be created in two ways:
      - By specifying the maximum, minimum, and number of knots.
      - By specifying the cutpoints directly.

    If the knots are not directly specified, the resulting knots are equally
    spaced within the *interior* of (max, min). That is, the endpoints are
    *not* included as knots.

    Parameters
    ----------
    min: float
        Minimum of interval containing the knots.
    max: float
        Maximum of the interval containing the knots.
    n_knots: positive integer
        The number of knots to create.
    knots: array or list of floats
        The knots.
    """

    def _compute_n_knots(self, n_params):
        """Return the knot count for ``n_params``; here they are equal."""
        return n_params

    @property
    def n_params(self):
        """Number of basis columns produced: one fewer than the knots."""
        return self.n_knots - 1

    def transform(self, X, **transform_params):
        """Expand ``X`` into the spline basis.

        A pandas Series input yields a DataFrame with named columns and the
        same index; any other input yields the plain array.
        """
        basis = self._transform_array(X)
        if not isinstance(X, pd.Series):
            return basis
        return pd.DataFrame(basis, columns=self._make_names(X), index=X.index)

    def _make_names(self, X):
        """Build the column names from the name of the Series ``X``."""
        names = ["{}_spline_linear".format(X.name)]
        for idx in range(self.n_knots - 2):
            names.append("{}_spline_{}".format(X.name, idx))
        return names

    def _transform_array(self, X, **transform_params):
        """Compute the basis matrix with ``n_knots - 1`` columns for ``X``."""
        X = X.squeeze()
        n_cols = self.n_knots - 1
        try:
            n_rows = X.shape[0]
        except IndexError:  # For arrays with only one element
            n_rows = 1
        X_spl = np.zeros((n_rows, n_cols))

        # First column is the untransformed (linear) term.
        X_spl[:, 0] = X.squeeze()

        def positive_part(t):
            return np.maximum(0, t)

        def cubed(t):
            return t * t * t

        def d(knot_idx, x):
            # Truncated cubic at the given knot minus the one at the last
            # knot, scaled by the distance between those two knots.
            knot = self.knots[knot_idx]
            last_knot = self.knots[self.n_knots - 1]
            numerator = (cubed(positive_part(x - knot))
                         - cubed(positive_part(x - last_knot)))
            denominator = last_knot - knot
            return numerator / denominator

        penultimate = self.n_knots - 2
        for col in range(penultimate):
            X_spl[:, col + 1] = (d(col, X) - d(penultimate, X)).squeeze()
        return X_spl
# spline calculations: fit a natural cubic spline with 6 interior knots
m1 = get_natural_cubic_spline_model(x, y, minval=min(x), maxval=max(x), n_knots=6)
y_est_m1 = m1.predict(x)

# gather results and sort them by x so that the line is not messed up;
# reuse the predictions from above instead of calling predict() a second time
df_spline = pd.DataFrame({'x': x,
                          'y': y,
                          'y_est': y_est_m1})
df_spline = df_spline.sort_values(by=['x'])
### PLOTLY ###
# scatter trace holding the raw observations
points = go.Scatter(x=x, y=y, mode='markers', name='iris')

# line trace following the estimated spline values (already sorted by x)
line = go.Scatter(
    x=df_spline['x'],
    y=df_spline['y_est'],
    mode='lines',
    name='spline',
)

# collect both traces, build the figure and render it
data = [points, line]
fig = go.Figure(data=data)
fig.show()
我正在尝试用样条曲线绘制大量数据,它应该看起来像这样。
但是当我尝试用 plotly 来做时,样条曲线坚持通过所有的点,就像这样
其中第一张图片只有数据点,第二张图片是 plotly 画出的样条曲线。
我尝试的代码是
# Question snippet: plots the raw data as markers plus a second trace that
# uses plotly's built-in 'spline' line shape.
# NOTE(review): dates_arr and data_points are placeholders not defined here.
# `dates` is a plain list, which has no strftime method — presumably the real
# code uses a pandas DatetimeIndex; confirm.
dates = [dates_arr]
x = dates.strftime("%Y-%m-%d")
y = [data_points]
# marker-only trace holding the raw observations
xy_data = go.Scatter(x=x, y=y, mode='markers', marker=dict(size=4),
name='AAPL')
# Second trace over the very same x/y values. As the answer in this document
# explains, line_shape='spline' only controls how the line between the data
# points is drawn, so the curve still passes through every point.
mov_avg = go.Scatter(x=x, y=y, name="spline",text=
["spline"],hoverinfo='text+name',line_shape='spline', line_smoothing = 1.3)
data = [xy_data, mov_avg]
py.iplot(data, filename='Spline fit')
#################################
# same traces passed to py.plot under a different filename
first_plot_url = py.plot(data, filename='apple stock moving average',
auto_open=True, )
有人知道吗?
在您的第一张图片中,样条曲线是对所有数据点的近似(拟合)。而在您的代码段中,spline
只是一个属性,用来设置相邻数据点之间连线的绘制形状。这是两件完全不同的事情。
要实现您想要的效果,您应该仔细查看用户 np8 和 Matthew Drury 在其他 SO 帖子和 GitHub 上的贡献,
同时也应该进一步了解不同样条的计算方式。下图估计了一条自然三次样条(natural cubic spline),
它由下面名为“片段 2: 全部内容”的代码示例生成。这段代码相当长,但主要是因为其中包含了来自
“Python natural smoothing splines”一帖的函数 get_natural_cubic_spline_model。绘图部分只需遵循以下逻辑:
片段 1: 只关注绘图部分
# scatter trace holding the raw observations
points = go.Scatter(x=x, y=y, mode='markers', name='iris')

# line trace following the estimated spline values
line = go.Scatter(
    x=df_spline['x'],
    y=df_spline['y_est'],
    mode='lines',
    name='spline',
)

# collect both traces, build the figure and render it
data = [points, line]
fig = go.Figure(data=data)
fig.show()
图形:
片段 2: 全部内容
# imports
import plotly.express as px
import plotly.graph_objs as go
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
# sample data set: px.data.iris() gives the iris data as a pandas DataFrame
iris = px.data.iris()
x, y = iris['sepal_length'], iris['sepal_width']

# natural cubic spline helpers, taken from "Python natural smoothing splines"
def get_natural_cubic_spline_model(x, y, minval=None, maxval=None, n_knots=None, knots=None):
    """
    Get a natural cubic spline model for the data.

    For the knots, give (a) `knots` (as an array) or (b) minval, maxval and
    n_knots.

    If the knots are not directly specified, the resulting knots are equally
    spaced within the *interior* of (max, min). That is, the endpoints are
    *not* included as knots.

    Parameters
    ----------
    x: np.array of float
        The input data
    y: np.array of float
        The output data
    minval: float
        Minimum of interval containing the knots.
    maxval: float
        Maximum of the interval containing the knots.
    n_knots: positive integer
        The number of knots to create.
    knots: array or list of floats
        The knots.

    Returns
    --------
    model: a model object
        The returned model will have following method:
        - predict(x):
            x is a numpy array. This will return the predicted y-values.
    """
    # `knots` may be a NumPy array, whose truth value is ambiguous and raises
    # ValueError in a bare `if knots:`. Test explicitly instead; an empty
    # sequence still falls through to the minval/maxval/n_knots branch.
    if knots is not None and len(knots) > 0:
        spline = NaturalCubicSpline(knots=knots)
    else:
        spline = NaturalCubicSpline(max=maxval, min=minval, n_knots=n_knots)

    # Basis expansion followed by an ordinary least-squares fit on the basis.
    p = Pipeline([
        ('nat_cubic', spline),
        ('regression', LinearRegression(fit_intercept=True))
    ])

    p.fit(x, y)

    return p
class AbstractSpline(BaseEstimator, TransformerMixin):
    """Base class for all spline basis expansions.

    The knots are either passed directly through ``knots`` or generated as
    equally spaced points in the interior of ``(min, max)``. When neither
    ``knots`` nor ``n_knots`` is given, the number of knots is obtained from
    ``self._compute_n_knots(n_params)``, which this class does not define
    itself, so a subclass has to supply it.
    """

    def __init__(self, max=None, min=None, n_knots=None, n_params=None, knots=None):
        if knots is None:
            if not n_knots:
                n_knots = self._compute_n_knots(n_params)
            # Create n_knots + 2 evenly spaced points and drop both ends so
            # that only interior points remain as knots.
            knots = np.linspace(min, max, num=(n_knots + 2))[1:-1]
        # The former `max, min = np.max(knots), np.min(knots)` rebinding was
        # removed: the values were never read and only shadowed the builtins.
        self.knots = np.asarray(knots)

    @property
    def n_knots(self):
        """Number of knots held by this expansion."""
        return len(self.knots)

    def fit(self, *args, **kwargs):
        """Do nothing and return self; the knots are fixed at construction."""
        return self
class NaturalCubicSpline(AbstractSpline):
    """Natural cubic basis expansion of an array.

    The generated features can be used to fit a piecewise cubic function
    under the constraint that the fitted curve is linear *outside* the range
    of the knots. The fitted curve is continuously differentiable to the
    second order at all of the knots.

    The transformer can be created in two ways:
      - By specifying the maximum, minimum, and number of knots.
      - By specifying the cutpoints directly.

    If the knots are not directly specified, the resulting knots are equally
    spaced within the *interior* of (max, min). That is, the endpoints are
    *not* included as knots.

    Parameters
    ----------
    min: float
        Minimum of interval containing the knots.
    max: float
        Maximum of the interval containing the knots.
    n_knots: positive integer
        The number of knots to create.
    knots: array or list of floats
        The knots.
    """

    def _compute_n_knots(self, n_params):
        """Return the knot count for ``n_params``; here they are equal."""
        return n_params

    @property
    def n_params(self):
        """Number of basis columns produced: one fewer than the knots."""
        return self.n_knots - 1

    def transform(self, X, **transform_params):
        """Expand ``X`` into the spline basis.

        A pandas Series input yields a DataFrame with named columns and the
        same index; any other input yields the plain array.
        """
        basis = self._transform_array(X)
        if not isinstance(X, pd.Series):
            return basis
        return pd.DataFrame(basis, columns=self._make_names(X), index=X.index)

    def _make_names(self, X):
        """Build the column names from the name of the Series ``X``."""
        names = ["{}_spline_linear".format(X.name)]
        for idx in range(self.n_knots - 2):
            names.append("{}_spline_{}".format(X.name, idx))
        return names

    def _transform_array(self, X, **transform_params):
        """Compute the basis matrix with ``n_knots - 1`` columns for ``X``."""
        X = X.squeeze()
        n_cols = self.n_knots - 1
        try:
            n_rows = X.shape[0]
        except IndexError:  # For arrays with only one element
            n_rows = 1
        X_spl = np.zeros((n_rows, n_cols))

        # First column is the untransformed (linear) term.
        X_spl[:, 0] = X.squeeze()

        def positive_part(t):
            return np.maximum(0, t)

        def cubed(t):
            return t * t * t

        def d(knot_idx, x):
            # Truncated cubic at the given knot minus the one at the last
            # knot, scaled by the distance between those two knots.
            knot = self.knots[knot_idx]
            last_knot = self.knots[self.n_knots - 1]
            numerator = (cubed(positive_part(x - knot))
                         - cubed(positive_part(x - last_knot)))
            denominator = last_knot - knot
            return numerator / denominator

        penultimate = self.n_knots - 2
        for col in range(penultimate):
            X_spl[:, col + 1] = (d(col, X) - d(penultimate, X)).squeeze()
        return X_spl
# spline calculations: fit a natural cubic spline with 6 interior knots
m1 = get_natural_cubic_spline_model(x, y, minval=min(x), maxval=max(x), n_knots=6)
y_est_m1 = m1.predict(x)

# gather results and sort them by x so that the line is not messed up;
# reuse the predictions from above instead of calling predict() a second time
df_spline = pd.DataFrame({'x': x,
                          'y': y,
                          'y_est': y_est_m1})
df_spline = df_spline.sort_values(by=['x'])
### PLOTLY ###
# scatter trace holding the raw observations
points = go.Scatter(x=x, y=y, mode='markers', name='iris')

# line trace following the estimated spline values (already sorted by x)
line = go.Scatter(
    x=df_spline['x'],
    y=df_spline['y_est'],
    mode='lines',
    name='spline',
)

# collect both traces, build the figure and render it
data = [points, line]
fig = go.Figure(data=data)
fig.show()