Holoviews 挖掘相关热图和回归图的流
Holoviews tap stream of correlation heatmap and regression plot
我想为 DataFrame 制作相关热图,为每对变量制作回归图。我已经尝试阅读所有文档,但仍然很难连接两个图,因此当我点击热图时,可以显示相应的回归图。
下面是一些示例代码:
import holoviews as hv
from holoviews import opts
import seaborn as sns
import numpy as np
import pandas as pd
hv.extension('bokeh')
df = sns.load_dataset('tips')
df = df[['total_bill', 'tip', 'size']]
corr = df.corr()
heatmap = hv.HeatMap((corr.columns, corr.index, corr))\
.opts(tools=['tap', 'hover'], height=400, width=400, toolbar='above')
m, b = np.polyfit(df.tip, df.total_bill, deg=1)
x = np.linspace(df.tip.min(), df.tip.max())
y = m*x + b
curve = hv.Curve((x, y))\
.opts(height=400, width=400, color='red', ylim=(0, 100))
points = hv.Scatter((df.tip, df.total_bill))
hv.Layout((points * curve) + heatmap).cols(2)
我用你的代码调整了文档的相关部分http://holoviews.org/reference/streams/bokeh/Tap.html。也许这会消除您的困惑。
import pandas as pd
import numpy as np
import holoviews as hv
from holoviews import opts
hv.extension('bokeh', width=90)
import seaborn as sns
# Declare dataset
df = sns.load_dataset('tips')
df = df[['total_bill', 'tip', 'size']]
# Declare HeatMap
corr = df.corr()
heatmap = hv.HeatMap((corr.columns, corr.index, corr))
# Declare Tap stream with heatmap as source and initial values
posxy = hv.streams.Tap(source=heatmap, x='total_bill', y='tip')
# Define function to compute histogram based on tap location
def tap_histogram(x, y):
m, b = np.polyfit(df[x], df[y], deg=1)
x_data = np.linspace(df.tip.min(), df.tip.max())
y_data = m*x_data + b
return hv.Curve((x_data, y_data), x, y) * hv.Scatter((df[x], df[y]), x, y)
tap_dmap = hv.DynamicMap(tap_histogram, streams=[posxy])
(heatmap + tap_dmap).opts(
opts.Scatter(height=400, width=400, color='red', ylim=(0, 100), framewise=True),
opts.HeatMap(tools=['tap', 'hover'], height=400, width=400, toolbar='above'),
opts.Curve(framewise=True)
)
我们在建模时面临的两个常见问题是共线性和非线性。共线性可以用相关热图可视化,但是用大量 variables/features 很难探索。在下面的应用中,您可以将鼠标悬停在上面查看任意两个变量之间的相关系数。当您点击时,散点图将更新为 second-degree 拟合曲线以显示两个变量之间的非线性。
在@doopler的帮助下,我稍微修改了一下代码,在此分享一下:
import numpy as np
import pandas as pd
import holoviews as hv
hv.extension('bokeh')
# generate random data
df = pd.DataFrame(data={'col_1': np.random.normal(5, 2, 100)})
df['col_2'] = df.col_1 + np.random.gamma(5, 2, 100)
df['col_3'] = df.col_1*2 + np.random.normal(0, 10, 100)
df['col_4'] = df.col_1**2 + np.random.normal(0, 10, 100)
df['col_5'] = np.sin(df.col_1)
df['col_6'] = np.cos(df.col_1)
corr = df.corr().abs()
# mask the upper triangle of the heatmap
corr.values[np.triu_indices_from(corr, 0)] = np.nan
heatmap = hv.HeatMap((corr.columns, corr.index, corr))\
.opts(tools=['hover'], height=400, width=400, fontsize=9,
toolbar='above', colorbar=False, cmap='Blues',
invert_yaxis=True, xrotation=90, xlabel='', ylabel='',
title='Correlation Coefficient Heatmap (absolute value)')
# define tap stream with heatmap as source
tap_xy = hv.streams.Tap(source=heatmap, x='col_1', y='col_4')
# calculate correlation plot based on tap
def tap_corrplot(x, y):
# drop missing values if there are any
df_notnull = df[[x, y]].dropna(how='any')
# fit a 2nd degree line/curve
m1, m2, b = np.polyfit(df_notnull[x], df_notnull[y], deg=2)
# generate data to plot fitted line/curve
x_curve = np.linspace(df[x].min(), df[x].max())
y_curve = m1*x_curve**2 + m2*x_curve+ b
curve = hv.Curve((x_curve, y_curve), x, y)\
.opts(color='#fc4f30', framewise=True)
scatter = hv.Scatter((df[x], df[y]), x, y)\
.opts(height=400, width=400, fontsize=9, size=5,
alpha=0.2, ylim=(df[y].min(), df[y].max()),
color='#30a2da', framewise=True,
title='Correlation Plot (2nd degree fit)')
return curve * scatter
# map tap in heatmap with correlation plot
tap_dmap = hv.DynamicMap(tap_corrplot, streams=[tap_xy])
layout = heatmap + tap_dmap
layout
如果您需要 运行 Bokeh 应用程序:
from bokeh.server.server import Server
renderer = hv.renderer('bokeh')
app = renderer.app(layout)
server = Server({'/': app}, port=0)
server.start()
server.show('/')
该代码适用于 Jupyter Lab。如果您使用 Jupyter Notebook,请检查此 link.
我想为 DataFrame 制作相关热图,为每对变量制作回归图。我已经尝试阅读所有文档,但仍然很难连接两个图,因此当我点击热图时,可以显示相应的回归图。
下面是一些示例代码:
import holoviews as hv
from holoviews import opts
import seaborn as sns
import numpy as np
import pandas as pd
hv.extension('bokeh')
df = sns.load_dataset('tips')
df = df[['total_bill', 'tip', 'size']]
corr = df.corr()
heatmap = hv.HeatMap((corr.columns, corr.index, corr))\
.opts(tools=['tap', 'hover'], height=400, width=400, toolbar='above')
m, b = np.polyfit(df.tip, df.total_bill, deg=1)
x = np.linspace(df.tip.min(), df.tip.max())
y = m*x + b
curve = hv.Curve((x, y))\
.opts(height=400, width=400, color='red', ylim=(0, 100))
points = hv.Scatter((df.tip, df.total_bill))
hv.Layout((points * curve) + heatmap).cols(2)
我用你的代码调整了文档的相关部分http://holoviews.org/reference/streams/bokeh/Tap.html。也许这会消除您的困惑。
import pandas as pd
import numpy as np
import holoviews as hv
from holoviews import opts
hv.extension('bokeh', width=90)
import seaborn as sns
# Declare dataset
df = sns.load_dataset('tips')
df = df[['total_bill', 'tip', 'size']]
# Declare HeatMap
corr = df.corr()
heatmap = hv.HeatMap((corr.columns, corr.index, corr))
# Declare Tap stream with heatmap as source and initial values
posxy = hv.streams.Tap(source=heatmap, x='total_bill', y='tip')
# Define function to compute histogram based on tap location
def tap_histogram(x, y):
m, b = np.polyfit(df[x], df[y], deg=1)
x_data = np.linspace(df.tip.min(), df.tip.max())
y_data = m*x_data + b
return hv.Curve((x_data, y_data), x, y) * hv.Scatter((df[x], df[y]), x, y)
tap_dmap = hv.DynamicMap(tap_histogram, streams=[posxy])
(heatmap + tap_dmap).opts(
opts.Scatter(height=400, width=400, color='red', ylim=(0, 100), framewise=True),
opts.HeatMap(tools=['tap', 'hover'], height=400, width=400, toolbar='above'),
opts.Curve(framewise=True)
)
我们在建模时面临的两个常见问题是共线性和非线性。共线性可以用相关热图可视化,但是用大量 variables/features 很难探索。在下面的应用中,您可以将鼠标悬停在上面查看任意两个变量之间的相关系数。当您点击时,散点图将更新为 second-degree 拟合曲线以显示两个变量之间的非线性。
在@doopler的帮助下,我稍微修改了一下代码,在此分享一下:
import numpy as np
import pandas as pd
import holoviews as hv
hv.extension('bokeh')
# generate random data
df = pd.DataFrame(data={'col_1': np.random.normal(5, 2, 100)})
df['col_2'] = df.col_1 + np.random.gamma(5, 2, 100)
df['col_3'] = df.col_1*2 + np.random.normal(0, 10, 100)
df['col_4'] = df.col_1**2 + np.random.normal(0, 10, 100)
df['col_5'] = np.sin(df.col_1)
df['col_6'] = np.cos(df.col_1)
corr = df.corr().abs()
# mask the upper triangle of the heatmap
corr.values[np.triu_indices_from(corr, 0)] = np.nan
heatmap = hv.HeatMap((corr.columns, corr.index, corr))\
.opts(tools=['hover'], height=400, width=400, fontsize=9,
toolbar='above', colorbar=False, cmap='Blues',
invert_yaxis=True, xrotation=90, xlabel='', ylabel='',
title='Correlation Coefficient Heatmap (absolute value)')
# define tap stream with heatmap as source
tap_xy = hv.streams.Tap(source=heatmap, x='col_1', y='col_4')
# calculate correlation plot based on tap
def tap_corrplot(x, y):
# drop missing values if there are any
df_notnull = df[[x, y]].dropna(how='any')
# fit a 2nd degree line/curve
m1, m2, b = np.polyfit(df_notnull[x], df_notnull[y], deg=2)
# generate data to plot fitted line/curve
x_curve = np.linspace(df[x].min(), df[x].max())
y_curve = m1*x_curve**2 + m2*x_curve+ b
curve = hv.Curve((x_curve, y_curve), x, y)\
.opts(color='#fc4f30', framewise=True)
scatter = hv.Scatter((df[x], df[y]), x, y)\
.opts(height=400, width=400, fontsize=9, size=5,
alpha=0.2, ylim=(df[y].min(), df[y].max()),
color='#30a2da', framewise=True,
title='Correlation Plot (2nd degree fit)')
return curve * scatter
# map tap in heatmap with correlation plot
tap_dmap = hv.DynamicMap(tap_corrplot, streams=[tap_xy])
layout = heatmap + tap_dmap
layout
如果您需要 运行 Bokeh 应用程序:
from bokeh.server.server import Server
renderer = hv.renderer('bokeh')
app = renderer.app(layout)
server = Server({'/': app}, port=0)
server.start()
server.show('/')
该代码适用于 Jupyter Lab。如果您使用 Jupyter Notebook,请检查此 link.