在 plotly express 中添加另一个数据框作为注释
Add another dataframe as annotation in plotly express
我使用 UMAP 可视化数据,但无法添加正确的注释。如何使用相同长度的另一个数据框将悬停文本添加到 plotly express 散点图?
据我了解,我只能指定 data_2d 中的某一列。我能否把 another_df 的所有行都用作注释?
import plotly.express as px
# Question snippet: the asker's attempt at a scatter plot of `data_2d`, coloured by
# `labels`, with per-point annotation taken from `another_df`.
# NOTE(review): the body lines below carry no indentation, so this does not parse as-is.
def scatter(data_2d, labels, another_df):
# Map each label to a colour from the G10 palette.
# NOTE(review): `labels.unique` is referenced but not called; presumably
# `labels.unique()` was intended -- confirm.
c = dict(zip(labels.unique, px.colors.qualitative.G10)) # make the same colors for another data
fig = px.scatter(
data_2d, x=0, y=1,
color=labels,
color_discrete_map=c,
# NOTE(review): a whole dataframe is passed as `text` here; this is the part the
# asker reports as not working.
text=another_df, # here I'm stuck
# expected annotation
# column1: 57575874
# column2: 0.4545
# columnN: ....
# hover_data awaits for labels from data_2d and it doesn't work
# text is constant, I see just a mess of text
)
# Apply the same marker size and opacity to every trace, then display the figure.
fig.update_traces(marker=dict(size=5, opacity=0.7))
fig.show()
- 你的示例代码
  - 代码没有定义数据结构,我根据代码中的注释推断了它们的内容
  - 把 x 和 y 直接写成值在语法上不正确,已改为对 UMAP 返回的 2D numpy 数组进行切片
- 解决方案
  - 你已说明 another_df 与 data_2d 长度相同。一种做法是把包含所有列的数据框(即变换之前的数据框)直接传给 px.scatter
  - labels 参数其实是多余的,因为它本身就是 another_df 的一部分
  - 你想自定义悬停内容,因此设置了 hover_name 和 hover_data,其中 hover_data 使用了 another_df 中的所有列
import plotly.express as px
import pandas as pd
import numpy as np
import umap.umap_ as umap
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def scatter(data_2d, labels, another_df, hover_name="island"):
    """Show a scatter plot of an embedding with hover text from a dataframe.

    Args:
        data_2d: Array indexed as ``data_2d[:, 0]`` and ``data_2d[:, 1]`` to
            obtain the x and y coordinates.
        labels: Per-point category labels; must provide ``.unique()``
            (the caller passes a pandas Series).
        another_df: DataFrame handed to ``px.scatter`` as the data frame.
            It is expected to have the same length as ``data_2d``; every
            one of its columns is listed in the hover box.
        hover_name: Column of ``another_df`` used as the hover title.
            Defaults to ``"island"``, the value that used to be hard-coded.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from itertools import cycle

    # Pin every label to a fixed colour so that a plot of other data can reuse
    # the same mapping. The palette is cycled so that labels beyond the
    # palette length still get an explicit entry instead of being dropped
    # from the mapping by zip().
    color_map = dict(zip(labels.unique(), cycle(px.colors.qualitative.G10)))

    fig = px.scatter(
        another_df,
        x=data_2d[:, 0],
        y=data_2d[:, 1],
        color=labels,
        color_discrete_map=color_map,
        hover_name=hover_name,
        # Pass plain column names; all columns of another_df appear on hover.
        hover_data=list(another_df.columns),
    )
    fig.update_traces(marker=dict(size=5, opacity=0.7))
    fig.show()
# Load the penguins table from a URL pinned to a specific commit of the dataset repo.
penguins = pd.read_csv(
    "https://github.com/allisonhorst/palmerpenguins/raw/5b5891f01b52ae26ad8cb9755ec93672f49328a8/data/penguins_size.csv"
)

# Keep the four measurement columns and discard rows with a missing value.
data = penguins[
    [
        "culmen_length_mm",
        "culmen_depth_mm",
        "flipper_length_mm",
        "body_mass_g",
    ]
].dropna()

# Standardise the measurements, then embed them with UMAP's default settings.
scaled_penguin_data = StandardScaler().fit_transform(data.values)
reducer = umap.UMAP()
embedding = reducer.fit_transform(scaled_penguin_data)

# data.index holds the rows that survived dropna(), so the labels and the hover
# dataframe are restricted to exactly the rows that were embedded.
scatter(
    embedding,
    penguins.loc[data.index, "sex"].fillna("UNKNOWN"),
    penguins.loc[data.index],
)
我使用 UMAP 可视化数据,但无法添加正确的注释。如何使用相同长度的另一个数据框将悬停文本添加到 plotly express 散点图?
据我了解,我只能指定 data_2d 中的某一列。我能否把 another_df 的所有行都用作注释?
import plotly.express as px
# Question snippet: the asker's attempt at a scatter plot of `data_2d`, coloured by
# `labels`, with per-point annotation taken from `another_df`.
# NOTE(review): the body lines below carry no indentation, so this does not parse as-is.
def scatter(data_2d, labels, another_df):
# Map each label to a colour from the G10 palette.
# NOTE(review): `labels.unique` is referenced but not called; presumably
# `labels.unique()` was intended -- confirm.
c = dict(zip(labels.unique, px.colors.qualitative.G10)) # make the same colors for another data
fig = px.scatter(
data_2d, x=0, y=1,
color=labels,
color_discrete_map=c,
# NOTE(review): a whole dataframe is passed as `text` here; this is the part the
# asker reports as not working.
text=another_df, # here I'm stuck
# expected annotation
# column1: 57575874
# column2: 0.4545
# columnN: ....
# hover_data awaits for labels from data_2d and it doesn't work
# text is constant, I see just a mess of text
)
# Apply the same marker size and opacity to every trace, then display the figure.
fig.update_traces(marker=dict(size=5, opacity=0.7))
fig.show()
- 你的示例代码
  - 代码没有定义数据结构,我根据代码中的注释推断了它们的内容
  - 把 x 和 y 直接写成值在语法上不正确,已改为对 UMAP 返回的 2D numpy 数组进行切片
- 解决方案
  - 你已说明 another_df 与 data_2d 长度相同。一种做法是把包含所有列的数据框(即变换之前的数据框)直接传给 px.scatter
  - labels 参数其实是多余的,因为它本身就是 another_df 的一部分
  - 你想自定义悬停内容,因此设置了 hover_name 和 hover_data,其中 hover_data 使用了 another_df 中的所有列
import plotly.express as px
import pandas as pd
import numpy as np
import umap.umap_ as umap
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def scatter(data_2d, labels, another_df, hover_name="island"):
    """Show a scatter plot of an embedding with hover text from a dataframe.

    Args:
        data_2d: Array indexed as ``data_2d[:, 0]`` and ``data_2d[:, 1]`` to
            obtain the x and y coordinates.
        labels: Per-point category labels; must provide ``.unique()``
            (the caller passes a pandas Series).
        another_df: DataFrame handed to ``px.scatter`` as the data frame.
            It is expected to have the same length as ``data_2d``; every
            one of its columns is listed in the hover box.
        hover_name: Column of ``another_df`` used as the hover title.
            Defaults to ``"island"``, the value that used to be hard-coded.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from itertools import cycle

    # Pin every label to a fixed colour so that a plot of other data can reuse
    # the same mapping. The palette is cycled so that labels beyond the
    # palette length still get an explicit entry instead of being dropped
    # from the mapping by zip().
    color_map = dict(zip(labels.unique(), cycle(px.colors.qualitative.G10)))

    fig = px.scatter(
        another_df,
        x=data_2d[:, 0],
        y=data_2d[:, 1],
        color=labels,
        color_discrete_map=color_map,
        hover_name=hover_name,
        # Pass plain column names; all columns of another_df appear on hover.
        hover_data=list(another_df.columns),
    )
    fig.update_traces(marker=dict(size=5, opacity=0.7))
    fig.show()
# Load the penguins table from a URL pinned to a specific commit of the dataset repo.
penguins = pd.read_csv(
    "https://github.com/allisonhorst/palmerpenguins/raw/5b5891f01b52ae26ad8cb9755ec93672f49328a8/data/penguins_size.csv"
)

# Keep the four measurement columns and discard rows with a missing value.
data = penguins[
    [
        "culmen_length_mm",
        "culmen_depth_mm",
        "flipper_length_mm",
        "body_mass_g",
    ]
].dropna()

# Standardise the measurements, then embed them with UMAP's default settings.
scaled_penguin_data = StandardScaler().fit_transform(data.values)
reducer = umap.UMAP()
embedding = reducer.fit_transform(scaled_penguin_data)

# data.index holds the rows that survived dropna(), so the labels and the hover
# dataframe are restricted to exactly the rows that were embedded.
scatter(
    embedding,
    penguins.loc[data.index, "sex"].fillna("UNKNOWN"),
    penguins.loc[data.index],
)