Plotly:如何将分类变量插入平行坐标图中?
Plotly: How to insert a categorical variable into a parallel coordinates plot?
到目前为止,我试过这个:
import pandas as pd
import plotly.graph_objects as go
# Download the example data set as a DataFrame.
csv_url = 'https://raw.githubusercontent.com/vyaduvanshi/helper-files/master/parallel_coordinates.csv'
df = pd.read_csv(csv_url)

# Axis labels in display order; the matching column is named 'gm_<label>'.
axis_labels = [
    'Retail & Recreation',
    'Grocery & Pharmacy',
    'Parks',
    'Transit Stations',
    'Workplaces',
    'Residential',
]

# One numeric axis per label, spanning that column's observed min..max.
dimensions = [
    dict(
        range=[df['gm_' + axis].min(), df['gm_' + axis].max()],
        label=axis,
        values=df['gm_' + axis],
    )
    for axis in axis_labels
]
# The asker's attempt at a categorical 'Country' axis, left disabled:
# dimensions.append(dict(range=[0, len(df)], values=df['country'],
#                        label='Country'))

# Line styling exactly as in the original attempt: a constant colour together
# with colour-scale settings.
line_style = dict(
    color='#ff0000',
    colorscale='Electric',
    showscale=True,
    cmin=-4000,
    cmax=-100,
)
fig = go.Figure(data=go.Parcoords(line=line_style, dimensions=dimensions))
fig.show()
它返回的结果如下:
我想做的是把这些线条连接到最后一列,即 country 列(分类变量)。(我的尝试在代码片段中被注释掉了。)我正在思考如何把这些数值与对应的国家类别关联起来,用索引也许是一种办法?我还想按国家/地区给线条上色,我猜一个由不同颜色组成的列表可能会有帮助。我卡住了,需要一些帮助。
在你的例子中,可以为 df['country'] 中的每个唯一取值分配一个虚拟变量(整数编码)来实现。由于你的数据集是长格式的,同一个国家会出现多次,因此虚拟变量也会重复;不过不用担心,下面的完整代码会处理好这一点。然后你可以把最后一个维度指定为:
# Categorical 'Country' axis: tick positions are the integer codes in
# dfg['dummy'] and the tick text is the matching name in dfg['country'].
dict(
    range=[0, df['dummy'].max()],
    tickvals=dfg['dummy'],
    ticktext=dfg['country'],
    label='Country',
    values=df['dummy'],
),
最后为线条分配颜色范围,例如:
# Colour each line by its country code, using a three-stop colour scale whose
# stops all have an alpha of 0.1.
line = dict(
    color=df['dummy'],
    colorscale=[
        [0, 'rgba(200,0,0,0.1)'],
        [0.5, 'rgba(0,200,0,0.1)'],
        [1, 'rgba(0,0,200,0.1)'],
    ],
)
图表:
完整代码:
import pandas as pd
import plotly.graph_objects as go
# Download the example data set as a DataFrame.
df = pd.read_csv('https://raw.githubusercontent.com/vyaduvanshi/helper-files/master/parallel_coordinates.csv')

# Build a lookup table with one row per distinct country and use its default
# integer index as the numeric code ('dummy') for that country.
group_vars = df['country'].unique()
dfg = pd.DataFrame({'country': group_vars})
dfg['dummy'] = dfg.index

# Attach the code to every row; the left join keeps all rows of df.
df = pd.merge(df, dfg, on='country', how='left')

# Axis labels in display order; the matching column is named 'gm_<label>'.
axis_labels = [
    'Retail & Recreation',
    'Grocery & Pharmacy',
    'Parks',
    'Transit Stations',
    'Workplaces',
    'Residential',
]

# One numeric axis per label, spanning that column's observed min..max.
dimensions = [
    dict(
        range=[df['gm_' + axis].min(), df['gm_' + axis].max()],
        label=axis,
        values=df['gm_' + axis],
    )
    for axis in axis_labels
]

# Categorical axis: plotted on the integer codes, but labelled with the
# country names via tickvals/ticktext.
dimensions.append(
    dict(
        range=[0, df['dummy'].max()],
        tickvals=dfg['dummy'],
        ticktext=dfg['country'],
        label='Country',
        values=df['dummy'],
    )
)

# Colour each line by its country code, using a three-stop colour scale whose
# stops all have an alpha of 0.1.
fig = go.Figure(
    data=go.Parcoords(
        line=dict(
            color=df['dummy'],
            colorscale=[
                [0, 'rgba(200,0,0,0.1)'],
                [0.5, 'rgba(0,200,0,0.1)'],
                [1, 'rgba(0,0,200,0.1)'],
            ],
        ),
        dimensions=dimensions,
    )
)
fig.show()
使用df.infer_objects()自动推断每列的数据类型。
到目前为止,我试过这个:
import pandas as pd
import plotly.graph_objects as go
# Download the example data set as a DataFrame.
csv_url = 'https://raw.githubusercontent.com/vyaduvanshi/helper-files/master/parallel_coordinates.csv'
df = pd.read_csv(csv_url)

# Axis labels in display order; the matching column is named 'gm_<label>'.
axis_labels = [
    'Retail & Recreation',
    'Grocery & Pharmacy',
    'Parks',
    'Transit Stations',
    'Workplaces',
    'Residential',
]

# One numeric axis per label, spanning that column's observed min..max.
dimensions = [
    dict(
        range=[df['gm_' + axis].min(), df['gm_' + axis].max()],
        label=axis,
        values=df['gm_' + axis],
    )
    for axis in axis_labels
]
# The asker's attempt at a categorical 'Country' axis, left disabled:
# dimensions.append(dict(range=[0, len(df)], values=df['country'],
#                        label='Country'))

# Line styling exactly as in the original attempt: a constant colour together
# with colour-scale settings.
line_style = dict(
    color='#ff0000',
    colorscale='Electric',
    showscale=True,
    cmin=-4000,
    cmax=-100,
)
fig = go.Figure(data=go.Parcoords(line=line_style, dimensions=dimensions))
fig.show()
它返回的结果如下:
我想做的是把这些线条连接到最后一列,即 country 列(分类变量)。(我的尝试在代码片段中被注释掉了。)我正在思考如何把这些数值与对应的国家类别关联起来,用索引也许是一种办法?我还想按国家/地区给线条上色,我猜一个由不同颜色组成的列表可能会有帮助。我卡住了,需要一些帮助。
在你的例子中,可以为 df['country'] 中的每个唯一取值分配一个虚拟变量(整数编码)来实现。由于你的数据集是长格式的,同一个国家会出现多次,因此虚拟变量也会重复;不过不用担心,下面的完整代码会处理好这一点。然后你可以把最后一个维度指定为:
# Categorical 'Country' axis: tick positions are the integer codes in
# dfg['dummy'] and the tick text is the matching name in dfg['country'].
dict(
    range=[0, df['dummy'].max()],
    tickvals=dfg['dummy'],
    ticktext=dfg['country'],
    label='Country',
    values=df['dummy'],
),
最后为线条分配颜色范围,例如:
# Colour each line by its country code, using a three-stop colour scale whose
# stops all have an alpha of 0.1.
line = dict(
    color=df['dummy'],
    colorscale=[
        [0, 'rgba(200,0,0,0.1)'],
        [0.5, 'rgba(0,200,0,0.1)'],
        [1, 'rgba(0,0,200,0.1)'],
    ],
)
图表:
完整代码:
import pandas as pd
import plotly.graph_objects as go
# Download the example data set as a DataFrame.
df = pd.read_csv('https://raw.githubusercontent.com/vyaduvanshi/helper-files/master/parallel_coordinates.csv')

# Build a lookup table with one row per distinct country and use its default
# integer index as the numeric code ('dummy') for that country.
group_vars = df['country'].unique()
dfg = pd.DataFrame({'country': group_vars})
dfg['dummy'] = dfg.index

# Attach the code to every row; the left join keeps all rows of df.
df = pd.merge(df, dfg, on='country', how='left')

# Axis labels in display order; the matching column is named 'gm_<label>'.
axis_labels = [
    'Retail & Recreation',
    'Grocery & Pharmacy',
    'Parks',
    'Transit Stations',
    'Workplaces',
    'Residential',
]

# One numeric axis per label, spanning that column's observed min..max.
dimensions = [
    dict(
        range=[df['gm_' + axis].min(), df['gm_' + axis].max()],
        label=axis,
        values=df['gm_' + axis],
    )
    for axis in axis_labels
]

# Categorical axis: plotted on the integer codes, but labelled with the
# country names via tickvals/ticktext.
dimensions.append(
    dict(
        range=[0, df['dummy'].max()],
        tickvals=dfg['dummy'],
        ticktext=dfg['country'],
        label='Country',
        values=df['dummy'],
    )
)

# Colour each line by its country code, using a three-stop colour scale whose
# stops all have an alpha of 0.1.
fig = go.Figure(
    data=go.Parcoords(
        line=dict(
            color=df['dummy'],
            colorscale=[
                [0, 'rgba(200,0,0,0.1)'],
                [0.5, 'rgba(0,200,0,0.1)'],
                [1, 'rgba(0,0,200,0.1)'],
            ],
        ),
        dimensions=dimensions,
    )
)
fig.show()
使用df.infer_objects()自动推断每列的数据类型。