尝试使平行坐标示例适应我的数据时出现空图
Empty plots when trying to adapt parallel coordinates example to my data
我正在尝试重现这个平行坐标示例,但遗憾的是我没能把它改得适用于我自己的数据。运行下面的代码时,图表是空的,没有任何线条。能否提供一个预定义的结构(最好附上一些面向我这样的初学者的解释),让我们可以修改这段代码并用于自己的目的?谢谢。
from sklearn import datasets
# Load the wine dataset and take its DataFrame (`.frame`).
data_wine = datasets.load_wine (as_frame = True).frame
# Remove two columns before plotting.
new_data = data_wine.drop (['proline', 'magnesium'], axis = 1)
# Reshape wide -> long in pandas, keeping `index` and `target` as identifier columns.
# No var_name/value_name is given, so melt names the new columns `variable` and `value`.
new_data = new_data.reset_index().melt(id_vars = ['index', 'target'])
# Shared chart pipeline; `lines` and `rules` below are both built from it.
# NOTE(review): `alt` is never imported in this snippet.
base = alt.Chart(
new_data
).transform_window(
# Assign a running count() to a field named `index`.
index="count()"
).transform_fold(
# NOTE(review): after the melt above these names are values of the `variable`
# column, not column names of `new_data`.
#[alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue od280/od315_of_diluted_wines "proline "]
["alcohol","malic_acid","ash","alcalinity_of_ash","total_phenols","flavanoids","nonflavanoid_phenols","proanthocyanins","color_intensity","hue","od280/od315_of_diluted_wines"]
).transform_joinaggregate(
# Attach the min and max of `value` within each `variable` group to every row.
min="min(value)",
max="max(value)",
groupby=["variable"]
).transform_calculate(
# NOTE(review): the numerator reads `datum.variable`, which the melt filled with
# column names rather than numbers.
norm_val="(datum.variable - datum.min) / (datum.max - datum.min)",
# Midpoint between the group's min and max.
mid="(datum.min + datum.max) / 2"
).properties(width=600, height=300)
# Line layer: x is `variable`, y is `norm_val` (axis hidden), coloured by `target`,
# with `index` as the detail channel.
lines = base.mark_line(opacity=0.3).encode(
alt.Color ('target:N'),
alt.Detail ('index:N'),
x='variable:N',
y=alt.Y('norm_val:Q', axis=None),
#tooltip=["petalLength:N", "petalWidth:N", "sepalLength:N", "sepalWidth:N"]
)
# Rule layer: light grey rule marks positioned by `variable`.
rules = base.mark_rule(
color="#ccc", tooltip=None
).encode(
x="variable:N",
detail="count():Q",
)
def ytick(yvalue, field):
    """Build the text-label and tick-mark layers for one height on every axis.

    Args:
        yvalue: Pixel offset, passed to ``alt.value``, at which the tick and
            its label are drawn.
        field: Name of the field to print next to the tick; it is wrapped as
            ``min(<field>)`` in the text encoding.

    Returns:
        A layered chart holding the right-aligned text labels and the
        horizontal tick marks, both derived from the module-level ``base``.
    """
    # Aggregating with min() yields one label per x position.
    scale = base.encode(x='variable:N', y=alt.value(yvalue), text=f"min({field}):Q")
    return alt.layer(
        scale.mark_text(baseline="middle", align="right", dx=-5, tooltip=None),
        scale.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
    )
# Stack the line layer, the rule layer and three tick/label layers into one chart.
# 0 / 150 / 300 are the pixel offsets handed to ytick; `base` sets height=300.
alt.layer(
lines, rules, ytick(0, "max"), ytick(150, "mid"), ytick(300, "min")
).configure_axisX(
# X axis: no domain line, horizontal labels, grey ticks, no title.
domain=False, labelAngle=0, tickColor="#ccc", title=None
).configure_view(
# Remove the border stroke around the view.
stroke=None
)
您的图没有显示,是因为您的输入数据与所参考示例中的数据结构不同。您已经在 pandas 中把宽格式数据框融合(melt)成了长格式,而这正是 Altair 中 `transform_fold` 函数所做的事情,所以在您的代码中这一步实际上被执行了两次。下面我删除了手动的 pandas melt,并把变量名改回由 `transform_fold` 自动分配的名称(`key` 和 `value`):
from sklearn import datasets
import altair as alt
# Load the wine data as one DataFrame and discard the two columns that are not plotted.
data_wine = datasets.load_wine(as_frame=True).frame
new_data = data_wine.drop(columns=['proline', 'magnesium'])

# Columns that become the parallel axes; transform_fold stacks them into
# `key` (column name) / `value` (cell value) pairs.
fold_columns = [
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "total_phenols",
    "flavanoids",
    "nonflavanoid_phenols",
    "proanthocyanins",
    "color_intensity",
    "hue",
    "od280/od315_of_diluted_wines",
]

# Shared pipeline reused by the layers below: tag each row with a running
# count (`index`), fold wide -> long, attach each key's min/max, then rescale
# every value to the 0..1 range of its own key.
base = (
    alt.Chart(new_data)
    .transform_window(index="count()")
    .transform_fold(fold_columns)
    .transform_joinaggregate(
        min="min(value)",
        max="max(value)",
        groupby=["key"],
    )
    .transform_calculate(
        norm_val="(datum.value - datum.min) / (datum.max - datum.min)",
        mid="(datum.min + datum.max) / 2",
    )
    .properties(width=1200, height=300)
)

# One polyline per `index`, coloured by `target`; the shared y axis is hidden.
lines = base.mark_line(opacity=0.3).encode(
    alt.X('key:N'),
    alt.Y('norm_val:Q', axis=None),
    alt.Color('target:N'),
    alt.Detail('index:N'),
)

# Light grey rule marks positioned by `key`.
rules = base.mark_rule(color="#ccc", tooltip=None).encode(
    alt.X("key:N"),
    alt.Detail("count():Q"),
)
def ytick(yvalue, field):
    """Build the text-label and tick-mark layers for one height on every axis.

    Args:
        yvalue: Pixel offset, passed to ``alt.value``, at which the tick and
            its label are drawn.
        field: Name of the field to print next to the tick; it is wrapped as
            ``min(<field>)`` in the text encoding.

    Returns:
        A layered chart holding the right-aligned text labels and the
        horizontal tick marks, both derived from the module-level ``base``.
    """
    # Aggregating with min() yields one label per x position.
    scale = base.encode(x='key:N', y=alt.value(yvalue), text=f"min({field}):Q")
    return alt.layer(
        scale.mark_text(baseline="middle", align="right", dx=-5, tooltip=None),
        scale.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
    )
# Tick/label layers at pixel offsets 0 / 150 / 300; `base` sets height=300.
tick_layers = [ytick(0, "max"), ytick(150, "mid"), ytick(300, "min")]

# Combine all layers, then drop the x-axis domain line and title and the view border.
chart = alt.layer(lines, rules, *tick_layers)
chart = chart.configure_axisX(
    domain=False,
    labelAngle=0,
    tickColor="#ccc",
    title=None,
).configure_view(stroke=None)
chart
如果您不介意数据框中的每一列没有单独的 y-axis,您可以像这样创建一个更简单的平行坐标图:
from sklearn import datasets
import altair as alt
# Wine data as a single DataFrame.
data = datasets.load_wine(as_frame=True).frame

# Columns drawn along the x axis of the parallel-coordinates plot.
num_cols = [
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "total_phenols",
    "flavanoids",
    "nonflavanoid_phenols",
    "proanthocyanins",
    "color_intensity",
    "hue",
    "od280/od315_of_diluted_wines",
]

# Min-max rescale each column. You could skip this rescaling, but it would
# compress the y-axis range for columns with smaller absolute values.
col_min = data[num_cols].min()
col_span = data[num_cols].max() - col_min
data[num_cols] = (data[num_cols] - col_min) / col_span

# Tag rows with a running count, fold the columns into key/value pairs and
# draw one line per `index`, coloured by `target`.
chart = (
    alt.Chart(data)
    .transform_window(index='count()')
    .transform_fold(num_cols)
    .mark_line()
    .encode(
        x=alt.X('key:O', title=None, scale=alt.Scale(nice=False, padding=0.05)),
        y=alt.Y('value:Q', title=None),
        color=alt.Color('target:N', title=None),
        detail='index:N',
    )
    .properties(width=1200)
)
chart
如果您只是用它做探索性数据分析,并不需要对图进行大量自定义,也可以试试我的实验性包 [altair_ally][3],用它可以快速创建一些常见的探索性图表:
from sklearn import datasets
import altair_ally as aly
# Load the wine DataFrame and cast `target` to strings
# (presumably so the colour encoding treats it as categorical; verify).
wine_frame = datasets.load_wine(as_frame=True).frame
data_wine = wine_frame.astype({'target': str})

# Parallel-coordinates plot coloured by `target`.
aly.parcoord(data_wine, color='target')
我正在尝试重做
from sklearn import datasets
# Load the wine dataset and take its DataFrame (`.frame`).
data_wine = datasets.load_wine (as_frame = True).frame
# Remove two columns before plotting.
new_data = data_wine.drop (['proline', 'magnesium'], axis = 1)
# Reshape wide -> long in pandas, keeping `index` and `target` as identifier columns.
# No var_name/value_name is given, so melt names the new columns `variable` and `value`.
new_data = new_data.reset_index().melt(id_vars = ['index', 'target'])
# Shared chart pipeline; `lines` and `rules` below are both built from it.
# NOTE(review): `alt` is never imported in this snippet.
base = alt.Chart(
new_data
).transform_window(
# Assign a running count() to a field named `index`.
index="count()"
).transform_fold(
# NOTE(review): after the melt above these names are values of the `variable`
# column, not column names of `new_data`.
#[alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue od280/od315_of_diluted_wines "proline "]
["alcohol","malic_acid","ash","alcalinity_of_ash","total_phenols","flavanoids","nonflavanoid_phenols","proanthocyanins","color_intensity","hue","od280/od315_of_diluted_wines"]
).transform_joinaggregate(
# Attach the min and max of `value` within each `variable` group to every row.
min="min(value)",
max="max(value)",
groupby=["variable"]
).transform_calculate(
# NOTE(review): the numerator reads `datum.variable`, which the melt filled with
# column names rather than numbers.
norm_val="(datum.variable - datum.min) / (datum.max - datum.min)",
# Midpoint between the group's min and max.
mid="(datum.min + datum.max) / 2"
).properties(width=600, height=300)
# Line layer: x is `variable`, y is `norm_val` (axis hidden), coloured by `target`,
# with `index` as the detail channel.
lines = base.mark_line(opacity=0.3).encode(
alt.Color ('target:N'),
alt.Detail ('index:N'),
x='variable:N',
y=alt.Y('norm_val:Q', axis=None),
#tooltip=["petalLength:N", "petalWidth:N", "sepalLength:N", "sepalWidth:N"]
)
# Rule layer: light grey rule marks positioned by `variable`.
rules = base.mark_rule(
color="#ccc", tooltip=None
).encode(
x="variable:N",
detail="count():Q",
)
def ytick(yvalue, field):
    """Build the text-label and tick-mark layers for one height on every axis.

    Args:
        yvalue: Pixel offset, passed to ``alt.value``, at which the tick and
            its label are drawn.
        field: Name of the field to print next to the tick; it is wrapped as
            ``min(<field>)`` in the text encoding.

    Returns:
        A layered chart holding the right-aligned text labels and the
        horizontal tick marks, both derived from the module-level ``base``.
    """
    # Aggregating with min() yields one label per x position.
    scale = base.encode(x='variable:N', y=alt.value(yvalue), text=f"min({field}):Q")
    return alt.layer(
        scale.mark_text(baseline="middle", align="right", dx=-5, tooltip=None),
        scale.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
    )
# Stack the line layer, the rule layer and three tick/label layers into one chart.
# 0 / 150 / 300 are the pixel offsets handed to ytick; `base` sets height=300.
alt.layer(
lines, rules, ytick(0, "max"), ytick(150, "mid"), ytick(300, "min")
).configure_axisX(
# X axis: no domain line, horizontal labels, grey ticks, no title.
domain=False, labelAngle=0, tickColor="#ccc", title=None
).configure_view(
# Remove the border stroke around the view.
stroke=None
)
您的图没有显示,是因为您的输入数据与所参考示例中的数据结构不同。您已经在 pandas 中把宽格式数据框融合(melt)成了长格式,而这正是 Altair 中 `transform_fold` 函数所做的事情,所以在您的代码中这一步实际上被执行了两次。下面我删除了手动的 pandas melt,并把变量名改回由 `transform_fold` 自动分配的名称(`key` 和 `value`):
from sklearn import datasets
import altair as alt
# Load the wine data as one DataFrame and discard the two columns that are not plotted.
data_wine = datasets.load_wine(as_frame=True).frame
new_data = data_wine.drop(columns=['proline', 'magnesium'])

# Columns that become the parallel axes; transform_fold stacks them into
# `key` (column name) / `value` (cell value) pairs.
fold_columns = [
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "total_phenols",
    "flavanoids",
    "nonflavanoid_phenols",
    "proanthocyanins",
    "color_intensity",
    "hue",
    "od280/od315_of_diluted_wines",
]

# Shared pipeline reused by the layers below: tag each row with a running
# count (`index`), fold wide -> long, attach each key's min/max, then rescale
# every value to the 0..1 range of its own key.
base = (
    alt.Chart(new_data)
    .transform_window(index="count()")
    .transform_fold(fold_columns)
    .transform_joinaggregate(
        min="min(value)",
        max="max(value)",
        groupby=["key"],
    )
    .transform_calculate(
        norm_val="(datum.value - datum.min) / (datum.max - datum.min)",
        mid="(datum.min + datum.max) / 2",
    )
    .properties(width=1200, height=300)
)

# One polyline per `index`, coloured by `target`; the shared y axis is hidden.
lines = base.mark_line(opacity=0.3).encode(
    alt.X('key:N'),
    alt.Y('norm_val:Q', axis=None),
    alt.Color('target:N'),
    alt.Detail('index:N'),
)

# Light grey rule marks positioned by `key`.
rules = base.mark_rule(color="#ccc", tooltip=None).encode(
    alt.X("key:N"),
    alt.Detail("count():Q"),
)
def ytick(yvalue, field):
    """Build the text-label and tick-mark layers for one height on every axis.

    Args:
        yvalue: Pixel offset, passed to ``alt.value``, at which the tick and
            its label are drawn.
        field: Name of the field to print next to the tick; it is wrapped as
            ``min(<field>)`` in the text encoding.

    Returns:
        A layered chart holding the right-aligned text labels and the
        horizontal tick marks, both derived from the module-level ``base``.
    """
    # Aggregating with min() yields one label per x position.
    scale = base.encode(x='key:N', y=alt.value(yvalue), text=f"min({field}):Q")
    return alt.layer(
        scale.mark_text(baseline="middle", align="right", dx=-5, tooltip=None),
        scale.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
    )
# Tick/label layers at pixel offsets 0 / 150 / 300; `base` sets height=300.
tick_layers = [ytick(0, "max"), ytick(150, "mid"), ytick(300, "min")]

# Combine all layers, then drop the x-axis domain line and title and the view border.
chart = alt.layer(lines, rules, *tick_layers)
chart = chart.configure_axisX(
    domain=False,
    labelAngle=0,
    tickColor="#ccc",
    title=None,
).configure_view(stroke=None)
chart
如果您不介意数据框中的每一列没有单独的 y-axis,您可以像这样创建一个更简单的平行坐标图:
from sklearn import datasets
import altair as alt
# Wine data as a single DataFrame.
data = datasets.load_wine(as_frame=True).frame

# Columns drawn along the x axis of the parallel-coordinates plot.
num_cols = [
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "total_phenols",
    "flavanoids",
    "nonflavanoid_phenols",
    "proanthocyanins",
    "color_intensity",
    "hue",
    "od280/od315_of_diluted_wines",
]

# Min-max rescale each column. You could skip this rescaling, but it would
# compress the y-axis range for columns with smaller absolute values.
col_min = data[num_cols].min()
col_span = data[num_cols].max() - col_min
data[num_cols] = (data[num_cols] - col_min) / col_span

# Tag rows with a running count, fold the columns into key/value pairs and
# draw one line per `index`, coloured by `target`.
chart = (
    alt.Chart(data)
    .transform_window(index='count()')
    .transform_fold(num_cols)
    .mark_line()
    .encode(
        x=alt.X('key:O', title=None, scale=alt.Scale(nice=False, padding=0.05)),
        y=alt.Y('value:Q', title=None),
        color=alt.Color('target:N', title=None),
        detail='index:N',
    )
    .properties(width=1200)
)
chart
如果您只是用它做探索性数据分析,并不需要对图进行大量自定义,也可以试试我的实验性包 [altair_ally][3],用它可以快速创建一些常见的探索性图表:
from sklearn import datasets
import altair_ally as aly
# Load the wine DataFrame and cast `target` to strings
# (presumably so the colour encoding treats it as categorical; verify).
wine_frame = datasets.load_wine(as_frame=True).frame
data_wine = wine_frame.astype({'target': str})

# Parallel-coordinates plot coloured by `target`.
aly.parcoord(data_wine, color='target')