在 altair-viz 中分面拼接图像

Faceting concatenated images in altair-viz

这是对之前一个问题的后续提问。我在数据框中新增了一列。我的目标是按 “Marker” 列中的取值进行分组,以便最终得到一个分面图,其中每个分面都同时包含条形图和文本。数据框如下。在尝试分面之前,我先贴出单个图的样子,随后贴出代码和我目前最好的尝试。我想知道是否有办法在 altair 代码中完成这件事,还是您建议在 altair 代码之外用 for 循环来执行 groupby。

,Bug,Unknown,Level,LDA_Score,p_value,Marker
0,a,4.10808792666,Low,3.43193376894,0.0381678194757,GM
1,b,2.80231776318,High,2.86568860404,0.048078814719199996,GM
2,c,1.55012602444,High,3.0159901714,0.047006554908300004,GM
3,d,2.11298173821,High,2.94493334678,0.0120363750248,GM
4,e,2.08807237447,High,2.9096371889,0.0149437560986,GM
5,f,2.762619332479999,High,2.52323422148,0.040652301139,GM
6,g,4.390454714340001,Low,3.85075499081,0.029978515680400004,GM
7,h,3.32306083381,High,3.01988462626,0.0244409015043,GM
8,i,2.84614167157,High,2.97142565384,0.0438396924694,GM
9,j,4.51419624602,Low,3.84190054285,0.0460224914387,GM
10,k,4.027450677669999,High,3.52319882849,0.0113390729281,IFN
11,l,4.26967903787,Low,3.8458771734,0.00548234585386,IFN
12,m,1.7823168924,High,2.50020069082,0.0203578926278,IFN

这段代码和图片是在没有尝试按最后一列分组的情况下的样子:

# Shared y ordering: bugs sorted by descending LDA score.
y_sort = alt.EncodingSortField(field='LDA_Score', order='descending')

# Left panel: horizontal bars of LDA score per bug, one row facet per Level.
bars = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X(
            'LDA_Score',
            title='LDA_Score (log10)',
            axis=alt.Axis(titleFontSize=14),
        ),
        y=alt.Y(
            "Bug:N",
            sort=y_sort,
            axis=alt.Axis(title=None, labelFontStyle='italic'),
        ),
        color=alt.Color(
            'Level:N',
            legend=alt.Legend(title=None, labelFontSize=12, orient='right'),
        ),
        row=alt.Row(
            'Level:N',
            header=alt.Header(title=None, labelFontSize=0),
            spacing=0,
        ),
    )
    .resolve_scale(y='independent')
)

# Right panel: the p-value of each bug as text, laid out on the same
# Bug ordering and Level rows as the bars, with its own y axis hidden.
text = (
    alt.Chart(df)
    .mark_text()
    .encode(
        text=alt.Text('p_value:Q', format='.3e'),
        y=alt.Y("Bug:N", sort=y_sort, axis=None),
        row=alt.Row(
            'Level:N',
            header=alt.Header(title=None, labelFontSize=0),
            spacing=0,
        ),
    )
    .resolve_scale(y='independent')
    .properties(width=50, title="p_value")
)

# Put the two panels side by side (negative spacing pulls them together)
# and apply the shared title/axis/view configuration.
FinalChart = (
    alt.hconcat(bars, text, spacing=-10)
    .configure_title(anchor='end', fontStyle='italic', fontSize=14)
    .configure_axis(grid=True, gridOpacity=0.5)
    .configure_view(opacity=0.5)
)
FinalChart.display()

随后我尝试使用 altair/vega 中的 facet 选项。下面先贴出代码,然后是我得到的结果:

# Attempted solution from the question: facet each chart by Marker on its
# own, then concatenate the two faceted charts.
# NOTE(review): per the surrounding question text this does not produce the
# desired layout (a bar panel paired with its text panel for every Marker).

# Shared y ordering: bugs sorted by descending LDA score.
y_sort = alt.EncodingSortField(field='LDA_Score', order='descending')

# Bar chart with a Level row facet, additionally wrapped in a Marker column
# facet via .facet().
bars = alt.Chart(df).mark_bar().encode(
    alt.X('LDA_Score', title='LDA_Score (log10)', axis=alt.Axis(titleFontSize=14)),
    alt.Y("Bug:N", sort=y_sort, axis=alt.Axis(title=None, labelFontStyle='italic')),
    color=alt.Color('Level:N', legend=alt.Legend(title=None, labelFontSize=12, orient='right')),# scale=alt.Scale(domain=['>12weeks', '<12weeks'], range=['green', 'red'])),
    row=alt.Row('Level:N', header=alt.Header(title=None, labelFontSize=0), spacing=0),
).resolve_scale(
    y='independent'
).facet(column='Marker')

# p-value text chart, faceted by Marker independently of the bar chart.
text = alt.Chart(df).mark_text().encode(
    alt.Text('p_value:Q', format='.3e'),
    alt.Y("Bug:N", sort=y_sort, axis=None),
    row=alt.Row('Level:N', header=alt.Header(title=None, labelFontSize=0), spacing=0),
).resolve_scale(
    y='independent'
).properties(width=50, title="p_value"
).facet(column='Marker')

# Concatenate the two already-faceted charts side by side and apply the
# title/axis/view configuration.
FinalChart = alt.hconcat(bars, text, spacing=-10)\
    .configure_title(anchor='end', fontStyle='italic', fontSize=14)\
    .configure_axis(grid=True, gridOpacity=0.5).configure_view(opacity=0.5)
FinalChart.display()

我认为这可能很直观,但我想要的是这样的:

您想要的是对拼接(concat)后的图表进行分面;不幸的是,Altair 和 Vega-Lite 都不支持这种做法。不过您可以通过手动构建分面来解决这个问题:facet 操作本质上就是一次过滤(filter)加上一次拼接(concat),因此您可以像这样构建您想要的图表:

import altair as alt
import pandas as pd
from io import StringIO

# Sample data from the question, kept inline so the example is self-contained.
# The leading unnamed column is the original row index.
raw_csv = """\
,Bug,Unknown,Level,LDA_Score,p_value,Marker
0,a,4.10808792666,Low,3.43193376894,0.0381678194757,GM
1,b,2.80231776318,High,2.86568860404,0.048078814719199996,GM
2,c,1.55012602444,High,3.0159901714,0.047006554908300004,GM
3,d,2.11298173821,High,2.94493334678,0.0120363750248,GM
4,e,2.08807237447,High,2.9096371889,0.0149437560986,GM
5,f,2.762619332479999,High,2.52323422148,0.040652301139,GM
6,g,4.390454714340001,Low,3.85075499081,0.029978515680400004,GM
7,h,3.32306083381,High,3.01988462626,0.0244409015043,GM
8,i,2.84614167157,High,2.97142565384,0.0438396924694,GM
9,j,4.51419624602,Low,3.84190054285,0.0460224914387,GM
10,k,4.027450677669999,High,3.52319882849,0.0113390729281,IFN
11,l,4.26967903787,Low,3.8458771734,0.00548234585386,IFN
12,m,1.7823168924,High,2.50020069082,0.0203578926278,IFN
"""

# Parse the inline CSV text into the DataFrame used by the charts.
df = pd.read_csv(StringIO(raw_csv))

# Shared y ordering: bugs sorted by descending LDA score.
y_sort = alt.EncodingSortField(field='LDA_Score', order='descending')

# Bar chart of LDA score per bug, with one row facet per Level (row header
# label font size is 0). The Marker column encoding means that, once the data
# is filtered down to a single Marker, the panel carries that Marker as its
# column header.
bars = alt.Chart(df).mark_bar().encode(
    alt.X('LDA_Score', title='LDA_Score (log10)', axis=alt.Axis(titleFontSize=14)),
    alt.Y("Bug:N", sort=y_sort, axis=alt.Axis(title=None, labelFontStyle='italic')),
    color=alt.Color('Level:N', legend=alt.Legend(title=None, labelFontSize=12, orient='right')),
    row=alt.Row('Level:N', header=alt.Header(title=None, labelFontSize=0), spacing=0),
    column=alt.Column('Marker:N', title=None),
).resolve_scale(
    y='independent'
)

# p-value text chart laid out on the same Bug ordering and Level rows as the
# bars, with its own y axis hidden.
text = alt.Chart(df).mark_text().encode(
    alt.Text('p_value:Q', format='.3e'),
    alt.Y("Bug:N", sort=y_sort, axis=None),
    row=alt.Row('Level:N', header=alt.Header(title=None, labelFontSize=0), spacing=0),
).resolve_scale(
    y='independent'
).properties(width=50, title="p_value"
)

# Faceting a concatenated chart is not supported, so emulate it by hand:
# a facet is a filter plus a concat. Build one (bars, text) pair per distinct
# Marker value, in order of first appearance, instead of hard-coding the
# values. Missing values are skipped, and .tolist() yields plain Python
# values for the predicate.
marker_panels = []
for marker in df['Marker'].dropna().unique().tolist():
    # A field predicate avoids hand-quoting the value inside an expression
    # string, so Marker values containing quotes still work.
    only_this_marker = alt.FieldEqualPredicate(field='Marker', equal=marker)
    marker_panels.append(bars.transform_filter(only_this_marker))
    marker_panels.append(text.transform_filter(only_this_marker))

FinalChart = (
    alt.hconcat(*marker_panels)
    .configure_title(anchor='end', fontStyle='italic', fontSize=14)
    .configure_axis(grid=True, gridOpacity=0.5)
    .configure_view(opacity=0.5)
)
FinalChart.display()