如何使用 Altair 在一张图中并排显示 3 个条形图

How to display 3 bar charts next to each other in one graph using Altair

我正在尝试复制此图表,但我很难将我创建的所有 3 个图表放在一个图表中。到目前为止,我已经能够使用适当的数据和颜色创建 3 个单独的条形图,但分层并不成功。

我正在复制的图表:

这是我用来创建每个图表的代码。基本上是同一段代码重复了 3 次,分别创建各个图表,三个图表分别命名为 'seen_movies_top'、'seen_movies_middle' 和 'seen_movies_bottom'。我觉得自己把事情弄复杂了,应该有更简单的方法,但至少我已经能够创建出每个单独的图表。现在只差把它们放到同一张图上了。

# Episode codes in release-number order; they are the suffixes of the
# seen_* / rank_* column names used further down.
episodes = ['EI', 'EII', 'EIII', 'EIV', 'EV', 'EVI']

# Map each episode code to the full film title shown on the chart axis.
# ('The Phantom Menace' was previously misspelled as 'Meanance'.)
names = {
    'EI': 'The Phantom Menace',
    'EII': 'Attack of the clones',
    'EIII': 'Revenge of the Sith',
    'EIV': 'A New Hope',
    'EV': 'The Empire Strikes Back',
    'EVI': 'The Return of the Jedi',
}

# Titles in episode order; passed to the charts as an explicit sort order so
# the y axis is not sorted alphabetically.
names_l = [names[ep] for ep in episodes]

print("sort order: ",names_l)

# The seen_* columns, one per episode; NaN marks a film not seen.
seen_cols = ['seen_' + ep for ep in episodes]

# Respondents who have seen at least one film: drop only the rows where
# *every* seen_* column is NaN. This must be computed first, because
# seen_every below is derived from it.
seen_at_least_one = sw.dropna(subset=seen_cols, how='all')
total = len(seen_at_least_one)

print("total who have seen at least one: ", total)

# Respondents who have seen all six films: drop rows with NaN in *any*
# seen_* column.
seen_every = seen_at_least_one.dropna(subset=seen_cols)

# Denominator for the ranking percentages computed below.
total_rank = len(seen_every)

# Share of respondents (among those who have seen all six films) who ranked
# each film in their top third, i.e. gave it rank '1' or '2'.
# The rank labels are strings, matching how the rank_* columns are indexed.
# Counting with isin() scans each column once and yields 0 for a rank that
# never occurs, instead of raising KeyError on a missing value_counts label.
top_third_ranks = ['1', '2']
percs_seen_top3 = [
    seen_every['rank_' + ep].isin(top_third_ranks).sum() / total_rank
    for ep in episodes
]

# Pair each film title with its share and build the frame the chart reads.
tuples_top = list(zip([names[ep] for ep in episodes], percs_seen_top3))
seen_per_df_top = pd.DataFrame(tuples_top, columns=['Name', 'Percentage'])

# x channel: bar length encodes the share; the axis itself is hidden
top_x = alt.X('Percentage', axis=None)

# y channel: one band per film, labelled with the title and ordered by
# names_l rather than alphabetically
top_y = alt.Y(
    'Name:N',
    sort=names_l,
    axis=alt.Axis(tickCount=5, title=''),
)

# horizontal bars for the top-third shares
bars_top = alt.Chart(seen_per_df_top).mark_bar(size=20).encode(x=top_x, y=top_y)

# Percentage labels reuse the bar encodings; dx nudges the text to the right
# so it does not sit on top of the bar.
top_label_style = dict(align='left', baseline='middle', dx=3)
text_top = bars_top.mark_text(**top_label_style).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)


# Title and subtitle shown above the chart.
top_title = {
    "text": ["How People Rate the 'Star Wars' Movies"],
    "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
}

# Layer the labels over the bars, then size and style the result.
# NOTE(review): Altair does not accept sub-charts that carry their own
# config inside hconcat/vconcat/layer, so the configure_* calls would need to
# move to the combined chart before this one can be concatenated.
seen_movies_top = (
    (text_top + bars_top)
    .properties(width=500, height=180, title=top_title)
    # blue marks for the top-third panel
    .configure_mark(color='#008fd5')
    # no stroke around the view
    .configure_view(strokeWidth=0)
    # padding between the bands
    .configure_scale(bandPaddingInner=0.2)
    # title / subtitle styling
    .configure_title(
        fontSize=30,
        align='left',
        anchor='start',
        fontWeight='bold',
        subtitleFontWeight='lighter',
    )
)

seen_movies_top

# Share of respondents (among those who have seen all six films) who ranked
# each film in their middle third, i.e. gave it rank '3' or '4'.
# The rank labels are strings, matching how the rank_* columns are indexed.
# Counting with isin() scans each column once and yields 0 for a rank that
# never occurs, instead of raising KeyError on a missing value_counts label.
middle_third_ranks = ['3', '4']
percs_seen_middle3 = [
    seen_every['rank_' + ep].isin(middle_third_ranks).sum() / total_rank
    for ep in episodes
]

# Pair each film title with its share and build the frame the chart reads.
tuples_middle = list(zip([names[ep] for ep in episodes], percs_seen_middle3))
seen_per_df_middle = pd.DataFrame(tuples_middle, columns=['Name', 'Percentage'])

# x channel: bar length encodes the share; the axis itself is hidden
middle_x = alt.X('Percentage', axis=None)

# y channel: one band per film, labelled with the title and ordered by
# names_l rather than alphabetically
middle_y = alt.Y(
    'Name:N',
    sort=names_l,
    axis=alt.Axis(tickCount=5, title=''),
)

# horizontal bars for the middle-third shares
bars_middle = alt.Chart(seen_per_df_middle).mark_bar(size=20).encode(
    x=middle_x,
    y=middle_y,
)

# bare bars only at this stage: no annotations or title yet
bars_middle

# Percentage labels reuse the bar encodings; dx nudges the text to the right
# so it does not sit on top of the bar.
middle_label_style = dict(align='left', baseline='middle', dx=3)
text_middle = bars_middle.mark_text(**middle_label_style).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)

# Title and subtitle shown above the chart.
middle_title = {
    "text": ["How People Rate the 'Star Wars' Movies"],
    "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
}

# Layer the labels over the bars, then size and style the result.
# NOTE(review): Altair does not accept sub-charts that carry their own
# config inside hconcat/vconcat/layer, so the configure_* calls would need to
# move to the combined chart before this one can be concatenated.
seen_movies_middle = (
    (text_middle + bars_middle)
    .properties(width=500, height=180, title=middle_title)
    # green marks for the middle-third panel
    .configure_mark(color='#69a14f')
    # no stroke around the view
    .configure_view(strokeWidth=0)
    # padding between the bands
    .configure_scale(bandPaddingInner=0.2)
    # title / subtitle styling
    .configure_title(
        fontSize=30,
        align='left',
        anchor='start',
        fontWeight='bold',
        subtitleFontWeight='lighter',
    )
)

seen_movies_middle

# Share of respondents (among those who have seen all six films) who ranked
# each film in their bottom third, i.e. gave it rank '5' or '6'.
# The rank labels are strings, matching how the rank_* columns are indexed.
# Counting with isin() scans each column once and yields 0 for a rank that
# never occurs, instead of raising KeyError on a missing value_counts label.
bottom_third_ranks = ['5', '6']
percs_seen_bottom3 = [
    seen_every['rank_' + ep].isin(bottom_third_ranks).sum() / total_rank
    for ep in episodes
]

# Pair each film title with its share and build the frame the chart reads.
tuples_bottom = list(zip([names[ep] for ep in episodes], percs_seen_bottom3))
seen_per_df_bottom = pd.DataFrame(tuples_bottom, columns=['Name', 'Percentage'])


# x channel: bar length encodes the share; the axis itself is hidden
bottom_x = alt.X('Percentage', axis=None)

# y channel: one band per film, labelled with the title and ordered by
# names_l rather than alphabetically
bottom_y = alt.Y(
    'Name:N',
    sort=names_l,
    axis=alt.Axis(tickCount=5, title=''),
)

# horizontal bars for the bottom-third shares
bars_bottom = alt.Chart(seen_per_df_bottom).mark_bar(size=20).encode(
    x=bottom_x,
    y=bottom_y,
)

# bare bars only at this stage: no annotations or title yet
bars_bottom

# Percentage labels reuse the bar encodings; dx nudges the text to the right
# so it does not sit on top of the bar.
bottom_label_style = dict(align='left', baseline='middle', dx=3)
text_bottom = bars_bottom.mark_text(**bottom_label_style).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)


# Title and subtitle shown above the chart.
bottom_title = {
    "text": ["How People Rate the 'Star Wars' Movies"],
    "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
}

# Layer the labels over the bars, then size and style the result.
# NOTE(review): Altair does not accept sub-charts that carry their own
# config inside hconcat/vconcat/layer, so the configure_* calls would need to
# move to the combined chart before this one can be concatenated.
seen_movies_bottom = (
    (text_bottom + bars_bottom)
    .properties(width=500, height=180, title=bottom_title)
    # red marks for the bottom-third panel
    .configure_mark(color='#fd3a4a')
    # no stroke around the view
    .configure_view(strokeWidth=0)
    # padding between the bands
    .configure_scale(bandPaddingInner=0.2)
    # title / subtitle styling
    .configure_title(
        fontSize=30,
        align='left',
        anchor='start',
        fontWeight='bold',
        subtitleFontWeight='lighter',
    )
)

seen_movies_bottom

我平时不怎么用 Altair,这个答案是查了不少资料之后写出来的,所以代码风格可能不太一致。与您期望的输出相比,不同之处在于标注文字的颜色不是黑色;以我目前的经验,还没能解决这一点。另外,由于重点不在于整理数据,我为图表创建了一份示例数据,并据此绘制了图表。

import pandas as pd
import numpy as np
import io
import altair as alt
from altair import datum

# Sample data in wide form: one row per film, one column per ranking bucket.
data = '''
episode name "Top third" "Middle third" "Bottom third"
1 "The Phantom Menace" 0.16 0.37 0.46
2 "Attack of the Clones" 0.14 0.29 0.57
3 "Revenge of the Sith" 0.13 0.40 0.47
4 "A New Hope" 0.50 0.31 0.19
5 "The Empire Strikes Back" 0.64 0.22 0.14
6 "Return of the Jedi" 0.43 0.41 0.17
'''

# Fields are separated by runs of whitespace; quoted titles keep their spaces.
# sep=r'\s+' is the replacement for the deprecated delim_whitespace=True.
df = pd.read_csv(io.StringIO(data), sep=r'\s+')

# Reshape wide -> long: one row per (film, ranking bucket) pair, which is the
# shape the per-bucket chart filters below operate on.
df = df.set_index(['episode', 'name']).stack().to_frame(name='percentage').reset_index()
# reset_index() gives the stacked level a generic name; call it 'rank'.
df.columns = ['episode', 'name', 'rank', 'percentage']
episode name rank percentage
0 1 The Phantom Menace Top third 0.16
1 1 The Phantom Menace Middle third 0.37
2 1 The Phantom Menace Bottom third 0.46
3 2 Attack of the Clones Top third 0.14
4 2 Attack of the Clones Middle third 0.29
5 2 Attack of the Clones Bottom third 0.57
6 3 Revenge of the Sith Top third 0.13
7 3 Revenge of the Sith Middle third 0.4
8 3 Revenge of the Sith Bottom third 0.47
9 4 A New Hope Top third 0.5
10 4 A New Hope Middle third 0.31
11 4 A New Hope Bottom third 0.19
12 5 The Empire Strikes Back Top third 0.64
13 5 The Empire Strikes Back Middle third 0.22
14 5 The Empire Strikes Back Bottom third 0.14
15 6 Return of the Jedi Top third 0.43
16 6 Return of the Jedi Middle third 0.41
17 6 Return of the Jedi Bottom third 0.17
# Ranking buckets and the colour assigned to each, shared by all three panels
# so every bucket keeps the same colour.
domain = ['Top third', 'Middle third', 'Bottom third']
range_ = ['green', 'blue', 'red']

# Panel 1: top-third shares. This is the only panel that shows the film
# titles on its y axis. (The markdown-escaped `\[0\]` subscripts were a
# syntax error and are restored to plain indexing.)
bar1 = alt.Chart(df, title=domain[0]).mark_bar().encode(
    alt.X('percentage:Q', axis=None, title=domain[0]),
    # keep the films in data order instead of alphabetical order
    alt.Y('name:O', sort=df.name.unique(), title=''),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    # keep only the rows of this panel's bucket
    (datum.rank == 'Top third')
).properties(
    width=50
)

# Percentage labels reuse the bar encodings; dx nudges the text to the right
# of the bar.
text1 = bar1.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Panel 2: middle-third shares. Both axes are hidden, so no film titles are
# drawn on this panel. (The markdown-escaped `\[1\]` subscript was a syntax
# error and is restored to plain indexing.)
bar2 = alt.Chart(df, title=domain[1]).mark_bar().encode(
    alt.X('percentage:Q', axis=None),
    alt.Y('name:O', axis=None),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    # keep only the rows of this panel's bucket
    (datum.rank == 'Middle third')
).properties(
    width=50
)

# Percentage labels reuse the bar encodings; dx nudges the text to the right
# of the bar.
text2 = bar2.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Panel 3: bottom-third shares. Both axes are hidden, so no film titles are
# drawn on this panel. (The markdown-escaped `\[2\]` subscript was a syntax
# error and is restored to plain indexing.)
bar3 = alt.Chart(df, title=domain[2]).mark_bar().encode(
    alt.X('percentage:Q', axis=None),
    alt.Y('name:O', axis=None),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    # keep only the rows of this panel's bucket
    (datum.rank == 'Bottom third')
).properties(
    width=50
)

# Percentage labels reuse the bar encodings; dx nudges the text to the right
# of the bar.
text3 = bar3.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Place the three layered panels (bars + labels) side by side under a single
# title. All configuration is applied to the combined chart.
alt.hconcat(
    bar1 + text1, bar2 + text2, bar3 + text3,
    title=alt.TitleParams(
        text="How People Rate the 'Star Wars' Movies",
        # a list of strings renders as a multi-line subtitle
        subtitle=["How often each film was rated in the top, middle and bottom third ",
                  "(by 471 respondents who have seen all six films)"],
    ),
).configure(
    # configure() installs a fresh config object, so it has to come before
    # the configure_*() calls; calling it last would discard their settings
    background='#dcdcdc'
).configure_axis(
    grid=False,
).configure_view(
    # no stroke around each panel
    strokeWidth=0
)