如何使用 Altair 在一张图中并排显示 3 个条形图
How to display 3 bar charts next to each other in one graph using Altair
我正在尝试复制此图表,但我很难将我创建的所有 3 个图表放在一个图表中。到目前为止,我已经能够使用适当的数据和颜色创建 3 个单独的条形图,但分层并不成功。
我正在复制的图表:
这是我用来创建每个图表的代码。它基本上是把同一段代码重复了 3 次,分别创建每个单独的图表,三个图表分别命名为 'seen_movies_top'、'seen_movies_middle' 和 'seen_movies_bottom'。我觉得自己把事情弄复杂了,应该有更简单的方法可以解决这个问题,但我很高兴至少能够创建出每个单独的图表。现在只差把它们放到同一张图上了。
# Episode codes in release-number order. The 'seen_*' and 'rank_*' column
# names used further down are built from these codes.
episodes = ['EI', 'EII', 'EIII', 'EIV', 'EV', 'EVI']

# Map each episode code to the full movie title shown on the chart's y axis.
# The titles are user-facing labels, so they are spelled as the films are
# actually titled ("Menace", "Clones").
names = {
    'EI': 'The Phantom Menace',
    'EII': 'Attack of the Clones',
    'EIII': 'Revenge of the Sith',
    'EIV': 'A New Hope',
    'EV': 'The Empire Strikes Back',
    'EVI': 'The Return of the Jedi',
}

# Titles in episode order. This list is handed to Altair as the explicit sort
# order so the bars are not sorted alphabetically.
names_l = [names[ep] for ep in episodes]
print("sort order: ", names_l)
# Keep respondents who answered at least one of the seen_* columns:
# how='all' drops a row only when every listed column is NaN.
# This must run first, because seen_every below is derived from it.
seen_at_least_one = sw.dropna(subset=['seen_' + ep for ep in episodes], how='all')
total = len(seen_at_least_one)
print("total who have seen at least one: ", total)

# Narrow further to rows with a non-NaN value in every seen_* column
# (dropna's default how='any' drops a row as soon as one column is NaN).
seen_every = seen_at_least_one.dropna(subset=['seen_' + ep for ep in episodes])

# Denominator for the ranking percentages computed below.
total_rank = len(seen_every)
# Fraction of the seen_every respondents who ranked each movie in the top
# third, i.e. gave it rank '1' or '2'. The count is divided by total_rank.
percs_seen_top3 = []
for rank_ep in ['rank_' + ep for ep in episodes]:
    # isin() counts both rank labels in one pass and yields 0 for a label that
    # never occurs, where indexing value_counts() by label would raise KeyError.
    # NOTE(review): ranks are matched as strings, as in the original lookup --
    # confirm the rank_* columns really hold '1'..'6' as text.
    perc_seen_top3 = seen_every[rank_ep].isin(['1', '2']).sum() / total_rank
    percs_seen_top3.append(perc_seen_top3)

# Pair every movie title with its fraction and load the pairs into a DataFrame.
tuples_top = list(zip([names[ep] for ep in episodes], percs_seen_top3))
seen_per_df_top = pd.DataFrame(tuples_top, columns=['Name', 'Percentage'])
# Horizontal bars for the top-third data: one bar per movie.
bars_top = alt.Chart(seen_per_df_top).mark_bar(size=20).encode(
    # x carries the percentage; its axis is hidden
    alt.X('Percentage', axis=None),
    # y carries the movie title, ordered by names_l instead of alphabetically
    alt.Y('Name:N', axis=alt.Axis(tickCount=5, title=''), sort=names_l),
)

# Percentage labels: reuse the bar chart's data and encodings, but draw text.
text_top = bars_top.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # shift the label right so it does not sit on top of the bar
).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)

# Layer the labels and the bars, then size, title and style the result.
seen_movies_top = alt.layer(text_top, bars_top).properties(
    # dimensions of the visualization and its title / subtitle
    width=500,
    height=180,
    title={
        "text": ["How People Rate the 'Star Wars' Movies"],
        "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
    },
).configure_mark(
    color='#008fd5'
).configure_view(
    # no stroke around the plotting area
    strokeWidth=0
).configure_scale(
    # padding between the bars
    bandPaddingInner=0.2
).configure_title(
    # title and subtitle typography
    fontSize=30, align='left', anchor='start', fontWeight='bold', subtitleFontWeight='lighter'
)
seen_movies_top
# Fraction of the seen_every respondents who ranked each movie in the middle
# third, i.e. gave it rank '3' or '4'. The count is divided by total_rank.
percs_seen_middle3 = []
for rank_ep in ['rank_' + ep for ep in episodes]:
    # isin() counts both rank labels in one pass and yields 0 for a label that
    # never occurs, where indexing value_counts() by label would raise KeyError.
    # NOTE(review): ranks are matched as strings, as in the original lookup --
    # confirm the rank_* columns really hold '1'..'6' as text.
    perc_seen_middle3 = seen_every[rank_ep].isin(['3', '4']).sum() / total_rank
    percs_seen_middle3.append(perc_seen_middle3)

# Pair every movie title with its fraction and load the pairs into a DataFrame.
tuples_middle = list(zip([names[ep] for ep in episodes], percs_seen_middle3))
seen_per_df_middle = pd.DataFrame(tuples_middle, columns=['Name', 'Percentage'])
# Horizontal bars for the middle-third data: one bar per movie.
bars_middle = alt.Chart(seen_per_df_middle).mark_bar(size=20).encode(
    # x carries the percentage; its axis is hidden
    alt.X('Percentage', axis=None),
    # y carries the movie title, ordered by names_l instead of alphabetically
    alt.Y('Name:N', axis=alt.Axis(tickCount=5, title=''), sort=names_l),
)
# Bare expression kept from the original: it shows the un-annotated bars when
# it is the last statement of a notebook cell.
bars_middle

# Percentage labels: reuse the bar chart's data and encodings, but draw text.
text_middle = bars_middle.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # shift the label right so it does not sit on top of the bar
).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)

# Layer the labels and the bars, then size, title and style the result.
seen_movies_middle = alt.layer(text_middle, bars_middle).properties(
    # dimensions of the visualization and its title / subtitle
    width=500,
    height=180,
    title={
        "text": ["How People Rate the 'Star Wars' Movies"],
        "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
    },
).configure_mark(
    # mark colour used for the middle-third chart
    color='#69a14f'
).configure_view(
    # no stroke around the plotting area
    strokeWidth=0
).configure_scale(
    # padding between the bars
    bandPaddingInner=0.2
).configure_title(
    # title and subtitle typography
    fontSize=30, align='left', anchor='start', fontWeight='bold', subtitleFontWeight='lighter'
)
seen_movies_middle
# Fraction of the seen_every respondents who ranked each movie in the bottom
# third, i.e. gave it rank '5' or '6'. The count is divided by total_rank.
percs_seen_bottom3 = []
for rank_ep in ['rank_' + ep for ep in episodes]:
    # isin() counts both rank labels in one pass and yields 0 for a label that
    # never occurs, where indexing value_counts() by label would raise KeyError.
    # NOTE(review): ranks are matched as strings, as in the original lookup --
    # confirm the rank_* columns really hold '1'..'6' as text.
    perc_seen_bottom3 = seen_every[rank_ep].isin(['5', '6']).sum() / total_rank
    percs_seen_bottom3.append(perc_seen_bottom3)

# Pair every movie title with its fraction and load the pairs into a DataFrame.
tuples_bottom = list(zip([names[ep] for ep in episodes], percs_seen_bottom3))
seen_per_df_bottom = pd.DataFrame(tuples_bottom, columns=['Name', 'Percentage'])
# Horizontal bars for the bottom-third data: one bar per movie.
bars_bottom = alt.Chart(seen_per_df_bottom).mark_bar(size=20).encode(
    # x carries the percentage; its axis is hidden
    alt.X('Percentage', axis=None),
    # y carries the movie title, ordered by names_l instead of alphabetically
    alt.Y('Name:N', axis=alt.Axis(tickCount=5, title=''), sort=names_l),
)
# Bare expression kept from the original: it shows the un-annotated bars when
# it is the last statement of a notebook cell.
bars_bottom

# Percentage labels: reuse the bar chart's data and encodings, but draw text.
text_bottom = bars_bottom.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # shift the label right so it does not sit on top of the bar
).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)

# Layer the labels and the bars, then size, title and style the result.
seen_movies_bottom = alt.layer(text_bottom, bars_bottom).properties(
    # dimensions of the visualization and its title / subtitle
    width=500,
    height=180,
    title={
        "text": ["How People Rate the 'Star Wars' Movies"],
        "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
    },
).configure_mark(
    # mark colour used for the bottom-third chart
    color='#fd3a4a'
).configure_view(
    # no stroke around the plotting area
    strokeWidth=0
).configure_scale(
    # padding between the bars
    bandPaddingInner=0.2
).configure_title(
    # title and subtitle typography
    fontSize=30, align='left', anchor='start', fontWeight='bold', subtitleFontWeight='lighter'
)
seen_movies_bottom
我平时不怎么用 Altair,所以我查了很多资料才写出下面的代码,代码风格可能不太一致。与您期望的输出不同的地方在于:标注文字的颜色不是黑色。以我的经验,这一点我没能解决。此外,由于重点不在于整理数据,我直接为图表构造了示例数据,并用它创建了图表。
import pandas as pd
import numpy as np
import io
import altair as alt
from altair import datum
# Sample data in wide form: one row per movie, one column per rank bucket.
# Titles and bucket names contain spaces, so they are double-quoted.
data = '''
episode name "Top third" "Middle third" "Bottom third"
1 "The Phantom Menace" 0.16 0.37 0.46
2 "Attack of the Clones" 0.14 0.29 0.57
3 "Revenge of the Sith" 0.13 0.40 0.47
4 "A New Hope" 0.50 0.31 0.19
5 "The Empire Strikes Back" 0.64 0.22 0.14
6 "Return of the Jedi" 0.43 0.41 0.17
'''

# sep=r'\s+' is the documented equivalent of the deprecated
# delim_whitespace=True: fields are split on runs of whitespace.
df = pd.read_csv(io.StringIO(data), sep=r'\s+')

# Reshape wide -> long: stack() moves the three bucket columns into the index,
# giving one row per (episode, name, bucket) with the value in 'percentage'.
df = df.set_index(['episode', 'name']).stack().to_frame(name='percentage').reset_index()

# reset_index() leaves the stacked level with an auto-generated name; give all
# four columns explicit names.
df.columns = ['episode', 'name', 'rank', 'percentage']
| | episode | name | rank | percentage |
|---|---|---|---|---|
| 0 | 1 | The Phantom Menace | Top third | 0.16 |
| 1 | 1 | The Phantom Menace | Middle third | 0.37 |
| 2 | 1 | The Phantom Menace | Bottom third | 0.46 |
| 3 | 2 | Attack of the Clones | Top third | 0.14 |
| 4 | 2 | Attack of the Clones | Middle third | 0.29 |
| 5 | 2 | Attack of the Clones | Bottom third | 0.57 |
| 6 | 3 | Revenge of the Sith | Top third | 0.13 |
| 7 | 3 | Revenge of the Sith | Middle third | 0.4 |
| 8 | 3 | Revenge of the Sith | Bottom third | 0.47 |
| 9 | 4 | A New Hope | Top third | 0.5 |
| 10 | 4 | A New Hope | Middle third | 0.31 |
| 11 | 4 | A New Hope | Bottom third | 0.19 |
| 12 | 5 | The Empire Strikes Back | Top third | 0.64 |
| 13 | 5 | The Empire Strikes Back | Middle third | 0.22 |
| 14 | 5 | The Empire Strikes Back | Bottom third | 0.14 |
| 15 | 6 | Return of the Jedi | Top third | 0.43 |
| 16 | 6 | Return of the Jedi | Middle third | 0.41 |
| 17 | 6 | Return of the Jedi | Bottom third | 0.17 |
# Rank buckets in panel order (left to right) and the bar colour for each.
domain = ['Top third', 'Middle third', 'Bottom third']
range_ = ['green', 'blue', 'red']

# Left panel: "Top third" rows only. It is the only panel that keeps its
# y axis, so its labels serve as the movie titles for the whole figure.
bar1 = alt.Chart(df, title=domain[0]).mark_bar().encode(
    alt.X('percentage:Q', axis=None, title=domain[0]),
    # order the rows as the movies first appear in df, not alphabetically
    alt.Y('name:O', sort=df.name.unique(), title=''),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    (datum.rank == 'Top third')
).properties(
    width=50
)
# Percentage labels; derived from bar1, so they share its data, filter and
# encodings (including the colour encoding).
text1 = bar1.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Middle panel: "Middle third" rows, y axis hidden.
bar2 = alt.Chart(df, title=domain[1]).mark_bar().encode(
    alt.X('percentage:Q', axis=None),
    # Same explicit row order as bar1. Without it this panel falls back to the
    # default ordering and its bars may not line up with bar1's titles.
    alt.Y('name:O', sort=df.name.unique(), axis=None),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    (datum.rank == 'Middle third')
).properties(
    width=50
)
text2 = bar2.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Right panel: "Bottom third" rows, y axis hidden.
bar3 = alt.Chart(df, title=domain[2]).mark_bar().encode(
    alt.X('percentage:Q', axis=None),
    # Same explicit row order as bar1 (see bar2).
    alt.Y('name:O', sort=df.name.unique(), axis=None),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    (datum.rank == 'Bottom third')
).properties(
    width=50
)
text3 = bar3.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Put the three layered panels side by side. The configure_* calls are applied
# once, on the concatenated chart, not on the individual panels.
alt.hconcat(
    bar1 + text1, bar2 + text2, bar3 + text3,
    title=alt.TitleParams(
        text="How People Rate the 'Star Wars' Movies",
        subtitle=["How often each film was rated in the top, middle and bottom third ",
                  "(by 471 respondents who have seen all six films)"],)
).configure_axis(
    grid=False,
).configure_view(
    strokeWidth=0
).configure(
    background='#dcdcdc'
)
我正在尝试复制此图表,但我很难将我创建的所有 3 个图表放在一个图表中。到目前为止,我已经能够使用适当的数据和颜色创建 3 个单独的条形图,但分层并不成功。
我正在复制的图表:
这是我用来创建每个图表的代码。它基本上是把同一段代码重复了 3 次,分别创建每个单独的图表,三个图表分别命名为 'seen_movies_top'、'seen_movies_middle' 和 'seen_movies_bottom'。我觉得自己把事情弄复杂了,应该有更简单的方法可以解决这个问题,但我很高兴至少能够创建出每个单独的图表。现在只差把它们放到同一张图上了。
# Episode codes in release-number order. The 'seen_*' and 'rank_*' column
# names used further down are built from these codes.
episodes = ['EI', 'EII', 'EIII', 'EIV', 'EV', 'EVI']

# Map each episode code to the full movie title shown on the chart's y axis.
# The titles are user-facing labels, so they are spelled as the films are
# actually titled ("Menace", "Clones").
names = {
    'EI': 'The Phantom Menace',
    'EII': 'Attack of the Clones',
    'EIII': 'Revenge of the Sith',
    'EIV': 'A New Hope',
    'EV': 'The Empire Strikes Back',
    'EVI': 'The Return of the Jedi',
}

# Titles in episode order. This list is handed to Altair as the explicit sort
# order so the bars are not sorted alphabetically.
names_l = [names[ep] for ep in episodes]
print("sort order: ", names_l)
# Keep respondents who answered at least one of the seen_* columns:
# how='all' drops a row only when every listed column is NaN.
# This must run first, because seen_every below is derived from it.
seen_at_least_one = sw.dropna(subset=['seen_' + ep for ep in episodes], how='all')
total = len(seen_at_least_one)
print("total who have seen at least one: ", total)

# Narrow further to rows with a non-NaN value in every seen_* column
# (dropna's default how='any' drops a row as soon as one column is NaN).
seen_every = seen_at_least_one.dropna(subset=['seen_' + ep for ep in episodes])

# Denominator for the ranking percentages computed below.
total_rank = len(seen_every)
# Fraction of the seen_every respondents who ranked each movie in the top
# third, i.e. gave it rank '1' or '2'. The count is divided by total_rank.
percs_seen_top3 = []
for rank_ep in ['rank_' + ep for ep in episodes]:
    # isin() counts both rank labels in one pass and yields 0 for a label that
    # never occurs, where indexing value_counts() by label would raise KeyError.
    # NOTE(review): ranks are matched as strings, as in the original lookup --
    # confirm the rank_* columns really hold '1'..'6' as text.
    perc_seen_top3 = seen_every[rank_ep].isin(['1', '2']).sum() / total_rank
    percs_seen_top3.append(perc_seen_top3)

# Pair every movie title with its fraction and load the pairs into a DataFrame.
tuples_top = list(zip([names[ep] for ep in episodes], percs_seen_top3))
seen_per_df_top = pd.DataFrame(tuples_top, columns=['Name', 'Percentage'])
# Horizontal bars for the top-third data: one bar per movie.
bars_top = alt.Chart(seen_per_df_top).mark_bar(size=20).encode(
    # x carries the percentage; its axis is hidden
    alt.X('Percentage', axis=None),
    # y carries the movie title, ordered by names_l instead of alphabetically
    alt.Y('Name:N', axis=alt.Axis(tickCount=5, title=''), sort=names_l),
)

# Percentage labels: reuse the bar chart's data and encodings, but draw text.
text_top = bars_top.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # shift the label right so it does not sit on top of the bar
).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)

# Layer the labels and the bars, then size, title and style the result.
seen_movies_top = alt.layer(text_top, bars_top).properties(
    # dimensions of the visualization and its title / subtitle
    width=500,
    height=180,
    title={
        "text": ["How People Rate the 'Star Wars' Movies"],
        "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
    },
).configure_mark(
    color='#008fd5'
).configure_view(
    # no stroke around the plotting area
    strokeWidth=0
).configure_scale(
    # padding between the bars
    bandPaddingInner=0.2
).configure_title(
    # title and subtitle typography
    fontSize=30, align='left', anchor='start', fontWeight='bold', subtitleFontWeight='lighter'
)
seen_movies_top
# Fraction of the seen_every respondents who ranked each movie in the middle
# third, i.e. gave it rank '3' or '4'. The count is divided by total_rank.
percs_seen_middle3 = []
for rank_ep in ['rank_' + ep for ep in episodes]:
    # isin() counts both rank labels in one pass and yields 0 for a label that
    # never occurs, where indexing value_counts() by label would raise KeyError.
    # NOTE(review): ranks are matched as strings, as in the original lookup --
    # confirm the rank_* columns really hold '1'..'6' as text.
    perc_seen_middle3 = seen_every[rank_ep].isin(['3', '4']).sum() / total_rank
    percs_seen_middle3.append(perc_seen_middle3)

# Pair every movie title with its fraction and load the pairs into a DataFrame.
tuples_middle = list(zip([names[ep] for ep in episodes], percs_seen_middle3))
seen_per_df_middle = pd.DataFrame(tuples_middle, columns=['Name', 'Percentage'])
# Horizontal bars for the middle-third data: one bar per movie.
bars_middle = alt.Chart(seen_per_df_middle).mark_bar(size=20).encode(
    # x carries the percentage; its axis is hidden
    alt.X('Percentage', axis=None),
    # y carries the movie title, ordered by names_l instead of alphabetically
    alt.Y('Name:N', axis=alt.Axis(tickCount=5, title=''), sort=names_l),
)
# Bare expression kept from the original: it shows the un-annotated bars when
# it is the last statement of a notebook cell.
bars_middle

# Percentage labels: reuse the bar chart's data and encodings, but draw text.
text_middle = bars_middle.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # shift the label right so it does not sit on top of the bar
).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)

# Layer the labels and the bars, then size, title and style the result.
seen_movies_middle = alt.layer(text_middle, bars_middle).properties(
    # dimensions of the visualization and its title / subtitle
    width=500,
    height=180,
    title={
        "text": ["How People Rate the 'Star Wars' Movies"],
        "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
    },
).configure_mark(
    # mark colour used for the middle-third chart
    color='#69a14f'
).configure_view(
    # no stroke around the plotting area
    strokeWidth=0
).configure_scale(
    # padding between the bars
    bandPaddingInner=0.2
).configure_title(
    # title and subtitle typography
    fontSize=30, align='left', anchor='start', fontWeight='bold', subtitleFontWeight='lighter'
)
seen_movies_middle
# Fraction of the seen_every respondents who ranked each movie in the bottom
# third, i.e. gave it rank '5' or '6'. The count is divided by total_rank.
percs_seen_bottom3 = []
for rank_ep in ['rank_' + ep for ep in episodes]:
    # isin() counts both rank labels in one pass and yields 0 for a label that
    # never occurs, where indexing value_counts() by label would raise KeyError.
    # NOTE(review): ranks are matched as strings, as in the original lookup --
    # confirm the rank_* columns really hold '1'..'6' as text.
    perc_seen_bottom3 = seen_every[rank_ep].isin(['5', '6']).sum() / total_rank
    percs_seen_bottom3.append(perc_seen_bottom3)

# Pair every movie title with its fraction and load the pairs into a DataFrame.
tuples_bottom = list(zip([names[ep] for ep in episodes], percs_seen_bottom3))
seen_per_df_bottom = pd.DataFrame(tuples_bottom, columns=['Name', 'Percentage'])
# Horizontal bars for the bottom-third data: one bar per movie.
bars_bottom = alt.Chart(seen_per_df_bottom).mark_bar(size=20).encode(
    # x carries the percentage; its axis is hidden
    alt.X('Percentage', axis=None),
    # y carries the movie title, ordered by names_l instead of alphabetically
    alt.Y('Name:N', axis=alt.Axis(tickCount=5, title=''), sort=names_l),
)
# Bare expression kept from the original: it shows the un-annotated bars when
# it is the last statement of a notebook cell.
bars_bottom

# Percentage labels: reuse the bar chart's data and encodings, but draw text.
text_bottom = bars_bottom.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # shift the label right so it does not sit on top of the bar
).encode(
    text=alt.Text('Percentage:Q', format='.0%'),
)

# Layer the labels and the bars, then size, title and style the result.
seen_movies_bottom = alt.layer(text_bottom, bars_bottom).properties(
    # dimensions of the visualization and its title / subtitle
    width=500,
    height=180,
    title={
        "text": ["How People Rate the 'Star Wars' Movies"],
        "subtitle": ["How often each film was rated in the top, middle and bottom third (by 471 respondents who have seen all six films)"],
    },
).configure_mark(
    # mark colour used for the bottom-third chart
    color='#fd3a4a'
).configure_view(
    # no stroke around the plotting area
    strokeWidth=0
).configure_scale(
    # padding between the bars
    bandPaddingInner=0.2
).configure_title(
    # title and subtitle typography
    fontSize=30, align='left', anchor='start', fontWeight='bold', subtitleFontWeight='lighter'
)
seen_movies_bottom
我平时不怎么用 Altair,所以我查了很多资料才写出下面的代码,代码风格可能不太一致。与您期望的输出不同的地方在于:标注文字的颜色不是黑色。以我的经验,这一点我没能解决。此外,由于重点不在于整理数据,我直接为图表构造了示例数据,并用它创建了图表。
import pandas as pd
import numpy as np
import io
import altair as alt
from altair import datum
# Sample data in wide form: one row per movie, one column per rank bucket.
# Titles and bucket names contain spaces, so they are double-quoted.
data = '''
episode name "Top third" "Middle third" "Bottom third"
1 "The Phantom Menace" 0.16 0.37 0.46
2 "Attack of the Clones" 0.14 0.29 0.57
3 "Revenge of the Sith" 0.13 0.40 0.47
4 "A New Hope" 0.50 0.31 0.19
5 "The Empire Strikes Back" 0.64 0.22 0.14
6 "Return of the Jedi" 0.43 0.41 0.17
'''

# sep=r'\s+' is the documented equivalent of the deprecated
# delim_whitespace=True: fields are split on runs of whitespace.
df = pd.read_csv(io.StringIO(data), sep=r'\s+')

# Reshape wide -> long: stack() moves the three bucket columns into the index,
# giving one row per (episode, name, bucket) with the value in 'percentage'.
df = df.set_index(['episode', 'name']).stack().to_frame(name='percentage').reset_index()

# reset_index() leaves the stacked level with an auto-generated name; give all
# four columns explicit names.
df.columns = ['episode', 'name', 'rank', 'percentage']
| | episode | name | rank | percentage |
|---|---|---|---|---|
0 | 1 | The Phantom Menace | Top third | 0.16 |
1 | 1 | The Phantom Menace | Middle third | 0.37 |
2 | 1 | The Phantom Menace | Bottom third | 0.46 |
3 | 2 | Attack of the Clones | Top third | 0.14 |
4 | 2 | Attack of the Clones | Middle third | 0.29 |
5 | 2 | Attack of the Clones | Bottom third | 0.57 |
6 | 3 | Revenge of the Sith | Top third | 0.13 |
7 | 3 | Revenge of the Sith | Middle third | 0.4 |
8 | 3 | Revenge of the Sith | Bottom third | 0.47 |
9 | 4 | A New Hope | Top third | 0.5 |
10 | 4 | A New Hope | Middle third | 0.31 |
11 | 4 | A New Hope | Bottom third | 0.19 |
12 | 5 | The Empire Strikes Back | Top third | 0.64 |
13 | 5 | The Empire Strikes Back | Middle third | 0.22 |
14 | 5 | The Empire Strikes Back | Bottom third | 0.14 |
15 | 6 | Return of the Jedi | Top third | 0.43 |
16 | 6 | Return of the Jedi | Middle third | 0.41 |
17 | 6 | Return of the Jedi | Bottom third | 0.17 |
# Rank buckets in panel order (left to right) and the bar colour for each.
domain = ['Top third', 'Middle third', 'Bottom third']
range_ = ['green', 'blue', 'red']

# Left panel: "Top third" rows only. It is the only panel that keeps its
# y axis, so its labels serve as the movie titles for the whole figure.
bar1 = alt.Chart(df, title=domain[0]).mark_bar().encode(
    alt.X('percentage:Q', axis=None, title=domain[0]),
    # order the rows as the movies first appear in df, not alphabetically
    alt.Y('name:O', sort=df.name.unique(), title=''),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    (datum.rank == 'Top third')
).properties(
    width=50
)
# Percentage labels; derived from bar1, so they share its data, filter and
# encodings (including the colour encoding).
text1 = bar1.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Middle panel: "Middle third" rows, y axis hidden.
bar2 = alt.Chart(df, title=domain[1]).mark_bar().encode(
    alt.X('percentage:Q', axis=None),
    # Same explicit row order as bar1. Without it this panel falls back to the
    # default ordering and its bars may not line up with bar1's titles.
    alt.Y('name:O', sort=df.name.unique(), axis=None),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    (datum.rank == 'Middle third')
).properties(
    width=50
)
text2 = bar2.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Right panel: "Bottom third" rows, y axis hidden.
bar3 = alt.Chart(df, title=domain[2]).mark_bar().encode(
    alt.X('percentage:Q', axis=None),
    # Same explicit row order as bar1 (see bar2).
    alt.Y('name:O', sort=df.name.unique(), axis=None),
    color=alt.Color('rank:N', legend=None, scale=alt.Scale(domain=domain, range=range_)),
).transform_filter(
    (datum.rank == 'Bottom third')
).properties(
    width=50
)
text3 = bar3.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text=alt.Text('percentage:Q', format='.0%')
)

# Put the three layered panels side by side. The configure_* calls are applied
# once, on the concatenated chart, not on the individual panels.
alt.hconcat(
    bar1 + text1, bar2 + text2, bar3 + text3,
    title=alt.TitleParams(
        text="How People Rate the 'Star Wars' Movies",
        subtitle=["How often each film was rated in the top, middle and bottom third ",
                  "(by 471 respondents who have seen all six films)"],)
).configure_axis(
    grid=False,
).configure_view(
    strokeWidth=0
).configure(
    background='#dcdcdc'
)