使用 pandas 在 Python 中进行可视化
Visualization with pandas in Python
嘿 Stack Overflow 团队,
我有以下问题。
我用两个表创建了一个新表,其中包含以下列:
Index(['state_name', 'overall_outcome', 'date', 'new_results_reported',
'total_results_reported', 'Population_2019'],
dtype='object')
但现在每个州都出现了 12 次,所以我创建了一个数据透视表,按月汇总了全年的数据。只是现在数据透视表中缺少 Population_2019 的值,而我也想把它显示在图表中。
# Sum new_results_reported per state (rows) and per month (columns).
pivot_df = Amerika3.pivot_table(index='state_name' , columns='Month', values='new_results_reported', aggfunc='sum')
print(pivot_df)
我现在的问题是:如何创建一个图表,按相应月份显示 state_name、Population_2019 和 new_results_reported?
例如,我想在图表中看到阿拉巴马州在第 1、2、3……个月各报告了 'x' 个案例,并与阿拉巴马州的人口进行对比。
我创建的表(示例数据):
{'state_name': ['Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska'],
'Month': [1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12],
'Population_2019': [4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0],
'new_results_reported': [446366,
322959,
272495,
298138,
316494,
270164,
452655,
436392,
376831,
432346,
400787,
554491,
235778,
199990,
224981,
221276,
154334,
81092,
150296,
175596,
158323,
221439,
296043,
261463],
'test_rate_in_Procent': [9.103592868716968,
6.586718632888622,
5.557510067435759,
6.0804966567649394,
6.454865561874577,
5.509969540207028,
9.23185643617363,
8.900174070527626,
7.685433039952602,
8.817656278521003,
8.174013421888016,
11.308792142250395,
32.23014305340068,
27.338031153244163,
30.75422564572241,
30.247763295491048,
21.096993349691406,
11.085032363012528,
20.545010901585,
24.00344476416352,
21.64227764525764,
30.27004490496142,
40.46818719285895,
35.74120525736626]}
- 使用您提供的示例数据
- 选择您要绘制的变量 new_results_reported,因此使用 `.loc[]`,这样 `unstack()` 就会产生预期的 12 列,而不是 24 列
- 重塑数据以便绘图 —— 我更喜欢使用 `unstack()` 而不是 `pivot()`;Population_2019 保留在索引中,以便实现这一点
- 然后,用不同的图形表示分别绘制每月数据和各州数据就很简单了
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Sample data from the question: one row per (state, month); the state's
# 2019 population is repeated on each of its rows.
Amerika2 = pd.DataFrame(
    {
        "state_name": ["Alabama"] * 12 + ["Alaska"] * 12,
        "Month": list(range(1, 13)) * 2,
        "Population_2019": [4903185.0] * 12 + [731545.0] * 12,
        "new_results_reported": [
            446366, 322959, 272495, 298138, 316494, 270164,
            452655, 436392, 376831, 432346, 400787, 554491,
            235778, 199990, 224981, 221276, 154334, 81092,
            150296, 175596, 158323, 221439, 296043, 261463,
        ],
        "test_rate_in_Procent": [
            9.103592868716968, 6.586718632888622, 5.557510067435759,
            6.0804966567649394, 6.454865561874577, 5.509969540207028,
            9.23185643617363, 8.900174070527626, 7.685433039952602,
            8.817656278521003, 8.174013421888016, 11.308792142250395,
            32.23014305340068, 27.338031153244163, 30.75422564572241,
            30.247763295491048, 21.096993349691406, 11.085032363012528,
            20.545010901585, 24.00344476416352, 21.64227764525764,
            30.27004490496142, 40.46818719285895, 35.74120525736626,
        ],
    }
)

# Reshape to wide form: one row per state, one column per month.
# Only new_results_reported is selected so that unstack() produces one column
# per month rather than one per (variable, month) pair. Population_2019 is
# kept in the index so it remains available for the population trace below.
df = (
    Amerika2.loc[:, ["state_name", "Month", "Population_2019", "new_results_reported"]]
    .set_index(["state_name", "Month", "Population_2019"])
    .unstack("Month")
    .droplevel(0, axis=1)  # drop the "new_results_reported" column level
)

# Take the month columns from the reshaped frame instead of hard-coding
# 1..12, so the chart still works when the data does not cover every month.
# tolist() converts the integer labels to plain Python ints, the same kind of
# values the hard-coded list used.
month_columns = df.columns.tolist()

# Bar chart of the monthly values, one bar segment per month for each state.
fig = px.bar(df.reset_index(), x="state_name", y=month_columns)

# Add the state population as a separate trace bound to a second y axis.
fig.add_trace(
    go.Scatter(
        x=df.index.get_level_values("state_name"),
        y=df.index.get_level_values("Population_2019"),
        yaxis="y2",
        name="population",
    )
)

# Overlay the second y axis on the first and draw it on the right-hand side.
fig.update_layout({"yaxis2": {"anchor": "x", "overlaying": "y", "side": "right"}})
嘿 Stack Overflow 团队,
我有以下问题。
我用两个表创建了一个新表,其中包含以下列:
Index(['state_name', 'overall_outcome', 'date', 'new_results_reported',
'total_results_reported', 'Population_2019'],
dtype='object')
但现在每个州都出现了 12 次,所以我创建了一个数据透视表,按月汇总了全年的数据。只是现在数据透视表中缺少 Population_2019 的值,而我也想把它显示在图表中。
# Sum new_results_reported per state (rows) and per month (columns).
pivot_df = Amerika3.pivot_table(index='state_name' , columns='Month', values='new_results_reported', aggfunc='sum')
print(pivot_df)
我现在的问题是:如何创建一个图表,按相应月份显示 state_name、Population_2019 和 new_results_reported?
例如,我想在图表中看到阿拉巴马州在第 1、2、3……个月各报告了 'x' 个案例,并与阿拉巴马州的人口进行对比。
我创建的表(示例数据):
{'state_name': ['Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alabama',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska',
'Alaska'],
'Month': [1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12],
'Population_2019': [4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
4903185.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0,
731545.0],
'new_results_reported': [446366,
322959,
272495,
298138,
316494,
270164,
452655,
436392,
376831,
432346,
400787,
554491,
235778,
199990,
224981,
221276,
154334,
81092,
150296,
175596,
158323,
221439,
296043,
261463],
'test_rate_in_Procent': [9.103592868716968,
6.586718632888622,
5.557510067435759,
6.0804966567649394,
6.454865561874577,
5.509969540207028,
9.23185643617363,
8.900174070527626,
7.685433039952602,
8.817656278521003,
8.174013421888016,
11.308792142250395,
32.23014305340068,
27.338031153244163,
30.75422564572241,
30.247763295491048,
21.096993349691406,
11.085032363012528,
20.545010901585,
24.00344476416352,
21.64227764525764,
30.27004490496142,
40.46818719285895,
35.74120525736626]}
- 使用您提供的示例数据
- 选择您要绘制的变量 new_results_reported,因此使用 `.loc[]`,这样 `unstack()` 就会产生预期的 12 列,而不是 24 列
- 重塑数据以便绘图 —— 我更喜欢使用 `unstack()` 而不是 `pivot()`;Population_2019 保留在索引中,以便实现这一点
- 然后,用不同的图形表示分别绘制每月数据和各州数据就很简单了
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Sample data from the question: one row per (state, month); the state's
# 2019 population is repeated on each of its rows.
Amerika2 = pd.DataFrame(
    {
        "state_name": ["Alabama"] * 12 + ["Alaska"] * 12,
        "Month": list(range(1, 13)) * 2,
        "Population_2019": [4903185.0] * 12 + [731545.0] * 12,
        "new_results_reported": [
            446366, 322959, 272495, 298138, 316494, 270164,
            452655, 436392, 376831, 432346, 400787, 554491,
            235778, 199990, 224981, 221276, 154334, 81092,
            150296, 175596, 158323, 221439, 296043, 261463,
        ],
        "test_rate_in_Procent": [
            9.103592868716968, 6.586718632888622, 5.557510067435759,
            6.0804966567649394, 6.454865561874577, 5.509969540207028,
            9.23185643617363, 8.900174070527626, 7.685433039952602,
            8.817656278521003, 8.174013421888016, 11.308792142250395,
            32.23014305340068, 27.338031153244163, 30.75422564572241,
            30.247763295491048, 21.096993349691406, 11.085032363012528,
            20.545010901585, 24.00344476416352, 21.64227764525764,
            30.27004490496142, 40.46818719285895, 35.74120525736626,
        ],
    }
)

# Reshape to wide form: one row per state, one column per month.
# Only new_results_reported is selected so that unstack() produces one column
# per month rather than one per (variable, month) pair. Population_2019 is
# kept in the index so it remains available for the population trace below.
df = (
    Amerika2.loc[:, ["state_name", "Month", "Population_2019", "new_results_reported"]]
    .set_index(["state_name", "Month", "Population_2019"])
    .unstack("Month")
    .droplevel(0, axis=1)  # drop the "new_results_reported" column level
)

# Take the month columns from the reshaped frame instead of hard-coding
# 1..12, so the chart still works when the data does not cover every month.
# tolist() converts the integer labels to plain Python ints, the same kind of
# values the hard-coded list used.
month_columns = df.columns.tolist()

# Bar chart of the monthly values, one bar segment per month for each state.
fig = px.bar(df.reset_index(), x="state_name", y=month_columns)

# Add the state population as a separate trace bound to a second y axis.
fig.add_trace(
    go.Scatter(
        x=df.index.get_level_values("state_name"),
        y=df.index.get_level_values("Population_2019"),
        yaxis="y2",
        name="population",
    )
)

# Overlay the second y axis on the first and draw it on the right-hand side.
fig.update_layout({"yaxis2": {"anchor": "x", "overlaying": "y", "side": "right"}})