Altair 或 Plotly 分级统计图(choropleth map)中缺失值的问题
Problem with missing values in Altair or Plotly choropleth map
我有美国部分州的数据,其余州的数据为空。在创建地图时,我想给缺少数值的州加上阴影并配上文字说明,但一直找不到正确的方法。使用我当前的代码,我无法得到完整的美国地图(包括值为 Null 的州),只有已经赋值的州才会显示出来。我还查看了之前发布的相关问题,并尝试对地图进行分层,但这导致了一个错误。
Here's what cc_df looks like:
这是我的代码:
# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt# import seaborn library
%matplotlib inline
import altair as alt
from vega_datasets import data
# State database: one row per state from the Census Bureau reference file
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",  # pipe separated file
    dtype='str',  # FIPS are best as strings
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],  # rename columns
    usecols=['state_fips', 'state_name', 'state'],  # drop last column
)
states_df = states_df.set_index('state')
# Integer id; this is the key the map lookup below matches on
states_df['id'] = states_df['state_fips'].astype(int)

# The data to map: mean CWAD per state
cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data
#cc_state_df = pd.merge(cc_df, states_df)
# DataFrame.join defaults to a left join, so every state row is kept
cc_state_df = states_df.join(cc_df).reset_index()
# %%
# Create the map
states = alt.topo_feature(data.us_10m.url, 'states')
variable_list = ['CWAD']

# Colour by whichever column the current repeated row refers to
row_color = alt.Color(alt.repeat('row'), type='quantitative')
# Columns pulled from cc_state_df into each map feature, matched on 'id'
cwad_lookup = alt.LookupData(cc_state_df, 'id', variable_list)

(
    alt.Chart(states)
    .mark_geoshape(stroke='lightgrey', strokeWidth=.5)
    .encode(row_color)
    .transform_lookup(lookup='id', from_=cwad_lookup)
    .properties(width=300, height=300)
    .project(type='albersUsa')
    .repeat(row=variable_list)
    .resolve_scale(color='independent')
)
输出如下所示:
我明白你指出的那一点正是问题所在,因此我针对 NaN 值做了一些调查,并找到了一个相关的解答。不过,对空值做条件判断并不起作用,所以我改用 -1 替换缺失值,从而得到了想要的输出。
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from vega_datasets import data
# State database: one row per state from the Census Bureau reference file
states_df = pd.read_csv('https://www2.census.gov/geo/docs/reference/state.txt',
                        sep="|",
                        dtype='str',
                        header=0,
                        names=['state_fips', 'state', 'state_name', 'StateENS'],
                        usecols=['state_fips', 'state_name', 'state']).set_index('state')
# Integer id; this is the key the map lookup below matches on
states_df['id'] = states_df['state_fips'].astype(int)

import io

# Inline sample of the data to map.
# It must NOT be named `data`: that would shadow `from vega_datasets import data`
# and make `data.us_10m.url` below fail with AttributeError on a str.
cwad_text = '''
state CWAD
AR 377.715148
FL 6560.929494
GA 1958.122132
IA 0.409179
KS 63.706671
'''
# sep=r'\s+' splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True option.
cc_df = pd.read_csv(io.StringIO(cwad_text), sep=r'\s+')
# The data to map
#cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data
#cc_state_df = pd.merge(cc_df, states_df)
# DataFrame.join defaults to a left join, so states without data stay in
# the frame with CWAD = NaN.
cc_state_df = states_df.join(cc_df).reset_index()
# Replace NaN with the sentinel -1 so the chart condition below can detect
# "no data" (assumes real CWAD values are never -1).
cc_state_df.fillna(-1, inplace=True)

# Create the map
states = alt.topo_feature(data.us_10m.url, 'states')
variable_list = ['CWAD']

alt.Chart(states).mark_geoshape(
    stroke='lightgrey',
    strokeWidth=.5
).encode(
    # States carrying the -1 sentinel are drawn light gray; all others are
    # coloured on the quantitative CWAD scale.
    color=alt.condition('datum.CWAD !== -1', 'CWAD:Q', alt.value('lightgray'))
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(
        cc_state_df,
        'id',
        variable_list)
).properties(
    width=300,
    height=300
).project(
    type='albersUsa'
).repeat(
    row=variable_list
).resolve_scale(
    color='independent'
)
我改用 Plotly 而不是 Altair,成功绘制出了缺失数据的州。如果您并非一定要使用 Altair,这可能会有所帮助。
# import the required library
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import altair as alt
from vega_datasets import data
# State database: one row per state from the Census Bureau reference file
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    # pipe separated file
    sep="|",
    # FIPS are best as strings
    dtype='str',
    # rename columns
    header=0, names=['state_fips', 'state', 'state_name', 'StateENS'],
    # drop last column
    usecols=['state_fips', 'state_name', 'state']
).set_index('state')
states_df['id'] = states_df['state_fips'].astype(int)

# The data to map.
# Read the 'CWAD' column: it is the column plotted as `z` below. The snippet
# previously requested 'GWAD', which left cc_state_df without a 'CWAD'
# column and made the `z=` lookup fail with a KeyError.
cc_df = pd.read_csv('cwad_hv.csv',
                    usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data
#cc_state_df = pd.merge(cc_df, states_df) #don't use this.
# DataFrame.join defaults to a left join, so states without data stay in
# the frame with CWAD = NaN.
cc_state_df = states_df.join(cc_df).reset_index()
#cc_state_df.fillna(0, inplace=True)#This changes the states with no data from NA to zero. If your data has a range -ve to +ve, skip this.

fig = go.Figure(data=go.Choropleth(
    locations=cc_state_df['state'],  # state abbreviations from the census file
    z=cc_state_df['CWAD'].astype(float),
    locationmode='USA-states',
    #color='Greens',
    autocolorscale=True,
    #range_color=[0, 6500],
    #text=df['text'], # hover text
    marker_line_color='black',  # line markers between states
    colorbar_title="CWAD kg/ha"
))
fig.update_layout(
    title_text='CWAD',
    geo=dict(
        scope='usa',
        projection=go.layout.geo.Projection(type='albers usa'),
        showlakes=False,  # lakes
        lakecolor='rgb(255, 255, 255)'),
)
fig.show()
Here is the output
我有美国部分州的数据,其余州的数据为空。在创建地图时,我想给缺少数值的州加上阴影并配上文字说明,但一直找不到正确的方法。使用我当前的代码,我无法得到完整的美国地图(包括值为 Null 的州),只有已经赋值的州才会显示出来。我还查看了之前发布的相关问题,并尝试对地图进行分层,但这导致了一个错误。 Here's what cc_df looks like:
这是我的代码:
# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt# import seaborn library
%matplotlib inline
import altair as alt
from vega_datasets import data
# State database: one row per state from the Census Bureau reference file
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",  # pipe separated file
    dtype='str',  # FIPS are best as strings
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],  # rename columns
    usecols=['state_fips', 'state_name', 'state'],  # drop last column
)
states_df = states_df.set_index('state')
# Integer id; this is the key the map lookup below matches on
states_df['id'] = states_df['state_fips'].astype(int)

# The data to map: mean CWAD per state
cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data
#cc_state_df = pd.merge(cc_df, states_df)
# DataFrame.join defaults to a left join, so every state row is kept
cc_state_df = states_df.join(cc_df).reset_index()
# %%
# Create the map
states = alt.topo_feature(data.us_10m.url, 'states')
variable_list = ['CWAD']

# Colour by whichever column the current repeated row refers to
row_color = alt.Color(alt.repeat('row'), type='quantitative')
# Columns pulled from cc_state_df into each map feature, matched on 'id'
cwad_lookup = alt.LookupData(cc_state_df, 'id', variable_list)

(
    alt.Chart(states)
    .mark_geoshape(stroke='lightgrey', strokeWidth=.5)
    .encode(row_color)
    .transform_lookup(lookup='id', from_=cwad_lookup)
    .properties(width=300, height=300)
    .project(type='albersUsa')
    .repeat(row=variable_list)
    .resolve_scale(color='independent')
)
输出如下所示:
我明白你指出的那一点正是问题所在,因此我针对 NaN 值做了一些调查,并找到了一个相关的解答。不过,对空值做条件判断并不起作用,所以我改用 -1 替换缺失值,从而得到了想要的输出。
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from vega_datasets import data
# State database: one row per state from the Census Bureau reference file
states_df = pd.read_csv('https://www2.census.gov/geo/docs/reference/state.txt',
                        sep="|",
                        dtype='str',
                        header=0,
                        names=['state_fips', 'state', 'state_name', 'StateENS'],
                        usecols=['state_fips', 'state_name', 'state']).set_index('state')
# Integer id; this is the key the map lookup below matches on
states_df['id'] = states_df['state_fips'].astype(int)

import io

# Inline sample of the data to map.
# It must NOT be named `data`: that would shadow `from vega_datasets import data`
# and make `data.us_10m.url` below fail with AttributeError on a str.
cwad_text = '''
state CWAD
AR 377.715148
FL 6560.929494
GA 1958.122132
IA 0.409179
KS 63.706671
'''
# sep=r'\s+' splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True option.
cc_df = pd.read_csv(io.StringIO(cwad_text), sep=r'\s+')
# The data to map
#cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data
#cc_state_df = pd.merge(cc_df, states_df)
# DataFrame.join defaults to a left join, so states without data stay in
# the frame with CWAD = NaN.
cc_state_df = states_df.join(cc_df).reset_index()
# Replace NaN with the sentinel -1 so the chart condition below can detect
# "no data" (assumes real CWAD values are never -1).
cc_state_df.fillna(-1, inplace=True)

# Create the map
states = alt.topo_feature(data.us_10m.url, 'states')
variable_list = ['CWAD']

alt.Chart(states).mark_geoshape(
    stroke='lightgrey',
    strokeWidth=.5
).encode(
    # States carrying the -1 sentinel are drawn light gray; all others are
    # coloured on the quantitative CWAD scale.
    color=alt.condition('datum.CWAD !== -1', 'CWAD:Q', alt.value('lightgray'))
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(
        cc_state_df,
        'id',
        variable_list)
).properties(
    width=300,
    height=300
).project(
    type='albersUsa'
).repeat(
    row=variable_list
).resolve_scale(
    color='independent'
)
我改用 Plotly 而不是 Altair,成功绘制出了缺失数据的州。如果您并非一定要使用 Altair,这可能会有所帮助。
# import the required library
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import altair as alt
from vega_datasets import data
# State database: one row per state from the Census Bureau reference file
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    # pipe separated file
    sep="|",
    # FIPS are best as strings
    dtype='str',
    # rename columns
    header=0, names=['state_fips', 'state', 'state_name', 'StateENS'],
    # drop last column
    usecols=['state_fips', 'state_name', 'state']
).set_index('state')
states_df['id'] = states_df['state_fips'].astype(int)

# The data to map.
# Read the 'CWAD' column: it is the column plotted as `z` below. The snippet
# previously requested 'GWAD', which left cc_state_df without a 'CWAD'
# column and made the `z=` lookup fail with a KeyError.
cc_df = pd.read_csv('cwad_hv.csv',
                    usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data
#cc_state_df = pd.merge(cc_df, states_df) #don't use this.
# DataFrame.join defaults to a left join, so states without data stay in
# the frame with CWAD = NaN.
cc_state_df = states_df.join(cc_df).reset_index()
#cc_state_df.fillna(0, inplace=True)#This changes the states with no data from NA to zero. If your data has a range -ve to +ve, skip this.

fig = go.Figure(data=go.Choropleth(
    locations=cc_state_df['state'],  # state abbreviations from the census file
    z=cc_state_df['CWAD'].astype(float),
    locationmode='USA-states',
    #color='Greens',
    autocolorscale=True,
    #range_color=[0, 6500],
    #text=df['text'], # hover text
    marker_line_color='black',  # line markers between states
    colorbar_title="CWAD kg/ha"
))
fig.update_layout(
    title_text='CWAD',
    geo=dict(
        scope='usa',
        projection=go.layout.geo.Projection(type='albers usa'),
        showlakes=False,  # lakes
        lakecolor='rgb(255, 255, 255)'),
)
fig.show()
Here is the output