Altair 或 Plotly 分级统计图(choropleth map)中缺失值的问题

Problem with missing values in Altair or Plotly choropleth map

我只有美国部分州的数据,其余各州的数据为空。在创建地图时,我希望给缺少数值的州也填充底色并加上文字说明,但一直找不到正确的方法。使用我当前的代码,无法得到完整的美国地图(即包含值为 Null 的州),地图上只显示了具有具体数值的州。我还查看了之前发布的相关问题,并尝试对地图进行分层,但这导致了报错。cc_df 的内容如下所示:

这是我的代码:

# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt# import seaborn library
%matplotlib inline
import altair as alt
from vega_datasets import data

# State database, loaded from the Census Bureau reference file.
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",      # pipe separated file
    dtype='str',  # FIPS are best as strings
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],  # rename columns
    usecols=['state_fips', 'state_name', 'state'],            # drop last column
)
# Index by the 'state' column so our own per-state data can be joined on it.
states_df = states_df.set_index('state')
# Integer copy of the FIPS code; the map looks rows up by this 'id' column.
states_df = states_df.assign(id=states_df['state_fips'].astype(int))

# The data to map: mean CWAD per state.
cc_df = (
    pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
    .groupby('state')
    .mean()
)

# Combine state database and our own data.
# DataFrame.join is a left join on the index by default, so every row of
# states_df is kept and states without data get NaN in 'CWAD'.
# (An earlier attempt used pd.merge(cc_df, states_df) instead.)
cc_state_df = states_df.join(cc_df)
cc_state_df = cc_state_df.reset_index()

# %%
# Create the map
variable_list = ['CWAD']
states = alt.topo_feature(data.us_10m.url, 'states')

# State outlines, coloured by whichever field the row repeater supplies.
state_shapes = alt.Chart(states).mark_geoshape(
    stroke='lightgrey',
    strokeWidth=.5,
)
coloured = state_shapes.encode(
    alt.Color(alt.repeat('row'), type='quantitative')
)

# Attach the plotted columns from cc_state_df to each shape by matching 'id'.
with_values = coloured.transform_lookup(
    lookup='id',
    from_=alt.LookupData(cc_state_df, 'id', variable_list),
)
panel = with_values.properties(width=300, height=300).project(type='albersUsa')

# One panel per variable, each with its own colour scale. Kept as the final
# bare expression so that a notebook cell renders the chart.
panel.repeat(row=variable_list).resolve_scale(color='independent')

输出如下所示:

我明白你指出的地方正是问题所在,所以我针对 NaN 值的处理做了一些研究,并找到了相关的讨论。不过,针对空值的条件判断没有起作用,因此我改用 -1 替换缺失值,得到了想要的输出。

import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from vega_datasets import data

# State database: read the pipe-separated Census reference file with every
# column as a string, rename the columns and keep the first three.
raw_states = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",
    dtype='str',
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],
    usecols=['state_fips', 'state_name', 'state'],
)
states_df = raw_states.set_index('state')

# Integer id derived from the FIPS code; the map looks rows up by 'id'.
states_df = states_df.assign(id=states_df['state_fips'].astype(int))

import io

# Inline sample of the per-state values, standing in for hv_cwad.csv.
# It is deliberately NOT called `data`: rebinding that name would shadow the
# `from vega_datasets import data` import, and the map code further down
# still needs `data.us_10m.url`.
csv_text = '''
state CWAD
AR 377.715148
FL 6560.929494
GA 1958.122132
IA 0.409179
KS 63.706671 
'''

# The data to map. sep=r'\s+' splits on runs of whitespace; it replaces
# delim_whitespace=True, which recent pandas releases deprecate.
cc_df = pd.read_csv(io.StringIO(csv_text), sep=r'\s+')
# To read the real file instead of the inline sample:
#cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
# Average per state; 'state' becomes the index.
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data (left join on the 'state' index),
# then replace every missing value with the placeholder -1, which the chart
# condition tests for.
# (An earlier attempt used pd.merge(cc_df, states_df) instead.)
cc_state_df = states_df.join(cc_df).reset_index().fillna(-1)

# Create the map
variable_list = ['CWAD']
states = alt.topo_feature(data.us_10m.url, 'states')

# Colour by CWAD, except where the value is the -1 placeholder: those shapes
# get a flat light gray instead.
fill = alt.condition(
    'datum.CWAD !== -1',
    'CWAD:Q',
    alt.value('lightgray'),
)
# Columns to copy from cc_state_df onto each shape, matched on 'id'.
value_lookup = alt.LookupData(cc_state_df, 'id', variable_list)

state_map = (
    alt.Chart(states)
    .mark_geoshape(stroke='lightgrey', strokeWidth=.5)
    .encode(color=fill)
    .transform_lookup(lookup='id', from_=value_lookup)
    .properties(width=300, height=300)
    .project(type='albersUsa')
)

# Kept as the final bare expression so that a notebook cell renders the chart.
state_map.repeat(row=variable_list).resolve_scale(color='independent')

我能够使用 Plotly 而不是 Altair 来绘制缺失的数据。如果您不是特别喜欢使用 Altair,这可能会有所帮助。

# import the required library
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import altair as alt
from vega_datasets import data

# State database, loaded from the Census Bureau reference file.
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",      # pipe separated file
    dtype='str',  # FIPS are best as strings
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],  # rename columns
    usecols=['state_fips', 'state_name', 'state'],            # drop last column
)
# Index by the 'state' column so our own per-state data can be joined on it.
states_df = states_df.set_index('state')
# Integer copy of the FIPS code.
states_df = states_df.assign(id=states_df['state_fips'].astype(int))

# The data to map: per-state mean of CWAD.
# Load the same column that the figure plots (z=cc_state_df['CWAD']);
# requesting a different column here makes that lookup fail with a KeyError.
cc_df = pd.read_csv('cwad_hv.csv',
                    usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data.
# DataFrame.join is a left join on the index by default, so every state in
# states_df is kept and states without data get NaN.
#cc_state_df = pd.merge(cc_df, states_df) #don't use this.
cc_state_df = states_df.join(cc_df).reset_index()
# Optional: the line below changes the states with no data from NA to zero.
# If your data has a range from -ve to +ve, skip it.
#cc_state_df.fillna(0, inplace=True)

# Choropleth trace: one location per row of cc_state_df, coloured by CWAD.
state_trace = go.Choropleth(
    locations=cc_state_df['state'],
    z=cc_state_df['CWAD'].astype(float),
    locationmode='USA-states',
    autocolorscale=True,
    marker_line_color='black',  # line markers between states
    colorbar_title="CWAD kg/ha",
    # Options left disabled:
    #color='Greens',
    #range_color=[0, 6500],
    #text=df['text'], # hover text
)
fig = go.Figure(data=state_trace)

# Restrict the map to the USA with an Albers USA projection; lakes are off.
geo_layout = dict(
    scope='usa',
    projection=go.layout.geo.Projection(type='albers usa'),
    showlakes=False,  # lakes
    lakecolor='rgb(255, 255, 255)',
)
fig.update_layout(title_text='CWAD', geo=geo_layout)

fig.show()

输出如下: