为什么 Seaborn 在图例中显示的值不在数据框中
Why is Seaborn showing values in the legend that aren't in the data frame
我正在绘制我通过 api 访问的一些 Covid 数据。我已经设法将其放入 df,尽管这不太可能是最佳实践,也不是这个问题的重点。
# import packages
import json
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# set options
# Show every column when a DataFrame is printed.
pd.set_option('display.max_columns', None)
# list of indicators
import re
# Raw indicator names, single-quoted and separated by "or" (inconsistent
# spacing, wrapped over several lines).  A triple-quoted literal is required
# because the text spans more than one physical line.
indicators = """'covid' or 'flu' or 'cli_w11' or 'ili_W11' or 'mask' or'contact' or 'finance' or 'anosmia' or 'vaccine_acpt' or 'access_wash' or 'covid_vaccine' or 'trust_fam' or 'trust_healthcare' or 'trust_who' or 'trust_govt' or'trust_politicians' or 'twodoses' or 'concerned_sideeffects' or 'wash_hands_24h_3to6' or 'wash_hands_24h_7orMore' or 'hesitant_sideeffects' or 'modified_acceptance' or 'cmty_covid' or 'barrier_reason_side_effects' or 'barrier_reason_wontwork' or 'barrier_reason_dontbelieve' or 'barrier_reason_dontlike' or 'barrier_reason_waitlater' or 'barrier_reason_otherpeople' or 'barrier_reason_cost' or 'barrier_reason_religious' or 'barrier_reason_other' or 'trust_doctors' or 'barrier_reason_dontneed_alreadyhad' or 'barrier_reason_dontneed_dontspendtime' or 'barrier_reason_dontneed_nothighrisk' or 'barrier_reason_dontneed_takeprecautions' or 'barrier_reason_dontneed_notserious' or 'barrier_reason_dontneed_notbeneficial' or 'barrier_reason_dontneed_other' or 'informed_access' or 'appointment_have' or 'appointment_tried' or 'barrier_reason_government' or 'activity_work_outside_home' or 'activity_shop' or 'activity_restaurant_bar' or 'activity_spent_time' or 'activity_large_event' or 'activity_public_transit' or 'food_security' or 'anxious_7d' or 'depressed_7d' or 'worried_become_ill' or 'symp_fever' or 'symp_cough' or 'symp_diff_breathing' or 'symp_fatigue' or 'symp_stuffy_nose' or 'symp_aches' or 'symp_sore_throat' or 'symp_chest_pain' or 'symp_nausea' or 'symp_eye_pain' or 'symp_headache' or 'sick_spend_time_7d' or 'ever_tested' or 'pay_test' or 'reduce_spending' or 'symp_chills' or 'symp_changes' or 'testing_rate' or 'tested_positive_14d' or 'tested_positive_recent' or 'flu_vaccine_thisyr' or 'flu_vaccine_lastyr' or 'avoid_contact' or 'vaccinated_appointment_or_accept' or 'appointment_or_accept_covid_vaccine' or 'accept_covid_vaccine_no_appointment' or 'appointment_not_vaccinated' or 'vaccine_tried' or 'had_covid_ever' or 'worried_catch_covid' or
'belief_distancing_effective' or 'belief_masking_effective' or 'others_distanced_public' or 'others_masked_public' or 'covid_vaccinated_friends' or 'belief_vaccinated_mask_unnecessary' or 'belief_children_immune' or 'belief_no_spread_hot_humid' or 'received_news_local_health' or 'received_news_experts' or 'received_news_who' or 'received_news_govt_health' or 'received_news_politicians' or 'received_news_journalists' or 'received_news_friends' or 'received_news_religious' or 'received_news_none' or 'trust_covid_info_local_health' or 'trust_covid_info_experts' or 'trust_covid_info_who' or 'trust_covid_info_govt_health' or 'trust_covid_info_politicians' or 'trust_covid_info_journalists' or 'trust_covid_info_friends' or 'trust_covid_info_religious' or 'want_info_covid_treatment' or 'want_info_vaccine_access' or 'want_info_covid_variants' or 'want_info_children_education' or 'want_info_economic_impact' or 'want_info_mental_health' or 'want_info_relationships' or 'want_info_employment' or 'want_info_none' or 'news_online' or 'news_messaging' or 'news_newspaper' or 'news_television' or 'news_radio' or 'news_none' or 'trust_news_online' or 'trust_news_messaging' or 'trust_news_newspaper' or 'trust_news_television' or 'trust_news_radio' or 'vaccinate_children' or 'delayed_care_cost' or 'vaccine_barrier_eligible' or 'vaccine_barrier_no_appointments' or 'vaccine_barrier_appointment_time' or 'vaccine_barrier_technical_difficulties' or 'vaccine_barrier_document' or 'vaccine_barrier_technology_access' or 'vaccine_barrier_travel' or 'vaccine_barrier_language' or 'vaccine_barrier_childcare' or 'vaccine_barrier_time' or 'vaccine_barrier_type' or 'vaccine_barrier_none' or 'try_vaccinate_1m' or 'vaccine_barrier_eligible_has' or 'vaccine_barrier_no_appointments_has' or 'vaccine_barrier_appointment_time_has' or 'vaccine_barrier_technical_difficulties_has' or 'vaccine_barrier_document_has' or 'vaccine_barrier_technology_access_has' or 'vaccine_barrier_travel_has' or
'vaccine_barrier_language_has' or 'vaccine_barrier_childcare_has' or 'vaccine_barrier_time_has' or 'vaccine_barrier_type_has' or 'vaccine_barrier_none_has' or 'vaccine_barrier_eligible_tried' or 'vaccine_barrier_no_appointments_tried' or 'vaccine_barrier_appointment_time_tried' or 'vaccine_barrier_technical_difficulties_tried' or 'vaccine_barrier_document_tried' or 'vaccine_barrier_technology_access_tried' or 'vaccine_barrier_travel_tried' or 'vaccine_barrier_language_tried' or 'vaccine_barrier_childcare_tried' or 'vaccine_barrier_time_tried' or 'vaccine_barrier_type_tried' or 'vaccine_barrier_none_tried' or 'mask_work_outside_home_1d' or 'mask_shop_1d' or 'mask_restaurant_1d' or 'mask_spent_time_1d' or 'mask_large_event_1d' or 'mask_public_transit_1d'"""
# Pull out every quoted name directly.  Splitting on "' or '" left a stray
# quote on the first and last names and did not separate names across the
# line breaks; matching the quoted tokens is independent of the spacing.
inds = re.findall(r"'([^']+)'", indicators)
# Keep `indicators` as a string, now with uniform "'a' or 'b'" separators.
indicators = " or ".join(f"'{name}'" for name in inds)
# request list of countries
r = requests.get('https://covidmap.umd.edu/api/country')
json_countries = r.json()
countries = pd.DataFrame.from_dict(json_countries['data'])
countries = countries.country.values.tolist()
# loop through all countries for given indicator + append to list
# NOTE(review): the original snippet used `url` without defining it; this is
# presumably the data endpoint of the same API - confirm against the API docs.
url = 'https://covidmap.umd.edu/api/resources'
ind = 'covid'
response_list = []
for country in countries:
    # Use `ind` rather than repeating the literal so the indicator is set once.
    r = requests.get(url, params={'type': 'daily', 'indicator': ind, 'country': country, 'daterange': '20190101-20221231'})
    response_list.append(r.json()['data'])
# reshape list of dictionaries to list of dfs
df_list = [pd.DataFrame(records) for records in response_list]
covid_df = pd.concat(df_list)
# change df data types
covid_df = covid_df.astype({'country': 'category', 'iso_code': 'category', 'gid_0': 'category'})
covid_df['survey_date'] = pd.to_datetime(covid_df['survey_date'], format='%Y%m%d')
# sample plot data
# NOTE: 'country' is categorical, so the filtered frame still carries every
# original category (with zero rows).  That is what makes all countries show
# up in the seaborn legend; chaining
# filtered_data['country'].cat.remove_unused_categories() would drop them.
filtered_data = covid_df.loc[covid_df['country'].isin(['Australia', 'Poland']), :]
首先,我试图在一个图形上绘制几个国家(澳大利亚和波兰),以色调区分。
我已经从包含所有国家/地区的 df 中取出子集,为了简单起见只保留了两个国家。当我绘制数据时,图例仍然显示了原始 df 中的每个国家,即使我已指定使用子集 df (filtered_data)。
# plot data
from IPython.display import set_matplotlib_formats

# Request high-resolution ("retina") inline figures.
set_matplotlib_formats('retina')
# sns.set(rc={"figure.dpi":300, 'savefig.dpi':300}) # increase dpi for higher res plot
sns.set_theme('paper')
sns.set_style('white')
# One line per country (hue) over time; figure width = height * aspect.
relplot_options = {
    'x': "survey_date",
    'y': 'pct_covid',
    'data': filtered_data,
    'hue': 'country',
    'kind': 'line',
    'height': 10,
    'aspect': 2,
}
g = sns.relplot(**relplot_options)
g.fig.suptitle("Covid-19 Rates")
g.set(ylabel="Covid Rate (%)")
plt.show()
知道为什么图例显示的值不在 filtered_data df 中吗?
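我认为问题的原因是您把国家名称转换成了分类(category)变量。我运行您的代码后,筛选结果共有 1370 行,但是当我统计结果数据框中各国家名称的数量时,结果仍然包含所有国家名称(未被选中的国家计数为 0)。这是因为分类列在筛选后仍保留全部类别,而 seaborn 会按类别生成图例。当我注释掉创建分类变量的那一行后,就得到了预期的输出。(如果想保留分类类型,也可以在筛选后调用 filtered_data['country'].cat.remove_unused_categories() 去掉未使用的类别。)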
编辑
禁用以下行
#covid_df = covid_df.astype({'country' : 'category', 'iso_code' : 'category', 'gid_0' : 'category'})
修正前
filtered_data['country'].value_counts()
Poland 686
Australia 684
Albania 0
Norway 0
Qatar 0
...
France 0
Finland 0
Ethiopia 0
El Salvador 0
修正后
filtered_data['country'].value_counts()
Poland 686
Australia 684
Name: country, dtype: int64
我正在绘制我通过 api 访问的一些 Covid 数据。我已经设法将其放入 df,尽管这不太可能是最佳实践,也不是这个问题的重点。
# import packages
import json
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# set options
# Show every column when a DataFrame is printed.
pd.set_option('display.max_columns', None)
# list of indicators
import re
# Raw indicator names, single-quoted and separated by "or" (inconsistent
# spacing, wrapped over several lines).  A triple-quoted literal is required
# because the text spans more than one physical line.
indicators = """'covid' or 'flu' or 'cli_w11' or 'ili_W11' or 'mask' or'contact' or 'finance' or 'anosmia' or 'vaccine_acpt' or 'access_wash' or 'covid_vaccine' or 'trust_fam' or 'trust_healthcare' or 'trust_who' or 'trust_govt' or'trust_politicians' or 'twodoses' or 'concerned_sideeffects' or 'wash_hands_24h_3to6' or 'wash_hands_24h_7orMore' or 'hesitant_sideeffects' or 'modified_acceptance' or 'cmty_covid' or 'barrier_reason_side_effects' or 'barrier_reason_wontwork' or 'barrier_reason_dontbelieve' or 'barrier_reason_dontlike' or 'barrier_reason_waitlater' or 'barrier_reason_otherpeople' or 'barrier_reason_cost' or 'barrier_reason_religious' or 'barrier_reason_other' or 'trust_doctors' or 'barrier_reason_dontneed_alreadyhad' or 'barrier_reason_dontneed_dontspendtime' or 'barrier_reason_dontneed_nothighrisk' or 'barrier_reason_dontneed_takeprecautions' or 'barrier_reason_dontneed_notserious' or 'barrier_reason_dontneed_notbeneficial' or 'barrier_reason_dontneed_other' or 'informed_access' or 'appointment_have' or 'appointment_tried' or 'barrier_reason_government' or 'activity_work_outside_home' or 'activity_shop' or 'activity_restaurant_bar' or 'activity_spent_time' or 'activity_large_event' or 'activity_public_transit' or 'food_security' or 'anxious_7d' or 'depressed_7d' or 'worried_become_ill' or 'symp_fever' or 'symp_cough' or 'symp_diff_breathing' or 'symp_fatigue' or 'symp_stuffy_nose' or 'symp_aches' or 'symp_sore_throat' or 'symp_chest_pain' or 'symp_nausea' or 'symp_eye_pain' or 'symp_headache' or 'sick_spend_time_7d' or 'ever_tested' or 'pay_test' or 'reduce_spending' or 'symp_chills' or 'symp_changes' or 'testing_rate' or 'tested_positive_14d' or 'tested_positive_recent' or 'flu_vaccine_thisyr' or 'flu_vaccine_lastyr' or 'avoid_contact' or 'vaccinated_appointment_or_accept' or 'appointment_or_accept_covid_vaccine' or 'accept_covid_vaccine_no_appointment' or 'appointment_not_vaccinated' or 'vaccine_tried' or 'had_covid_ever' or 'worried_catch_covid' or
'belief_distancing_effective' or 'belief_masking_effective' or 'others_distanced_public' or 'others_masked_public' or 'covid_vaccinated_friends' or 'belief_vaccinated_mask_unnecessary' or 'belief_children_immune' or 'belief_no_spread_hot_humid' or 'received_news_local_health' or 'received_news_experts' or 'received_news_who' or 'received_news_govt_health' or 'received_news_politicians' or 'received_news_journalists' or 'received_news_friends' or 'received_news_religious' or 'received_news_none' or 'trust_covid_info_local_health' or 'trust_covid_info_experts' or 'trust_covid_info_who' or 'trust_covid_info_govt_health' or 'trust_covid_info_politicians' or 'trust_covid_info_journalists' or 'trust_covid_info_friends' or 'trust_covid_info_religious' or 'want_info_covid_treatment' or 'want_info_vaccine_access' or 'want_info_covid_variants' or 'want_info_children_education' or 'want_info_economic_impact' or 'want_info_mental_health' or 'want_info_relationships' or 'want_info_employment' or 'want_info_none' or 'news_online' or 'news_messaging' or 'news_newspaper' or 'news_television' or 'news_radio' or 'news_none' or 'trust_news_online' or 'trust_news_messaging' or 'trust_news_newspaper' or 'trust_news_television' or 'trust_news_radio' or 'vaccinate_children' or 'delayed_care_cost' or 'vaccine_barrier_eligible' or 'vaccine_barrier_no_appointments' or 'vaccine_barrier_appointment_time' or 'vaccine_barrier_technical_difficulties' or 'vaccine_barrier_document' or 'vaccine_barrier_technology_access' or 'vaccine_barrier_travel' or 'vaccine_barrier_language' or 'vaccine_barrier_childcare' or 'vaccine_barrier_time' or 'vaccine_barrier_type' or 'vaccine_barrier_none' or 'try_vaccinate_1m' or 'vaccine_barrier_eligible_has' or 'vaccine_barrier_no_appointments_has' or 'vaccine_barrier_appointment_time_has' or 'vaccine_barrier_technical_difficulties_has' or 'vaccine_barrier_document_has' or 'vaccine_barrier_technology_access_has' or 'vaccine_barrier_travel_has' or
'vaccine_barrier_language_has' or 'vaccine_barrier_childcare_has' or 'vaccine_barrier_time_has' or 'vaccine_barrier_type_has' or 'vaccine_barrier_none_has' or 'vaccine_barrier_eligible_tried' or 'vaccine_barrier_no_appointments_tried' or 'vaccine_barrier_appointment_time_tried' or 'vaccine_barrier_technical_difficulties_tried' or 'vaccine_barrier_document_tried' or 'vaccine_barrier_technology_access_tried' or 'vaccine_barrier_travel_tried' or 'vaccine_barrier_language_tried' or 'vaccine_barrier_childcare_tried' or 'vaccine_barrier_time_tried' or 'vaccine_barrier_type_tried' or 'vaccine_barrier_none_tried' or 'mask_work_outside_home_1d' or 'mask_shop_1d' or 'mask_restaurant_1d' or 'mask_spent_time_1d' or 'mask_large_event_1d' or 'mask_public_transit_1d'"""
# Pull out every quoted name directly.  Splitting on "' or '" left a stray
# quote on the first and last names and did not separate names across the
# line breaks; matching the quoted tokens is independent of the spacing.
inds = re.findall(r"'([^']+)'", indicators)
# Keep `indicators` as a string, now with uniform "'a' or 'b'" separators.
indicators = " or ".join(f"'{name}'" for name in inds)
# request list of countries
r = requests.get('https://covidmap.umd.edu/api/country')
json_countries = r.json()
countries = pd.DataFrame.from_dict(json_countries['data'])
countries = countries.country.values.tolist()
# loop through all countries for given indicator + append to list
# NOTE(review): the original snippet used `url` without defining it; this is
# presumably the data endpoint of the same API - confirm against the API docs.
url = 'https://covidmap.umd.edu/api/resources'
ind = 'covid'
response_list = []
for country in countries:
    # Use `ind` rather than repeating the literal so the indicator is set once.
    r = requests.get(url, params={'type': 'daily', 'indicator': ind, 'country': country, 'daterange': '20190101-20221231'})
    response_list.append(r.json()['data'])
# reshape list of dictionaries to list of dfs
df_list = [pd.DataFrame(records) for records in response_list]
covid_df = pd.concat(df_list)
# change df data types
covid_df = covid_df.astype({'country': 'category', 'iso_code': 'category', 'gid_0': 'category'})
covid_df['survey_date'] = pd.to_datetime(covid_df['survey_date'], format='%Y%m%d')
# sample plot data
# NOTE: 'country' is categorical, so the filtered frame still carries every
# original category (with zero rows).  That is what makes all countries show
# up in the seaborn legend; chaining
# filtered_data['country'].cat.remove_unused_categories() would drop them.
filtered_data = covid_df.loc[covid_df['country'].isin(['Australia', 'Poland']), :]
首先,我试图在一个图形上绘制几个国家(澳大利亚和波兰),以色调区分。
我已经从包含所有国家/地区的 df 中取出子集,为了简单起见只保留了两个国家。当我绘制数据时,图例仍然显示了原始 df 中的每个国家,即使我已指定使用子集 df (filtered_data)。
# plot data
from IPython.display import set_matplotlib_formats

# Request high-resolution ("retina") inline figures.
set_matplotlib_formats('retina')
# sns.set(rc={"figure.dpi":300, 'savefig.dpi':300}) # increase dpi for higher res plot
sns.set_theme('paper')
sns.set_style('white')
# One line per country (hue) over time; figure width = height * aspect.
relplot_options = {
    'x': "survey_date",
    'y': 'pct_covid',
    'data': filtered_data,
    'hue': 'country',
    'kind': 'line',
    'height': 10,
    'aspect': 2,
}
g = sns.relplot(**relplot_options)
g.fig.suptitle("Covid-19 Rates")
g.set(ylabel="Covid Rate (%)")
plt.show()
知道为什么图例显示的值不在 filtered_data df 中吗?
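我认为问题的原因是您把国家名称转换成了分类(category)变量。我运行您的代码后,筛选结果共有 1370 行,但是当我统计结果数据框中各国家名称的数量时,结果仍然包含所有国家名称(未被选中的国家计数为 0)。这是因为分类列在筛选后仍保留全部类别,而 seaborn 会按类别生成图例。当我注释掉创建分类变量的那一行后,就得到了预期的输出。(如果想保留分类类型,也可以在筛选后调用 filtered_data['country'].cat.remove_unused_categories() 去掉未使用的类别。)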
编辑
禁用以下行
#covid_df = covid_df.astype({'country' : 'category', 'iso_code' : 'category', 'gid_0' : 'category'})
修正前
filtered_data['country'].value_counts()
Poland 686
Australia 684
Albania 0
Norway 0
Qatar 0
...
France 0
Finland 0
Ethiopia 0
El Salvador 0
修正后
filtered_data['country'].value_counts()
Poland 686
Australia 684
Name: country, dtype: int64