仅针对某些州使用 ggplot2 绘制美国地图
Plotting US Map with ggplot2 for some states only
我有以下代码:
library(ggplot2)
library(ggsn) # for scale bar `scalebar`
library(fiftystater)
library(tidyverse)
# Label anchor for every state: the midpoint of its bounding box, joined
# with the suicide data so each label can show the acronym and the rate.
state_labels <- fifty_states %>%
  group_by(id) %>%
  summarise(
    lat = mean(c(max(lat), min(lat))),
    long = mean(c(max(long), min(long)))
  ) %>%
  mutate(State_L = id) %>%
  left_join(data.to.work.african, by = "State_L")

# Choropleth of the rate per state with a two-line text label
# (acronym, then rate) drawn on top of every state.
ggplot(data.to.work.african, aes(map_id = State_L)) +
  ggtitle("African American") +
  geom_map(
    aes(fill = Suicide_Rate_By_Pop),
    map = fifty_states,
    colour = "white"
  ) +
  # make sure the axes span the whole map, not only the states with data
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map() +
  geom_text(
    aes(
      x = long,
      y = lat,
      label = paste(Acronym, Suicide_Rate_By_Pop, sep = "\n")
    ),
    data = state_labels,
    size = 2
  ) +
  # hide axis ticks and axis titles; they carry no meaning on a map
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(breaks = NULL) +
  labs(
    x = "",
    y = "",
    fill = "Suicides Rate by 100,000 inhabitants"
  ) +
  scale_fill_gradientn(
    colours = rev(heat.colors(10)),
    na.value = "grey90",
    guide = guide_colourbar(
      barwidth = 25,
      barheight = 0.4,
      # put the legend title on top of the legend
      title.position = "top"
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
结果是:
我的数据集并不包含所有州的信息,因此地图上有些州的标签显示为 'NA',而且标签也没有落在各自州的区域内。
我该如何解决这个问题?对于数据集中没有对应行的州(例如 MT),我希望仍然能显示州名缩写。
数据
# Output of dput() for the data frame: 39 rows (one per state that has data)
# and 7 columns - Acronym, State_U (capitalised state name), State_L
# (lower-case state name), Race, Suicide_Rates, Population and
# Suicide_Rate_By_Pop.
# Note: Suicide_Rate_By_Pop is NA for the 38th entry ("washington").
structure(list(Acronym = c("AL", "AK", "AR", "CA", "CO", "CT",
"DE", "DC", "FL", "GA", "HI", "IL", "IN", "IA", "KS", "KY", "LA",
"MD", "MA", "MI", "MS", "MO", "NE", "NV", "NH", "NJ", "NY", "NC",
"OH", "OK", "OR", "PA", "RI", "SC", "TN", "TX", "VA", "WA", "WI"
), State_U = c("Alabama", "Alaska", "Arkansas", "California",
"Colorado", "Connecticut", "Delaware", "District of Columbia",
"Florida", "Georgia", "Hawaii", "Illinois", "Indiana", "Iowa",
"Kansas", "Kentucky", "Louisiana", "Maryland", "Massachusetts",
"Michigan", "Mississippi", "Missouri", "Nebraska", "Nevada",
"New Hampshire", "New Jersey", "New York", "North Carolina",
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island",
"South Carolina", "Tennessee", "Texas", "Virginia", "Washington",
"Wisconsin"), State_L = c("alabama", "alaska", "arkansas", "california",
"colorado", "connecticut", "delaware", "district of columbia",
"florida", "georgia", "hawaii", "illinois", "indiana", "iowa",
"kansas", "kentucky", "louisiana", "maryland", "massachusetts",
"michigan", "mississippi", "missouri", "nebraska", "nevada",
"new hampshire", "new jersey", "new york", "north carolina",
"ohio", "oklahoma", "oregon", "pennsylvania", "rhode island",
"south carolina", "tennessee", "texas", "virginia", "washington",
"wisconsin"), Race = c("African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American"), Suicide_Rates = c(14L, 1L, 4L, 42L, 3L,
4L, 1L, 1L, 26L, 33L, 2L, 20L, 6L, 1L, 2L, 6L, 21L, 20L, 6L,
23L, 14L, 9L, 2L, 4L, 1L, 9L, 27L, 27L, 14L, 3L, 1L, 24L, 3L,
9L, 13L, 40L, 24L, 7L, 2L), Population = c(4452173L, 627963L,
2678588L, 33987977L, 4326921L, 3411777L, 786373L, 572046L, 16047515L,
8227303L, 1213519L, 12434161L, 6091866L, 2929067L, 2693681L,
4049021L, 4471885L, 5311034L, 6361104L, 9952450L, 2848353L, 5607285L,
1713820L, 2018741L, 1239882L, 8430621L, 19001780L, 8081614L,
11363543L, 3454365L, 3429708L, 12284173L, 1050268L, 4024223L,
5703719L, 20944499L, 7105817L, 5910512L, 5373999L), Suicide_Rate_By_Pop = c(0,
0, 0, 0.124, 0, 0, 0, 0, 0.162, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.142, 0, 0, 0, 0, 0, 0, 0, 0, 0.191, 0,
NA, 0)), row.names = c(1L, 4L, 11L, 14L, 18L, 22L, 26L, 28L,
30L, 34L, 38L, 44L, 48L, 51L, 54L, 58L, 61L, 65L, 69L, 73L, 80L,
84L, 88L, 92L, 96L, 99L, 106L, 110L, 116L, 119L, 123L, 127L,
131L, 133L, 138L, 142L, 150L, 153L, 158L), class = "data.frame")
# The data frame is read from a semicolon-separated CSV file; text columns
# are kept as character vectors instead of being converted to factors.
data.to.work.african <- read.csv2(
  file = "dataSuicideAfrican.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)
问题不在于 ggplot 代码本身,而在于数据。在 ggplot 的 `geom_text` 部分,标签之所以返回 NA,是因为你当前的 `data.to.work.african` 数据只有 39 个州,而 `fifty_states` 数据有 51 个。你可以通过以下方式解决这个问题:
- 使用 `setdiff()` 找出缺失的州
- 将这些州名添加到 `data.to.work.african` 数据集的 `State_L` 列
- 使用内置向量 `state.abb`(配合 `match()` 和 `state.name`)将州名转换为缩写
重新运行您的代码,一切正常!
代码:
代码:
# Find the states that are drawn on the map but have no row in the data
missing_states <- setdiff(unique(fifty_states$id), data.to.work.african$State_L)
#> missing_states
# [1] "arizona" "idaho" "maine" "minnesota" "montana" "new mexico"
# [7] "north dakota" "south dakota" "utah" "vermont" "west virginia" "wyoming"
# Append one row per missing state to the `data.to.work.african` dataset.
# The guard makes the snippet safe to re-run: once every state is present,
# `missing_states` is empty and `(currows + 1):(currows + 0)` would count
# backwards and index existing rows instead of selecting nothing.
currows <- nrow(data.to.work.african) # current number of rows
if (length(missing_states) > 0) {
  new_rows <- currows + seq_along(missing_states)
  # add state names (every other column of the new rows is filled with NA)
  data.to.work.african[new_rows, "State_L"] <- missing_states
  # add acronyms: `state.abb` and `state.name` are parallel built-in vectors
  # covering the 50 states, so a name not found there gets an NA acronym
  data.to.work.african[new_rows, "Acronym"] <-
    state.abb[match(missing_states, tolower(state.name))]
}
重新运行您的代码:
请注意,fiftystater 包不适用于当前版本的 R,但可以在 GitHub 上找到 `fifty_states` 数据。
我有以下代码:
library(ggplot2)
library(ggsn) # for scale bar `scalebar`
library(fiftystater)
library(tidyverse)
# Label anchor for every state: the midpoint of its bounding box, joined
# with the suicide data so each label can show the acronym and the rate.
state_labels <- fifty_states %>%
  group_by(id) %>%
  summarise(
    lat = mean(c(max(lat), min(lat))),
    long = mean(c(max(long), min(long)))
  ) %>%
  mutate(State_L = id) %>%
  left_join(data.to.work.african, by = "State_L")

# Choropleth of the rate per state with a two-line text label
# (acronym, then rate) drawn on top of every state.
ggplot(data.to.work.african, aes(map_id = State_L)) +
  ggtitle("African American") +
  geom_map(
    aes(fill = Suicide_Rate_By_Pop),
    map = fifty_states,
    colour = "white"
  ) +
  # make sure the axes span the whole map, not only the states with data
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map() +
  geom_text(
    aes(
      x = long,
      y = lat,
      label = paste(Acronym, Suicide_Rate_By_Pop, sep = "\n")
    ),
    data = state_labels,
    size = 2
  ) +
  # hide axis ticks and axis titles; they carry no meaning on a map
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(breaks = NULL) +
  labs(
    x = "",
    y = "",
    fill = "Suicides Rate by 100,000 inhabitants"
  ) +
  scale_fill_gradientn(
    colours = rev(heat.colors(10)),
    na.value = "grey90",
    guide = guide_colourbar(
      barwidth = 25,
      barheight = 0.4,
      # put the legend title on top of the legend
      title.position = "top"
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
结果是:
我的数据集并不包含所有州的信息,因此地图上有些州的标签显示为 'NA',而且标签也没有落在各自州的区域内。
我该如何解决这个问题?对于数据集中没有对应行的州(例如 MT),我希望仍然能显示州名缩写。
数据
# Output of dput() for the data frame: 39 rows (one per state that has data)
# and 7 columns - Acronym, State_U (capitalised state name), State_L
# (lower-case state name), Race, Suicide_Rates, Population and
# Suicide_Rate_By_Pop.
# Note: Suicide_Rate_By_Pop is NA for the 38th entry ("washington").
structure(list(Acronym = c("AL", "AK", "AR", "CA", "CO", "CT",
"DE", "DC", "FL", "GA", "HI", "IL", "IN", "IA", "KS", "KY", "LA",
"MD", "MA", "MI", "MS", "MO", "NE", "NV", "NH", "NJ", "NY", "NC",
"OH", "OK", "OR", "PA", "RI", "SC", "TN", "TX", "VA", "WA", "WI"
), State_U = c("Alabama", "Alaska", "Arkansas", "California",
"Colorado", "Connecticut", "Delaware", "District of Columbia",
"Florida", "Georgia", "Hawaii", "Illinois", "Indiana", "Iowa",
"Kansas", "Kentucky", "Louisiana", "Maryland", "Massachusetts",
"Michigan", "Mississippi", "Missouri", "Nebraska", "Nevada",
"New Hampshire", "New Jersey", "New York", "North Carolina",
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island",
"South Carolina", "Tennessee", "Texas", "Virginia", "Washington",
"Wisconsin"), State_L = c("alabama", "alaska", "arkansas", "california",
"colorado", "connecticut", "delaware", "district of columbia",
"florida", "georgia", "hawaii", "illinois", "indiana", "iowa",
"kansas", "kentucky", "louisiana", "maryland", "massachusetts",
"michigan", "mississippi", "missouri", "nebraska", "nevada",
"new hampshire", "new jersey", "new york", "north carolina",
"ohio", "oklahoma", "oregon", "pennsylvania", "rhode island",
"south carolina", "tennessee", "texas", "virginia", "washington",
"wisconsin"), Race = c("African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American", "African American", "African American", "African American",
"African American"), Suicide_Rates = c(14L, 1L, 4L, 42L, 3L,
4L, 1L, 1L, 26L, 33L, 2L, 20L, 6L, 1L, 2L, 6L, 21L, 20L, 6L,
23L, 14L, 9L, 2L, 4L, 1L, 9L, 27L, 27L, 14L, 3L, 1L, 24L, 3L,
9L, 13L, 40L, 24L, 7L, 2L), Population = c(4452173L, 627963L,
2678588L, 33987977L, 4326921L, 3411777L, 786373L, 572046L, 16047515L,
8227303L, 1213519L, 12434161L, 6091866L, 2929067L, 2693681L,
4049021L, 4471885L, 5311034L, 6361104L, 9952450L, 2848353L, 5607285L,
1713820L, 2018741L, 1239882L, 8430621L, 19001780L, 8081614L,
11363543L, 3454365L, 3429708L, 12284173L, 1050268L, 4024223L,
5703719L, 20944499L, 7105817L, 5910512L, 5373999L), Suicide_Rate_By_Pop = c(0,
0, 0, 0.124, 0, 0, 0, 0, 0.162, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.142, 0, 0, 0, 0, 0, 0, 0, 0, 0.191, 0,
NA, 0)), row.names = c(1L, 4L, 11L, 14L, 18L, 22L, 26L, 28L,
30L, 34L, 38L, 44L, 48L, 51L, 54L, 58L, 61L, 65L, 69L, 73L, 80L,
84L, 88L, 92L, 96L, 99L, 106L, 110L, 116L, 119L, 123L, 127L,
131L, 133L, 138L, 142L, 150L, 153L, 158L), class = "data.frame")
# The data frame is read from a semicolon-separated CSV file; text columns
# are kept as character vectors instead of being converted to factors.
data.to.work.african <- read.csv2(
  file = "dataSuicideAfrican.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)
问题不在于 ggplot 代码本身,而在于数据。在 ggplot 的 `geom_text` 部分,标签之所以返回 NA,是因为你当前的 `data.to.work.african` 数据只有 39 个州,而 `fifty_states` 数据有 51 个。你可以通过以下方式解决这个问题:
- 使用 `setdiff()` 找出缺失的州
- 将这些州名添加到 `data.to.work.african` 数据集的 `State_L` 列
- 使用内置向量 `state.abb`(配合 `match()` 和 `state.name`)将州名转换为缩写
重新运行您的代码,一切正常!
代码:
# Find the states that are drawn on the map but have no row in the data
missing_states <- setdiff(unique(fifty_states$id), data.to.work.african$State_L)
#> missing_states
# [1] "arizona" "idaho" "maine" "minnesota" "montana" "new mexico"
# [7] "north dakota" "south dakota" "utah" "vermont" "west virginia" "wyoming"
# Append one row per missing state to the `data.to.work.african` dataset.
# The guard makes the snippet safe to re-run: once every state is present,
# `missing_states` is empty and `(currows + 1):(currows + 0)` would count
# backwards and index existing rows instead of selecting nothing.
currows <- nrow(data.to.work.african) # current number of rows
if (length(missing_states) > 0) {
  new_rows <- currows + seq_along(missing_states)
  # add state names (every other column of the new rows is filled with NA)
  data.to.work.african[new_rows, "State_L"] <- missing_states
  # add acronyms: `state.abb` and `state.name` are parallel built-in vectors
  # covering the 50 states, so a name not found there gets an NA acronym
  data.to.work.african[new_rows, "Acronym"] <-
    state.abb[match(missing_states, tolower(state.name))]
}
重新运行您的代码:
请注意,fiftystater 包不适用于当前版本的 R,但可以在 GitHub 上找到 `fifty_states` 数据。