仅针对某些州使用 ggplot2 绘制美国地图

Plotting US Map with ggplot2 for some states only

我有以下代码:

library(ggplot2)
library(ggsn) # for scale bar `scalebar`
library(fiftystater)
library(tidyverse)

# Choropleth of Suicide_Rate_By_Pop per state, with a two-line text label
# (acronym + rate) drawn at each state's bounding-box midpoint.
ggplot(data.to.work.african, mapping = aes(map_id = State_L)) +
  # State polygons, filled by rate and outlined in white
  geom_map(
    mapping = aes(fill = Suicide_Rate_By_Pop),
    map = fifty_states,
    colour = "white"
  ) +
  # geom_map() does not set axis limits itself, so stretch them to the map
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map() +
  # Label data: one row per state id in fifty_states, left-joined to the
  # rates, so states without a row in data.to.work.african keep NA values
  geom_text(
    mapping = aes(
      x = long,
      y = lat,
      label = paste(Acronym, Suicide_Rate_By_Pop, sep = "\n")
    ),
    data = fifty_states %>%
      group_by(id) %>%
      summarise(
        lat = mean(range(lat)),
        long = mean(range(long))
      ) %>%
      mutate(State_L = id) %>%
      left_join(data.to.work.african, by = "State_L"),
    size = 2
  ) +
  # Hide axis ticks and axis titles; set plot and legend titles
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(breaks = NULL) +
  labs(
    title = "African American",
    x = "",
    y = "",
    fill = "Suicides Rate by 100,000 inhabitants"
  ) +
  scale_fill_gradientn(
    colours = rev(heat.colors(10)),
    na.value = "grey90",
    guide = guide_colourbar(
      barwidth = 25,
      barheight = 0.4,
      # put legend title on top of legend
      title.position = "top"
    )
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

结果是:

我的数据集并不包含所有州的信息,因此地图上有些州的标签显示为 'NA',而且这些标签没有落在各州自己的区域内。

我该如何解决这个问题?对于数据集中没有对应行的州(例如 MT),我希望仍然能显示州名缩写。

数据

# Output of dput() for the data set used above: a data.frame with 39 rows
# (one per state/district listed) and the columns
#   Acronym             - two-letter state abbreviation
#   State_U / State_L   - state name in title case / lower case
#   Race                - "African American" in every row
#   Suicide_Rates       - integer counts
#   Population          - integer population
#   Suicide_Rate_By_Pop - numeric rate (includes an NA entry)
# NOTE: the expression below is not assigned to a name; assign it to
# `data.to.work.african` to use it with the plotting code.
structure(list(Acronym = c("AL", "AK", "AR", "CA", "CO", "CT", 
"DE", "DC", "FL", "GA", "HI", "IL", "IN", "IA", "KS", "KY", "LA", 
"MD", "MA", "MI", "MS", "MO", "NE", "NV", "NH", "NJ", "NY", "NC", 
"OH", "OK", "OR", "PA", "RI", "SC", "TN", "TX", "VA", "WA", "WI"
), State_U = c("Alabama", "Alaska", "Arkansas", "California", 
"Colorado", "Connecticut", "Delaware", "District of Columbia", 
"Florida", "Georgia", "Hawaii", "Illinois", "Indiana", "Iowa", 
"Kansas", "Kentucky", "Louisiana", "Maryland", "Massachusetts", 
"Michigan", "Mississippi", "Missouri", "Nebraska", "Nevada", 
"New Hampshire", "New Jersey", "New York", "North Carolina", 
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", 
"South Carolina", "Tennessee", "Texas", "Virginia", "Washington", 
"Wisconsin"), State_L = c("alabama", "alaska", "arkansas", "california", 
"colorado", "connecticut", "delaware", "district of columbia", 
"florida", "georgia", "hawaii", "illinois", "indiana", "iowa", 
"kansas", "kentucky", "louisiana", "maryland", "massachusetts", 
"michigan", "mississippi", "missouri", "nebraska", "nevada", 
"new hampshire", "new jersey", "new york", "north carolina", 
"ohio", "oklahoma", "oregon", "pennsylvania", "rhode island", 
"south carolina", "tennessee", "texas", "virginia", "washington", 
"wisconsin"), Race = c("African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American", "African American", "African American", "African American", 
"African American"), Suicide_Rates = c(14L, 1L, 4L, 42L, 3L, 
4L, 1L, 1L, 26L, 33L, 2L, 20L, 6L, 1L, 2L, 6L, 21L, 20L, 6L, 
23L, 14L, 9L, 2L, 4L, 1L, 9L, 27L, 27L, 14L, 3L, 1L, 24L, 3L, 
9L, 13L, 40L, 24L, 7L, 2L), Population = c(4452173L, 627963L, 
2678588L, 33987977L, 4326921L, 3411777L, 786373L, 572046L, 16047515L, 
8227303L, 1213519L, 12434161L, 6091866L, 2929067L, 2693681L, 
4049021L, 4471885L, 5311034L, 6361104L, 9952450L, 2848353L, 5607285L, 
1713820L, 2018741L, 1239882L, 8430621L, 19001780L, 8081614L, 
11363543L, 3454365L, 3429708L, 12284173L, 1050268L, 4024223L, 
5703719L, 20944499L, 7105817L, 5910512L, 5373999L), Suicide_Rate_By_Pop = c(0, 
0, 0, 0.124, 0, 0, 0, 0, 0.162, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.142, 0, 0, 0, 0, 0, 0, 0, 0, 0.191, 0, 
NA, 0)), row.names = c(1L, 4L, 11L, 14L, 18L, 22L, 26L, 28L, 
30L, 34L, 38L, 44L, 48L, 51L, 54L, 58L, 61L, 65L, 69L, 73L, 80L, 
84L, 88L, 92L, 96L, 99L, 106L, 110L, 116L, 119L, 123L, 127L, 
131L, 133L, 138L, 142L, 150L, 153L, 158L), class = "data.frame")

# The data set is loaded from its semicolon-separated .csv file;
# text columns are kept as character vectors rather than factors.
data.to.work.african <- read.csv2(
  "dataSuicideAfrican.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)

问题不在于 ggplot 代码本身,而在于数据。在 geom_text 图层中,标签之所以显示为 NA,是因为你当前的 data.to.work.african 数据只有 39 个州,而 fifty_states 数据有 51 个。你可以通过以下步骤解决这个问题:

  1. 使用 setdiff() 比较 fifty_states 与你的数据中的州名
  2. 找出缺失的州
  3. 将这些州的名称添加到 data.to.work.african 数据集的 State_L 列中
  4. 使用内置向量 state.abb(配合 state.name)将州名转换为缩写

重新运行您的代码,一切正常!

代码:

# Find the states that exist in the map data but have no row in the data set
missing_states <- setdiff(unique(fifty_states$id), data.to.work.african$State_L)

#> missing_states
# [1] "arizona"       "idaho"         "maine"         "minnesota"     "montana"       "new mexico"   
# [7] "north dakota"  "south dakota"  "utah"          "vermont"       "west virginia" "wyoming"  

# add missing states to `data.to.work.african` dataset
currows <- nrow(data.to.work.african) # current number of rows

# Only append when something is actually missing. With an empty
# `missing_states` (e.g. when this snippet is run a second time),
# `(currows + 1):(currows + 0)` would count DOWN to c(currows + 1, currows)
# and the assignment would fail; `seq_along()` avoids that.
if (length(missing_states) > 0) {
  new_rows <- currows + seq_along(missing_states)

  # add state names (all other columns of the new rows are filled with NA)
  data.to.work.african[new_rows, "State_L"] <- missing_states

  # add acronyms: look up each lower-case name in the built-in `state.name`
  # vector and take the matching entry of `state.abb`
  data.to.work.african[new_rows, "Acronym"] <-
    state.abb[match(missing_states, tolower(state.name))]
}

重新运行您的代码:

请注意,fiftystater 包不适用于当前版本的 R,但可以在 GitHub 上找到 fifty_states 数据。