为什么 geom_sf 不允许使用数据框中的离散列进行填充？

Question

我使用 geom_sf 绘制了一张空间地图。当 fill 参数放在 aes() 内部时，ggplot 总是为它选择连续型比例尺；而当我把 fill 移到 aes() 之外并配合 scale_fill_manual 使用时同样不起作用：手动指定的颜色被覆盖，图例也不显示。我还尝试在 aes() 中用 as.factor() 包裹 fill 所用的列，但这会导致错误：

Error: Discrete value supplied to continuous scale

但这些值是离散的！所以我不得不把它转换成数值型。可在此处找到可重现的示例和数据文件 new_file.csv：

https://github.com/THsTestingGround/SO_question_fill_map/blob/master/new_file.csv

代码：

# Reproducer from the question: downloads the cluster CSV, fetches Alachua
# County tract geometries, attaches them to the CSV rows and plots the tracts.
# Large scipen penalty so long numeric IDs print in fixed (non-scientific)
# notation; tigris_use_cache asks tigris to cache downloaded shapefiles.
options(scipen = 9999,tigris_use_cache = TRUE)
library(sf)
library(tidyverse)
library(tidycensus)
library(RCurl)
library(tigris)

# The Census API key was removed after feedback from an SO member; a key must
# be configured before get_acs() below can be called.

# Download the raw CSV text from GitHub.
url <- getURL("https://raw.githubusercontent.com/THsTestingGround/SO_question_fill_map/master/new_file.csv")

# Parse the downloaded content into a tibble.
# NOTE(review): `url` holds the file's text, not a URL -- presumably read_csv()
# treats it as literal CSV data; confirm with the readr version in use.
gainsville_df <- read_csv(url) #store the csv file content from my github link

# Get the tract-level population estimates together with their geometries.
alachua <- tidycensus::get_acs(state = "FL", county = "Alachua", 
                               geography = "tract", geometry = T,
                               # NOTE(review): `year = 2018` is inside c(), so it
                               # becomes an element of `variables` instead of the
                               # `year` argument -- probably unintended.
                               variables = c("B01003_001", year = 2018))

# Attach a geometry to every CSV row by looking up its `Geo ID` in
# alachua$GEOID; match() returns NA for IDs without a counterpart.
gainsville_df$Geomtry <- alachua$geometry[match(gainsville_df$`Geo ID`, alachua$GEOID)]

# Plot the tracts.
ggplot2::ggplot() + 
      #geom_sf(data = gainsville_df, aes(geometry= Geomtry,fill= as.numeric(`Cluster Group`)), alpha= 0.2) + #aes() fill OK
      # Here `fill` is given outside aes(), i.e. as a fixed per-row value rather
      # than a mapped aesthetic, so no fill scale (and no legend) is involved.
      geom_sf(data = gainsville_df, aes(geometry= Geomtry), alpha= 0.2,fill = gainsville_df$`Cluster Group`) +
      coord_sf(crs = "+init=epsg:4326")+
      #scale_fill_gradientn(colours= rev(RColorBrewer::brewer.pal(6,"Set3")), name= "Cluster")+ #fills gradient OK
      theme_bw()+
      # Has no visible effect in this form because nothing is mapped to `fill`.
      scale_fill_manual(values = c("red", "grey", "seagreen3","gold", "green","orange"), name= "Cluster Group")+ #gets overridden no matter where we put it
      theme(legend.position = "right") #doesn't show up when fill in color manually

1) 下面是把 fill 放在 aes() 内部时的效果。我想把这种连续的填充色改成离散的颜色：

2) 这是我将填充移出 aes 函数时图形的样子。手动填充被覆盖：

我想要离散的自定义颜色。

请注意，由于 SO 对帖子的字符数有上限，我无法贴出完整的 dput 输出，因此这里只能提供前 10 行。您可以使用仓库中的 CSV 文件，这样就能得到 Cluster Group 列中的全部取值。我已经给出了制作这个示例所用的全部内容。

#here is chunk of dput from the data right before I ggplot it

> dput(gainsville_df[1:10, ])
structure(list(GID = c(12001000500, 12001000200, 12001000500, 
12001001902, 12001001202, 12001001202, 12001001100, 12001000302, 
12001000500, 12001001100), Tract = c(500, 200, 500, 1902, 1202, 
1202, 1100, 302, 500, 1100), Population = c(5171, 6671, 5171, 
3192, 7309, 7309, 7143, 2343, 5171, 7143), ClusterGroup = c(6, 
5, 6, 1, 3, 3, 3, 1, 6, 3), Geomtry = structure(list(structure(list(
    list(structure(c(-82.33082, -82.326592, -82.326618, -82.326591, 
    -82.325846, -82.323109, -82.323086, -82.310256, -82.30183, 
    -82.306576, -82.311469, -82.311505, -82.315105, -82.317746, 
    -82.318469, -82.325033, -82.326534, -82.326582, -82.330837, 
    -82.33082, 29.653382, 29.653325, 29.658964, 29.659237, 29.659193, 
    29.659231, 29.666604, 29.666546, 29.666511, 29.659338, 29.651985, 
    29.651741, 29.647026, 29.645798, 29.645586, 29.644474, 29.644219, 
    29.650285, 29.650274, 29.653382), .Dim = c(20L, 2L)))), class = c("XY", 
"MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-82.339354, 
-82.339375, -82.339373, -82.33928, -82.33926, -82.339208, -82.337143, 
-82.33295, -82.328297, -82.326591, -82.326618, -82.326592, -82.33082, 
-82.330837, -82.326582, -82.326534, -82.333655, -82.337821, -82.339384, 
-82.339354, 29.644897, 29.648466, 29.652056, 29.653917, 29.655723, 
29.659186, 29.659396, 29.659426, 29.65944, 29.659237, 29.658964, 
29.653325, 29.653382, 29.650274, 29.650285, 29.644219, 29.642994, 
29.642023, 29.640976, 29.644897), .Dim = c(20L, 2L)))), class = c("XY", 
"MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-82.33082, 
-82.326592, -82.326618, -82.326591, -82.325846, -82.323109, -82.323086, 
-82.310256, -82.30183, -82.306576, -82.311469, -82.311505, -82.315105, 
-82.317746, -82.318469, -82.325033, -82.326534, -82.326582, -82.330837, 
-82.33082, 29.653382, 29.653325, 29.658964, 29.659237, 29.659193, 
29.659231, 29.666604, 29.666546, 29.666511, 29.659338, 29.651985, 
29.651741, 29.647026, 29.645798, 29.645586, 29.644474, 29.644219, 
29.650285, 29.650274, 29.653382), .Dim = c(20L, 2L)))), class = c("XY", 
"MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-82.343091, 
-82.322426, -82.305774, -82.28828, -82.28496, -82.279257, -82.277492, 
-82.274088, -82.264291, -82.255698, -82.255733, -82.239226, -82.241055, 
-82.242922, -82.247494, -82.248053, -82.254454, -82.255336, -82.256147, 
-82.257945, -82.258194, -82.265867, -82.268733, -82.286494, -82.291313, 
-82.293869, -82.291049, -82.29141, -82.289125, -82.289154, -82.289147, 
-82.302225, -82.30183, -82.296868, -82.289284, -82.295816, -82.299939, 
-82.305789, -82.319368, -82.325905, -82.338875, -82.343091, 29.703215, 
29.703182, 29.702969, 29.703034, 29.703027, 29.703026, 29.70244, 
29.707463, 29.707526, 29.707465, 29.688234, 29.68786, 29.687374, 
29.686101, 29.681811, 29.681287, 29.675263, 29.674573, 29.674123, 
29.673631, 29.673605, 29.673687, 29.674424, 29.683442, 29.676539, 
29.673816, 29.672477, 29.670273, 29.669202, 29.666451, 29.665342, 
29.665245, 29.666511, 29.673817, 29.685025, 29.687759, 29.688317, 
29.688349, 29.688417, 29.68846, 29.699636, 29.703215), .Dim = c(42L, 
2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(
    list(structure(c(-82.37232, -82.372184, -82.371557, -82.369737, 
    -82.367884, -82.363764, -82.362359, -82.361649, -82.359822, 
    -82.356055, -82.354722, -82.354178, -82.353686, -82.353577, 
    -82.343091, -82.347301, -82.347267, -82.351368, -82.351484, 
    -82.347376, -82.347435, -82.351523, -82.351553, -82.352019, 
    -82.353638, -82.368944, -82.369654, -82.370429, -82.371798, 
    -82.372325, -82.37232, 29.688692, 29.697181, 29.698842, 29.700623, 
    29.701505, 29.703331, 29.703949, 29.704283, 29.705076, 29.706956, 
    29.708439, 29.709796, 29.711902, 29.712234, 29.703215, 29.703272, 
    29.695877, 29.695848, 29.688519, 29.688553, 29.685718, 29.68576, 
    29.681337, 29.681292, 29.681271, 29.68144, 29.681661, 29.682495, 
    29.686602, 29.686819, 29.688692), .Dim = c(31L, 2L)))), class = c("XY", 
"MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-82.37232, 
-82.372184, -82.371557, -82.369737, -82.367884, -82.363764, -82.362359, 
-82.361649, -82.359822, -82.356055, -82.354722, -82.354178, -82.353686, 
-82.353577, -82.343091, -82.347301, -82.347267, -82.351368, -82.351484, 
-82.347376, -82.347435, -82.351523, -82.351553, -82.352019, -82.353638, 
-82.368944, -82.369654, -82.370429, -82.371798, -82.372325, -82.37232, 
29.688692, 29.697181, 29.698842, 29.700623, 29.701505, 29.703331, 
29.703949, 29.704283, 29.705076, 29.706956, 29.708439, 29.709796, 
29.711902, 29.712234, 29.703215, 29.703272, 29.695877, 29.695848, 
29.688519, 29.688553, 29.685718, 29.68576, 29.681337, 29.681292, 
29.681271, 29.68144, 29.681661, 29.682495, 29.686602, 29.686819, 
29.688692), .Dim = c(31L, 2L)))), class = c("XY", "MULTIPOLYGON", 
"sfg")), structure(list(list(structure(c(-82.388979, -82.38896, 
-82.388965, -82.38893, -82.37232, -82.372325, -82.371798, -82.370429, 
-82.369654, -82.368944, -82.353638, -82.352019, -82.351152, -82.346231, 
-82.343221, -82.339192, -82.339216, -82.343244, -82.352908, -82.355771, 
-82.372373, -82.373171, -82.388981, -82.388979, 29.675476, 29.679565, 
29.682268, 29.688733, 29.688692, 29.686819, 29.686602, 29.682495, 
29.681661, 29.68144, 29.681271, 29.681292, 29.680782, 29.674448, 
29.674003, 29.673968, 29.666678, 29.666711, 29.666737, 29.666741, 
29.66677, 29.666905, 29.674086, 29.675476), .Dim = c(24L, 2L)))), class = c("XY", 
"MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-82.339164, 
-82.339091, -82.338972, -82.338875, -82.325905, -82.319368, -82.319369, 
-82.319818, -82.321558, -82.330801, -82.339192, -82.339164, 29.679131, 
29.688544, 29.698982, 29.699636, 29.68846, 29.688417, 29.679087, 
29.6771, 29.673888, 29.673934, 29.673968, 29.679131), .Dim = c(12L, 
2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(
    list(structure(c(-82.33082, -82.326592, -82.326618, -82.326591, 
    -82.325846, -82.323109, -82.323086, -82.310256, -82.30183, 
    -82.306576, -82.311469, -82.311505, -82.315105, -82.317746, 
    -82.318469, -82.325033, -82.326534, -82.326582, -82.330837, 
    -82.33082, 29.653382, 29.653325, 29.658964, 29.659237, 29.659193, 
    29.659231, 29.666604, 29.666546, 29.666511, 29.659338, 29.651985, 
    29.651741, 29.647026, 29.645798, 29.645586, 29.644474, 29.644219, 
    29.650285, 29.650274, 29.653382), .Dim = c(20L, 2L)))), class = c("XY", 
"MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-82.388979, 
-82.38896, -82.388965, -82.38893, -82.37232, -82.372325, -82.371798, 
-82.370429, -82.369654, -82.368944, -82.353638, -82.352019, -82.351152, 
-82.346231, -82.343221, -82.339192, -82.339216, -82.343244, -82.352908, 
-82.355771, -82.372373, -82.373171, -82.388981, -82.388979, 29.675476, 
29.679565, 29.682268, 29.688733, 29.688692, 29.686819, 29.686602, 
29.682495, 29.681661, 29.68144, 29.681271, 29.681292, 29.680782, 
29.674448, 29.674003, 29.673968, 29.666678, 29.666711, 29.666737, 
29.666741, 29.66677, 29.666905, 29.674086, 29.675476), .Dim = c(24L, 
2L)))), class = c("XY", "MULTIPOLYGON", "sfg"))), class = c("sfc_MULTIPOLYGON", 
"sfc"), precision = 0, bbox = structure(c(xmin = -82.388981, 
ymin = 29.640976, xmax = -82.239226, ymax = 29.712234), class = "bbox"), crs = structure(list(
    epsg = 4269L, proj4string = "+proj=longlat +datum=NAD83 +no_defs"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

Answer 1

使用您的代码时，我在尝试绘制 geom_sf 时不断出错。因此，我没有把几何列添加到您的初始数据集，而是反过来，把您的数据集合并到 "alachua" 这个带几何信息的对象上（我使用了 dplyr 中的 left_join 函数）。我还重命名了您初始数据集的列名，并将 GEOID 转换为字符向量以便匹配。

然后，我删除所有不匹配的缺失值，并将以下代码传递给 geom_sf

# Packages used by the answer's snippet.
library(dplyr)
library(ggplot2)
library(sf)

# Rename the CSV columns and turn GEOID into a character vector so that it can
# be matched against the GEOID column of `alachua`.
names(gainsville_df) <- c("GEOID", "Tract", "Population", "ClusterGroup")
gainsville_df[["GEOID"]] <- as.character(gainsville_df[["GEOID"]])

# Join the cluster data onto the geometry object (rather than the reverse).
DF <- left_join(x = alachua, y = gainsville_df, by = "GEOID")

# Plot only rows that received a cluster; wrapping the column in factor()
# makes the fill discrete so that the manual scale applies.
ggplot() +
  geom_sf(
    data = DF[!is.na(DF$ClusterGroup), ],
    mapping = aes(fill = factor(ClusterGroup))
  ) +
  scale_fill_manual(
    name = "Cluster Group",
    values = c("red", "grey", "seagreen3", "gold", "green", "orange")
  ) +
  theme_bw() +
  theme(legend.position = "right")

它能回答您的问题吗？

顺便说一句：alpha 值在这里起不到什么作用，因为每个聚类的多边形都重复出现了很多次（见下文），相同的颜色被反复叠加，透明效果就被掩盖了（例如聚类 1，同一种红色被叠加了 2455 次）。我认为您需要先清理初始数据集，使每个区域只对应一个聚类值。

> summary(as.factor(DF$ClusterGroup))
   1    2    3    4    5    6 NA's 
2455 1344 1783  269  820 1741   29

为什么 geom_sf 不允许使用数据框中的离散列进行填充？

Why does geom_sf is not allowing fill with discrete column from a dataframe?

r

geospatial

ggplot2

sf

tidycensus