对形状文件取子集以绘制某个区域,并解决 ggplot 与数据不匹配的问题
Subsetting a shapefile to plot a region and addressing a mismatch between ggplot and the data
我正在用数据(病例数)绘制印度的地区(district)地图。首先,我读取各地区的数据和地区形状文件,然后将两者合并。对合并后的对象使用 fortify 得到数据框,再用 ggplot 绘制出了一幅不错的地图。
## nr1 <- read.csv("../OUTPUT/working_districts_state.csv")
## Build an example data frame: one row per district with a random case count.
DISTRICT <- c(
  "Alappuzha", "Ernakulam", "Idukki", "Kannur", "Kasaragod", "Kollam",
  "Kottayam", "Kozhikode", "Malappuram", "Palakkad", "Pathanamthitta",
  "Thiruvananthapuram", "Thrissur", "Wayanad"
)
ST_NM <- rep("Kerala", length(DISTRICT))
reg.cases <- sample(0:250, length(DISTRICT), replace = TRUE)
nr1 <- data.frame(DISTRICT, ST_NM, reg.cases)

## Read the district polygons.
india.dist <- readShapeSpatial("./shapes2/2011_Dist.shp")

## Merge on the Spatial object itself (sp's merge method), NOT on
## india.dist@data: base merge() sorts and drops rows of the attribute table,
## which silently breaks the positional link between rows and polygons and
## shades the wrong districts. Polygons without a match get reg.cases = NA.
india.dist <- merge(
  india.dist,
  nr1[, c("DISTRICT", "reg.cases")],
  by = "DISTRICT"
)

## fortify() labels every vertex with the polygon's own ID slot, so the join
## key must be read from the polygons rather than from the row names.
india.dist@data$id <- vapply(
  india.dist@polygons,
  function(p) p@ID,
  character(1)
)

ss.fort <- fortify(india.dist)
ss.fort1 <- merge(ss.fort, india.dist@data, by = "id")
## merge() does not guarantee row order; restore the vertex drawing order.
ss.fort1 <- ss.fort1[order(ss.fort1$group, ss.fort1$order), ]

d <- ggplot() +
  geom_polygon(
    data = ss.fort1,
    aes(x = long, y = lat, group = group, fill = reg.cases)
  ) +
  coord_equal()
我有两个问题:
1. 我想单独绘制一个邦(state)。我对形状文件取了子集,并尝试用类似的代码绘图。问题出在 fortify 命令上:它创建的 id 列沿用了原始对象 india.dist 中的多边形 ID,而不是子集对象中的行号。因此,当我把 state.fort 与 state@data 合并时,无法得到正确的合并结果。我很困惑,不知道该如何重置这些 ID。
## `kr` is defined outside this excerpt; presumably it is the one-state subset
## of india.dist -- confirm against the code that creates it.
state <- kr

## fortify() tags each vertex with the polygon's ID slot (e.g. "7", "173"),
## whereas the row names of state@data may have been reset to 1..n. Take the
## join key from the polygons so that both tables share the same ids.
## This relies on the sp invariant that row i of state@data describes
## polygon i.
state@data$id <- vapply(state@polygons, function(p) p@ID, character(1))

state.fort <- fortify(state)
state.fort1 <- merge(state.fort, state@data, by = "id")
unique(state.fort[,c(4:7)])
hole piece id group
1 FALSE 1 7 7.1
363 FALSE 1 173 173.1
719 FALSE 1 232 232.1
1384 FALSE 1 277 277.1
1960 TRUE 2 277 277.2
2013 FALSE 1 288 288.1
2561 FALSE 1 314 314.1
3018 FALSE 1 320 320.1
3349 FALSE 1 321 321.1
3753 FALSE 1 358 358.1
4144 FALSE 1 422 422.1
4735 FALSE 1 436 436.1
5204 FALSE 1 567 567.1
5663 FALSE 1 571 571.1
6292 FALSE 1 610 610.1
state@data
DISTRICT ST_NM ST_CEN_CD DT_CEN_CD censuscode reg.cases id
1 Alappuzha Kerala 32 11 598 259 1
2 Ernakulam Kerala 32 8 595 769 2
3 Idukki Kerala 32 9 596 42 3
4 Kannur Kerala 32 2 589 50 4
5 Kasaragod Kerala 32 1 588 38 5
6 Kollam Kerala 32 13 600 381 6
7 Kottayam Kerala 32 10 597 63 7
8 Kozhikode Kerala 32 4 591 237 8
9 Malappuram Kerala 32 5 592 95 9
10 Palakkad Kerala 32 6 593 31 10
11 Pathanamthitta Kerala 32 12 599 30 11
12 Thiruvananthapuram Kerala 32 14 601 97 12
13 Thrissur Kerala 32 7 594 99 13
14 Wayanad Kerala 32 3 590 39 14
2. 第二个问题是:我怀疑 ggplot 没有把数据(病例数)与地区正确对应起来,导致某些地区被填上了属于其他地区的颜色。我不知道如何检查是否发生了这种错配,也不知道如果发生了该如何解决。我认为这个问题同样与形状文件中 ID 的处理有关。
如能给予任何指导,我将不胜感激。提前致谢。
根据评论中的讨论,我想这大概就是你想要的结果。如果需要生成所有地图,只需删除 lapply() 中的 [1:2] 即可。你还需要检查 reg.cases 的最小值和最大值:在当前示例中是 24 和 240,而你的真实数据会有不同的取值,请相应调整 scale_fill_gradientn() 中的 limits。最后,记得把最后一行中的 [1:2] 也删掉。抱歉回答得比较仓促,但这是我目前能提供的帮助。
library(rgdal)
library(dplyr)
library(ggplot2)
library(ggthemes)
### Import data. I am using Mac here.
india <- readOGR(dsn = ".", layer = "2011_Dist", stringsAsFactors = FALSE)

### Keep only the districts whose names appear in nr1.
temp <- subset(india, DISTRICT %in% nr1$DISTRICT)

### Add reg.cases by merging nr1 onto the spatial object (sp's merge method
### keeps attribute rows aligned with their polygons).
x <- merge(temp, nr1, by = "DISTRICT")

### Get district names for the lapply part.
ana <- x$DISTRICT

### Colour-scale limits shared by every map. They are derived from the data
### instead of being hard-coded, so no district falls outside the scale and
### gets drawn as NA.
fill_limits <- range(x$reg.cases, na.rm = TRUE)

### For each district: subset the polygons, fetch its reg.cases value,
### fortify the subset and attach the value as `hue`, then draw the map.
### Delete [1:2] to draw every district.
foo <- lapply(ana[1:2], function(y) {
  district <- subset(x, x@data$DISTRICT == y)
  cases <- district@data$reg.cases
  district_df <- fortify(district) %>%
    mutate(hue = cases)

  ## geom_polygon() is used rather than geom_map(): current ggplot2 ignores
  ## x/y aesthetics in geom_map(), which would leave the panel without limits.
  ggplot() +
    geom_polygon(
      data = district_df,
      aes(x = long, y = lat, group = group, fill = hue)
    ) +
    scale_fill_gradientn(
      limits = fill_limits,
      colors = c("blue", "red"),
      name = "reg.cases"
    ) +
    coord_map() +
    theme_map()
})

# If you print all maps, delete [1:2] in the following line.
# `filename` is spelled out instead of relying on partial matching of `file`.
invisible(Map(ggsave, filename = paste0(ana[1:2], ".png"), plot = foo))
数据
## Example input: one row per district, with its state name (factor) and an
## integer count of registered cases.
nr1 <- local({
  districts <- c(
    "Alappuzha", "Ernakulam", "Idukki", "Kannur", "Kasaragod", "Kollam",
    "Kottayam", "Kozhikode", "Malappuram", "Palakkad", "Pathanamthitta",
    "Thiruvananthapuram", "Thrissur", "Wayanad"
  )
  data.frame(
    ## Levels are given explicitly so their order never depends on the locale.
    DISTRICT = factor(districts, levels = districts),
    ST_NM = factor(rep("Kerala", length(districts))),
    reg.cases = c(
      54L, 84L, 45L, 137L, 140L, 171L, 152L,
      20L, 57L, 184L, 25L, 103L, 94L, 215L
    )
  )
})
我正在用数据(病例数)绘制印度的地区(district)地图。首先,我读取各地区的数据和地区形状文件,然后将两者合并。对合并后的对象使用 fortify 得到数据框,再用 ggplot 绘制出了一幅不错的地图。
## nr1 <- read.csv("../OUTPUT/working_districts_state.csv")
## Build an example data frame: one row per district with a random case count.
DISTRICT <- c(
  "Alappuzha", "Ernakulam", "Idukki", "Kannur", "Kasaragod", "Kollam",
  "Kottayam", "Kozhikode", "Malappuram", "Palakkad", "Pathanamthitta",
  "Thiruvananthapuram", "Thrissur", "Wayanad"
)
ST_NM <- rep("Kerala", length(DISTRICT))
reg.cases <- sample(0:250, length(DISTRICT), replace = TRUE)
nr1 <- data.frame(DISTRICT, ST_NM, reg.cases)

## Read the district polygons.
india.dist <- readShapeSpatial("./shapes2/2011_Dist.shp")

## Merge on the Spatial object itself (sp's merge method), NOT on
## india.dist@data: base merge() sorts and drops rows of the attribute table,
## which silently breaks the positional link between rows and polygons and
## shades the wrong districts. Polygons without a match get reg.cases = NA.
india.dist <- merge(
  india.dist,
  nr1[, c("DISTRICT", "reg.cases")],
  by = "DISTRICT"
)

## fortify() labels every vertex with the polygon's own ID slot, so the join
## key must be read from the polygons rather than from the row names.
india.dist@data$id <- vapply(
  india.dist@polygons,
  function(p) p@ID,
  character(1)
)

ss.fort <- fortify(india.dist)
ss.fort1 <- merge(ss.fort, india.dist@data, by = "id")
## merge() does not guarantee row order; restore the vertex drawing order.
ss.fort1 <- ss.fort1[order(ss.fort1$group, ss.fort1$order), ]

d <- ggplot() +
  geom_polygon(
    data = ss.fort1,
    aes(x = long, y = lat, group = group, fill = reg.cases)
  ) +
  coord_equal()
我有两个问题:
1. 我想单独绘制一个邦(state)。我对形状文件取了子集,并尝试用类似的代码绘图。问题出在 fortify 命令上:它创建的 id 列沿用了原始对象 india.dist 中的多边形 ID,而不是子集对象中的行号。因此,当我把 state.fort 与 state@data 合并时,无法得到正确的合并结果。我很困惑,不知道该如何重置这些 ID。
## `kr` is defined outside this excerpt; presumably it is the one-state subset
## of india.dist -- confirm against the code that creates it.
state <- kr

## fortify() tags each vertex with the polygon's ID slot (e.g. "7", "173"),
## whereas the row names of state@data may have been reset to 1..n. Take the
## join key from the polygons so that both tables share the same ids.
## This relies on the sp invariant that row i of state@data describes
## polygon i.
state@data$id <- vapply(state@polygons, function(p) p@ID, character(1))

state.fort <- fortify(state)
state.fort1 <- merge(state.fort, state@data, by = "id")
unique(state.fort[,c(4:7)])
hole piece id group
1 FALSE 1 7 7.1
363 FALSE 1 173 173.1
719 FALSE 1 232 232.1
1384 FALSE 1 277 277.1
1960 TRUE 2 277 277.2
2013 FALSE 1 288 288.1
2561 FALSE 1 314 314.1
3018 FALSE 1 320 320.1
3349 FALSE 1 321 321.1
3753 FALSE 1 358 358.1
4144 FALSE 1 422 422.1
4735 FALSE 1 436 436.1
5204 FALSE 1 567 567.1
5663 FALSE 1 571 571.1
6292 FALSE 1 610 610.1
state@data
DISTRICT ST_NM ST_CEN_CD DT_CEN_CD censuscode reg.cases id
1 Alappuzha Kerala 32 11 598 259 1
2 Ernakulam Kerala 32 8 595 769 2
3 Idukki Kerala 32 9 596 42 3
4 Kannur Kerala 32 2 589 50 4
5 Kasaragod Kerala 32 1 588 38 5
6 Kollam Kerala 32 13 600 381 6
7 Kottayam Kerala 32 10 597 63 7
8 Kozhikode Kerala 32 4 591 237 8
9 Malappuram Kerala 32 5 592 95 9
10 Palakkad Kerala 32 6 593 31 10
11 Pathanamthitta Kerala 32 12 599 30 11
12 Thiruvananthapuram Kerala 32 14 601 97 12
13 Thrissur Kerala 32 7 594 99 13
14 Wayanad Kerala 32 3 590 39 14
2. 第二个问题是:我怀疑 ggplot 没有把数据(病例数)与地区正确对应起来,导致某些地区被填上了属于其他地区的颜色。我不知道如何检查是否发生了这种错配,也不知道如果发生了该如何解决。我认为这个问题同样与形状文件中 ID 的处理有关。
如能给予任何指导,我将不胜感激。提前致谢。
根据评论中的讨论,我想这大概就是你想要的结果。如果需要生成所有地图,只需删除 lapply() 中的 [1:2] 即可。你还需要检查 reg.cases 的最小值和最大值:在当前示例中是 24 和 240,而你的真实数据会有不同的取值,请相应调整 scale_fill_gradientn() 中的 limits。最后,记得把最后一行中的 [1:2] 也删掉。抱歉回答得比较仓促,但这是我目前能提供的帮助。
library(rgdal)
library(dplyr)
library(ggplot2)
library(ggthemes)
### Import data. I am using Mac here.
india <- readOGR(dsn = ".", layer = "2011_Dist", stringsAsFactors = FALSE)

### Keep only the districts whose names appear in nr1.
temp <- subset(india, DISTRICT %in% nr1$DISTRICT)

### Add reg.cases by merging nr1 onto the spatial object (sp's merge method
### keeps attribute rows aligned with their polygons).
x <- merge(temp, nr1, by = "DISTRICT")

### Get district names for the lapply part.
ana <- x$DISTRICT

### Colour-scale limits shared by every map. They are derived from the data
### instead of being hard-coded, so no district falls outside the scale and
### gets drawn as NA.
fill_limits <- range(x$reg.cases, na.rm = TRUE)

### For each district: subset the polygons, fetch its reg.cases value,
### fortify the subset and attach the value as `hue`, then draw the map.
### Delete [1:2] to draw every district.
foo <- lapply(ana[1:2], function(y) {
  district <- subset(x, x@data$DISTRICT == y)
  cases <- district@data$reg.cases
  district_df <- fortify(district) %>%
    mutate(hue = cases)

  ## geom_polygon() is used rather than geom_map(): current ggplot2 ignores
  ## x/y aesthetics in geom_map(), which would leave the panel without limits.
  ggplot() +
    geom_polygon(
      data = district_df,
      aes(x = long, y = lat, group = group, fill = hue)
    ) +
    scale_fill_gradientn(
      limits = fill_limits,
      colors = c("blue", "red"),
      name = "reg.cases"
    ) +
    coord_map() +
    theme_map()
})

# If you print all maps, delete [1:2] in the following line.
# `filename` is spelled out instead of relying on partial matching of `file`.
invisible(Map(ggsave, filename = paste0(ana[1:2], ".png"), plot = foo))
数据
## Example input: one row per district, with its state name (factor) and an
## integer count of registered cases.
nr1 <- local({
  districts <- c(
    "Alappuzha", "Ernakulam", "Idukki", "Kannur", "Kasaragod", "Kollam",
    "Kottayam", "Kozhikode", "Malappuram", "Palakkad", "Pathanamthitta",
    "Thiruvananthapuram", "Thrissur", "Wayanad"
  )
  data.frame(
    ## Levels are given explicitly so their order never depends on the locale.
    DISTRICT = factor(districts, levels = districts),
    ST_NM = factor(rep("Kerala", length(districts))),
    reg.cases = c(
      54L, 84L, 45L, 137L, 140L, 171L, 152L,
      20L, 57L, 184L, 25L, 103L, 94L, 215L
    )
  )
})