gganimate 图中的标签行为问题
Issue with label behavior in gganimate plot
我有以下生成静态图表的 R 脚本
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(gtable)
library(cowplot)
library(ggrepel)
library(tidyquant)
library(gganimate)
library(gifski)
################################## BASIC SETUP TASKS ###############################
# Run label; it is pasted into the output path when the animation is written.
plotDate <- "May19"
# Root output folder. The trailing slash matters because the path is built
# by plain string concatenation.
basepath <- "C:/Users/your output path here/"

# ISO codes of the countries with testing data reported.
# Spain is deliberately left out because its data is so sporadic.
testCountries <- c(
  "ARG", "AUS", "AUT", "BEL", "BGD", "BHR", "BGR",
  "BOL", "CAN", "CHE", "CHL", "COL", "CRI", "CUB",
  "CZE", "DEU", "DNK", "ECU", "EST", "ETH", "FIN",
  "FRA", "GBR", "GHA", "GRC", "HKG", "HRV", "HUN",
  "IDN", "IND", "IRL", "IRN", "ISL", "ISR", "ITA",
  "JPN", "KAZ", "KEN", "KOR", "LTU", "LUX", "LVA",
  "MAR", "MEX", "MMR", "MYS", "NGA", "NLD", "NOR",
  "NPL", "NZL", "PAK", "PAN", "PER", "PHL", "POL",
  "PRT", "PRY", "ROU", "RUS", "RWA", "SEN", "SGP",
  "SLV", "SRB", "SVK", "SVN", "SWE", "THA", "TUN",
  "TUR", "TWN", "UGA", "URY", "USA", "VNM", "ZAF"
)
# Get population data
# Source: UN World Population Prospects 2019 CSV (see URL). Only the 2020 rows
# are kept; distinct() collapses repeated Location/PopTotal pairs.
url <- "https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv"
pops <- read.csv(url, stringsAsFactors = FALSE, header = TRUE)
pops <- pops %>%
  filter(Time == 2020) %>%
  select(Location, PopTotal) %>%
  distinct()
# Scale PopTotal by 1000 (presumably the file reports thousands -- confirm
# against the UN file documentation).
pops$PopTotal <- pops$PopTotal * 1000
# Lower-case key so the table can be joined to the COVID data by "location".
names(pops)[1] <- "location"
# Align the UN country names with the names used in the COVID data set.
# str_replace_all() treats the pattern as a regular expression and matches
# substrings, so the patterns are anchored with ^...$ to rename only rows whose
# whole name matches. Unanchored, "Republic of Korea" would also rewrite any
# longer name that merely contains it.
pops$location <- pops$location %>%
  str_replace_all("^United States of America$", "United States")
pops$location <- pops$location %>%
  str_replace_all("^Republic of Korea$", "South Korea")
# Get COVID cases, available from:
url <- "https://covid.ourworldindata.org/data/owid-covid-data.csv"
COVtests <- read.csv(url, stringsAsFactors = FALSE, header = TRUE)
# Keep only the countries listed in testCountries and parse the date column.
COVtests <- COVtests %>%
  filter(iso_code %in% testCountries) %>%
  mutate(Date = as.Date(as.character(date), format = "%Y-%m-%d"))

# merge in the population data
# The join key is the free-text country name. A country whose name differs
# between the two sources gets PopTotal = NA and is then silently removed by
# the later PopTotal filter, so report such countries instead of losing them
# unnoticed.
missing_pop <- setdiff(unique(COVtests$location), pops$location)
if (length(missing_pop) > 0) {
  warning(
    "No population match for: ", paste(missing_pop, collapse = ", "),
    call. = FALSE
  )
}
COVtests <- COVtests %>% left_join(pops, by = "location")

# calculate test rates and test positivity rate (both as percentages)
COVtests <- COVtests %>%
  mutate(
    PosRate = (total_cases / total_tests) * 100,
    TestRate = (total_tests / PopTotal) * 100
  )

# Per-country maximum test rate, computed over rows with at least one case.
# max(..., na.rm = TRUE) on an all-NA group returns -Inf with a warning, so
# such groups get NA instead; either value fails a later "> threshold" filter.
COVtests <- COVtests %>%
  group_by(location) %>%
  filter(total_cases > 0) %>%
  mutate(
    maxTestRate = if (all(is.na(TestRate))) {
      NA_real_
    } else {
      max(TestRate, na.rm = TRUE)
    }
  ) %>%
  ungroup()
# keeping temporary data frames while testing
# Countries above both thresholds, reduced to the four plotted columns.
plotCOV <- COVtests %>%
  filter(PopTotal > 20000000, maxTestRate > 0.75) %>%
  select(TestRate, PosRate, location, Date)

# France has a 38% positive rate on day 1 that drops precipitously...removing to clean up
# PosRate is the second column selected above; blank it for that one row.
france_first_day <- which(
  plotCOV$location == "France" & plotCOV$Date == "2020-02-24"
)
plotCOV[["PosRate"]][france_first_day] <- NA

# Drop every row that has an NA in any of the plotted columns.
keep_rows <- complete.cases(plotCOV)
plotCOV2 <- plotCOV[keep_rows, ]

# endpoint layer: per country, the row(s) with the highest TestRate;
# if there's a stall you get doubles, so keep only the latest Date among them.
Endpoints <- plotCOV2 %>%
  group_by(location) %>%
  filter(TestRate == max(TestRate, na.rm = TRUE)) %>%
  filter(Date == max(Date)) %>%
  ungroup()
# Base plot: positivity rate against cumulative share of the population
# tested, one line per country. Endpoint labels are added separately.
g <- plotCOV2 %>%
  ggplot(aes(x = TestRate, y = PosRate)) +
  geom_line(aes(color = location), show.legend = FALSE) +
  labs(
    x = "Cumulative % of Population Tested",
    y = "% of Tests Returning Positive",
    # The data are filtered with PopTotal > 20000000, so the title states
    # 20,000,000 (it previously read 2,000,000).
    title = "Test Positivity Rate (pop > 20,000,000 & > 0.75% pop tested)",
    subtitle = paste("Data as of", format(max(plotCOV2$Date), "%A, %B %e, %Y")),
    # Two consecutive newlines separate the two sources; lineheight = 0.5 in
    # the theme presumably compensates for the resulting blank line.
    caption = paste0(
      "Data: Our World in Data (https://covid.ourworldindata.org)\n\n",
      "UN Population Data (https://population.un.org)"
    )
  ) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold"),
    plot.subtitle = element_text(size = rel(0.7)),
    plot.caption = element_text(size = rel(1), lineheight = 0.5)
  )
# add the endpoints and labels - kept out of g so I can animate it later
# Repelled country-name labels at each line's endpoint.
endpoint_labels <- geom_label_repel(
  mapping = aes(label = location),
  data = Endpoints,
  show.legend = FALSE
)
# Filled circle (shape 21) marking each endpoint, coloured by country.
endpoint_markers <- geom_point(
  mapping = aes(color = location, fill = location),
  data = Endpoints,
  shape = 21,
  size = 1.5,
  show.legend = FALSE
)
h <- g + endpoint_labels + endpoint_markers
print(h)
这段代码运行良好,并生成了以下结果:
我现在想做的是以动画方式逐步绘制这些线条,以传达数据中的时间信息。所以我添加了以下代码:
# I left the endpoint labels out of g - now add geom_text labels
g <- g + geom_text(mapping = aes(label = location))
# Reveal the plot progressively along the Date variable.
a <- g + transition_reveal(Date)
# Output file: <basepath><plotDate>/animated/positivity.gif
gif_path <- paste0(basepath, plotDate, "/animated/positivity.gif")
gif_renderer <- gifski_renderer(
  file = gif_path,
  loop = FALSE,
  width = 2000,
  height = 1500
)
animate(a, renderer = gif_renderer)
这生成了下面的 gif,效果已经很接近了,但标签的行为一团糟,我不明白原因。
我不知道为什么动画开头会有一排重复的标签在行进,也不知道为什么动画结束时只剩下一个标签:昨天剩下的是美国,今天是南非。
其他小问题:我不知道为什么 gifski 的输出如此“窄”("narrow")……我尝试了各种 width 和 height 参数,想让它看起来更像静态图,但结果总是一样。另外,我想知道如何引用日期,以便在标题中显示每一帧对应的日期。我以为可以使用 labs(title='Test Positivity Rate: {frame_time}') 来做到这一点,但它会引发有关 frame_time 的错误。
不过,最主要的问题还是让标签正常显示。
使用 ggplot2 的一个小窍门("life tip"):如果图形元素的连接方式看起来不对,您可能需要使用 group= 美学映射(aesthetic)。这里发生的正是这种情况:ggplot2 不知道应该依据什么来把标签关联起来,所以很可能按照与预期不同的分组把它们全部连到了一起。
在这种情况下,解决办法是让 ggplot2 知道您希望这些点按 location 分组。因此,请在此处添加 group= 美学映射:
# Group the text layer by country so each location's label is handled as its
# own series.
g <- g + geom_text(mapping = aes(group = location, label = location))
它解决了您的问题:
我有以下生成静态图表的 R 脚本
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(gtable)
library(cowplot)
library(ggrepel)
library(tidyquant)
library(gganimate)
library(gifski)
################################## BASIC SETUP TASKS ###############################
# Run label; it is pasted into the output path when the animation is written.
plotDate <- "May19"
# Root output folder. The trailing slash matters because the path is built
# by plain string concatenation.
basepath <- "C:/Users/your output path here/"

# ISO codes of the countries with testing data reported.
# Spain is deliberately left out because its data is so sporadic.
testCountries <- c(
  "ARG", "AUS", "AUT", "BEL", "BGD", "BHR", "BGR",
  "BOL", "CAN", "CHE", "CHL", "COL", "CRI", "CUB",
  "CZE", "DEU", "DNK", "ECU", "EST", "ETH", "FIN",
  "FRA", "GBR", "GHA", "GRC", "HKG", "HRV", "HUN",
  "IDN", "IND", "IRL", "IRN", "ISL", "ISR", "ITA",
  "JPN", "KAZ", "KEN", "KOR", "LTU", "LUX", "LVA",
  "MAR", "MEX", "MMR", "MYS", "NGA", "NLD", "NOR",
  "NPL", "NZL", "PAK", "PAN", "PER", "PHL", "POL",
  "PRT", "PRY", "ROU", "RUS", "RWA", "SEN", "SGP",
  "SLV", "SRB", "SVK", "SVN", "SWE", "THA", "TUN",
  "TUR", "TWN", "UGA", "URY", "USA", "VNM", "ZAF"
)
# Get population data
# Source: UN World Population Prospects 2019 CSV (see URL). Only the 2020 rows
# are kept; distinct() collapses repeated Location/PopTotal pairs.
url <- "https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv"
pops <- read.csv(url, stringsAsFactors = FALSE, header = TRUE)
pops <- pops %>%
  filter(Time == 2020) %>%
  select(Location, PopTotal) %>%
  distinct()
# Scale PopTotal by 1000 (presumably the file reports thousands -- confirm
# against the UN file documentation).
pops$PopTotal <- pops$PopTotal * 1000
# Lower-case key so the table can be joined to the COVID data by "location".
names(pops)[1] <- "location"
# Align the UN country names with the names used in the COVID data set.
# str_replace_all() treats the pattern as a regular expression and matches
# substrings, so the patterns are anchored with ^...$ to rename only rows whose
# whole name matches. Unanchored, "Republic of Korea" would also rewrite any
# longer name that merely contains it.
pops$location <- pops$location %>%
  str_replace_all("^United States of America$", "United States")
pops$location <- pops$location %>%
  str_replace_all("^Republic of Korea$", "South Korea")
# Get COVID cases, available from:
url <- "https://covid.ourworldindata.org/data/owid-covid-data.csv"
COVtests <- read.csv(url, stringsAsFactors = FALSE, header = TRUE)
# Keep only the countries listed in testCountries and parse the date column.
COVtests <- COVtests %>%
  filter(iso_code %in% testCountries) %>%
  mutate(Date = as.Date(as.character(date), format = "%Y-%m-%d"))

# merge in the population data
# The join key is the free-text country name. A country whose name differs
# between the two sources gets PopTotal = NA and is then silently removed by
# the later PopTotal filter, so report such countries instead of losing them
# unnoticed.
missing_pop <- setdiff(unique(COVtests$location), pops$location)
if (length(missing_pop) > 0) {
  warning(
    "No population match for: ", paste(missing_pop, collapse = ", "),
    call. = FALSE
  )
}
COVtests <- COVtests %>% left_join(pops, by = "location")

# calculate test rates and test positivity rate (both as percentages)
COVtests <- COVtests %>%
  mutate(
    PosRate = (total_cases / total_tests) * 100,
    TestRate = (total_tests / PopTotal) * 100
  )

# Per-country maximum test rate, computed over rows with at least one case.
# max(..., na.rm = TRUE) on an all-NA group returns -Inf with a warning, so
# such groups get NA instead; either value fails a later "> threshold" filter.
COVtests <- COVtests %>%
  group_by(location) %>%
  filter(total_cases > 0) %>%
  mutate(
    maxTestRate = if (all(is.na(TestRate))) {
      NA_real_
    } else {
      max(TestRate, na.rm = TRUE)
    }
  ) %>%
  ungroup()
# keeping temporary data frames while testing
# Countries above both thresholds, reduced to the four plotted columns.
plotCOV <- COVtests %>%
  filter(PopTotal > 20000000, maxTestRate > 0.75) %>%
  select(TestRate, PosRate, location, Date)

# France has a 38% positive rate on day 1 that drops precipitously...removing to clean up
# PosRate is the second column selected above; blank it for that one row.
france_first_day <- which(
  plotCOV$location == "France" & plotCOV$Date == "2020-02-24"
)
plotCOV[["PosRate"]][france_first_day] <- NA

# Drop every row that has an NA in any of the plotted columns.
keep_rows <- complete.cases(plotCOV)
plotCOV2 <- plotCOV[keep_rows, ]

# endpoint layer: per country, the row(s) with the highest TestRate;
# if there's a stall you get doubles, so keep only the latest Date among them.
Endpoints <- plotCOV2 %>%
  group_by(location) %>%
  filter(TestRate == max(TestRate, na.rm = TRUE)) %>%
  filter(Date == max(Date)) %>%
  ungroup()
# Base plot: positivity rate against cumulative share of the population
# tested, one line per country. Endpoint labels are added separately.
g <- plotCOV2 %>%
  ggplot(aes(x = TestRate, y = PosRate)) +
  geom_line(aes(color = location), show.legend = FALSE) +
  labs(
    x = "Cumulative % of Population Tested",
    y = "% of Tests Returning Positive",
    # The data are filtered with PopTotal > 20000000, so the title states
    # 20,000,000 (it previously read 2,000,000).
    title = "Test Positivity Rate (pop > 20,000,000 & > 0.75% pop tested)",
    subtitle = paste("Data as of", format(max(plotCOV2$Date), "%A, %B %e, %Y")),
    # Two consecutive newlines separate the two sources; lineheight = 0.5 in
    # the theme presumably compensates for the resulting blank line.
    caption = paste0(
      "Data: Our World in Data (https://covid.ourworldindata.org)\n\n",
      "UN Population Data (https://population.un.org)"
    )
  ) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold"),
    plot.subtitle = element_text(size = rel(0.7)),
    plot.caption = element_text(size = rel(1), lineheight = 0.5)
  )
# add the endpoints and labels - kept out of g so I can animate it later
# Repelled country-name labels at each line's endpoint.
endpoint_labels <- geom_label_repel(
  mapping = aes(label = location),
  data = Endpoints,
  show.legend = FALSE
)
# Filled circle (shape 21) marking each endpoint, coloured by country.
endpoint_markers <- geom_point(
  mapping = aes(color = location, fill = location),
  data = Endpoints,
  shape = 21,
  size = 1.5,
  show.legend = FALSE
)
h <- g + endpoint_labels + endpoint_markers
print(h)
这段代码运行良好,并生成了以下结果:
我现在想做的是以动画方式逐步绘制这些线条,以传达数据中的时间信息。所以我添加了以下代码:
# I left the endpoint labels out of g - now add geom_text labels
g <- g + geom_text(mapping = aes(label = location))
# Reveal the plot progressively along the Date variable.
a <- g + transition_reveal(Date)
# Output file: <basepath><plotDate>/animated/positivity.gif
gif_path <- paste0(basepath, plotDate, "/animated/positivity.gif")
gif_renderer <- gifski_renderer(
  file = gif_path,
  loop = FALSE,
  width = 2000,
  height = 1500
)
animate(a, renderer = gif_renderer)
这生成了下面的 gif,效果已经很接近了,但标签的行为一团糟,我不明白原因。
我不知道为什么动画开头会有一排重复的标签在行进,也不知道为什么动画结束时只剩下一个标签:昨天剩下的是美国,今天是南非。
其他小问题:我不知道为什么 gifski 的输出如此“窄”("narrow")……我尝试了各种 width 和 height 参数,想让它看起来更像静态图,但结果总是一样。另外,我想知道如何引用日期,以便在标题中显示每一帧对应的日期。我以为可以使用 labs(title='Test Positivity Rate: {frame_time}') 来做到这一点,但它会引发有关 frame_time 的错误。
不过,最主要的问题还是让标签正常显示。
使用 ggplot2 的一个小窍门("life tip"):如果图形元素的连接方式看起来不对,您可能需要使用 group= 美学映射(aesthetic)。这里发生的正是这种情况:ggplot2 不知道应该依据什么来把标签关联起来,所以很可能按照与预期不同的分组把它们全部连到了一起。
在这种情况下,解决办法是让 ggplot2 知道您希望这些点按 location 分组。因此,请在此处添加 group= 美学映射:
# Group the text layer by country so each location's label is handled as its
# own series.
g <- g + geom_text(mapping = aes(group = location, label = location))
它解决了您的问题: