使用 ggplot2 在 3 个子图中绘制时间序列数据的原始值、mom 和 yoy 变化
Plot original value, mom and yoy change for time series data in 3 subplots using ggplot2
给定来自 this link 的两个月度时间序列数据样本。
我需要创建一个包含 3 个子图的图:plot1 用于原始值,plot2 用于逐月变化,plot3 用于逐年变化。
我可以用下面的代码画图,但是代码太冗余了。所以我的问题是如何以简洁的方式实现这一目标?谢谢。
library(xlsx)
library(ggplot2)
library(reshape)
library(dplyr)
library(tidyverse)
library(lubridate)
library(cowplot)
library(patchwork)
df <- read.xlsx('./sample_data.xlsx', 'Sheet1')
colnames(df)
# df
cols <- c('food_index', 'energy_index')
df <- df %>% mutate(date=as.Date(date)) %>%
mutate(across(-contains('date'), as.numeric)) %>%
mutate(date= floor_date(date, 'month')) %>%
group_by(date) %>%
summarise_at(vars(cols), funs(mean(., na.rm=TRUE))) %>%
mutate(across(cols, list(yoy = ~(. - lag(., 12))/lag(., 12)))*100) %>%
mutate(across(cols, list(mom = ~(. - lag(., 1))/lag(., 1)))*100) %>%
filter(date >= '2018-01-01' & date <= '2021-12-31') %>%
as.data.frame()
df1 <- df %>%
select(!grep('mom|yoy', names(df)))
df1_long <- melt(df1, id.vars = 'date')
plot1 <- ggplot(df1_long[!is.na(df1_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: $'
)
# MoM changes
df2 <- df %>%
select(grep('date|mom', names(df)))
df2_long <- melt(df2, id.vars = 'date')
plot2 <- ggplot(df2_long[!is.na(df2_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
# YoY changes
df3 <- df %>%
select(grep('date|yoy', names(df)))
df3_long <- melt(df3, id.vars = 'date')
plot3 <- ggplot(df3_long[!is.na(df3_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
plot <- plot1 + plot2 + plot3 + plot_layout(ncol=1)
# plot <- plot_grid(plot1, plot2, plot3, labels = c('Value', 'MoM', 'YoY'), label_size = 12)
plot
输出:
预期结果将类似于下图(上图显示原始数据,中间图显示mom变化数据,下图显示yoy变化数据):
参考文献:
https://waterdata.usgs.gov/blog/beyond-basic-plotting/
Side-by-side plots with ggplot2
也许这就是您要找的?通过使用绘图函数将数据重塑为正确的形状,例如purrr::map2
您无需像这样重复代码就可以达到您想要的结果。
使用一些虚假的随机示例数据来模仿您的真实数据:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
labs(
x = "",
y = ylab
)
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
plots <- purrr::map2(yvars, ylabs, plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
数据
set.seed(123)
date <- seq.POSIXt(as.POSIXct("2017-01-31"), as.POSIXct("2022-12-31"), by = "month")
food_index <- runif(length(date))
energy_index <- runif(length(date))
df <- data.frame(date, food_index, energy_index)
EDIT 在使用 patchwork
时为每个情节添加字幕(目前)有点棘手。在这种情况下我会做的是使用分面“hack”。为此,我稍微调整了函数以获取字幕参数并切换为 purrr::pmap
:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab, subtitle) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
facet_wrap(~.env$subtitle) +
labs(
x = "",
y = ylab
) +
theme(strip.background = element_blank(), strip.text.x = element_text(hjust = 0))
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
subtitle <- c("Original", "Month-to-Month", "Year-to-Year")
plots <- purrr::pmap(list(y = yvars, ylab = ylabs, subtitle = subtitle), plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
目标输出是用构面完成的,而不是将图拼接在一起。如果你愿意,你也可以这样做,但它需要以不同的方式重塑你的数据。您采用哪种方法真的是一个品味问题。
library(ggplot2)
library(dplyr)
yoy <- function(x) 100 * (x - lag(x, 13)) / lag(x, 12)
mom <- function(x) 100 * (x - lag(x)) / lag(x)
df %>%
mutate(date = as.Date(date, origin = "1899-12-30"),
`Actual value (Dollars).Food Index` = food_index,
`Month-on-month change (%).Food Index` = mom(food_index),
`Year-on-year change (%).Food Index` = yoy(food_index),
`Actual value (Dollars).Energy Index` = energy_index,
`Month-on-month change (%).Energy Index` = mom(energy_index),
`Year-on-year change (%).Energy Index` = yoy(energy_index)) %>%
select(-food_index, -energy_index) %>%
tidyr::pivot_longer(-1) %>%
filter(date > as.Date("2018-01-01")) %>%
tidyr::separate(name, into = c("series", "index"), sep = "\.") %>%
ggplot(aes(date, value, color = index)) +
geom_point(na.rm = TRUE) +
geom_line() +
facet_grid(series~., scales = "free_y") +
theme_bw(base_size = 16)
从有问题的 link 中获取的可重现数据
df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916,
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190,
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465,
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738,
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012,
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286,
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27,
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64,
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21,
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22,
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17,
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86,
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01,
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96,
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39,
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33,
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23,
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69,
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62,
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98,
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78,
289.79)), row.names = c(NA, 60L), class = "data.frame")
给定来自 this link 的两个月度时间序列数据样本。
我需要创建一个包含 3 个子图的图:plot1 用于原始值,plot2 用于逐月变化,plot3 用于逐年变化。
我可以用下面的代码画图,但是代码太冗余了。所以我的问题是如何以简洁的方式实现这一目标?谢谢。
library(xlsx)
library(ggplot2)
library(reshape)
library(dplyr)
library(tidyverse)
library(lubridate)
library(cowplot)
library(patchwork)
df <- read.xlsx('./sample_data.xlsx', 'Sheet1')
colnames(df)
# df
cols <- c('food_index', 'energy_index')
df <- df %>% mutate(date=as.Date(date)) %>%
mutate(across(-contains('date'), as.numeric)) %>%
mutate(date= floor_date(date, 'month')) %>%
group_by(date) %>%
summarise_at(vars(cols), funs(mean(., na.rm=TRUE))) %>%
mutate(across(cols, list(yoy = ~(. - lag(., 12))/lag(., 12)))*100) %>%
mutate(across(cols, list(mom = ~(. - lag(., 1))/lag(., 1)))*100) %>%
filter(date >= '2018-01-01' & date <= '2021-12-31') %>%
as.data.frame()
df1 <- df %>%
select(!grep('mom|yoy', names(df)))
df1_long <- melt(df1, id.vars = 'date')
plot1 <- ggplot(df1_long[!is.na(df1_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: $'
)
# MoM changes
df2 <- df %>%
select(grep('date|mom', names(df)))
df2_long <- melt(df2, id.vars = 'date')
plot2 <- ggplot(df2_long[!is.na(df2_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
# YoY changes
df3 <- df %>%
select(grep('date|yoy', names(df)))
df3_long <- melt(df3, id.vars = 'date')
plot3 <- ggplot(df3_long[!is.na(df3_long$value), ],
aes(x = date,
y = value,
col = variable)) +
geom_line(size=0.6, alpha=0.5) +
geom_point(size=1, alpha=0.8) +
labs(
x='',
y='Unit: %'
)
plot <- plot1 + plot2 + plot3 + plot_layout(ncol=1)
# plot <- plot_grid(plot1, plot2, plot3, labels = c('Value', 'MoM', 'YoY'), label_size = 12)
plot
输出:
预期结果将类似于下图(上图显示原始数据,中间图显示mom变化数据,下图显示yoy变化数据):
参考文献:
https://waterdata.usgs.gov/blog/beyond-basic-plotting/
Side-by-side plots with ggplot2
也许这就是您要找的?通过使用绘图函数将数据重塑为正确的形状,例如purrr::map2
您无需像这样重复代码就可以达到您想要的结果。
使用一些虚假的随机示例数据来模仿您的真实数据:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
labs(
x = "",
y = ylab
)
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
plots <- purrr::map2(yvars, ylabs, plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
数据
set.seed(123)
date <- seq.POSIXt(as.POSIXct("2017-01-31"), as.POSIXct("2022-12-31"), by = "month")
food_index <- runif(length(date))
energy_index <- runif(length(date))
df <- data.frame(date, food_index, energy_index)
EDIT 在使用 patchwork
时为每个情节添加字幕(目前)有点棘手。在这种情况下我会做的是使用分面“hack”。为此,我稍微调整了函数以获取字幕参数并切换为 purrr::pmap
:
library(tidyr)
library(dplyr)
library(ggplot2)
df_long <- df |>
rename(food_index_raw = food_index, energy_index_raw = energy_index) |>
pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")
plot_fun <- function(x, y, ylab, subtitle) {
x <- x |>
select(date, variable, value = .data[[y]]) |>
filter(!is.na(value))
ggplot(
x,
aes(
x = date,
y = value,
col = variable
)
) +
geom_line(size = 0.6, alpha = 0.5) +
geom_point(size = 1, alpha = 0.8) +
facet_wrap(~.env$subtitle) +
labs(
x = "",
y = ylab
) +
theme(strip.background = element_blank(), strip.text.x = element_text(hjust = 0))
}
yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
subtitle <- c("Original", "Month-to-Month", "Year-to-Year")
plots <- purrr::pmap(list(y = yvars, ylab = ylabs, subtitle = subtitle), plot_fun, x = df_long)
library(patchwork)
wrap_plots(plots) + plot_layout(ncol = 1)
目标输出是用构面完成的,而不是将图拼接在一起。如果你愿意,你也可以这样做,但它需要以不同的方式重塑你的数据。您采用哪种方法真的是一个品味问题。
library(ggplot2)
library(dplyr)
yoy <- function(x) 100 * (x - lag(x, 13)) / lag(x, 12)
mom <- function(x) 100 * (x - lag(x)) / lag(x)
df %>%
mutate(date = as.Date(date, origin = "1899-12-30"),
`Actual value (Dollars).Food Index` = food_index,
`Month-on-month change (%).Food Index` = mom(food_index),
`Year-on-year change (%).Food Index` = yoy(food_index),
`Actual value (Dollars).Energy Index` = energy_index,
`Month-on-month change (%).Energy Index` = mom(energy_index),
`Year-on-year change (%).Energy Index` = yoy(energy_index)) %>%
select(-food_index, -energy_index) %>%
tidyr::pivot_longer(-1) %>%
filter(date > as.Date("2018-01-01")) %>%
tidyr::separate(name, into = c("series", "index"), sep = "\.") %>%
ggplot(aes(date, value, color = index)) +
geom_point(na.rm = TRUE) +
geom_line() +
facet_grid(series~., scales = "free_y") +
theme_bw(base_size = 16)
从有问题的 link 中获取的可重现数据
df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916,
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190,
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465,
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738,
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012,
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286,
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27,
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64,
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21,
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22,
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17,
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86,
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01,
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96,
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39,
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33,
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23,
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69,
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62,
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98,
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78,
289.79)), row.names = c(NA, 60L), class = "data.frame")