使用 ggplot2 在 3 个子图中绘制时间序列数据的原始值、mom 和 yoy 变化

Plot original value, mom and yoy change for time series data in 3 subplots using ggplot2

给定来自 this link 的两个月度时间序列数据样本。

我需要创建一个包含 3 个子图的图:plot1 用于原始值,plot2 用于逐月变化,plot3 用于逐年变化。

我可以用下面的代码画图,但是代码太冗余了。所以我的问题是如何以简洁的方式实现这一目标?谢谢。

library(xlsx)
library(ggplot2)
library(reshape)
library(dplyr)
library(tidyverse)
library(lubridate)
library(cowplot)
library(patchwork)

df <- read.xlsx('./sample_data.xlsx', 'Sheet1')
colnames(df)
# df

cols <- c('food_index', 'energy_index')
df <- df %>% mutate(date=as.Date(date)) %>% 
  mutate(across(-contains('date'), as.numeric)) %>% 
  mutate(date= floor_date(date, 'month')) %>%
  group_by(date) %>%
  summarise_at(vars(cols), funs(mean(., na.rm=TRUE))) %>%
  mutate(across(cols, list(yoy = ~(. - lag(., 12))/lag(., 12)))*100) %>%
  mutate(across(cols, list(mom = ~(. - lag(., 1))/lag(., 1)))*100) %>% 
  filter(date >= '2018-01-01' & date <= '2021-12-31') %>%
  as.data.frame()

df1 <- df %>%
  select(!grep('mom|yoy', names(df))) 

df1_long <- melt(df1, id.vars = 'date')
plot1 <- ggplot(df1_long[!is.na(df1_long$value), ],
       aes(x = date,
           y = value,
           col = variable)) +
  geom_line(size=0.6, alpha=0.5) +
  geom_point(size=1, alpha=0.8) +
  labs(
    x='',
    y='Unit: $'
  ) 

# MoM changes
df2 <- df %>%
  select(grep('date|mom', names(df)))

df2_long <- melt(df2, id.vars = 'date')
plot2 <- ggplot(df2_long[!is.na(df2_long$value), ],
       aes(x = date,
           y = value,
           col = variable)) +
  geom_line(size=0.6, alpha=0.5) +
  geom_point(size=1, alpha=0.8) +
  labs(
    x='',
    y='Unit: %'
  ) 

# YoY changes
df3 <- df %>%
  select(grep('date|yoy', names(df))) 

df3_long <- melt(df3, id.vars = 'date')
plot3 <- ggplot(df3_long[!is.na(df3_long$value), ],
       aes(x = date,
           y = value,
           col = variable)) +
  geom_line(size=0.6, alpha=0.5) +
  geom_point(size=1, alpha=0.8) +
  labs(
    x='',
    y='Unit: %'
  )
plot <- plot1 + plot2 + plot3 + plot_layout(ncol=1)
# plot <- plot_grid(plot1, plot2, plot3, labels = c('Value', 'MoM', 'YoY'), label_size = 12)
plot

输出:

预期结果将类似于下图(上图显示原始数据,中间图显示mom变化数据,下图显示yoy变化数据):

参考文献:

https://waterdata.usgs.gov/blog/beyond-basic-plotting/

http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/

Side-by-side plots with ggplot2

也许这就是您要找的?通过使用绘图函数将数据重塑为正确的形状,例如purrr::map2 您无需像这样重复代码就可以达到您想要的结果。

使用一些虚假的随机示例数据来模仿您的真实数据:

library(tidyr)
library(dplyr)
library(ggplot2)

df_long <- df |> 
  rename(food_index_raw = food_index, energy_index_raw = energy_index) |> 
  pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")

plot_fun <- function(x, y, ylab) {
  x <- x |> 
    select(date, variable, value = .data[[y]]) |> 
    filter(!is.na(value))
  
  ggplot(
    x,
    aes(
      x = date,
      y = value,
      col = variable
    )
  ) +
    geom_line(size = 0.6, alpha = 0.5) +
    geom_point(size = 1, alpha = 0.8) +
    labs(
      x = "",
      y = ylab
    )  
}

yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))

plots <- purrr::map2(yvars, ylabs, plot_fun, x = df_long)

library(patchwork)

wrap_plots(plots) + plot_layout(ncol = 1)

数据

set.seed(123)

date <- seq.POSIXt(as.POSIXct("2017-01-31"), as.POSIXct("2022-12-31"), by = "month")
food_index <- runif(length(date))
energy_index <- runif(length(date))

df <- data.frame(date, food_index, energy_index)

EDIT 在使用 patchwork 时为每个情节添加字幕(目前)有点棘手。在这种情况下我会做的是使用分面“hack”。为此,我稍微调整了函数以获取字幕参数并切换为 purrr::pmap:

library(tidyr)
library(dplyr)
library(ggplot2)

df_long <- df |> 
  rename(food_index_raw = food_index, energy_index_raw = energy_index) |> 
  pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")

plot_fun <- function(x, y, ylab, subtitle) {
  x <- x |> 
    select(date, variable, value = .data[[y]]) |> 
    filter(!is.na(value))
  
  ggplot(
    x,
    aes(
      x = date,
      y = value,
      col = variable
    )
  ) +
    geom_line(size = 0.6, alpha = 0.5) +
    geom_point(size = 1, alpha = 0.8) +
    facet_wrap(~.env$subtitle) +
    labs(
      x = "",
      y = ylab
    ) +
    theme(strip.background = element_blank(), strip.text.x = element_text(hjust = 0))
}

yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
subtitle <- c("Original", "Month-to-Month", "Year-to-Year")

plots <- purrr::pmap(list(y = yvars, ylab = ylabs, subtitle = subtitle), plot_fun, x = df_long)

library(patchwork)

wrap_plots(plots) + plot_layout(ncol = 1)

目标输出是用构面完成的,而不是将图拼接在一起。如果你愿意,你也可以这样做,但它需要以不同的方式重塑你的数据。您采用哪种方法真的是一个品味问题。

library(ggplot2)
library(dplyr)

yoy <- function(x) 100 * (x - lag(x, 13)) / lag(x, 12)
mom <- function(x) 100 * (x - lag(x)) / lag(x)

df %>%
  mutate(date = as.Date(date, origin = "1899-12-30"),
         `Actual value (Dollars).Food Index` = food_index,
         `Month-on-month change (%).Food Index` = mom(food_index),
         `Year-on-year change (%).Food Index` = yoy(food_index),
         `Actual value (Dollars).Energy Index` = energy_index,
         `Month-on-month change (%).Energy Index` = mom(energy_index),
         `Year-on-year change (%).Energy Index` = yoy(energy_index)) %>%
  select(-food_index, -energy_index) %>%
  tidyr::pivot_longer(-1) %>%
  filter(date > as.Date("2018-01-01")) %>%
  tidyr::separate(name, into = c("series", "index"), sep = "\.") %>%
  ggplot(aes(date, value, color = index)) +
  geom_point(na.rm = TRUE) +
  geom_line() +
  facet_grid(series~., scales = "free_y") +
  theme_bw(base_size = 16)


从有问题的 link 中获取的可重现数据

df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916, 
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190, 
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465, 
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738, 
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012, 
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286, 
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27, 
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64, 
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21, 
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22, 
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17, 
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86, 
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01, 
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96, 
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39, 
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33, 
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23, 
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69, 
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62, 
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98, 
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78, 
289.79)), row.names = c(NA, 60L), class = "data.frame")