如何在 R 中绘制不同的指数,同时保持其中一个固定

How to plot different indices keeping one fixed in R

我有一个包含房价指数(出售和出租)的数据框,其中有 3 个不同版本的 HPI 以及一个中位数价格指数。具体来说:变量 “Value” 存放指数值;“Index” 表示指数类型,取值为 HPI1、HPI2、HPI3 或 Median;“Operation” 表示出售(Sale)或出租(Rent);“Month” 表示月份。数据大致如下,但实际要长得多。

Index Value Operation Month
HPI1 0.9 Sale 01/2020
HPI1 1.1 Rent 02/2020
HPI2 0.89 Sale 01/2020
HPI2 1.12 Rent 02/2020
HPI3 0.85 Sale 01/2020
HPI3 1.22 Rent 02/2020
Median 0.91 Sale 01/2020
Median 1.02 Rent 02/2020

我需要按 Operation(出售/出租)把每个 HPI 与中位数画在一起。也就是说,我需要在一张图中放三个子图:第一个是 HPI1 + Median,第二个是 HPI2 + Median,第三个是 HPI3 + Median。我在循环中使用 ggplot,但只画出了一个包含四条线的图。我不知道如何对每个 HPI 重复绘图,同时保留中位数。有什么想法吗?

非常感谢!

我目前正在使用这段代码(RES是数据框)

# Draw one figure per operation. ggplot objects are not auto-printed inside a
# for loop, so each plot is passed to print() explicitly.
# NOTE(review): the names used here (operation, index, month, year, operacion)
# do not match the sample columns shown above (Index, Value, Operation, Month)
# - confirm against the real RES data frame.
for (z in operation) {
  print(
    ggplot(
      RES[operation == z, ],
      aes(x = interaction(month, year, sep = " "), y = Value, group = index),
      alpha = 0.02
    ) +
      geom_line(aes(linetype = index)) +
      geom_point() +
      scale_linetype_manual(values = c("dotted", "solid")) +
      theme(
        legend.position = "bottom",
        legend.title = element_blank(),
        axis.text.x = element_text(angle = 90)
      ) +
      geom_hline(aes(yintercept = 1, colour = "red"), show.legend = FALSE) +
      facet_wrap(~operacion, scales = "free", ncol = 2)
  )
}

这是一个可能的解决方案。它在有许多 HPI 的情况下也可以扩展,并且完全基于 tidyverse。思路是使用 tidyr 的两个 pivot 函数(pivot_wider 和 pivot_longer),把 Median 放到每个 HPIn 旁边。您可以使用 facet_grid 或 facet_wrap 在一张图像中获得多个绘图。

解决方案

library(dplyr)
library(tidyr)
library(ggplot2)


# Plot every HPI series against the shared Median: one facet row per HPI,
# one facet column per operation.
df %>%
  # "MM/YYYY" strings become Dates anchored on the first day of the month
  mutate(
    Month = as.Date(paste("01", Month, sep = "/"), format = "%d/%m/%Y")
  ) %>%
  # Spread each index into its own column, then stack only the HPI columns
  # back; Median stays a column, so it sits next to every HPI value
  pivot_wider(names_from = Index, values_from = Value) %>%
  pivot_longer(
    cols = starts_with("HPI"),
    names_to = "Index",
    values_to = "Value"
  ) %>%
  # HPI layers inherit y = Value; the Median layers override y
  ggplot(aes(x = Month, y = Value)) +
  geom_line(aes(colour = Index)) +
  geom_line(aes(y = Median, colour = "Median")) +
  geom_point(aes(colour = Index)) +
  geom_point(aes(y = Median, colour = "Median")) +
  scale_x_date(date_breaks = "1 month", date_labels = "%m %Y") +
  facet_grid(rows = vars(Index), cols = vars(Operation))

图例是多余的。如果您不想要它:删除第二个 geom_line 中的 colour = "Median",并在第一个 geom_line 中添加 show.legend = FALSE。或者,您也可以在最后添加 + theme(legend.position = "none")。

数据

# Sample data: the eight rows from the question repeated three times, with
# both months moved forward by one on each repetition.
df <- tibble::tibble(
  # Two consecutive rows (Sale, Rent) per index, four indices per repetition
  Index = rep(rep(c("HPI1", "HPI2", "HPI3", "Median"), each = 2), times = 3),
  Value = rep(c(0.9, 1.1, 0.89, 1.12, 0.85, 1.22, 0.91, 1.02), times = 3),
  Operation = rep(c("Sale", "Rent"), times = 12),
  # Sale / Rent months for repetitions 1, 2 and 3
  Month = c(
    rep(c("01/2020", "02/2020"), times = 4),
    rep(c("02/2020", "03/2020"), times = 4),
    rep(c("03/2020", "04/2020"), times = 4)
  )
)

另一种使用 foreach 循环的方法

注意:我使用了 @Edo 的回答中提供的示例数据(下面的代码沿用问题中的数据框名称 RES)。

初始库加载并准备图例供以后使用

library(ggplot2) # For obvious reason - graph
library(dplyr) # For manipulate data a bit
library(foreach) # Foreach Loop
library(cowplot) # For manipulate graphs

# Line types keyed by legend label: solid for the HPI indices, a fine dotted
# pattern ("1111") for the Median.
line_type <- c("Index (HPI1, HPI2, HPI3)" = "solid", "Median" = "1111")

# Throw-away plot whose only purpose is to provide a legend.
# Every non-Median index is collapsed into one legend entry. If there are
# more indices than HPI1, HPI2, HPI3, the text in parentheses can be dropped
# and the label left as "Index" - then update the names in `line_type` and
# every other place that uses this label.
plot_for_legend <- RES %>%
  mutate(
    Index = if_else(Index == "Median", "Median", "Index (HPI1, HPI2, HPI3)")
  ) %>%
  ggplot(aes(x = Month, y = Value, group = Index)) +
  geom_line(aes(linetype = Index)) +
  scale_linetype_manual(values = line_type) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90)
  )

# Pull the legend out of the dummy plot with cowplot's get_legend()
legend_area <- get_legend(plot_for_legend)

使用 foreach 循环遍历每个操作并绘制它们

# Build one combined figure per operation found in RES (e.g. Rent / Sale).
# Each figure holds one panel per non-Median index, and every panel overlays
# the Median series of the same operation. Relies on `RES`, `line_type` and
# `legend_area` having been defined beforehand.
operation <- unique(RES[["Operation"]])
operation_plot <- foreach(z = operation) %do% {
  # Restrict the data to the current operation
  operation_data <- RES %>% filter(Operation == z)
  median_data <- operation_data %>% filter(Index == "Median")

  # Median layers are built once and reused in every panel of this operation
  median_line_point <- geom_line(
    data = median_data,
    aes(x = Month, y = Value, group = Index, linetype = "Median"),
    alpha = 1
  )
  median_point <- geom_point(
    data = median_data,
    aes(x = Month, y = Value)
  )

  # Every index except the Median gets its own panel
  list_index <- unique(operation_data[["Index"]])
  list_index <- list_index[list_index != "Median"]

  list_plot <- foreach(i_index = list_index) %do% {
    ggplot(
      data = operation_data %>% filter(Index == i_index),
      aes(
        x = Month, y = Value, group = Index,
        # Must match the corresponding name in `line_type`; if that name
        # changes, change it here as well
        linetype = "Index (HPI1, HPI2, HPI3)"
      )
    ) +
      geom_line(linetype = "solid") +
      geom_point() +
      median_line_point + median_point +
      scale_linetype_manual(values = line_type) +
      # Constant aesthetics go outside aes(): inside aes() the string "red"
      # is treated as data and drawn in the default palette colour
      geom_hline(yintercept = 1, colour = "red") +
      ylab(i_index) +
      theme(legend.position = "none")
  }

  # Arrange all panels of this operation in a two-column grid
  current_plot <- plot_grid(plotlist = list_plot, ncol = 2)

  # Bold, left-aligned title naming the operation
  title <- ggdraw() +
    draw_label(
      sprintf("%s operation", z),
      fontface = "bold",
      x = 0,
      hjust = 0
    ) +
    theme(
      # Left margin on the drawing canvas so the title lines up with the
      # left edge of the first plot
      plot.margin = margin(0, 0, 0, 7)
    )

  # Stack title, panel grid and shared legend; this is the value collected
  # by foreach for the current operation
  current_plot_w_legend <- plot_grid(
    title, current_plot, legend_area,
    nrow = 3,
    rel_heights = c(0.5, 10, 1)
  )
  current_plot_w_legend
}
# Name the list elements by operation for easier access
names(operation_plot) <- operation

以下是使用示例数据得到的输出