使用 pivot_longer 转换为更长的格式

Pivoting to a longer format using pivot_longer

我正在尝试使用 tidyr::pivot_longer 将数据转换为更长的格式,但似乎无法得到我想要的结果。我可以用 reshape2::melt 做到,但我也希望能够使用 pivot_longer 实现相同的效果。

我要重新格式化的数据是 mtcars 数据集的相关矩阵:

# Load packages
library(reshape2) # melt()
library(dplyr)    # mutate(), as_tibble(), %>%
library(tidyr)    # pivot_longer() lives in tidyr, not dplyr
library(ggplot2)  # ggplot() and the layers used for the heatmap

# Get the correlation matrix of six mtcars variables.
# Columns are selected by name rather than by position so the selection is
# self-documenting; these are mtcars columns 1, 3, 4, 5, 6 and 7.
# NOTE: cor() uses method = "pearson" unless told otherwise.
mydata <- mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]
cormat <- round(cor(mydata), 2)

head(cormat)
       mpg  disp    hp  drat    wt  qsec
mpg   1.00 -0.85 -0.78  0.68 -0.87  0.42
disp -0.85  1.00  0.79 -0.71  0.89 -0.43
hp   -0.78  0.79  1.00 -0.45  0.66 -0.71
drat  0.68 -0.71 -0.45  1.00 -0.71  0.09
wt   -0.87  0.89  0.66 -0.71  1.00 -0.17
qsec  0.42 -0.43 -0.71  0.09 -0.17  1.00

那么,我只想过滤掉矩阵的上三角;

# Blank out the upper triangle of the correlation matrix: every cell above the
# diagonal becomes NA, so only the diagonal and the lower triangle keep values.
cormat[upper.tri(cormat)] <- NA # use lower.tri() instead to blank the other half

然后将其整形为长格式:

# Convert the matrix to long format (one row per cell), dropping the NA cells
melted_cormat <- melt(cormat, na.rm = TRUE)

head(melted_cormat)
   Var1 Var2 value value_2
1   mpg  mpg  1.00       1
7   mpg disp -0.85   -0.85
8  disp disp  1.00       1
13  mpg   hp -0.78   -0.78
14 disp   hp  0.79    0.79
15   hp   hp  1.00       1

最后,我做的图是:

# Heatmap of the long-format correlations: one tile per variable pair,
# coloured by the correlation and labelled with its value.
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0,
    # `limits` is spelled out in full; the abbreviated `limit` only worked
    # through partial argument matching.
    limits = c(-1, 1),
    #space = "Lab",
    # NOTE(review): cor() was called with its default method ("pearson"), so
    # this "Spearman" title may not match the data -- confirm which is meant.
    name = "Spearman\nCorrelation"
  ) +
  theme_minimal() +
  coord_fixed() +
  geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
  theme(
    axis.text.x = element_text(
      family = "Calibri", face = "plain", color = "black", size = 12, angle = 0
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    # place a horizontal legend inside the plotting area
    legend.justification = c(1, 0),
    legend.position = c(0.9, 0.3),
    legend.direction = "horizontal"
  ) +
  guides(fill = guide_colorbar(
    barwidth = 7, barheight = 1,
    title.position = "top", title.hjust = 0.5
  ))

我似乎想不出一种方法来使用 pivot_longer 代替 reshape2,同时仍然能正确地绘制出图形。以下代码几乎可以工作(感谢 @geoff):数据集看起来是正确的,但图形不正确:

# Attempt with pivot_longer(): label each row with the matrix column names in
# Var1, then stack the mpg..qsec columns into Var2/value pairs, dropping NAs.
melted_cormat <- cormat %>%
  as_tibble() %>%
  mutate(Var1 = colnames(cormat)) %>%
  pivot_longer(
    cols = mpg:qsec,
    names_to = "Var2",
    values_to = "value",
    values_drop_na = TRUE
  )

运行与上面相同的 ggplot 代码,得到的图如下:

这是否实现了您需要的行为?

# Keep the matrix row names in Var1, then stack the mpg..qsec columns into
# Var2/val pairs (one row per cell of the matrix).
cormat |>
  as_tibble() |>
  mutate(Var1 = rownames(cormat)) |>
  pivot_longer(cols = mpg:qsec, names_to = "Var2", values_to = "val")

输出:

# A tibble: 36 x 3
   Var1  Var2    val
   <chr> <chr> <dbl>
 1 mpg   mpg    1   
 2 mpg   disp  -0.85
 3 mpg   hp    -0.78
 4 mpg   drat   0.68
 5 mpg   wt    -0.87
 6 mpg   qsec   0.42
 7 disp  mpg   -0.85
 8 disp  disp   1   
 9 disp  hp     0.79
10 disp  drat  -0.71
# ... with 26 more rows

corrplot::corrplot() 几乎可以自动生成这样的图,只需很少的工作就可能得到您想要的结果。更多信息请查看该包的 vignette。

library(tidyverse)
library(corrplot)

# Draw the rounded correlation matrix of the six mtcars columns directly with
# corrplot(): coloured upper triangle with the coefficients printed in black.
mtcars[, c(1, 3, 4, 5, 6, 7)] %>%
  cor() %>%
  round(digits = 2) %>%
  corrplot(
    method = "color",
    type = "upper",
    addCoef.col = "black",
    col = colorRampPalette(c("red", "white", "blue"))(200),
    tl.col = "black"
  )

由 reprex 包 (v2.0.1) 创建于 2022 年 1 月 12 日

比较 str(melted) 和 str(pivoted)(见下面的示例)。您会发现较旧的 reshape2::melt() 会将字符串转换为因子 (factor),而 tidyr::pivot_longer() 将其保留为字符串。

这样做的结果是:对于 melted 版本,ggplot() 会根据因子水平对行和列进行排序,从而保留 cormat 中的原始顺序;而在第二种情况下,它们只是普通的字符串,因此只会按字母顺序排列。

要解决此问题,只需使用 mutate() 将 Var1 和 Var2 转换为 factor,并以 cormat 中列的原始顺序作为因子水平。这样就能得到您想要的图。

观察下面示例中最后两段绘图代码的区别。还要注意 cor() 的默认 method 是 "pearson",因此在标注图例时要注意写明实际使用的相关系数方法。

# Load packages
library(tidyverse)
library(reshape2)

# define plotting function
# Plot a long-format correlation table as a labelled heatmap.
#
# Arguments:
#   dat          Data frame with columns Var1 and Var2 (the two variable names
#                of each pair) and value (their correlation). Var2 is mapped to
#                the x axis, Var1 to the y axis and value to the tile fill and
#                the text label.
#   legend_title Title of the fill legend. Defaults to "Spearman\nCorrelation";
#                pass another title when a different cor() method was used.
#
# Returns: a ggplot object.
plot_fun <- function(dat, legend_title = "Spearman\nCorrelation") {
  ggplot(data = dat, aes(Var2, Var1, fill = value)) +
    geom_tile(color = "white") +
    scale_fill_gradient2(
      low = "blue",
      high = "red",
      mid = "white",
      midpoint = 0,
      # `limits` is spelled out in full; the abbreviated `limit` only worked
      # through partial argument matching.
      limits = c(-1, 1),
      #space = "Lab",
      name = legend_title
    ) +
    theme_minimal() +
    coord_fixed() +
    # x and y are inherited from the plot-level mapping; only the label is new
    geom_text(aes(label = value), color = "black", size = 4) +
    theme(
      axis.text.x = element_text(
        family = "Calibri",
        face = "plain",
        color = "black",
        size = 12,
        angle = 0
      ),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      panel.grid.major = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      # place a horizontal legend inside the plotting area
      legend.justification = c(1, 0),
      legend.position = c(0.9, 0.3),
      legend.direction = "horizontal"
    ) +
    guides(fill = guide_colorbar(
      barwidth = 7,
      barheight = 1,
      title.position = "top",
      title.hjust = 0.5
    ))
}

# Spearman correlation matrix of the six mtcars columns (note the explicit
# choice of correlation method), rounded to two decimals, with every cell
# above the diagonal set to NA
cormat <- round(cor(mtcars[, c(1, 3, 4, 5, 6, 7)], method = "spearman"), 2)
cormat[upper.tri(cormat)] <- NA

# long format via reshape2::melt(), dropping the NA cells
melted <- melt(cormat, na.rm = TRUE)

# long format via tidyr::pivot_longer(): move the row names into a Var1
# column, then stack every other column into Var2/value pairs without NAs
pivoted <- cormat %>%
  as.data.frame() %>%
  rownames_to_column(var = "Var1") %>%
  pivot_longer(
    cols = -Var1,
    names_to = "Var2",
    values_to = "value",
    values_drop_na = TRUE
  )

# Compare the column types of the two long tables: in the output below, Var1
# and Var2 are factors in `melted` but character vectors in `pivoted`.
str(melted)
#> 'data.frame':    21 obs. of  3 variables:
#>  $ Var1 : Factor w/ 6 levels "mpg","disp","hp",..: 1 2 3 4 5 6 2 3 4 5 ...
#>  $ Var2 : Factor w/ 6 levels "mpg","disp","hp",..: 1 1 1 1 1 1 2 2 2 2 ...
#>  $ value: num  1 -0.91 -0.89 0.65 -0.89 0.47 1 0.85 -0.68 0.9 ...
str(pivoted)
#> tibble [21 x 3] (S3: tbl_df/tbl/data.frame)
#>  $ Var1 : chr [1:21] "mpg" "disp" "disp" "hp" ...
#>  $ Var2 : chr [1:21] "mpg" "mpg" "disp" "mpg" ...
#>  $ value: num [1:21] 1 -0.91 1 -0.89 0.85 1 0.65 -0.68 -0.52 1 ...

# melted version gives desired plot
plot_fun(melted)

# pivoted version orders variables in alphabetical order
plot_fun(pivoted)

# converting Var1 and Var2 to factors whose levels follow the column order of
# cormat fixes the plot
pivoted %>%
  mutate(across(c(Var1, Var2), \(x) factor(x, levels = colnames(cormat)))) %>%
  plot_fun()

由 reprex 包 (v2.0.1) 创建于 2022 年 1 月 12 日