使用 pivot_longer 转换为更长的格式
Pivoting to a longer format using pivot_longer
我正在尝试使用 tidyr::pivot_longer 将数据转换为更长的格式,但似乎无法得到我想要的结果。我可以用 reshape2::melt 做到,但我也希望能够使用 pivot_longer 实现相同的效果。
我要重新格式化的数据是 mtcars 数据集的相关矩阵:
# Load packages
library(reshape2) # melt()
library(dplyr)    # %>%, mutate(), as_tibble()
library(tidyr)    # pivot_longer() is exported by tidyr, not dplyr
library(ggplot2)  # ggplot() and the layers used for the heatmap below
# Get the correlation matrix of six mtcars columns, rounded to 2 decimals.
# cor() is called without `method`, so these are Pearson coefficients.
mydata <- mtcars[, c(1, 3, 4, 5, 6, 7)]
cormat <- round(cor(mydata), 2)
head(cormat)
mpg disp hp drat wt qsec
mpg 1.00 -0.85 -0.78 0.68 -0.87 0.42
disp -0.85 1.00 0.79 -0.71 0.89 -0.43
hp -0.78 0.79 1.00 -0.45 0.66 -0.71
drat 0.68 -0.71 -0.45 1.00 -0.71 0.09
wt -0.87 0.89 0.66 -0.71 1.00 -0.17
qsec 0.42 -0.43 -0.71 0.09 -0.17 1.00
那么,我只想过滤掉矩阵的上三角;
# Set the upper triangle to NA, which keeps the lower triangle and the diagonal
cormat[upper.tri(cormat)] <- NA # swap in lower.tri() to keep the upper triangle instead
然后将其整形为长格式:
# Melt the matrix into long form (Var1, Var2, value), dropping the NA cells
melted_cormat <- melt(cormat, na.rm = TRUE)
head(melted_cormat)
Var1 Var2 value value_2
1 mpg mpg 1.00 1
7 mpg disp -0.85 -0.85
8 disp disp 1.00 1
13 mpg hp -0.78 -0.78
14 disp hp 0.79 0.79
15 hp hp 1.00 1
最后,我做的图是:
# Heatmap of the long-format correlations, with each coefficient printed on
# its tile and a horizontal colour bar placed inside the plot area
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0,
    # full argument name; `limit` only worked through partial matching
    limits = c(-1, 1),
    #space = "Lab",
    # cormat was built with cor()'s default method, which is Pearson
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  coord_fixed() +
  geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
  theme(
    axis.text.x = element_text(
      family = "Calibri", face = "plain", color = "black", size = 12, angle = 0
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.9, 0.3),
    legend.direction = "horizontal"
  ) +
  guides(fill = guide_colorbar(
    barwidth = 7, barheight = 1,
    title.position = "top", title.hjust = 0.5
  ))
我似乎想不出如何用 pivot_longer 代替 reshape2 的 melt,同时仍能正确绘制图形。下面的代码几乎可行(感谢 @geoff):数据集看起来是正确的,但绘制出的图形不正确:
# pivot_longer() attempt: the row labels are re-attached as Var1 (taken from
# the column names, which equal the row names of a correlation matrix), then
# the mpg:qsec columns are stacked into Var2/value pairs with NA cells dropped.
melted_cormat <-
cormat %>%
as_tibble() %>%
mutate(Var1 = colnames(cormat)) %>%
pivot_longer(names_to = "Var2", values_to = "value", mpg:qsec, values_drop_na=TRUE)
运行 与上面相同的 ggplot 代码给出:
这是否实现了您需要的行为?
# Label each row with its variable name, then stack the mpg:qsec columns into
# Var2/val pairs. NOTE: the values land in `val`, whereas the ggplot code in
# the question maps fill/label to `value`; values_drop_na is not set here.
cormat |>
as_tibble() |>
mutate(Var1 = rownames(cormat)) |>
pivot_longer(names_to = "Var2", values_to = "val", mpg:qsec)
输出:
# A tibble: 36 x 3
Var1 Var2 val
<chr> <chr> <dbl>
1 mpg mpg 1
2 mpg disp -0.85
3 mpg hp -0.78
4 mpg drat 0.68
5 mpg wt -0.87
6 mpg qsec 0.42
7 disp mpg -0.85
8 disp disp 1
9 disp hp 0.79
10 disp drat -0.71
# ... with 26 more rows
corrplot::corrplot() 几乎全自动的输出也许只需很少的工作就能得到您想要的结果。更多信息请查看其 vignette。
library(tidyverse)
library(corrplot)
# Correlations of six mtcars columns (cor() default method), rounded to two
# decimals and drawn as a colour-coded upper triangle with the coefficients
mtcars[, c(1, 3, 4, 5, 6, 7)] %>%
  cor() %>%
  round(digits = 2) %>%
  corrplot(
    method = "color",
    type = "upper",
    addCoef.col = "black",
    col = colorRampPalette(c("red", "white", "blue"))(200),
    tl.col = "black"
  )
由 reprex package (v2.0.1)
于 2022 年 1 月 12 日创建
比较 str(melted_cormat) 与 str(pivoted_cormat)。您会发现较旧的 reshape2::melt() 会把字符串转换为 factor,而 tidyr::pivot_longer() 则将其保留为字符串。
其结果是:对于 melt 得到的版本,ggplot() 会按因子水平对行和列排序,从而保留 cormat 中的原始顺序;而在第二种情况下,它们只是普通字符串,因此只会按字母顺序排列。
要解决此问题,只需用 mutate() 将 Var1 和 Var2 转换为 factor,并以 cormat 中列的原始顺序作为水平(levels)。这样就能得到您想要的图。
请观察下面示例中最后两个绘图调用的区别。另外请注意,cor 的默认 method 是 "pearson",因此在为图例命名时要确保与实际使用的相关方法一致。
# Load packages
library(tidyverse)
library(reshape2)
# Draw a tile heatmap of long-format correlation data.
#
# dat: data frame with columns Var1 and Var2 (variable names) and value
#      (the coefficient, used for both the tile fill and the tile label).
# Returns the ggplot object. Axis order follows the factor levels of
# Var1/Var2, or alphabetical order when they are plain character vectors.
plot_fun <- function(dat) {
  ggplot(data = dat, aes(Var2, Var1, fill = value)) +
    geom_tile(color = "white") +
    scale_fill_gradient2(
      low = "blue",
      high = "red",
      mid = "white",
      midpoint = 0,
      # full argument name; `limit` only worked through partial matching
      limits = c(-1, 1),
      #space = "Lab",
      # the title is fixed, so callers are expected to pass Spearman values
      name = "Spearman\nCorrelation"
    ) +
    theme_minimal() +
    coord_fixed() +
    # print the coefficient on top of each tile
    geom_text(aes(Var2, Var1, label = value),
      color = "black",
      size = 4
    ) +
    theme(
      axis.text.x = element_text(
        family = "Calibri",
        face = "plain",
        color = "black",
        size = 12,
        angle = 0
      ),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      panel.grid.major = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      # horizontal colour bar placed inside the plot area
      legend.justification = c(1, 0),
      legend.position = c(0.9, 0.3),
      legend.direction = "horizontal"
    ) +
    guides(fill = guide_colorbar(
      barwidth = 7,
      barheight = 1,
      title.position = "top",
      title.hjust = 0.5
    ))
}
# Spearman correlations of six mtcars columns, rounded to two decimals.
# Note the explicit choice of correlation method.
cormat <- round(cor(mtcars[, c(1, 3, 4, 5, 6, 7)], method = "spearman"), 2)
# Blank the upper triangle so each pair of variables appears only once
cormat[upper.tri(cormat)] <- NA

# Long format via reshape2
melted <- melt(cormat, na.rm = TRUE)

# Long format via tidyr: row names become Var1, all other columns are stacked
pivoted <- cormat %>%
  as.data.frame() %>%
  rownames_to_column(var = "Var1") %>%
  pivot_longer(
    cols = -Var1,
    names_to = "Var2",
    values_to = "value",
    values_drop_na = TRUE
  )
# note column types on melted vs pivoted
str(melted)
#> 'data.frame': 21 obs. of 3 variables:
#> $ Var1 : Factor w/ 6 levels "mpg","disp","hp",..: 1 2 3 4 5 6 2 3 4 5 ...
#> $ Var2 : Factor w/ 6 levels "mpg","disp","hp",..: 1 1 1 1 1 1 2 2 2 2 ...
#> $ value: num 1 -0.91 -0.89 0.65 -0.89 0.47 1 0.85 -0.68 0.9 ...
str(pivoted)
#> tibble [21 x 3] (S3: tbl_df/tbl/data.frame)
#> $ Var1 : chr [1:21] "mpg" "disp" "disp" "hp" ...
#> $ Var2 : chr [1:21] "mpg" "mpg" "disp" "mpg" ...
#> $ value: num [1:21] 1 -0.91 1 -0.89 0.85 1 0.65 -0.68 -0.52 1 ...
# melt() output: Var1/Var2 are factors, so the plot keeps cormat's order
plot_fun(melted)
# pivot_longer() output: Var1/Var2 are character, so axes sort alphabetically
plot_fun(pivoted)
# Re-coding the name columns as factors, with cormat's column order as the
# levels, fixes the plot
pivoted %>%
  mutate(
    across(
      starts_with("Var"),
      function(v) factor(v, levels = colnames(cormat))
    )
  ) %>%
  plot_fun()
由 reprex package (v2.0.1)
于 2022 年 1 月 12 日创建
我正在尝试使用 tidyr::pivot_longer 将数据转换为更长的格式,但似乎无法得到我想要的结果。我可以用 reshape2::melt 做到,但我也希望能够使用 pivot_longer 实现相同的效果。
我要重新格式化的数据是 mtcars 数据集的相关矩阵:
# Load packages
library(reshape2) # melt()
library(dplyr)    # %>%, mutate(), as_tibble()
library(tidyr)    # pivot_longer() is exported by tidyr, not dplyr
library(ggplot2)  # ggplot() and the layers used for the heatmap below
# Get the correlation matrix of six mtcars columns, rounded to 2 decimals.
# cor() is called without `method`, so these are Pearson coefficients.
mydata <- mtcars[, c(1, 3, 4, 5, 6, 7)]
cormat <- round(cor(mydata), 2)
head(cormat)
mpg disp hp drat wt qsec
mpg 1.00 -0.85 -0.78 0.68 -0.87 0.42
disp -0.85 1.00 0.79 -0.71 0.89 -0.43
hp -0.78 0.79 1.00 -0.45 0.66 -0.71
drat 0.68 -0.71 -0.45 1.00 -0.71 0.09
wt -0.87 0.89 0.66 -0.71 1.00 -0.17
qsec 0.42 -0.43 -0.71 0.09 -0.17 1.00
那么,我只想过滤掉矩阵的上三角;
# Set the upper triangle to NA, which keeps the lower triangle and the diagonal
cormat[upper.tri(cormat)] <- NA # swap in lower.tri() to keep the upper triangle instead
然后将其整形为长格式:
# Melt the matrix into long form (Var1, Var2, value), dropping the NA cells
melted_cormat <- melt(cormat, na.rm = TRUE)
head(melted_cormat)
Var1 Var2 value value_2
1 mpg mpg 1.00 1
7 mpg disp -0.85 -0.85
8 disp disp 1.00 1
13 mpg hp -0.78 -0.78
14 disp hp 0.79 0.79
15 hp hp 1.00 1
最后,我做的图是:
# Heatmap of the long-format correlations, with each coefficient printed on
# its tile and a horizontal colour bar placed inside the plot area
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0,
    # full argument name; `limit` only worked through partial matching
    limits = c(-1, 1),
    #space = "Lab",
    # cormat was built with cor()'s default method, which is Pearson
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  coord_fixed() +
  geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
  theme(
    axis.text.x = element_text(
      family = "Calibri", face = "plain", color = "black", size = 12, angle = 0
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.9, 0.3),
    legend.direction = "horizontal"
  ) +
  guides(fill = guide_colorbar(
    barwidth = 7, barheight = 1,
    title.position = "top", title.hjust = 0.5
  ))
我似乎想不出如何用 pivot_longer 代替 reshape2 的 melt,同时仍能正确绘制图形。下面的代码几乎可行(感谢 @geoff):数据集看起来是正确的,但绘制出的图形不正确:
# pivot_longer() attempt: the row labels are re-attached as Var1 (taken from
# the column names, which equal the row names of a correlation matrix), then
# the mpg:qsec columns are stacked into Var2/value pairs with NA cells dropped.
melted_cormat <-
cormat %>%
as_tibble() %>%
mutate(Var1 = colnames(cormat)) %>%
pivot_longer(names_to = "Var2", values_to = "value", mpg:qsec, values_drop_na=TRUE)
运行 与上面相同的 ggplot 代码给出:
这是否实现了您需要的行为?
# Label each row with its variable name, then stack the mpg:qsec columns into
# Var2/val pairs. NOTE: the values land in `val`, whereas the ggplot code in
# the question maps fill/label to `value`; values_drop_na is not set here.
cormat |>
as_tibble() |>
mutate(Var1 = rownames(cormat)) |>
pivot_longer(names_to = "Var2", values_to = "val", mpg:qsec)
输出:
# A tibble: 36 x 3
Var1 Var2 val
<chr> <chr> <dbl>
1 mpg mpg 1
2 mpg disp -0.85
3 mpg hp -0.78
4 mpg drat 0.68
5 mpg wt -0.87
6 mpg qsec 0.42
7 disp mpg -0.85
8 disp disp 1
9 disp hp 0.79
10 disp drat -0.71
# ... with 26 more rows
corrplot::corrplot() 几乎全自动的输出也许只需很少的工作就能得到您想要的结果。更多信息请查看其 vignette。
library(tidyverse)
library(corrplot)
# Correlations of six mtcars columns (cor() default method), rounded to two
# decimals and drawn as a colour-coded upper triangle with the coefficients
mtcars[, c(1, 3, 4, 5, 6, 7)] %>%
  cor() %>%
  round(digits = 2) %>%
  corrplot(
    method = "color",
    type = "upper",
    addCoef.col = "black",
    col = colorRampPalette(c("red", "white", "blue"))(200),
    tl.col = "black"
  )
由 reprex package (v2.0.1)
于 2022 年 1 月 12 日创建
比较 str(melted_cormat) 与 str(pivoted_cormat)。您会发现较旧的 reshape2::melt() 会把字符串转换为 factor,而 tidyr::pivot_longer() 则将其保留为字符串。
其结果是:对于 melt 得到的版本,ggplot() 会按因子水平对行和列排序,从而保留 cormat 中的原始顺序;而在第二种情况下,它们只是普通字符串,因此只会按字母顺序排列。
要解决此问题,只需用 mutate() 将 Var1 和 Var2 转换为 factor,并以 cormat 中列的原始顺序作为水平(levels)。这样就能得到您想要的图。
请观察下面示例中最后两个绘图调用的区别。另外请注意,cor 的默认 method 是 "pearson",因此在为图例命名时要确保与实际使用的相关方法一致。
# Load packages
library(tidyverse)
library(reshape2)
# Draw a tile heatmap of long-format correlation data.
#
# dat: data frame with columns Var1 and Var2 (variable names) and value
#      (the coefficient, used for both the tile fill and the tile label).
# Returns the ggplot object. Axis order follows the factor levels of
# Var1/Var2, or alphabetical order when they are plain character vectors.
plot_fun <- function(dat) {
  ggplot(data = dat, aes(Var2, Var1, fill = value)) +
    geom_tile(color = "white") +
    scale_fill_gradient2(
      low = "blue",
      high = "red",
      mid = "white",
      midpoint = 0,
      # full argument name; `limit` only worked through partial matching
      limits = c(-1, 1),
      #space = "Lab",
      # the title is fixed, so callers are expected to pass Spearman values
      name = "Spearman\nCorrelation"
    ) +
    theme_minimal() +
    coord_fixed() +
    # print the coefficient on top of each tile
    geom_text(aes(Var2, Var1, label = value),
      color = "black",
      size = 4
    ) +
    theme(
      axis.text.x = element_text(
        family = "Calibri",
        face = "plain",
        color = "black",
        size = 12,
        angle = 0
      ),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      panel.grid.major = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      # horizontal colour bar placed inside the plot area
      legend.justification = c(1, 0),
      legend.position = c(0.9, 0.3),
      legend.direction = "horizontal"
    ) +
    guides(fill = guide_colorbar(
      barwidth = 7,
      barheight = 1,
      title.position = "top",
      title.hjust = 0.5
    ))
}
# Spearman correlations of six mtcars columns, rounded to two decimals.
# Note the explicit choice of correlation method.
cormat <- round(cor(mtcars[, c(1, 3, 4, 5, 6, 7)], method = "spearman"), 2)
# Blank the upper triangle so each pair of variables appears only once
cormat[upper.tri(cormat)] <- NA

# Long format via reshape2
melted <- melt(cormat, na.rm = TRUE)

# Long format via tidyr: row names become Var1, all other columns are stacked
pivoted <- cormat %>%
  as.data.frame() %>%
  rownames_to_column(var = "Var1") %>%
  pivot_longer(
    cols = -Var1,
    names_to = "Var2",
    values_to = "value",
    values_drop_na = TRUE
  )
# note column types on melted vs pivoted
str(melted)
#> 'data.frame': 21 obs. of 3 variables:
#> $ Var1 : Factor w/ 6 levels "mpg","disp","hp",..: 1 2 3 4 5 6 2 3 4 5 ...
#> $ Var2 : Factor w/ 6 levels "mpg","disp","hp",..: 1 1 1 1 1 1 2 2 2 2 ...
#> $ value: num 1 -0.91 -0.89 0.65 -0.89 0.47 1 0.85 -0.68 0.9 ...
str(pivoted)
#> tibble [21 x 3] (S3: tbl_df/tbl/data.frame)
#> $ Var1 : chr [1:21] "mpg" "disp" "disp" "hp" ...
#> $ Var2 : chr [1:21] "mpg" "mpg" "disp" "mpg" ...
#> $ value: num [1:21] 1 -0.91 1 -0.89 0.85 1 0.65 -0.68 -0.52 1 ...
# melt() output: Var1/Var2 are factors, so the plot keeps cormat's order
plot_fun(melted)
# pivot_longer() output: Var1/Var2 are character, so axes sort alphabetically
plot_fun(pivoted)
# Re-coding the name columns as factors, with cormat's column order as the
# levels, fixes the plot
pivoted %>%
  mutate(
    across(
      starts_with("Var"),
      function(v) factor(v, levels = colnames(cormat))
    )
  ) %>%
  plot_fun()
由 reprex package (v2.0.1)
于 2022 年 1 月 12 日创建