通过分离列来融合数据
Melt the data by separating the columns
我有一个由 dplyr 的 `summarise_each` 生成的数据框 `df`:
V1_mean V2_mean V3_mean V4_mean V5_mean V1_median V2_median V3_median V4_median V5_median V1_my_mode V2_my_mode V3_my_mode V4_my_mode V5_my_mode V1_sum
1 3 4 NA 3.75 5 3 4 NA 4 5 1 2 NA 4 5 4
V2_sum V3_sum V4_sum V5_sum
1 4 4 4 4
如何将它转换成以下格式?
var mean median my_mode sum
1 V1 3 3 1 4
2 V2 4 4 2 4
3 V3 NA NA NA 4
4 V4 3 4 4 4
5 V5 5 5 5 4
# Reproducible one-row example data (dput() output), assigned to `df` so that
# the reshaping code in this document can be run as-is.
# Column types: *_mean and *_median are double, *_my_mode are character
# (V3_my_mode is a logical NA), and *_sum are integer.
df <- structure(list(V1_mean = 3, V2_mean = 4, V3_mean = NA_real_,
V4_mean = 3.75, V5_mean = 5, V1_median = 3, V2_median = 4,
V3_median = NA_real_, V4_median = 4, V5_median = 5, V1_my_mode = "1",
V2_my_mode = "2", V3_my_mode = NA, V4_my_mode = "4", V5_my_mode = "5",
V1_sum = 4L, V2_sum = 4L, V3_sum = 4L, V4_sum = 4L, V5_sum = 4L),
class = "data.frame", .Names = c("V1_mean",
"V2_mean", "V3_mean", "V4_mean", "V5_mean", "V1_median", "V2_median",
"V3_median", "V4_median", "V5_median", "V1_my_mode", "V2_my_mode",
"V3_my_mode", "V4_my_mode", "V5_my_mode", "V1_sum", "V2_sum",
"V3_sum", "V4_sum", "V5_sum"), row.names = c(NA, -1L))
下面是一种只需稍作修改的方法:先把列名中的 `my_mode` 改为 `myMode`,使每个列名只包含一个下划线。这样,在用 `gather` 把数据"融化"(melt)成长格式之后,`separate` 就能很容易地把列名拆分为变量名和统计量名:
library(dplyr)
library(tidyr)
# Reshape the one-row summary into one row per variable (V1..V5) and one
# column per statistic (mean, median, myMode, sum).
df %>%
  # Rename "*_my_mode" to "*_myMode" so every column name contains exactly one
  # underscore. Without this step separate() would split "V1_my_mode" into
  # three pieces and discard the last one, producing a column named "my".
  setNames(sub("_my_mode$", "_myMode", names(.))) %>%
  # Stack all V* columns into key/value pairs (var, val). The columns have
  # mixed types, so the values end up in a single common-type column.
  gather(var, val, starts_with("V")) %>%
  # Split e.g. "V1_mean" into the variable ("V1") and the statistic ("mean").
  separate(var, into = c("V1", "V2"), sep = "_") %>%
  # Spread the statistics back out into one column each.
  spread(V2, val)
# V1 mean median myMode sum
# 1 V1 3 3 1 4
# 2 V2 4 4 2 4
# 3 V3 <NA> <NA> <NA> 4
# 4 V4 3.75 4 4 4
# 5 V5 5 5 5 4
我有一个由 dplyr 的 `summarise_each` 生成的数据框 `df`:
V1_mean V2_mean V3_mean V4_mean V5_mean V1_median V2_median V3_median V4_median V5_median V1_my_mode V2_my_mode V3_my_mode V4_my_mode V5_my_mode V1_sum
1 3 4 NA 3.75 5 3 4 NA 4 5 1 2 NA 4 5 4
V2_sum V3_sum V4_sum V5_sum
1 4 4 4 4
如何将它转换成以下格式?
var mean median my_mode sum
1 V1 3 3 1 4
2 V2 4 4 2 4
3 V3 NA NA NA 4
4 V4 3 4 4 4
5 V5 5 5 5 4
# Reproducible one-row example data (dput() output), assigned to `df` so that
# the reshaping code in this document can be run as-is.
# Column types: *_mean and *_median are double, *_my_mode are character
# (V3_my_mode is a logical NA), and *_sum are integer.
df <- structure(list(V1_mean = 3, V2_mean = 4, V3_mean = NA_real_,
V4_mean = 3.75, V5_mean = 5, V1_median = 3, V2_median = 4,
V3_median = NA_real_, V4_median = 4, V5_median = 5, V1_my_mode = "1",
V2_my_mode = "2", V3_my_mode = NA, V4_my_mode = "4", V5_my_mode = "5",
V1_sum = 4L, V2_sum = 4L, V3_sum = 4L, V4_sum = 4L, V5_sum = 4L),
class = "data.frame", .Names = c("V1_mean",
"V2_mean", "V3_mean", "V4_mean", "V5_mean", "V1_median", "V2_median",
"V3_median", "V4_median", "V5_median", "V1_my_mode", "V2_my_mode",
"V3_my_mode", "V4_my_mode", "V5_my_mode", "V1_sum", "V2_sum",
"V3_sum", "V4_sum", "V5_sum"), row.names = c(NA, -1L))
下面是一种只需稍作修改的方法:先把列名中的 `my_mode` 改为 `myMode`,使每个列名只包含一个下划线。这样,在用 `gather` 把数据"融化"(melt)成长格式之后,`separate` 就能很容易地把列名拆分为变量名和统计量名:
library(dplyr)
library(tidyr)
# Reshape the one-row summary into one row per variable (V1..V5) and one
# column per statistic (mean, median, myMode, sum).
df %>%
  # Rename "*_my_mode" to "*_myMode" so every column name contains exactly one
  # underscore. Without this step separate() would split "V1_my_mode" into
  # three pieces and discard the last one, producing a column named "my".
  setNames(sub("_my_mode$", "_myMode", names(.))) %>%
  # Stack all V* columns into key/value pairs (var, val). The columns have
  # mixed types, so the values end up in a single common-type column.
  gather(var, val, starts_with("V")) %>%
  # Split e.g. "V1_mean" into the variable ("V1") and the statistic ("mean").
  separate(var, into = c("V1", "V2"), sep = "_") %>%
  # Spread the statistics back out into one column each.
  spread(V2, val)
# V1 mean median myMode sum
# 1 V1 3 3 1 4
# 2 V2 4 4 2 4
# 3 V3 <NA> <NA> <NA> 4
# 4 V4 3.75 4 4 4
# 5 V5 5 5 5 4