以特定形式重塑数据
Reshaping data into a specific form
我有如下数据。这是一个简化的数据集,实际上我有好几个 experiment:
# Simplified example data: one row per trial, with the expected ("theoric")
# and observed outcome plus the experiment label. Row names are 2..6.
DF <- data.frame(
  theoric = c("E", "E", "F", "F", "F"),
  observed = c("E", "E", "F", "F", "E"),
  experiment = rep("RO(2)", 5),
  row.names = 2:6,
  stringsAsFactors = FALSE
)
现在我的数据具有以下形式:
theoric observed experiment
2 E E RO(2)
3 E E RO(2)
4 F F RO(2)
5 F F RO(2)
6 F E RO(2)
我希望它按如下方式重塑:
2 3 4 5 6
RO(2) theoric E E F F F
RO(2) observed E E F F E
最简单的方法是什么?我真的不知道该怎么做。我试过了
# Attempted approach: melt DF to long format with "experiment" as the only
# id column ("id" presumably partial-matches melt()'s id.vars argument).
meltR <- melt(DF, id="experiment")
但是我丢失了 theoric
和 observed
之间的所有对应关系。非常感谢
编辑:完整数据集:
# Full data set: three experiments (RO, MO, EL) with 5, 6 and 4 trials.
# Row names are the original, non-contiguous record numbers.
DF <- data.frame(
  theoric = c(
    "E", "E", "F", "F", "F",
    "E", "F", "F", "F", "F", "F",
    "E", "E", "E", "E"
  ),
  observed = c(
    "E", "E", "F", "F", "E",
    "F", "F", "F", "F", "F", "F",
    "E", "E", "E", "F"
  ),
  experiment = rep(c("RO", "MO", "EL"), times = c(5, 6, 4)),
  row.names = c(2:6, 24:29, 21:23, 13L),
  stringsAsFactors = FALSE
)
输出:
col2 col1.2 col1.3 col1.4 col1.5 col1.6 col1.24 col1.25 col1.26
1 RO theoric E E F F F <NA> <NA> <NA>
6 MO theoric <NA> <NA> <NA> <NA> <NA> E F F
12 EL theoric <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
16 RO observed E E F F E <NA> <NA> <NA>
21 MO observed <NA> <NA> <NA> <NA> <NA> F F F
27 EL observed <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
col1.27 col1.28 col1.29 col1.21 col1.22 col1.23 col1.13
1 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
6 F F F <NA> <NA> <NA> <NA>
12 <NA> <NA> <NA> E E E E
16 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
21 F F F <NA> <NA> <NA> <NA>
27 <NA> <NA> <NA> E E E F
编辑 2:添加 EL
输出
RO theoric E E F F F
RO observed E E F F E
MO theoric E F F F F
MO observed F F F F F
EL theoric E E E E
EL observed E E E F
根据预期的输出,我们可能需要创建一个包含 row.names
的列。创建新数据集 ('df2'),方法是 unlist
前两列,复制 'experiment' 列和行名列。然后使用 base R
中的 reshape
将 'long' 格式转换为 'wide'.
# Names of the two value columns (first two columns of DF).
value_cols <- colnames(DF)[1:2]

# Long format: col1 holds the stacked values, col2 the "<experiment> <column>"
# label, and col3 the original row name of each value.
df2 <- data.frame(
  col1 = unlist(DF[value_cols], use.names = FALSE),
  col2 = paste(rep(DF$experiment, 2), rep(value_cols, each = nrow(DF))),
  col3 = rep(row.names(DF), 2)
)

# Wide format: one row per label, one col1.<row name> column per original row.
reshape(df2, idvar = "col2", timevar = "col3", direction = "wide")
# col2 col1.2 col1.3 col1.4 col1.5 col1.6
#1 RO(2) theoric E E F F F
#6 RO(2) observed E E F F E
或使用 data.table
中的 melt/dcast
。将 'data.frame' 转换为 'data.table' 并保留行名称 (setDT(DF, keep.rownames = TRUE)
),melt
将其转换为 'long' 格式,paste
'experiment'和 'variable' 列,然后 dcast
从 'long' 到 'wide' 格式。
library(data.table)
# setDT() converts DF to a data.table by reference; keep.rownames = TRUE
# keeps the original row names in a column named "rn".
dcast(
  melt(
    setDT(DF, keep.rownames = TRUE),
    # Full argument name; "id.var" only worked through partial matching.
    id.vars = c("rn", "experiment")
  )[
    # Label each long row as "<experiment> <variable>".
    , experiment := paste(experiment, variable)
  ],
  # One row per label, one column per original row name.
  experiment ~ rn,
  value.var = "value"
)
# experiment 2 3 4 5 6
#1: RO(2) observed E E F F E
#2: RO(2) theoric E E F F F
更新
使用新数据集,
library(data.table) # v1.9.7+ (presumably needed for rowid())
dcast(
  # Full argument name; "id.var" only worked through partial matching.
  melt(setDT(DF), id.vars = "experiment"),
  # Rows: "<experiment> <variable>" label; columns: running index of each
  # value within its experiment/variable combination.
  paste(experiment, variable) ~ rowid(experiment, variable),
  value.var = "value",
  # Shorter groups are padded with empty strings instead of NA.
  fill = ""
)
# experiment 1 2 3 4 5 6
#1: EL observed E E E F
#2: EL theoric E E E E
#3: MO observed F F F F F F
#4: MO theoric E F F F F F
#5: RO observed E E F F E
#6: RO theoric E E F F F
您还可以执行以下操作:
# library() errors if the package is missing; require() only returns FALSE.
library(tidyverse)
DF %>%
  # Stack theoric/observed into long form (replaces superseded gather()).
  pivot_longer(c(theoric, observed), names_to = "type", values_to = "val") %>%
  # Build the row label, e.g. "RO theoric".
  unite(experiment, experiment, type, sep = " ") %>%
  group_by(experiment) %>%
  # Position of each value within its label; becomes the wide column name.
  mutate(experiment_number = row_number()) %>%
  ungroup() %>%
  # Spread positions into columns (replaces superseded spread()); shorter
  # groups are padded with empty strings.
  pivot_wider(
    names_from = experiment_number,
    values_from = val,
    values_fill = ""
  ) %>%
  # spread() returned rows sorted by label; keep that ordering.
  arrange(experiment)
这给你:
experiment `1` `2` `3` `4` `5` `6`
* <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 EL observed E E E F
2 EL theoric E E E E
3 MO observed F F F F F F
4 MO theoric E F F F F F
5 RO observed E E F F E
6 RO theoric E E F F F
我有如下数据。这是一个简化的数据集,实际上我有好几个 experiment:
# Simplified example data: one row per trial, with the expected ("theoric")
# and observed outcome plus the experiment label. Row names are 2..6.
DF <- data.frame(
  theoric = c("E", "E", "F", "F", "F"),
  observed = c("E", "E", "F", "F", "E"),
  experiment = rep("RO(2)", 5),
  row.names = 2:6,
  stringsAsFactors = FALSE
)
现在我的数据具有以下形式:
theoric observed experiment
2 E E RO(2)
3 E E RO(2)
4 F F RO(2)
5 F F RO(2)
6 F E RO(2)
我希望它按如下方式重塑:
2 3 4 5 6
RO(2) theoric E E F F F
RO(2) observed E E F F E
最简单的方法是什么?我真的不知道该怎么做。我试过了
# Attempted approach: melt DF to long format with "experiment" as the only
# id column ("id" presumably partial-matches melt()'s id.vars argument).
meltR <- melt(DF, id="experiment")
但是我丢失了 theoric
和 observed
之间的所有对应关系。非常感谢
编辑:完整数据集:
# Full data set: three experiments (RO, MO, EL) with 5, 6 and 4 trials.
# Row names are the original, non-contiguous record numbers.
DF <- data.frame(
  theoric = c(
    "E", "E", "F", "F", "F",
    "E", "F", "F", "F", "F", "F",
    "E", "E", "E", "E"
  ),
  observed = c(
    "E", "E", "F", "F", "E",
    "F", "F", "F", "F", "F", "F",
    "E", "E", "E", "F"
  ),
  experiment = rep(c("RO", "MO", "EL"), times = c(5, 6, 4)),
  row.names = c(2:6, 24:29, 21:23, 13L),
  stringsAsFactors = FALSE
)
输出:
col2 col1.2 col1.3 col1.4 col1.5 col1.6 col1.24 col1.25 col1.26
1 RO theoric E E F F F <NA> <NA> <NA>
6 MO theoric <NA> <NA> <NA> <NA> <NA> E F F
12 EL theoric <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
16 RO observed E E F F E <NA> <NA> <NA>
21 MO observed <NA> <NA> <NA> <NA> <NA> F F F
27 EL observed <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
col1.27 col1.28 col1.29 col1.21 col1.22 col1.23 col1.13
1 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
6 F F F <NA> <NA> <NA> <NA>
12 <NA> <NA> <NA> E E E E
16 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
21 F F F <NA> <NA> <NA> <NA>
27 <NA> <NA> <NA> E E E F
编辑 2:添加 EL
输出
RO theoric E E F F F
RO observed E E F F E
MO theoric E F F F F
MO observed F F F F F
EL theoric E E E E
EL observed E E E F
根据预期的输出,我们可能需要创建一个包含 row.names
的列。创建新数据集 ('df2'),方法是 unlist
前两列,复制 'experiment' 列和行名列。然后使用 base R
中的 reshape
将 'long' 格式转换为 'wide'.
# Names of the two value columns (first two columns of DF).
value_cols <- colnames(DF)[1:2]

# Long format: col1 holds the stacked values, col2 the "<experiment> <column>"
# label, and col3 the original row name of each value.
df2 <- data.frame(
  col1 = unlist(DF[value_cols], use.names = FALSE),
  col2 = paste(rep(DF$experiment, 2), rep(value_cols, each = nrow(DF))),
  col3 = rep(row.names(DF), 2)
)

# Wide format: one row per label, one col1.<row name> column per original row.
reshape(df2, idvar = "col2", timevar = "col3", direction = "wide")
# col2 col1.2 col1.3 col1.4 col1.5 col1.6
#1 RO(2) theoric E E F F F
#6 RO(2) observed E E F F E
或使用 data.table
中的 melt/dcast
。将 'data.frame' 转换为 'data.table' 并保留行名称 (setDT(DF, keep.rownames = TRUE)
),melt
将其转换为 'long' 格式,paste
'experiment'和 'variable' 列,然后 dcast
从 'long' 到 'wide' 格式。
library(data.table)
# setDT() converts DF to a data.table by reference; keep.rownames = TRUE
# keeps the original row names in a column named "rn".
dcast(
  melt(
    setDT(DF, keep.rownames = TRUE),
    # Full argument name; "id.var" only worked through partial matching.
    id.vars = c("rn", "experiment")
  )[
    # Label each long row as "<experiment> <variable>".
    , experiment := paste(experiment, variable)
  ],
  # One row per label, one column per original row name.
  experiment ~ rn,
  value.var = "value"
)
# experiment 2 3 4 5 6
#1: RO(2) observed E E F F E
#2: RO(2) theoric E E F F F
更新
使用新数据集,
library(data.table) # v1.9.7+ (presumably needed for rowid())
dcast(
  # Full argument name; "id.var" only worked through partial matching.
  melt(setDT(DF), id.vars = "experiment"),
  # Rows: "<experiment> <variable>" label; columns: running index of each
  # value within its experiment/variable combination.
  paste(experiment, variable) ~ rowid(experiment, variable),
  value.var = "value",
  # Shorter groups are padded with empty strings instead of NA.
  fill = ""
)
# experiment 1 2 3 4 5 6
#1: EL observed E E E F
#2: EL theoric E E E E
#3: MO observed F F F F F F
#4: MO theoric E F F F F F
#5: RO observed E E F F E
#6: RO theoric E E F F F
您还可以执行以下操作:
# library() errors if the package is missing; require() only returns FALSE.
library(tidyverse)
DF %>%
  # Stack theoric/observed into long form (replaces superseded gather()).
  pivot_longer(c(theoric, observed), names_to = "type", values_to = "val") %>%
  # Build the row label, e.g. "RO theoric".
  unite(experiment, experiment, type, sep = " ") %>%
  group_by(experiment) %>%
  # Position of each value within its label; becomes the wide column name.
  mutate(experiment_number = row_number()) %>%
  ungroup() %>%
  # Spread positions into columns (replaces superseded spread()); shorter
  # groups are padded with empty strings.
  pivot_wider(
    names_from = experiment_number,
    values_from = val,
    values_fill = ""
  ) %>%
  # spread() returned rows sorted by label; keep that ordering.
  arrange(experiment)
这给你:
experiment `1` `2` `3` `4` `5` `6`
* <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 EL observed E E E F
2 EL theoric E E E E
3 MO observed F F F F F F
4 MO theoric E F F F F F
5 RO observed E E F F E
6 RO theoric E E F F F