在 R 中,通过将一个表的列名与另一个表的列值相匹配来向行添加值
Add value to a Row by matching column name in one table to column value in another in R
Df1:
variant ID1 ID2 ID3 ID4 .... ID80000
123 0 1 2 1 0
321 1 2 1 1 1
543 1 1 2 1 1
6542 1 0 0 1 0
243 1 0 2 1 1
654 0 1 1 2 1
342 1 2 1 2 1
present 0 1 0 1 0
Df2:
ID sex yob disease
ID1 M 10/10/1910 cancer
ID2 F 05/02/2000 CML
ID3 F 01/01/1983 gout
我想把 DF2 中的各列作为行添加到 DF1 中:按 ID 匹配各列的值,并把 DF2 的列名放入 DF1 的 variant 列中
期望的结果
variant ID1 ID2 ID3 ID4 .... ID80000
123 0 1 2 1 0
321 1 2 1 1 1
543 1 1 2 1 1
6542 1 0 0 1 0
243 1 0 2 1 1
654 0 1 1 2 1
342 1 2 1 2 1
present 0 1 0 1 0
sex M F F NA NA
yob 10/10/1910 05/02/2000 01/01/1983 NA NA
disease cancer CML gout NA NA
我试过:
df1["sex",] <- df2$sex[match(df2$ID, colnames(df1),]
这行不通。
我已经让下面这行代码运行起来了:
df1["sex",] <- ifelse(colnames(df1) %in% df2$ID, df2$sex, NA)
我什至不知道如何一次处理多个列。
如有任何帮助,我们将不胜感激
使用 data.table:
虽然这适用于此示例,但您不能按原样将其用于“任何”其他数据集。它需要一些数据知识,可以在按照准备步骤(见解释)时轻松调整。
library(data.table)
# Transpose df2 so that each attribute (sex, yob, disease) becomes a row whose
# columns are named after the IDs, prepend the attribute names as `variant`,
# then stack the result under df1. `fill = TRUE` pads the ID columns that are
# missing from df2 (e.g. ID4) with NA. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
rbindlist(
  list(
    df1,
    cbind(
      variant = names(df2)[2:ncol(df2)],
      setnames(data.frame(t(df2[, 2:ncol(df2)])), df2[, 1])
    )
  ),
  fill = TRUE
)
variant ID1 ID2 ID3 ID4
1: 123 0 1 2 1
2: 321 1 2 1 1
3: 543 1 1 2 1
4: 6542 1 0 0 1
5: 243 1 0 2 1
6: 654 0 1 1 2
7: 342 1 2 1 2
8: present 0 1 0 1
9: sex M F F NA
10: yob 10/10/1910 05/02/2000 01/01/1983 NA
11: disease cancer CML gout NA
说明
df1 没有问题,但 df2 需要一些处理,因为它没有 variant 列。
# Step 1: transpose every column of df2 except the first (columns 2 to the
# end) and name the resulting columns after the IDs in df2's first column
setnames( data.frame( t(df2[,2:ncol(df2)]) ), df2[,1] )
# Result:
#                ID1        ID2        ID3
# sex              M          F          F
# yob     10/10/1910 05/02/2000 01/01/1983
# disease     cancer        CML       gout
# Step 2: the names of those same columns become the values of the new
# `variant` column
names(df2)[2:ncol(df2)]
# [1] "sex" "yob" "disease"
# Step 3: bind both parts together, naming the first column `variant`
cbind( variant=names(df2)[2:ncol(df2)],
setnames( data.frame( t(df2[,2:ncol(df2)]) ), df2[,1] ))
# Result:
#         variant        ID1        ID2        ID3
# sex         sex          M          F          F
# yob         yob 10/10/1910 05/02/2000 01/01/1983
# disease disease     cancer        CML       gout
# df2 is now ready to be matched against df1's column names with rbindlist,
# as shown above
数据
# Sample data. df1 has a character `variant` column followed by one integer
# column per ID (ID1..ID4).
df1 <- data.frame(
  variant = c("123", "321", "543", "6542", "243", "654", "342", "present"),
  ID1 = c(0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L),
  ID2 = c(1L, 2L, 1L, 0L, 0L, 1L, 2L, 1L),
  ID3 = c(2L, 1L, 2L, 0L, 2L, 1L, 1L, 0L),
  ID4 = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L),
  stringsAsFactors = FALSE
)
# df2 has one row per ID with character attributes sex, yob and disease.
df2 <- data.frame(
  ID = c("ID1", "ID2", "ID3"),
  sex = c("M", "F", "F"),
  yob = c("10/10/1910", "05/02/2000", "01/01/1983"),
  disease = c("cancer", "CML", "gout"),
  stringsAsFactors = FALSE
)
另一种方式,使用 dplyr 调整 df2,使用 magrittr 进行管道操作,data.table 连接两个 df's
# Alternative: reshape df2 with dplyr verbs chained by the magrittr pipe, then
# stack it under df1 with data.table.
library(dplyr)
library(magrittr)
# The right-hand side is evaluated in full before the assignment happens, so
# every `df2` inside the pipeline still refers to the original df2.
# Transpose all columns except the first (ID) and convert to a tibble ...
df2 <- as_tibble(t(df2[, -1])) %>%
# ... name the columns after the IDs ...
`colnames<-` (df2[["ID"]]) %>%
# ... add a `variant` column holding the row names of the transposed data,
# i.e. the original column names of df2 other than ID ...
mutate(variant = rownames(t(df2[, -1]))) %>%
# ... and move `variant` to the front so it lines up with df1.
relocate(variant)
library(data.table)
# Stack the reshaped df2 under df1; fill = TRUE pads columns that are missing
# from either input with NA.
rbindlist(list(df1, df2), fill = TRUE)
Df1:
variant ID1 ID2 ID3 ID4 .... ID80000
123 0 1 2 1 0
321 1 2 1 1 1
543 1 1 2 1 1
6542 1 0 0 1 0
243 1 0 2 1 1
654 0 1 1 2 1
342 1 2 1 2 1
present 0 1 0 1 0
Df2:
ID sex yob disease
ID1 M 10/10/1910 cancer
ID2 F 05/02/2000 CML
ID3 F 01/01/1983 gout
我想把 DF2 中的各列作为行添加到 DF1 中:按 ID 匹配各列的值,并把 DF2 的列名放入 DF1 的 variant 列中
期望的结果
variant ID1 ID2 ID3 ID4 .... ID80000
123 0 1 2 1 0
321 1 2 1 1 1
543 1 1 2 1 1
6542 1 0 0 1 0
243 1 0 2 1 1
654 0 1 1 2 1
342 1 2 1 2 1
present 0 1 0 1 0
sex M F F NA NA
yob 10/10/1910 05/02/2000 01/01/1983 NA NA
disease cancer CML gout NA NA
我试过:
df1["sex",] <- df2$sex[match(df2$ID, colnames(df1),]
这行不通。
我已经让下面这行代码运行起来了:
df1["sex",] <- ifelse(colnames(df1) %in% df2$ID, df2$sex, NA)
我什至不知道如何一次处理多个列。
如有任何帮助,我们将不胜感激
使用 data.table:
虽然这适用于此示例,但您不能按原样将其用于“任何”其他数据集。它需要一些数据知识,可以在按照准备步骤(见解释)时轻松调整。
library(data.table)
# Transpose df2 so that each attribute (sex, yob, disease) becomes a row whose
# columns are named after the IDs, prepend the attribute names as `variant`,
# then stack the result under df1. `fill = TRUE` pads the ID columns that are
# missing from df2 (e.g. ID4) with NA. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
rbindlist(
  list(
    df1,
    cbind(
      variant = names(df2)[2:ncol(df2)],
      setnames(data.frame(t(df2[, 2:ncol(df2)])), df2[, 1])
    )
  ),
  fill = TRUE
)
variant ID1 ID2 ID3 ID4
1: 123 0 1 2 1
2: 321 1 2 1 1
3: 543 1 1 2 1
4: 6542 1 0 0 1
5: 243 1 0 2 1
6: 654 0 1 1 2
7: 342 1 2 1 2
8: present 0 1 0 1
9: sex M F F NA
10: yob 10/10/1910 05/02/2000 01/01/1983 NA
11: disease cancer CML gout NA
说明
df1 没有问题,但 df2 需要一些处理,因为它没有 variant 列。
# Step 1: transpose every column of df2 except the first (columns 2 to the
# end) and name the resulting columns after the IDs in df2's first column
setnames( data.frame( t(df2[,2:ncol(df2)]) ), df2[,1] )
# Result:
#                ID1        ID2        ID3
# sex              M          F          F
# yob     10/10/1910 05/02/2000 01/01/1983
# disease     cancer        CML       gout
# Step 2: the names of those same columns become the values of the new
# `variant` column
names(df2)[2:ncol(df2)]
# [1] "sex" "yob" "disease"
# Step 3: bind both parts together, naming the first column `variant`
cbind( variant=names(df2)[2:ncol(df2)],
setnames( data.frame( t(df2[,2:ncol(df2)]) ), df2[,1] ))
# Result:
#         variant        ID1        ID2        ID3
# sex         sex          M          F          F
# yob         yob 10/10/1910 05/02/2000 01/01/1983
# disease disease     cancer        CML       gout
# df2 is now ready to be matched against df1's column names with rbindlist,
# as shown above
数据
# Sample data. df1 has a character `variant` column followed by one integer
# column per ID (ID1..ID4).
df1 <- data.frame(
  variant = c("123", "321", "543", "6542", "243", "654", "342", "present"),
  ID1 = c(0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L),
  ID2 = c(1L, 2L, 1L, 0L, 0L, 1L, 2L, 1L),
  ID3 = c(2L, 1L, 2L, 0L, 2L, 1L, 1L, 0L),
  ID4 = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L),
  stringsAsFactors = FALSE
)
# df2 has one row per ID with character attributes sex, yob and disease.
df2 <- data.frame(
  ID = c("ID1", "ID2", "ID3"),
  sex = c("M", "F", "F"),
  yob = c("10/10/1910", "05/02/2000", "01/01/1983"),
  disease = c("cancer", "CML", "gout"),
  stringsAsFactors = FALSE
)
另一种方式,使用 dplyr 调整 df2,使用 magrittr 进行管道操作,data.table 连接两个 df's
# Alternative: reshape df2 with dplyr verbs chained by the magrittr pipe, then
# stack it under df1 with data.table.
library(dplyr)
library(magrittr)
# The right-hand side is evaluated in full before the assignment happens, so
# every `df2` inside the pipeline still refers to the original df2.
# Transpose all columns except the first (ID) and convert to a tibble ...
df2 <- as_tibble(t(df2[, -1])) %>%
# ... name the columns after the IDs ...
`colnames<-` (df2[["ID"]]) %>%
# ... add a `variant` column holding the row names of the transposed data,
# i.e. the original column names of df2 other than ID ...
mutate(variant = rownames(t(df2[, -1]))) %>%
# ... and move `variant` to the front so it lines up with df1.
relocate(variant)
library(data.table)
# Stack the reshaped df2 under df1; fill = TRUE pads columns that are missing
# from either input with NA.
rbindlist(list(df1, df2), fill = TRUE)