用另一个数据集中的数据填充一个数据集中的多个列
Fill out multiple columns in one dataset with data from another dataset
我想用另一个数据集中的值填充一个数据集中的多个列。
我有两个数据框,df_1 和 df_lookup。
df_1 看起来像这样:
sub_id diag_1 diag_2 diag_3
1 1 S019 T028 S021
2 2 S520
3 3 S320 S270 S324
4 4 S023 S109
5 5 S826 S420 S729
df_lookup 看起来像这样:
diag_map ais iss_br
1 S019 1 6
2 S020 3 6
3 S021 2 1
4 S025 1 1
5 S109 1 1
6 S110 5 2
7 S270 0 0
8 S320 0 0
9 S420 4 4
10 S520 2 5
11 S729 2 3
12 T028 1 0
我想将 df_1 中的诊断(diag_1、diag_2、diag_3)与 df_lookup 中的 diag_map 进行匹配,并将 df_lookup 中对应的 ais 和 iss_br 插入 df_1,得到:
sub_id diag_1 ais_1 iss_br_1 diag_2 ais_2 iss_br_2 diag_3 ais_3 iss_br_3
1 1 S019 1 6 T028 1 0 S021 2 1
2 2 S520 2 5
3 3 S320 0 0 S270 0 0 S324 <NA> <NA>
4 4 S023 NA NA S109 1 1
5 5 S826 NA NA S420 4 4 S729 2 3
在我的实际数据集中,还有更多行(df_1 中大约有 4000 行,df_lookup 中大约有 25000 行)。
任何人都可以帮助我在 R 中执行此操作吗?谢谢!
# Look up the `ais` and `iss_br` scores for every diagnosis column of `df`
# by matching its diagnosis codes against `df_lookup$diag_map`.
library(tidyverse)

# Example data. Each header line has one field fewer than the data lines, so
# read.table() takes the first field of every data line as the row name.
# Missing diagnoses are written as "" and are read as empty strings.
df <- read.table(text = ' sub_id diag_1 diag_2 diag_3
1 1 S019 T028 S021
2 2 S520 "" ""
3 3 S320 S270 S324
4 4 S023 "" S109
5 5 S826 S420 S729
', header = TRUE)

df_lookup <- read.table(header = TRUE, text = " diag_map ais iss_br
1 S019 1 6
2 S020 3 6
3 S021 2 1
4 S025 1 1
5 S109 1 1
6 S110 5 2
7 S270 0 0
8 S320 0 0
9 S420 4 4
10 S520 2 5
11 S729 2 3
12 T028 1 0")

# match() gives, for each diagnosis code, its row in the lookup table (NA when
# the code is absent or empty); indexing the score column with that position
# returns the score or NA. The diagnosis columns are selected by name
# (diag_1:diag_3) rather than by position, so the lookup does not depend on
# where `sub_id` or other columns sit in `df`.
df %>%
  mutate(
    across(diag_1:diag_3,
           ~ df_lookup$ais[match(.x, df_lookup$diag_map)],
           .names = "{.col}_ais"),
    across(diag_1:diag_3,
           ~ df_lookup$iss_br[match(.x, df_lookup$diag_map)],
           .names = "{.col}_iss_br")
  )
#> sub_id diag_1 diag_2 diag_3 diag_1_ais diag_2_ais diag_3_ais diag_1_iss_br
#> 1 1 S019 T028 S021 1 1 2 6
#> 2 2 S520 2 NA NA 5
#> 3 3 S320 S270 S324 0 0 NA 0
#> 4 4 S023 S109 NA NA 1 NA
#> 5 5 S826 S420 S729 NA 4 2 NA
#> diag_2_iss_br diag_3_iss_br
#> 1 0 1
#> 2 NA NA
#> 3 0 NA
#> 4 NA 1
#> 5 4 3
由 reprex 包 (v2.0.0) 于 2021 年 8 月 12 日创建
# Build the example data, then attach the `ais` and `iss_br` values from
# `df_lookup` to each diagnosis column of `df_1` using one left join per
# diagnosis column.
library(dplyr)

sub_id <- c(1, 2, 3, 4, 5)
diag_1 <- c("S019", "S520", "S320", "S023", "S826")
diag_2 <- c("T028", NA, "S270", NA, "S420")
diag_3 <- c("S021", NA, "S324", "S109", "S729")
# data.frame() keeps each column's own type. as.data.frame(cbind(...)) would
# first build a character matrix and turn the numeric columns into text.
df_1 <- data.frame(sub_id, diag_1, diag_2, diag_3)

diag_map <- c("S019", "S020", "S021", "S025", "S109", "S110",
              "S270", "S320", "S420", "S520", "S729", "T028")
ais <- c(1, 3, 2, 1, 1, 5, 0, 0, 4, 2, 2, 1)
iss_br <- c(6, 6, 1, 1, 1, 2, 0, 0, 4, 5, 3, 0)
df_lookup <- data.frame(diag_map, ais, iss_br)

# Every code may appear only once in the lookup: a duplicated code would make
# left_join() return extra rows, and the three joins could then no longer be
# bound side by side.
stopifnot(anyDuplicated(df_lookup$diag_map) == 0)

join1 <- df_1 %>%
  left_join(df_lookup, by = c("diag_1" = "diag_map")) %>%
  select(sub_id, diag_1, ais, iss_br)
join2 <- df_1 %>%
  left_join(df_lookup, by = c("diag_2" = "diag_map")) %>%
  select(diag_2, ais, iss_br)
join3 <- df_1 %>%
  left_join(df_lookup, by = c("diag_3" = "diag_map")) %>%
  select(diag_3, ais, iss_br)

# The joins keep the row order of df_1, so they can be bound column-wise.
# cbind() leaves duplicated `ais` / `iss_br` names, which are replaced here.
final <- cbind(join1, join2, join3)
colnames(final) <- c("sub_id",
                     "diag_1", "ais_1", "iss_br_1",
                     "diag_2", "ais_2", "iss_br_2",
                     "diag_3", "ais_3", "iss_br_3")

# Because the scores stay numeric, missing scores print as NA; <NA> is only
# shown for missing values in the character diagnosis columns.
print(final)
> final
sub_id diag_1 ais_1 iss_br_1 diag_2 ais_2 iss_br_2 diag_3 ais_3 iss_br_3
1 1 S019 1 6 T028 1 0 S021 2 1
2 2 S520 2 5 <NA> <NA> <NA> <NA> <NA> <NA>
3 3 S320 0 0 S270 0 0 S324 <NA> <NA>
4 4 S023 <NA> <NA> <NA> <NA> <NA> S109 1 1
5 5 S826 <NA> <NA> S420 4 4 S729 2 3
这个问题可能有更自动化的解决方法,但这里给出一个粗略的可重现示例。
我想用另一个数据集中的值填充一个数据集中的多个列。
我有两个数据框,df_1 和 df_lookup。
df_1 看起来像这样:
sub_id diag_1 diag_2 diag_3
1 1 S019 T028 S021
2 2 S520
3 3 S320 S270 S324
4 4 S023 S109
5 5 S826 S420 S729
df_lookup 看起来像这样:
diag_map ais iss_br
1 S019 1 6
2 S020 3 6
3 S021 2 1
4 S025 1 1
5 S109 1 1
6 S110 5 2
7 S270 0 0
8 S320 0 0
9 S420 4 4
10 S520 2 5
11 S729 2 3
12 T028 1 0
我想将 df_1 中的诊断(diag_1、diag_2、diag_3)与 df_lookup 中的 diag_map 进行匹配,并将 df_lookup 中对应的 ais 和 iss_br 插入 df_1,得到:
sub_id diag_1 ais_1 iss_br_1 diag_2 ais_2 iss_br_2 diag_3 ais_3 iss_br_3
1 1 S019 1 6 T028 1 0 S021 2 1
2 2 S520 2 5
3 3 S320 0 0 S270 0 0 S324 <NA> <NA>
4 4 S023 NA NA S109 1 1
5 5 S826 NA NA S420 4 4 S729 2 3
在我的实际数据集中,还有更多行(df_1 中大约有 4000 行,df_lookup 中大约有 25000 行)。
任何人都可以帮助我在 R 中执行此操作吗?谢谢!
# Look up the `ais` and `iss_br` scores for every diagnosis column of `df`
# by matching its diagnosis codes against `df_lookup$diag_map`.
library(tidyverse)

# Example data. Each header line has one field fewer than the data lines, so
# read.table() takes the first field of every data line as the row name.
# Missing diagnoses are written as "" and are read as empty strings.
df <- read.table(text = ' sub_id diag_1 diag_2 diag_3
1 1 S019 T028 S021
2 2 S520 "" ""
3 3 S320 S270 S324
4 4 S023 "" S109
5 5 S826 S420 S729
', header = TRUE)

df_lookup <- read.table(header = TRUE, text = " diag_map ais iss_br
1 S019 1 6
2 S020 3 6
3 S021 2 1
4 S025 1 1
5 S109 1 1
6 S110 5 2
7 S270 0 0
8 S320 0 0
9 S420 4 4
10 S520 2 5
11 S729 2 3
12 T028 1 0")

# match() gives, for each diagnosis code, its row in the lookup table (NA when
# the code is absent or empty); indexing the score column with that position
# returns the score or NA. The diagnosis columns are selected by name
# (diag_1:diag_3) rather than by position, so the lookup does not depend on
# where `sub_id` or other columns sit in `df`.
df %>%
  mutate(
    across(diag_1:diag_3,
           ~ df_lookup$ais[match(.x, df_lookup$diag_map)],
           .names = "{.col}_ais"),
    across(diag_1:diag_3,
           ~ df_lookup$iss_br[match(.x, df_lookup$diag_map)],
           .names = "{.col}_iss_br")
  )
#> sub_id diag_1 diag_2 diag_3 diag_1_ais diag_2_ais diag_3_ais diag_1_iss_br
#> 1 1 S019 T028 S021 1 1 2 6
#> 2 2 S520 2 NA NA 5
#> 3 3 S320 S270 S324 0 0 NA 0
#> 4 4 S023 S109 NA NA 1 NA
#> 5 5 S826 S420 S729 NA 4 2 NA
#> diag_2_iss_br diag_3_iss_br
#> 1 0 1
#> 2 NA NA
#> 3 0 NA
#> 4 NA 1
#> 5 4 3
由 reprex 包 (v2.0.0) 于 2021 年 8 月 12 日创建
sub_id <- c(1,2,3,4,5)
# Build the rest of the example data, then attach the `ais` and `iss_br`
# values from `df_lookup` to each diagnosis column of `df_1` using one left
# join per diagnosis column. `sub_id` is defined just before this block.
library(dplyr)

diag_1 <- c("S019", "S520", "S320", "S023", "S826")
diag_2 <- c("T028", NA, "S270", NA, "S420")
diag_3 <- c("S021", NA, "S324", "S109", "S729")
# data.frame() keeps each column's own type. as.data.frame(cbind(...)) would
# first build a character matrix and turn the numeric columns into text.
df_1 <- data.frame(sub_id, diag_1, diag_2, diag_3)

diag_map <- c("S019", "S020", "S021", "S025", "S109", "S110",
              "S270", "S320", "S420", "S520", "S729", "T028")
ais <- c(1, 3, 2, 1, 1, 5, 0, 0, 4, 2, 2, 1)
iss_br <- c(6, 6, 1, 1, 1, 2, 0, 0, 4, 5, 3, 0)
df_lookup <- data.frame(diag_map, ais, iss_br)

# Every code may appear only once in the lookup: a duplicated code would make
# left_join() return extra rows, and the three joins could then no longer be
# bound side by side.
stopifnot(anyDuplicated(df_lookup$diag_map) == 0)

join1 <- df_1 %>%
  left_join(df_lookup, by = c("diag_1" = "diag_map")) %>%
  select(sub_id, diag_1, ais, iss_br)
join2 <- df_1 %>%
  left_join(df_lookup, by = c("diag_2" = "diag_map")) %>%
  select(diag_2, ais, iss_br)
join3 <- df_1 %>%
  left_join(df_lookup, by = c("diag_3" = "diag_map")) %>%
  select(diag_3, ais, iss_br)

# The joins keep the row order of df_1, so they can be bound column-wise.
# cbind() leaves duplicated `ais` / `iss_br` names, which are replaced here.
final <- cbind(join1, join2, join3)
colnames(final) <- c("sub_id",
                     "diag_1", "ais_1", "iss_br_1",
                     "diag_2", "ais_2", "iss_br_2",
                     "diag_3", "ais_3", "iss_br_3")

# Because the scores stay numeric, missing scores print as NA; <NA> is only
# shown for missing values in the character diagnosis columns.
print(final)
> final
sub_id diag_1 ais_1 iss_br_1 diag_2 ais_2 iss_br_2 diag_3 ais_3 iss_br_3
1 1 S019 1 6 T028 1 0 S021 2 1
2 2 S520 2 5 <NA> <NA> <NA> <NA> <NA> <NA>
3 3 S320 0 0 S270 0 0 S324 <NA> <NA>
4 4 S023 <NA> <NA> <NA> <NA> <NA> S109 1 1
5 5 S826 <NA> <NA> S420 4 4 S729 2 3
这个问题可能有更自动化的解决方法,但这里给出一个粗略的可重现示例。