用另一个数据集中的数据填充一个数据集中的多个列

Fill out multiple columns in one dataset with data from another dataset

我想用另一个数据集中的值填充一个数据集中的多个列。

我有两个数据框,df_1 和 df_lookup。

df_1 看起来像这样:

  sub_id diag_1 diag_2 diag_3
1      1   S019   T028   S021
2      2   S520              
3      3   S320   S270   S324
4      4   S023          S109
5      5   S826   S420   S729

df_lookup 看起来像这样:

  diag_map ais iss_br
1      S019   1      6
2      S020   3      6
3      S021   2      1
4      S025   1      1
5      S109   1      1
6      S110   5      2
7      S270   0      0
8      S320   0      0
9      S420   4      4
10     S520   2      5
11     S729   2      3
12     T028   1      0

我想将 df_1 中的诊断(diag_1、diag_2、diag_3)与 df_lookup 中的 diag_map 进行匹配,并将 df_lookup 中对应的 ais 和 iss_br 插入 df_1,得到:

  sub_id diag_1 ais_1 iss_br_1 diag_2 ais_2 iss_br_2 diag_3 ais_3 iss_br_3
1      1   S019     1        6   T028     1        0   S021     2        1
2      2   S520     2        5                                            
3      3   S320     0        0   S270     0        0   S324  <NA>     <NA>
4      4   S023    NA       NA                         S109     1        1
5      5   S826    NA       NA   S420     4        4   S729     2        3

在我的实际数据集中,还有更多行(df_1 中大约有 4000 行,df_lookup 中大约有 25000 行)。

有人能帮我在 R 中实现这一点吗?谢谢!

# Example input: one row per subject with up to three diagnosis codes.
# The leading unnamed field on each data line becomes the row name, and a
# quoted empty string ("") marks a diagnosis slot that was left blank.
df <- read.table(
  header = TRUE,
  text = '  sub_id diag_1 diag_2 diag_3
1      1   S019   T028   S021
2      2   S520   ""  ""         
3      3   S320   S270   S324
4      4   S023   ""       S109
5      5   S826   S420   S729
'
)

# Lookup table: maps each diagnosis code (diag_map) to its ais and iss_br
# values. The leading unnamed field on each data line becomes the row name.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
df_lookup <- read.table(header = TRUE, text = "  diag_map ais iss_br
1      S019   1      6
2      S020   3      6
3      S021   2      1
4      S025   1      1
5      S109   1      1
6      S110   5      2
7      S270   0      0
8      S320   0      0
9      S420   4      4
10     S520   2      5
11     S729   2      3
12     T028   1      0")

library(tidyverse)

# For every diagnosis column, pull the matching ais and iss_br values out of
# df_lookup. match() gives the row of df_lookup whose diag_map equals the
# code, or NA when the code is blank or not present, so unmatched codes end
# up as NA in the new columns.
#
# The diagnosis columns are selected by name pattern (diag_<number>) instead
# of by position, so the lookup still hits the right columns when df has a
# different layout or more than three diagnosis columns. The pattern is
# anchored so it never picks up the derived *_ais / *_iss_br columns.
df %>%
  mutate(
    across(
      matches("^diag_[0-9]+$"),
      ~ df_lookup$ais[match(.x, df_lookup$diag_map)],
      .names = "{.col}_ais"
    ),
    across(
      matches("^diag_[0-9]+$"),
      ~ df_lookup$iss_br[match(.x, df_lookup$diag_map)],
      .names = "{.col}_iss_br"
    )
  )
#>   sub_id diag_1 diag_2 diag_3 diag_1_ais diag_2_ais diag_3_ais diag_1_iss_br
#> 1      1   S019   T028   S021          1          1          2             6
#> 2      2   S520                        2         NA         NA             5
#> 3      3   S320   S270   S324          0          0         NA             0
#> 4      4   S023          S109         NA         NA          1            NA
#> 5      5   S826   S420   S729         NA          4          2            NA
#>   diag_2_iss_br diag_3_iss_br
#> 1             0             1
#> 2            NA            NA
#> 3             0            NA
#> 4            NA             1
#> 5             4             3

由 reprex package (v2.0.0) 于 2021 年 8 月 12 日创建
# Example diagnoses: one row per subject; NA marks an unused diagnosis slot.
sub_id <- c(1, 2, 3, 4, 5)
diag_1 <- c("S019", "S520", "S320", "S023", "S826")
diag_2 <- c("T028", NA, "S270", NA, "S420")
diag_3 <- c("S021", NA, "S324", "S109", "S729")
# Build the frame with data.frame() rather than as.data.frame(cbind(...)):
# cbind() first creates a character matrix, which would turn sub_id into text.
df_1 <- data.frame(sub_id, diag_1, diag_2, diag_3, stringsAsFactors = FALSE)

# Lookup table: diagnosis code -> ais and iss_br values.
diag_map <- c("S019", "S020", "S021", "S025", "S109", "S110",
              "S270", "S320", "S420", "S520", "S729", "T028")
ais <- c(1, 3, 2, 1, 1, 5, 0, 0, 4, 2, 2, 1)
iss_br <- c(6, 6, 1, 1, 1, 2, 0, 0, 4, 5, 3, 0)
# Same reasoning as above: keep ais and iss_br numeric instead of character.
# Note: numeric missing values print as NA, whereas character ones print
# as <NA>.
df_lookup <- data.frame(diag_map, ais, iss_br, stringsAsFactors = FALSE)


library(dplyr)

# Look up each diagnosis column against df_lookup separately. left_join()
# keeps every row of df_1, leaving NA in ais / iss_br where the code has no
# match in diag_map. The looked-up columns are given their numbered names
# directly in select().
join1 <- df_1 %>%
  left_join(df_lookup, by = c("diag_1" = "diag_map")) %>%
  select(sub_id, diag_1, ais_1 = ais, iss_br_1 = iss_br)

join2 <- df_1 %>%
  left_join(df_lookup, by = c("diag_2" = "diag_map")) %>%
  select(diag_2, ais_2 = ais, iss_br_2 = iss_br)

join3 <- df_1 %>%
  left_join(df_lookup, by = c("diag_3" = "diag_map")) %>%
  select(diag_3, ais_3 = ais, iss_br_3 = iss_br)

# Place the three per-diagnosis results side by side, one row per subject.
final <- cbind(join1, join2, join3)
print(final)

> final
  sub_id diag_1 ais_1 iss_br_1 diag_2 ais_2 iss_br_2 diag_3 ais_3 iss_br_3
1      1   S019     1        6   T028     1        0   S021     2        1
2      2   S520     2        5   <NA>  <NA>     <NA>   <NA>  <NA>     <NA>
3      3   S320     0        0   S270     0        0   S324  <NA>     <NA>
4      4   S023  <NA>     <NA>   <NA>  <NA>     <NA>   S109     1        1
5      5   S826  <NA>     <NA>   S420     4        4   S729     2        3

这个问题可能有更自动化的解决方法,但这里先给出一个粗略的可重现示例。