对两个数据框做左连接:没有报错,但结果不正确

left join two data frames, no error codes but not working

我有两个数据框(df 和 cov),想对它们做 left_join。但出于某种原因,它们无法正确连接。谁能看看我哪里做错了?代码没有报错,只是结果中来自 cov 的列全部为 NA。

# Lab results table (left-hand side of the join): six ALT chemistry records.
# Every column except lbstresn holds one value repeated across all rows.
df <- structure(
  list(
    LBCAT = rep("CHEMISTRY", 6L),
    LBTEST = rep("ALANINE AMINOTRANSFERASE", 6L),
    lbstresn = c(6, 11, 26, 21, 14, 13),
    lbstresu = rep("U/L", 6L),
    lbstnrlo = rep(6, 6L),
    lbstnrhi = rep(37, 6L)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Unit-conversion lookup table (right-hand side of the join): one row per
# original unit (LBORRESU) for ALT, with its conversion factor (LBSICF).
# Note that LBTEST is stored in mixed case here.
cov <- structure(
  list(
    LBCAT = rep("CHEMISTRY", 7L),
    LBTESTCD = rep("ALT", 7L),
    LBTEST = rep("Alanine Aminotransferase", 7L),
    LBORRESU = c(
      "NKAT/L", "IU/L", "MU/ML", "U/L", "UKAT/L", "MCKAT/L", "IU/ML"
    ),
    LBSTRESU = rep("ukat/L", 7L),
    LBSICF = c(0.001, 0.0167, 0.0167, 0.0167, 1, 1, 16.7)
  ),
  row.names = c(NA, -7L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Attempted join (kept as asked; this is the call that does not work).
# toupper() is applied here to the column-name strings, which are already
# upper case, so it changes nothing; it does not upper-case the values stored
# in cov. left_join() compares key values case-sensitively, and cov$LBTEST is
# "Alanine Aminotransferase" while df$LBTEST is "ALANINE AMINOTRANSFERASE",
# so no row matches and every column coming from cov is NA.
test <- left_join(df, cov, by = c('LBCAT'= toupper('LBCAT'), 'LBTEST' = toupper('LBTEST'), 'lbstresu' = toupper('LBORRESU'))) 

我们需要先用 mutate 把 cov 中用作连接键的列(LBCAT、LBTEST、LBORRESU)的值转换为大写,然后再做连接

library(dplyr)
# Upper-case the values of cov's key columns so they match the casing used in
# df, then left-join df against that normalised copy. The unit key has a
# different name on each side (lbstresu in df, LBORRESU in cov).
df %>%
  left_join(
    cov %>% dplyr::mutate(across(c(LBCAT, LBTEST, LBORRESU), toupper)),
    by = c("LBCAT", "LBTEST", "lbstresu" = "LBORRESU")
  )

-输出

# A tibble: 6 x 9
  LBCAT     LBTEST                   lbstresn lbstresu lbstnrlo lbstnrhi LBTESTCD LBSTRESU LBSICF
  <chr>     <chr>                       <dbl> <chr>       <dbl>    <dbl> <chr>    <chr>     <dbl>
1 CHEMISTRY ALANINE AMINOTRANSFERASE        6 U/L             6       37 ALT      ukat/L   0.0167
2 CHEMISTRY ALANINE AMINOTRANSFERASE       11 U/L             6       37 ALT      ukat/L   0.0167
3 CHEMISTRY ALANINE AMINOTRANSFERASE       26 U/L             6       37 ALT      ukat/L   0.0167
4 CHEMISTRY ALANINE AMINOTRANSFERASE       21 U/L             6       37 ALT      ukat/L   0.0167
5 CHEMISTRY ALANINE AMINOTRANSFERASE       14 U/L             6       37 ALT      ukat/L   0.0167
6 CHEMISTRY ALANINE AMINOTRANSFERASE       13 U/L             6       37 ALT      ukat/L   0.0167

我们也可以使用 fuzzyjoin 包中的 regex_left_join,并设置参数 ignore_case = TRUE

# Attach with library() so a missing package stops with an error; require()
# would only return FALSE and the failure would surface later.
library(fuzzyjoin)

# Case-insensitive left join of df against cov.
# regex_left_join() treats the key values in cov as regular expressions and
# matches them against the corresponding key values in df; ignore_case = TRUE
# makes that matching case-insensitive. Because both tables keep their own key
# columns, shared names come back suffixed (.x / .y).
# NOTE(review): the patterns do not appear to be anchored, so a cov value that
# is a substring of a df value (e.g. "U/L" inside "IU/L") would presumably also
# match, and regex metacharacters in cov values are interpreted as such.
# Confirm this is acceptable for the full data before relying on it.
regex_left_join(
  df, cov,
  by = c("LBCAT", "LBTEST", "lbstresu" = "LBORRESU"),
  ignore_case = TRUE
)
# A tibble: 6 x 12
  LBCAT.x   LBTEST.x                 lbstresn lbstresu lbstnrlo lbstnrhi LBCAT.y   LBTESTCD LBTEST.y        LBORRESU LBSTRESU LBSICF
  <chr>     <chr>                       <dbl> <chr>       <dbl>    <dbl> <chr>     <chr>    <chr>           <chr>    <chr>     <dbl>
1 CHEMISTRY ALANINE AMINOTRANSFERASE        6 U/L             6       37 CHEMISTRY ALT      Alanine Aminot~ U/L      ukat/L   0.0167
2 CHEMISTRY ALANINE AMINOTRANSFERASE       11 U/L             6       37 CHEMISTRY ALT      Alanine Aminot~ U/L      ukat/L   0.0167
3 CHEMISTRY ALANINE AMINOTRANSFERASE       26 U/L             6       37 CHEMISTRY ALT      Alanine Aminot~ U/L      ukat/L   0.0167
4 CHEMISTRY ALANINE AMINOTRANSFERASE       21 U/L             6       37 CHEMISTRY ALT      Alanine Aminot~ U/L      ukat/L   0.0167
5 CHEMISTRY ALANINE AMINOTRANSFERASE       14 U/L             6       37 CHEMISTRY ALT      Alanine Aminot~ U/L      ukat/L   0.0167
6 CHEMISTRY ALANINE AMINOTRANSFERASE       13 U/L             6       37 CHEMISTRY ALT      Alanine Aminot~ U/L      ukat/L   0.0167

您也可以使用 plyr 包中的 join 函数。

library(plyr)

# plyr::join() compares key values case-sensitively and needs the key columns
# to carry the same name in both tables. Joining df to cov as-is therefore
# matches nothing (LBTEST differs in case) and leaves every cov column NA.
# Prepare a copy of cov whose keys line up with df:
#   * rename LBORRESU to lbstresu so the unit can be used as a join key
#     (without it, each df row would match all seven cov rows);
#   * upper-case the key values to match the casing used in df.
cov_keys <- cov
names(cov_keys)[names(cov_keys) == "LBORRESU"] <- "lbstresu"
key_cols <- c("LBCAT", "LBTEST", "lbstresu")
cov_keys[key_cols] <- lapply(cov_keys[key_cols], toupper)

test <- plyr::join(df, cov_keys, type = "left", by = key_cols)