匹配两个数据框并替换其中一个中的相应条目

Match two dataframes and replace the corresponding entries in one of them

我有以下两个数据框:

# Sample data: From/To name pairs plus a row number.
df <- data.frame(
  From = c("Mike", "Elena", "Mike", "Mark", "Alice", "Joana"),
  To = c("Jasmine", "Mariah", "Erik", "Jack", "Joana", "Mike"),
  Number = 1:6,
  stringsAsFactors = FALSE
)

# Lookup table pairing names with IDs (IDs are stored as strings).
# "Jasmine" appears in df but has no entry here.
df2 <- data.frame(
  ID = c(
    "1738799", "657940", "13253", "97980",
    "6569874", "64839", "8494", "2773"
  ),
  Name = c(
    "Mike", "Elena", "Mark", "Alice",
    "Joana", "Mariah", "Erik", "Jack"
  ),
  stringsAsFactors = FALSE
)

数据框 df2 包含与 df 中大多数名称相关联的 ID。我想用相应的 ID 替换 df 中的名称，得到如下结果:

> df
  From             To   Number
  1738799     Jasmine        1
  657940        64839        2
  1738799        8494        3
  13253          2773        4
  97980       6569874        5
  6569874     1738799        6

在基础 R 中你可以这样做:

# Named lookup vector: names are df2$Name, values are df2$ID.
nms <- setNames(df2$ID, df2$Name)

# Work on a copy so the original df stays untouched.
df3 <- df
df3[1:2] <- lapply(df[1:2], function(col) {
  id <- nms[col]
  # Names without an ID index to NA; keep the original name for those.
  ifelse(is.na(id), col, id)
})
df3
     From      To Number
1 1738799 Jasmine      1
2  657940   64839      2
3 1738799    8494      3
4   13253    2773      4
5   97980 6569874      5
6 6569874 1738799      6

您还可以使用以下tidyverse解决方案:

library(dplyr)
library(purrr)


# Replace every name in the character columns of df with its ID from df2.
# Names that have no ID in df2 (e.g. "Jasmine") are left unchanged.
# The ID is looked up directly via match(), so no stringr call is needed
# (the earlier str_replace() call failed because stringr was never loaded).
df %>%
  map_if(is.character, ~ {
    idx <- match(.x, df2$Name)
    ifelse(is.na(idx), .x, df2$ID[idx])
  }) %>%
  # map_if() returns a plain list of columns; bind them back into a tibble.
  bind_cols()


# A tibble: 6 x 3
  From    To      Number
  <chr>   <chr>    <int>
1 1738799 Jasmine      1
2 657940  64839        2
3 1738799 8494         3
4 13253   2773         4
5 97980   6569874      5
6 6569874 1738799      6

我们可以在 tidyverse 中使用 coalesce 和 deframe：

library(dplyr)
library(tibble)
# deframe(df2[2:1]) builds a named vector (names = Name, values = ID).
# Indexing it by a column gives NA for names without an ID, and
# coalesce() then falls back to the original name.
df %>%
  mutate(
    across(
      From:To,
      function(col) coalesce(deframe(df2[2:1])[col], col)
    )
  )
#     From      To Number
#1 1738799 Jasmine      1
#2  657940   64839      2
#3 1738799    8494      3
#4   13253    2773      4
#5   97980 6569874      5
#6 6569874 1738799      6

或者使用基础 R（需要 R 4.1.0 及以上版本，以支持原生管道 |> 和 \(x) 匿名函数）：

# Look up all From/To names at once (overwrites df in place).
# Names without an ID index to NA and are restored from the name matrix.
df[1:2] <- as.matrix(df[1:2]) |>
  (\(nm) {
    id <- setNames(df2$ID, df2$Name)[nm]
    ifelse(is.na(id), nm, id)
  })()

输出：

df
#     From      To Number
#1 1738799 Jasmine      1
#2  657940   64839      2
#3 1738799    8494      3
#4   13253    2773      4
#5   97980 6569874      5
#6 6569874 1738799      6

另一种基础 R 方案：

# Return a copy of df whose `To` names are replaced by their IDs from df2.
# Only `To` is touched; names with no ID in df2 are kept as they are.
transform(
  df,
  To = {
    hit <- match(To, df2$Name)
    ifelse(is.na(hit), To, df2$ID[hit])
  }
)

结果如下（注意：此方法只替换了 To 列，From 列保持不变）：

   From      To Number
1  Mike Jasmine      1
2 Elena   64839      2
3  Mike    8494      3
4  Mark    2773      4
5 Alice 6569874      5
6 Joana 1738799      6