匹配两个数据框,并替换其中一个数据框中的相应条目
Match two dataframes and replace the corresponding entries in one of them
我有以下两个数据框:
# Example data: `From` and `To` hold names, `Number` is 1..6.
df <- data.frame(
  From = c("Mike", "Elena", "Mike", "Mark", "Alice", "Joana"),
  To = c("Jasmine", "Mariah", "Erik", "Jack", "Joana", "Mike"),
  Number = 1:6,
  stringsAsFactors = FALSE
)

# Lookup table pairing each known name with its ID (IDs are kept as strings).
# "Jasmine" has no row here, so not every name in `df` can be translated.
df2 <- data.frame(
  ID = c(
    "1738799", "657940", "13253", "97980",
    "6569874", "64839", "8494", "2773"
  ),
  Name = c(
    "Mike", "Elena", "Mark", "Alice",
    "Joana", "Mariah", "Erik", "Jack"
  ),
  stringsAsFactors = FALSE
)
数据框 `df2` 包含与 `df` 中大多数名称相关联的 ID。我想用相应的 ID 替换 `df` 中的名称,得到如下结果:
> df
From To Number
1738799 Jasmine 1
657940 64839 2
1738799 8494 3
13253 2773 4
97980 6569874 5
6569874 1738799 6
在基础 R 中你可以这样做:
# Name -> ID lookup: the IDs (column 1 of df2) named by Name (column 2).
nms <- setNames(df2[[1]], df2[[2]])

# Work on a copy so that `df` itself is left unchanged.
df3 <- df

# For the first two columns, look every name up in `nms`. Names without an
# entry come back as NA, and those positions keep the original name.
df3[1:2] <- lapply(df[1:2], function(x) {
  ids <- nms[x]
  ifelse(is.na(ids), x, ids)
})
df3
From To Number
1 1738799 Jasmine 1
2 657940 64839 2
3 1738799 8494 3
4 13253 2773 4
5 97980 6569874 5
6 6569874 1738799 6
您也可以使用以下 tidyverse 解决方案:
library(dplyr)
library(purrr)

# Replace names by IDs in every character column of `df`.
# `match()` gives the row of `df2` for each name (NA when the name is unknown);
# `coalesce()` then falls back to the original name for those NAs.
# The ID is looked up directly, so no stringr function (and no regex
# interpretation of the names) is involved.
df %>%
  map_if(is.character, ~ coalesce(df2$ID[match(.x, df2$Name)], .x)) %>%
  bind_cols()
# A tibble: 6 x 3
From To Number
<chr> <chr> <int>
1 1738799 Jasmine 1
2 657940 64839 2
3 1738799 8494 3
4 13253 2773 4
5 97980 6569874 5
6 6569874 1738799 6
我们也可以使用 tidyverse 中的 `coalesce` 和 `deframe`:
library(dplyr)
library(tibble)

# `deframe()` turns the (Name, ID) columns of `df2` into a vector of IDs named
# by Name. Indexing it with a column of names yields the IDs (NA for unknown
# names), and `coalesce()` substitutes the original name for each NA.
df %>%
  mutate(
    across(
      From:To,
      function(nm) coalesce(deframe(df2[2:1])[nm], nm)
    )
  )
# From To Number
#1 1738799 Jasmine 1
#2 657940 64839 2
#3 1738799 8494 3
#4 13253 2773 4
#5 97980 6569874 5
#6 6569874 1738799 6
或者使用 base R(需要 R 4.1.0 及以上版本):
# Overwrite the first two columns of `df` in place.
# The name columns are taken as a character matrix and piped into an anonymous
# function that looks each name up in an ID-named-by-Name vector; positions
# with no match (NA) keep the original name.
df[1:2] <- as.matrix(df[1:2]) |>
  (\(m) {
    ids <- setNames(df2$ID, df2$Name)[m]
    ifelse(is.na(ids), m, ids)
  })()
输出:
df
# From To Number
#1 1738799 Jasmine 1
#2 657940 64839 2
#3 1738799 8494 3
#4 13253 2773 4
#5 97980 6569874 5
#6 6569874 1738799 6
另一种 base R 方案:
# Return a copy of `df` whose `To` column has names replaced by IDs.
# NOTE: only `To` is translated here; `From` is returned unchanged.
transform(
  df,
  To = {
    # Row of `df2` for each name in `To`; NA when the name has no ID.
    ids <- df2$ID[match(To, df2$Name)]
    # Keep the original name wherever no ID was found.
    ifelse(is.na(ids), To, ids)
  }
)
结果为:
From To Number
1 Mike Jasmine 1
2 Elena 64839 2
3 Mike 8494 3
4 Mark 2773 4
5 Alice 6569874 5
6 Joana 1738799 6
我有以下两个数据框:
# Example data: `From` and `To` hold names, `Number` is 1..6.
df <- data.frame(
  From = c("Mike", "Elena", "Mike", "Mark", "Alice", "Joana"),
  To = c("Jasmine", "Mariah", "Erik", "Jack", "Joana", "Mike"),
  Number = 1:6,
  stringsAsFactors = FALSE
)

# Lookup table pairing each known name with its ID (IDs are kept as strings).
# "Jasmine" has no row here, so not every name in `df` can be translated.
df2 <- data.frame(
  ID = c(
    "1738799", "657940", "13253", "97980",
    "6569874", "64839", "8494", "2773"
  ),
  Name = c(
    "Mike", "Elena", "Mark", "Alice",
    "Joana", "Mariah", "Erik", "Jack"
  ),
  stringsAsFactors = FALSE
)
数据框 `df2` 包含与 `df` 中大多数名称相关联的 ID。我想用相应的 ID 替换 `df` 中的名称,得到如下结果:
> df
From To Number
1738799 Jasmine 1
657940 64839 2
1738799 8494 3
13253 2773 4
97980 6569874 5
6569874 1738799 6
在基础 R 中你可以这样做:
# Name -> ID lookup: the IDs (column 1 of df2) named by Name (column 2).
nms <- setNames(df2[[1]], df2[[2]])

# Work on a copy so that `df` itself is left unchanged.
df3 <- df

# For the first two columns, look every name up in `nms`. Names without an
# entry come back as NA, and those positions keep the original name.
df3[1:2] <- lapply(df[1:2], function(x) {
  ids <- nms[x]
  ifelse(is.na(ids), x, ids)
})
df3
From To Number
1 1738799 Jasmine 1
2 657940 64839 2
3 1738799 8494 3
4 13253 2773 4
5 97980 6569874 5
6 6569874 1738799 6
您也可以使用以下 tidyverse 解决方案:
library(dplyr)
library(purrr)

# Replace names by IDs in every character column of `df`.
# `match()` gives the row of `df2` for each name (NA when the name is unknown);
# `coalesce()` then falls back to the original name for those NAs.
# The ID is looked up directly, so no stringr function (and no regex
# interpretation of the names) is involved.
df %>%
  map_if(is.character, ~ coalesce(df2$ID[match(.x, df2$Name)], .x)) %>%
  bind_cols()
# A tibble: 6 x 3
From To Number
<chr> <chr> <int>
1 1738799 Jasmine 1
2 657940 64839 2
3 1738799 8494 3
4 13253 2773 4
5 97980 6569874 5
6 6569874 1738799 6
我们也可以使用 tidyverse 中的 `coalesce` 和 `deframe`:
library(dplyr)
library(tibble)

# `deframe()` turns the (Name, ID) columns of `df2` into a vector of IDs named
# by Name. Indexing it with a column of names yields the IDs (NA for unknown
# names), and `coalesce()` substitutes the original name for each NA.
df %>%
  mutate(
    across(
      From:To,
      function(nm) coalesce(deframe(df2[2:1])[nm], nm)
    )
  )
# From To Number
#1 1738799 Jasmine 1
#2 657940 64839 2
#3 1738799 8494 3
#4 13253 2773 4
#5 97980 6569874 5
#6 6569874 1738799 6
或者使用 base R(需要 R 4.1.0 及以上版本):
# Overwrite the first two columns of `df` in place.
# The name columns are taken as a character matrix and piped into an anonymous
# function that looks each name up in an ID-named-by-Name vector; positions
# with no match (NA) keep the original name.
df[1:2] <- as.matrix(df[1:2]) |>
  (\(m) {
    ids <- setNames(df2$ID, df2$Name)[m]
    ifelse(is.na(ids), m, ids)
  })()
输出:
df
# From To Number
#1 1738799 Jasmine 1
#2 657940 64839 2
#3 1738799 8494 3
#4 13253 2773 4
#5 97980 6569874 5
#6 6569874 1738799 6
另一种 base R 方案:
# Return a copy of `df` whose `To` column has names replaced by IDs.
# NOTE: only `To` is translated here; `From` is returned unchanged.
transform(
  df,
  To = {
    # Row of `df2` for each name in `To`; NA when the name has no ID.
    ids <- df2$ID[match(To, df2$Name)]
    # Keep the original name wherever no ID was found.
    ifelse(is.na(ids), To, ids)
  }
)
结果为:
From To Number
1 Mike Jasmine 1
2 Elena 64839 2
3 Mike 8494 3
4 Mark 2773 4
5 Alice 6569874 5
6 Joana 1738799 6