根据 R 中的另一个 df 替换 df 中的值

Replace a value in df based on another df in R

我正在使用 R。我有 2 个数据帧:

# Lookup table: keys in column_a, replacement values in column_b.
# All values are stored as quoted strings, not numbers.
column_a <- c("20", "30", "40", "60")
column_b <- c("1.2", "1.3", "1.4", "1.5")
df1 <- data.frame(column_a, column_b)

# Target table: keys in column_c, values to be (partially) replaced in column_d.
column_c <- c("30", "50", "60", "70")
column_d <- c("2.3", "4.5", "6.7", "8.9")
df2 <- data.frame(column_c, column_d)

我需要做的是:如果 column_c 中的某个值也出现在 column_a 中,则必须用 column_b 中与之对应的值替换 column_d 中该行的值。

新的 df2 应该是这样的:

  column_c column_d
1       30      1.3
2       50      4.5
3       60      1.5
4       70      8.9

如您所见,值“2.3”和“6.7”已分别替换为“1.3”和“1.5”,因为“30”和“60”是仅有的同时存在于 df2 的 column_c 和 df1 的 column_a 中的值。

library(tidyverse)

# Lookup table: keys in column_a, replacement values in column_b.
column_a <- c("20", "30", "40", "60")
column_b <- c("1.2", "1.3", "1.4", "1.5")
df1 <- data.frame(column_a, column_b)

# Target table: keys in column_c, values to be (partially) replaced in column_d.
column_c <- c("30", "50", "60", "70")
column_d <- c("2.3", "4.5", "6.7", "8.9")
df2 <- data.frame(column_c, column_d)

# Show both inputs before any replacement is applied.
df1
#>   column_a column_b
#> 1       20      1.2
#> 2       30      1.3
#> 3       40      1.4
#> 4       60      1.5
df2
#>   column_c column_d
#> 1       30      2.3
#> 2       50      4.5
#> 3       60      6.7
#> 4       70      8.9

# Replace column_d in df2 with df1's column_b wherever the key matches.
#
# The rows of df1 and df2 are not aligned (row 1 holds key "20" in df1 but
# "30" in df2), so the keys must be looked up rather than compared row by row.
# match() returns, for each df2 key, the position of the same key in
# df1$column_a, or NA when the key is absent. Rows with a match take df1's
# column_b; all other rows keep their original column_d.
df2 %>%
  mutate(
    replacement = df1$column_b[match(column_c, df1$column_a)],
    column_d = ifelse(is.na(replacement), column_d, replacement)
  ) %>%
  select(column_c, column_d)
#>   column_c column_d
#> 1       30      1.3
#> 2       50      4.5
#> 3       60      1.5
#> 4       70      8.9

由 reprex package (v2.0.1) 于 2021-10-07 创建

您可以先连接(join)两个数据框,然后用 coalesce 替换相应的值

library(dplyr)

# Left-join df1 onto df2 by key: df2 rows without a matching column_a get
# NA in column_b. coalesce() then takes column_b where it is present and
# falls back to the original column_d otherwise. transmute() keeps only the
# two listed columns.
left_join(df2, df1, by = c("column_c" = "column_a")) %>%
  transmute(
    column_c,
    column_d = coalesce(column_b, column_d)
  )

#  column_c column_d
#1       30      1.3
#2       50      4.5
#3       60      1.5
#4       70      8.9

在基础 R 中 -

# Left-merge df1 onto df2 by key (all.x = TRUE keeps every df2 row and fills
# column_b with NA where df1 has no matching column_a). Take column_b where a
# match exists, otherwise keep column_d, then drop the helper column_b so the
# result has the same two columns as df2.
# Note: merge() sorts the result by the key column by default.
transform(
  merge(df2, df1, by.x = "column_c", by.y = "column_a", all.x = TRUE),
  column_d = ifelse(is.na(column_b), column_d, column_b)
)[c("column_c", "column_d")]

使用基本 R ifelse 命令:

# Vectorised replacement: for every df2 key that also occurs in
# df1$column_a, take the column_b value from the first matching df1 row;
# otherwise keep the existing column_d. This avoids apply(), which would
# coerce the data frame to a character matrix and run ifelse() on one
# scalar at a time.
df2$column_d <- ifelse(
  df2$column_c %in% df1$column_a,
  df1$column_b[match(df2$column_c, df1$column_a)],
  df2$column_d
)

使用data.table

library(data.table)
# Convert df2 to a data.table by reference, then do an update join against
# df1: for df2 rows whose column_c equals a df1 column_a, column_d is
# overwritten in place with the first non-missing of (column_b, column_d).
# Rows of df2 without a match are left untouched.
setDT(df2)
df2[
  df1,
  column_d := fcoalesce(column_b, column_d),
  on = .(column_c = column_a)
]
> df2
   column_c column_d
1:       30      1.3
2:       50      4.5
3:       60      1.5
4:       70      8.9