如何替换多列中的特定字符串?
how to replace specific strings across many columns?
如何将观察值(在 df1 中)映射到参考数据框 (df2) 中的正确单元格?这些单元格分布在(许多!)不同的列中,所以我不能只对某一个特定的列(例如 df2$specific.column.name)使用简单的 gsub。
df1 包含观察值和 ID,df2 包含参考数据(ID 及其一致性规则,即 'consistency rules')。
我想用 df1 中的观察值替换 df2 中的所有 ID。
# Observations: one row per ID (`x`) together with its observed value (`y`).
df1 <- data.frame(
  x = c("id1", "id2", "id3", "id4"),
  y = c(18, 21, 44, 800)
)
head(df1)
# x y
# id1 18
# id2 21
# id3 44
# id4 800

# Reference data: each row is a rule spread over several columns, mixing
# IDs from `df1$x` with operators; unused trailing cells are NA.
df2 <- data.frame(
  check = c("id2", "id3", "id1", "id1"),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c("id1", "id4", "id4", "id2"),
  checkpart4 = c("==", NA, "==", NA),
  checkpart5 = c("id4", NA, "id1", NA)
)
head(df2)
# check checkpart2 checkpart3 checkpart4 checkpart5
# id2 + id1 == id4
# id3 == id4 <NA> <NA>
# id1 * id4 == id1
# id1 > id2 <NA> <NA>
结果应该是这样的,但是有 100 多列:
head(df2)
#   check checkpart2 checkpart3 checkpart4 checkpart5
# 1    21          +         18         ==        800
# 2    44         ==        800       <NA>       <NA>
# 3    18          *        800         ==         18
# 4    18          >         21       <NA>       <NA>
这是您要找的吗?
# Rebuild the reference data frame of rules (IDs, operators, NA padding).
df2 <- data.frame(
  check = c("id2", "id3", "id1", "id1"),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c("id1", "id4", "id4", "id2"),
  checkpart4 = c("==", NA, "==", NA),
  checkpart5 = c("id4", NA, "id1", NA)
)

# Hard-coded ID -> value lookup, applied in the listed order.
id_values <- c(id1 = 18, id2 = 21, id3 = 44, id4 = 800)

# `df2 == id` is a logical matrix over every cell, so a single assignment
# replaces the ID wherever it occurs, whatever the column. The columns are
# character, so the numbers are stored as strings ("18", "21", ...).
for (id in names(id_values)) {
  df2[df2 == id] <- id_values[[id]]
}
下面的 dplyr 方案适用于任意数量的行和列,甚至无需输入列名。以下用您的示例数据进行演示。
# Observations: ID (`x`) and its observed value (`y`).
df1 <- data.frame(
  x = c("id1", "id2", "id3", "id4"),
  y = c(18, 21, 44, 800)
)

# Reference rules whose ID cells should be replaced by the observed values.
df2 <- data.frame(
  check = c("id2", "id3", "id1", "id1"),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c("id1", "id4", "id4", "id2"),
  checkpart4 = c("==", NA, "==", NA),
  checkpart5 = c("id4", NA, "id1", NA)
)

# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
library(dplyr, warn.conflicts = FALSE)

# For every column, cells found in `df1$x` are swapped for the matching
# `df1$y`; everything else (operators, NA) is kept as is. No column names
# are needed, so this scales to any number of columns.
# Note: ifelse() decides the result type per column. A column in which
# every cell is an ID comes back numeric, while a column that also keeps
# non-ID cells is coerced to character.
df2 %>%
  mutate(across(
    everything(),
    ~ ifelse(. %in% df1$x, df1$y[match(., df1$x)], .)
  ))
#> check checkpart2 checkpart3 checkpart4 checkpart5
#> 1 21 + 18 == 800
#> 2 44 == 800 <NA> <NA>
#> 3 18 * 800 == 18
#> 4 18 > 21 <NA> <NA>
由 reprex 包 (v2.0.0) 创建于 2021-06-30
如何将观察值(在 df1 中)映射到参考数据框 (df2) 中的正确单元格?这些单元格分布在(许多!)不同的列中,所以我不能只对某一个特定的列(例如 df2$specific.column.name)使用简单的 gsub。
df1 包含观察值和 ID,df2 包含参考数据(ID 及其一致性规则,即 'consistency rules')。
我想用 df1 中的观察值替换 df2 中的所有 ID。
# Observations: one row per ID (`x`) together with its observed value (`y`).
df1 <- data.frame(
  x = c("id1", "id2", "id3", "id4"),
  y = c(18, 21, 44, 800)
)
head(df1)
# x y
# id1 18
# id2 21
# id3 44
# id4 800

# Reference data: each row is a rule spread over several columns, mixing
# IDs from `df1$x` with operators; unused trailing cells are NA.
df2 <- data.frame(
  check = c("id2", "id3", "id1", "id1"),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c("id1", "id4", "id4", "id2"),
  checkpart4 = c("==", NA, "==", NA),
  checkpart5 = c("id4", NA, "id1", NA)
)
head(df2)
# check checkpart2 checkpart3 checkpart4 checkpart5
# id2 + id1 == id4
# id3 == id4 <NA> <NA>
# id1 * id4 == id1
# id1 > id2 <NA> <NA>
结果应该是这样的,但是有 100 多列:
head(df2)
#   check checkpart2 checkpart3 checkpart4 checkpart5
# 1    21          +         18         ==        800
# 2    44         ==        800       <NA>       <NA>
# 3    18          *        800         ==         18
# 4    18          >         21       <NA>       <NA>
这是您要找的吗?
# Rebuild the reference data frame of rules (IDs, operators, NA padding).
df2 <- data.frame(
  check = c("id2", "id3", "id1", "id1"),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c("id1", "id4", "id4", "id2"),
  checkpart4 = c("==", NA, "==", NA),
  checkpart5 = c("id4", NA, "id1", NA)
)

# Hard-coded ID -> value lookup, applied in the listed order.
id_values <- c(id1 = 18, id2 = 21, id3 = 44, id4 = 800)

# `df2 == id` is a logical matrix over every cell, so a single assignment
# replaces the ID wherever it occurs, whatever the column. The columns are
# character, so the numbers are stored as strings ("18", "21", ...).
for (id in names(id_values)) {
  df2[df2 == id] <- id_values[[id]]
}
下面的 dplyr 方案适用于任意数量的行和列,甚至无需输入列名。以下用您的示例数据进行演示。
# Observations: ID (`x`) and its observed value (`y`).
df1 <- data.frame(
  x = c("id1", "id2", "id3", "id4"),
  y = c(18, 21, 44, 800)
)

# Reference rules whose ID cells should be replaced by the observed values.
df2 <- data.frame(
  check = c("id2", "id3", "id1", "id1"),
  checkpart2 = c("+", "==", "*", ">"),
  checkpart3 = c("id1", "id4", "id4", "id2"),
  checkpart4 = c("==", NA, "==", NA),
  checkpart5 = c("id4", NA, "id1", NA)
)

# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
library(dplyr, warn.conflicts = FALSE)

# For every column, cells found in `df1$x` are swapped for the matching
# `df1$y`; everything else (operators, NA) is kept as is. No column names
# are needed, so this scales to any number of columns.
# Note: ifelse() decides the result type per column. A column in which
# every cell is an ID comes back numeric, while a column that also keeps
# non-ID cells is coerced to character.
df2 %>%
  mutate(across(
    everything(),
    ~ ifelse(. %in% df1$x, df1$y[match(., df1$x)], .)
  ))
#> check checkpart2 checkpart3 checkpart4 checkpart5
#> 1 21 + 18 == 800
#> 2 44 == 800 <NA> <NA>
#> 3 18 * 800 == 18
#> 4 18 > 21 <NA> <NA>
由 reprex 包 (v2.0.0) 创建于 2021-06-30