如何仅用数据框 2 中匹配的值替换数据框 1 中特定列的缺失值
How to replace only the missing values of a specific column in data frame 1 with the matched values from data frame 2
我有两个数据框,如下所示:
# Data frame 1 (dput output), assigned to `data` so the join attempt further
# down can refer to it. `a1` and `value` are character columns; `value` is
# missing in rows 2 and 4.
data <- structure(list(ID = c("a", "a", "c", "d", "a", "f"), a1 = c("2001",
"2000", "2000", "1998", "2006", "2002"), value = c("100", NA,
"200", NA, "400", "500")), class = "data.frame", row.names = c(NA,
-6L))
# Data frame 2: the lookup table. Here `a2` and `value2` are numeric.
ID2 <- c("x","a","c","d","n","f","g","m")
a2 <- c(2000,2000,2000,2000,2000,2000,2000,2000)
value2 <- c(100, 1000, 20000, 1000, 400,500,1,1)
# Assemble the three vectors into the data frame used as `data2` in the join.
data2 <- data.frame(ID2, a2, value2)
ID a1 value
1 a 2001 100
2 a 2000 <NA>
3 c 2000 200
4 d 1998 <NA>
5 a 2006 400
6 f 2002 500
ID2 a2 value2
1 x 2000 100
2 a 2000 1000
3 c 2000 20000
4 d 2000 1000
5 n 2000 400
6 f 2000 500
7 g 2000 1
8 m 2000 1
我想做的是:仅将数据框 1 中 value 列的缺失值替换为数据框 2 中 value2 列的对应值,并且年份和 ID 都必须匹配。我使用 left_join
尝试了以下代码,但得到的结果不对:
# Attempted join of data frame 1 to data frame 2 on ID = ID2 and a1 = a2.
# NOTE(review): `a1` is character while `a2` is numeric in the data shown, so
# the key types differ. The join by itself also only adds `value2` as a new
# column; it does not fill the NAs in `value`.
r <- left_join(data, data2, by=c("ID"="ID2","a1"="a2"))
有什么建议吗?
预期结果
ID a1 value
1 a 2001 100
2 a 2000 **1000**
3 c 2000 200
4 d 1998 <NA>
5 a 2006 400
6 f 2002 500
这是一个 tidyverse 方法。
请注意,因为数据框 1 中的 a1 和 value 是 character 类型,所以我把数据框 2 中相应的列也改成了 character 类型。
你需要做的是先用 left_join 把它们连接在一起,然后对 value 和 value2 使用 coalesce(取第一个非缺失值)。
library(tidyverse)

# Lookup table. The year and value are written as character strings so that
# they have the same type as the character columns of data frame 1.
ID2 <- c("x", "a", "c", "d", "n", "f", "g", "m")
a2 <- c("2000", "2000", "2000", "2000", "2000", "2000", "2000", "2000")
value2 <- c("100", "1000", "20000", "1000", "400", "500", "1", "1")
df2 <- data.frame(ID2 = ID2, a2 = a2, value2 = value2)

# `df1` is assumed to be data frame 1 from the question (character `a1` and
# `value`). Join on (ID, year), take `value` where present and `value2`
# otherwise, then keep only the three original columns.
df1 %>%
  left_join(df2, by = c("ID" = "ID2", "a1" = "a2")) %>%
  mutate(value = coalesce(value, value2)) %>%
  select(ID, a1, value)
ID a1 value
1 a 2001 100
2 a 2000 1000
3 c 2000 200
4 d 1998 <NA>
5 a 2006 400
6 f 2002 500
仅使用基础 R。
# Build one key per row from (ID, year). "\r" is used as the separator because
# it will not occur in the data, so distinct (ID, year) pairs cannot collide.
key1 <- paste(df1$ID, df1$a1, sep = "\r")
key2 <- paste(df2$ID2, df2$a2, sep = "\r")
# Position of each df1 row's key in df2 (NA when there is no match). If df2
# holds the same key more than once, the first occurrence is used.
hit <- match(key1, key2)
# Replace only the missing values. Unlike merge(), which reorders rows by the
# join keys, this keeps df1's rows in their original order.
transform(df1, value = ifelse(is.na(value), df2$value2[hit], value))
# ID a1 value
# 1 a 2001 100
# 2 a 2000 1000
# 3 c 2000 200
# 4 d 1998 NA
# 5 a 2006 400
# 6 f 2002 500
数据:
# Data frame 1 for the base R solution: integer year and value columns, with
# NA marking the missing values. Row names are the character labels "1".."6".
df1 <- data.frame(
  ID = c("a", "a", "c", "d", "a", "f"),
  a1 = c(2001L, 2000L, 2000L, 1998L, 2006L, 2002L),
  value = c(100L, NA, 200L, NA, 400L, 500L),
  row.names = as.character(1:6),
  stringsAsFactors = FALSE
)
# Data frame 2 (lookup table): every row is for year 2000. Row names are the
# character labels "1".."8".
df2 <- data.frame(
  ID2 = c("x", "a", "c", "d", "n", "f", "g", "m"),
  a2 = rep(2000L, 8L),
  value2 = c(100L, 1000L, 20000L, 1000L, 400L, 500L, 1L, 1L),
  row.names = as.character(1:8),
  stringsAsFactors = FALSE
)
我有两个数据框,如下所示:
# Data frame 1 (dput output), assigned to `data` so the join attempt further
# down can refer to it. `a1` and `value` are character columns; `value` is
# missing in rows 2 and 4.
data <- structure(list(ID = c("a", "a", "c", "d", "a", "f"), a1 = c("2001",
"2000", "2000", "1998", "2006", "2002"), value = c("100", NA,
"200", NA, "400", "500")), class = "data.frame", row.names = c(NA,
-6L))
# Data frame 2: the lookup table. Here `a2` and `value2` are numeric.
ID2 <- c("x","a","c","d","n","f","g","m")
a2 <- c(2000,2000,2000,2000,2000,2000,2000,2000)
value2 <- c(100, 1000, 20000, 1000, 400,500,1,1)
# Assemble the three vectors into the data frame used as `data2` in the join.
data2 <- data.frame(ID2, a2, value2)
ID a1 value
1 a 2001 100
2 a 2000 <NA>
3 c 2000 200
4 d 1998 <NA>
5 a 2006 400
6 f 2002 500
ID2 a2 value2
1 x 2000 100
2 a 2000 1000
3 c 2000 20000
4 d 2000 1000
5 n 2000 400
6 f 2000 500
7 g 2000 1
8 m 2000 1
我想做的是:仅将数据框 1 中 value 列的缺失值替换为数据框 2 中 value2 列的对应值,并且年份和 ID 都必须匹配。我使用 left_join
尝试了以下代码,但得到的结果不对:
# Attempted join of data frame 1 to data frame 2 on ID = ID2 and a1 = a2.
# NOTE(review): `a1` is character while `a2` is numeric in the data shown, so
# the key types differ. The join by itself also only adds `value2` as a new
# column; it does not fill the NAs in `value`.
r <- left_join(data, data2, by=c("ID"="ID2","a1"="a2"))
有什么建议吗?
预期结果
ID a1 value
1 a 2001 100
2 a 2000 **1000**
3 c 2000 200
4 d 1998 <NA>
5 a 2006 400
6 f 2002 500
这是一个 tidyverse 方法。
请注意,因为数据框 1 中的 a1 和 value 是 character 类型,所以我把数据框 2 中相应的列也改成了 character 类型。
你需要做的是先用 left_join 把它们连接在一起,然后对 value 和 value2 使用 coalesce(取第一个非缺失值)。
library(tidyverse)

# Lookup table. The year and value are written as character strings so that
# they have the same type as the character columns of data frame 1.
ID2 <- c("x", "a", "c", "d", "n", "f", "g", "m")
a2 <- c("2000", "2000", "2000", "2000", "2000", "2000", "2000", "2000")
value2 <- c("100", "1000", "20000", "1000", "400", "500", "1", "1")
df2 <- data.frame(ID2 = ID2, a2 = a2, value2 = value2)

# `df1` is assumed to be data frame 1 from the question (character `a1` and
# `value`). Join on (ID, year), take `value` where present and `value2`
# otherwise, then keep only the three original columns.
df1 %>%
  left_join(df2, by = c("ID" = "ID2", "a1" = "a2")) %>%
  mutate(value = coalesce(value, value2)) %>%
  select(ID, a1, value)
ID a1 value
1 a 2001 100
2 a 2000 1000
3 c 2000 200
4 d 1998 <NA>
5 a 2006 400
6 f 2002 500
仅使用基础 R。
# Build one key per row from (ID, year). "\r" is used as the separator because
# it will not occur in the data, so distinct (ID, year) pairs cannot collide.
key1 <- paste(df1$ID, df1$a1, sep = "\r")
key2 <- paste(df2$ID2, df2$a2, sep = "\r")
# Position of each df1 row's key in df2 (NA when there is no match). If df2
# holds the same key more than once, the first occurrence is used.
hit <- match(key1, key2)
# Replace only the missing values. Unlike merge(), which reorders rows by the
# join keys, this keeps df1's rows in their original order.
transform(df1, value = ifelse(is.na(value), df2$value2[hit], value))
# ID a1 value
# 1 a 2001 100
# 2 a 2000 1000
# 3 c 2000 200
# 4 d 1998 NA
# 5 a 2006 400
# 6 f 2002 500
数据:
# Data frame 1 for the base R solution: integer year and value columns, with
# NA marking the missing values. Row names are the character labels "1".."6".
df1 <- data.frame(
  ID = c("a", "a", "c", "d", "a", "f"),
  a1 = c(2001L, 2000L, 2000L, 1998L, 2006L, 2002L),
  value = c(100L, NA, 200L, NA, 400L, 500L),
  row.names = as.character(1:6),
  stringsAsFactors = FALSE
)
# Data frame 2 (lookup table): every row is for year 2000. Row names are the
# character labels "1".."8".
df2 <- data.frame(
  ID2 = c("x", "a", "c", "d", "n", "f", "g", "m"),
  a2 = rep(2000L, 8L),
  value2 = c(100L, 1000L, 20000L, 1000L, 400L, 500L, 1L, 1L),
  row.names = as.character(1:8),
  stringsAsFactors = FALSE
)