在 R 中根据多个条件,用另一个数据框中的值替换数据框中的值
Replacing data frame values given multiple conditions from another data frame with R
我有两个数据框:一个记录了各网格单元多次重复调查的时间(以月份表示),另一个记录了同一批网格单元每个月的积雪数据;两者都有一个用于标识网格单元的 ID 列,可以据此相互匹配。
我想做的是:在第一个数据框(即记录各次重复调查月份的那个)中,根据网格单元 ID,把每个月份值替换为该网格单元在该月的积雪值。
谢谢
# Example data ----

# df1: one row per grid cell; sampl1..sampl3 hold the month of each survey
# replicate (NA where there is no replicate).
CellID <- c(1, 2, 3, 4, 5, 6)
sampl1 <- c("oct", "oct", "oct", "nov", NA, NA)
sampl2 <- c("nov", "nov", "jan", NA, NA, NA)
sampl3 <- c("dec", "dec", "jan", NA, NA, NA)
df1 <- data.frame(CellID, sampl1, sampl2, sampl3)
print(df1)

# df2: snow value per grid cell (rows) and month (columns).
# NOTE(review): df1 contains "jan" but this df2 has no `jan` column, so the
# "jan" entries cannot be resolved from it and come out as NA in the lookup.
CellID <- c(1, 2, 3, 4, 5, 6)
oct <- c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1)
nov <- c(0.4, 0.5, 0.4, 0.5, 0.6, 0.5)
dec <- c(0.6, 0.7, 0.8, 0.7, 0.6, 0.8)
df2 <- data.frame(CellID, oct, nov, dec)
print(df2)

# Hand-written target shape: every month replaced by that cell's snow value.
# NOTE(review): the 0.9 and 1 given for cell 3 belong to "jan", which is not
# present in df2 above.
CellID <- c(1, 2, 3, 4, 5, 6)
sampl1_snow <- c(0.1, 0.1, 0.1, 0.5, NA, NA)
sampl2_snow <- c(0.4, 0.5, 0.9, NA, NA, NA)
sampl3_snow <- c(0.6, 0.7, 1, NA, NA, NA)
df3 <- data.frame(CellID, sampl1_snow, sampl2_snow, sampl3_snow)
print(df3)

# Lookup ----

# Replace each month in df1 with the snow value of that month for the same
# grid cell. A positional copy such as df3[mask] <- df2[mask] is wrong here:
# it pairs column k of df1 with column k of df2 regardless of which month the
# cell names. Instead, build (row, column) index pairs into the snow matrix:
#   row    = position of the cell's CellID in df2
#   column = position of the month name among df2's month columns
# NA months, months without a column in df2, and unknown CellIDs give NA.
df3 <- local({
  snow <- as.matrix(df2[-1])
  months <- as.matrix(df1[-1])
  cell_row <- match(df1$CellID, df2$CellID)
  looked_up <- snow[cbind(cell_row[row(months)],
                          match(months, colnames(snow)))]
  # Restore the cell-by-sample layout so the columns are assigned one-to-one.
  dim(looked_up) <- dim(months)
  out <- df1
  out[-1] <- looked_up
  out
})
#   CellID sampl1 sampl2 sampl3
# 1      1    0.1    0.4    0.6
# 2      2    0.1    0.5    0.7
# 3      3    0.1     NA     NA
# 4      4    0.5     NA     NA
# 5      5     NA     NA     NA
# 6      6     NA     NA     NA
在 base R 中,我们可以使用 match:
# Replace every month in df1[-1] with the matching snow value from df2 using
# a single matrix-index lookup of (row, column) pairs:
#   row    = df2 row holding the same CellID; row() (not col()) is needed
#            because the row position of a df1 entry identifies its grid cell
#   column = position of the month name among df2's month columns
# NA months and months without a column in df2 yield NA.
df1[-1] <- df2[-1][cbind(match(df1$CellID, df2$CellID)[row(df1[-1])],
                         match(as.matrix(df1[-1]), names(df2)[-1]))]
你可以使用
library(purrr)
library(dplyr)
# For each survey column, swap the month name for the value stored in df2 at
# the same row position, in the column named after that month. A lookup that
# comes back NULL (presumably an NA month or a month with no column in df2)
# becomes NA. Results are named <column>_snow and the sampl* inputs are
# dropped via .keep = "unused".
df1 %>%
  mutate(
    across(
      starts_with("sampl"),
      function(months) {
        imap_dbl(months, function(month, i) {
          snow <- df2[i, month]
          if (is.null(snow)) NA_real_ else snow
        })
      },
      .names = "{.col}_snow"
    ),
    .keep = "unused"
  )
获得
CellID sampl1_snow sampl2_snow sampl3_snow
1 1 0.1 0.4 0.6
2 2 0.1 0.5 0.7
3 3 0.1 0.9 0.9
4 4 0.5 NA NA
5 5 NA NA NA
6 6 NA NA NA
数据
对于 df2,我使用了下面这份数据(比问题中的 df2 多了一列 jan):
# Snow data with an additional `jan` column. This looks like dput() output;
# the expression is not assigned to a name here.
structure(
  list(
    CellID = c(1, 2, 3, 4, 5, 6),
    oct = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
    nov = c(0.4, 0.5, 0.4, 0.5, 0.6, 0.5),
    dec = c(0.6, 0.7, 0.8, 0.7, 0.6, 0.8),
    jan = c(0, 0, 0.9, 0, 0, 0)
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)
我有两个数据框:一个记录了各网格单元多次重复调查的时间(以月份表示),另一个记录了同一批网格单元每个月的积雪数据;两者都有一个用于标识网格单元的 ID 列,可以据此相互匹配。 我想做的是:在第一个数据框(即记录各次重复调查月份的那个)中,根据网格单元 ID,把每个月份值替换为该网格单元在该月的积雪值。 谢谢
# Example data ----

# df1: one row per grid cell; sampl1..sampl3 hold the month of each survey
# replicate (NA where there is no replicate).
CellID <- c(1, 2, 3, 4, 5, 6)
sampl1 <- c("oct", "oct", "oct", "nov", NA, NA)
sampl2 <- c("nov", "nov", "jan", NA, NA, NA)
sampl3 <- c("dec", "dec", "jan", NA, NA, NA)
df1 <- data.frame(CellID, sampl1, sampl2, sampl3)
print(df1)

# df2: snow value per grid cell (rows) and month (columns).
# NOTE(review): df1 contains "jan" but this df2 has no `jan` column, so the
# "jan" entries cannot be resolved from it and come out as NA in the lookup.
CellID <- c(1, 2, 3, 4, 5, 6)
oct <- c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1)
nov <- c(0.4, 0.5, 0.4, 0.5, 0.6, 0.5)
dec <- c(0.6, 0.7, 0.8, 0.7, 0.6, 0.8)
df2 <- data.frame(CellID, oct, nov, dec)
print(df2)

# Hand-written target shape: every month replaced by that cell's snow value.
# NOTE(review): the 0.9 and 1 given for cell 3 belong to "jan", which is not
# present in df2 above.
CellID <- c(1, 2, 3, 4, 5, 6)
sampl1_snow <- c(0.1, 0.1, 0.1, 0.5, NA, NA)
sampl2_snow <- c(0.4, 0.5, 0.9, NA, NA, NA)
sampl3_snow <- c(0.6, 0.7, 1, NA, NA, NA)
df3 <- data.frame(CellID, sampl1_snow, sampl2_snow, sampl3_snow)
print(df3)

# Lookup ----

# Replace each month in df1 with the snow value of that month for the same
# grid cell. A positional copy such as df3[mask] <- df2[mask] is wrong here:
# it pairs column k of df1 with column k of df2 regardless of which month the
# cell names. Instead, build (row, column) index pairs into the snow matrix:
#   row    = position of the cell's CellID in df2
#   column = position of the month name among df2's month columns
# NA months, months without a column in df2, and unknown CellIDs give NA.
df3 <- local({
  snow <- as.matrix(df2[-1])
  months <- as.matrix(df1[-1])
  cell_row <- match(df1$CellID, df2$CellID)
  looked_up <- snow[cbind(cell_row[row(months)],
                          match(months, colnames(snow)))]
  # Restore the cell-by-sample layout so the columns are assigned one-to-one.
  dim(looked_up) <- dim(months)
  out <- df1
  out[-1] <- looked_up
  out
})
#   CellID sampl1 sampl2 sampl3
# 1      1    0.1    0.4    0.6
# 2      2    0.1    0.5    0.7
# 3      3    0.1     NA     NA
# 4      4    0.5     NA     NA
# 5      5     NA     NA     NA
# 6      6     NA     NA     NA
在 base R 中,我们可以使用 match:
# Replace every month in df1[-1] with the matching snow value from df2 using
# a single matrix-index lookup of (row, column) pairs:
#   row    = df2 row holding the same CellID; row() (not col()) is needed
#            because the row position of a df1 entry identifies its grid cell
#   column = position of the month name among df2's month columns
# NA months and months without a column in df2 yield NA.
df1[-1] <- df2[-1][cbind(match(df1$CellID, df2$CellID)[row(df1[-1])],
                         match(as.matrix(df1[-1]), names(df2)[-1]))]
你可以使用
library(purrr)
library(dplyr)
# For each survey column, swap the month name for the value stored in df2 at
# the same row position, in the column named after that month. A lookup that
# comes back NULL (presumably an NA month or a month with no column in df2)
# becomes NA. Results are named <column>_snow and the sampl* inputs are
# dropped via .keep = "unused".
df1 %>%
  mutate(
    across(
      starts_with("sampl"),
      function(months) {
        imap_dbl(months, function(month, i) {
          snow <- df2[i, month]
          if (is.null(snow)) NA_real_ else snow
        })
      },
      .names = "{.col}_snow"
    ),
    .keep = "unused"
  )
获得
CellID sampl1_snow sampl2_snow sampl3_snow
1 1 0.1 0.4 0.6
2 2 0.1 0.5 0.7
3 3 0.1 0.9 0.9
4 4 0.5 NA NA
5 5 NA NA NA
6 6 NA NA NA
数据
对于 df2,我使用了下面这份数据(比问题中的 df2 多了一列 jan):
# Snow data with an additional `jan` column. This looks like dput() output;
# the expression is not assigned to a name here.
structure(
  list(
    CellID = c(1, 2, 3, 4, 5, 6),
    oct = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
    nov = c(0.4, 0.5, 0.4, 0.5, 0.6, 0.5),
    dec = c(0.6, 0.7, 0.8, 0.7, 0.6, 0.8),
    jan = c(0, 0, 0.9, 0, 0, 0)
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)