在 R 中根据多个条件,用另一个数据框中的值替换数据框的值

Replacing dataframe value given multiple condition from another dataframe with R

我有两个数据框:第一个记录了各网格单元多次重复调查的日期(以月份表示),第二个记录了相同网格单元每个月的积雪数据;两者都有一个用于标识网格单元的 ID 列(CellID),可据此匹配。我想在第一个数据框(即记录各次重复调查月份的数据框)中,根据网格单元 ID,把每个月份值替换为该网格单元在该月的积雪值。谢谢。

# Survey replicates: one row per grid cell, one column per replicate.
# Each entry is the month (as a lowercase abbreviation) in which that
# replicate took place; NA means the replicate was not carried out.
CellID <- as.numeric(1:6)
sampl1 <- c(rep("oct", 3), "nov", NA, NA)
sampl2 <- c("nov", "nov", "jan", rep(NA, 3))
sampl3 <- c("dec", "dec", "jan", rep(NA, 3))
df1 <- data.frame(
  CellID = CellID,
  sampl1 = sampl1,
  sampl2 = sampl2,
  sampl3 = sampl3
)
print(df1)

# Snow data: one row per grid cell, one column per month.
# NOTE: df1 also references the month "jan", for which there is no column here.
CellID <- as.numeric(1:6)
oct <- rep(0.1, 6)
nov <- c(0.4, 0.5, 0.4, 0.5, 0.6, 0.5)
dec <- c(0.6, 0.7, 0.8, 0.7, 0.6, 0.8)
df2 <- data.frame(
  CellID = CellID,
  oct = oct,
  nov = nov,
  dec = dec
)
print(df2)

# Desired result: df1 with every month replaced by the snow value recorded for
# that grid cell in that month (NA where there was no survey).
CellID <- as.numeric(1:6)
sampl1_snow <- c(rep(0.1, 3), 0.5, NA, NA)
sampl2_snow <- c(0.4, 0.5, 0.9, rep(NA, 3))
sampl3_snow <- c(0.6, 0.7, 1, rep(NA, 3))
df3 <- data.frame(
  CellID = CellID,
  sampl1_snow = sampl1_snow,
  sampl2_snow = sampl2_snow,
  sampl3_snow = sampl3_snow
)
print(df3)
# Replace every sampled month in df1 with the snow value df2 holds for the same
# CellID and that month.
#
# A purely positional copy (df2[!is.na(df1)]) is not enough: it ignores which
# month each cell names, so e.g. cell 4 (sampled in "nov") would receive the
# "oct" value, and the numbers would be coerced to character because they are
# written into character columns. The lookup below is keyed on CellID and on
# the month name instead, and yields numeric columns.
df3 <- df1
sample_cols <- setdiff(names(df1), "CellID")
snow <- as.matrix(df2[setdiff(names(df2), "CellID")])
# Row of df2 that belongs to each row of df1 (NA if the cell is absent in df2).
cell_rows <- match(df1$CellID, df2$CellID)
df3[sample_cols] <- lapply(df1[sample_cols], function(month) {
  # Two-column matrix index (row, column); an NA in either position (no survey,
  # or a month without a column in df2) produces NA in the result.
  snow[cbind(cell_rows, match(month, colnames(snow)))]
})
# With df1 and df2 as defined above ("jan" has no column in df2, hence NA):
#   CellID sampl1 sampl2 sampl3
# 1      1    0.1    0.4    0.6
# 2      2    0.1    0.5    0.7
# 3      3    0.1     NA     NA
# 4      4    0.5     NA     NA
# 5      5     NA     NA     NA
# 6      6     NA     NA     NA

在 base R 中,我们可以使用 `match`,按网格单元所在的行和月份所在的列构造矩阵索引:

# Overwrite the month columns of df1 with the matching snow values from df2.
# Each value is addressed by a (row, column) pair in the snow matrix:
#   row    = position in df2 of the cell's CellID,
#   column = position of the month name among df2's month columns.
# The row index must follow row() of the month matrix (col() would read every
# value of the first replicate from df2's first row, and so on), and CellID is
# matched against df2$CellID rather than used as a row number.
months <- as.matrix(df1[-1])
snow <- as.matrix(df2[-1])
lookup <- cbind(
  match(df1$CellID, df2$CellID)[row(months)],
  match(months, colnames(snow))
)
# NA months (no survey) and months without a column in df2 yield NA.
df1[-1] <- snow[lookup]

你可以使用

library(purrr)
library(dplyr)

# Row of df2 that belongs to each row of df1, matched on CellID rather than on
# row position, so the result stays correct if the two frames are ordered
# differently.
snow_rows <- match(df1$CellID, df2$CellID)
# Month columns available in df2 (everything except the key column).
snow_months <- setdiff(names(df2), "CellID")

# For every sampl* column, create a <column>_snow column holding the snow value
# of the sampled month for that cell; .keep = "unused" drops the original
# month columns and keeps CellID.
df1 %>%
  mutate(
    across(
      starts_with("sampl"),
      ~ map2_dbl(.x, snow_rows, function(month, row) {
        # No survey, unknown cell, or a month df2 has no column for -> NA.
        if (is.na(month) || is.na(row) || !(month %in% snow_months)) {
          NA_real_
        } else {
          df2[[month]][row]
        }
      }),
      .names = "{.col}_snow"
    ),
    .keep = "unused"
  )

获得

  CellID sampl1_snow sampl2_snow sampl3_snow
1      1         0.1         0.4         0.6
2      2         0.1         0.5         0.7
3      3         0.1         0.9         0.9
4      4         0.5          NA          NA
5      5          NA          NA          NA
6      6          NA          NA          NA

数据

对于 df2,我使用了下面的数据(在问题给出的数据基础上增加了 jan 列):

# df2 as used for the output shown above: the question's snow data plus a
# `jan` column.
data.frame(
  CellID = as.numeric(1:6),
  oct = rep(0.1, 6),
  nov = c(0.4, 0.5, 0.4, 0.5, 0.6, 0.5),
  dec = c(0.6, 0.7, 0.8, 0.7, 0.6, 0.8),
  jan = c(0, 0, 0.9, 0, 0, 0)
)