在多个变量/列中应用 ifelse 以将 99 和 999 替换为 NA

Applying ifelse across multiple variables / columns to replace 99 and 999 with NA

我有一个数据框,其中一些列用 99 表示缺失值 (NA),而另一些列则用 999 表示缺失值。目前我是这样逐列替换的:

# Baseline approach from the question: one ifelse() call per column.
# variable1-variable4 use 99 as the missing-value code; matching cells
# become NA and every other cell keeps its own value.
dat$variable1 <- ifelse(dat$variable1 == 99, NA, dat$variable1)
dat$variable2 <- ifelse(dat$variable2 == 99, NA, dat$variable2)
dat$variable3 <- ifelse(dat$variable3 == 99, NA, dat$variable3)
dat$variable4 <- ifelse(dat$variable4 == 99, NA, dat$variable4)
# variable5-variable7 use 999 as the missing-value code instead.
dat$variable5 <- ifelse(dat$variable5 == 999, NA, dat$variable5)
dat$variable6 <- ifelse(dat$variable6 == 999, NA, dat$variable6)
dat$variable7 <- ifelse(dat$variable7 == 999, NA, dat$variable7)

我想找到一种更好的方法来做到这一点,因为有时我们需要处理很多列。我不知道如何遍历那些需要把这些值替换为 NA 的特定变量,也不知道有哪些包可以帮到我(我是 R 的初学者)。

编辑: 我必须为我在问题中犯的错误道歉。我最初发布的代码是 dat$variable1 <- ifelse(dat$variable1 == 99, NA, dat$EC),并且所有代码行中都误留了“dat$EC”;正确的写法应保留各列自身的值(如上所示)。谢谢大家的回答。

您可以尝试使用 dplyr::across

对于虚拟数据 dat 定义为

# Example data: variable1-variable4 contain the sentinel 99 and
# variable5-variable7 contain the sentinel 999; EC is an extra column of
# negative values. Columns built with c() around a double are double,
# the plain a:b columns stay integer.
dat <- data.frame(
  variable1 = c(1:6, 99),
  variable2 = c(1, 2, 99, 4:7),
  variable3 = 1:7,
  variable4 = 5:11,
  variable5 = c(1:6, 999),
  variable6 = c(1:4, 999, 6, 7),
  variable7 = 1:7,
  EC = -c(1, 2, 3, 4, 5, 6, 7)
)

  variable1 variable2 variable3 variable4 variable5 variable6 variable7 EC
1         1         1         1         5         1         1         1 -1
2         2         2         2         6         2         2         2 -2
3         3        99         3         7         3         3         3 -3
4         4         4         4         8         4         4         4 -4
5         5         5         5         9         5       999         5 -5
6         6         6         6        10         6         6         6 -6
7        99         7         7        11       999         7         7 -7

你可以这样试试(注意:这里沿用了问题最初版本的写法,即非缺失的单元格会被填入同一行的 EC 值;如果想保留各列自身的值,请把 EC 换成 .x):

library(dplyr)

# Recode the sentinels column-wise with across().
# - variable1:variable4: cells equal to 99 become NA.
# - variable5:variable7: cells equal to 999 become NA.
# Every other cell is filled with the EC value of the same row.
#
# ifelse() is vectorised, so rowwise() is not needed: each column is
# handled in one call and the result is not left row-grouped.
# as_tibble() keeps the tibble-style printing used for the output below.
dat %>%
  as_tibble() %>%
  mutate(
    across(variable1:variable4, ~ ifelse(.x == 99, NA, EC)),
    across(variable5:variable7, ~ ifelse(.x == 999, NA, EC))
  )

  variable1 variable2 variable3 variable4 variable5 variable6 variable7    EC
      <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl> <dbl>
1        -1        -1        -1        -1        -1        -1        -1    -1
2        -2        -2        -2        -2        -2        -2        -2    -2
3        -3        NA        -3        -3        -3        -3        -3    -3
4        -4        -4        -4        -4        -4        -4        -4    -4
5        -5        -5        -5        -5        -5        NA        -5    -5
6        -6        -6        -6        -6        -6        -6        -6    -6
7        NA        -7        -7        -7        NA        -7        -7    -7

如果你知道列索引,比如在我的 dat 中,variable1 到 variable4 对应 1:4,variable5 到 variable7 对应 5:7,那么仅使用列索引也会得到相同的结果。

# Same recoding as before, but selecting the columns by position:
# columns 1-4 use 99 as the sentinel, columns 5-7 use 999.
# Non-sentinel cells are filled with the EC value of the same row.
#
# ifelse() is vectorised, so rowwise() is not needed; as_tibble() keeps
# the tibble-style printing used for the output below.
dat %>%
  as_tibble() %>%
  mutate(
    across(1:4, ~ ifelse(.x == 99, NA, EC)),
    across(5:7, ~ ifelse(.x == 999, NA, EC))
  )

  variable1 variable2 variable3 variable4 variable5 variable6 variable7    EC
      <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl> <dbl>
1        -1        -1        -1        -1        -1        -1        -1    -1
2        -2        -2        -2        -2        -2        -2        -2    -2
3        -3        NA        -3        -3        -3        -3        -3    -3
4        -4        -4        -4        -4        -4        -4        -4    -4
5        -5        -5        -5        -5        -5        NA        -5    -5
6        -6        -6        -6        -6        -6        -6        -6    -6
7        NA        -7        -7        -7        NA        -7        -7    -7

补充:当需要处理的列不连续时,可以用 c() 逐一列出列名。

# Second example: the sentinel columns are no longer contiguous.
# variable1, variable2 and variable5 contain 99; variable4 and variable6
# contain 999. Columns built with c() around a double are double, the
# plain a:b columns stay integer.
dat <- data.frame(
  variable1 = c(1:6, 99),
  variable2 = c(1, 2, 99, 4:7),
  variable3 = 1:7,
  variable4 = c(5:10, 999),
  variable5 = c(1:6, 99),
  variable6 = c(1:4, 999, 6, 7),
  variable7 = 1:7,
  EC = -c(1, 2, 3, 4, 5, 6, 7)
)

# Non-contiguous columns: list them explicitly with c().
# - variable1, variable2, variable3, variable5: 99 becomes NA.
# - variable4, variable6, variable7: 999 becomes NA.
# Non-sentinel cells are filled with the EC value of the same row.
#
# ifelse() is vectorised, so rowwise() is not needed; as_tibble() keeps
# the tibble-style printing used for the output below.
dat %>%
  as_tibble() %>%
  mutate(
    across(
      c(variable1, variable2, variable3, variable5),
      ~ ifelse(.x == 99, NA, EC)
    ),
    across(
      c(variable4, variable6, variable7),
      ~ ifelse(.x == 999, NA, EC)
    )
  )

  variable1 variable2 variable3 variable4 variable5 variable6 variable7    EC
      <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl> <dbl>
1        -1        -1        -1        -1        -1        -1        -1    -1
2        -2        -2        -2        -2        -2        -2        -2    -2
3        -3        NA        -3        -3        -3        -3        -3    -3
4        -4        -4        -4        -4        -4        -4        -4    -4
5        -5        -5        -5        -5        -5        NA        -5    -5
6        -6        -6        -6        -6        -6        -6        -6    -6
7        NA        -7        -7        NA        NA        -7        -7    -7

如果在数据框 dat 中,99 和 999 只用来表示缺失值(也就是说,没有任何列把它们当作有效数据),您可以:

# Blank out both sentinel codes across the whole data frame at once:
# any cell equal to 99 or 999, in any column, becomes NA.
dat[dat == 99 | dat == 999] <- NA

否则,您可以使用 dplyr 中的 na_if:
library(dplyr)

# na_if(x, y) replaces every element of x that equals y with NA and
# leaves all other values as they are.
# - variable1-variable4: 99 becomes NA.
# - variable5-variable7: 999 becomes NA.
#
# The sentinel is passed inside a lambda because supplying extra
# arguments through across()'s `...` is deprecated since dplyr 1.1.0.
dat_1 <- dat %>%
  mutate(
    across(c(variable1, variable2, variable3, variable4), ~ na_if(.x, 99)),
    across(c(variable5, variable6, variable7), ~ na_if(.x, 999))
  )
dat_1

也可以考虑对整块列一次性运行 ifelse,因为它同时适用于向量和矩阵:

# Names of the columns that use 99 and 999 as their missing-value code.
var_99 <- paste0("variable", 1:4)
var_999 <- paste0("variable", 5:7)

# Run ifelse() on each block of columns in one call. The comparison on a
# data-frame subset yields a logical matrix; cells holding the sentinel
# become NA, and dat$EC is recycled down each column for the other cells.
dat[var_99] <- ifelse(test = dat[var_99] == 99, yes = NA, no = dat$EC)
dat[var_999] <- ifelse(test = dat[var_999] == 999, yes = NA, no = dat$EC)

如果要保留各列自身的值(而不是填入 dat$EC),请把 no 参数强制转换为与该列块对应的矩阵:

# Replace the sentinel with NA and keep each column's own value elsewhere.
# The `no` argument is converted with as.matrix() so that it lines up
# cell-by-cell with the logical matrix produced by the comparison.
# Each block must use its own columns as `no`: the 999 block previously
# passed dat[var_99], which would have filled variable5-variable7 with
# values taken from variable1-variable3.
dat[var_99] <- ifelse(dat[var_99] == 99, NA, as.matrix(dat[var_99]))
dat[var_999] <- ifelse(dat[var_999] == 999, NA, as.matrix(dat[var_999]))