按行条件修改所有列(不使用循环)

Condition in rows, modify all columns without a loop

我想根据行条件修改 R data.table 中所有选定的列,即:

对于在 cols 变量中选择的所有 4 列,如果值大于(或等于)1.5,我想将它们设置为 1,否则为 0

我试过类似的东西:iris[(cols) > 1.5 , (cols) := 1, .SDcols = cols]

感谢

一种data.table方法:

# Work on a data.table version of iris so `:=` can update columns by reference.
iris <- as.data.table(iris)
# The first four columns are the ones to transform.
cols <- names(iris)[seq_len(4L)]
cols
# [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
# Values above 1.5 become 1; every other value is kept unchanged.
cap_above <- function(z) fifelse(z > 1.5, 1, z)
iris[, (cols) := lapply(.SD, cap_above), .SDcols = cols]
iris
#      Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
#             <num>       <num>        <num>       <num>    <fctr>
#   1:            1           1          1.4         0.2    setosa
#   2:            1           1          1.4         0.2    setosa
#   3:            1           1          1.3         0.2    setosa
#   4:            1           1          1.5         0.2    setosa
#   5:            1           1          1.4         0.2    setosa
#   6:            1           1          1.0         0.4    setosa
#   7:            1           1          1.4         0.3    setosa
#   8:            1           1          1.5         0.2    setosa
#   9:            1           1          1.4         0.2    setosa
#  10:            1           1          1.5         0.1    setosa
#  ---                                                            
# 141:            1           1          1.0         1.0 virginica
# 142:            1           1          1.0         1.0 virginica
# 143:            1           1          1.0         1.0 virginica
# 144:            1           1          1.0         1.0 virginica
# 145:            1           1          1.0         1.0 virginica
# 146:            1           1          1.0         1.0 virginica
# 147:            1           1          1.0         1.0 virginica
# 148:            1           1          1.0         1.0 virginica
# 149:            1           1          1.0         1.0 virginica
# 150:            1           1          1.0         1.0 virginica

使用 set 的替代方法:

# By-reference alternative: for each selected column, overwrite only the rows
# whose value exceeds 1.5 with 1; all other rows are left untouched.
for (col_name in cols) {
  rows_above <- which(iris[[col_name]] > 1.5)
  set(iris, i = rows_above, j = col_name, value = 1)
}

另一个解决方案:

library(dplyr)
library(data.table)

# Recode the first four columns to 1 (value >= 1.5) or 0 (otherwise).
# `mutate(across(everything(), ...))` replaces the superseded `mutate_all()`.
iris[, 1:4] %>%
  data.table() %>%
  mutate(across(everything(), ~ ifelse(.x >= 1.5, 1, 0)))

如果只需要处理数值列,across 可能是一个不错的选择;它也适用于按位置或名称等更具体的列选择方式

library(tidyverse)

# For every numeric column, replace values above 1.5 with 1 and keep the rest.
as_tibble(iris) |>
  mutate(
    across(where(is.numeric), \(x) if_else(x > 1.5, 1, x))
  )
#> # A tibble: 150 x 5
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#>  1            1           1          1.4         0.2 setosa 
#>  2            1           1          1.4         0.2 setosa 
#>  3            1           1          1.3         0.2 setosa 
#>  4            1           1          1.5         0.2 setosa 
#>  5            1           1          1.4         0.2 setosa 
#>  6            1           1          1           0.4 setosa 
#>  7            1           1          1.4         0.3 setosa 
#>  8            1           1          1.5         0.2 setosa 
#>  9            1           1          1.4         0.2 setosa 
#> 10            1           1          1.5         0.1 setosa 
#> # ... with 140 more rows

由 reprex 包 (v2.0.1) 于 2021-10-18 创建

我们可以这样做:

library(dplyr)
# Turn every numeric column into an integer flag: 1 when the value is
# greater than 1.5, 0 otherwise.
mutate(
  iris,
  across(where(is.numeric), \(x) as.integer(x > 1.5))
)

基础 R 选项 -

# Copy iris, then recode the first four columns column-by-column into
# integer flags: 1 when the value is greater than 1.5, 0 otherwise.
data <- iris
cols <- seq_len(4L)
data[cols] <- lapply(data[cols], function(v) as.integer(v > 1.5))

#    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
#1              1           1            0           0     setosa
#2              1           1            0           0     setosa
#3              1           1            0           0     setosa
#4              1           1            0           0     setosa
#5              1           1            0           0     setosa
#6              1           1            1           0     setosa
#...
#...

开头的+用于将逻辑值(TRUE/FALSE)变为整数(1/0)。