R:用 1 替换非零数据

R: Replace Non-Zero Data with 1

我正在使用 R 编程语言。假设我有以下数据:

> head(my_data)
  survey_1_var_1 survey_1_var_2 survey_1_var_3 survey_2_var_4 survey_2_var_5 survey_2_var_6 survey_3_var_7 survey_3_var_8 survey_3_var_9 g
1       15.22394       0.000000      16.657620       0.000000       6.646745       9.146625        0.00000        0.00000      0.0000000 C
2        0.00000      21.144729       0.000000       0.000000      13.974305       0.000000       10.83326        0.00000     11.0154182 A
3       28.21113       0.000000      -3.157330       7.730749      -1.919841      19.842216       18.18518       13.45900     10.6051849 C
4        0.00000       0.000000      -2.125495       0.000000       0.000000      16.317981       11.52731       15.25231      0.0000000 C
5        0.00000       0.000000      -1.331926      16.843596       0.000000     -13.215788       10.61635        0.00000     -0.8529851 B
6      -11.25795       7.150576       0.000000       0.000000       0.000000       8.292532       11.43462        0.00000      0.0000000 A

我的问题是:有没有办法用 1 替换所有非零数据?

我可以用笨办法逐列处理:

# Column-by-column version. The columns must be referenced through my_data$
# (the bare names are not in scope), and the test is `!= 0` rather than `> 0`
# so that negative values are also replaced with 1.
my_data$survey_1_var_1 <- ifelse(my_data$survey_1_var_1 != 0, 1, 0)
my_data$survey_1_var_2 <- ifelse(my_data$survey_1_var_2 != 0, 1, 0)

etc..

但是有没有办法一次完成所有这些?

谢谢!

你可以试试

library(dplyr)

# Recode every numeric column: non-zero -> 1, zero -> 0.
# The comparison is `!= 0` rather than `> 0` because the data contains
# negative values; `> 0` would turn those non-zero entries into 0.
# Non-numeric columns (such as g) are left untouched by where(is.numeric).
my_data %>%
  mutate(across(where(is.numeric), ~ ifelse(.x != 0, 1, 0)))

#>   survey_1_var_1 survey_1_var_2 survey_1_var_3 survey_2_var_4 survey_2_var_5 survey_2_var_6 survey_3_var_7 survey_3_var_8 survey_3_var_9 g
#> 1              1              0              1              0              1              1              0              0              0 C
#> 2              0              1              0              0              1              0              1              0              1 A
#> 3              1              0              1              1              1              1              1              1              1 C
#> 4              0              0              1              0              0              1              1              1              0 C
#> 5              0              0              1              1              0              1              1              0              1 B
#> 6              1              1              0              0              0              1              1              0              0 A

使用 dplyr 的 across 语法:

# Recode every column whose name starts with "survey": non-zero -> 1, zero -> 0.
# `!= 0` (not `> 0`) so that negative values also become 1.
df %>%
  mutate(across(starts_with("survey"), ~ ifelse(.x != 0, 1, 0)))

我会更进一步,先将值转换为布尔值(TRUE 表示非零,FALSE 表示 0),然后使用一元 + 前缀把布尔值转换回数值 1/0:

library(dplyr)

# as.logical() maps 0 to FALSE and any other number to TRUE;
# the unary + then coerces TRUE/FALSE to 1/0.
df %>%
  mutate(
    across(where(is.numeric), function(col) +as.logical(col))
  )

输出:

  survey_1_var_1 survey_1_var_2 survey_1_var_3 survey_2_var_4 survey_2_var_5 survey_2_var_6 survey_3_var_7 survey_3_var_8 survey_3_var_9 g
1              1              0              1              0              1              1              0              0              0 C
2              0              1              0              0              1              0              1              0              1 A
3              1              0              1              1              1              1              1              1              1 C
4              0              0              1              0              0              1              1              1              0 C
5              0              0              1              1              0              1              1              0              1 B
6              1              1              0              0              0              1              1              0              0 A

这里还有一个 base R(基础 R)的方案。

# Logical mask of the numeric columns. vapply() always returns a logical
# vector, whereas sapply() changes its return type on empty input.
isnum <- vapply(df, is.numeric, logical(1))

# Recode each numeric column in place: non-zero -> 1, zero -> 0 (NA stays NA).
# `col != 0` covers both positive and negative values in a single comparison,
# and working column by column avoids the round-trip through a matrix.
df[isnum] <- lapply(df[isnum], function(col) as.numeric(col != 0))

输出

  survey_1_var_1 survey_1_var_2 survey_1_var_3 survey_2_var_4 survey_2_var_5 survey_2_var_6 survey_3_var_7 survey_3_var_8 survey_3_var_9 g
1              1              0              1              0              1              1              0              0              0 C
2              0              1              0              0              1              0              1              0              1 A
3              1              0              1              1              1              1              1              1              1 C
4              0              0              1              0              0              1              1              1              0 C
5              0              0              1              1              0              1              1              0              1 B
6              1              1              0              0              0              1              1              0              0 A

数据

# Sample data: nine numeric survey columns (containing zeros, positive and
# negative values) plus a character grouping column g; six rows.
df <- data.frame(
  survey_1_var_1 = c(15.22394, 0, 28.21113, 0, 0, -11.25795),
  survey_1_var_2 = c(0, 21.144729, 0, 0, 0, 7.150576),
  survey_1_var_3 = c(16.65762, 0, -3.15733, -2.125495, -1.331926, 0),
  survey_2_var_4 = c(0, 0, 7.730749, 0, 16.843596, 0),
  survey_2_var_5 = c(6.646745, 13.974305, -1.919841, 0, 0, 0),
  survey_2_var_6 = c(9.146625, 0, 19.842216, 16.317981, -13.215788, 8.292532),
  survey_3_var_7 = c(0, 10.83326, 18.18518, 11.52731, 10.61635, 11.43462),
  survey_3_var_8 = c(0, 0, 13.459, 15.25231, 0, 0),
  survey_3_var_9 = c(0, 11.0154182, 10.6051849, 0, -0.8529851, 0),
  g = c("C", "A", "C", "C", "B", "A"),
  stringsAsFactors = FALSE
)