根据某个范围内的数字是否出现在选定的列中创建虚拟变量
Create dummy variable based on if a number in a range appears in select columns
我正在尝试查询数据集中的多个列以查看是否存在 135 到 225 之间的数字。
我目前一直以非常手动的方式进行操作,但这是不可持续的,因为我要查询的列远不止 2 列。我怎样才能更高效地编写这段代码,以便可以列出 6 个以上的列名,而不会让代码变得冗长?
# Label a record "Complete" when either day-count column lies within
# [135, 225]. The closing `)` must come before the closing `]`; with the two
# transposed the statement does not parse.
# Logical (not integer) indexing is kept on purpose: rows where the condition
# is NA are simply left unassigned.
sample_dates_test$day180_completion[
  (sample_dates_test$blood_day180_sincedx <= 225 &
    sample_dates_test$blood_day180_sincedx >= 135) |
    (sample_dates_test$prevaxblood.dayneg7_sincedx <= 225 &
      sample_dates_test$prevaxblood.dayneg7_sincedx >= 135)
] <- "Complete"
structure(list(record_id = 2:4, first_dose = structure(c(NA,
18718, 18660), class = "Date"), second_dose = structure(c(NA,
18745, 18688), class = "Date"), day0 = structure(c(18449, 18451,
18462), class = "Date"), blood_it_plus2 = structure(c(18459,
18464, NA), class = "Date"), blood_day180 = structure(c(18647,
NA, NA), class = "Date"), blood_day90 = structure(c(NA, 18551,
NA), class = "Date"), blood_it_plus7 = structure(c(NA, NA, 18470
), class = "Date"), blood_day30 = structure(c(NA_real_, NA_real_,
NA_real_), class = "Date"), blood_day365 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), postvaxblood.day90 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), blood_day7 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), blood_day0 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), postvaxblood.day30 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), postvaxblood.day7 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), prevaxblood.dayneg7 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), blood_it_plus2_sincedx = c(10,
13, NA), blood_it_plus7_sincedx = c(NA, NA, 8), blood_day0_sincedx = c(NA_real_,
NA_real_, NA_real_), blood_day7_sincedx = c(NA_real_, NA_real_,
NA_real_), blood_day30_sincedx = c(NA_real_, NA_real_, NA_real_
), blood_day90_sincedx = c(NA, 100, NA), blood_day180_sincedx = c(198,
NA, NA), blood_day365_sincedx = c(NA_real_, NA_real_, NA_real_
), prevaxblood.dayneg7_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day7_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day30_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day90_sincedx = c(NA_real_, NA_real_, NA_real_
), first_dose_sincedx = c(NA, 267, 198), second_dose_sincedx = c(NA,
294, 226), today = c(378, 376, 365)), reshapeWide = list(v.names = "enrollment_obtdatetime",
timevar = "redcap_event_name", idvar = "record_id", times = c("enrollment_arm_1",
"it__2_days_arm_1", "day_180__30_days_arm_1", "day_90__14_days_arm_1",
"it__7_days_arm_1", "day_30__7_days_arm_1", "day_365__60_days_arm_1",
"post_vaccine_day_9_arm_1", "day_7__2_days_arm_1", "day_0_3_days_arm_1",
"post_vaccine_day_3_arm_1", "post_vaccine_day_7_arm_1", "vaccine_day_7_to_1_arm_1"
), varying = structure(c("enrollment_obtdatetime.enrollment_arm_1",
"enrollment_obtdatetime.it__2_days_arm_1", "enrollment_obtdatetime.day_180__30_days_arm_1",
"enrollment_obtdatetime.day_90__14_days_arm_1", "enrollment_obtdatetime.it__7_days_arm_1",
"enrollment_obtdatetime.day_30__7_days_arm_1", "enrollment_obtdatetime.day_365__60_days_arm_1",
"enrollment_obtdatetime.post_vaccine_day_9_arm_1", "enrollment_obtdatetime.day_7__2_days_arm_1",
"enrollment_obtdatetime.day_0_3_days_arm_1", "enrollment_obtdatetime.post_vaccine_day_3_arm_1",
"enrollment_obtdatetime.post_vaccine_day_7_arm_1", "enrollment_obtdatetime.vaccine_day_7_to_1_arm_1"
), .Dim = c(1L, 13L))), row.names = c(1L, 4L, 7L), class = "data.frame")
假设您的数据名为 sample。
可以这样做:
library(dplyr, warn.conflicts = FALSE)

# For every row, check the columns at positions 17:28 and label the row
# "complete" when at least one of them holds a value within [135, 225],
# otherwise "incomplete".
sample <- sample %>%
  rowwise() %>%
  mutate(
    # na.rm = TRUE matters: between() yields NA for missing values, and
    # any(c(FALSE, NA)) is NA, which would leave `dummy` as NA instead of
    # "incomplete" for rows that have no in-range value.
    dummy = if (any(between(c_across(17:28), 135, 225), na.rm = TRUE)) {
      "complete"
    } else {
      "incomplete"
    }
  ) %>%
  ungroup()
#> # A tibble: 3 x 32
#> record_id first_dose second_dose day0 blood_it_plus2 blood_day180
#> <int> <date> <date> <date> <date> <date>
#> 1 2 NA NA 2020-07-06 2020-07-16 2021-01-20
#> 2 3 2021-04-01 2021-04-28 2020-07-08 2020-07-21 NA
#> 3 4 2021-02-02 2021-03-02 2020-07-19 NA NA
#> # ... with 26 more variables: blood_day90 <date>, blood_it_plus7 <date>,
#> # blood_day30 <date>, blood_day365 <date>, postvaxblood.day90 <date>,
#> # blood_day7 <date>, blood_day0 <date>, postvaxblood.day30 <date>,
#> # postvaxblood.day7 <date>, prevaxblood.dayneg7 <date>,
#> # blood_it_plus2_sincedx <dbl>, blood_it_plus7_sincedx <dbl>,
#> # blood_day0_sincedx <dbl>, blood_day7_sincedx <dbl>,
#> # blood_day30_sincedx <dbl>, blood_day90_sincedx <dbl>,
#> # blood_day180_sincedx <dbl>, blood_day365_sincedx <dbl>,
#> # prevaxblood.dayneg7_sincedx <dbl>, postvaxblood.day7_sincedx <dbl>,
#> # postvaxblood.day30_sincedx <dbl>, postvaxblood.day90_sincedx <dbl>,
#> # first_dose_sincedx <dbl>, second_dose_sincedx <dbl>, today <dbl>,
#> # dummy <chr>
于 2021-07-21 由 reprex 包 (v2.0.0) 创建
我正在尝试查询数据集中的多个列以查看是否存在 135 到 225 之间的数字。
我目前一直以非常手动的方式进行操作,但这是不可持续的,因为我要查询的列远不止 2 列。我怎样才能更高效地编写这段代码,以便可以列出 6 个以上的列名,而不会让代码变得冗长?
# Label a record "Complete" when either day-count column lies within
# [135, 225]. The closing `)` must come before the closing `]`; with the two
# transposed the statement does not parse.
# Logical (not integer) indexing is kept on purpose: rows where the condition
# is NA are simply left unassigned.
sample_dates_test$day180_completion[
  (sample_dates_test$blood_day180_sincedx <= 225 &
    sample_dates_test$blood_day180_sincedx >= 135) |
    (sample_dates_test$prevaxblood.dayneg7_sincedx <= 225 &
      sample_dates_test$prevaxblood.dayneg7_sincedx >= 135)
] <- "Complete"
structure(list(record_id = 2:4, first_dose = structure(c(NA,
18718, 18660), class = "Date"), second_dose = structure(c(NA,
18745, 18688), class = "Date"), day0 = structure(c(18449, 18451,
18462), class = "Date"), blood_it_plus2 = structure(c(18459,
18464, NA), class = "Date"), blood_day180 = structure(c(18647,
NA, NA), class = "Date"), blood_day90 = structure(c(NA, 18551,
NA), class = "Date"), blood_it_plus7 = structure(c(NA, NA, 18470
), class = "Date"), blood_day30 = structure(c(NA_real_, NA_real_,
NA_real_), class = "Date"), blood_day365 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), postvaxblood.day90 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), blood_day7 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), blood_day0 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), postvaxblood.day30 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), postvaxblood.day7 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), prevaxblood.dayneg7 = structure(c(NA_real_,
NA_real_, NA_real_), class = "Date"), blood_it_plus2_sincedx = c(10,
13, NA), blood_it_plus7_sincedx = c(NA, NA, 8), blood_day0_sincedx = c(NA_real_,
NA_real_, NA_real_), blood_day7_sincedx = c(NA_real_, NA_real_,
NA_real_), blood_day30_sincedx = c(NA_real_, NA_real_, NA_real_
), blood_day90_sincedx = c(NA, 100, NA), blood_day180_sincedx = c(198,
NA, NA), blood_day365_sincedx = c(NA_real_, NA_real_, NA_real_
), prevaxblood.dayneg7_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day7_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day30_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day90_sincedx = c(NA_real_, NA_real_, NA_real_
), first_dose_sincedx = c(NA, 267, 198), second_dose_sincedx = c(NA,
294, 226), today = c(378, 376, 365)), reshapeWide = list(v.names = "enrollment_obtdatetime",
timevar = "redcap_event_name", idvar = "record_id", times = c("enrollment_arm_1",
"it__2_days_arm_1", "day_180__30_days_arm_1", "day_90__14_days_arm_1",
"it__7_days_arm_1", "day_30__7_days_arm_1", "day_365__60_days_arm_1",
"post_vaccine_day_9_arm_1", "day_7__2_days_arm_1", "day_0_3_days_arm_1",
"post_vaccine_day_3_arm_1", "post_vaccine_day_7_arm_1", "vaccine_day_7_to_1_arm_1"
), varying = structure(c("enrollment_obtdatetime.enrollment_arm_1",
"enrollment_obtdatetime.it__2_days_arm_1", "enrollment_obtdatetime.day_180__30_days_arm_1",
"enrollment_obtdatetime.day_90__14_days_arm_1", "enrollment_obtdatetime.it__7_days_arm_1",
"enrollment_obtdatetime.day_30__7_days_arm_1", "enrollment_obtdatetime.day_365__60_days_arm_1",
"enrollment_obtdatetime.post_vaccine_day_9_arm_1", "enrollment_obtdatetime.day_7__2_days_arm_1",
"enrollment_obtdatetime.day_0_3_days_arm_1", "enrollment_obtdatetime.post_vaccine_day_3_arm_1",
"enrollment_obtdatetime.post_vaccine_day_7_arm_1", "enrollment_obtdatetime.vaccine_day_7_to_1_arm_1"
), .Dim = c(1L, 13L))), row.names = c(1L, 4L, 7L), class = "data.frame")
假设您的数据名为 sample。
可以这样做:
library(dplyr, warn.conflicts = FALSE)

# For every row, check the columns at positions 17:28 and label the row
# "complete" when at least one of them holds a value within [135, 225],
# otherwise "incomplete".
sample <- sample %>%
  rowwise() %>%
  mutate(
    # na.rm = TRUE matters: between() yields NA for missing values, and
    # any(c(FALSE, NA)) is NA, which would leave `dummy` as NA instead of
    # "incomplete" for rows that have no in-range value.
    dummy = if (any(between(c_across(17:28), 135, 225), na.rm = TRUE)) {
      "complete"
    } else {
      "incomplete"
    }
  ) %>%
  ungroup()
#> # A tibble: 3 x 32
#> record_id first_dose second_dose day0 blood_it_plus2 blood_day180
#> <int> <date> <date> <date> <date> <date>
#> 1 2 NA NA 2020-07-06 2020-07-16 2021-01-20
#> 2 3 2021-04-01 2021-04-28 2020-07-08 2020-07-21 NA
#> 3 4 2021-02-02 2021-03-02 2020-07-19 NA NA
#> # ... with 26 more variables: blood_day90 <date>, blood_it_plus7 <date>,
#> # blood_day30 <date>, blood_day365 <date>, postvaxblood.day90 <date>,
#> # blood_day7 <date>, blood_day0 <date>, postvaxblood.day30 <date>,
#> # postvaxblood.day7 <date>, prevaxblood.dayneg7 <date>,
#> # blood_it_plus2_sincedx <dbl>, blood_it_plus7_sincedx <dbl>,
#> # blood_day0_sincedx <dbl>, blood_day7_sincedx <dbl>,
#> # blood_day30_sincedx <dbl>, blood_day90_sincedx <dbl>,
#> # blood_day180_sincedx <dbl>, blood_day365_sincedx <dbl>,
#> # prevaxblood.dayneg7_sincedx <dbl>, postvaxblood.day7_sincedx <dbl>,
#> # postvaxblood.day30_sincedx <dbl>, postvaxblood.day90_sincedx <dbl>,
#> # first_dose_sincedx <dbl>, second_dose_sincedx <dbl>, today <dbl>,
#> # dummy <chr>
于 2021-07-21 由 reprex 包 (v2.0.0) 创建