使用管道,根据其他变量的条件创建新变量

Create a variable based on conditions on others with pipes

我刚刚开始使用 dplyr 和管道,我需要帮助来创建新变量 'visit_tag':当 'date' 等于 'last_date' 时,它应该等于该行 'visit' 的值,并且应该在每个 ID 的每一行中重复。下面是模拟数据集的代码和我写的代码:

# Mock data set: three IDs (7, 8, 9), each with visits at 0, 180 and 360.
# `date` and `last_date` are Date columns that contain missing values.
df <- data.frame(
  ID = rep(c(7, 8, 9), each = 3),
  visit = rep(c(0, 180, 360), times = 3),
  date = as.Date(c(
    "2014-04-28", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2013-06-29", "2013-09-12", "2015-02-18"
  )),
  last_date = as.Date(c(
    "2015-10-13", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2015-02-18", "2015-02-18", "2015-02-18"
  ))
)


# Asker's attempt (kept as posted): within each ID, take the `visit` value on
# the row where `date` equals `last_date` and store it in `visit_tag`.
# NOTE: `date == last_date` is NA wherever either date is NA, and subsetting
# with an NA index yields an NA element. For ID 7 the comparison is
# c(FALSE, NA, TRUE), so the subset is c(NA, 360): length 2, which is neither
# 1 nor the group size of 3 that mutate() needs.
df <- df %>% 
        group_by(ID) %>% 
        mutate(visit_tag = visit[date==last_date] )

谢谢!

在下文中,我假设每个 ID 只有一条记录满足 date == last_date:

library(dplyr)

# For each ID, look up `visit` on the first row where date == last_date and
# repeat that value on every row of the ID.
# match(TRUE, ...) skips NA comparisons and returns NA when no row matches,
# so the lookup is always length 1: an ID without a matching row gets NA and
# an ID with several matching rows uses the first one.
# The result stays grouped by ID.
df %>%
  group_by(ID) %>%
  mutate(visit_tag = visit[match(TRUE, date == last_date)])


# A tibble: 9 × 5
# Groups:   ID [3]
     ID visit date       last_date  visit_tag
  <dbl> <dbl> <date>     <date>         <dbl>
1     7     0 2014-04-28 2015-10-13       360
2     7   180 NA         NA               360
3     7   360 2015-10-13 2015-10-13       360
4     8     0 2013-06-21 2013-06-21         0
5     8   180 NA         NA                 0
6     8   360 NA         NA                 0
7     9     0 2013-06-29 2015-02-18       360
8     9   180 2013-09-12 2015-02-18       360
9     9   360 2015-02-18 2015-02-18       360

这是另一个使用 `mutate` 和 `fill` 的 tidyverse 方案:

library(tidyverse)

# Within each ID, keep `visit` only on the row where date == last_date
# (NA elsewhere), then let fill() copy that value to the other rows of the
# ID, filling upwards first and then downwards.
df %>%
  group_by(ID) %>%
  mutate(visit_tag = if_else(date == last_date, visit, NA_real_)) %>%
  fill(visit_tag, .direction = "updown")

输出

     ID visit date       last_date  visit_tag
  <dbl> <dbl> <date>     <date>         <dbl>
1     7     0 2014-04-28 2015-10-13       360
2     7   180 NA         NA               360
3     7   360 2015-10-13 2015-10-13       360
4     8     0 2013-06-21 2013-06-21         0
5     8   180 NA         NA                 0
6     8   360 NA         NA                 0
7     9     0 2013-06-29 2015-02-18       360
8     9   180 2013-09-12 2015-02-18       360
9     9   360 2015-02-18 2015-02-18       360

data.table 的另一个选项:

library(data.table)
dt <- as.data.table(df)

# For each ID, look up `visit` on the first row where date == last_date and
# assign that single value to every row of the group (by reference).
# match(TRUE, ...) skips NA comparisons and returns NA when no row matches,
# so the right-hand side is always length 1 and recycles to any group size,
# wherever the matching row sits in the group.
dt[, visit_tag := visit[match(TRUE, date == last_date)], by = ID]

数据

# Sample data used by the answers: three IDs with visits at 0, 180 and 360,
# plus two Date columns (`date`, `last_date`) that contain missing values.
df <- data.frame(
  ID = rep(c(7, 8, 9), each = 3),
  visit = rep(c(0, 180, 360), times = 3),
  date = as.Date(c(
    "2014-04-28", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2013-06-29", "2013-09-12", "2015-02-18"
  )),
  last_date = as.Date(c(
    "2015-10-13", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2015-02-18", "2015-02-18", "2015-02-18"
  ))
)