使用管道,根据其他变量的条件创建变量
Create a variable based on conditions on others with pipes
我刚刚开始使用 dplyr 和管道,需要帮助来创建新变量 'visit_tag':它应该等于 'date' 等于 'last_date' 的那一行的 'visit' 值,并且应该在每个 ID 的每一行中重复。
下面是模拟数据集的代码和我写的代码:
# Mock data: IDs 7, 8 and 9, each with visits at 0, 180 and 360.
# Some rows have a missing date and last_date (NA).
df <- data.frame(
  ID = rep(c(7, 8, 9), each = 3),
  visit = rep(c(0, 180, 360), times = 3),
  date = as.Date(c(
    "2014-04-28", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2013-06-29", "2013-09-12", "2015-02-18"
  )),
  last_date = as.Date(c(
    "2015-10-13", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2015-02-18", "2015-02-18", "2015-02-18"
  ))
)
# For each ID, take the visit recorded on the row where date equals
# last_date and repeat it on every row of that ID.
# `date == last_date` is NA wherever a date is missing, and indexing with a
# logical NA returns an extra NA element, so the plain logical subset can
# have a length that mutate() cannot recycle. which() keeps only the TRUE
# positions; [1L] guarantees a single value (NA when no row matches).
df <- df %>%
  group_by(ID) %>%
  mutate(visit_tag = visit[which(date == last_date)[1L]])
谢谢!
在下文中,我假设每个 ID 只有一条记录满足 date == last_date:
library(dplyr)
# Step 1: keep `visit` only on rows where date equals last_date; every other
# row (including rows whose comparison is NA) becomes NA.
# Step 2: within each ID, spread the first non-missing value over all rows.
# Picking element [1L] of the non-NA values always yields exactly one value
# per group (NA when the group has no match), so the result can always be
# recycled, whereas na.omit() returns as many values as there are matches.
df %>%
  mutate(
    visit_tag = case_when(
      date == last_date ~ visit,
      TRUE ~ NA_real_
    )
  ) %>%
  group_by(ID) %>%
  mutate(visit_tag = visit_tag[!is.na(visit_tag)][1L])
# A tibble: 9 × 5
# Groups: ID [3]
ID visit date last_date visit_tag
<dbl> <dbl> <date> <date> <dbl>
1 7 0 2014-04-28 2015-10-13 360
2 7 180 NA NA 360
3 7 360 2015-10-13 2015-10-13 360
4 8 0 2013-06-21 2013-06-21 0
5 8 180 NA NA 0
6 8 360 NA NA 0
7 9 0 2013-06-29 2015-02-18 360
8 9 180 2013-09-12 2015-02-18 360
9 9 360 2015-02-18 2015-02-18 360
这是另一个使用 mutate 和 fill 的 tidyverse 选项:
library(tidyverse)
# Within each ID, mark the row where date equals last_date with its visit
# value, then copy that value onto the remaining rows of the group.
# if_else() with NA_real_ keeps the column double even when a group has no
# matching row; a missing comparison (NA date) yields NA.
df %>%
  group_by(ID) %>%
  mutate(visit_tag = if_else(date == last_date, visit, NA_real_)) %>%
  # "updown" fills downwards first, then upwards, so the value reaches rows
  # on both sides of the matching row.
  fill(visit_tag, .direction = "updown")
输出
ID visit date last_date visit_tag
<dbl> <dbl> <date> <date> <dbl>
1 7 0 2014-04-28 2015-10-13 360
2 7 180 NA NA 360
3 7 360 2015-10-13 2015-10-13 360
4 8 0 2013-06-21 2013-06-21 0
5 8 180 NA NA 0
6 8 360 NA NA 0
7 9 0 2013-06-29 2015-02-18 360
8 9 180 2013-09-12 2015-02-18 360
9 9 360 2015-02-18 2015-02-18 360
或者使用 data.table 的另一个选项:
library(data.table)
dt <- as.data.table(df)
# Within each ID, pick the visit from the first row where date equals
# last_date and let `:=` recycle that single value across the group.
# which() drops the NA comparisons produced by missing dates, and [1L]
# yields NA when no row matches, so every group gets exactly one value
# regardless of where the matching row sits. A forward-only fill would
# return a shorter vector whenever the match is not on the last row.
dt[, visit_tag := visit[which(date == last_date)[1L]], by = ID]
数据
# Sample data used by the answers: nine rows, three visits (0, 180, 360)
# for each of the IDs 7, 8 and 9; dates are written as ISO strings.
df <- data.frame(
  ID = rep(c(7, 8, 9), each = 3),
  visit = rep(c(0, 180, 360), times = 3),
  date = as.Date(c(
    "2014-04-28", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2013-06-29", "2013-09-12", "2015-02-18"
  )),
  last_date = as.Date(c(
    "2015-10-13", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2015-02-18", "2015-02-18", "2015-02-18"
  ))
)
我刚刚开始使用 dplyr 和管道,需要帮助来创建新变量 'visit_tag':它应该等于 'date' 等于 'last_date' 的那一行的 'visit' 值,并且应该在每个 ID 的每一行中重复。 下面是模拟数据集的代码和我写的代码:
# Mock data: IDs 7, 8 and 9, each with visits at 0, 180 and 360.
# Some rows have a missing date and last_date (NA).
df <- data.frame(
  ID = rep(c(7, 8, 9), each = 3),
  visit = rep(c(0, 180, 360), times = 3),
  date = as.Date(c(
    "2014-04-28", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2013-06-29", "2013-09-12", "2015-02-18"
  )),
  last_date = as.Date(c(
    "2015-10-13", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2015-02-18", "2015-02-18", "2015-02-18"
  ))
)
# For each ID, take the visit recorded on the row where date equals
# last_date and repeat it on every row of that ID.
# `date == last_date` is NA wherever a date is missing, and indexing with a
# logical NA returns an extra NA element, so the plain logical subset can
# have a length that mutate() cannot recycle. which() keeps only the TRUE
# positions; [1L] guarantees a single value (NA when no row matches).
df <- df %>%
  group_by(ID) %>%
  mutate(visit_tag = visit[which(date == last_date)[1L]])
谢谢!
在下文中,我假设每个 ID 只有一条记录满足 date == last_date:
library(dplyr)
# Step 1: keep `visit` only on rows where date equals last_date; every other
# row (including rows whose comparison is NA) becomes NA.
# Step 2: within each ID, spread the first non-missing value over all rows.
# Picking element [1L] of the non-NA values always yields exactly one value
# per group (NA when the group has no match), so the result can always be
# recycled, whereas na.omit() returns as many values as there are matches.
df %>%
  mutate(
    visit_tag = case_when(
      date == last_date ~ visit,
      TRUE ~ NA_real_
    )
  ) %>%
  group_by(ID) %>%
  mutate(visit_tag = visit_tag[!is.na(visit_tag)][1L])
# A tibble: 9 × 5
# Groups: ID [3]
ID visit date last_date visit_tag
<dbl> <dbl> <date> <date> <dbl>
1 7 0 2014-04-28 2015-10-13 360
2 7 180 NA NA 360
3 7 360 2015-10-13 2015-10-13 360
4 8 0 2013-06-21 2013-06-21 0
5 8 180 NA NA 0
6 8 360 NA NA 0
7 9 0 2013-06-29 2015-02-18 360
8 9 180 2013-09-12 2015-02-18 360
9 9 360 2015-02-18 2015-02-18 360
这是另一个使用 mutate 和 fill 的 tidyverse 选项:
library(tidyverse)
# Within each ID, mark the row where date equals last_date with its visit
# value, then copy that value onto the remaining rows of the group.
# if_else() with NA_real_ keeps the column double even when a group has no
# matching row; a missing comparison (NA date) yields NA.
df %>%
  group_by(ID) %>%
  mutate(visit_tag = if_else(date == last_date, visit, NA_real_)) %>%
  # "updown" fills downwards first, then upwards, so the value reaches rows
  # on both sides of the matching row.
  fill(visit_tag, .direction = "updown")
输出
ID visit date last_date visit_tag
<dbl> <dbl> <date> <date> <dbl>
1 7 0 2014-04-28 2015-10-13 360
2 7 180 NA NA 360
3 7 360 2015-10-13 2015-10-13 360
4 8 0 2013-06-21 2013-06-21 0
5 8 180 NA NA 0
6 8 360 NA NA 0
7 9 0 2013-06-29 2015-02-18 360
8 9 180 2013-09-12 2015-02-18 360
9 9 360 2015-02-18 2015-02-18 360
或者使用 data.table 的另一个选项:
library(data.table)
dt <- as.data.table(df)
# Within each ID, pick the visit from the first row where date equals
# last_date and let `:=` recycle that single value across the group.
# which() drops the NA comparisons produced by missing dates, and [1L]
# yields NA when no row matches, so every group gets exactly one value
# regardless of where the matching row sits. A forward-only fill would
# return a shorter vector whenever the match is not on the last row.
dt[, visit_tag := visit[which(date == last_date)[1L]], by = ID]
数据
# Sample data used by the answers: nine rows, three visits (0, 180, 360)
# for each of the IDs 7, 8 and 9; dates are written as ISO strings.
df <- data.frame(
  ID = rep(c(7, 8, 9), each = 3),
  visit = rep(c(0, 180, 360), times = 3),
  date = as.Date(c(
    "2014-04-28", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2013-06-29", "2013-09-12", "2015-02-18"
  )),
  last_date = as.Date(c(
    "2015-10-13", NA, "2015-10-13",
    "2013-06-21", NA, NA,
    "2015-02-18", "2015-02-18", "2015-02-18"
  ))
)