使用 R 根据条件从数据框中排除某些行
excluding some rows based on condition from a data frame using R
我有一些这样的数据
# Sample data: one row per admission. Dates are stored as "YYYY/MM/DD" text and
# a missing death date is the literal string "NA".
data.frame(
  id = c(1, 1, 2, 3, 4, 4, 5),
  deathdate = c(
    "2007/04/10", "2007/04/10", "2004/04/01", "NA", "NA",
    "2018/01/01", "2016/01/02"
  ),
  admidate = c(
    "2007/03/08", "2007/04/11", "2004/04/15", "2012/10/20",
    "2017/10/14", "2018/01/02", "2015/12/20"
  ),
  stringsAsFactors = FALSE
)
我希望删除 deathdate(死亡日期)早于 admidate 的行,得到如下所示的新 df:
# Desired result: the rows that remain once every row whose death date precedes
# its admission date has been removed.
data.frame(
  id2 = c(1, 3, 4, 5),
  deathdate2 = c("2007/04/10", "NA", "NA", "2016/01/02"),
  admidate2 = c("2007/03/08", "2012/10/20", "2017/10/14", "2015/12/20"),
  stringsAsFactors = FALSE
)
我试过了
# Logical mask: TRUE where both dates are non-missing and deathdate sorts
# before admidate.
deathbefore <- with(df,(!is.na(deathdate))& !is.na(admidate)& deathdate < admidate)
# NOTE: unary minus coerces the logical mask to 0 / -1, so this indexes with the
# positions 0 and -1 (dropping at most row 1) instead of dropping the flagged rows.
df2 <- df[-deathbefore,]
但是,并没有解决问题。
您可以使用 lubridate 和 difftime:
library(lubridate)
library(dplyr)

# Keep the rows where death does not precede admission, plus the rows whose
# death date is recorded as the string "NA".
df %>%
  # diff is TRUE when admission falls after death. It is NA when deathdate is
  # "NA", because ymd() cannot parse that value (it emits a parse warning).
  mutate(diff = difftime(ymd(admidate), ymd(deathdate)) > 0) %>%
  # diff is already logical, so negate it directly rather than comparing it
  # with the string "TRUE".
  filter(!diff | deathdate == "NA") %>%
  # Drop the helper column so only the original columns are returned.
  select(-diff)
id deathdate admidate
1 1 2007/04/10 2007/03/08
2 3 NA 2012/10/20
3 4 NA 2017/10/14
4 5 2016/01/02 2015/12/20
(警告可忽略)
编辑:
您的方案已经很接近可用了,只需加上 which:
# Flag rows where both dates are present and deathdate sorts before admidate.
# The dates are zero-padded "YYYY/MM/DD" strings, so text order matches
# chronological order.
deathbefore <- with(
  df,
  !is.na(deathdate) & !is.na(admidate) & deathdate < admidate
)
# which() turns the logical mask into row positions. Removing those positions
# from the full row index (instead of using df[-which(deathbefore), ]) keeps
# every row when nothing is flagged; df[-integer(0), ] would return zero rows.
df2 <- df[setdiff(seq_len(nrow(df)), which(deathbefore)), ]
数据:
# Sample data: one row per admission. Dates are stored as "YYYY/MM/DD" text and
# a missing death date is the literal string "NA".
df <- data.frame(
  id = c(1, 1, 2, 3, 4, 4, 5),
  deathdate = c(
    "2007/04/10", "2007/04/10", "2004/04/01", "NA", "NA",
    "2018/01/01", "2016/01/02"
  ),
  admidate = c(
    "2007/03/08", "2007/04/11", "2004/04/15", "2012/10/20",
    "2017/10/14", "2018/01/02", "2015/12/20"
  ),
  stringsAsFactors = FALSE
)
将日期列转换为 Date 对象,然后筛选出 deathdate > admidate 或含有 NA 值的行。
library(dplyr)

# Convert the date columns to Date objects, then keep rows where death comes
# after admission or where either date is missing.
df %>%
  mutate(
    # Missing dates are stored as the literal string "NA"; turn them into real
    # NA values. An explicit function is used because passing extra arguments
    # through across()'s `...` is deprecated in dplyr >= 1.1.0.
    across(contains("date"), function(x) na_if(x, "NA")),
    across(contains("date"), lubridate::ymd)
  ) %>%
  # NOTE(review): `>` also drops rows where death and admission fall on the
  # same day; use `>=` if those rows should be kept.
  filter(deathdate > admidate | is.na(deathdate) | is.na(admidate))
# id deathdate admidate
#1 1 2007-04-10 2007-03-08
#2 3 <NA> 2012-10-20
#3 4 <NA> 2017-10-14
#4 5 2016-01-02 2015-12-20
我有一些这样的数据
# Sample data: one row per admission. Dates are stored as "YYYY/MM/DD" text and
# a missing death date is the literal string "NA".
data.frame(
  id = c(1, 1, 2, 3, 4, 4, 5),
  deathdate = c(
    "2007/04/10", "2007/04/10", "2004/04/01", "NA", "NA",
    "2018/01/01", "2016/01/02"
  ),
  admidate = c(
    "2007/03/08", "2007/04/11", "2004/04/15", "2012/10/20",
    "2017/10/14", "2018/01/02", "2015/12/20"
  ),
  stringsAsFactors = FALSE
)
我希望删除 deathdate(死亡日期)早于 admidate 的行,得到如下所示的新 df:
# Desired result: the rows that remain once every row whose death date precedes
# its admission date has been removed.
data.frame(
  id2 = c(1, 3, 4, 5),
  deathdate2 = c("2007/04/10", "NA", "NA", "2016/01/02"),
  admidate2 = c("2007/03/08", "2012/10/20", "2017/10/14", "2015/12/20"),
  stringsAsFactors = FALSE
)
我试过了
# Logical mask: TRUE where both dates are non-missing and deathdate sorts
# before admidate.
deathbefore <- with(df,(!is.na(deathdate))& !is.na(admidate)& deathdate < admidate)
# NOTE: unary minus coerces the logical mask to 0 / -1, so this indexes with the
# positions 0 and -1 (dropping at most row 1) instead of dropping the flagged rows.
df2 <- df[-deathbefore,]
但是,并没有解决问题。
您可以使用 lubridate 和 difftime:
library(lubridate)
library(dplyr)

# Keep the rows where death does not precede admission, plus the rows whose
# death date is recorded as the string "NA".
df %>%
  # diff is TRUE when admission falls after death. It is NA when deathdate is
  # "NA", because ymd() cannot parse that value (it emits a parse warning).
  mutate(diff = difftime(ymd(admidate), ymd(deathdate)) > 0) %>%
  # diff is already logical, so negate it directly rather than comparing it
  # with the string "TRUE".
  filter(!diff | deathdate == "NA") %>%
  # Drop the helper column so only the original columns are returned.
  select(-diff)
id deathdate admidate
1 1 2007/04/10 2007/03/08
2 3 NA 2012/10/20
3 4 NA 2017/10/14
4 5 2016/01/02 2015/12/20
(警告可忽略)
编辑:
您的方案已经很接近可用了,只需加上 which:
# Flag rows where both dates are present and deathdate sorts before admidate.
# The dates are zero-padded "YYYY/MM/DD" strings, so text order matches
# chronological order.
deathbefore <- with(
  df,
  !is.na(deathdate) & !is.na(admidate) & deathdate < admidate
)
# which() turns the logical mask into row positions. Removing those positions
# from the full row index (instead of using df[-which(deathbefore), ]) keeps
# every row when nothing is flagged; df[-integer(0), ] would return zero rows.
df2 <- df[setdiff(seq_len(nrow(df)), which(deathbefore)), ]
数据:
# Sample data: one row per admission. Dates are stored as "YYYY/MM/DD" text and
# a missing death date is the literal string "NA".
df <- data.frame(
  id = c(1, 1, 2, 3, 4, 4, 5),
  deathdate = c(
    "2007/04/10", "2007/04/10", "2004/04/01", "NA", "NA",
    "2018/01/01", "2016/01/02"
  ),
  admidate = c(
    "2007/03/08", "2007/04/11", "2004/04/15", "2012/10/20",
    "2017/10/14", "2018/01/02", "2015/12/20"
  ),
  stringsAsFactors = FALSE
)
将日期列转换为 Date 对象,然后筛选出 deathdate > admidate 或含有 NA 值的行。
library(dplyr)

# Convert the date columns to Date objects, then keep rows where death comes
# after admission or where either date is missing.
df %>%
  mutate(
    # Missing dates are stored as the literal string "NA"; turn them into real
    # NA values. An explicit function is used because passing extra arguments
    # through across()'s `...` is deprecated in dplyr >= 1.1.0.
    across(contains("date"), function(x) na_if(x, "NA")),
    across(contains("date"), lubridate::ymd)
  ) %>%
  # NOTE(review): `>` also drops rows where death and admission fall on the
  # same day; use `>=` if those rows should be kept.
  filter(deathdate > admidate | is.na(deathdate) | is.na(admidate))
# id deathdate admidate
#1 1 2007-04-10 2007-03-08
#2 3 <NA> 2012-10-20
#3 4 <NA> 2017-10-14
#4 5 2016-01-02 2015-12-20