确定以 R 中的日期类型为条件的重叠日期间隔
Determine overlapping date intervals conditional on date type in R
我有一个简单的数据集,其中包含患者在医疗机构的“入院日期”和“出院日期”,以及每条记录的日期类型(住院、门诊和传染期)。我需要确定一名患者的就诊是否与另一名患者的传染期重叠。通常我可以使用 lubridate 包的 interval 和 int_overlaps 函数来完成这件事。我的具体问题在于:数据中存在多个互不重叠的传染期。
我使用的是 R。重现示例数据的代码见下文。
我想用逻辑值(TRUE/FALSE)标记每次就诊是否落在某个传染期的区间内。下图可能有助于直观理解这些数据:红色矩形是住院,红色圆圈是门诊,紫色是患者住院期间的传染期。只有与紫色区间重叠的住院/门诊就诊才应被标记(即逻辑值为 TRUE,否则为 FALSE)。理想情况下,引起该传染期的患者本身不应被标记(例如 K00005 的长期住院应返回 FALSE),但如果这会让问题变得复杂,我可以自己另行处理。
我试过:
library(tidyverse); library(lubridate);
# Attempt from the question: build one interval per row, then test each row's
# interval against the intervals of all other rows, whatever their datetype.
test <- have %>%
  mutate(
    Int = interval(datein, dateout),
    overlaps = map(seq_along(Int), function(idx) {
      # Every row index except the current one.
      others <- setdiff(seq_along(Int), idx)
      any(int_overlaps(Int[idx], Int[others]))
    })
  )
我觉得已经很接近了,但这段代码似乎是把每个区间与所有其他区间进行比较,而不只是与传染期比较,所以结果全部是 TRUE。有没有办法让它只与传染期的区间进行比较(修改上面的代码或使用新的代码都可以)?
我搜索了 SO,也阅读了一些处理类似问题的提问和回答,但没有一个解决了这个特定问题。任何帮助都将不胜感激!
library(tidyverse); library(lubridate);
# Example data set `have`: a tibble with 44 rows (see row.names below) and the
# columns id, datetype, datein, dateout and color. Each row is either a visit
# ("Inpatient" / "Outpatient") or an "Infectious Period".
# id: patient identifier of the row.
have <- structure(list(id = c("K00005", "K52253", "K32022", "K20113",
"K52253", "K00164", "K00164", "K10003", "K00347", "K00046", "K52253",
"K00198", "K32022", "K00198", "K00685", "K00685", "K18122", "K00198",
"K00347", "K00198", "K00198", "K32022", "K52135", "K34060", "K00164",
"K04048", "K00135", "K32022", "K00685", "K00198", "K52253", "K30008",
"K32022", "K32022", "K00347", "K00164", "K00135", "K00137", "K32022",
"K32022", "K52253", "K00005", "K00046", "K00137"),
# datetype: kind of row; the last three entries are the infectious periods.
datetype = c("Inpatient", "Outpatient", "Inpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Inpatient", "Outpatient",
"Inpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Inpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Outpatient", "Inpatient",
"Inpatient", "Inpatient", "Outpatient", "Outpatient", "Outpatient",
"Infectious Period", "Infectious Period", "Infectious Period"),
# datein: start date of the row. The numbers carry class "Date", so R treats
# them as day counts since 1970-01-01 (17542 prints as 2018-01-11).
datein = structure(c(17542, 17544, 17556, 17559, 17586, 17602,
17608, 17623, 17626, 17626, 17641, 17642, 17651, 17657, 17659,
17661, 17664, 17668, 17668, 17675, 17675, 17676, 17681, 17685,
17699, 17703, 17712, 17712, 17713, 17719, 17721, 17739, 17739,
17742, 17745, 17746, 17755, 17760, 17768, 17768, 17788, 17542,
17626, 17760), class = "Date"),
# dateout: end date of the row, stored the same way as datein.
dateout = structure(c(17733, 17544, 17560, 17559, 17586, 17602, 17608, 17623, 17626, 17638,
17641, 17655, 17651, 17657, 17659, 17661, 17664, 17668, 17668,
17675, 17675, 17676, 17681, 17685, 17699, 17703, 17712, 17712,
17713, 17795, 17721, 17739, 17739, 17742, 17745, 17753, 17762,
17794, 17768, 17768, 17788, 17564, 17638, 17777), class = "Date"),
# color: plotting colour per row; "#DD4B39" for the visit rows and "purple"
# for the three infectious-period rows.
color = c("#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"purple", "purple", "purple")),
# Compact row names for 44 rows; the class vector makes the list a tibble.
row.names = c(NA, -44L), class = c("tbl_df", "tbl", "data.frame"))
# Timeline figure of every row in `have`.
# library() is used instead of require(): if vistime is not installed this
# stops immediately with an error, instead of returning FALSE with a warning
# and failing later at the gg_vistime() call.
library(vistime)
# The col.* arguments name the columns of `have` that gg_vistime() should use.
gg_vistime(have,
  col.group = "id",
  col.event = "datetype",
  col.start = "datein",
  col.end = "dateout",
  col.color = "color",
  show_labels = FALSE,
  optimize_y = TRUE,
  # linewidth = 5,
  title = "Figure for Stack Overflow Question"
)
这是一种只用 Base R(无需额外的库)、基于非常基本的 for 循环的方法。如果患者在某个传染期内入院(started_during),或在某个传染期内出院(ended_during),或其住院时间覆盖了某个传染期的开始和结束(in_during),就会把重叠标记为 TRUE。
# Flag every visit (any row that is not an "Infectious Period") whose
# [datein, dateout] range overlaps at least one infectious period.
# Creates `infectious_periods` (the infectious-period rows of `have`) and the
# logical column `have$overlap`; infectious-period rows themselves stay FALSE.
infectious_periods <- have[which(have$datetype == "Infectious Period"), ]
have$overlap <- FALSE # initializes a new column
# seq_len() keeps the loop from running when `have` has zero rows.
for (i in seq_len(nrow(have))) {
  if (have$datetype[i] != "Infectious Period") {
    # The visit begins while some infectious period is running.
    started_during <- any(have$datein[i] >= infectious_periods$datein &
      have$datein[i] <= infectious_periods$dateout)
    # The visit ends while some infectious period is running.
    ended_during <- any(have$dateout[i] >= infectious_periods$datein &
      have$dateout[i] <= infectious_periods$dateout)
    # The visit covers a whole infectious period: it begins on or before the
    # period's start and ends on or after the period's end. This is the one
    # overlap case the two checks above cannot see (e.g. a long inpatient stay
    # that starts before and ends after the period).
    in_during <- any(have$datein[i] <= infectious_periods$datein &
      have$dateout[i] >= infectious_periods$dateout)
    # All three values are length-one results of any(), so scalar || applies.
    if (started_during || ended_during || in_during) {
      have$overlap[i] <- TRUE
    }
  }
}
have$overlap
# A tibble: 44 x 6
# id datetype datein dateout color overlap
# <chr> <chr> <date> <date> <chr> <lgl>
# 1 K00005 Inpatient 2018-01-11 2018-07-21 #DD4B39 TRUE
# 2 K52253 Outpatient 2018-01-13 2018-01-13 #DD4B39 TRUE
# 3 K32022 Inpatient 2018-01-25 2018-01-29 #DD4B39 TRUE
# 4 K20113 Outpatient 2018-01-28 2018-01-28 #DD4B39 TRUE
# 5 K52253 Outpatient 2018-02-24 2018-02-24 #DD4B39 FALSE
# 6 K00164 Outpatient 2018-03-12 2018-03-12 #DD4B39 FALSE
# 7 K00164 Outpatient 2018-03-18 2018-03-18 #DD4B39 FALSE
# 8 K10003 Outpatient 2018-04-02 2018-04-02 #DD4B39 FALSE
# 9 K00347 Outpatient 2018-04-05 2018-04-05 #DD4B39 TRUE
#10 K00046 Inpatient 2018-04-05 2018-04-17 #DD4B39 TRUE
# ... with 34 more rows
如果这没有满足您的需求,请告诉我。当然还可以做更多的事情,比如统计每位患者与多少个其他传染期重叠,但这应该足以让您入门。
绘图代码:
library(ggplot2)
# Point size per row: 2 where `overlap` is TRUE, 1 otherwise.
have$size <- ifelse(have$overlap, yes = 2, no = 1)
ggplot(
  have,
  aes(datein, datetype, col = datetype, shape = datetype, cex = size)
) +
  geom_point() +
  # One facet row per patient id, with the strip labels switched to the left.
  facet_grid(rows = vars(id), switch = "y") +
  # Vertical lines at the start and at the end of each infectious period.
  geom_vline(xintercept = infectious_periods$datein) +
  geom_vline(xintercept = infectious_periods$dateout) +
  # Keep the facet labels horizontal.
  theme(strip.text.y.left = element_text(angle = 0)) +
  # Horizontal bar from datein to dateout, coloured by the `color` column.
  geom_linerange(
    aes(xmin = datein, xmax = dateout),
    color = have$color,
    size = 2
  )
我有一个简单的数据集,其中包含患者在医疗机构的“入院日期”和“出院日期”,以及每条记录的日期类型(住院、门诊和传染期)。我需要确定一名患者的就诊是否与另一名患者的传染期重叠。通常我可以使用 lubridate 包的 interval 和 int_overlaps 函数来完成这件事。我的具体问题在于:数据中存在多个互不重叠的传染期。
我使用的是 R。重现示例数据的代码见下文。
我想用逻辑值(TRUE/FALSE)标记每次就诊是否落在某个传染期的区间内。下图可能有助于直观理解这些数据:红色矩形是住院,红色圆圈是门诊,紫色是患者住院期间的传染期。只有与紫色区间重叠的住院/门诊就诊才应被标记(即逻辑值为 TRUE,否则为 FALSE)。理想情况下,引起该传染期的患者本身不应被标记(例如 K00005 的长期住院应返回 FALSE),但如果这会让问题变得复杂,我可以自己另行处理。
我试过:
library(tidyverse); library(lubridate);
# Attempt from the question: build one interval per row, then test each row's
# interval against the intervals of all other rows, whatever their datetype.
test <- have %>%
  mutate(
    Int = interval(datein, dateout),
    overlaps = map(seq_along(Int), function(idx) {
      # Every row index except the current one.
      others <- setdiff(seq_along(Int), idx)
      any(int_overlaps(Int[idx], Int[others]))
    })
  )
我觉得已经很接近了,但这段代码似乎是把每个区间与所有其他区间进行比较,而不只是与传染期比较,所以结果全部是 TRUE。有没有办法让它只与传染期的区间进行比较(修改上面的代码或使用新的代码都可以)?
我搜索了 SO,也阅读了一些处理类似问题的提问和回答,但没有一个解决了这个特定问题。任何帮助都将不胜感激!
library(tidyverse); library(lubridate);
# Example data set `have`: a tibble with 44 rows (see row.names below) and the
# columns id, datetype, datein, dateout and color. Each row is either a visit
# ("Inpatient" / "Outpatient") or an "Infectious Period".
# id: patient identifier of the row.
have <- structure(list(id = c("K00005", "K52253", "K32022", "K20113",
"K52253", "K00164", "K00164", "K10003", "K00347", "K00046", "K52253",
"K00198", "K32022", "K00198", "K00685", "K00685", "K18122", "K00198",
"K00347", "K00198", "K00198", "K32022", "K52135", "K34060", "K00164",
"K04048", "K00135", "K32022", "K00685", "K00198", "K52253", "K30008",
"K32022", "K32022", "K00347", "K00164", "K00135", "K00137", "K32022",
"K32022", "K52253", "K00005", "K00046", "K00137"),
# datetype: kind of row; the last three entries are the infectious periods.
datetype = c("Inpatient", "Outpatient", "Inpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Inpatient", "Outpatient",
"Inpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Inpatient", "Outpatient",
"Outpatient", "Outpatient", "Outpatient", "Outpatient", "Inpatient",
"Inpatient", "Inpatient", "Outpatient", "Outpatient", "Outpatient",
"Infectious Period", "Infectious Period", "Infectious Period"),
# datein: start date of the row. The numbers carry class "Date", so R treats
# them as day counts since 1970-01-01 (17542 prints as 2018-01-11).
datein = structure(c(17542, 17544, 17556, 17559, 17586, 17602,
17608, 17623, 17626, 17626, 17641, 17642, 17651, 17657, 17659,
17661, 17664, 17668, 17668, 17675, 17675, 17676, 17681, 17685,
17699, 17703, 17712, 17712, 17713, 17719, 17721, 17739, 17739,
17742, 17745, 17746, 17755, 17760, 17768, 17768, 17788, 17542,
17626, 17760), class = "Date"),
# dateout: end date of the row, stored the same way as datein.
dateout = structure(c(17733, 17544, 17560, 17559, 17586, 17602, 17608, 17623, 17626, 17638,
17641, 17655, 17651, 17657, 17659, 17661, 17664, 17668, 17668,
17675, 17675, 17676, 17681, 17685, 17699, 17703, 17712, 17712,
17713, 17795, 17721, 17739, 17739, 17742, 17745, 17753, 17762,
17794, 17768, 17768, 17788, 17564, 17638, 17777), class = "Date"),
# color: plotting colour per row; "#DD4B39" for the visit rows and "purple"
# for the three infectious-period rows.
color = c("#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39",
"purple", "purple", "purple")),
# Compact row names for 44 rows; the class vector makes the list a tibble.
row.names = c(NA, -44L), class = c("tbl_df", "tbl", "data.frame"))
# Timeline figure of every row in `have`.
# library() is used instead of require(): if vistime is not installed this
# stops immediately with an error, instead of returning FALSE with a warning
# and failing later at the gg_vistime() call.
library(vistime)
# The col.* arguments name the columns of `have` that gg_vistime() should use.
gg_vistime(have,
  col.group = "id",
  col.event = "datetype",
  col.start = "datein",
  col.end = "dateout",
  col.color = "color",
  show_labels = FALSE,
  optimize_y = TRUE,
  # linewidth = 5,
  title = "Figure for Stack Overflow Question"
)
这是一种只用 Base R(无需额外的库)、基于非常基本的 for 循环的方法。如果患者在某个传染期内入院(started_during),或在某个传染期内出院(ended_during),或其住院时间覆盖了某个传染期的开始和结束(in_during),就会把重叠标记为 TRUE。
# Flag every visit (any row that is not an "Infectious Period") whose
# [datein, dateout] range overlaps at least one infectious period.
# Creates `infectious_periods` (the infectious-period rows of `have`) and the
# logical column `have$overlap`; infectious-period rows themselves stay FALSE.
infectious_periods <- have[which(have$datetype == "Infectious Period"), ]
have$overlap <- FALSE # initializes a new column
# seq_len() keeps the loop from running when `have` has zero rows.
for (i in seq_len(nrow(have))) {
  if (have$datetype[i] != "Infectious Period") {
    # The visit begins while some infectious period is running.
    started_during <- any(have$datein[i] >= infectious_periods$datein &
      have$datein[i] <= infectious_periods$dateout)
    # The visit ends while some infectious period is running.
    ended_during <- any(have$dateout[i] >= infectious_periods$datein &
      have$dateout[i] <= infectious_periods$dateout)
    # The visit covers a whole infectious period: it begins on or before the
    # period's start and ends on or after the period's end. This is the one
    # overlap case the two checks above cannot see (e.g. a long inpatient stay
    # that starts before and ends after the period).
    in_during <- any(have$datein[i] <= infectious_periods$datein &
      have$dateout[i] >= infectious_periods$dateout)
    # All three values are length-one results of any(), so scalar || applies.
    if (started_during || ended_during || in_during) {
      have$overlap[i] <- TRUE
    }
  }
}
have$overlap
# A tibble: 44 x 6
# id datetype datein dateout color overlap
# <chr> <chr> <date> <date> <chr> <lgl>
# 1 K00005 Inpatient 2018-01-11 2018-07-21 #DD4B39 TRUE
# 2 K52253 Outpatient 2018-01-13 2018-01-13 #DD4B39 TRUE
# 3 K32022 Inpatient 2018-01-25 2018-01-29 #DD4B39 TRUE
# 4 K20113 Outpatient 2018-01-28 2018-01-28 #DD4B39 TRUE
# 5 K52253 Outpatient 2018-02-24 2018-02-24 #DD4B39 FALSE
# 6 K00164 Outpatient 2018-03-12 2018-03-12 #DD4B39 FALSE
# 7 K00164 Outpatient 2018-03-18 2018-03-18 #DD4B39 FALSE
# 8 K10003 Outpatient 2018-04-02 2018-04-02 #DD4B39 FALSE
# 9 K00347 Outpatient 2018-04-05 2018-04-05 #DD4B39 TRUE
#10 K00046 Inpatient 2018-04-05 2018-04-17 #DD4B39 TRUE
# ... with 34 more rows
如果这没有满足您的需求,请告诉我。当然还可以做更多的事情,比如统计每位患者与多少个其他传染期重叠,但这应该足以让您入门。
绘图代码:
library(ggplot2)
# Point size per row: 2 where `overlap` is TRUE, 1 otherwise.
have$size <- ifelse(have$overlap, yes = 2, no = 1)
ggplot(
  have,
  aes(datein, datetype, col = datetype, shape = datetype, cex = size)
) +
  geom_point() +
  # One facet row per patient id, with the strip labels switched to the left.
  facet_grid(rows = vars(id), switch = "y") +
  # Vertical lines at the start and at the end of each infectious period.
  geom_vline(xintercept = infectious_periods$datein) +
  geom_vline(xintercept = infectious_periods$dateout) +
  # Keep the facet labels horizontal.
  theme(strip.text.y.left = element_text(angle = 0)) +
  # Horizontal bar from datein to dateout, coloured by the `color` column.
  geom_linerange(
    aes(xmin = datein, xmax = dateout),
    color = have$color,
    size = 2
  )