确定以 R 中的日期类型为条件的重叠日期间隔

Determine overlapping date intervals conditional on date type in R

我有一个简单的数据集,其中包含医疗机构的“入院日期”和“出院日期”,以及每条记录的日期类型(住院、门诊和传染期)。我需要确定一名患者的就诊是否与另一名患者的传染期重叠。通常我可以使用 lubridate 包的 interval 和 int_overlaps 函数来完成此操作。我的具体问题在于:需要同时与多个传染期比较是否重叠。

我使用的是 R。用于重现示例数据的代码见下文。

如果某次就诊落在传染期的区间内,我想用逻辑值 TRUE/FALSE 对其进行标记。下图可能有助于可视化这些数据。红色矩形是住院,红色圆圈是门诊,紫色是患者住院期间的传染期。只有与紫色区间重叠的住院/门诊就诊才应被标记(即使用逻辑值 TRUE/FALSE)。理想情况下,产生该传染期的患者本人不会被标记(即 K00005 的长期住院会返回 FALSE),但如果这会让问题变得复杂,我可以自行处理。

我试过:

library(tidyverse); library(lubridate);

# Attempted approach: build one lubridate interval per row, then flag a row
# when its interval overlaps the interval of ANY other row. Nothing here
# filters on `datetype`, so every row is compared with every other row rather
# than only with the infectious periods.
test <- have %>%
  mutate(
    Int = interval(datein, dateout),
    overlaps = map(seq_along(Int), function(idx) {
      # indices of all rows except the current one
      others <- setdiff(seq_along(Int), idx)
      any(int_overlaps(Int[idx], Int[others]))
    })
  )

我觉得已经很接近了,但这似乎是把每个区间与所有其他区间进行比较,而不仅仅是与传染期比较,所以结果全部为 TRUE。有没有办法让它只与传染期的区间进行比较(通过修改以上代码或使用新代码)?

我搜索了 SO 并阅读了一些处理类似问题的提问和回答,但没有一个解决了这个特定问题。任何帮助将不胜感激!

library(tidyverse); library(lubridate);

# Example data: a 44-row tibble with one row per visit or infectious period.
#   id       - patient identifier
#   datetype - "Inpatient", "Outpatient" or "Infectious Period"
#              (the last three rows are the infectious periods)
#   datein   - start date, stored as a Date (numeric days since 1970-01-01)
#   dateout  - end date, stored the same way
#   color    - colour string for plotting ("#DD4B39" for visits, "purple" for
#              infectious periods)
have <- structure(list(id = c("K00005", "K52253", "K32022", "K20113", 
                              "K52253", "K00164", "K00164", "K10003", "K00347", "K00046", "K52253", 
                              "K00198", "K32022", "K00198", "K00685", "K00685", "K18122", "K00198", 
                              "K00347", "K00198", "K00198", "K32022", "K52135", "K34060", "K00164", 
                              "K04048", "K00135", "K32022", "K00685", "K00198", "K52253", "K30008", 
                              "K32022", "K32022", "K00347", "K00164", "K00135", "K00137", "K32022", 
                              "K32022", "K52253", "K00005", "K00046", "K00137"), 
                       datetype = c("Inpatient", "Outpatient", "Inpatient", "Outpatient", "Outpatient", "Outpatient", 
                                     "Outpatient", "Outpatient", "Outpatient", "Inpatient", "Outpatient", 
                                     "Inpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient", 
                                     "Outpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient", 
                                     "Outpatient", "Outpatient", "Outpatient", "Outpatient", "Outpatient", 
                                     "Outpatient", "Outpatient", "Outpatient", "Inpatient", "Outpatient", 
                                     "Outpatient", "Outpatient", "Outpatient", "Outpatient", "Inpatient", 
                                     "Inpatient", "Inpatient", "Outpatient", "Outpatient", "Outpatient", 
                                     "Infectious Period", "Infectious Period", "Infectious Period"), 
                       datein = structure(c(17542, 17544, 17556, 17559, 17586, 17602, 
                                             17608, 17623, 17626, 17626, 17641, 17642, 17651, 17657, 17659, 
                                             17661, 17664, 17668, 17668, 17675, 17675, 17676, 17681, 17685, 
                                             17699, 17703, 17712, 17712, 17713, 17719, 17721, 17739, 17739, 
                                             17742, 17745, 17746, 17755, 17760, 17768, 17768, 17788, 17542, 
                                             17626, 17760), class = "Date"), 
                       dateout = structure(c(17733, 17544, 17560, 17559, 17586, 17602, 17608, 17623, 17626, 17638, 
                                             17641, 17655, 17651, 17657, 17659, 17661, 17664, 17668, 17668, 
                                             17675, 17675, 17676, 17681, 17685, 17699, 17703, 17712, 17712, 
                                             17713, 17795, 17721, 17739, 17739, 17742, 17745, 17753, 17762, 
                                             17794, 17768, 17768, 17788, 17564, 17638, 17777), class = "Date"), 
                       color = c("#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", 
                                 "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", 
                                 "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", 
                                 "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", 
                                 "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", 
                                 "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", 
                                 "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", "#DD4B39", 
                                 "purple", "purple", "purple")), 
                  row.names = c(NA, -44L), class = c("tbl_df", "tbl", "data.frame"))

                      

# Draw the example data as a timeline: one group per patient id, one event per
# row, spanning datein..dateout and coloured by the `color` column.
# library() is used instead of require() so that a missing vistime package
# stops the script immediately with a clear error, rather than returning FALSE
# and failing later with "could not find function".
library(vistime)
gg_vistime(have,
           col.group = "id",
           col.event = "datetype",
           col.start = "datein",
           col.end = "dateout",
           col.color = "color",
           show_labels = FALSE,
           optimize_y = TRUE,
           #           linewidth = 5,
           title = "Figure for Stack Overflow Question")

这是一种使用非常基本的 for 循环的 Base R 方法(不依赖任何包)。如果患者在某个传染期内入院 (started_during),或在某个传染期内出院 (ended_during),或在某个传染期从开始到结束的整个期间都在院 (in_during),就应将重叠标记为 TRUE。

# Flag every visit (any row that is not itself an infectious period) whose
# closed interval [datein, dateout] overlaps at least one infectious period.
# The result is stored in a new logical column `have$overlap`; rows that are
# infectious periods keep the initial value FALSE.
infectious_periods <- have[which(have$datetype == "Infectious Period"), ]
have$overlap <- FALSE # initializes a new column

# seq_len() instead of 1:nrow() so a zero-row `have` skips the loop entirely
for (i in seq_len(nrow(have))) {
  if (have$datetype[i] != "Infectious Period") {
    # the visit starts while some infectious period is running
    started_during <- any(have$datein[i] >= infectious_periods$datein &
                            have$datein[i] <= infectious_periods$dateout)
    # the visit ends while some infectious period is running
    ended_during <- any(have$dateout[i] >= infectious_periods$datein &
                          have$dateout[i] <= infectious_periods$dateout)
    # the visit starts before and ends after some infectious period, i.e. it
    # spans the whole period; neither endpoint test above catches this case
    in_during <- any(have$datein[i] <= infectious_periods$datein &
                       have$dateout[i] >= infectious_periods$dateout)
    if (started_during || ended_during || in_during) {
      have$overlap[i] <- TRUE
    }
  }
}
have$overlap
# A tibble: 44 x 6
#   id     datetype   datein     dateout    color   overlap
#   <chr>  <chr>      <date>     <date>     <chr>   <lgl>  
# 1 K00005 Inpatient  2018-01-11 2018-07-21 #DD4B39 TRUE   
# 2 K52253 Outpatient 2018-01-13 2018-01-13 #DD4B39 TRUE   
# 3 K32022 Inpatient  2018-01-25 2018-01-29 #DD4B39 TRUE   
# 4 K20113 Outpatient 2018-01-28 2018-01-28 #DD4B39 TRUE   
# 5 K52253 Outpatient 2018-02-24 2018-02-24 #DD4B39 FALSE  
# 6 K00164 Outpatient 2018-03-12 2018-03-12 #DD4B39 FALSE  
# 7 K00164 Outpatient 2018-03-18 2018-03-18 #DD4B39 FALSE  
# 8 K10003 Outpatient 2018-04-02 2018-04-02 #DD4B39 FALSE  
# 9 K00347 Outpatient 2018-04-05 2018-04-05 #DD4B39 TRUE   
#10 K00046 Inpatient  2018-04-05 2018-04-17 #DD4B39 TRUE  
# ... with 34 more rows

如果这不能满足您的需求,请告诉我。当然还可以做更多的事情,比如统计每位患者与多少个其他传染期重叠,但这应该足以让你上手。

生成图片的代码

library(ggplot2)
# Plot each row as a point at its start date, one facet row per patient id.
# The `size` column (2 where `overlap` is TRUE, otherwise 1) is mapped to the
# `cex` aesthetic so flagged visits stand out.
have$size <- ifelse(have$overlap, 2, 1)
ggplot(
  have,
  aes(x = datein, y = datetype, col = datetype, shape = datetype, cex = size)
) +
  geom_point() +
  facet_grid(rows = vars(id), switch = "y") +
  # vertical lines at the start and at the end of every infectious period
  geom_vline(xintercept = infectious_periods$datein) +
  geom_vline(xintercept = infectious_periods$dateout) +
  theme(strip.text.y.left = element_text(angle = 0)) +
  # horizontal bar from datein to dateout, drawn in each row's own colour
  geom_linerange(
    aes(xmin = datein, xmax = dateout),
    color = have$color,
    size = 2
  )