Kaplan-Meier 生存曲线与手动固定的患者池下降率

Kaplan-Meier survival curve with manually fixed decline rate of patient pool

我绘制了一幅包含两条 Kaplan-Meier 生存曲线的图,用来展示两种药物对患者生存的影响。该数据集包括 41 名患者,其中 26 名 (A1-A26) 接受了口服药物治疗,15 名 (B1-B15) 接种了疫苗。x 轴显示天数,y 轴显示占总体患者池的百分比。我只想绘制研究的第 0-400 天,这意味着 'oral' (A25、A26) 和 'vaccine' (B14、B15) 各有两个数据点不会显示。此外,我希望绘制的 Kaplan-Meier 曲线在每名患者死亡后下降 1.45 个单位(如数据列 'survival' 所示)。基于此,'oral' 的曲线将在 62.32% 处停止,'vaccine' 的曲线将在 81.16% 处停止(每组各排除两个 > 400 天的数据点),因此 y 轴应从 60%(而不是 0%)开始。然而,目前 'oral' 的曲线下降了 26/100 个单位,'vaccine' 的曲线下降了 15/100 个单位,这是基于所有患者都将在试验结束时死亡的假设。因此,我想知道:

  1. 病人池的递减率是否可以固定在1.45个单位,
  2. 如何表明仍有数据点超过 400 天(而不实际将曲线延伸到这些 > 400 天的数据点),以及
  3. 我是否正确使用对象'status'(即我给每位患者的状态为 1)。

下面是一个可重现的示例数据集和我目前使用的代码。

所需的软件包:library(survival)、library(ggplot2)

  1. 加载可重现的数据

    # Reproducible example data: one row per patient (41 rows).
    #   patient   - patient ID factor (A1-A26 and B1-B15)
    #   survival  - percentage value the author lists for each patient
    #               (most rows step down by 1.45)
    #   days      - day recorded for the patient
    #   treatment - factor with levels "oral" and "vaccine"
    #   status    - event indicator; 1 for every row in this data set
    # The result is assigned to `test` because the survfit() call that
    # follows reads `data = test`.
    test <- structure(list(patient = structure(c(1L, 12L, 20L, 21L, 22L, 
    23L, 24L, 25L, 26L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
    13L, 14L, 15L, 16L, 17L, 18L, 19L, 27L, 34L, 35L, 36L, 37L, 38L, 
    39L, 40L, 41L, 28L, 29L, 30L, 31L, 32L, 33L), .Label = c("A1", 
    "A10", "A11", "A12", "A13", "A14", "A15", "A16", "A17", "A18", 
    "A19", "A2", "A20", "A21", "A22", "A23", "A24", "A25", "A26", 
    "A3", "A4", "A5", "A6", "A7", "A8", "A9", "B1", "B10", "B11", 
    "B12", "B13", "B14", "B15", "B2", "B3", "B4", "B5", "B6", "B7", 
    "B8", "B9"), class = "factor"), survival = c(98.55, 97.1, 95.65, 
    94.2, 92.75, 91.3, 89.85, 88.4, 86.95, 85.5, 84.05, 82.6, 81.15, 
    79.7, 78.25, 76.8, 75.35, 73.9, 72.45, 71, 69.55, 68.1, 66.65, 
    65.2, 49.9, 57.97, 98.55, 97.1, 95.65, 94.2, 92.75, 91.3, 89.85, 
    88.4, 86.95, 85.5, 84.05, 82.6, 81.15, 67.6, 72), days = c(103L, 
    105L, 110L, 121L, 124L, 126L, 140L, 144L, 152L, 173L, 176L, 181L, 
    185L, 200L, 206L, 211L, 223L, 247L, 253L, 261L, 276L, 281L, 309L, 
    334L, 402L, 489L, 148L, 216L, 255L, 257L, 280L, 290L, 306L, 325L, 
    305L, 307L, 334L, 329L, 343L, 560L, 610L), treatment = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("oral", "vaccine"
    ), class = "factor"), status = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L)), .Names = c("patient", "survival", "days", "treatment", 
    "status"), class = "data.frame", row.names = c(NA, -41L))
    
  2. 创建 Surv 对象并为您的数据估计生存函数

    # Kaplan-Meier fit of `days` by treatment arm. Rows with status == 1 are
    # treated as events; conf.int = FALSE switches off confidence intervals.
    fit.test <- survfit(
      Surv(days, status == 1) ~ treatment,
      data = test,
      conf.int = FALSE
    )
    
  3. 运行函数ggsurv

  4. 绘图

    # Plot the Kaplan-Meier fit with solid estimate lines (lty.est = 1),
    # hand-placed group labels, fixed axes, and a bare black-and-white theme.
    ggsurv(fit.test, lty.est = 1) +
      # annotate() draws each label exactly once. geom_text(data = NULL, ...)
      # inherits the plot's data and redraws the same label for every row,
      # which overplots the text.
      annotate("text", x = 39.0, y = 0.23, label = "oral",
               size = 5.0, colour = "red") +
      annotate("text", x = 30.5, y = 0.12, label = "vaccine",
               size = 5.0, colour = "blue") +

      # Scale limits treat values outside 0-400 as missing, so observations
      # after day 400 are dropped from the drawn curves rather than clipped.
      scale_x_continuous(expand = c(0.01, 0.01),
                         limits = c(0, 400),
                         breaks = seq(0, 400, by = 50),
                         labels = as.character(seq(0, 400, by = 50))) +
      scale_y_continuous(expand = c(0.005, 0.01),
                         limits = c(0, 1.0),
                         breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
                         labels = c("0", "0.2", "0.4", "0.6", "0.8", "1.0")) +

      xlab("Time") +
      ylab("Survival") +

      theme_bw() +
      # All theme overrides in one call: no legend (the labels above replace
      # it), no grid lines, bold axis titles, and a heavy panel border.
      theme(legend.position = "none",
            axis.title.x = element_text(vjust = 0.1, face = "bold", size = 16),
            axis.text.x = element_text(vjust = 4, size = 14),
            axis.title.y = element_text(angle = 90, vjust = 0.70,
                                        face = "bold", size = 18),
            axis.text.y = element_text(size = 14),
            panel.grid.minor = element_blank(),
            panel.grid.major = element_blank(),
            panel.border = element_rect(size = 2, colour = "black",
                                        fill = NA, linetype = 1),
            plot.margin = unit(c(-0.9, 0.4, 0.28, 0.0), "lines"))
    

问题出在创建生存 (Surv) 对象的方式上。按照您目前提供数据的方式,口服药物组的所有患者到第 489 天、疫苗组的所有患者到第 610 天都已经历了该事件。但是您知道这只是数据的一部分,因为您有剩余患者百分比可用。您可以为每组中直到该组最后一天仍未经历该事件的患者添加行,并将他们的 status 设为 0。或者,您也可以不使用 ggsurv 函数,而直接用 geom_step 来绘图。

# Pool size per arm if each death lowers the curve by 1.45 points
# (evaluates to 69, matching the patient IDs that run up to 69 below).
round(100 / 1.45)

library(survival)
library(GGally)

# Keep columns 1, 3, 4 and 5, i.e. drop the second column.
test <- test[, c(1, 3, 4, 5)]

# Censored rows (status = 0) that fill each arm up to 69 patients:
# 43 for oral at day 489 and 54 for vaccine at day 610.
n_censored <- c(43, 54)
censored_ids <- c(paste0("A", 27:69), paste0("B", 16:69))
extra_patients <- data.frame(
  patient = censored_ids,
  days = rep(c(489, 610), times = n_censored),
  treatment = rep(c("oral", "vaccine"), times = n_censored),
  status = 0
)
full_test <- rbind(test, extra_patients)

# Refit on the padded data, then zoom the x axis to days 0-400.
fit.test <- survfit(
  Surv(days, status == 1) ~ treatment,
  data = full_test,
  conf.int = FALSE
)
ggsurv(fit.test) + coord_cartesian(xlim = c(0, 400))