R 从 SurvFit 中提取数据
R Extract Data From SurvFit
library(survival)
etime <- with(mgus2, ifelse(pstat==0, futime, ptime))
event <- with(mgus2, ifelse(pstat==0, 2*death, 1))
event <- factor(event, 0:2, labels=c("censor", "pcm", "death"))
mfit2 <- survfit(Surv(etime, event) ~ sex, data=mgus2)
plot(mfit2, col=c(1,2,1,2), lty=c(2,2,1,1),
mark.time=FALSE, lwd=2, xscale=12,
xlab="Years post diagnosis", ylab="Probability in State")
legend(240, .6, c("death:female", "death:male", "pcm:female", "pcm:male"),
col=c(1,2,1,2), lty=c(1,1,2,2), lwd=2, bty='n')
这是一个可重现的例子。我想知道,如何才能从 'mfit2' 中取出这些数据以便在 ggplot2 中绘制?
您可以使用 lapply
从拟合对象的摘要中提取数据
sfit <- summary(mfit2)
str(sfit)
List of 24
$ n : int [1:2] 631 753
$ time : num [1:359] 1 2 3 4 5 6 7 8 9 10 ...
$ n.risk : int [1:359, 1:3] 631 610 599 595 588 587 581 580 573 569 ...
$ n.event : int [1:359, 1:3] 0 0 0 0 0 0 0 0 0 0 ...
$ n.censor : num [1:359] 1 0 0 0 0 0 0 0 0 1 ...
$ pstate : num [1:359, 1:3] 0.968 0.951 0.944 0.933 0.932 ...
$ p0 : num [1:2, 1:3] 1 1 0 0 0 0
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "sex=F" "sex=M"
.. ..$ : chr [1:3] "(s0)" "pcm" "death"
$ strata : Factor w/ 2 levels "sex=F","sex=M": 1 1 1 1 1 1 1 1 1 1 ...
...
我认为您需要的列是 time
、pstate
和 `strata。但是其他一些,例如处于危险中的数字可能会有用。
cols <- lapply(c(2:6, 8, 16, 17), function(x) sfit[x])
然后用do.call
将这些列组合成一个数据框
data <- do.call(data.frame, cols)
str(data)
'data.frame': 359 obs. of 21 variables:
$ time : num 1 2 3 4 5 6 7 8 9 10 ...
$ n.risk.1 : int 631 610 599 595 588 587 581 580 573 569 ...
$ n.risk.2 : int 0 0 0 0 0 0 0 0 0 0 ...
$ n.risk.3 : int 0 0 0 0 0 0 0 0 0 0 ...
$ n.event.1: int 0 0 0 0 0 0 0 0 0 0 ...
$ n.event.2: int 0 2 0 1 0 1 0 0 2 1 ...
$ n.event.3: int 20 9 4 6 1 5 1 7 2 2 ...
$ n.censor : num 1 0 0 0 0 0 0 0 0 1 ...
$ pstate.1 : num 0.968 0.951 0.944 0.933 0.932 ...
$ pstate.2 : num 0 0.00317 0.00317 0.00476 0.00476 ...
$ pstate.3 : num 0.0317 0.046 0.0523 0.0619 0.0634 ...
$ strata : Factor w/ 2 levels "sex=F","sex=M": 1 1 1 1 1 1 1 1 1 1 ...
$ lower.1 : num 0.955 0.934 0.927 0.914 0.912 ...
$ lower.2 : num NA 0.000796 0.000796 0.00154 0.00154 ...
$ lower.3 : num 0.0206 0.0322 0.0375 0.0456 0.047 ...
$ upper.1 : num 0.982 0.968 0.963 0.953 0.952 ...
$ upper.2 : num NA 0.0127 0.0127 0.0147 0.0147 ...
$ upper.3 : num 0.0488 0.0656 0.0729 0.0838 0.0856 ...
此数据为宽格式,最好重新整形为图表的长格式。
mgus3 <- data %>%
pivot_longer(cols=-c(time, strata, n.censor),
names_to=c(".value","state"),
names_pattern="(.+).(.+)") %>%
filter(state!=1) %>% # Exclude the censored state
mutate(state=factor(state, labels=c("pcm","death")),
group=interaction(strata, state))
然后绘制它。
library(ggplot)
mgus3 %>%
ggplot(aes(x=time, y=pstate, col=group)) +
geom_line(aes(linetype=group)) +
ylab("Probability in State") +
theme_bw()
您应该可以添加置信带并使其更漂亮。
library(survival)
etime <- with(mgus2, ifelse(pstat==0, futime, ptime))
event <- with(mgus2, ifelse(pstat==0, 2*death, 1))
event <- factor(event, 0:2, labels=c("censor", "pcm", "death"))
mfit2 <- survfit(Surv(etime, event) ~ sex, data=mgus2)
plot(mfit2, col=c(1,2,1,2), lty=c(2,2,1,1),
mark.time=FALSE, lwd=2, xscale=12,
xlab="Years post diagnosis", ylab="Probability in State")
legend(240, .6, c("death:female", "death:male", "pcm:female", "pcm:male"),
col=c(1,2,1,2), lty=c(1,1,2,2), lwd=2, bty='n')
这是一个可重现的例子。我想知道,如何才能从 'mfit2' 中取出这些数据以便在 ggplot2 中绘制?
您可以使用 lapply
sfit <- summary(mfit2)
str(sfit)
List of 24
$ n : int [1:2] 631 753
$ time : num [1:359] 1 2 3 4 5 6 7 8 9 10 ...
$ n.risk : int [1:359, 1:3] 631 610 599 595 588 587 581 580 573 569 ...
$ n.event : int [1:359, 1:3] 0 0 0 0 0 0 0 0 0 0 ...
$ n.censor : num [1:359] 1 0 0 0 0 0 0 0 0 1 ...
$ pstate : num [1:359, 1:3] 0.968 0.951 0.944 0.933 0.932 ...
$ p0 : num [1:2, 1:3] 1 1 0 0 0 0
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "sex=F" "sex=M"
.. ..$ : chr [1:3] "(s0)" "pcm" "death"
$ strata : Factor w/ 2 levels "sex=F","sex=M": 1 1 1 1 1 1 1 1 1 1 ...
...
我认为您需要的列是 time
、pstate
和 `strata。但是其他一些,例如处于危险中的数字可能会有用。
cols <- lapply(c(2:6, 8, 16, 17), function(x) sfit[x])
然后用do.call
data <- do.call(data.frame, cols)
str(data)
'data.frame': 359 obs. of 21 variables:
$ time : num 1 2 3 4 5 6 7 8 9 10 ...
$ n.risk.1 : int 631 610 599 595 588 587 581 580 573 569 ...
$ n.risk.2 : int 0 0 0 0 0 0 0 0 0 0 ...
$ n.risk.3 : int 0 0 0 0 0 0 0 0 0 0 ...
$ n.event.1: int 0 0 0 0 0 0 0 0 0 0 ...
$ n.event.2: int 0 2 0 1 0 1 0 0 2 1 ...
$ n.event.3: int 20 9 4 6 1 5 1 7 2 2 ...
$ n.censor : num 1 0 0 0 0 0 0 0 0 1 ...
$ pstate.1 : num 0.968 0.951 0.944 0.933 0.932 ...
$ pstate.2 : num 0 0.00317 0.00317 0.00476 0.00476 ...
$ pstate.3 : num 0.0317 0.046 0.0523 0.0619 0.0634 ...
$ strata : Factor w/ 2 levels "sex=F","sex=M": 1 1 1 1 1 1 1 1 1 1 ...
$ lower.1 : num 0.955 0.934 0.927 0.914 0.912 ...
$ lower.2 : num NA 0.000796 0.000796 0.00154 0.00154 ...
$ lower.3 : num 0.0206 0.0322 0.0375 0.0456 0.047 ...
$ upper.1 : num 0.982 0.968 0.963 0.953 0.952 ...
$ upper.2 : num NA 0.0127 0.0127 0.0147 0.0147 ...
$ upper.3 : num 0.0488 0.0656 0.0729 0.0838 0.0856 ...
此数据为宽格式,最好重新整形为图表的长格式。
mgus3 <- data %>%
pivot_longer(cols=-c(time, strata, n.censor),
names_to=c(".value","state"),
names_pattern="(.+).(.+)") %>%
filter(state!=1) %>% # Exclude the censored state
mutate(state=factor(state, labels=c("pcm","death")),
group=interaction(strata, state))
然后绘制它。
library(ggplot)
mgus3 %>%
ggplot(aes(x=time, y=pstate, col=group)) +
geom_line(aes(linetype=group)) +
ylab("Probability in State") +
theme_bw()
您应该可以添加置信带并使其更漂亮。