当一个因素分层时,在 RStudio 中使用 ggforest 绘制 Cox PH 模型?
Plotting a Cox PH model using ggforest in RStudio when a factor is stratified?
当其中一个模型变量需要分层时,我正在尝试找到一种方法来根据 Cox-PH 模型绘制风险比森林图。对于非分层模型,ggforest()
函数非常好。 运行 一些示例代码
library(survival)
library(survminer)
model <- coxph(Surv(time, status) ~ sex + rx + adhere,
data = colon )
ggforest(model)
colon <- within(colon, {
sex <- factor(sex, labels = c("female", "male"))
differ <- factor(differ, labels = c("well", "moderate", "poor"))
extent <- factor(extent, labels = c("submuc.", "muscle", "serosa", "contig."))
})
bigmodel <-
coxph(Surv(time, status) ~ sex + rx + adhere + differ + extent + node4,
data = colon )
ggforest(bigmodel)
生成此图
但是,如果我必须纠正分层的不相称性
stratamodel <- coxph(Surv(time, status) ~ sex + strata(rx) + adhere + differ + extent + node4,
data = colon )
ggforest(stratamodel)
出现以下错误信息:
"Error in [.data.frame
(data, , var) : undefined columns selected
另外:警告信息:
In .get_data(model, data = data) : The data
argument is not
provided. Data will be extracted from model fit."
关于如何从分层模型中获取 ggforest 需要的信息以便它可以生成图,有什么建议吗?感谢您的帮助!
我收集到所需的森林图是一个简单地跳过模型公式中的分层 RX 变量的图。如果是这样,我们可以简单地在其中插入一个 if 子句,以忽略与数据中的列名不完全对应的公式部分(例如“strata(rx)”不是列名)。
方法一
如果您熟悉 R 足以修改函数,运行 trace(ggforest, edit = TRUE)
并替换 pop-up [= 中的 allTerms <- lapply(...)
(大约第 10-25 行) 38=] 具有以下版本:
allTerms <- lapply(seq_along(terms), function(i) {
var <- names(terms)[i]
if(var %in% colnames(data)) {
if (terms[i] %in% c("factor", "character")) {
adf <- as.data.frame(table(data[, var]))
cbind(var = var, adf, pos = 1:nrow(adf))
}
else if (terms[i] == "numeric") {
data.frame(var = var, Var1 = "", Freq = nrow(data),
pos = 1)
}
else {
vars = grep(paste0("^", var, "*."), coef$term,
value = TRUE)
data.frame(var = vars, Var1 = "", Freq = nrow(data),
pos = seq_along(vars))
}
} else {
message(var, "is not found in data columns, and will be skipped.")
}
})
ggforest(stratamodel) # this should work after the modification
修改不会持续到后续的 R 会话。如果你想在当前会话中撤销修改,只需 运行 untrace(ggforest)
.
方法二
如果您希望拥有该函数的永久修改版本以供将来使用/不想乱用库的函数,请保存以下函数:
ggforest2 <- function (model, data = NULL, main = "Hazard ratio",
cpositions = c(0.02, 0.22, 0.4), fontsize = 0.7,
refLabel = "reference", noDigits = 2) {
conf.high <- conf.low <- estimate <- NULL
stopifnot(class(model) == "coxph")
data <- survminer:::.get_data(model, data = data)
terms <- attr(model$terms, "dataClasses")[-1]
coef <- as.data.frame(broom::tidy(model))
gmodel <- broom::glance(model)
allTerms <- lapply(seq_along(terms), function(i) {
var <- names(terms)[i]
if(var %in% colnames(data)) {
if (terms[i] %in% c("factor", "character")) {
adf <- as.data.frame(table(data[, var]))
cbind(var = var, adf, pos = 1:nrow(adf))
}
else if (terms[i] == "numeric") {
data.frame(var = var, Var1 = "", Freq = nrow(data),
pos = 1)
}
else {
vars = grep(paste0("^", var, "*."), coef$term,
value = TRUE)
data.frame(var = vars, Var1 = "", Freq = nrow(data),
pos = seq_along(vars))
}
} else {
message(var, "is not found in data columns, and will be skipped.")
}
})
allTermsDF <- do.call(rbind, allTerms)
colnames(allTermsDF) <- c("var", "level", "N",
"pos")
inds <- apply(allTermsDF[, 1:2], 1, paste0, collapse = "")
rownames(coef) <- gsub(coef$term, pattern = "`", replacement = "")
toShow <- cbind(allTermsDF, coef[inds, ])[, c("var", "level", "N", "p.value",
"estimate", "conf.low",
"conf.high", "pos")]
toShowExp <- toShow[, 5:7]
toShowExp[is.na(toShowExp)] <- 0
toShowExp <- format(exp(toShowExp), digits = noDigits)
toShowExpClean <- data.frame(toShow, pvalue = signif(toShow[, 4], noDigits + 1),
toShowExp)
toShowExpClean$stars <- paste0(round(toShowExpClean$p.value, noDigits + 1), " ",
ifelse(toShowExpClean$p.value < 0.05, "*", ""),
ifelse(toShowExpClean$p.value < 0.01, "*", ""),
ifelse(toShowExpClean$p.value < 0.001, "*", ""))
toShowExpClean$ci <- paste0("(", toShowExpClean[, "conf.low.1"],
" - ", toShowExpClean[, "conf.high.1"], ")")
toShowExpClean$estimate.1[is.na(toShowExpClean$estimate)] = refLabel
toShowExpClean$stars[which(toShowExpClean$p.value < 0.001)] = "<0.001 ***"
toShowExpClean$stars[is.na(toShowExpClean$estimate)] = ""
toShowExpClean$ci[is.na(toShowExpClean$estimate)] = ""
toShowExpClean$estimate[is.na(toShowExpClean$estimate)] = 0
toShowExpClean$var = as.character(toShowExpClean$var)
toShowExpClean$var[duplicated(toShowExpClean$var)] = ""
toShowExpClean$N <- paste0("(N=", toShowExpClean$N, ")")
toShowExpClean <- toShowExpClean[nrow(toShowExpClean):1, ]
rangeb <- range(toShowExpClean$conf.low, toShowExpClean$conf.high,
na.rm = TRUE)
breaks <- axisTicks(rangeb/2, log = TRUE, nint = 7)
rangeplot <- rangeb
rangeplot[1] <- rangeplot[1] - diff(rangeb)
rangeplot[2] <- rangeplot[2] + 0.15 * diff(rangeb)
width <- diff(rangeplot)
y_variable <- rangeplot[1] + cpositions[1] * width
y_nlevel <- rangeplot[1] + cpositions[2] * width
y_cistring <- rangeplot[1] + cpositions[3] * width
y_stars <- rangeb[2]
x_annotate <- seq_len(nrow(toShowExpClean))
annot_size_mm <- fontsize *
as.numeric(grid::convertX(unit(theme_get()$text$size, "pt"), "mm"))
p <- ggplot(toShowExpClean,
aes(seq_along(var), exp(estimate))) +
geom_rect(aes(xmin = seq_along(var) - 0.5,
xmax = seq_along(var) + 0.5,
ymin = exp(rangeplot[1]),
ymax = exp(rangeplot[2]),
fill = ordered(seq_along(var)%%2 + 1))) +
scale_fill_manual(values = c("#FFFFFF33", "#00000033"), guide = "none") +
geom_point(pch = 15, size = 4) +
geom_errorbar(aes(ymin = exp(conf.low), ymax = exp(conf.high)),
width = 0.15) +
geom_hline(yintercept = 1, linetype = 3) +
coord_flip(ylim = exp(rangeplot)) +
ggtitle(main) +
scale_y_log10(name = "", labels = sprintf("%g", breaks),
expand = c(0.02, 0.02), breaks = breaks) +
theme_light() +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
legend.position = "none",
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(hjust = 0.5)) +
xlab("") +
annotate(geom = "text", x = x_annotate, y = exp(y_variable),
label = toShowExpClean$var, fontface = "bold",
hjust = 0, size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_nlevel), hjust = 0,
label = toShowExpClean$level,
vjust = -0.1, size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_nlevel),
label = toShowExpClean$N, fontface = "italic", hjust = 0,
vjust = ifelse(toShowExpClean$level == "", 0.5, 1.1),
size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_cistring),
label = toShowExpClean$estimate.1, size = annot_size_mm,
vjust = ifelse(toShowExpClean$estimate.1 == "reference", 0.5, -0.1)) +
annotate(geom = "text", x = x_annotate, y = exp(y_cistring),
label = toShowExpClean$ci, size = annot_size_mm,
vjust = 1.1, fontface = "italic") +
annotate(geom = "text", x = x_annotate, y = exp(y_stars),
label = toShowExpClean$stars, size = annot_size_mm,
hjust = -0.2, fontface = "italic") +
annotate(geom = "text", x = 0.5, y = exp(y_variable),
label = paste0("# Events: ", gmodel$nevent,
"; Global p-value (Log-Rank): ",
format.pval(gmodel$p.value.log, eps = ".001"),
" \nAIC: ", round(gmodel$AIC, 2),
"; Concordance Index: ", round(gmodel$concordance, 2)),
size = annot_size_mm, hjust = 0, vjust = 1.2, fontface = "italic")
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
ggpubr::as_ggplot(gt)
}
它是 ggforest
函数 当前状态 的快照,具有与上述相同的修改。如果包的创建者将来对包进行修改,这可能会中断或变得过时。但是现在,ggforest2(stratamodel)
将产生与方法 1 相同的结果。
当其中一个模型变量需要分层时,我正在尝试找到一种方法来根据 Cox-PH 模型绘制风险比森林图。对于非分层模型,ggforest()
函数非常好。 运行 一些示例代码
library(survival)
library(survminer)
model <- coxph(Surv(time, status) ~ sex + rx + adhere,
data = colon )
ggforest(model)
colon <- within(colon, {
sex <- factor(sex, labels = c("female", "male"))
differ <- factor(differ, labels = c("well", "moderate", "poor"))
extent <- factor(extent, labels = c("submuc.", "muscle", "serosa", "contig."))
})
bigmodel <-
coxph(Surv(time, status) ~ sex + rx + adhere + differ + extent + node4,
data = colon )
ggforest(bigmodel)
生成此图
但是,如果我必须纠正分层的不相称性
stratamodel <- coxph(Surv(time, status) ~ sex + strata(rx) + adhere + differ + extent + node4,
data = colon )
ggforest(stratamodel)
出现以下错误信息:
"Error in
[.data.frame
(data, , var) : undefined columns selected
另外:警告信息:
In .get_data(model, data = data) : The
data
argument is not provided. Data will be extracted from model fit."
关于如何从分层模型中获取 ggforest 需要的信息以便它可以生成图,有什么建议吗?感谢您的帮助!
我收集到所需的森林图是一个简单地跳过模型公式中的分层 RX 变量的图。如果是这样,我们可以简单地在其中插入一个 if 子句,以忽略与数据中的列名不完全对应的公式部分(例如“strata(rx)”不是列名)。
方法一
如果您熟悉 R 足以修改函数,运行 trace(ggforest, edit = TRUE)
并替换 pop-up [= 中的 allTerms <- lapply(...)
(大约第 10-25 行) 38=] 具有以下版本:
allTerms <- lapply(seq_along(terms), function(i) {
var <- names(terms)[i]
if(var %in% colnames(data)) {
if (terms[i] %in% c("factor", "character")) {
adf <- as.data.frame(table(data[, var]))
cbind(var = var, adf, pos = 1:nrow(adf))
}
else if (terms[i] == "numeric") {
data.frame(var = var, Var1 = "", Freq = nrow(data),
pos = 1)
}
else {
vars = grep(paste0("^", var, "*."), coef$term,
value = TRUE)
data.frame(var = vars, Var1 = "", Freq = nrow(data),
pos = seq_along(vars))
}
} else {
message(var, "is not found in data columns, and will be skipped.")
}
})
ggforest(stratamodel) # this should work after the modification
修改不会持续到后续的 R 会话。如果你想在当前会话中撤销修改,只需 运行 untrace(ggforest)
.
方法二
如果您希望拥有该函数的永久修改版本以供将来使用/不想乱用库的函数,请保存以下函数:
ggforest2 <- function (model, data = NULL, main = "Hazard ratio",
cpositions = c(0.02, 0.22, 0.4), fontsize = 0.7,
refLabel = "reference", noDigits = 2) {
conf.high <- conf.low <- estimate <- NULL
stopifnot(class(model) == "coxph")
data <- survminer:::.get_data(model, data = data)
terms <- attr(model$terms, "dataClasses")[-1]
coef <- as.data.frame(broom::tidy(model))
gmodel <- broom::glance(model)
allTerms <- lapply(seq_along(terms), function(i) {
var <- names(terms)[i]
if(var %in% colnames(data)) {
if (terms[i] %in% c("factor", "character")) {
adf <- as.data.frame(table(data[, var]))
cbind(var = var, adf, pos = 1:nrow(adf))
}
else if (terms[i] == "numeric") {
data.frame(var = var, Var1 = "", Freq = nrow(data),
pos = 1)
}
else {
vars = grep(paste0("^", var, "*."), coef$term,
value = TRUE)
data.frame(var = vars, Var1 = "", Freq = nrow(data),
pos = seq_along(vars))
}
} else {
message(var, "is not found in data columns, and will be skipped.")
}
})
allTermsDF <- do.call(rbind, allTerms)
colnames(allTermsDF) <- c("var", "level", "N",
"pos")
inds <- apply(allTermsDF[, 1:2], 1, paste0, collapse = "")
rownames(coef) <- gsub(coef$term, pattern = "`", replacement = "")
toShow <- cbind(allTermsDF, coef[inds, ])[, c("var", "level", "N", "p.value",
"estimate", "conf.low",
"conf.high", "pos")]
toShowExp <- toShow[, 5:7]
toShowExp[is.na(toShowExp)] <- 0
toShowExp <- format(exp(toShowExp), digits = noDigits)
toShowExpClean <- data.frame(toShow, pvalue = signif(toShow[, 4], noDigits + 1),
toShowExp)
toShowExpClean$stars <- paste0(round(toShowExpClean$p.value, noDigits + 1), " ",
ifelse(toShowExpClean$p.value < 0.05, "*", ""),
ifelse(toShowExpClean$p.value < 0.01, "*", ""),
ifelse(toShowExpClean$p.value < 0.001, "*", ""))
toShowExpClean$ci <- paste0("(", toShowExpClean[, "conf.low.1"],
" - ", toShowExpClean[, "conf.high.1"], ")")
toShowExpClean$estimate.1[is.na(toShowExpClean$estimate)] = refLabel
toShowExpClean$stars[which(toShowExpClean$p.value < 0.001)] = "<0.001 ***"
toShowExpClean$stars[is.na(toShowExpClean$estimate)] = ""
toShowExpClean$ci[is.na(toShowExpClean$estimate)] = ""
toShowExpClean$estimate[is.na(toShowExpClean$estimate)] = 0
toShowExpClean$var = as.character(toShowExpClean$var)
toShowExpClean$var[duplicated(toShowExpClean$var)] = ""
toShowExpClean$N <- paste0("(N=", toShowExpClean$N, ")")
toShowExpClean <- toShowExpClean[nrow(toShowExpClean):1, ]
rangeb <- range(toShowExpClean$conf.low, toShowExpClean$conf.high,
na.rm = TRUE)
breaks <- axisTicks(rangeb/2, log = TRUE, nint = 7)
rangeplot <- rangeb
rangeplot[1] <- rangeplot[1] - diff(rangeb)
rangeplot[2] <- rangeplot[2] + 0.15 * diff(rangeb)
width <- diff(rangeplot)
y_variable <- rangeplot[1] + cpositions[1] * width
y_nlevel <- rangeplot[1] + cpositions[2] * width
y_cistring <- rangeplot[1] + cpositions[3] * width
y_stars <- rangeb[2]
x_annotate <- seq_len(nrow(toShowExpClean))
annot_size_mm <- fontsize *
as.numeric(grid::convertX(unit(theme_get()$text$size, "pt"), "mm"))
p <- ggplot(toShowExpClean,
aes(seq_along(var), exp(estimate))) +
geom_rect(aes(xmin = seq_along(var) - 0.5,
xmax = seq_along(var) + 0.5,
ymin = exp(rangeplot[1]),
ymax = exp(rangeplot[2]),
fill = ordered(seq_along(var)%%2 + 1))) +
scale_fill_manual(values = c("#FFFFFF33", "#00000033"), guide = "none") +
geom_point(pch = 15, size = 4) +
geom_errorbar(aes(ymin = exp(conf.low), ymax = exp(conf.high)),
width = 0.15) +
geom_hline(yintercept = 1, linetype = 3) +
coord_flip(ylim = exp(rangeplot)) +
ggtitle(main) +
scale_y_log10(name = "", labels = sprintf("%g", breaks),
expand = c(0.02, 0.02), breaks = breaks) +
theme_light() +
theme(panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
legend.position = "none",
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(hjust = 0.5)) +
xlab("") +
annotate(geom = "text", x = x_annotate, y = exp(y_variable),
label = toShowExpClean$var, fontface = "bold",
hjust = 0, size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_nlevel), hjust = 0,
label = toShowExpClean$level,
vjust = -0.1, size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_nlevel),
label = toShowExpClean$N, fontface = "italic", hjust = 0,
vjust = ifelse(toShowExpClean$level == "", 0.5, 1.1),
size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_cistring),
label = toShowExpClean$estimate.1, size = annot_size_mm,
vjust = ifelse(toShowExpClean$estimate.1 == "reference", 0.5, -0.1)) +
annotate(geom = "text", x = x_annotate, y = exp(y_cistring),
label = toShowExpClean$ci, size = annot_size_mm,
vjust = 1.1, fontface = "italic") +
annotate(geom = "text", x = x_annotate, y = exp(y_stars),
label = toShowExpClean$stars, size = annot_size_mm,
hjust = -0.2, fontface = "italic") +
annotate(geom = "text", x = 0.5, y = exp(y_variable),
label = paste0("# Events: ", gmodel$nevent,
"; Global p-value (Log-Rank): ",
format.pval(gmodel$p.value.log, eps = ".001"),
" \nAIC: ", round(gmodel$AIC, 2),
"; Concordance Index: ", round(gmodel$concordance, 2)),
size = annot_size_mm, hjust = 0, vjust = 1.2, fontface = "italic")
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
ggpubr::as_ggplot(gt)
}
它是 ggforest
函数 当前状态 的快照,具有与上述相同的修改。如果包的创建者将来对包进行修改,这可能会中断或变得过时。但是现在,ggforest2(stratamodel)
将产生与方法 1 相同的结果。