绘制线性回归中每个变量的影响?
Plot the impact for each variable in linear regression?
我想为使用 R 计算的 lm 模型创建如下图。
有简单的方法吗?
以上图为here in this page.
收集
包 {caret} 提供了一个方便的方法 varImp
:
示例:
library(caret)
my_model <- lm(mpg ~ disp + cyl, data = mtcars)
## > varImp(my_model)
##
## Overall
## disp 2.006696
## cyl 2.229809
对于可变重要性的不同度量,请参阅 ?varImp
。将值输入您选择的绘图库。
额外:{ggstatsplot} calculates and plots a host of model stats for a plethora of model objects. This includes hypotheses about regression coefficients, for which method ggcoefstats()
可能会满足您的目的(不过请记住缩放预测变量以便对系数进行有意义的比较)。
按照链接文章中的方法(r 平方的相对边际增加),您可以编写自己的函数,采用公式和数据框,然后绘制相对重要性:
library(ggplot2)
plot_importance <- function(formula, data) {
lhs <- as.character(as.list(formula)[[2]])
rhs <- as.list(as.list(formula)[[3]])
vars <- grep("[+\*]", rapply(rhs, as.character), invert = TRUE, value = TRUE)
df <- do.call(rbind, lapply(seq_along(vars), function(i) {
f1 <- as.formula(paste(lhs, paste(vars[-i], collapse = "+"), sep = "~"))
f2 <- as.formula(paste(lhs, paste(c(vars[-i], vars[i]), collapse = "+"),
sep = "~"))
r1 <- summary(lm(f1, data = data))$r.squared
r2 <- summary(lm(f2, data = data))$r.squared
data.frame(variable = vars[i], importance = r2 - r1)
}))
df$importance <- df$importance / sum(df$importance)
df$variable <- reorder(factor(df$variable), -df$importance)
ggplot(df, aes(x = variable, y = importance)) +
geom_col(fill = "deepskyblue4") +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
labs(title = "Relative importance of variables",
subtitle = deparse(formula)) +
theme_classic(base_size = 16)
}
我们可以使用链接文章中提供的样本数据对此进行测试:
IV <- read.csv(paste0("https://statisticsbyjim.com/wp-content/uploads/",
"2017/07/ImportantVariables.csv"))
plot_importance(Strength ~ Time + Pressure + Temperature, data = IV)
而且我们看到剧情是一样的
我们还可以在一些 built-in 数据集上对其进行测试,以证明其用途是通用的:
plot_importance(mpg ~ disp + wt + gear, data = mtcars)
plot_importance(Petal.Length ~ Species + Petal.Width, data = iris)
由 reprex package (v2.0.1)
于 2022-05-01 创建
刚刚结束使用 relaimpo
包并显示 @Allan Cameron 回答的 ggplot
library(relaimpo)
relative_importance <- calc.relimp(mymodel, type="lmg")$lmg
df = data.frame(
variable=names(relative_importance),
importance=round(c(relative_importance) * 100,2)
)
ggplot(df, aes(x = reorder(variable, -importance), y = importance)) +
geom_col(fill = "deepskyblue4") +
geom_text(aes(label=importance), vjust=.3, hjust=1.2, size=3, color="white")+
coord_flip() +
labs(title = "Relative importance of variables") +
theme_classic(base_size = 16)
我想为使用 R 计算的 lm 模型创建如下图。
有简单的方法吗?
以上图为here in this page.
收集包 {caret} 提供了一个方便的方法 varImp
:
示例:
library(caret)
my_model <- lm(mpg ~ disp + cyl, data = mtcars)
## > varImp(my_model)
##
## Overall
## disp 2.006696
## cyl 2.229809
对于可变重要性的不同度量,请参阅 ?varImp
。将值输入您选择的绘图库。
额外:{ggstatsplot} calculates and plots a host of model stats for a plethora of model objects. This includes hypotheses about regression coefficients, for which method ggcoefstats()
可能会满足您的目的(不过请记住缩放预测变量以便对系数进行有意义的比较)。
按照链接文章中的方法(r 平方的相对边际增加),您可以编写自己的函数,采用公式和数据框,然后绘制相对重要性:
library(ggplot2)
plot_importance <- function(formula, data) {
lhs <- as.character(as.list(formula)[[2]])
rhs <- as.list(as.list(formula)[[3]])
vars <- grep("[+\*]", rapply(rhs, as.character), invert = TRUE, value = TRUE)
df <- do.call(rbind, lapply(seq_along(vars), function(i) {
f1 <- as.formula(paste(lhs, paste(vars[-i], collapse = "+"), sep = "~"))
f2 <- as.formula(paste(lhs, paste(c(vars[-i], vars[i]), collapse = "+"),
sep = "~"))
r1 <- summary(lm(f1, data = data))$r.squared
r2 <- summary(lm(f2, data = data))$r.squared
data.frame(variable = vars[i], importance = r2 - r1)
}))
df$importance <- df$importance / sum(df$importance)
df$variable <- reorder(factor(df$variable), -df$importance)
ggplot(df, aes(x = variable, y = importance)) +
geom_col(fill = "deepskyblue4") +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
labs(title = "Relative importance of variables",
subtitle = deparse(formula)) +
theme_classic(base_size = 16)
}
我们可以使用链接文章中提供的样本数据对此进行测试:
IV <- read.csv(paste0("https://statisticsbyjim.com/wp-content/uploads/",
"2017/07/ImportantVariables.csv"))
plot_importance(Strength ~ Time + Pressure + Temperature, data = IV)
而且我们看到剧情是一样的
我们还可以在一些 built-in 数据集上对其进行测试,以证明其用途是通用的:
plot_importance(mpg ~ disp + wt + gear, data = mtcars)
plot_importance(Petal.Length ~ Species + Petal.Width, data = iris)
由 reprex package (v2.0.1)
于 2022-05-01 创建刚刚结束使用 relaimpo
包并显示 @Allan Cameron 回答的 ggplot
library(relaimpo)
relative_importance <- calc.relimp(mymodel, type="lmg")$lmg
df = data.frame(
variable=names(relative_importance),
importance=round(c(relative_importance) * 100,2)
)
ggplot(df, aes(x = reorder(variable, -importance), y = importance)) +
geom_col(fill = "deepskyblue4") +
geom_text(aes(label=importance), vjust=.3, hjust=1.2, size=3, color="white")+
coord_flip() +
labs(title = "Relative importance of variables") +
theme_classic(base_size = 16)