寻求思路:如何在 R 中将大量变量与它们的 R 平方值(由线性回归模型得出)作图(最好使用 ggplot2)
Asking for ideas on how to plot a large number of variables against their R-squared values (obtained from linear regression models) in R (preferably using ggplot2)
我必须为我的项目构建 231 个线性回归模型。在运行完这 231 个模型之后,我得到了 231 个 R 平方值,需要在一张图中按变量名称显示这些值。由于 231 个 R 平方值放在表格里太多了,我正在寻找绘图思路,以便把变量名称放在 x 轴、把 R 平方值放在 y 轴。运行 dput(head(df, 5)) 得到如下输出(这可以让你大致了解我的数据):
# Output of dput(head(df, 5)): the first five rows of the results table.
# It is a plain data.frame with the columns Band, R.squared, Adj.Rsquared,
# Intercept and Slope. The expression is not assigned to any name here.
structure(list(Band = c(402, 411, 419, 427, 434), R.squared = c(0.044655015122032,
0.852028718800355, 0.818617476505653, 0.825782272278991, 0.860844967662728
), Adj.Rsquared = c(-0.0614944276421867, 0.835587465333728, 0.798463862784058,
0.806424746976656, 0.845383297403031), Intercept = c(0.000142126282140086,
-0.00373545760470339, -0.00258909036368109, 0.000626075834918527,
-3.3448513588372e-05), Slope = c(-0.00108714482110104, 0.393380133190131,
0.443463459485279, 0.503881831479685, 0.480162723468755)), row.names = c(NA,
5L), class = "data.frame")
请注意,我的完整数据有 231 个观测值,我想将变量 Band(作为因子)放在 x 轴,将 R 平方值放在 y 轴。我已经在 ggplot2 中尝试了 geom_point(),但结果看起来非常混乱且难以理解。有什么想法吗?
更新:我使用 @Duck 建议的代码后得到的图,用于科学演示还是有点乱。
如果你有大量的值,可以让坐标轴上的标签错开排列(dodge),下面是一个例子:
library(ggplot2)
# Code: dot plot with one point per band.
# Band is mapped as a discrete x variable and the axes are then flipped, so
# the band labels run along the vertical axis; guide_axis(n.dodge = 2)
# staggers those labels over two columns to reduce overlap.
ggplot(
  data = mdf,
  mapping = aes(x = factor(Band), y = R.squared)
) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  coord_flip()
输出:
使用了一些数据:
# Data: 20-row example table used by the plotting snippets.
# The Band values are all distinct; every other column cycles four times
# through the same five values.
mdf <- structure(
  list(
    Band = c(
      402, 411, 419, 427, 434,
      412, 421, 429, 437, 444,
      422, 431, 439, 447, 454,
      432, 441, 449, 457, 464
    ),
    R.squared = rep(
      c(
        0.044655015122032, 0.852028718800355, 0.818617476505653,
        0.825782272278991, 0.860844967662728
      ),
      times = 4
    ),
    Adj.Rsquared = rep(
      c(
        -0.0614944276421867, 0.835587465333728, 0.798463862784058,
        0.806424746976656, 0.845383297403031
      ),
      times = 4
    ),
    Intercept = rep(
      c(
        0.000142126282140086, -0.00373545760470339, -0.00258909036368109,
        0.000626075834918527, -3.3448513588372e-05
      ),
      times = 4
    ),
    Slope = rep(
      c(
        -0.00108714482110104, 0.393380133190131, 0.443463459485279,
        0.503881831479685, 0.480162723468755
      ),
      times = 4
    )
  ),
  row.names = c(NA, -20L),
  class = "data.frame"
)
@DaveArmstrong 的建议也很有帮助(非常感谢他):
# Code 2: the same dot plot, but with the band levels sorted by R-squared.
# reorder() orders the factor levels by the mean of R.squared within each
# band, so the points line up in increasing order instead of band order.
ggplot(
  data = mdf,
  mapping = aes(x = reorder(factor(Band), R.squared, mean), y = R.squared)
) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  coord_flip()
输出:
另一种选择:
# Code 3: lollipop variant of the sorted dot plot.
# Each band gets a point at its R-squared value plus a segment running from
# 0 up to that value.
ggplot(
  data = mdf,
  mapping = aes(x = reorder(factor(Band), R.squared, mean), y = R.squared)
) +
  geom_point() +
  # The segment's x position is inherited from the plot-level mapping; only
  # the start height and the end point are specified here.
  geom_segment(
    mapping = aes(
      xend = reorder(factor(Band), R.squared, mean),
      y = 0,
      yend = R.squared
    )
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  coord_flip()
输出:
用热图怎么样?
# Heatmap of R-squared by band, split into five quintile panels.
# tibble(), mutate() and %>% are provided by dplyr, which this snippet needs
# in addition to ggplot2.
library(dplyr)

# Simulated stand-in for the results table: 231 bands with random R-squared
# values (no seed is set, so the numbers differ from run to run).
dat <- tibble(
  Band = 100:330,
  R.squared = runif(231, 0, 1),
  Adj.Rsquared = R.squared - runif(231, 0, .1)
) %>%
  # Order the band levels by R-squared so that each panel is sorted.
  mutate(Band = reorder(factor(Band), R.squared, mean))

# Quintile cut points, bracketed by 0 and 1.
q <- c(0, quantile(dat$R.squared, c(.2, .4, .6, .8)), 1)

# include.lowest = TRUE keeps an R-squared of exactly 0 in the first group
# instead of turning it into NA.
dat <- dat %>%
  mutate(group = cut(R.squared, breaks = q, include.lowest = TRUE))
levels(dat$group) <- paste("Quintile", 1:5)

# One column of tiles per quintile; each panel has its own y axis
# (scales = "free_y") so it only lists the bands that fall in that quintile.
ggplot(dat, aes(x = 1, y = Band, fill = R.squared)) +
  geom_tile(colour = "white") +
  facet_wrap(~group, scales = "free_y", nrow = 1) +
  scale_x_continuous(breaks = NULL) +
  scale_fill_viridis_c() +
  labs(x = "", y = "", fill = expression(R^2)) +
  theme(
    legend.position = "top",
    axis.text.y = element_text(size = 10)
  )
这是另一种选择,标签位于单元格内:
# Arrange the values of `dat` on an 11 x 21 grid (231 cells, filled column by
# column) so that every band gets its own labelled cell.
R2 <- matrix(dat$R.squared, nrow = 11, ncol = 21)
txt <- matrix(as.character(dat$Band), nrow = 11, ncol = 21)

# Flatten the matrices back to vectors and attach grid coordinates:
# `row` cycles through 1..11, `col` counts down from 21 to 1 in runs of 11.
dat2 <- tibble(
  R.squared = as.vector(R2),
  Band = as.vector(txt),
  row = rep(1:11, times = 21),
  col = rep(21:1, each = 11)
)

# Tiles are coloured by R-squared and carry the band name as a text label;
# theme_void() removes the axes.
ggplot(
  data = dat2,
  mapping = aes(x = row, y = col, fill = R.squared)
) +
  geom_tile() +
  geom_text(mapping = aes(label = Band), color = "white") +
  scale_fill_viridis_c(option = "B") +
  theme_void() +
  theme(legend.position = "bottom") +
  labs(x = "", y = "", fill = expression(R^2))
我必须为我的项目构建 231 个线性回归模型。在运行完这 231 个模型之后,我得到了 231 个 R 平方值,需要在一张图中按变量名称显示这些值。由于 231 个 R 平方值放在表格里太多了,我正在寻找绘图思路,以便把变量名称放在 x 轴、把 R 平方值放在 y 轴。运行 dput(head(df, 5)) 得到如下输出(这可以让你大致了解我的数据):
# Output of dput(head(df, 5)): the first five rows of the results table.
# It is a plain data.frame with the columns Band, R.squared, Adj.Rsquared,
# Intercept and Slope. The expression is not assigned to any name here.
structure(list(Band = c(402, 411, 419, 427, 434), R.squared = c(0.044655015122032,
0.852028718800355, 0.818617476505653, 0.825782272278991, 0.860844967662728
), Adj.Rsquared = c(-0.0614944276421867, 0.835587465333728, 0.798463862784058,
0.806424746976656, 0.845383297403031), Intercept = c(0.000142126282140086,
-0.00373545760470339, -0.00258909036368109, 0.000626075834918527,
-3.3448513588372e-05), Slope = c(-0.00108714482110104, 0.393380133190131,
0.443463459485279, 0.503881831479685, 0.480162723468755)), row.names = c(NA,
5L), class = "data.frame")
请注意,我的完整数据有 231 个观测值,我想将变量 Band(作为因子)放在 x 轴,将 R 平方值放在 y 轴。我已经在 ggplot2 中尝试了 geom_point(),但结果看起来非常混乱且难以理解。有什么想法吗?
更新:我使用 @Duck 建议的代码后得到的图,用于科学演示还是有点乱。
如果你有大量的值,可以让坐标轴上的标签错开排列(dodge),下面是一个例子:
library(ggplot2)
# Code: dot plot with one point per band.
# Band is mapped as a discrete x variable and the axes are then flipped, so
# the band labels run along the vertical axis; guide_axis(n.dodge = 2)
# staggers those labels over two columns to reduce overlap.
ggplot(
  data = mdf,
  mapping = aes(x = factor(Band), y = R.squared)
) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  coord_flip()
输出:
使用了一些数据:
# Data: 20-row example table used by the plotting snippets.
# The Band values are all distinct; every other column cycles four times
# through the same five values.
mdf <- structure(
  list(
    Band = c(
      402, 411, 419, 427, 434,
      412, 421, 429, 437, 444,
      422, 431, 439, 447, 454,
      432, 441, 449, 457, 464
    ),
    R.squared = rep(
      c(
        0.044655015122032, 0.852028718800355, 0.818617476505653,
        0.825782272278991, 0.860844967662728
      ),
      times = 4
    ),
    Adj.Rsquared = rep(
      c(
        -0.0614944276421867, 0.835587465333728, 0.798463862784058,
        0.806424746976656, 0.845383297403031
      ),
      times = 4
    ),
    Intercept = rep(
      c(
        0.000142126282140086, -0.00373545760470339, -0.00258909036368109,
        0.000626075834918527, -3.3448513588372e-05
      ),
      times = 4
    ),
    Slope = rep(
      c(
        -0.00108714482110104, 0.393380133190131, 0.443463459485279,
        0.503881831479685, 0.480162723468755
      ),
      times = 4
    )
  ),
  row.names = c(NA, -20L),
  class = "data.frame"
)
@DaveArmstrong 的建议也很有帮助(非常感谢他):
# Code 2: the same dot plot, but with the band levels sorted by R-squared.
# reorder() orders the factor levels by the mean of R.squared within each
# band, so the points line up in increasing order instead of band order.
ggplot(
  data = mdf,
  mapping = aes(x = reorder(factor(Band), R.squared, mean), y = R.squared)
) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  coord_flip()
输出:
另一种选择:
# Code 3: lollipop variant of the sorted dot plot.
# Each band gets a point at its R-squared value plus a segment running from
# 0 up to that value.
ggplot(
  data = mdf,
  mapping = aes(x = reorder(factor(Band), R.squared, mean), y = R.squared)
) +
  geom_point() +
  # The segment's x position is inherited from the plot-level mapping; only
  # the start height and the end point are specified here.
  geom_segment(
    mapping = aes(
      xend = reorder(factor(Band), R.squared, mean),
      y = 0,
      yend = R.squared
    )
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  coord_flip()
输出:
用热图怎么样?
# Heatmap of R-squared by band, split into five quintile panels.
# tibble(), mutate() and %>% are provided by dplyr, which this snippet needs
# in addition to ggplot2.
library(dplyr)

# Simulated stand-in for the results table: 231 bands with random R-squared
# values (no seed is set, so the numbers differ from run to run).
dat <- tibble(
  Band = 100:330,
  R.squared = runif(231, 0, 1),
  Adj.Rsquared = R.squared - runif(231, 0, .1)
) %>%
  # Order the band levels by R-squared so that each panel is sorted.
  mutate(Band = reorder(factor(Band), R.squared, mean))

# Quintile cut points, bracketed by 0 and 1.
q <- c(0, quantile(dat$R.squared, c(.2, .4, .6, .8)), 1)

# include.lowest = TRUE keeps an R-squared of exactly 0 in the first group
# instead of turning it into NA.
dat <- dat %>%
  mutate(group = cut(R.squared, breaks = q, include.lowest = TRUE))
levels(dat$group) <- paste("Quintile", 1:5)

# One column of tiles per quintile; each panel has its own y axis
# (scales = "free_y") so it only lists the bands that fall in that quintile.
ggplot(dat, aes(x = 1, y = Band, fill = R.squared)) +
  geom_tile(colour = "white") +
  facet_wrap(~group, scales = "free_y", nrow = 1) +
  scale_x_continuous(breaks = NULL) +
  scale_fill_viridis_c() +
  labs(x = "", y = "", fill = expression(R^2)) +
  theme(
    legend.position = "top",
    axis.text.y = element_text(size = 10)
  )
这是另一种选择,标签位于单元格内:
# Arrange the values of `dat` on an 11 x 21 grid (231 cells, filled column by
# column) so that every band gets its own labelled cell.
R2 <- matrix(dat$R.squared, nrow = 11, ncol = 21)
txt <- matrix(as.character(dat$Band), nrow = 11, ncol = 21)

# Flatten the matrices back to vectors and attach grid coordinates:
# `row` cycles through 1..11, `col` counts down from 21 to 1 in runs of 11.
dat2 <- tibble(
  R.squared = as.vector(R2),
  Band = as.vector(txt),
  row = rep(1:11, times = 21),
  col = rep(21:1, each = 11)
)

# Tiles are coloured by R-squared and carry the band name as a text label;
# theme_void() removes the axes.
ggplot(
  data = dat2,
  mapping = aes(x = row, y = col, fill = R.squared)
) +
  geom_tile() +
  geom_text(mapping = aes(label = Band), color = "white") +
  scale_fill_viridis_c(option = "B") +
  theme_void() +
  theme(legend.position = "bottom") +
  labs(x = "", y = "", fill = expression(R^2))