寻求在 R 中将大量变量与其 R 平方值(由线性回归模型得出)一起绘图的思路(最好使用 ggplot2)

Asking for ideas on how to plot a large number of variables against their R-squared values (obtained from linear regression models) in R (preferably using ggplot2)

我必须为我的项目构建 231 个线性回归模型。在运行这 231 个模型之后,我得到了 231 个 R 平方值,并且必须在一张图中按变量名称展示这些值。由于 231 个 R 平方值放在表格里太多了,我正在寻找绘图思路,以便把 R 平方值放在 y 轴、变量名称放在 x 轴。当我运行 dput(head(df, 5)) 时,得到如下输出(这可以让你大致了解我的数据):

# Output of dput(head(df, 5)): the first five rows of the results table.
# Columns: Band, R.squared, Adj.Rsquared, Intercept, Slope.
# Presumably one row per fitted regression model (one model per band) --
# confirm against the code that builds df.
structure(list(Band = c(402, 411, 419, 427, 434), R.squared = c(0.044655015122032, 
0.852028718800355, 0.818617476505653, 0.825782272278991, 0.860844967662728
), Adj.Rsquared = c(-0.0614944276421867, 0.835587465333728, 0.798463862784058, 
0.806424746976656, 0.845383297403031), Intercept = c(0.000142126282140086, 
-0.00373545760470339, -0.00258909036368109, 0.000626075834918527, 
-3.3448513588372e-05), Slope = c(-0.00108714482110104, 0.393380133190131, 
0.443463459485279, 0.503881831479685, 0.480162723468755)), row.names = c(NA, 
5L), class = "data.frame")

请注意,我的完整数据有 231 个观测值,我想把变量 Band(作为因子)放在 x 轴,把 R 平方放在 y 轴。我已经在 ggplot2 中尝试了 geom_point(),但结果看起来非常混乱且难以理解。有什么想法吗?

更新:当我使用 @Duck 建议的代码时,得到了下面这张图,但它用于科学演示还是有点乱。

如果你有大量的值,可以让坐标轴上的标签错开排列(dodge),这里有一个例子:

library(ggplot2)
# Code
# Dot plot of R^2 per band. Band is converted to a factor so that every band
# gets its own discrete position; the axes are then flipped so the bands run
# along the vertical axis.
ggplot(data = mdf, mapping = aes(x = factor(Band), y = R.squared)) +
  geom_point() +
  coord_flip() +
  # Stagger the band labels over two positions so neighbours do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

输出:

使用了一些数据:

# Data
# Example data for the plots: 20 rows and 5 columns. The five rows of model
# results shown in the question are repeated four times, each repetition
# carrying its own set of Band values so that all 20 bands are distinct.
mdf <- data.frame(
  Band = c(
    402, 411, 419, 427, 434,
    412, 421, 429, 437, 444,
    422, 431, 439, 447, 454,
    432, 441, 449, 457, 464
  ),
  R.squared = rep(
    c(
      0.044655015122032, 0.852028718800355, 0.818617476505653,
      0.825782272278991, 0.860844967662728
    ),
    times = 4
  ),
  Adj.Rsquared = rep(
    c(
      -0.0614944276421867, 0.835587465333728, 0.798463862784058,
      0.806424746976656, 0.845383297403031
    ),
    times = 4
  ),
  Intercept = rep(
    c(
      0.000142126282140086, -0.00373545760470339, -0.00258909036368109,
      0.000626075834918527, -3.3448513588372e-05
    ),
    times = 4
  ),
  Slope = rep(
    c(
      -0.00108714482110104, 0.393380133190131, 0.443463459485279,
      0.503881831479685, 0.480162723468755
    ),
    times = 4
  )
)

@DaveArmstrong 的建议也很有帮助(非常感谢他):

# Code 2
# Same dot plot as before, but the bands are ordered by their R^2 value
# (reorder() sorts the factor levels by the mean R.squared of each band), so
# the points form a monotone curve instead of a scattered cloud.
ggplot(
  data = mdf,
  mapping = aes(x = reorder(factor(Band), R.squared, mean), y = R.squared)
) +
  geom_point() +
  coord_flip() +
  # Stagger the band labels over two positions so neighbours do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

输出:

另一种选择:

# Code 3
# Lollipop chart: the ordered dot plot plus a segment from 0 to each band's
# R^2 value, which makes the magnitude of every band easier to read.
ggplot(
  data = mdf,
  mapping = aes(x = reorder(factor(Band), R.squared, mean), y = R.squared)
) +
  geom_point() +
  geom_segment(
    mapping = aes(
      x = reorder(factor(Band), R.squared, mean),
      xend = reorder(factor(Band), R.squared, mean),
      y = 0,
      yend = R.squared
    )
  ) +
  coord_flip() +
  # Stagger the band labels over two positions so neighbours do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

输出:

用热图怎么样?

# Heat map of R^2 by band, split into five panels by R^2 quintile.
# The snippet relies on these packages; loading them here keeps it runnable
# on its own.
library(ggplot2)
library(dplyr)  # supplies tibble(), %>% and mutate()

# Simulated stand-in for the real results: 231 bands with random R^2 values.
# Band becomes a factor whose levels are ordered by R^2, so the tiles inside
# each panel run from low to high.
dat <- tibble(
  Band = 100:330,
  R.squared = runif(231, 0, 1),
  Adj.Rsquared = R.squared - runif(231, 0, .1)
) %>%
  mutate(Band = reorder(factor(Band), R.squared, mean))

# Break points for the five groups: the 20/40/60/80% quantiles, closed off
# by 0 and 1 at the ends.
q <- c(0, quantile(dat$R.squared, c(.2, .4, .6, .8)), 1)

# Label the groups directly in cut() (the original overwrote the levels
# afterwards and misspelled the first one as "Quntile 1").
# include.lowest = TRUE keeps a value equal to the lowest break (0) in the
# first group instead of turning it into NA.
dat <- dat %>%
  mutate(
    group = cut(
      R.squared,
      breaks = q,
      labels = paste("Quintile", 1:5),
      include.lowest = TRUE
    )
  )

# One column of tiles per quintile; each panel has its own y axis so only the
# bands that belong to that quintile are listed next to it.
ggplot(dat, aes(x = 1, y = Band, fill = R.squared)) +
  geom_tile(colour = "white") +
  facet_wrap(~group, scales = "free_y", nrow = 1) +
  scale_x_continuous(breaks = NULL) +
  scale_fill_viridis_c() +
  labs(x = "", y = "", fill = expression(R^2)) +
  theme(
    legend.position = "top",
    axis.text.y = element_text(size = 10)
  )

这是另一种选择,标签位于单元格内:

# Lay the 231 bands out on an 11 x 21 grid so that every tile can carry its
# own band label. Both matrices are filled column by column from dat.
R2 <- matrix(dat$R.squared, nrow = 11, ncol = 21)
txt <- matrix(as.character(dat$Band), nrow = 11, ncol = 21)

# Flatten the grid back to long format. `row` cycles 1..11 within each matrix
# column; `col` counts down from 21 so the first matrix column is drawn at
# the top of the plot.
dat2 <- tibble(
  R.squared = as.vector(R2),
  Band = as.vector(txt),
  row = rep(seq_len(11), times = 21),
  col = rep(rev(seq_len(21)), each = 11)
)

# Tiles coloured by R^2 with the band name printed inside each tile; axes and
# background are removed because the grid position carries no meaning.
ggplot(data = dat2, mapping = aes(x = row, y = col, fill = R.squared)) +
  geom_tile() +
  geom_text(mapping = aes(label = Band), color = "white") +
  labs(x = "", y = "", fill = expression(R^2)) +
  scale_fill_viridis_c(option = "B") +
  theme_void() +
  theme(legend.position = "bottom")