基于 P 值显示回归线

Displaying regression lines based on P-value

我使用 ggpmisc 包在绘图中显示线性回归模型。如果 p 值小于 0.2,我只希望在图中显示回归线、p 值和 r2 值。 @Ricardo Semião e Castro 用很棒的代码帮助了我(谢谢!),但它只是有时有效。它是否有效取决于满足 P<0.2 标准的回归模型的数量。关于如何编写代码以使其在 0、1 或 2 个模型的 P 值低于 P 时都有效的想法?

代码如下:

library(ggpmisc)
library(ggplot2)

# Below code is to ensure that only LM with p < 0.2 is displayed in the graph
names = c(0,0) #Create a starting point of a matrix for the group names

#For each group, run a lm to find if pvalue < 0.2
for(i in unique(df$days)){
  lm = summary(lm(acetone~bacteria, df[df$days==i,])) 
  p = pf(lm$fstatistic[1], lm$fstatistic[2], lm$fstatistic[3], lower.tail=FALSE)
  if(p < 0.2){names = rbind(names, c(i))} #Get the groups that pass
}

#names = names[-1,] #Remove starting point

#Create subset of df with groups where p < 0.2; looping in order to check pair by pair
df_P0.2 = numeric()
for(i in 1:nrow(names)){
  df_P0.2 = rbind(df_P0.2,df[df$days%in%names[i,1],])
}


formula <- y~x

(plot <- ggplot(df, 
               aes(bacteria, 
                   acetone)) +
    geom_smooth(method = "lm",
                formula = y~x, color="black", data = df_P0.2) +
    geom_point(aes(shape=soil_type, color=soil_type, size=soil_type,
                   fill=soil_type)) +
    scale_fill_manual(values=c("#00AFBB", "brown")) + 
    scale_color_manual(values=c("black", "black")) + 
    scale_shape_manual(values=c(21, 24))+
    scale_size_manual(values=c(2.4, 1.7))+
    labs(shape="Soil type", color="Soil type", size="Soil type", fill="Soil type") +
    theme_bw() +
    facet_wrap(~days, 
               ncol = 4, scales = "free")+
    stat_poly_eq(data=df_P0.2,
                 aes(label = paste(stat(adj.rr.label),
                                   stat(p.value.label), 
                                   sep = "*\", \"*")),
                 formula = formula, 
                 rr.digits = 1, 
                 p.digits = 1, 
                 parse = TRUE,size=3.5))

这是数据:

df <- structure(list(bacteria = c(0.301, 1.079, 0, 0.301, 0.301, 0, 
0, 0.477, 0.301, 0, 0.477, 0, 0.477, 0.477, 0, 0.778, 0, 0.477, 
0.477, 0, 0, 0, 0.477, 0.477, 0, 0.477, 0, 0, 0, 0, 0.477, 0.477, 
0, 0, 0.477, 0.477, 0, 0, 0, 0, 0.477, 0, 0.477, 0, 0.477, 1.204, 
1.176, 0, 0.301, 0.778, 0.301, 0.477, 0.477, 0, 0, 0.301, 0, 
0, 0.602, 0.301, 0.602, 0.477, 0, 0.699, 0, 0.602, 0.602, 0, 
0.699, 0, 0.602, 0.602, 0, 0.602, 0, 0.301, 0.845, 0, 0.602, 
0.845, 0, 0.903, 0.602, 0.602, 0.954, 0, 0, 0.602, 0.602, 0.903, 
0.602, 0.602, 0.602, 1.462, 0.954, 0.602, 0.778, 0.301, 0.477, 
0.477, 0, 0, 0.301, 0, 0, 0.602, 0.301, 0.602, 0.477, 0, 0.301, 
0.699, 0, 0.602, 0.602, 0, 0.699, 0, 0.602, 0.602, 0, 0.602, 
0, 0.301, 0.845, 0, 0.602, 0.845, 0, 0.903, 0.602, 0.602, 0.954, 
0, 0, 0.602, 0.602, 0.903, 0.602, 0.602, 0.602, 1.462, 0.954, 
0.602, 0.699, 1.23, 0.477, 0.477, 0.477, 0, 0, 0.301, 0.602, 
0.301, 0.602, 0.602, 0, 0.778, 0, 0, 0.602, 0, 0.477, 0.477, 
0.602, 0.602, 0, 0.602, 0, 0, 1.114, 0, 0.699, 0.602, 0, 0, 0.602, 
0.602, 0.699, 0.602, 0.301, 0.699, 0.602, 0.845, 0.699, 1, 1.146, 
0.699), acetone = c(0.002, 0, 0.002, 0.003, 0.014, 0.002, 0.024, 
0.006, 0.001, 0.041, 0.035, 0.014, 0.01, 0.005, 0.017, 0.002, 
0.004, 0.002, 0.011, 0.019, 0, 0.01, 0.004, 0.002, 0.007, 0.004, 
0.021, 0.022, 0.002, 0.005, 0.032, 0.003, 0.002, 0.004, 0.007, 
0.091, 0.005, 0.002, 0.004, 0.001, 0.003, 0.005, 0.019, 0, 0.049, 
0, 0.001, 0.001, 0, 0, 0, 0.095, 0.023, 0, 0.024, 0.031, 0, 0.058, 
0.059, 0, 0.017, 0.008, 0, 0, 0.002, 0.007, 0.083, 0.011, 0, 
0, 0.001, 0.057, 0, 0.059, 0.142, 0.01, 0, 0, 0.052, 0, 0, 0.041, 
0, 0, 0, 0.002, 0, 0, 0.016, 0.016, 0.042, 0, 0.005, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.007, 0, 0.001, 0.061, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0.015, 0, 0.025, 0, 0, 0, 0, 0.02, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.001, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0.004, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.002, 0, 0, 0.002, 
0, 0), days = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 94, 94, 94, 94, 94, 94, 94, 
94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 
94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 
94, 94, 94, 94, 94), soil = c(31, 6, 12, 18, 2, 39, 1, 14, 4, 
9, 16, 10, 28, 33, 8, 92, 25, 23, 20, 83, 66, 19, 27, 22, 95, 
26, 21, 69, 30, 113, 15, 100, 38, 24, 110, 102, 34, 37, 7, 36, 
17, 13, 29, 32, 90, 5, 3, 35, 31, 6, 12, 18, 2, 39, 1, 14, 4, 
9, 16, 10, 28, 33, 8, 92, 25, 23, 20, 83, 66, 19, 27, 22, 95, 
26, 21, 69, 30, 113, 15, 100, 38, 24, 110, 102, 34, 37, 7, 36, 
17, 13, 29, 32, 90, 5, 3, 35, 6, 12, 18, 2, 39, 1, 14, 4, 9, 
16, 10, 28, 33, 8, 31, 92, 25, 23, 20, 83, 66, 19, 27, 22, 95, 
26, 21, 69, 30, 113, 15, 100, 38, 24, 110, 102, 34, 37, 7, 36, 
17, 13, 29, 32, 90, 5, 3, 35, 31, 6, 12, 18, 2, 39, 4, 9, 16, 
10, 28, 33, 8, 92, 25, 23, 20, 83, 66, 19, 27, 22, 95, 26, 21, 
69, 30, 113, 15, 100, 38, 24, 110, 102, 34, 37, 7, 36, 17, 13, 
29, 5, 3, 35), soil_type = c("org", "min", "org", "min", "min", 
"min", "min", "org", "min", "org", "min", "min", "min", "min", 
"min", "min", "min", "min", "min", "min", "org", "min", "min", 
"min", "min", "min", "min", "min", "org", "min", "min", "org", 
"min", "min", "min", "min", "org", "min", "min", "org", "min", 
"org", "min", "min", "min", "org", "min", "min", "org", "min", 
"org", "min", "min", "min", "min", "org", "min", "org", "min", 
"min", "min", "min", "min", "min", "min", "min", "min", "min", 
"org", "min", "min", "min", "min", "min", "min", "min", "org", 
"min", "min", "org", "min", "min", "min", "min", "org", "min", 
"min", "org", "min", "org", "min", "min", "min", "org", "min", 
"min", "min", "org", "min", "min", "min", "min", "org", "min", 
"org", "min", "min", "min", "min", "min", "org", "min", "min", 
"min", "min", "min", "org", "min", "min", "min", "min", "min", 
"min", "min", "org", "min", "min", "org", "min", "min", "min", 
"min", "org", "min", "min", "org", "min", "org", "min", "min", 
"min", "org", "min", "min", "org", "min", "org", "min", "min", 
"min", "min", "org", "min", "min", "min", "min", "min", "min", 
"min", "min", "min", "min", "org", "min", "min", "min", "min", 
"min", "min", "min", "org", "min", "min", "org", "min", "min", 
"min", "min", "org", "min", "min", "org", "min", "org", "min", 
"org", "min", "min")), row.names = c(NA, -188L), class = "data.frame")

你可以试试 tidyverse

library(tidyverse)
library(ggpmisc)
library(broom)

想法是使用tidyrnestpurrrmap以及boom的[预先计算p值=18=]函数。生成的 pvalue 被添加到数据框中。

df %>% 
  as_tibble() %>% # optional
  nest(data =-days) %>% # calculate p values
  mutate(p=map(data, ~lm(acetone~bacteria, data= .) %>% 
                 broom::tidy() %>% 
                 slice(2) %>% 
                 pull(p.value))) %>% 
  unnest(p) # to show the pvalue output
# A tibble: 4 x 3
days data                   p
<dbl> <list>             <dbl>
1     0 <tibble [48 x 4]> 0.955 
2    10 <tibble [48 x 4]> 0.746 
3    24 <tibble [48 x 4]> 0.475 
4    94 <tibble [44 x 4]> 0.0152

最后,剧情。数据在各自的 geoms 中被过滤为 p<0.2。

df %>% 
  as_tibble() %>% # optional
  nest(data =-days) %>% # calculate p values
  mutate(p=map(data, ~lm(acetone~bacteria, data= .) %>% 
                 broom::tidy() %>% 
                 slice(2) %>% 
                 pull(p.value))) %>% 
  unnest(cols = c(data, p)) %>% 
  ggplot(aes(bacteria, acetone)) +
    geom_point(aes(shape=soil_type, color=soil_type, size=soil_type, fill=soil_type)) +
    facet_wrap(~days, ncol = 4, scales = "free") +
    geom_smooth(data = . %>% group_by(days) %>% filter(any(p<0.2)), method = "lm", formula = y~x, color="black") +
    stat_poly_eq(data = . %>% group_by(days) %>% filter(any(p<0.2)),
               aes(label = paste(stat(adj.rr.label),
                                 stat(p.value.label), 
                                 sep = "*\", \"*")),
               formula = formula, 
               rr.digits = 1, 
               p.digits = 1, 
               parse = TRUE,size=3.5) +
  scale_fill_manual(values=c("#00AFBB", "brown")) + 
  scale_color_manual(values=c("black", "black")) + 
  scale_shape_manual(values=c(21, 24))+
  scale_size_manual(values=c(2.4, 1.7))+
  labs(shape="Soil type", color="Soil type", size="Soil type", fill="Soil type") +
  theme_bw() 

我简化了代码以展示如何轻松完成 stat_poly_eq() 的测试。为了能够测试 p-value,我们需要 stat return 数值而不是现成的格式化标签。使用手头的数值,我们可以将不需要的标签设置为 "" 并设置格式,然后手动格式化。所以这是部分答案...

ggplot(df, 
       aes(bacteria, 
           acetone)) +
  geom_point(aes(shape=soil_type, color=soil_type,
                 fill=soil_type)) +
  stat_poly_eq(aes(label = ifelse(stat(p.value) <= 0.2,
                                  sprintf("R^2~`=`~%.2f*\", \"*P~`=`~%.2g",
                                          stat(r.squared), stat(p.value)),
                                  "")),
               formula = formula, 
               output.type = "numeric",
               size = 3.5,
               parse = TRUE) +
  theme_bw() +
  facet_wrap(~days, 
             ncol = 4, scales = "free")