在 Q-Q 图(stat_qq() 或 geom_qq())中将十分位数绘制为红色

Plotting red deciles in a Q-Q plot with stat_qq() or geom_qq()

我正在尝试将一个分布与理论正态分布进行比较,并希望在 Q-Q 图中将十分位数标为红色。

这是我正在使用的数据(dput() 的输出,是一个数据框;下文代码中将其命名为 example)。

structure(list(v1 = c(84.01, 86.88, 48.5, 80.28, 63.85, 81.95, 
8.94, 59.16, 35.16, 49.6, 56.16, 19.71, 63.03, 64.82, 2.92, 46.69, 
42, 63.83, 53.29, 67.79, 15.87, 37.95, 52.99, 52.11, 66.14, 68.61, 
61.19, 44.9, 54.8, 61.81, 80.9, 26.17, 12.7, 57.51, 73.41, 54.25, 
55.61, 55.64, 57.69, 67.77, 23.21, 74.23, 29.43, 52.08, 41.1, 
58.65, 55.25, 79.08, 71.39, 32.29, 66.9, 60.52, 100, 43.34, 47.69, 
65.64, 75.9, 59.95, 55.12, 37.57, 60.76, 79.82, 55.67, 43.17, 
56.56, 39.87, 65.32, 53.82, 57.76, 26.88, 62.83, 42.71, 73.82, 
55.02, 73.52, 68.9, 60.82, 52.16, 61.34, 57.59, 19.71, 74.17, 
63.93, 59.88, 55.47, 43.19, 45.17, 64.23, 99.18, 53.25, 58, 73.66, 
100, 58.83, 86.6, 54.83, 58.13, 68.57, 56.17, 59.73, 47.02, 33.97, 
60.2, 60.53, 47.18, 54.72, 49.71, 35.29, 62.25, 44.07, 46.35, 
17.29, 26.93, 60.26, 77.35, 59.18, 71.83, 72.01, 58.78, 29.14, 
62.56, 23.41, 39.82, 56.27, 46.55, 60.05, 80.74, 15.99, 58.64, 
37.43, 60.38, 48.77, 54.41, 49.4, 36.05, 35.17, 80.04, 31.64, 
56.2, 39.35, 10.61, 75.99, 51.69, 59.65, 52.58, 64.76, 76.07, 
43.96, 66.95, 29.45, 43.18, 30.99, 51.22, 30.45, 73.49, 58.92, 
67.22, 51.68, 62.56, 64.29, 46.11, 72.86, 66.33, 56.27, 62.04, 
49.68, 46.44, 49.3, 66.93, 43.86, 12.72, 44.43, 60.62, 33.98, 
83.57, 50.37, 36.96, 54.04, 30.47, 28.46, 48.35, 35.64, 71.47, 
76.1, 7.32, 3.36, 83.27, 52.11, 90.43, 22.85, 7.9, 52.19, 41.96, 
23.22, 42.86, 30.83, 54.66, 63.9, 46.04, 43.69, 84.14, 50.09, 
65.29, 42.9, 72.92, 60.2, 45.36, 51.76, 58.7, 36.84, 77.72, 21.04, 
53.21, 68.87, 12.22, 63.93, 18.57, 74.93, 65.3, 79.18, 38.03, 
55.69, 73.93, 63.33, 93.97, 34.99, 45.87, 61.12, 47.95, 40.82, 
54.1, 69.17, 54.9, 41.72, 76.39, 61.75, 79.3, 29.62, 41.29, 33.33, 
52.31, 31.04, 65.94, 23.29, 64.27, 63.71, 73.43, 68.94, 68.85, 
68.39, 39.13, 65.16, 61.77, 88.54, 76.29, 78.85, 44.97, 12.35, 
90.31, 63.24, 77.85, 58.5, 81.05, 85.38, 74.16, 78.56, 54.86, 
71.15, 62.7, 64.49, 27.32, 45.71, 56.29, 60.76, 90, 63.91, 71.72, 
20.42, 45.02, 14.21, 54.64, 68.88, 71.67, 37.67, 53.88, 58.8, 
33.86, 57.98, 57.84, 73.74, 39.83, 40.69, 54.66, 18.08, 43.79, 
31.05, 89.34, 46.65, 24.99, 64.33, 51.87, 65.17, 40.04, 69.41, 
13.89, 61.99, 44.14, 65.46, 47.14, 27.88, 52.46, 12.69, 45.41, 
84.13, 59.48, 40.2, 40.76, 42.66, 35.56, 57.52, 41.17, 60.02, 
44.58, 1.76, 35.8, 70.61, 20.74, 1.78, 60.11, 70.64, 26.04, 33.58, 
61.88, 12.38, 48.17, 27.09, 75.23, 28.85, 53.25, 44.19, 64.56, 
17.12, 51.22, 66.02, 43.06, 58.71, 55.65, 27.52, 45.28, 44.83, 
74.25, 58.35, 23.1, 58.52, 54.1, 57.74, 47.61, 17.25, 57.44, 
70.38, 43.69, 89.84, 72.9, 57.53, 33.53, 18.08, 41.71, 52.07, 
46.63, 42.64, 41.98, 27.74, 72.13, 61.92, 83.65, 22.42, 62.47, 
13.49, 68.39, 48.94, 72.79, 49.76, 63.05, 85, 45.58, 59.58, 57.94, 
65.93, 65.26, 35.56, 65.71, 62.17, 21.07, 53.33, 72.97, 36.32, 
55.38, 68.96, 66.11, 56.05, 70.68, 89.05, 73.7, 85.43, 88.74, 
51.16, 49.83, 63.85, 45.61, 29.15, 49.93, 62.9, 61.39, 69.17, 
6.61, 63.66, 76.21, 56.85, 49.86, 68.41, 75.94, 67.88, 62.25, 
26.8, 74.27, 76.48, 46.25, 73.84, 47.21, 60.76, 42.17, 45.73, 
41.4, 67.04, 48.28, 63.73, 65.9, 36.62, 74.91, 50.11, 24.21, 
91.45, 78.71, 100, 27.59, 78.42, 38.69, 21.68, 0, 44.85, 59.28, 
45.75, 36.84, 73.91, 41.8, 66, 74.88, 59.25, 54.7, 44.39, 40.95, 
42.03, 66.93, 38, 72.08, 48, 41.7, 72.1, 64.23, 14.18, 43.53, 
57.81, 52.68, 45.73, 97.12, 18.98, 57.12, 48.33, 18.94, 60.6, 
52.05, 67.09, 72.89, 64.16, 57.79, 45.62, 55.02, 48.86, 60.74, 
7.35, 56.31, 60.51, 49, 67.56, 84.62, 15.98, 54.67, 65.72, 65.79, 
70.55, 74.18, 55.72, 57.89, 54.67, 39.83, 21.2, 78)), class = "data.frame", row.names = c(NA, 
-507L))

这是生成 Q-Q 图的代码:

# Q-Q plot of v1 together with its reference line.
ggplot(example, aes(sample = v1)) +
  stat_qq() +
  stat_qq_line()
# Open question: how to add the deciles with colour = "red"?

如何将十分位数着色为红色?

首先使用 ntile() 函数为每个观测值分配十分位组。之后,您可以根据该分组用 ifelse() 选择颜色,将第一个十分位组中的点着色为红色。您可以使用以下代码:

library(tidyverse)

# Assign every observation to one of 10 rank-based groups (1 = lowest values).
example <- example %>%
  mutate(decile = ntile(v1, 10))

# The colours are passed to geom_qq() as a fixed per-point vector, not as an
# aesthetic mapping. The Q-Q points are drawn in ascending order of the sample,
# so the vector is reordered by v1 to line up with the plotted points.
point_colours <- ifelse(example$decile == 1, "red", "black")[order(example$v1)]

# Map the column by name (not `example$v1`) so both layers share one mapping
# and ggplot2 does not warn about `$` inside aes().
ggplot(example, aes(sample = v1)) +
  geom_qq(colour = point_colours) +
  geom_qq_line()

输出:

感谢 @Quinten 的回答,我得以在 Q-Q 图的点中突出显示十分位数。我在使用 ntile() 函数的基础上计算了分组发生变化的点(即实际的十分位数),解决方案如下:

library(tidyverse)

# Rank the observations into 10 groups, sort by value, then record how much the
# group index changes from one row to the next. The first row has no
# predecessor, so its NA difference is replaced by 1.
example <- example %>%
  mutate(decile = ntile(v1, 10)) %>%
  arrange(v1) %>%
  mutate(
    difs = decile - lag(decile),
    difs = ifelse(is.na(difs), 1, difs)
  )

# TRUE for rows whose difs equals 1, i.e. the first row of each decile group.
is_cut_point <- example$difs == 1

# Highlight those rows: red, fully opaque and slightly larger; all other
# points are black, semi-transparent and default-sized.
ggplot(example, aes(sample = v1)) +
  geom_qq(
    colour = ifelse(is_cut_point, "red", "black"),
    alpha = ifelse(is_cut_point, 1, 0.33),
    size = ifelse(is_cut_point, 1.5, 1)
  ) +
  geom_qq_line()