为 Q-Q 图着色,比较 R 中四分位数的两个分布

Colour a Q-Q plot comparing two distributions by quartiles in R

我正在尝试构建一个比较两个分布的 Q-Q 图,第 99 个百分位数的颜色如下例所示:

但是我不确定如何实现这一点,这是我的数据的一个子集:

dfw <- structure(list(Date.Time = structure(c(848502000, 848509200, 
                                               848512800, 848520000, 848523600, 848530800, 848534400, 848541600, 
                                               848545200, 848552400, 848556000, 848563200, 848566800, 848574000, 
                                               848577600, 848588400, 848595600, 848599200, 848606400, 848610000, 
                                               848617200, 848620800, 848628000, 848631600, 848638800, 848642400, 
                                               848649600, 848653200, 848660400, 848664000, 848674800, 848682000, 
                                               848685600, 848692800, 848696400, 848703600, 848707200, 848714400, 
                                               848718000, 848725200, 848728800, 848736000, 848739600, 848746800, 
                                               848750400, 848761200, 848768400, 848772000, 848779200, 848782800, 
                                               848790000, 848793600, 848800800, 848804400, 848811600, 848815200, 
                                               848822400, 848826000, 848833200, 848847600, 848854800, 848858400, 
                                               848865600, 848869200, 848876400, 848880000, 848887200, 848890800, 
                                               848898000, 848901600, 848908800, 848912400, 848919600, 848923200, 
                                               848934000, 848941200, 848944800, 848952000, 848955600, 848962800, 
                                               848966400, 848973600, 848977200, 848984400, 848988000, 848995200, 
                                               848998800, 849006000, 849009600, 853682400, 853686000, 853714800, 
                                               853718400, 853725600, 853729200, 853736400, 853750800, 853758000, 
                                               853761600, 853768800), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
                       Hs.Mod = c(1.5960001, 1.5600001, 1.5480001, 1.552, 1.552, 
                                  1.534, 1.5180001, 1.462, 1.4260001, 1.3740001, 1.36, 1.3340001, 
                                  1.3080001, 1.2720001, 1.256, 1.218, 1.212, 1.21, 1.2060001, 
                                  1.2160001, 1.248, 1.25, 1.25, 1.264, 1.3560001, 1.394, 1.3700001, 
                                  1.332, 1.2900001, 1.2800001, 1.268, 1.2800001, 1.2800001, 
                                  1.3240001, 1.3240001, 1.286, 1.2540001, 1.19, 1.172, 1.1700001, 
                                  1.1860001, 1.2340001, 1.274, 1.3640001, 1.4120001, 1.58, 
                                  1.6580001, 1.6660001, 1.682, 1.748, 1.9280001, 1.9800001, 
                                  2.026, 2.052, 2.214, 2.328, 2.4320002, 2.39, 2.2180002, 1.9080001, 
                                  1.792, 1.7400001, 1.7140001, 1.7140001, 1.692, 1.6680001, 
                                  1.608, 1.58, 1.536, 1.524, 1.5640001, 1.5760001, 1.6100001, 
                                  1.6240001, 1.6120001, 1.5920001, 1.58, 1.542, 1.5200001, 
                                  1.48, 1.4640001, 1.4260001, 1.406, 1.386, 1.3820001, 1.34, 
                                  1.312, 1.268, 1.248, 1.8080001, 1.7960001, 1.644, 1.6420001, 
                                  1.6600001, 1.6880001, 1.7820001, 2.138, 2.2740002, 2.2940001, 
                                  2.252), Hs.Obs = c(1.741, 1.524, 1.618, 1.658, 1.697, 1.822, 
                                                     1.792, 1.463, 1.433, 1.376, 1.208, 1.299, 1.255, 1.304, 1.328, 
                                                     1.182, 1.282, 1.293, 1.228, 1.281, 1.45, 1.356, 1.501, 1.5, 
                                                     1.356, 1.477, 1.408, 1.544, 1.497, 1.768, 2.04, 2.074, 2.042, 
                                                     2.147, 2.224, 2.022, 2.017, 2.047, 2.353, 2.597, 2.838, 2.67, 
                                                     2.762, 2.687, 2.734, 2.738, 2.938, 2.795, 2.549, 2.669, 2.447, 
                                                     2.676, 2.577, 2.383, 2.362, 2.284, 2.341, 2.33, 2.397, 2.498, 
                                                     2.317, 2.373, 2.377, 2.362, 2.218, 2.226, 1.97, 2.087, 1.874, 
                                                     2.116, 2.022, 1.886, 2.046, 1.879, 1.638, 1.677, 1.638, 1.647, 
                                                     1.551, 1.596, 1.591, 1.384, 1.345, 1.522, 1.469, 1.503, 1.459, 
                                                     1.327, 1.453, 2.448, 2.235, 2.104, 1.958, 2.118, 2.209, 2.034, 
                                                     2.229, 2.505, 2.163, 2.372)), row.names = c(NA, 100L), class = "data.frame")

制作 Q-Q 图的代码:

ggplot(data=dfw, aes(x=sort(Hs.Obs), y=sort(Hs.Mod))) + geom_point(shape = 1, size =2) + xlab('Obs') + ylab('Model')+
  theme_bw()+
  geom_abline(linetype=2)

尝试为第 99 个百分位数着色的代码:

ggplot(data=dfw, aes(x=sort(Hs.Obs), y=sort(Hs.Mod), col=cut(Hs.Mod,quantile(Hs.Mod, probs = .99)))) + 
  geom_point(shape = 1, size =2) + xlab('Obs') + ylab('Model')+
  theme_bw()+
  geom_abline(linetype=2)

结果图:

我正在寻求一些帮助来解决这个问题,因为我尝试过的尝试都没有奏效。 提前致谢!

我正在使用 dplyrfilter() quantile() 的数据: 代码:

library(dplyr)
library(ggplot2) 


 ggplot()+
geom_point(data=filter(dfw,Hs.Obs>quantile(dfw$Hs.Mod,.99)),aes(x=sort(Hs.Obs),y=sort(Hs.Mod), col="Cuantile 99%"))+
geom_point(data=filter(dfw,Hs.Obs<quantile(dfw$Hs.Mod,.99)),aes(x=sort(Hs.Obs),y=sort(Hs.Mod), col="Cuantile 1-98%"))+
geom_abline(linetype=2)+xlab('Obs') + ylab('Model')+ theme_bw()

试试这个

dfw %>% 
 mutate(qq = quantile(Hs.Mod, probs = c(0.99)),
        qq_gt99 = ifelse(qq<= Hs.Mod, 1, 0)) %>% 
 ggplot(aes(x=Hs.Mod, y = Hs.Obs, col= as.factor(qq_gt99))) + geom_point()

如果您先订购观察结果

dfw %>% 
  mutate(mod_ordered = sort(Hs.Mod),
         obs_ordered = sort(Hs.Obs),
         qq = quantile(mod_ordered, probs = c(0.99)),
         qq_gt99 = ifelse(qq<= mod_ordered, 1, 0)) %>% 

   ggplot(aes(x=mod_ordered, y = obs_ordered, col= 
 as.factor(qq_gt99))) + geom_point()