为 Q-Q 图着色,比较 R 中四分位数的两个分布
Colour a Q-Q plot comparing two distributions by quartiles in R
我正在尝试构建一个比较两个分布的 Q-Q 图,第 99 个百分位数的颜色如下例所示:
但是我不确定如何实现这一点,这是我的数据的一个子集:
dfw <- structure(list(Date.Time = structure(c(848502000, 848509200,
848512800, 848520000, 848523600, 848530800, 848534400, 848541600,
848545200, 848552400, 848556000, 848563200, 848566800, 848574000,
848577600, 848588400, 848595600, 848599200, 848606400, 848610000,
848617200, 848620800, 848628000, 848631600, 848638800, 848642400,
848649600, 848653200, 848660400, 848664000, 848674800, 848682000,
848685600, 848692800, 848696400, 848703600, 848707200, 848714400,
848718000, 848725200, 848728800, 848736000, 848739600, 848746800,
848750400, 848761200, 848768400, 848772000, 848779200, 848782800,
848790000, 848793600, 848800800, 848804400, 848811600, 848815200,
848822400, 848826000, 848833200, 848847600, 848854800, 848858400,
848865600, 848869200, 848876400, 848880000, 848887200, 848890800,
848898000, 848901600, 848908800, 848912400, 848919600, 848923200,
848934000, 848941200, 848944800, 848952000, 848955600, 848962800,
848966400, 848973600, 848977200, 848984400, 848988000, 848995200,
848998800, 849006000, 849009600, 853682400, 853686000, 853714800,
853718400, 853725600, 853729200, 853736400, 853750800, 853758000,
853761600, 853768800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Hs.Mod = c(1.5960001, 1.5600001, 1.5480001, 1.552, 1.552,
1.534, 1.5180001, 1.462, 1.4260001, 1.3740001, 1.36, 1.3340001,
1.3080001, 1.2720001, 1.256, 1.218, 1.212, 1.21, 1.2060001,
1.2160001, 1.248, 1.25, 1.25, 1.264, 1.3560001, 1.394, 1.3700001,
1.332, 1.2900001, 1.2800001, 1.268, 1.2800001, 1.2800001,
1.3240001, 1.3240001, 1.286, 1.2540001, 1.19, 1.172, 1.1700001,
1.1860001, 1.2340001, 1.274, 1.3640001, 1.4120001, 1.58,
1.6580001, 1.6660001, 1.682, 1.748, 1.9280001, 1.9800001,
2.026, 2.052, 2.214, 2.328, 2.4320002, 2.39, 2.2180002, 1.9080001,
1.792, 1.7400001, 1.7140001, 1.7140001, 1.692, 1.6680001,
1.608, 1.58, 1.536, 1.524, 1.5640001, 1.5760001, 1.6100001,
1.6240001, 1.6120001, 1.5920001, 1.58, 1.542, 1.5200001,
1.48, 1.4640001, 1.4260001, 1.406, 1.386, 1.3820001, 1.34,
1.312, 1.268, 1.248, 1.8080001, 1.7960001, 1.644, 1.6420001,
1.6600001, 1.6880001, 1.7820001, 2.138, 2.2740002, 2.2940001,
2.252), Hs.Obs = c(1.741, 1.524, 1.618, 1.658, 1.697, 1.822,
1.792, 1.463, 1.433, 1.376, 1.208, 1.299, 1.255, 1.304, 1.328,
1.182, 1.282, 1.293, 1.228, 1.281, 1.45, 1.356, 1.501, 1.5,
1.356, 1.477, 1.408, 1.544, 1.497, 1.768, 2.04, 2.074, 2.042,
2.147, 2.224, 2.022, 2.017, 2.047, 2.353, 2.597, 2.838, 2.67,
2.762, 2.687, 2.734, 2.738, 2.938, 2.795, 2.549, 2.669, 2.447,
2.676, 2.577, 2.383, 2.362, 2.284, 2.341, 2.33, 2.397, 2.498,
2.317, 2.373, 2.377, 2.362, 2.218, 2.226, 1.97, 2.087, 1.874,
2.116, 2.022, 1.886, 2.046, 1.879, 1.638, 1.677, 1.638, 1.647,
1.551, 1.596, 1.591, 1.384, 1.345, 1.522, 1.469, 1.503, 1.459,
1.327, 1.453, 2.448, 2.235, 2.104, 1.958, 2.118, 2.209, 2.034,
2.229, 2.505, 2.163, 2.372)), row.names = c(NA, 100L), class = "data.frame")
制作 Q-Q 图的代码:
ggplot(data=dfw, aes(x=sort(Hs.Obs), y=sort(Hs.Mod))) + geom_point(shape = 1, size =2) + xlab('Obs') + ylab('Model')+
theme_bw()+
geom_abline(linetype=2)
尝试为第 99 个百分位数着色的代码:
ggplot(data=dfw, aes(x=sort(Hs.Obs), y=sort(Hs.Mod), col=cut(Hs.Mod,quantile(Hs.Mod, probs = .99)))) +
geom_point(shape = 1, size =2) + xlab('Obs') + ylab('Model')+
theme_bw()+
geom_abline(linetype=2)
结果图:
我正在寻求一些帮助来解决这个问题,因为我尝试过的尝试都没有奏效。
提前致谢!
我正在使用 dplyr
库 filter()
quantile()
的数据:
代码:
library(dplyr)
library(ggplot2)
ggplot()+
geom_point(data=filter(dfw,Hs.Obs>quantile(dfw$Hs.Mod,.99)),aes(x=sort(Hs.Obs),y=sort(Hs.Mod), col="Cuantile 99%"))+
geom_point(data=filter(dfw,Hs.Obs<quantile(dfw$Hs.Mod,.99)),aes(x=sort(Hs.Obs),y=sort(Hs.Mod), col="Cuantile 1-98%"))+
geom_abline(linetype=2)+xlab('Obs') + ylab('Model')+ theme_bw()
试试这个
dfw %>%
mutate(qq = quantile(Hs.Mod, probs = c(0.99)),
qq_gt99 = ifelse(qq<= Hs.Mod, 1, 0)) %>%
ggplot(aes(x=Hs.Mod, y = Hs.Obs, col= as.factor(qq_gt99))) + geom_point()
如果您先订购观察结果
dfw %>%
mutate(mod_ordered = sort(Hs.Mod),
obs_ordered = sort(Hs.Obs),
qq = quantile(mod_ordered, probs = c(0.99)),
qq_gt99 = ifelse(qq<= mod_ordered, 1, 0)) %>%
ggplot(aes(x=mod_ordered, y = obs_ordered, col=
as.factor(qq_gt99))) + geom_point()
我正在尝试构建一个比较两个分布的 Q-Q 图,第 99 个百分位数的颜色如下例所示:
但是我不确定如何实现这一点,这是我的数据的一个子集:
dfw <- structure(list(Date.Time = structure(c(848502000, 848509200,
848512800, 848520000, 848523600, 848530800, 848534400, 848541600,
848545200, 848552400, 848556000, 848563200, 848566800, 848574000,
848577600, 848588400, 848595600, 848599200, 848606400, 848610000,
848617200, 848620800, 848628000, 848631600, 848638800, 848642400,
848649600, 848653200, 848660400, 848664000, 848674800, 848682000,
848685600, 848692800, 848696400, 848703600, 848707200, 848714400,
848718000, 848725200, 848728800, 848736000, 848739600, 848746800,
848750400, 848761200, 848768400, 848772000, 848779200, 848782800,
848790000, 848793600, 848800800, 848804400, 848811600, 848815200,
848822400, 848826000, 848833200, 848847600, 848854800, 848858400,
848865600, 848869200, 848876400, 848880000, 848887200, 848890800,
848898000, 848901600, 848908800, 848912400, 848919600, 848923200,
848934000, 848941200, 848944800, 848952000, 848955600, 848962800,
848966400, 848973600, 848977200, 848984400, 848988000, 848995200,
848998800, 849006000, 849009600, 853682400, 853686000, 853714800,
853718400, 853725600, 853729200, 853736400, 853750800, 853758000,
853761600, 853768800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Hs.Mod = c(1.5960001, 1.5600001, 1.5480001, 1.552, 1.552,
1.534, 1.5180001, 1.462, 1.4260001, 1.3740001, 1.36, 1.3340001,
1.3080001, 1.2720001, 1.256, 1.218, 1.212, 1.21, 1.2060001,
1.2160001, 1.248, 1.25, 1.25, 1.264, 1.3560001, 1.394, 1.3700001,
1.332, 1.2900001, 1.2800001, 1.268, 1.2800001, 1.2800001,
1.3240001, 1.3240001, 1.286, 1.2540001, 1.19, 1.172, 1.1700001,
1.1860001, 1.2340001, 1.274, 1.3640001, 1.4120001, 1.58,
1.6580001, 1.6660001, 1.682, 1.748, 1.9280001, 1.9800001,
2.026, 2.052, 2.214, 2.328, 2.4320002, 2.39, 2.2180002, 1.9080001,
1.792, 1.7400001, 1.7140001, 1.7140001, 1.692, 1.6680001,
1.608, 1.58, 1.536, 1.524, 1.5640001, 1.5760001, 1.6100001,
1.6240001, 1.6120001, 1.5920001, 1.58, 1.542, 1.5200001,
1.48, 1.4640001, 1.4260001, 1.406, 1.386, 1.3820001, 1.34,
1.312, 1.268, 1.248, 1.8080001, 1.7960001, 1.644, 1.6420001,
1.6600001, 1.6880001, 1.7820001, 2.138, 2.2740002, 2.2940001,
2.252), Hs.Obs = c(1.741, 1.524, 1.618, 1.658, 1.697, 1.822,
1.792, 1.463, 1.433, 1.376, 1.208, 1.299, 1.255, 1.304, 1.328,
1.182, 1.282, 1.293, 1.228, 1.281, 1.45, 1.356, 1.501, 1.5,
1.356, 1.477, 1.408, 1.544, 1.497, 1.768, 2.04, 2.074, 2.042,
2.147, 2.224, 2.022, 2.017, 2.047, 2.353, 2.597, 2.838, 2.67,
2.762, 2.687, 2.734, 2.738, 2.938, 2.795, 2.549, 2.669, 2.447,
2.676, 2.577, 2.383, 2.362, 2.284, 2.341, 2.33, 2.397, 2.498,
2.317, 2.373, 2.377, 2.362, 2.218, 2.226, 1.97, 2.087, 1.874,
2.116, 2.022, 1.886, 2.046, 1.879, 1.638, 1.677, 1.638, 1.647,
1.551, 1.596, 1.591, 1.384, 1.345, 1.522, 1.469, 1.503, 1.459,
1.327, 1.453, 2.448, 2.235, 2.104, 1.958, 2.118, 2.209, 2.034,
2.229, 2.505, 2.163, 2.372)), row.names = c(NA, 100L), class = "data.frame")
制作 Q-Q 图的代码:
ggplot(data=dfw, aes(x=sort(Hs.Obs), y=sort(Hs.Mod))) + geom_point(shape = 1, size =2) + xlab('Obs') + ylab('Model')+
theme_bw()+
geom_abline(linetype=2)
尝试为第 99 个百分位数着色的代码:
ggplot(data=dfw, aes(x=sort(Hs.Obs), y=sort(Hs.Mod), col=cut(Hs.Mod,quantile(Hs.Mod, probs = .99)))) +
geom_point(shape = 1, size =2) + xlab('Obs') + ylab('Model')+
theme_bw()+
geom_abline(linetype=2)
结果图:
我正在寻求一些帮助来解决这个问题,因为我尝试过的尝试都没有奏效。 提前致谢!
我正在使用 dplyr
库 filter()
quantile()
的数据:
代码:
library(dplyr)
library(ggplot2)
ggplot()+
geom_point(data=filter(dfw,Hs.Obs>quantile(dfw$Hs.Mod,.99)),aes(x=sort(Hs.Obs),y=sort(Hs.Mod), col="Cuantile 99%"))+
geom_point(data=filter(dfw,Hs.Obs<quantile(dfw$Hs.Mod,.99)),aes(x=sort(Hs.Obs),y=sort(Hs.Mod), col="Cuantile 1-98%"))+
geom_abline(linetype=2)+xlab('Obs') + ylab('Model')+ theme_bw()
试试这个
dfw %>%
mutate(qq = quantile(Hs.Mod, probs = c(0.99)),
qq_gt99 = ifelse(qq<= Hs.Mod, 1, 0)) %>%
ggplot(aes(x=Hs.Mod, y = Hs.Obs, col= as.factor(qq_gt99))) + geom_point()
如果您先订购观察结果
dfw %>%
mutate(mod_ordered = sort(Hs.Mod),
obs_ordered = sort(Hs.Obs),
qq = quantile(mod_ordered, probs = c(0.99)),
qq_gt99 = ifelse(qq<= mod_ordered, 1, 0)) %>%
ggplot(aes(x=mod_ordered, y = obs_ordered, col=
as.factor(qq_gt99))) + geom_point()