垂直 95% 置信区间图 2 组比较
Vertical 95% Confidence Interval plot 2 groups comparison
所以我的最终目标是像这个例子一样在 2 组中垂直绘制具有多个 95% 置信区间的图:
我找到了这段代码:https://rpubs.com/nati147/703463
但是如何在图中添加分组比较?
Write a function ‘CI_95’ that takes input a vector of sample values,
and outputs the 95% confidence interval for this sample. You can use
the ‘margin_error_95’ function.
CI_95 <- function(sample_vals, sig){
error <- margin_error_95(sample_vals, sig)
CI <- mean(sample_vals) + c(-1, 1)*error
}
Write a function called ‘margin_error_95’ that takes input a vector of
sample values, and outputs the margin of error for a 95% confidence
interval.
margin_error_95 <- function(sample_vals, sig){
n <- length(sample_vals)
mar_err <- 1.96*(sig/sqrt(n))
}
plot_CI_95 <- function(seed){
B <- 100
n <- 30
mu <- 5
sig <- 1.2
set.seed(seed)
# extract upper bound of CI's
x_1 <- replicate(B,
{samp <- rnorm(n, mean = mu, sd = sig )
max(CI_95(samp, sig))
}
)
#extract lower bound of CI's
set.seed(seed)
x_0 <- replicate(B,
{samp <- rnorm(n, mean = mu, sd = sig )
min(CI_95(samp, sig))
}
)
set.seed(seed)
means <- replicate(B, mean(rnorm(n, mean = mu, sd = sig)))
plot(means, 1:B, pch = 20,
xlim = c(mu - sig, mu + sig),
ylim = c(0,B+1),
xlab = "sample means",
ylab = "index of the CI",
main = paste(B, "Confidence intervals")
)
for (i in 1:B){
if(between(mu, x_0[i], x_1[i])){
segments(x_0[i], i, x_1[i], i, lwd = 2) #plot CI's that contain the mean in black
} else {
segments(x_0[i], i, x_1[i], i, col = "red", lwd = 2) #plot CI's that don't contain the mean in red
}
}
abline(v=mu, col = "blue") #plot a vertical line at the population mean
}
运行剧情:
plot_CI_95(1)
这是一个解决方案,但可能需要根据您的喜好进一步完善。我保留了 plot_CI_95
函数的一般结构,但在不同的组上添加了一个循环。这意味着 mu
和 sig
变量现在必须有多个值,如果您要显示组间差异,则每组一个。还有一些颜色和其他图形参数。结果如下图
为避免两组的间隔重叠,需要调整一些参数。 1) png
中的数字 height
(增加值使组间更多 space,2)offset
参数(最多可增加约 0.3),或 3)lwd
在 segments
函数中(值越小意味着线条越细)。使用 png
或类似的功能直接保存图形将允许微调所需的外观。
library(dplyr)
CI_95 <- function(sample_vals, sig){
error <- margin_error_95(sample_vals, sig)
CI <- mean(sample_vals) + c(-1, 1)*error
}
margin_error_95 <- function(sample_vals, sig){
n <- length(sample_vals)
mar_err <- 1.96*(sig/sqrt(n))
}
png("group_plot.png",height=7,width=3,units = 'in',res=1000)
plot_CI_95 <- function(seed){
B <- 100
n <- 30
# mean and std dev as a vector of values
# need to have same length
# assume one value per group
mu <- c(5,4.5) # group1, group2
sig <- c(1.2,1) # group1, group2
offset<- 0.25 # controls point and line offset from nominal value
# colors for 2 groups
colsuse <- c('steelblue','gold')
# loop over groups
# mu and sig are now indexed by this loop
for(j in 1:length(mu)){
# use seed+j to make different random sample for each group
# extract upper bound of CI's
set.seed(seed+j)
x_1 <- replicate(B,
{samp <- rnorm(n, mean = mu[j], sd = sig[j])
max(CI_95(samp, sig[j]))
}
)
#extract lower bound of CI's
set.seed(seed+j)
x_0 <- replicate(B,
{samp <- rnorm(n, mean = mu[j], sd = sig[j])
min(CI_95(samp, sig[j]))
}
)
set.seed(seed+j)
means <- replicate(B, mean(rnorm(n, mean = mu[j], sd = sig[j])))
# for first group, establish the plot
# for second group, add values to the plot
# if groups are very different this might need to be modified with the xlim
if(j == 1){
plot(means, (1:B)+offset*ifelse(j==1,1,-1), pch = 20,
xlim = c(mu[j] - sig[j], mu[j] + sig[j]),
ylim = c(0,B+1),
xlab = "sample means",
ylab = "index of the CI",
main = paste(B, "Confidence intervals"),
col=colsuse[j]
)
}else{
points(means, (1:B)+offset*ifelse(j==1,1,-1), pch = 20,
col=colsuse[j])
}
for (i in 1:B){
if(between(mu[j], x_0[i], x_1[i])){
segments(x_0[i], i+offset*ifelse(j==1,1,-1), x_1[i], i+offset*ifelse(j==1,1,-1), col=colsuse[j], lwd = 1) #plot CI's that contain the mean in black
} else {
segments(x_0[i], i+offset*ifelse(j==1,1,-1), x_1[i], i+offset*ifelse(j==1,1,-1), col = "red", lwd = 1) #plot CI's that don't contain the mean in red
}
}
abline(v=mu[j], col = colsuse[j]) #plot a vertical line at the population mean
}
par(xpd=F)
# legend for the groups
legend("topright",legend = c('Male','Female'),lty=1,col=colsuse,cex=0.5)
}
plot_CI_95(1)
dev.off()
所以我的最终目标是像这个例子一样在 2 组中垂直绘制具有多个 95% 置信区间的图:
我找到了这段代码:https://rpubs.com/nati147/703463
但是如何在图中添加分组比较?
Write a function ‘CI_95’ that takes input a vector of sample values, and outputs the 95% confidence interval for this sample. You can use the ‘margin_error_95’ function.
CI_95 <- function(sample_vals, sig){
error <- margin_error_95(sample_vals, sig)
CI <- mean(sample_vals) + c(-1, 1)*error
}
Write a function called ‘margin_error_95’ that takes input a vector of sample values, and outputs the margin of error for a 95% confidence interval.
margin_error_95 <- function(sample_vals, sig){
n <- length(sample_vals)
mar_err <- 1.96*(sig/sqrt(n))
}
plot_CI_95 <- function(seed){
B <- 100
n <- 30
mu <- 5
sig <- 1.2
set.seed(seed)
# extract upper bound of CI's
x_1 <- replicate(B,
{samp <- rnorm(n, mean = mu, sd = sig )
max(CI_95(samp, sig))
}
)
#extract lower bound of CI's
set.seed(seed)
x_0 <- replicate(B,
{samp <- rnorm(n, mean = mu, sd = sig )
min(CI_95(samp, sig))
}
)
set.seed(seed)
means <- replicate(B, mean(rnorm(n, mean = mu, sd = sig)))
plot(means, 1:B, pch = 20,
xlim = c(mu - sig, mu + sig),
ylim = c(0,B+1),
xlab = "sample means",
ylab = "index of the CI",
main = paste(B, "Confidence intervals")
)
for (i in 1:B){
if(between(mu, x_0[i], x_1[i])){
segments(x_0[i], i, x_1[i], i, lwd = 2) #plot CI's that contain the mean in black
} else {
segments(x_0[i], i, x_1[i], i, col = "red", lwd = 2) #plot CI's that don't contain the mean in red
}
}
abline(v=mu, col = "blue") #plot a vertical line at the population mean
}
运行剧情:
plot_CI_95(1)
这是一个解决方案,但可能需要根据您的喜好进一步完善。我保留了 plot_CI_95
函数的一般结构,但在不同的组上添加了一个循环。这意味着 mu
和 sig
变量现在必须有多个值,如果您要显示组间差异,则每组一个。还有一些颜色和其他图形参数。结果如下图
为避免两组的间隔重叠,需要调整一些参数。 1) png
中的数字 height
(增加值使组间更多 space,2)offset
参数(最多可增加约 0.3),或 3)lwd
在 segments
函数中(值越小意味着线条越细)。使用 png
或类似的功能直接保存图形将允许微调所需的外观。
library(dplyr)
CI_95 <- function(sample_vals, sig){
error <- margin_error_95(sample_vals, sig)
CI <- mean(sample_vals) + c(-1, 1)*error
}
margin_error_95 <- function(sample_vals, sig){
n <- length(sample_vals)
mar_err <- 1.96*(sig/sqrt(n))
}
png("group_plot.png",height=7,width=3,units = 'in',res=1000)
plot_CI_95 <- function(seed){
B <- 100
n <- 30
# mean and std dev as a vector of values
# need to have same length
# assume one value per group
mu <- c(5,4.5) # group1, group2
sig <- c(1.2,1) # group1, group2
offset<- 0.25 # controls point and line offset from nominal value
# colors for 2 groups
colsuse <- c('steelblue','gold')
# loop over groups
# mu and sig are now indexed by this loop
for(j in 1:length(mu)){
# use seed+j to make different random sample for each group
# extract upper bound of CI's
set.seed(seed+j)
x_1 <- replicate(B,
{samp <- rnorm(n, mean = mu[j], sd = sig[j])
max(CI_95(samp, sig[j]))
}
)
#extract lower bound of CI's
set.seed(seed+j)
x_0 <- replicate(B,
{samp <- rnorm(n, mean = mu[j], sd = sig[j])
min(CI_95(samp, sig[j]))
}
)
set.seed(seed+j)
means <- replicate(B, mean(rnorm(n, mean = mu[j], sd = sig[j])))
# for first group, establish the plot
# for second group, add values to the plot
# if groups are very different this might need to be modified with the xlim
if(j == 1){
plot(means, (1:B)+offset*ifelse(j==1,1,-1), pch = 20,
xlim = c(mu[j] - sig[j], mu[j] + sig[j]),
ylim = c(0,B+1),
xlab = "sample means",
ylab = "index of the CI",
main = paste(B, "Confidence intervals"),
col=colsuse[j]
)
}else{
points(means, (1:B)+offset*ifelse(j==1,1,-1), pch = 20,
col=colsuse[j])
}
for (i in 1:B){
if(between(mu[j], x_0[i], x_1[i])){
segments(x_0[i], i+offset*ifelse(j==1,1,-1), x_1[i], i+offset*ifelse(j==1,1,-1), col=colsuse[j], lwd = 1) #plot CI's that contain the mean in black
} else {
segments(x_0[i], i+offset*ifelse(j==1,1,-1), x_1[i], i+offset*ifelse(j==1,1,-1), col = "red", lwd = 1) #plot CI's that don't contain the mean in red
}
}
abline(v=mu[j], col = colsuse[j]) #plot a vertical line at the population mean
}
par(xpd=F)
# legend for the groups
legend("topright",legend = c('Male','Female'),lty=1,col=colsuse,cex=0.5)
}
plot_CI_95(1)
dev.off()