如何计算 R 中的 Bonferroni 下限和上限?
How to calculate the Bonferroni Lower and Upper limits in R?
使用以下数据,我正在尝试计算卡方和 Bonferroni 上下置信区间。 "Data_No" 列标识数据集(因为需要为每个数据集单独进行计算)。
Data_No Area Observed
1 3353 31
1 2297 2
1 1590 15
1 1087 16
1 817 2
1 847 10
1 1014 28
1 872 29
1 1026 29
1 1215 21
2 3353 31
2 2297 2
2 1590 15
3 1087 16
3 817 2
我使用的代码是
library(dplyr)
setwd("F:/GIS/July 2019/")
total_data <- read.csv("test.csv")
result_data <- NULL
for(i in unique(total_data$Data_No)){
data <- total_data[which(total_data$Data_No == i),] data <- data %>%
mutate(RelativeArea = Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE = Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU = Observed/sum(Observed), Alpha = 0.05/2*count(Data_No),
Zvalue = qnorm(Alpha,lower.tail=FALSE), lower = APU-Zvalue*sqrt(APU*(1-APU)/sum(Observed)), upper = APU+Zvalue*sqrt(APU*(1-APU)/sum(Observed)))
result_data <- rbind(result_data,data) }
write.csv(result_data,file='final_result.csv')
我收到的错误信息是:
Error in UseMethod("summarise_") : no applicable method for
'summarise_' applied to an object of class "c('integer', 'numeric')"
我调用的列 "Alpha" 是 0.05/2k 的 alpha 值,其中 K 是类别数 - 在我的示例中,我有 10 个类别("Data_No" 列)第一个数据集,所以"Alpha"需要0.05/20 = 0.0025,对应的Z值为2.807。在我的示例中,第二个数据集有 3 个类别(因此 0.05/6),第三个数据集有 2 个类别(0.05/4)table(Data_No" 列)。使用新计算的值 "Alpha" 列,然后我需要计算 ZValue 列 (Zvalue = qnorm(Alpha,lower.tail=FALSE)
),然后我用它来计算上下置信区间。
根据以上数据,这是我应该得到的结果,但请注意,我不得不手动计算 Alpha 列和 Z 值,而不是将这些计算插入 R 代码中:
Data_No Area Observed RelativeArea Alpha Z value lower upper
1 3353 31 0.237 0.003 2.807 0.092 0.247
1 2297 2 0.163 0.003 2.807 -0.011 0.033
1 1590 15 0.113 0.003 2.807 0.025 0.139
1 1087 16 0.077 0.003 2.807 0.029 0.146
1 817 2 0.058 0.003 2.807 -0.011 0.033
1 847 10 0.060 0.003 2.807 0.007 0.102
1 1014 28 0.072 0.003 2.807 0.078 0.228
1 872 29 0.062 0.003 2.807 0.083 0.234
1 1026 29 0.073 0.003 2.807 0.083 0.234
1 1215 21 0.086 0.003 2.807 0.049 0.181
2 3353 31 0.463 0.008 2.394 0.481 0.811
2 2297 2 0.317 0.008 2.394 -0.027 0.111
2 1590 15 0.220 0.008 2.394 0.152 0.473
3 1087 16 0.571 0.013 2.241 0.723 1.055
3 817 2 0.429 0.013 2.241 -0.055 0.277
请注意,我只包含了一些从代码生成的列。
# You need to check the closing bracket for lower c.f. sqrt value. Following code should work.
data <- read.csv("test.csv")
data <- data %>% mutate(RelativeArea =
Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE =
Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU =
Observed/sum(Observed), lower =
APU-2.394*sqrt(APU*(1-APU)/sum(Observed)), upper =
APU+2.394*sqrt(APU*(1-APU)/sum(Observed)))
#Answer to follow-up question.
#Sample Data
Data_No Area Observed
1 3353 31
1 2297 2
2 1590 15
2 1087 16
#Code to run
total_data <- read.csv("test.csv")
result_data <- NULL
for(i in unique(total_data$Data_No)){
data <- total_data[which(total_data$Data_No == i),]
data <- data %>% mutate(RelativeArea =
Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE =
Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU =
Observed/sum(Observed), lower =
APU-2.394*sqrt(APU*(1-APU)/sum(Observed)), upper =
APU+2.394*sqrt(APU*(1-APU)/sum(Observed)))
result_data <- rbind(result_data,data)
}
write.csv(result_data,file='final_result.csv')
#Issue in calculating Alpha. I have updated the code.
library(dplyr)
setwd("F:/GIS/July 2019/")
total_data <- read.csv("test.csv")
#Creating the NO_OF_CATEGORIES column based on your question.
total_data$NO_OF_CATEGORIES <- 0
total_data[which(total_data$Data_No==1),]$NO_OF_CATEGORIES <- 10
total_data[which(total_data$Data_No==2),]$NO_OF_CATEGORIES <- 3
total_data[which(total_data$Data_No==3),]$NO_OF_CATEGORIES <- 2
#Actual code
result_data <- NULL
for(i in unique(total_data$Data_No)){
data <- total_data[which(total_data$Data_No == i),]
data <- data %>%
mutate(RelativeArea = Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE = Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU = Observed/sum(Observed), Alpha = 0.05/(2*(unique(data$NO_OF_CATEGORIES))),
Zvalue = qnorm(Alpha,lower.tail=FALSE), lower = APU-Zvalue*sqrt(APU*(1-APU)/sum(Observed)), upper = APU+Zvalue*sqrt(APU*(1-APU)/sum(Observed)))
result_data <- rbind(result_data,data) }
write.csv(result_data,file='final_result.csv')
使用以下数据,我正在尝试计算卡方和 Bonferroni 上下置信区间。 "Data_No" 列标识数据集(因为需要为每个数据集单独进行计算)。
Data_No Area Observed
1 3353 31
1 2297 2
1 1590 15
1 1087 16
1 817 2
1 847 10
1 1014 28
1 872 29
1 1026 29
1 1215 21
2 3353 31
2 2297 2
2 1590 15
3 1087 16
3 817 2
我使用的代码是
library(dplyr)
setwd("F:/GIS/July 2019/")
total_data <- read.csv("test.csv")
result_data <- NULL
for(i in unique(total_data$Data_No)){
data <- total_data[which(total_data$Data_No == i),] data <- data %>%
mutate(RelativeArea = Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE = Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU = Observed/sum(Observed), Alpha = 0.05/2*count(Data_No),
Zvalue = qnorm(Alpha,lower.tail=FALSE), lower = APU-Zvalue*sqrt(APU*(1-APU)/sum(Observed)), upper = APU+Zvalue*sqrt(APU*(1-APU)/sum(Observed)))
result_data <- rbind(result_data,data) }
write.csv(result_data,file='final_result.csv')
我收到的错误信息是:
Error in UseMethod("summarise_") : no applicable method for 'summarise_' applied to an object of class "c('integer', 'numeric')"
我调用的列 "Alpha" 是 0.05/2k 的 alpha 值,其中 K 是类别数 - 在我的示例中,我有 10 个类别("Data_No" 列)第一个数据集,所以"Alpha"需要0.05/20 = 0.0025,对应的Z值为2.807。在我的示例中,第二个数据集有 3 个类别(因此 0.05/6),第三个数据集有 2 个类别(0.05/4)table(Data_No" 列)。使用新计算的值 "Alpha" 列,然后我需要计算 ZValue 列 (Zvalue = qnorm(Alpha,lower.tail=FALSE)
),然后我用它来计算上下置信区间。
根据以上数据,这是我应该得到的结果,但请注意,我不得不手动计算 Alpha 列和 Z 值,而不是将这些计算插入 R 代码中:
Data_No Area Observed RelativeArea Alpha Z value lower upper
1 3353 31 0.237 0.003 2.807 0.092 0.247
1 2297 2 0.163 0.003 2.807 -0.011 0.033
1 1590 15 0.113 0.003 2.807 0.025 0.139
1 1087 16 0.077 0.003 2.807 0.029 0.146
1 817 2 0.058 0.003 2.807 -0.011 0.033
1 847 10 0.060 0.003 2.807 0.007 0.102
1 1014 28 0.072 0.003 2.807 0.078 0.228
1 872 29 0.062 0.003 2.807 0.083 0.234
1 1026 29 0.073 0.003 2.807 0.083 0.234
1 1215 21 0.086 0.003 2.807 0.049 0.181
2 3353 31 0.463 0.008 2.394 0.481 0.811
2 2297 2 0.317 0.008 2.394 -0.027 0.111
2 1590 15 0.220 0.008 2.394 0.152 0.473
3 1087 16 0.571 0.013 2.241 0.723 1.055
3 817 2 0.429 0.013 2.241 -0.055 0.277
请注意,我只包含了一些从代码生成的列。
# You need to check the closing bracket for lower c.f. sqrt value. Following code should work.
data <- read.csv("test.csv")
data <- data %>% mutate(RelativeArea =
Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE =
Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU =
Observed/sum(Observed), lower =
APU-2.394*sqrt(APU*(1-APU)/sum(Observed)), upper =
APU+2.394*sqrt(APU*(1-APU)/sum(Observed)))
#Answer to follow-up question.
#Sample Data
Data_No Area Observed
1 3353 31
1 2297 2
2 1590 15
2 1087 16
#Code to run
total_data <- read.csv("test.csv")
result_data <- NULL
for(i in unique(total_data$Data_No)){
data <- total_data[which(total_data$Data_No == i),]
data <- data %>% mutate(RelativeArea =
Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE =
Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU =
Observed/sum(Observed), lower =
APU-2.394*sqrt(APU*(1-APU)/sum(Observed)), upper =
APU+2.394*sqrt(APU*(1-APU)/sum(Observed)))
result_data <- rbind(result_data,data)
}
write.csv(result_data,file='final_result.csv')
#Issue in calculating Alpha. I have updated the code.
library(dplyr)
setwd("F:/GIS/July 2019/")
total_data <- read.csv("test.csv")
#Creating the NO_OF_CATEGORIES column based on your question.
total_data$NO_OF_CATEGORIES <- 0
total_data[which(total_data$Data_No==1),]$NO_OF_CATEGORIES <- 10
total_data[which(total_data$Data_No==2),]$NO_OF_CATEGORIES <- 3
total_data[which(total_data$Data_No==3),]$NO_OF_CATEGORIES <- 2
#Actual code
result_data <- NULL
for(i in unique(total_data$Data_No)){
data <- total_data[which(total_data$Data_No == i),]
data <- data %>%
mutate(RelativeArea = Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE = Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU = Observed/sum(Observed), Alpha = 0.05/(2*(unique(data$NO_OF_CATEGORIES))),
Zvalue = qnorm(Alpha,lower.tail=FALSE), lower = APU-Zvalue*sqrt(APU*(1-APU)/sum(Observed)), upper = APU+Zvalue*sqrt(APU*(1-APU)/sum(Observed)))
result_data <- rbind(result_data,data) }
write.csv(result_data,file='final_result.csv')