boot.ci() 无法计算置信区间
boot.ci() can't compute confidence intervals
我正在尝试使用 R
中的 boot
包计算 Kruskal-Wallis 检验的 eta 平方的置信区间。但它似乎不起作用,我不确定为什么。
获取 eta 平方(有效!):
我采取小步骤,从 return eta 平方值的自定义函数开始。所以这有效。
# set up
set.seed(123)
library(tidyverse)
library(PMCMRplus)
# custom function to get eta-squared value
kw_eta_h <- function(data,
x,
y) {
# creating a dataframe from entered data
data <- dplyr::select(
.data = data,
x = !!rlang::enquo(x),
y = !!rlang::enquo(y)
) %>%
dplyr::filter(.data = ., !is.na(x), !is.na(y)) %>%
tibble::as.tibble(x = .)
# running the function
fit <-
PMCMRplus::kruskalTest(
formula = y ~ x,
data = data,
dist = "KruskalWallis"
)
# calculating the eta-squared estimate using the H-statistic
# ref. http://www.tss.awf.poznan.pl/files/3_Trends_Vol21_2014__no1_20.pdf
effsize <-
(fit$statistic[[1]] - fit$parameter[[1]] + 1) /
(fit$parameter[[3]] - fit$parameter[[1]])
# return the value of interest: effect size
return(effsize[[1]])
}
# using the function
kw_eta_h(iris, Species, Sepal.Length)
#> [1] 0.6458329
获取 eta 平方(不起作用):
现在我使用刚刚与 boot
包结合使用的自定义函数,但它为 eta 平方生成相同的值,因此没有计算置信区间。我在这里做错了什么?
# function to get confidence intervals
kw_eta_h_ci <- function(data,
x,
y,
nboot = 100,
conf.level = 0.95,
conf.type = "norm",
...) {
# creating a dataframe from entered data
data <- dplyr::select(
.data = data,
x = !!rlang::enquo(x),
y = !!rlang::enquo(y)
) %>%
dplyr::filter(.data = ., !is.na(x), !is.na(y)) %>%
tibble::as.tibble(x = .)
# eta-squared value
eta_sq_H <- kw_eta_h(
data = data,
x = x,
y = y
)
# function to obtain 95% CI for for eta-squared
eta_h_ci <- function(data, x, y, indices) {
# allows boot to select sample
d <- data[indices, ]
# running the function
fit <-
kw_eta_h(
data = data,
x = x,
y = y
)
# return the value of interest: effect size
return(fit)
}
# save the bootstrapped results to an object
bootobj <- boot::boot(
data = data,
x = x,
y = y,
statistic = eta_h_ci,
R = nboot,
parallel = "multicore",
...
)
# get 95% CI from the bootstrapped object
bootci <- boot::boot.ci(
boot.out = bootobj,
conf = conf.level,
type = conf.type
)
# extracting ci part
if (conf.type == "norm") {
ci <- bootci$normal
} else if (conf.type == "basic") {
ci <- bootci$basic
} else if (conf.type == "perc") {
ci <- bootci$perc
} else if (conf.type == "bca") {
ci <- bootci$bca
}
# preparing a dataframe out of the results
results_df <-
tibble::as_data_frame(x = cbind.data.frame(
"eta_sq_H" = eta_sq_H,
ci,
"nboot" = bootci$R
))
# returning the results
return(results_df)
}
# using the function
kw_eta_h_ci(iris, Species, Sepal.Length)
#> [1] "All values of t are equal to 0.645832897963594 \n Cannot calculate confidence intervals"
#> Error in data.frame(..., check.names = FALSE): arguments imply differing number of rows: 1, 0
由 reprex package (v0.2.1)
创建于 2018-11-16
在 eta_h_ci
中创建 d
作为新样本,然后在 kw_eta_h
中调用未采样的 data
。这纠正了我这边的行为。
eta_h_ci <- function(data, x, y, indices) {
# allows boot to select sample
d <- data[indices, ]
# running the function
fit <-
kw_eta_h(
data = d, # d instead of data
x = x,
y = y
)
# return the value of interest: effect size
return(fit)
}
我正在尝试使用 R
中的 boot
包计算 Kruskal-Wallis 检验的 eta 平方的置信区间。但它似乎不起作用,我不确定为什么。
获取 eta 平方(有效!):
我采取小步骤,从 return eta 平方值的自定义函数开始。所以这有效。
# set up
set.seed(123)
library(tidyverse)
library(PMCMRplus)
# custom function to get eta-squared value
kw_eta_h <- function(data,
x,
y) {
# creating a dataframe from entered data
data <- dplyr::select(
.data = data,
x = !!rlang::enquo(x),
y = !!rlang::enquo(y)
) %>%
dplyr::filter(.data = ., !is.na(x), !is.na(y)) %>%
tibble::as.tibble(x = .)
# running the function
fit <-
PMCMRplus::kruskalTest(
formula = y ~ x,
data = data,
dist = "KruskalWallis"
)
# calculating the eta-squared estimate using the H-statistic
# ref. http://www.tss.awf.poznan.pl/files/3_Trends_Vol21_2014__no1_20.pdf
effsize <-
(fit$statistic[[1]] - fit$parameter[[1]] + 1) /
(fit$parameter[[3]] - fit$parameter[[1]])
# return the value of interest: effect size
return(effsize[[1]])
}
# using the function
kw_eta_h(iris, Species, Sepal.Length)
#> [1] 0.6458329
获取 eta 平方(不起作用):
现在我使用刚刚与 boot
包结合使用的自定义函数,但它为 eta 平方生成相同的值,因此没有计算置信区间。我在这里做错了什么?
# function to get confidence intervals
kw_eta_h_ci <- function(data,
x,
y,
nboot = 100,
conf.level = 0.95,
conf.type = "norm",
...) {
# creating a dataframe from entered data
data <- dplyr::select(
.data = data,
x = !!rlang::enquo(x),
y = !!rlang::enquo(y)
) %>%
dplyr::filter(.data = ., !is.na(x), !is.na(y)) %>%
tibble::as.tibble(x = .)
# eta-squared value
eta_sq_H <- kw_eta_h(
data = data,
x = x,
y = y
)
# function to obtain 95% CI for for eta-squared
eta_h_ci <- function(data, x, y, indices) {
# allows boot to select sample
d <- data[indices, ]
# running the function
fit <-
kw_eta_h(
data = data,
x = x,
y = y
)
# return the value of interest: effect size
return(fit)
}
# save the bootstrapped results to an object
bootobj <- boot::boot(
data = data,
x = x,
y = y,
statistic = eta_h_ci,
R = nboot,
parallel = "multicore",
...
)
# get 95% CI from the bootstrapped object
bootci <- boot::boot.ci(
boot.out = bootobj,
conf = conf.level,
type = conf.type
)
# extracting ci part
if (conf.type == "norm") {
ci <- bootci$normal
} else if (conf.type == "basic") {
ci <- bootci$basic
} else if (conf.type == "perc") {
ci <- bootci$perc
} else if (conf.type == "bca") {
ci <- bootci$bca
}
# preparing a dataframe out of the results
results_df <-
tibble::as_data_frame(x = cbind.data.frame(
"eta_sq_H" = eta_sq_H,
ci,
"nboot" = bootci$R
))
# returning the results
return(results_df)
}
# using the function
kw_eta_h_ci(iris, Species, Sepal.Length)
#> [1] "All values of t are equal to 0.645832897963594 \n Cannot calculate confidence intervals"
#> Error in data.frame(..., check.names = FALSE): arguments imply differing number of rows: 1, 0
由 reprex package (v0.2.1)
创建于 2018-11-16在 eta_h_ci
中创建 d
作为新样本,然后在 kw_eta_h
中调用未采样的 data
。这纠正了我这边的行为。
eta_h_ci <- function(data, x, y, indices) {
# allows boot to select sample
d <- data[indices, ]
# running the function
fit <-
kw_eta_h(
data = d, # d instead of data
x = x,
y = y
)
# return the value of interest: effect size
return(fit)
}