R 上具有自举置信区间的相关矩阵
Correlation matrix with bootstrapped confidence interval on R
我想获得具有 bootstrapped 置信区间的相关矩阵。
我发现 jmv
包给出了具有 p 值和置信区间的相关矩阵;但在这种情况下,它们是使用 Fisher 变换计算的。
我找到了如何获得 bootstrap 置信区间:
library(boot)
Brep = 5000
pearson <- function(d,i=c(1:n)){
d2 <- d[i,]
return(cor(d2$var1,d2$var2))}
bootcorr <- boot(data = data.frame(cbind(var1, var2)), statistic =
pearson ,R=Brep)
ciu <- boot.ci(bootcorr,conf=.95, type="basic")$basic[5]
cil <- boot.ci(bootcorr,conf=.95, type="basic")$basic[4]
如何获得具有 bootstrapped 置信区间的 CorrMatrix 类型的矩阵?
下面是一个例子:
DAT = matrix (data = c (rnorm (10), rnorm (10),rnorm (10)), nrow =
10, byrow = F)
DAT = as.data.frame (DAT)
library('jmv')
corrMatrix(data= DAT, ci=T, vars = c("V1", "V2", "V3"))
下面是完成这项工作的函数,我已经写好了,所以如果它对其他人有用...:[=11=]
# Rounding functions for HTML printing
printDec<-function(value,dec=1){
ifelse (nchar(trunc(value))<4,sprintf(paste0("%.",dec,"f"),trunc(value*10^dec+sign(value*10^dec)*0.5)/10^dec),formatC(value, format = "e", digits = 1))
}
# Check if packages are installed
packages<-function(x){
x<-as.character(match.call()[[2]])
if (!require(x,character.only=TRUE)){
install.packages(pkgs=x,repos="http://cran.r-project.org")
require(x,character.only=TRUE)
}
}
# Prepare data frame + names
corrMat = function( data, method = c('pearson', 'kendall', 'spearman'), Brep = 5000, coeffBoot = T, pvalueBoot = T, limfactor = 5)
# method : just for tests on categorical variables / 1 categorical + 1 numerical variables
# Brep : bootstrap numbers
# coeffBoot : is the printed coefficient the one estimated by bootstrap ?
# pvalueBoot : is the printed pvalue the one estimated by bootstrap ?
# limfactor : max number of levels for a variable to be considered as categorical
{
packages(Kendall)
data = as.data.frame(data)
# Everything is made as numerical. Be careful with factor with string non supported by R like mathematical symbol
# In this case, do as.numerci(as.factor())
oldw <- getOption("warn")
options(warn = -1) # temporary suppresion of warnings print
for (i in 1:ncol(data)){ if ( (is.factor(data[,i]) |is.character(data[,i])) == T & sum(is.na(as.numeric(as.character(data[,i]))))!= length(data[,i])){data[,i] = as.numeric(as.character(data[,i]))}
if ( (is.factor(data[,i]) |is.character(data[,i])) == T & sum(is.na(as.numeric(as.numeric(as.character(data[,i])))))== length(data[,i])){data[,i] = as.numeric(as.factor(data[,i]))} }
options(warn = oldw)
listVar = names(data)
if(is.null(listVar) == T) {listVar = colnames(data)}
permut = combn(listVar, m=2)
coeffcorr = matrix(NA, nrow = dim(permut)[2], ncol = 8)
for (i in 1:dim(permut)[2] ) {
if (length(unique(data[,t(permut)[i,1]])) > limfactor | length(unique(data[,t(permut)[i,2]])) > limfactor) # verifier que l'une des veux variables est numérique
{
z1 = replicate( Brep, sample(1:dim(data)[1], dim(data)[1], replace = T))
res = do.call ( rbind, apply(z1, 2, function(x){ res=cor.test(data[,t(permut)[i,1]][x], data[,t(permut)[i,2]][x], method = method ) ; return ((list(res$p.value,res$estimate))) }))
if (coeffBoot == T){ # if the user wants the bootstrapped value
coeffcorr[i,1] = printDec ( mean(unlist(res[,2]), na.rm = T)) # na.rm : if the calculation of the coefficient does not work
} else {
coeffcorr[i,1] = printDec( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$estimate)
}
coeffcorr[i,2] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[1] ) # limit 1 of CI
coeffcorr[i,3] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[2] ) # limit 2 of CI
if (pvalueBoot == T){
coeffcorr[i,4] = pdec ( mean (unlist(res[,1]), na.rm = T ) ) # bootstrapped pvalue
}else{
coeffcorr[i,4] = pdec ( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$p.value ) } # "classical" pvalue
}
else # if the two variables to compare are categorical
{
z1 = replicate(Brep, sample(1:dim(data)[1], dim(data)[1], replace = T))
res = do.call ( rbind, apply(z1, 2, function(x){ res=Kendall(data[,t(permut)[i,1]][x], data[,t(permut)[i,2]][x] ) ; return ((list(res$sl,res$tau))) }))
if (coeffBoot == T){
coeffcorr[i,1] = printDec(mean(unlist(res[,2]), na.rm = T))
} else {
coeffcorr[i,1] = printDec( Kendall(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$tau)
}
coeffcorr[i,2] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[1] ) # limit 1 of CI
coeffcorr[i,3] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[2] ) # limit 2 of CI
if (pvalueBoot == T){
coeffcorr[i,4] = paste("Kendall",pdec ( mean (unlist(res[,1]), na.rm = T ) )) # bootstrapped pvalue
}else{
coeffcorr[i,4] = pdec ( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$p.value ) } # classical pvalue
}}
df = as.data.frame(matrix(NA, nrow = length(listVar), ncol = length(listVar)))
m = 1
for (i in 1:length(listVar)){
for (j in 1:length(listVar)){
if (i == j) {df[i,j]=paste("---")} # diagonale
if (i < j){ # partie supérieure de la matrice
df[i,j]=paste(coeffcorr[m,1], "[", coeffcorr[m,2],";", coeffcorr[m,3], "] p =",coeffcorr[m,4])
m = m +1
}
if (i > j){ df[i,j]=paste("")}
}
}
df = data.frame(lapply(df, function(x) {gsub("-0.0", "0.0", x)})) # replacement of -0.0 in 0.0
df = rbind (t(listVar), df); df = cbind (c(paste(""),listVar), df )
return(df)
}
我想获得具有 bootstrapped 置信区间的相关矩阵。
我发现 jmv
包给出了具有 p 值和置信区间的相关矩阵;但在这种情况下,它们是使用 Fisher 变换计算的。
我找到了如何获得 bootstrap 置信区间:
library(boot)
Brep = 5000
pearson <- function(d,i=c(1:n)){
d2 <- d[i,]
return(cor(d2$var1,d2$var2))}
bootcorr <- boot(data = data.frame(cbind(var1, var2)), statistic =
pearson ,R=Brep)
ciu <- boot.ci(bootcorr,conf=.95, type="basic")$basic[5]
cil <- boot.ci(bootcorr,conf=.95, type="basic")$basic[4]
如何获得具有 bootstrapped 置信区间的 CorrMatrix 类型的矩阵?
下面是一个例子:
DAT = matrix (data = c (rnorm (10), rnorm (10),rnorm (10)), nrow =
10, byrow = F)
DAT = as.data.frame (DAT)
library('jmv')
corrMatrix(data= DAT, ci=T, vars = c("V1", "V2", "V3"))
下面是完成这项工作的函数,我已经写好了,所以如果它对其他人有用...:[=11=]
# Rounding functions for HTML printing
printDec<-function(value,dec=1){
ifelse (nchar(trunc(value))<4,sprintf(paste0("%.",dec,"f"),trunc(value*10^dec+sign(value*10^dec)*0.5)/10^dec),formatC(value, format = "e", digits = 1))
}
# Check if packages are installed
packages<-function(x){
x<-as.character(match.call()[[2]])
if (!require(x,character.only=TRUE)){
install.packages(pkgs=x,repos="http://cran.r-project.org")
require(x,character.only=TRUE)
}
}
# Prepare data frame + names
corrMat = function( data, method = c('pearson', 'kendall', 'spearman'), Brep = 5000, coeffBoot = T, pvalueBoot = T, limfactor = 5)
# method : just for tests on categorical variables / 1 categorical + 1 numerical variables
# Brep : bootstrap numbers
# coeffBoot : is the printed coefficient the one estimated by bootstrap ?
# pvalueBoot : is the printed pvalue the one estimated by bootstrap ?
# limfactor : max number of levels for a variable to be considered as categorical
{
packages(Kendall)
data = as.data.frame(data)
# Everything is made as numerical. Be careful with factor with string non supported by R like mathematical symbol
# In this case, do as.numerci(as.factor())
oldw <- getOption("warn")
options(warn = -1) # temporary suppresion of warnings print
for (i in 1:ncol(data)){ if ( (is.factor(data[,i]) |is.character(data[,i])) == T & sum(is.na(as.numeric(as.character(data[,i]))))!= length(data[,i])){data[,i] = as.numeric(as.character(data[,i]))}
if ( (is.factor(data[,i]) |is.character(data[,i])) == T & sum(is.na(as.numeric(as.numeric(as.character(data[,i])))))== length(data[,i])){data[,i] = as.numeric(as.factor(data[,i]))} }
options(warn = oldw)
listVar = names(data)
if(is.null(listVar) == T) {listVar = colnames(data)}
permut = combn(listVar, m=2)
coeffcorr = matrix(NA, nrow = dim(permut)[2], ncol = 8)
for (i in 1:dim(permut)[2] ) {
if (length(unique(data[,t(permut)[i,1]])) > limfactor | length(unique(data[,t(permut)[i,2]])) > limfactor) # verifier que l'une des veux variables est numérique
{
z1 = replicate( Brep, sample(1:dim(data)[1], dim(data)[1], replace = T))
res = do.call ( rbind, apply(z1, 2, function(x){ res=cor.test(data[,t(permut)[i,1]][x], data[,t(permut)[i,2]][x], method = method ) ; return ((list(res$p.value,res$estimate))) }))
if (coeffBoot == T){ # if the user wants the bootstrapped value
coeffcorr[i,1] = printDec ( mean(unlist(res[,2]), na.rm = T)) # na.rm : if the calculation of the coefficient does not work
} else {
coeffcorr[i,1] = printDec( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$estimate)
}
coeffcorr[i,2] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[1] ) # limit 1 of CI
coeffcorr[i,3] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[2] ) # limit 2 of CI
if (pvalueBoot == T){
coeffcorr[i,4] = pdec ( mean (unlist(res[,1]), na.rm = T ) ) # bootstrapped pvalue
}else{
coeffcorr[i,4] = pdec ( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$p.value ) } # "classical" pvalue
}
else # if the two variables to compare are categorical
{
z1 = replicate(Brep, sample(1:dim(data)[1], dim(data)[1], replace = T))
res = do.call ( rbind, apply(z1, 2, function(x){ res=Kendall(data[,t(permut)[i,1]][x], data[,t(permut)[i,2]][x] ) ; return ((list(res$sl,res$tau))) }))
if (coeffBoot == T){
coeffcorr[i,1] = printDec(mean(unlist(res[,2]), na.rm = T))
} else {
coeffcorr[i,1] = printDec( Kendall(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$tau)
}
coeffcorr[i,2] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[1] ) # limit 1 of CI
coeffcorr[i,3] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[2] ) # limit 2 of CI
if (pvalueBoot == T){
coeffcorr[i,4] = paste("Kendall",pdec ( mean (unlist(res[,1]), na.rm = T ) )) # bootstrapped pvalue
}else{
coeffcorr[i,4] = pdec ( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$p.value ) } # classical pvalue
}}
df = as.data.frame(matrix(NA, nrow = length(listVar), ncol = length(listVar)))
m = 1
for (i in 1:length(listVar)){
for (j in 1:length(listVar)){
if (i == j) {df[i,j]=paste("---")} # diagonale
if (i < j){ # partie supérieure de la matrice
df[i,j]=paste(coeffcorr[m,1], "[", coeffcorr[m,2],";", coeffcorr[m,3], "] p =",coeffcorr[m,4])
m = m +1
}
if (i > j){ df[i,j]=paste("")}
}
}
df = data.frame(lapply(df, function(x) {gsub("-0.0", "0.0", x)})) # replacement of -0.0 in 0.0
df = rbind (t(listVar), df); df = cbind (c(paste(""),listVar), df )
return(df)
}