通过读取 csv 数据在 R 中计算 t 检验并在 Boxplot 中可视化

Compute a t-test in R from CSV data and visualize it in a boxplot

我想从 CSV 文件导入一些数据并用箱线图进行可视化,之后再通过某个平台在线展示。我使用 R 来计算 t 检验,然后把结果显示在图中。但在计算 t 检验时出现了错误(原始报错信息未包含在本文中):

这是我写的代码:

# Question's original script (kept as posted; it reproduces the reported
# problem). Reads the CSV, runs a t-test on NF vs F, and draws a boxplot.

# Descriptive column labels, applied to `df` via setNames() below.
labels <- list('non-failing heart (NF)', 'failing heart (F)')

# Read the measurements; the sample data shown later has the columns NF and F.
data <- read.csv("data.csv", header=T)
# Replace the literal placeholder string 'NA_integer_' with a real NA.
# NOTE(review): in the sample data NF is stored as character strings, and this
# replacement alone does not change the column type.
data[data == 'NA_integer_'] <- NA
# NOTE(review): presumably this is the call that raises the reported error,
# since NF has not been converted to numeric at this point -- confirm.
t.test(data$NF, data$F)

# For each column of `data`, take elements 1 and 2 as a one-row data frame,
# stack those rows, and name the two resulting columns with `labels`.
# NOTE(review): only the first two values of each column reach `df`.
df <- setNames(do.call(rbind.data.frame, 
                       lapply(data, function(d) data.frame(d[1], d[2]))),
              labels)    

                           
                           
# NOTE(review): the long column names were assigned to `df`, not `data`; the
# sample `data` only has the columns NF and F -- confirm which one is intended.
results <- t.test(data$`non-failing heart (NF)`, data$`failing heart (F)`)


                           
# Components of the test result: statistic, estimates, and p-value.
results$statistic
results$estimate
results$p.value
                           

# Copy the long-named columns to the short names NF and F; being added after
# the two labelled columns, they become columns 3 and 4 of `df`.
df$NF <- df$`non-failing heart (NF)`
df$F <- df$`failing heart (F)`
# Boxplot of columns 3 and 4 (NF and F).
# NOTE(review): `data=df` looks redundant here because the data frame is
# already passed as the first argument -- confirm it can be dropped.
boxplot(df[3:4],
        data=df,
        cex.lab=0.65,
        xlab="Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
        ylab="IRE binding activity (%)",
        col="orange",
        border="brown",
        ylim = c(0, 120)
)

示例数据

structure(list(NF = c("99", "96", "100", "105", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_" ), F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)), row.names = c(NA, 10L), class = "data.frame")

更新

按照答案中的建议把数据转换为数值类型后,又出现了新的错误(原始报错信息未包含在本文中):

首先,你需要把所有列都转换为数值(numeric)类型:

# Add these lines right after `data[data == 'NA_integer_'] <- NA`.
library(tidyverse)
# Convert every column to numeric. Assigning into `data[]` replaces each column
# while keeping `data` a data frame; base R's lapply() is enough here, so the
# superseded mutate_all() and the right-hand `->` assignment are not needed.
data[] <- lapply(data, as.numeric)

然后,把计算 results 时使用的数据框由 data 改为 df:

# Run the test on `df` instead of `data`: the long column names used below are
# the ones assigned through setNames(..., labels); `data` only has NF and F.
# NOTE(review): in the code shown, `df` is built from d[1] and d[2] only, so it
# appears to hold just the first two values of each original column -- confirm
# that `df` contains every observation before relying on this result.
results <- t.test(df$`non-failing heart (NF)`, df$`failing heart (F)`)

完整代码:

library(tidyverse)
#> Warning: package 'tibble' was built under R version 3.6.2
#> Warning: package 'purrr' was built under R version 3.6.2
#> Warning: package 'dplyr' was built under R version 3.6.2
# Complete example: Welch two-sample t-test and boxplot of IRE binding
# activity for non-failing (NF) and failing (F) hearts.

# Descriptive column names, in the same order as the columns of `data`.
labels <- c("non-failing heart (NF)", "failing heart (F)")

# Sample data as it comes out of the CSV: missing NF values are stored as the
# literal string "NA_integer_", which makes the whole NF column character.
data <- structure(
  list(
    NF = c(
      "99", "96", "100", "105", "NA_integer_", "NA_integer_", "NA_integer_",
      "NA_integer_", "NA_integer_", "NA_integer_"
    ),
    F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

# Turn the placeholder strings into real NA values, then convert every column
# to numeric so that t.test() and boxplot() can work with them.
data[data == "NA_integer_"] <- NA
data[] <- lapply(data, as.numeric)

t.test(data$NF, data$F)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  data$NF and data$F
#> t = 10.866, df = 10.695, p-value = 4.118e-07
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  51.54831 77.85169
#> sample estimates:
#> mean of x mean of y 
#>     100.0      35.3

# Relabel the columns only, keeping every observation. Building `df` from
# d[1] and d[2] of each column would keep just two values per column and
# compare (99, 52) with (96, 40) instead of NF with F.
df <- setNames(data, labels)

results <- t.test(df$`non-failing heart (NF)`, df$`failing heart (F)`)

# Same test as printed above, so the components match that summary.
results$statistic # t is about 10.866
results$estimate  # group means: 100.0 (NF) and 35.3 (F)
results$p.value   # about 4.118e-07

# Short column names give compact box labels on the x axis.
df$NF <- df$`non-failing heart (NF)`
df$F <- df$`failing heart (F)`

# Select the two short-named columns by name rather than by position; the
# default boxplot method takes the data frame directly and has no `data`
# argument, so none is passed.
boxplot(df[c("NF", "F")],
        cex.lab = 0.65,
        xlab = "Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
        ylab = "IRE binding activity (%)",
        col = "orange",
        border = "brown",
        ylim = c(0, 120)
)