通过读取 csv 数据在 R 中计算 t 检验并在 Boxplot 中可视化
Compute t-test in R via reading csv data and visualize it in Boxplot
我正在尝试从 csv 文件导入一些数据,并用箱线图进行可视化,以便之后通过某个平台在线展示。
我使用 R 作为开发语言来计算 t 检验函数,然后在绘图中显示结果。
当我尝试计算 t 检验时出现此错误:
这是我写的代码:
# Asker's original script: read the CSV, run a t-test, draw a box plot.

# Display names for the two groups (used below as column names of df).
labels <- list('non-failing heart (NF)', 'failing heart (F)')
# Load the measurements; the first line of the file is treated as the header.
data <- read.csv("data.csv", header=T)
# Cells equal to the literal string 'NA_integer_' are replaced by real NA.
data[data == 'NA_integer_'] <- NA
# NOTE(review): per the sample data, NF is a character column, and nothing
# above converts it to numeric -- presumably the cause of the reported error;
# confirm against the actual CSV.
t.test(data$NF, data$F)
# For each column of data, keep only its first two elements as a one-row
# data frame, stack those rows, and name the two resulting columns with labels.
df <- setNames(do.call(rbind.data.frame,
lapply(data, function(d) data.frame(d[1], d[2]))),
labels)
# NOTE(review): the sample data only shows columns NF and F, so these
# backtick-quoted lookups on data presumably return NULL -- verify against
# the CSV header.
results <- t.test(data$`non-failing heart (NF)`, data$`failing heart (F)`)
# Inspect the test statistic, the group means and the p-value.
results$statistic
results$estimate
results$p.value
# Copy the labelled columns to short names; they become columns 3 and 4 of df.
df$NF <- df$`non-failing heart (NF)`
df$F <- df$`failing heart (F)`
# Box plot of columns 3 and 4 (NF, F) with custom axis labels, colours and a
# fixed y range of 0-120.
boxplot(df[3:4],
data=df,
cex.lab=0.65,
xlab="Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
ylab="IRE binding activity (%)",
col="orange",
border="brown",
ylim = c(0, 120)
)
示例数据
# Sample data frame: NF is a character column in which six entries are the
# literal string "NA_integer_"; F is an integer column. Ten rows in total.
structure(
  list(
    NF = c("99", "96", "100", "105", rep("NA_integer_", 6)),
    F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
更新
如答案中所述将数据转换为数字后,出现此错误:
首先你需要把所有的列都转成数字类型:
# add these lines after data[data == 'NA_integer_'] <- NA
library(tidyverse)
# Convert every column to numeric. across() replaces the superseded
# mutate_all(), and the result is assigned with the usual left arrow.
data <- data %>% mutate(across(everything(), as.numeric))
并更改您用来计算结果的data.frame
# Test the numeric NF and F columns of data directly. Do not use df here: as
# built in the question it keeps only the first two values of each column
# (d[1], d[2]) and stacks them as rows, so a test on df would compare
# c(99, 52) with c(96, 40) instead of the two groups.
results <- t.test(data$NF, data$F)
完整代码:
library(tidyverse)
#> Warning: package 'tibble' was built under R version 3.6.2
#> Warning: package 'purrr' was built under R version 3.6.2
#> Warning: package 'dplyr' was built under R version 3.6.2
# Display names for the two groups, in the same order as the columns of data.
labels <- c("non-failing heart (NF)", "failing heart (F)")

data <- structure(
  list(
    NF = c("99", "96", "100", "105", rep("NA_integer_", 6)),
    F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

# Missing readings are stored as the literal string "NA_integer_"; turn them
# into real NA first so the numeric conversion below does not warn.
data[data == "NA_integer_"] <- NA
# Convert every column to numeric (NF is character because of the
# placeholder strings). Base R, so no extra package is required for this step.
data[] <- lapply(data, as.numeric)

# Welch two-sample t-test on the full columns; t.test() ignores NA values.
results <- t.test(data$NF, data$F)
results
#>
#>  Welch Two Sample t-test
#>
#> data:  data$NF and data$F
#> t = 10.866, df = 10.695, p-value = 4.118e-07
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  51.54831 77.85169
#> sample estimates:
#> mean of x mean of y
#>     100.0      35.3
results$statistic
#>        t
#> 10.86564
results$estimate
#> mean of x mean of y
#>     100.0      35.3
# Same p-value as printed by t.test() above (4.118e-07).
results$p.value

# Data frame used for plotting: all rows of data under the descriptive labels.
# (The previous rbind(d[1], d[2]) construction kept only the first two values
# of each column and transposed them, which mixed NF and F readings.)
df <- setNames(data, labels)
# Short aliases so the box plot shows "NF" and "F" on the x axis.
df$NF <- df[["non-failing heart (NF)"]]
df$F <- df[["failing heart (F)"]]

# No data= argument: it only belongs to the formula method of boxplot().
boxplot(df[c("NF", "F")],
  cex.lab = 0.65,
  xlab = "Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
  ylab = "IRE binding activity (%)",
  col = "orange",
  border = "brown",
  ylim = c(0, 120)
)
我正在尝试从 csv 文件导入一些数据,并用箱线图进行可视化,以便之后通过某个平台在线展示。
我使用 R 作为开发语言来计算 t 检验函数,然后在绘图中显示结果。
当我尝试计算 t 检验时出现此错误:
这是我写的代码:
# Asker's original script: read the CSV, run a t-test, draw a box plot.

# Display names for the two groups (used below as column names of df).
labels <- list('non-failing heart (NF)', 'failing heart (F)')
# Load the measurements; the first line of the file is treated as the header.
data <- read.csv("data.csv", header=T)
# Cells equal to the literal string 'NA_integer_' are replaced by real NA.
data[data == 'NA_integer_'] <- NA
# NOTE(review): per the sample data, NF is a character column, and nothing
# above converts it to numeric -- presumably the cause of the reported error;
# confirm against the actual CSV.
t.test(data$NF, data$F)
# For each column of data, keep only its first two elements as a one-row
# data frame, stack those rows, and name the two resulting columns with labels.
df <- setNames(do.call(rbind.data.frame,
lapply(data, function(d) data.frame(d[1], d[2]))),
labels)
# NOTE(review): the sample data only shows columns NF and F, so these
# backtick-quoted lookups on data presumably return NULL -- verify against
# the CSV header.
results <- t.test(data$`non-failing heart (NF)`, data$`failing heart (F)`)
# Inspect the test statistic, the group means and the p-value.
results$statistic
results$estimate
results$p.value
# Copy the labelled columns to short names; they become columns 3 and 4 of df.
df$NF <- df$`non-failing heart (NF)`
df$F <- df$`failing heart (F)`
# Box plot of columns 3 and 4 (NF, F) with custom axis labels, colours and a
# fixed y range of 0-120.
boxplot(df[3:4],
data=df,
cex.lab=0.65,
xlab="Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
ylab="IRE binding activity (%)",
col="orange",
border="brown",
ylim = c(0, 120)
)
示例数据
# Sample data frame: NF is a character column in which six entries are the
# literal string "NA_integer_"; F is an integer column. Ten rows in total.
structure(
  list(
    NF = c("99", "96", "100", "105", rep("NA_integer_", 6)),
    F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
更新
如答案中所述将数据转换为数字后,出现此错误:
首先你需要把所有的列都转成数字类型:
# add these lines after data[data == 'NA_integer_'] <- NA
library(tidyverse)
# Convert every column to numeric. across() replaces the superseded
# mutate_all(), and the result is assigned with the usual left arrow.
data <- data %>% mutate(across(everything(), as.numeric))
并更改您用来计算结果的data.frame
# Test the numeric NF and F columns of data directly. Do not use df here: as
# built in the question it keeps only the first two values of each column
# (d[1], d[2]) and stacks them as rows, so a test on df would compare
# c(99, 52) with c(96, 40) instead of the two groups.
results <- t.test(data$NF, data$F)
完整代码:
library(tidyverse)
#> Warning: package 'tibble' was built under R version 3.6.2
#> Warning: package 'purrr' was built under R version 3.6.2
#> Warning: package 'dplyr' was built under R version 3.6.2
# Display names for the two groups, in the same order as the columns of data.
labels <- c("non-failing heart (NF)", "failing heart (F)")

data <- structure(
  list(
    NF = c("99", "96", "100", "105", rep("NA_integer_", 6)),
    F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

# Missing readings are stored as the literal string "NA_integer_"; turn them
# into real NA first so the numeric conversion below does not warn.
data[data == "NA_integer_"] <- NA
# Convert every column to numeric (NF is character because of the
# placeholder strings). Base R, so no extra package is required for this step.
data[] <- lapply(data, as.numeric)

# Welch two-sample t-test on the full columns; t.test() ignores NA values.
results <- t.test(data$NF, data$F)
results
#>
#>  Welch Two Sample t-test
#>
#> data:  data$NF and data$F
#> t = 10.866, df = 10.695, p-value = 4.118e-07
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  51.54831 77.85169
#> sample estimates:
#> mean of x mean of y
#>     100.0      35.3
results$statistic
#>        t
#> 10.86564
results$estimate
#> mean of x mean of y
#>     100.0      35.3
# Same p-value as printed by t.test() above (4.118e-07).
results$p.value

# Data frame used for plotting: all rows of data under the descriptive labels.
# (The previous rbind(d[1], d[2]) construction kept only the first two values
# of each column and transposed them, which mixed NF and F readings.)
df <- setNames(data, labels)
# Short aliases so the box plot shows "NF" and "F" on the x axis.
df$NF <- df[["non-failing heart (NF)"]]
df$F <- df[["failing heart (F)"]]

# No data= argument: it only belongs to the formula method of boxplot().
boxplot(df[c("NF", "F")],
  cex.lab = 0.65,
  xlab = "Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
  ylab = "IRE binding activity (%)",
  col = "orange",
  border = "brown",
  ylim = c(0, 120)
)