如何对我的数据应用配对 t.test 或 Wilcoxon 检验

Question

先从一个数据说起：

structure(list(Group = c("Mark", "Matt", "Tim", "Tom"), `1` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `2` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `3` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `4` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `5` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `6` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `7` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `8` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `9` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `10` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `11` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `12` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `13` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `14` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `15` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `16` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `17` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `18` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `19` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `20` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `21` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `22` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `23` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `24` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `25` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `26` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `27` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `28` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `29` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `30` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `31` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `32` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `33` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `34` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `35` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719), `36` = c(0.965871712940006, 
NA, 1.18437146805406, 1.01355356488254), `37` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `38` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `39` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719), `40` = c(0.965871712940006, 
NA, 1.18437146805406, 1.01355356488254), `41` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `42` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `43` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719), `44` = c(0.965871712940006, 
NA, 1.18437146805406, 1.01355356488254), `45` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `46` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `47` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719)), .Names = c("Group", 
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", 
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", 
"24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", 
"35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", 
"46", "47"), row.names = c(NA, 4L), class = "data.frame")

每一行都是我通过比较两组而得到的一组比率。我想知道这些比率是否与 1 有显著差异。因此，我想用标题中提到的两种检验，分别检验每一行（向量）是否不同于 1。如何将这些检验应用于我的数据？请注意，每一行的长度可能不同，NA 应该被忽略。作为输出，我想要一个包含 3 列的表格：组名（Group）、t 检验的 p 值、Wilcoxon 检验的 p 值。

有人可以帮助我吗？

Answer 1

也许有办法直接按行处理您的原始数据框，但我强烈建议您改为按列组织数据（即整洁的 tidy 数据框）。

library(dplyr)
library(tidyr)

# `dt` is assumed to be the data frame from the question: one row per person,
# a `Group` column holding the person's name, and one numeric column per ratio.
stopifnot(is.data.frame(dt), "Group" %in% names(dt))

# Reshape to long format: one row per (name, value) pair.
# Pivoting `dt` directly, instead of transposing it with t(), avoids converting
# every ratio to character and back, and does not hard-code the people's names.
dt3 <- dt %>%
  pivot_longer(
    cols = -Group,
    names_to = "measurement",
    values_to = "value",
    values_drop_na = TRUE  # drop NAs, so people may have different sample sizes
  ) %>%
  select(name = Group, value)

# One-sample tests of each person's ratios against the null value 1.
# The tests receive the grouped `value` vector directly; no `data` argument is
# needed (the default methods of t.test()/wilcox.test() would ignore it).
# Note: wilcox.test() warns that it cannot compute an exact p-value when a
# person's values contain ties.
dt3 %>%
  group_by(name) %>%     # for each name
  summarise(
    pval_ttest = t.test(value, mu = 1)$p.value,      # t test p value
    pval_wilc = wilcox.test(value, mu = 1)$p.value   # Wilcoxon p value
  )


# # A tibble: 4 × 3
#    name   pval_ttest    pval_wilc
#   <chr>        <dbl>        <dbl>
# 1  Mark 4.408038e-09 1.020895e-06
# 2  Matt 6.679416e-06 2.502045e-04
# 3   Tim 1.777060e-02 6.932590e-02
# 4   Tom 2.433548e-01 5.148382e-01

下面补充说明配对 t 检验如何“理解”您提供的测量值，以及为什么对差值和对比率进行检验可能会得出不同的结果。请考虑以下示例：

# Paired t test on two equal-length vectors (e.g. before and after treatment);
# it tests whether the mean of the pairwise differences is 0
t.test(1:10, 13:4, paired = TRUE)

# Paired t-test
# 
# data:  1:10 and 13:4
# t = -1.5667, df = 9, p-value = 0.1516
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#   -7.331701  1.331701
# sample estimates:
#   mean of the differences 
# -3 


# One-sample t test on the element-wise differences of the two vectors,
# checking whether their mean differs from 0
t.test(1:10 - 13:4, mu = 0)

# One Sample t-test
# 
# data:  1:10 - 13:4
# t = -1.5667, df = 9, p-value = 0.1516
# alternative hypothesis: true mean is not equal to 0
# 95 percent confidence interval:
#   -7.331701  1.331701
# sample estimates:
#   mean of x 
# -3 


# One-sample t test on the element-wise ratios of the two vectors,
# checking whether their mean differs from 1
t.test(1:10 / 13:4, mu = 1)

# One Sample t-test
# 
# data:  1:10/13:4
# t = -0.46036, df = 9, p-value = 0.6562
# alternative hypothesis: true mean is not equal to 1
# 95 percent confidence interval:
#   0.3229789 1.4480623
# sample estimates:
#   mean of x 
# 0.8855206

可以看到，配对 t 检验等价于对差值向量做单样本 t 检验（检验其均值是否为 0）；之所以能这样做，是因为您有 2 个长度相同的向量（处理前和处理后）。这与对比率向量做单样本 t 检验（检验其均值是否为 1）并不相同。

因此，两者得出不同的结果是合理的；不过在某些应用中，对比率进行检验更为合适。请查阅您所在领域的相关文献。

如何对我的数据应用配对 t.test 或 Wilcoxon 检验

How to apply paired t.test or Wilcoxon test to my data

r

usage-statistics