对具有大数据框的独立组进行多重 t 检验
Multiple t-test on independent group with a large dataframe
我看过很多类似的帖子,但其中绝大多数至少已有 3 年历史,我不确定它们是否适用于我的情况,所以我们开始吧。
一位同事要求我帮助她的项目进行多重 t 检验。
基本上她有 20 个观察值 x 30 个可变数据框,如下所示:
|集团 |脂质1 |脂质2 | ... |脂质 28|
| ------ | -------------- |
|一个|
|乙|
| |
|B |
我们要做的是对每个脂类进行组比较(意思是对 A 组和 B 组之间的脂类 1 进行 t 检验,然后对脂类 2 进行 t 检验,依此类推)。
我们不想比较它们之间的脂质。
当然,我们希望不必 copy/paste 相同的 3 行代码,尤其是因为我们有另外 2 个具有相同变量但条件不同的数据框。
我已经尝试了我在这里看到的一种解决方案,但它给了我一个我不确定是否理解的错误:
sapply(foetal[,2:20], function(i) t.test(i ~ foetal$ID))
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : missing value where TRUE/FALSE needed In addition: Warning messages: 1: In mean.default(x) : l'argument n'est ni numérique, ni logique : renvoi de NA 2: In var(x) : NAs introduced by coercion 3: In mean.default(y) : l'argument n'est ni numérique, ni logique : renvoi de NA 4: In var(y) : Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : missing value where TRUE/FALSE needed
我看到的另一种解决方案是使用 gather 函数获取一列 Lipids,一列用于每个 Lipids 的值,然后创建一个列表列,展开数据框并改变一个包含t 检验的 p 值。
tips %>%
select(tip, total_bill, sex) %>%
gather(key = variable, value = value, -sex) %>%
group_by(sex, variable) %>%
summarise(value = list(value)) %>%
spread(sex, value) %>%
group_by(variable) %>%
mutate(p_value = t.test(unlist(Female), unlist(Male))$p.value,
t_value = t.test(unlist(Female), unlist(Male))$statistic)
(https://sebastiansauer.github.io/multiple-t-tests-with-dplyr/)
老实说,我不确定该怎么做。有人有什么建议吗?
这里是数据的 dput()...虽然不确定为什么有必要...
dput(dummy)
structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "0,007"), Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081"), Lipid.11 = c("0,053", "0,114",
"0,038", "0,094", "0,073", "0,067", "0,028", "0,022", "0,021",
"0,05", "0,085", "0,102", "0,122", "0,096", "0,027", "0,03",
NA, "0,078", "0,066", NA), Lipid.12 = c(0.223, 0.261, 0.258,
0.212, 0.168, 0.101, 0.191, 0.09, 0.195, 0.082, 0.155, 0.2,
0.167, 0.231, 0.145, 0.089, 0.239, 0.141, 0.106, 0.124),
Lipid.13 = c(0.737, 0.763, 0.707, 0.587, 0.545, 0.317, 0.74,
0.602, 0.481, 0.531, 0.632, 0.448, 0.62, 0.766, 0.397, 0.623,
0.997, 0.578, 0.418, 0.412), Lipid.14 = c(0.683, 0.666, 0.507,
0.366, 0.443, 0.266, 0.493, 0.345, 0.368, 0.355, 0.432, 0.411,
0.491, 0.565, 0.357, 0.285, 0.604, 0.426, 0.538, 0.295),
Lipid.15 = c(0.911, 1.017, 0.503, 0.76, 0.741, 0.486, 0.648,
0.581, 0.955, 0.515, 0.932, 0.707, 0.626, 0.928, 0.836, 0.537,
0.654, 0.351, 0.498, 0.529), Lipid.16 = c(0.148, 0.116, 0.069,
0.104, 0.091, 0.064, 0.093, 0.123, 0.11, 0.097, 0.283, 0.076,
0.095, 0.194, 0.06, 0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c("0,155",
"0,274", "0,149", "0,127", "0,174", "nd", "0,109", "0,134",
"0,1", "0,09", "0,25", "0,112", "0,088", "0,243", "0,092",
"0,073", "0,153", "0,12", "0,14", "0,06"), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), class = "data.frame", row.names = c(NA,
-20L))
让我们从您粘贴的数据变脏开始吧!而不是数字,你有丁字裤。例如,Lipid.10
Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081")
此外,您有仅包含 NA 值的变量
Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_)
所以我不得不稍微清理一下它们。
structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0.007), Lipid.10 = c(0.242, 0.254, 0.134, 0.226,
0.243, 0.122, 0.082, 0.119, 0.098, 0.093, 0.27, 0.284, 0.258,
0.236, 0.173, 0.106, 0.138, 0.066, 0.072, 0.081), Lipid.11 = c(0.053,
0.114, 0.038, 0.094, 0.073, 0.067, 0.028, 0.022, 0.021, 0.05,
0.085, 0.102, 0.122, 0.096, 0.027, 0.03, NA, 0.078, 0.066,
NA), Lipid.12 = c(0.223, 0.261, 0.258, 0.212, 0.168, 0.101,
0.191, 0.09, 0.195, 0.082, 0.155, 0.2, 0.167, 0.231, 0.145,
0.089, 0.239, 0.141, 0.106, 0.124), Lipid.13 = c(0.737, 0.763,
0.707, 0.587, 0.545, 0.317, 0.74, 0.602, 0.481, 0.531, 0.632,
0.448, 0.62, 0.766, 0.397, 0.623, 0.997, 0.578, 0.418, 0.412
), Lipid.14 = c(0.683, 0.666, 0.507, 0.366, 0.443, 0.266,
0.493, 0.345, 0.368, 0.355, 0.432, 0.411, 0.491, 0.565, 0.357,
0.285, 0.604, 0.426, 0.538, 0.295), Lipid.15 = c(0.911, 1.017,
0.503, 0.76, 0.741, 0.486, 0.648, 0.581, 0.955, 0.515, 0.932,
0.707, 0.626, 0.928, 0.836, 0.537, 0.654, 0.351, 0.498, 0.529
), Lipid.16 = c(0.148, 0.116, 0.069, 0.104, 0.091, 0.064,
0.093, 0.123, 0.11, 0.097, 0.283, 0.076, 0.095, 0.194, 0.06,
0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c(0.155, 0.274,
0.149, 0.127, 0.174, NA, 0.109, 0.134, 0.1, 0.09, 0.25, 0.112,
0.088, 0.243, 0.092, 0.073, 0.153, 0.12, 0.14, 0.06), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
剩下的就简单了。
library(tidyverse)
ft = function(data){
tryCatch(
{tout = t.test(data$val ~ data$ID))
tibble(
t = tout$statistic,
p = tout$p.value,
stderr = tout$stderr
)
}, error = function(msg){
return(tibble(t = NA, p = NA, stderr = NA))
})
}
df %>%
pivot_longer(starts_with("Lipid"), names_to = "Lipid", values_to = "val") %>%
group_by(Lipid) %>%
nest() %>%
mutate(testt = map(data, ft)) %>%
select(Lipid, testt) %>%
unnest(testt)
输出
# A tibble: 28 x 4
# Groups: Lipid [28]
Lipid t p stderr
<chr> <dbl> <dbl> <dbl>
1 Lipid.1 0.158 0.876 0.142
2 Lipid.2 -0.870 0.399 0.00350
3 Lipid.3 -0.377 0.711 0.00372
4 Lipid.4 NA NA NA
5 Lipid.5 0.930 0.366 0.0143
6 Lipid.6 0.730 0.475 0.0614
7 Lipid.7 NA NA NA
8 Lipid.8 -0.180 0.859 0.0223
9 Lipid.9 NA NA NA
10 Lipid.10 -0.200 0.844 0.0355
# ... with 18 more rows
根据需要自定义 ft
函数。
我不得不在 ft
中使用 tryCatch
函数,因为变量只包含 NA
个值。
如果您想获得完整的 t 检验输出,您可以循环遍历列:
如果我们从你的 df 开始:
data <- structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "0,007"), Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081"), Lipid.11 = c("0,053", "0,114",
"0,038", "0,094", "0,073", "0,067", "0,028", "0,022", "0,021",
"0,05", "0,085", "0,102", "0,122", "0,096", "0,027", "0,03",
NA, "0,078", "0,066", NA), Lipid.12 = c(0.223, 0.261, 0.258,
0.212, 0.168, 0.101, 0.191, 0.09, 0.195, 0.082, 0.155, 0.2,
0.167, 0.231, 0.145, 0.089, 0.239, 0.141, 0.106, 0.124),
Lipid.13 = c(0.737, 0.763, 0.707, 0.587, 0.545, 0.317, 0.74,
0.602, 0.481, 0.531, 0.632, 0.448, 0.62, 0.766, 0.397, 0.623,
0.997, 0.578, 0.418, 0.412), Lipid.14 = c(0.683, 0.666, 0.507,
0.366, 0.443, 0.266, 0.493, 0.345, 0.368, 0.355, 0.432, 0.411,
0.491, 0.565, 0.357, 0.285, 0.604, 0.426, 0.538, 0.295),
Lipid.15 = c(0.911, 1.017, 0.503, 0.76, 0.741, 0.486, 0.648,
0.581, 0.955, 0.515, 0.932, 0.707, 0.626, 0.928, 0.836, 0.537,
0.654, 0.351, 0.498, 0.529), Lipid.16 = c(0.148, 0.116, 0.069,
0.104, 0.091, 0.064, 0.093, 0.123, 0.11, 0.097, 0.283, 0.076,
0.095, 0.194, 0.06, 0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c("0,155",
"0,274", "0,149", "0,127", "0,174", "nd", "0,109", "0,134",
"0,1", "0,09", "0,25", "0,112", "0,088", "0,243", "0,092",
"0,073", "0,153", "0,12", "0,14", "0,06"), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), class = "data.frame", row.names = c(NA,
-20L))
清理 df:
# remove the columns which only contain NA:
data$Lipid.4 <- NULL
data$Lipid.7 <- NULL
data$Lipid.9 <- NULL
# convert from string to numeric (I do it now manually with each column. You could use a for-loop)
data$Lipid.10 <- gsub(",", ".", data$Lipid.10) # convert comma to dot
data$Lipid.10 <- as.numeric(data$Lipid.10) # convert from string to numeric
data$Lipid.11 <- gsub(",", ".", data$Lipid.11)
data$Lipid.11 <- as.numeric(data$Lipid.11)
data$Lipid.17 <- gsub(",", ".", data$Lipid.17)
data$Lipid.17 <- as.numeric(data$Lipid.17)
# get the lipid column names
all_lipids <- colnames(data)
all_lipids <- all_lipids[all_lipids != "ID"] # we don't need the ID column for the loop
# now loop over each column an perform a t-test
for (column in all_lipids) {
print(column)
print(t.test(data[,column] ~ data$ID))
}
每种脂质得到:
[1] "Lipid.1"
Welch Two Sample t-test
data: data[, column] by data$ID
t = 0.15843, df = 17.391, p-value = 0.8759
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2766112 0.3216112
sample estimates:
mean in group A mean in group B
0.7144 0.6919
最后一点:您进行了很多比较。您可以考虑更正多重测试。
F你也可以使用R
中的multtest
库,对于多个二样本t-tests
,如下代码所示:
library(multtest)
df <- as.data.frame(t(as.matrix(dummy)))
X <- apply(as.matrix.noquote(df[2:nrow(df),]), 2, as.numeric)
cl <- ifelse(df[1,] == 'A', 1, 0) # class labels
welch_t_stat <- mt.teststat(X, cl, test='t')
welch_t_stat
# [1] 0.15843467 -0.86954194 -0.37680666 NA 0.92978706 0.72969094 NA -0.17962582 NA NA NAv
# [12] 0.69705527 0.16001073 0.15733921 0.59540273 -0.05557413 NA 0.52706460 0.99860493 -0.14561137 0.58894166 1.25114061
# [23] 1.03458080 0.86540315 -0.62788116 -0.28806189 0.60206042 0.12954702
从上面的结果可以看出,dataframe中有28种脂类进行了28次Welcht-tests
由于您获得了个人 t-statistics
,现在,您可以计算 p-values
并使用 Bonferroni / Holm 应用 FWER
校正或使用 Benjamini & Hochberg 方法应用 FDR
校正(当你有大量测试时很有用):
raw_p <- 2 * (1 - pnorm(abs(welch_t_stat))) # raw p-values assuming normal
# or use pt() with appropriate df
procedures <- c("Bonferroni", "Holm", "BH")
adjusted <- mt.rawp2adjp(raw_p, procedures)
我看过很多类似的帖子,但其中绝大多数至少已有 3 年历史,我不确定它们是否适用于我的情况,所以我们开始吧。
一位同事要求我帮助她的项目进行多重 t 检验。
基本上她有 20 个观察值 x 30 个可变数据框,如下所示: |集团 |脂质1 |脂质2 | ... |脂质 28|
| ------ | -------------- |
|一个| |乙| | | |B |
我们要做的是对每个脂类进行组比较(意思是对 A 组和 B 组之间的脂类 1 进行 t 检验,然后对脂类 2 进行 t 检验,依此类推)。
我们不想比较它们之间的脂质。
当然,我们希望不必 copy/paste 相同的 3 行代码,尤其是因为我们有另外 2 个具有相同变量但条件不同的数据框。
我已经尝试了我在这里看到的一种解决方案,但它给了我一个我不确定是否理解的错误:
sapply(foetal[,2:20], function(i) t.test(i ~ foetal$ID))
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : missing value where TRUE/FALSE needed In addition: Warning messages: 1: In mean.default(x) : l'argument n'est ni numérique, ni logique : renvoi de NA 2: In var(x) : NAs introduced by coercion 3: In mean.default(y) : l'argument n'est ni numérique, ni logique : renvoi de NA 4: In var(y) : Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : missing value where TRUE/FALSE needed
我看到的另一种解决方案是使用 gather 函数获取一列 Lipids,一列用于每个 Lipids 的值,然后创建一个列表列,展开数据框并改变一个包含t 检验的 p 值。
tips %>%
select(tip, total_bill, sex) %>%
gather(key = variable, value = value, -sex) %>%
group_by(sex, variable) %>%
summarise(value = list(value)) %>%
spread(sex, value) %>%
group_by(variable) %>%
mutate(p_value = t.test(unlist(Female), unlist(Male))$p.value,
t_value = t.test(unlist(Female), unlist(Male))$statistic)
(https://sebastiansauer.github.io/multiple-t-tests-with-dplyr/)
老实说,我不确定该怎么做。有人有什么建议吗?
这里是数据的 dput()...虽然不确定为什么有必要...
dput(dummy)
structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "0,007"), Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081"), Lipid.11 = c("0,053", "0,114",
"0,038", "0,094", "0,073", "0,067", "0,028", "0,022", "0,021",
"0,05", "0,085", "0,102", "0,122", "0,096", "0,027", "0,03",
NA, "0,078", "0,066", NA), Lipid.12 = c(0.223, 0.261, 0.258,
0.212, 0.168, 0.101, 0.191, 0.09, 0.195, 0.082, 0.155, 0.2,
0.167, 0.231, 0.145, 0.089, 0.239, 0.141, 0.106, 0.124),
Lipid.13 = c(0.737, 0.763, 0.707, 0.587, 0.545, 0.317, 0.74,
0.602, 0.481, 0.531, 0.632, 0.448, 0.62, 0.766, 0.397, 0.623,
0.997, 0.578, 0.418, 0.412), Lipid.14 = c(0.683, 0.666, 0.507,
0.366, 0.443, 0.266, 0.493, 0.345, 0.368, 0.355, 0.432, 0.411,
0.491, 0.565, 0.357, 0.285, 0.604, 0.426, 0.538, 0.295),
Lipid.15 = c(0.911, 1.017, 0.503, 0.76, 0.741, 0.486, 0.648,
0.581, 0.955, 0.515, 0.932, 0.707, 0.626, 0.928, 0.836, 0.537,
0.654, 0.351, 0.498, 0.529), Lipid.16 = c(0.148, 0.116, 0.069,
0.104, 0.091, 0.064, 0.093, 0.123, 0.11, 0.097, 0.283, 0.076,
0.095, 0.194, 0.06, 0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c("0,155",
"0,274", "0,149", "0,127", "0,174", "nd", "0,109", "0,134",
"0,1", "0,09", "0,25", "0,112", "0,088", "0,243", "0,092",
"0,073", "0,153", "0,12", "0,14", "0,06"), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), class = "data.frame", row.names = c(NA,
-20L))
让我们从您粘贴的数据变脏开始吧!而不是数字,你有丁字裤。例如,Lipid.10
Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081")
此外,您有仅包含 NA 值的变量
Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_)
所以我不得不稍微清理一下它们。
structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0.007), Lipid.10 = c(0.242, 0.254, 0.134, 0.226,
0.243, 0.122, 0.082, 0.119, 0.098, 0.093, 0.27, 0.284, 0.258,
0.236, 0.173, 0.106, 0.138, 0.066, 0.072, 0.081), Lipid.11 = c(0.053,
0.114, 0.038, 0.094, 0.073, 0.067, 0.028, 0.022, 0.021, 0.05,
0.085, 0.102, 0.122, 0.096, 0.027, 0.03, NA, 0.078, 0.066,
NA), Lipid.12 = c(0.223, 0.261, 0.258, 0.212, 0.168, 0.101,
0.191, 0.09, 0.195, 0.082, 0.155, 0.2, 0.167, 0.231, 0.145,
0.089, 0.239, 0.141, 0.106, 0.124), Lipid.13 = c(0.737, 0.763,
0.707, 0.587, 0.545, 0.317, 0.74, 0.602, 0.481, 0.531, 0.632,
0.448, 0.62, 0.766, 0.397, 0.623, 0.997, 0.578, 0.418, 0.412
), Lipid.14 = c(0.683, 0.666, 0.507, 0.366, 0.443, 0.266,
0.493, 0.345, 0.368, 0.355, 0.432, 0.411, 0.491, 0.565, 0.357,
0.285, 0.604, 0.426, 0.538, 0.295), Lipid.15 = c(0.911, 1.017,
0.503, 0.76, 0.741, 0.486, 0.648, 0.581, 0.955, 0.515, 0.932,
0.707, 0.626, 0.928, 0.836, 0.537, 0.654, 0.351, 0.498, 0.529
), Lipid.16 = c(0.148, 0.116, 0.069, 0.104, 0.091, 0.064,
0.093, 0.123, 0.11, 0.097, 0.283, 0.076, 0.095, 0.194, 0.06,
0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c(0.155, 0.274,
0.149, 0.127, 0.174, NA, 0.109, 0.134, 0.1, 0.09, 0.25, 0.112,
0.088, 0.243, 0.092, 0.073, 0.153, 0.12, 0.14, 0.06), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
剩下的就简单了。
library(tidyverse)
ft = function(data){
tryCatch(
{tout = t.test(data$val ~ data$ID))
tibble(
t = tout$statistic,
p = tout$p.value,
stderr = tout$stderr
)
}, error = function(msg){
return(tibble(t = NA, p = NA, stderr = NA))
})
}
df %>%
pivot_longer(starts_with("Lipid"), names_to = "Lipid", values_to = "val") %>%
group_by(Lipid) %>%
nest() %>%
mutate(testt = map(data, ft)) %>%
select(Lipid, testt) %>%
unnest(testt)
输出
# A tibble: 28 x 4
# Groups: Lipid [28]
Lipid t p stderr
<chr> <dbl> <dbl> <dbl>
1 Lipid.1 0.158 0.876 0.142
2 Lipid.2 -0.870 0.399 0.00350
3 Lipid.3 -0.377 0.711 0.00372
4 Lipid.4 NA NA NA
5 Lipid.5 0.930 0.366 0.0143
6 Lipid.6 0.730 0.475 0.0614
7 Lipid.7 NA NA NA
8 Lipid.8 -0.180 0.859 0.0223
9 Lipid.9 NA NA NA
10 Lipid.10 -0.200 0.844 0.0355
# ... with 18 more rows
根据需要自定义 ft
函数。
我不得不在 ft
中使用 tryCatch
函数,因为变量只包含 NA
个值。
如果您想获得完整的 t 检验输出,您可以循环遍历列:
如果我们从你的 df 开始:
data <- structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "0,007"), Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081"), Lipid.11 = c("0,053", "0,114",
"0,038", "0,094", "0,073", "0,067", "0,028", "0,022", "0,021",
"0,05", "0,085", "0,102", "0,122", "0,096", "0,027", "0,03",
NA, "0,078", "0,066", NA), Lipid.12 = c(0.223, 0.261, 0.258,
0.212, 0.168, 0.101, 0.191, 0.09, 0.195, 0.082, 0.155, 0.2,
0.167, 0.231, 0.145, 0.089, 0.239, 0.141, 0.106, 0.124),
Lipid.13 = c(0.737, 0.763, 0.707, 0.587, 0.545, 0.317, 0.74,
0.602, 0.481, 0.531, 0.632, 0.448, 0.62, 0.766, 0.397, 0.623,
0.997, 0.578, 0.418, 0.412), Lipid.14 = c(0.683, 0.666, 0.507,
0.366, 0.443, 0.266, 0.493, 0.345, 0.368, 0.355, 0.432, 0.411,
0.491, 0.565, 0.357, 0.285, 0.604, 0.426, 0.538, 0.295),
Lipid.15 = c(0.911, 1.017, 0.503, 0.76, 0.741, 0.486, 0.648,
0.581, 0.955, 0.515, 0.932, 0.707, 0.626, 0.928, 0.836, 0.537,
0.654, 0.351, 0.498, 0.529), Lipid.16 = c(0.148, 0.116, 0.069,
0.104, 0.091, 0.064, 0.093, 0.123, 0.11, 0.097, 0.283, 0.076,
0.095, 0.194, 0.06, 0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c("0,155",
"0,274", "0,149", "0,127", "0,174", "nd", "0,109", "0,134",
"0,1", "0,09", "0,25", "0,112", "0,088", "0,243", "0,092",
"0,073", "0,153", "0,12", "0,14", "0,06"), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), class = "data.frame", row.names = c(NA,
-20L))
清理 df:
# remove the columns which only contain NA:
data$Lipid.4 <- NULL
data$Lipid.7 <- NULL
data$Lipid.9 <- NULL
# convert from string to numeric (I do it now manually with each column. You could use a for-loop)
data$Lipid.10 <- gsub(",", ".", data$Lipid.10) # convert comma to dot
data$Lipid.10 <- as.numeric(data$Lipid.10) # convert from string to numeric
data$Lipid.11 <- gsub(",", ".", data$Lipid.11)
data$Lipid.11 <- as.numeric(data$Lipid.11)
data$Lipid.17 <- gsub(",", ".", data$Lipid.17)
data$Lipid.17 <- as.numeric(data$Lipid.17)
# get the lipid column names
all_lipids <- colnames(data)
all_lipids <- all_lipids[all_lipids != "ID"] # we don't need the ID column for the loop
# now loop over each column an perform a t-test
for (column in all_lipids) {
print(column)
print(t.test(data[,column] ~ data$ID))
}
每种脂质得到:
[1] "Lipid.1"
Welch Two Sample t-test
data: data[, column] by data$ID
t = 0.15843, df = 17.391, p-value = 0.8759
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2766112 0.3216112
sample estimates:
mean in group A mean in group B
0.7144 0.6919
最后一点:您进行了很多比较。您可以考虑更正多重测试。
F你也可以使用R
中的multtest
库,对于多个二样本t-tests
,如下代码所示:
library(multtest)
df <- as.data.frame(t(as.matrix(dummy)))
X <- apply(as.matrix.noquote(df[2:nrow(df),]), 2, as.numeric)
cl <- ifelse(df[1,] == 'A', 1, 0) # class labels
welch_t_stat <- mt.teststat(X, cl, test='t')
welch_t_stat
# [1] 0.15843467 -0.86954194 -0.37680666 NA 0.92978706 0.72969094 NA -0.17962582 NA NA NAv
# [12] 0.69705527 0.16001073 0.15733921 0.59540273 -0.05557413 NA 0.52706460 0.99860493 -0.14561137 0.58894166 1.25114061
# [23] 1.03458080 0.86540315 -0.62788116 -0.28806189 0.60206042 0.12954702
从上面的结果可以看出,dataframe中有28种脂类进行了28次Welcht-tests
由于您获得了个人 t-statistics
,现在,您可以计算 p-values
并使用 Bonferroni / Holm 应用 FWER
校正或使用 Benjamini & Hochberg 方法应用 FDR
校正(当你有大量测试时很有用):
raw_p <- 2 * (1 - pnorm(abs(welch_t_stat))) # raw p-values assuming normal
# or use pt() with appropriate df
procedures <- c("Bonferroni", "Holm", "BH")
adjusted <- mt.rawp2adjp(raw_p, procedures)