R:按组计算多重相关性(并将输出保存到 csv 文件)

R: Compute multiple correlations by group (and save output to csv file)

有没有办法生成一个文件,按物种(“group”列)分组,给出观察到的鱼类原始数量(“num”)与每个环境数据列(“temp”、“do”等)之间的相关统计数据?

*以及 num 与各环境因子的均值和中位数之间的相关性?

我还希望能够选择使用哪种相关方法(Pearson 相关、Kendall 等级相关、Spearman 相关等)

我的数据:

# Example data set: 40 rows (row.names = c(NA, -40L)), one row per
# year x season x site x group combination.
#   year   - factor with levels "2019", "2020"
#   season - factor with levels "dry", "wet"
#   site   - factor with levels "1" .. "5"
#   group  - factor with levels "Hardhead silverside", "Sailfin molly"
#   num    - numeric count of fish observed
#   temp, sal, do, depth - environmental measurements (units are not stated
#            here; presumably temperature, salinity, dissolved oxygen and
#            depth - TODO confirm)
# NOTE(review): this looks like dput() output; the `.Label` arguments to
# structure() are the deparsed form of the factor `levels` attribute.
zeros <- structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L), .Label = c("2019", "2020"), class = "factor"), season = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("dry", "wet"), class = "factor"), 
    site = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 
    1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L, 
    3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L
    ), .Label = c("1", "2", "3", "4", "5"), class = "factor"), 
    group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
    ), .Label = c("Hardhead silverside", "Sailfin molly"), class = "factor"), 
    num = c(0, 8, 0, 9, 0, 13, 0, 9, 0, 10, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 2, 
    0, 3, 0, 13, 0), temp = c(23L, 36L, 35L, 34L, 30L, 28L, 18L, 
    19L, 33L, 33L, 25L, 20L, 33L, 23L, 36L, 32L, 28L, 17L, 34L, 
    31L, 26L, 34L, 26L, 35L, 15L, 25L, 26L, 20L, 18L, 14L, 23L, 
    17L, 26L, 17L, 17L, 19L, 29L, 31L, 18L, 15L), sal = c(12.5, 
    25.5, 8.5, 15.5, 17.5, 27.5, 9.5, 31.5, 1.5, 34.5, 25.5, 
    21.5, 10.5, 8.5, 32.5, 19.5, 6.5, 5.5, 15.5, 28.5, 6.5, 3.5, 
    29.5, 13.5, 7.5, 16.5, 3.5, 28.5, 22.5, 5.5, 9.5, 12.5, 29.5, 
    24.5, 8.5, 32.5, 37.5, 3.5, 12.5, 19.5), do = c(9.66, 7.66, 
    1.66, 14.66, 15.66, 1.66, 14.66, 15.66, 0.66, 5.66, 10.66, 
    11.66, 4.66, 0.66, 13.66, 1.66, 13.66, 6.66, 6.66, 10.66, 
    9.66, 15.66, 9.66, 15.66, 4.66, 13.66, 1.66, 11.66, 6.66, 
    8.66, 12.66, 0.66, 6.66, 0.66, 9.66, 16.66, 1.66, 10.66, 
    15.66, 10.66), depth = c(120L, 161L, 52L, 52L, 43L, 105L, 
    165L, 23L, 79L, 136L, 41L, 59L, 65L, 118L, 122L, 69L, 137L, 
    88L, 152L, 105L, 108L, 79L, 96L, 80L, 22L, 110L, 157L, 118L, 
    126L, 93L, 156L, 64L, 74L, 24L, 111L, 113L, 157L, 78L, 121L, 
    130L)), class = "data.frame", row.names = c(NA, -40L))

你的问题的第一部分很简单:

# Correlate the fish count ("num") with every environmental column,
# separately for each level of `group`.

# Environmental columns, selected by name so the code keeps working if the
# column order of `zeros` changes.
env_cols <- c("temp", "sal", "do", "depth")
# Correlation method passed to cor(): "pearson", "kendall" or "spearman".
cor_method <- "pearson"

# One data frame per group level.
zeros.spl <- split(zeros, zeros$group)
# vapply() (unlike sapply()) always yields a numeric matrix with
# length(env_cols) rows and one column per group, or fails loudly.
zeros.cors <- vapply(
  zeros.spl,
  function(x) {
    # cor() returns a 1 x length(env_cols) matrix; drop() flattens it.
    drop(cor(x[["num"]], x[, env_cols, drop = FALSE], method = cor_method))
  },
  numeric(length(env_cols))
)
rownames(zeros.cors) <- env_cols
zeros.cors
#       Hardhead silverside Sailfin molly
# temp           -0.3080334    0.36174046
# sal             0.1393580    0.47095129
# do              0.2544695   -0.06646818
# depth           0.1296208    0.08777425
# Transposed view: one row per group, one column per environmental variable.
t(zeros.cors)
#                           temp       sal          do      depth
# Hardhead silverside -0.3080334 0.1393580  0.25446948 0.12962078
# Sailfin molly        0.3617405 0.4709513 -0.06646818 0.08777425

使用 write.csv(zeros.cors, file="results.csv") 或 write.csv(t(zeros.cors), file="results.csv"),具体取决于您希望行和列分别表示什么。

第二个问题不太清楚。每组的均值/中位数只是单个值,因此无法将其与环境变量做相关分析。您可以使用 aggregate() 按组计算均值和中位数:

# Per-group mean of the fish count and of each environmental variable.
aggregate(
  zeros[c("num", "temp", "sal", "do", "depth")],
  by = list(zeros$group),
  FUN = mean
)
#               Group.1  num  temp   sal   do  depth
# 1 Hardhead silverside 1.45 25.95 15.35 8.51 105.20
# 2       Sailfin molly 2.45 25.00 18.90 9.06  90.25
# Same summary using the median instead of the mean.
aggregate(
  zeros[c("num", "temp", "sal", "do", "depth")],
  by = list(zeros$group),
  FUN = median
)
#               Group.1 num temp  sal    do depth
# 1 Hardhead silverside   0   26 11.5  9.66 115.5
# 2       Sailfin molly   0   24 19.5 10.66  90.5