Reshape vs. tidyr 对于具有多个因变量的重复测量 (2)

Question

我有以下 5 个案例的样本数据，其中对两个因变量 "Rapport" 和 "STRS" 各进行了三次重复测量：

# Sample data (dput output): 5 rows, one per subject, with the columns
# SubID, Gender, Age, Rapport.1-3 and STRS.1-3.
# Every column is stored as a factor, including the numeric-looking measures;
# the level sets are larger than the 5 rows shown (SubID has levels "1".."84"),
# and all columns except SubID include a "#NULL!" level.
df1<-structure(list(SubID = structure(1:5, .Label = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", 
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", 
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", 
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", 
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70", 
"71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", 
"82", "83", "84"), class = "factor"), Gender = structure(c(3L, 
2L, 3L, 2L, 3L), .Label = c("#NULL!", "1", "2"), class = "factor"), 
Age = structure(c(5L, 3L, 2L, 2L, 3L), .Label = c("#NULL!", 
"10", "11", "8", "9"), class = "factor"), Rapport.1 = structure(c(22L, 
25L, 19L, 10L, 18L), .Label = c("#NULL!", "1.1", "1.85", 
"2.45", "2.5", "2.55", "2.6", "2.75", "2.8", "2.85", "2.9", 
"2.95", "3.2", "3.25", "3.3", "3.35", "3.4", "3.45", "3.5", 
"3.55", "3.6", "3.65", "3.7", "3.75", "3.8", "3.85", "3.9", 
"3.95"), class = "factor"), Rapport.2 = structure(c(29L, 
31L, 27L, 17L, 9L), .Label = c("#NULL!", "1.25", "1.4", "1.6", 
"1.95", "2.05", "2.3", "2.35", "2.45", "2.5", "2.65", "2.7", 
"2.75", "2.8", "2.85", "3", "3.05", "3.1", "3.15", "3.2", 
"3.35", "3.4", "3.45", "3.5", "3.55", "3.6", "3.65", "3.7", 
"3.75", "3.8", "3.85", "3.9", "3.95", "4"), class = "factor"), 
Rapport.3 = structure(c(32L, 35L, 22L, 22L, 5L), .Label = c("#NULL!", 
"1.35", "1.45", "1.6", "1.75", "1.85", "1.9", "1.95", "2.05", 
"2.1", "2.25", "2.3", "2.35", "2.4", "2.45", "2.6", "2.75", 
"2.8", "2.9", "2.95", "3", "3.05", "3.1", "3.2", "3.25", 
"3.3", "3.35", "3.4", "3.45", "3.5", "3.55", "3.6", "3.7", 
"3.75", "3.8", "3.85"), class = "factor"), STRS.1 = structure(c(33L, 
10L, 8L, 18L, 29L), .Label = c("#NULL!", "100", "102", "103", 
"104", "106", "107", "108", "109", "110", "111", "112", "113", 
"114", "115", "116", "117", "118", "119", "120", "122", "123", 
"124", "125", "126", "127", "128", "129", "132", "133", "69", 
"71", "73", "85", "88", "89", "92", "97", "99"), class = "factor"), 
STRS.2 = structure(c(37L, 19L, 9L, 22L, 21L), .Label = c("#NULL!", 
"100", "101", "103", "104", "105", "106", "107", "108", "110", 
"111", "113", "114", "115", "116", "117", "118", "119", "120", 
"121", "122", "123", "124", "125", "126", "127", "128", "129", 
"131", "132", "136", "137", "138", "139", "158", "63", "76", 
"80", "91", "94", "95", "98", "99"), class = "factor"), STRS.3 = structure(c(31L, 
11L, 19L, 23L, 22L), .Label = c("#NULL!", "102", "104", "105", 
"106", "107", "108", "109", "110", "111", "112", "114", "117", 
"118", "119", "120", "122", "123", "124", "125", "126", "127", 
"128", "129", "130", "131", "132", "133", "134", "135", "66", 
"70", "75", "81", "85", "87", "88", "94", "98"), class = "factor")), .Names = c("SubID", 
"Gender", "Age", "Rapport.1", "Rapport.2", "Rapport.3", "STRS.1", 
"STRS.2", "STRS.3"), row.names = c(NA, 5L), class = "data.frame")

为了将数据集从宽格式转换为长格式，我运行了以下代码：

# Wide -> long with base reshape(): columns 4:9 are the repeated measures, and
# sep = "." splits each name (e.g. Rapport.1) into a variable name and a time.
# No idvar is supplied, so reshape() adds its own "id" column to the result.
df2<-reshape(df1, varying = 4:9, sep = ".", direction = 'long')
# Open the reshaped data in the data viewer.
View(df2)

结果是我想要的，但我不希望在第 8 列中出现额外的“id”列，因为它只是第 2 列中“SubID”的副本。我还想知道，为了让这个函数正常工作，重复测量变量的列名中是否必须包含分隔符（例如“Rapport.1”、“Rapport.2”等）。

我也想知道如何使用 reshape 包中的 melt 函数以及 tidyr 包中的 gather 函数得到我想要的结果。当我尝试下面的 melt 调用时，会出现错误，代码无法运行：

# Attempted melt() with SubID, Gender and Age as id columns and the six
# measurement columns as measure.vars.
# NOTE(review): this call does not parse as written. On the measure.vars line,
# "STRS.3 is missing its closing quote and c( is missing its closing
# parenthesis; value.name=("Rapport","STRS") is missing c() around the names.
df3<-melt(df1, id.vars=c("SubID","Gender","Age"),
measure.vars=c("Rapport.1","Rapport.2","Rapport.3","STRS.1","STRS.2","STRS.3,
variable.name=c("Rapport","STRS"),
value.name=("Rapport","STRS"))

当我尝试下面的 gather 调用时，得到的结果中有一列堆叠了变量名称（“Rapport.1…”和“STRS.1…”），另一列则是对应的值：

# gather(data, key, value, ...): here `Rapport` is used as the name of the key
# column and `STRS` as the name of the value column; Rapport.1:STRS.3 selects
# the six columns to stack. The result is one column of variable names and one
# column of values, not separate Rapport and STRS columns.
df4<-gather(df1, Rapport, STRS, Rapport.1:STRS.3)
# Open the gathered data in the data viewer.
View(df4)

我知道这两个包各有优点，所以我想了解如何使用 gather 和 melt 函数得到我想要的结果。有人可以帮忙吗？

Answer 1

这是 tidyr 版本。您需要先 gather 列，然后 separate 时间列，最后 spread 数据。

# Load tidyr with library(): it stops with an error if the package is missing,
# whereas require() only warns and returns FALSE, so the failure shows up later.
library(tidyr)

# Reshape df1 to one row per subject and time point:
#   1. gather()   - stack every column except SubID, Gender and Age into
#                   key/value pairs (key = original column name).
#   2. separate() - split key (e.g. "Rapport.1") into the variable name ("key")
#                   and the measurement occasion ("time").
#   3. spread()   - turn each variable name back into its own column;
#                   convert = TRUE re-infers the column types of the new columns.
# Note: gather()/spread() are superseded in current tidyr by
# pivot_longer()/pivot_wider(); they still work and are kept here as written.
df1 %>% 
  gather(key, value, -c(SubID, Gender, Age)) %>% 
  separate(key, c("key", "time")) %>%
  spread(key, value, convert = TRUE)
##    SubID Gender Age time Rapport STRS
## 1      1      2   9    1    3.65   73
## 2      1      2   9    2    3.75   76
## 3      1      2   9    3    3.60   66
## 4      2      1  11    1    3.80  110
## 5      2      1  11    2    3.85  120
## 6      2      1  11    3    3.80  112
## 7      3      2  10    1    3.50  108
## 8      3      2  10    2    3.65  108
## 9      3      2  10    3    3.05  124
## 10     4      1  10    1    2.85  118
## 11     4      1  10    2    3.05  123
## 12     4      1  10    3    3.05  128
## 13     5      2  11    1    3.45  132
## 14     5      2  11    2    2.45  122
## 15     5      2  11    3    1.75  127

Answer 2

这是一个使用开发版（devel）data.table 的简单解决方案：

library(data.table) # v >= 1.9.5
# Melt several groups of columns at once: each element of the measure.vars
# list (columns 4:6 and columns 7:9) becomes its own value column
# (value1, value2), and `variable` holds the position within each group.
# The argument is spelled out as measure.vars rather than relying on partial
# matching of `measure`.
# setDT() converts df1 to a data.table by reference, so df1 itself is changed.
melt(setDT(df1), measure.vars = list(4:6, 7:9))
#     SubID Gender Age variable value1 value2
#  1:     1      2   9        1   3.65     73
#  2:     2      1  11        1    3.8    110
#  3:     3      2  10        1    3.5    108
#  4:     4      1  10        1   2.85    118
#  5:     5      2  11        1   3.45    132
#  6:     1      2   9        2   3.75     76
#  7:     2      1  11        2   3.85    120
#  8:     3      2  10        2   3.65    108
#  9:     4      1  10        2   3.05    123
# 10:     5      2  11        2   2.45    122
# 11:     1      2   9        3    3.6     66
# 12:     2      1  11        3    3.8    112
# 13:     3      2  10        3   3.05    124
# 14:     4      1  10        3   3.05    128
# 15:     5      2  11        3   1.75    127

Reshape vs. tidyr 对于具有多个因变量的重复测量 (2)

Reshape vs. tidyr for repeated measures with multiple dependent variables (2)

r

melt

reshape2

tidyr