将数据从宽格式重塑为长格式，并保留 `varying` 中的变量顺序

Question

数据：

structure(list(Day = 1:13, Morning_1_id = structure(1:13, .Label = c("20180502-033-000005", 
"20180503-033-000005", "20180507-033-000006", "20180508-033-000005", 
"20180510-033-000005", "20180511-033-000005", "20180514-033-000005", 
"20180516-033-000005", "20180517-033-000001", "20180518-033-000005", 
"20180521-033-000006", "20180522-033-000005", "20180523-033-000005"
), class = "factor"), W = c(26.3666666666667, 26.4433333333333, 
26.2, 26.2866666666667, 26.43, 25.8733333333333, 26.64, 26.5233333333333, 
27.27, 26.6, 26.6966666666667, 26.27, 26.24), R = c(5.87258333333333, 
5.84598, 5.92537333333333, 6.02874666666667, 5.99018666666667, 
5.88347333333333, 5.25210666666667, 5.88159666666667, 5.87579333333333, 
5.92004, 5.68929, 5.89672, 5.93005), Morning_2_id = structure(1:13, .Label = c("20180502-033-000006", 
"20180503-033-000006", "20180507-033-000007", "20180508-033-000006", 
"20180510-033-000006", "20180511-033-000006", "20180514-033-000006", 
"20180516-033-000006", "20180517-033-000002", "20180518-033-000006", 
"20180521-033-000007", "20180522-033-000006", "20180523-033-000006"
), class = "factor"), W1 = c(26.3066666666667, 26.7233333333333, 
25.7866666666667, 27.12, 26.09, 25.82, 27, 26.2166666666667, 
26.5066666666667, 26.7233333333333, 26.8766666666667, 26.1733333333333, 
26.28), R1 = c(5.74259666666667, 5.91224, 5.85586333333333, 5.99682, 
5.99842333333333, 5.28803333333333, 5.88124333333333, 5.85363, 
5.85148333333333, 5.68396333333333, 5.68045666666667, 5.95528, 
5.84653666666667), Afternoon_1_id = structure(1:13, .Label = c("20180502-033-000024", 
"20180503-033-000015", "20180507-033-000020", "20180508-033-000020", 
"20180510-033-000011", "20180511-033-000017", "20180514-033-000011", 
"20180516-033-000012", "20180517-033-000012", "20180518-033-000011", 
"20180521-033-000012", "20180522-033-000011", "20180523-033-000011"
), class = "factor"), W2 = c(27.0733333333333, 26.2233333333333, 
26.4533333333333, 26.4166666666667, 26.0966666666667, 26.5833333333333, 
26.6266666666667, 26.2766666666667, 26.39, 25.5633333333333, 
25.1866666666667, 26.89, 25.17), R2 = c(5.95638, 5.97475666666667, 
5.78408, 5.91546333333333, 5.73866333333333, 5.79964666666667, 
5.87522333333333, 5.53540333333333, 5.85597666666667, 5.75941666666667, 
5.88696333333333, 5.56677, 5.50966666666667), Afternoon_2_id = structure(1:13, .Label = c("20180502-033-000025", 
"20180503-033-000016", "20180507-033-000021", "20180508-033-000021", 
"20180510-033-000012", "20180511-033-000018", "20180514-033-000012", 
"20180516-033-000014", "20180517-033-000014", "20180518-033-000012", 
"20180521-033-000013", "20180522-033-000012", "20180523-033-000012"
), class = "factor"), W3 = c(26.2233333333333, 26.1266666666667, 
25.7733333333333, 26.7933333333333, 26.8166666666667, 26.6633333333333, 
26.45, 25.7833333333333, 26.18, 26.9433333333333, 26.4666666666667, 
26.78, 26.3666666666667), R3 = c(5.83166, 5.88337, 5.93851, 5.96334666666667, 
5.83277, 5.92955, 5.92999333333333, 5.78252333333333, 5.79061666666667, 
5.61290333333333, 5.88305333333333, 5.88644666666667, 5.79076
)), class = "data.frame", row.names = c(NA, 13L))

我想进行从宽到长的转换（最好使用 base R），使各时段的 ID 以及 'W' 和 'R' 的值按 Day 堆叠成长格式。

我使用reshape函数如下：

# Asker's attempt, kept exactly as posted: reshape wide -> long, passing
# `varying` as one flat character vector listed as (id, W, R) per time slot.
# The question reports that the resulting columns end up labelled with names
# that do not match the values they hold.
# NOTE(review): `timevar` is given two names here, and both also appear in
# `v.names`; reshape() is presumably expecting a single time column name --
# confirm against the reshape() documentation.
mydata<- reshape(new_data, direction='long', 
                 varying = c('Morning_1_id', 'W', 'R', 
                             'Morning_2_id', 'W1', 'R1', 
                             'Afternoon_1_id', 'W2', 'R2',
                             'Afternoon_2_id', 'W3', 'R3'), 
                 v.names = c('TId', 'W', 'R'),
                 timevar = c('W', 'R'), # differentiates
                 times = c('Morning1', 'Morning2', 'Afternoon1', 'Afternoon2'),
                 idvar = 'Day')

这会导致列名的顺序发生变化：列名与列中实际存放的值不对应。我想先修正这个问题，然后再继续后续步骤。

执行此操作的正确方法是什么？

Answer 1

问题出在 varying 参数上：

# Build `varying` as a list of column-index vectors, one per long-format
# variable: every third column starting at 2 (ids), 3 (W values), 4 (R values).
varlist <- lapply(2:4, seq, to = ncol(new_data), by = 3)

# Passing an explicit list keeps each group of wide columns tied to the
# v.names entry at the same position.
out <- reshape(
  new_data,
  varying = varlist,
  v.names = c('TId', 'W', 'R'),
  times = c('Morning1', 'Morning2', 'Afternoon1', 'Afternoon2'),
  idvar = 'Day',
  direction = 'long'
)

head(out, 3)

#           Day     time                 TId        W        R
#1.Morning1   1 Morning1 20180502-033-000005 26.36667 5.872583
#2.Morning1   2 Morning1 20180503-033-000005 26.44333 5.845980
#3.Morning1   3 Morning1 20180507-033-000006 26.20000 5.925373

Answer 2

因为你的变量名称各不相同，你必须按所需的顺序显式指定它们；如果它们只是在编号上有差异，那么可以使用 reshape 的 sep 和/或 split 参数。这里只需要将 varying 改为如下所示的矩阵，用列位置来指定各组变量：

# Reshape wide -> long using column positions for `varying`.
# Columns 2..ncol(new_data) hold consecutive (id, W, R) triples, one per time
# slot. Filling a 3-row matrix column-wise puts every id index in row 1, every
# W index in row 2 and every R index in row 3; reshape() treats each matrix
# row as one long-format variable, matched by position with `v.names`.
#
# `idvar` is given by name rather than as `1`: a numeric idvar is not found
# among names(data), so reshape() generates row-number ids and writes them
# into that column, which only matches Day here because Day happens to be 1:13.
# Arguments are spelled out in full (no partial matching of `direction`).
mydata <- reshape(
  new_data,
  direction = "long",
  varying = matrix(2:ncol(new_data), nrow = 3),
  v.names = c("TId", "W", "R"),
  times = c("Morning1", "Morning2", "Afternoon1", "Afternoon2"),
  idvar = "Day"
)
head(mydata)
              Day       time                 TId        W        R
1.Morning1      1   Morning1 20180502-033-000005 26.36667 5.872583
2.Morning1      2   Morning1 20180503-033-000005 26.44333 5.845980
3.Morning1      3   Morning1 20180507-033-000006 26.20000 5.925373
4.Morning1      4   Morning1 20180508-033-000005 26.28667 6.028747
5.Morning1      5   Morning1 20180510-033-000005 26.43000 5.990187
6.Morning1      6   Morning1 20180511-033-000005 25.87333 5.883473

Answer 3

为了完整起见，data.table 包中的 melt() 函数能够同时重塑多个测量值。此外，它允许将列名指定为正则表达式，从而节省大量输入：

# Reshape wide -> long with data.table's melt(), gathering several groups of
# measure columns in a single call.
library(data.table)
# Each regular expression passed to patterns() selects one group of columns
# (names ending in "id", starting with "W", starting with "R"); the groups are
# stacked into the columns named, in the same order, by value.name.
# NOTE(review): setDT() is understood to convert new_data to a data.table in
# place -- confirm that side effect is acceptable for the caller.
melt(setDT(new_data), measure.vars = patterns("id$", "^W", "^R"),
     value.name = c("TId", "W", "R"))

    Day variable                 TId        W        R
 1:   1        1 20180502-033-000005 26.36667 5.872583
 2:   2        1 20180503-033-000005 26.44333 5.845980
 3:   3        1 20180507-033-000006 26.20000 5.925373
 4:   4        1 20180508-033-000005 26.28667 6.028747
 5:   5        1 20180510-033-000005 26.43000 5.990187
 6:   6        1 20180511-033-000005 25.87333 5.883473
 7:   7        1 20180514-033-000005 26.64000 5.252107
 8:   8        1 20180516-033-000005 26.52333 5.881597
 9:   9        1 20180517-033-000001 27.27000 5.875793
10:  10        1 20180518-033-000005 26.60000 5.920040
11:  11        1 20180521-033-000006 26.69667 5.689290
12:  12        1 20180522-033-000005 26.27000 5.896720
13:  13        1 20180523-033-000005 26.24000 5.930050
14:   1        2 20180502-033-000006 26.30667 5.742597
15:   2        2 20180503-033-000006 26.72333 5.912240
...

如果需要，可以将因子列 "variable" 替换为 time 列，以获得与 reshape 相同的结果：

# Melt the three column groups (ids, W, R) and name the group-index column
# "time" instead of the default "variable".
# The chained [ ... ] step then replaces that factor with an explicit label:
# indexing the character vector by the factor uses its integer codes, so
# group 1 -> 'Morning1', group 2 -> 'Morning2', and so on.
# NOTE(review): the trailing [] is presumably there so the updated table is
# printed after the := assignment -- confirm.
melt(setDT(new_data), measure.vars = patterns("_id$", "^W", "^R"),
     value.name = c("TId", "W", "R"), variable.name = "time")[
       , time := c('Morning1', 'Morning2', 'Afternoon1', 'Afternoon2')[time]][]

    Day       time                 TId        W        R
 1:   1   Morning1 20180502-033-000005 26.36667 5.872583
 2:   2   Morning1 20180503-033-000005 26.44333 5.845980
 3:   3   Morning1 20180507-033-000006 26.20000 5.925373
 4:   4   Morning1 20180508-033-000005 26.28667 6.028747

这里，time 值是明确给出的（就像 reshape 中的 times = 参数）。或者，可以使用模式匹配和提取自动创建 times 值：

# Melt the three column groups (ids, W, R) into long format, with the
# group-index column named "time".
# The time labels are derived from the column names instead of being typed
# out: the look-ahead regex keeps the part of each name that precedes a
# trailing "_id", names without that suffix are expected to yield NA and are
# dropped by na.omit(), and the remaining labels are picked by the integer
# codes of the `time` factor.
# NOTE(review): this relies on the "_id" columns appearing in the same order
# as the melted groups -- confirm for other inputs.
melt(setDT(new_data), measure.vars = patterns("_id$", "^W", "^R"),
     value.name = c("TId", "W", "R"), variable.name = "time")[
       , time := na.omit(stringr::str_extract(names(new_data), ".*(?=_id$)"))[time]][]

    Day        time                 TId        W        R
 1:   1   Morning_1 20180502-033-000005 26.36667 5.872583
 2:   2   Morning_1 20180503-033-000005 26.44333 5.845980
 3:   3   Morning_1 20180507-033-000006 26.20000 5.925373
 4:   4   Morning_1 20180508-033-000005 26.28667 6.028747

此处，在 new_data 的列名中查找以 "_id" 结尾的条目，并提取字符串中位于 "_id" 之前的部分。

将数据从宽格式重塑为长格式，并保留 `varying` 中的变量顺序

Reshape data wide-to-long, preserve variable order in `varying`

r

reshape