限制 R 重塑行数以避免列太多

Limiting the R reshape row count to avoid too many columns

我正在重塑视频的每分钟时间序列数据,需要通过只保留每个 id 分组的前 10 行来限制生成的列数。我不知道如何把每个分组内部限制为按 t 排序的前 10 行。

问题是每个分组的 t 可能有 1 到 700 行,而 reshape 会把所有行都展开成列。我只想要前 10 个(即每个分组时间序列的前 10 分钟)。

matrixed = reshape(d, idvar="id",timevar="t",direction="wide") 返回的结果只有一行,却有 158 列。

这是您可以在 R 中重现的示例数据集:

# Sample data: per-minute viewer counts for a single video id.
# Bound to `d` because every snippet that follows reads `d`.
#   id      - character video identifier (the same value on all 157 rows)
#   t       - minute index as a double, 0 through 156
#   viewers - integer viewer count recorded at minute t
d <- data.frame(
  id = rep("NluslS4RXL", 157L),
  t = as.numeric(0:156),
  viewers = c(
    4L, 12L, 21L, 36L, 49L, 62L, 84L, 113L, 145L, 203L, 270L, 362L,
    419L, 496L, 532L, 702L, 778L, 835L, 963L, 1042L, 1091L, 1159L,
    1209L, 1247L, 1278L, 1316L, 1356L, 1399L, 1443L, 1446L, 1441L,
    1464L, 1488L, 1504L, 1527L, 1558L, 1583L, 1645L, 1672L, 1724L,
    1736L, 1767L, 1800L, 1794L, 1800L, 1828L, 1860L, 1914L, 1942L,
    1948L, 1960L, 1972L, 2004L, 2024L, 2062L, 2076L, 2052L, 2059L,
    2074L, 2092L, 2119L, 2116L, 2113L, 2132L, 2122L, 2157L, 2171L,
    2183L, 2179L, 2183L, 2178L, 2184L, 2176L, 2200L, 2207L, 2205L,
    2203L, 2222L, 2246L, 2286L, 2298L, 2286L, 2294L, 2290L, 2304L,
    2296L, 2293L, 2351L, 2328L, 2305L, 2279L, 2284L, 2260L, 2241L,
    2222L, 2205L, 2180L, 2182L, 2184L, 2166L, 2169L, 2133L, 2122L,
    2100L, 2114L, 2101L, 2075L, 2065L, 2036L, 2006L, 2005L, 1989L,
    1974L, 1973L, 1966L, 1959L, 1922L, 1897L, 1872L, 1870L, 1871L,
    1872L, 1865L, 1847L, 1816L, 1809L, 1792L, 1774L, 1770L, 1747L,
    1704L, 1681L, 1671L, 1659L, 1611L, 1574L, 1566L, 1555L, 1527L,
    1480L, 1451L, 1400L, 1358L, 1341L, 1290L, 1289L, 1254L, 1220L,
    1181L, 1142L, 1119L, 1055L, 1004L, 942L, 866L, 787L, 713L
  ),
  # Explicit so id stays character on R versions before 4.0 as well.
  stringsAsFactors = FALSE
)

一种选择是采用分组的方法:先取每个分组的前 10 行,再将结果重塑为 'wide' 格式。使用 dplyr/tidyr 时,可以先用 slice 取出每个 'id' 的前 10 行,再用 spread(或其后继函数 pivot_wider)转换为 'wide' 格式

library(dplyr)
library(tidyr)
# For every id keep the 10 rows with the smallest t, then widen so that each
# kept minute becomes its own column holding that minute's viewers value.
d %>%
  group_by(id) %>%
  # Sort inside each id first: slice() alone takes rows in storage order,
  # which only matches "first 10 minutes" when d is already sorted by t.
  arrange(t, .by_group = TRUE) %>%
  slice(seq_len(10)) %>%
  # Drop the grouping so the result does not stay grouped by id.
  ungroup() %>%
  # pivot_wider() is tidyr's successor to the superseded spread().
  pivot_wider(names_from = t, values_from = viewers)

先对 d 取子集得到 d10,然后在 d10 上应用您的 reshape 命令。这种方法不需要任何额外的包。

如果已知每个 id 的前 10 行的 t 恰好是 0, 1, 2, ..., 9(本例中确实如此),那么:

# Keep the rows whose t is below 10. which() drops NA comparisons, so rows
# with a missing t are excluded rather than returned as all-NA rows.
d10 <- d[which(d$t < 10), ]

或者,如果不能做出这个假设,那么:

# Split d into one data frame per id, take the first 10 rows of each piece
# (in stored row order), and stack the pieces back into one data frame.
d10 <- do.call("rbind", lapply(split(d, d$id), head, n = 10))

或:

# ave() with seq_along numbers the rows 1, 2, 3, ... within each id;
# keep the rows numbered 1 through 10. which() drops any NA comparison.
d10 <- d[which(with(d, ave(t, id, FUN = seq_along)) <= 10), ]