在序列数据上使用 dplyr 的简单 Table
Simple Table with dplyr on Sequence Data
我想用 dplyr 和 summarise 做一个简单的 table,但我真的想不通该怎么做……(尽管它应该很简单)。
我有一个序列矩阵。
当我简单地列出
table(dta)
我得到了我想要的结果。
dta
acquaintance alone child notnotnot nuclear
1 2 17 19 131
nuclear and acquaintance nuclear and acquaintance nuclear and acquaintance nuclear and acquaintance partner
1 1 1 35 2
但是,我不知道如何用 summarise 做同样的事情。
有什么建议吗?
# Example data: a 10 x 21 character matrix of sequence states.
# Rows are named "1".."10"; columns are named with 10-minute time stamps
# from "12:10" to "15:30". structure() attaches .Dim/.Dimnames to the flat
# character vector, which R fills column by column.
# NOTE(review): the table(dta) output reported with this data lists
# "nuclear and acquaintance" as four separate categories, so some of these
# labels presumably differ by invisible whitespace characters -- confirm
# before merging or cleaning them.
dta = structure(c("nuclear", "nuclear", "child", "child", "child",
"acquaintance", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "child", "child",
"child", "alone", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "child", "child", "child",
"child", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "child", "child", "child",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "child", "child",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"partner", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"partner", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "child", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "child", "alone", "notnotnot", "nuclear"
), .Dim = c(10L, 21L), .Dimnames = list(c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10"), c("12:10", "12:20", "12:30",
"12:40", "12:50", "13:00", "13:10", "13:20", "13:30", "13:40",
"13:50", "14:00", "14:10", "14:20", "14:30", "14:40", "14:50",
"15:00", "15:10", "15:20", "15:30")))
您只需将数据转换为 data.frame 即可使用 dplyr,然后就可以轻松获得所需的输出:
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the pipeline fail later with a confusing message.
library(dplyr)
# Ungrouped: overall frequency of each state across the whole matrix.
# c(dta) drops the dim attribute, flattening the matrix into one character
# vector, so every cell becomes one row of a single-column tibble.
# tibble() and group_by() replace the deprecated data_frame() and group_by_().
tibble(var = c(dta)) %>%
  group_by(var) %>%
  # The count column is left unnamed, so it is labelled `n()` in the result.
  summarise(n())
## var n()
## 1 acquaintance 1
## 2 alone 2
## 3 child 17
## 4 notnotnot 19
## 5 nuclear 131
## 6 nuclear and acquaintance 1
## 7 nuclear and acquaintance 1
## 8 nuclear and acquaintance 1
## 9 nuclear and acquaintance 35
## 10 partner 2
如果要对每一列分别执行此操作,可以使用 tidyr 先将数据转换为长格式,汇总之后再展开回宽格式。
# library() stops with an error if tidyr is missing; require() would only
# return FALSE and let the pipeline fail later.
library(tidyr)
# Grouped: frequency of each state within each time column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread().
dta %>%
  as.data.frame() %>%
  # Long format: one row per cell; `key` = column name, `value` = state.
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  group_by(key, value) %>%
  # Drop the grouping so the reshaping below works on a plain tibble.
  summarise(N = n(), .groups = "drop") %>%
  # Wide format: one column per `key`; missing combinations become NA.
  pivot_wider(names_from = key, values_from = N) %>%
  # pivot_wider() keeps first-appearance order; sort rows by state label.
  arrange(value)
## value 12:10 12:20 12:30 12:40 12:50 13:00 13:10 13:20 13:30 13:40 13:50 14:00 14:10
## 1 acquaintance 1 NA NA NA NA NA NA NA NA NA NA NA NA
## 2 alone NA 1 NA NA NA NA NA NA NA NA NA NA NA
## 3 child 3 3 4 3 2 NA NA NA NA NA NA NA NA
## 4 notnotnot 1 1 1 1 1 1 1 1 1 1 1 NA NA
## 5 nuclear 3 3 3 4 5 7 7 7 7 7 7 7 7
## 6 nuclear and acquaintance NA NA NA NA NA NA NA NA NA NA NA NA NA
## 7 nuclear and acquaintance NA NA NA NA NA NA NA NA NA NA NA NA NA
## 8 nuclear and acquaintance NA NA NA NA NA NA NA NA NA NA NA NA NA
## 9 nuclear and acquaintance 2 2 2 2 2 2 2 2 2 2 2 2 2
## 10 partner NA NA NA NA NA NA NA NA NA NA NA 1 1
## Variables not shown: 14:20 (int), 14:30 (int), 14:40 (int), 14:50 (int), 15:00 (int), 15:10 (int), 15:20 (int),
## 15:30 (int)
我想用 dplyr 和 summarise 做一个简单的 table,但我真的想不通该怎么做……(尽管它应该很简单)。
我有一个序列矩阵。 当我简单地列出
table(dta)
我得到了我想要的结果。
dta
acquaintance alone child notnotnot nuclear
1 2 17 19 131
nuclear and acquaintance nuclear and acquaintance nuclear and acquaintance nuclear and acquaintance partner
1 1 1 35 2
但是,我不知道如何用 summarise 做同样的事情。
有什么建议吗?
# Example data: a 10 x 21 character matrix of sequence states.
# Rows are named "1".."10"; columns are named with 10-minute time stamps
# from "12:10" to "15:30". structure() attaches .Dim/.Dimnames to the flat
# character vector, which R fills column by column.
# NOTE(review): the table(dta) output reported with this data lists
# "nuclear and acquaintance" as four separate categories, so some of these
# labels presumably differ by invisible whitespace characters -- confirm
# before merging or cleaning them.
dta = structure(c("nuclear", "nuclear", "child", "child", "child",
"acquaintance", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "child", "child",
"child", "alone", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "child", "child", "child",
"child", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "child", "child", "child",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "child", "child",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"partner", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"partner", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance",
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "child", "nuclear", "notnotnot", "nuclear",
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance",
"nuclear", "nuclear", "child", "alone", "notnotnot", "nuclear"
), .Dim = c(10L, 21L), .Dimnames = list(c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10"), c("12:10", "12:20", "12:30",
"12:40", "12:50", "13:00", "13:10", "13:20", "13:30", "13:40",
"13:50", "14:00", "14:10", "14:20", "14:30", "14:40", "14:50",
"15:00", "15:10", "15:20", "15:30")))
您只需将数据转换为 data.frame 即可使用 dplyr,然后就可以轻松获得所需的输出:
# library() stops with an error if dplyr is missing; require() would only
# return FALSE and let the pipeline fail later with a confusing message.
library(dplyr)
# Ungrouped: overall frequency of each state across the whole matrix.
# c(dta) drops the dim attribute, flattening the matrix into one character
# vector, so every cell becomes one row of a single-column tibble.
# tibble() and group_by() replace the deprecated data_frame() and group_by_().
tibble(var = c(dta)) %>%
  group_by(var) %>%
  # The count column is left unnamed, so it is labelled `n()` in the result.
  summarise(n())
## var n()
## 1 acquaintance 1
## 2 alone 2
## 3 child 17
## 4 notnotnot 19
## 5 nuclear 131
## 6 nuclear and acquaintance 1
## 7 nuclear and acquaintance 1
## 8 nuclear and acquaintance 1
## 9 nuclear and acquaintance 35
## 10 partner 2
如果要对每一列分别执行此操作,可以使用 tidyr 先将数据转换为长格式,汇总之后再展开回宽格式。
# library() stops with an error if tidyr is missing; require() would only
# return FALSE and let the pipeline fail later.
library(tidyr)
# Grouped: frequency of each state within each time column.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread().
dta %>%
  as.data.frame() %>%
  # Long format: one row per cell; `key` = column name, `value` = state.
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  group_by(key, value) %>%
  # Drop the grouping so the reshaping below works on a plain tibble.
  summarise(N = n(), .groups = "drop") %>%
  # Wide format: one column per `key`; missing combinations become NA.
  pivot_wider(names_from = key, values_from = N) %>%
  # pivot_wider() keeps first-appearance order; sort rows by state label.
  arrange(value)
## value 12:10 12:20 12:30 12:40 12:50 13:00 13:10 13:20 13:30 13:40 13:50 14:00 14:10
## 1 acquaintance 1 NA NA NA NA NA NA NA NA NA NA NA NA
## 2 alone NA 1 NA NA NA NA NA NA NA NA NA NA NA
## 3 child 3 3 4 3 2 NA NA NA NA NA NA NA NA
## 4 notnotnot 1 1 1 1 1 1 1 1 1 1 1 NA NA
## 5 nuclear 3 3 3 4 5 7 7 7 7 7 7 7 7
## 6 nuclear and acquaintance NA NA NA NA NA NA NA NA NA NA NA NA NA
## 7 nuclear and acquaintance NA NA NA NA NA NA NA NA NA NA NA NA NA
## 8 nuclear and acquaintance NA NA NA NA NA NA NA NA NA NA NA NA NA
## 9 nuclear and acquaintance 2 2 2 2 2 2 2 2 2 2 2 2 2
## 10 partner NA NA NA NA NA NA NA NA NA NA NA 1 1
## Variables not shown: 14:20 (int), 14:30 (int), 14:40 (int), 14:50 (int), 15:00 (int), 15:10 (int), 15:20 (int),
## 15:30 (int)