在序列数据上使用 dplyr 的简单 Table

Simple Table with dplyr on Sequence Data

我想用 dplyr 的 summarise 做一个简单的 table,

但我真的想不通如何...(尽管它应该很简单)。

我有一个序列矩阵。当我直接对它调用

 # Tabulate every cell of the matrix: one count per distinct string value.
 table(dta) 

我得到了我想要的结果。

 dta
            acquaintance                        alone                        child                    notnotnot                      nuclear 
                       1                            2                           17                           19                          131 
 nuclear and     acquaintance  nuclear and    acquaintance    nuclear and  acquaintance     nuclear and acquaintance                      partner 
                       1                            1                            1                           35                            2 

但是,我不知道如何用 summarise 做同样的事情。

有什么建议吗?

 # Example data: a 10 x 21 character matrix (.Dim = c(10L, 21L)), filled
 # column by column. Rows are named "1".."10"; columns are named by time
 # labels "12:10".."15:30".
 # NOTE: a few cells contain "nuclear and acquaintance" with extra internal
 # spaces; these are distinct strings and are therefore tabulated separately.
 dta =   structure(c("nuclear", "nuclear", "child", "child", "child", 
"acquaintance", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "child", "child", 
"child", "alone", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "child", "child", "child", 
"child", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "child", "child", "child", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "child", "child", 
 "nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
"partner", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear", 
"nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
 "partner", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and acquaintance", 
 "nuclear", "nuclear", "nuclear and acquaintance", "nuclear and  acquaintance", 
 "notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and     acquaintance", 
 "nuclear", "nuclear", "nuclear and acquaintance", "nuclear and acquaintance", 
 "notnotnot", "nuclear", "nuclear", "nuclear", "nuclear", "nuclear and    acquaintance", 
 "nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear", 
  "nuclear", "nuclear", "nuclear", "nuclear and acquaintance", 
 "nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear", 
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance", 
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear", 
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance", 
"nuclear", "nuclear", "nuclear", "nuclear", "notnotnot", "nuclear", 
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance", 
"nuclear", "nuclear", "child", "nuclear", "notnotnot", "nuclear", 
"nuclear", "nuclear", "nuclear", "nuclear and acquaintance", 
"nuclear", "nuclear", "child", "alone", "notnotnot", "nuclear"
), .Dim = c(10L, 21L), .Dimnames = list(c("1", "2", "3", "4", 
"5", "6", "7", "8", "9", "10"), c("12:10", "12:20", "12:30", 
"12:40", "12:50", "13:00", "13:10", "13:20", "13:30", "13:40", 
"13:50", "14:00", "14:10", "14:20", "14:30", "14:40", "14:50", 
"15:00", "15:10", "15:20", "15:30")))

您只需将数据转换为 data.frame 即可使用 dplyr,然后您就可以轻松获得所需的输出:

# library() errors if dplyr is missing; require() would only return FALSE.
library(dplyr)
# ungrouped: count how often each distinct value occurs in the whole matrix.
# c(dta) drops the dim attribute, flattening the matrix into a plain
# character vector that becomes the single column `var`.
# tibble()/group_by() replace the deprecated data_frame()/group_by_().
tibble(var = c(dta)) %>%
  group_by(var) %>%
  # The summary column is left unnamed, so it is labelled `n()` in the output.
  summarise(n())
##                             var n()
## 1                  acquaintance   1
## 2                         alone   2
## 3                         child  17
## 4                     notnotnot  19
## 5                       nuclear 131
## 6  nuclear and     acquaintance   1
## 7   nuclear and    acquaintance   1
## 8     nuclear and  acquaintance   1
## 9      nuclear and acquaintance  35
## 10                      partner   2

如果要对每一列分别执行此操作,可以使用 tidyr 先把数据整理成长格式,按列名和取值分组计数,然后再展开成宽格式。

# library() errors if a package is missing; require() would only return FALSE.
library(dplyr)
library(tidyr)
# grouped: count each value separately within every column of the matrix.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread().
dta %>%
  as.data.frame() %>%
  # Long format: one row per cell; `key` = column name, `value` = cell content.
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  group_by(key, value) %>%
  # Drop the grouping so the reshape below works on an ungrouped tibble.
  summarise(N = n(), .groups = "drop") %>%
  # Wide format: one column per original column; combinations that never
  # occur are filled with NA.
  pivot_wider(names_from = key, values_from = N) %>%
  # Keep rows ordered by value, as spread() produced.
  arrange(value)
##                           value 12:10 12:20 12:30 12:40 12:50 13:00 13:10 13:20 13:30 13:40 13:50 14:00 14:10
## 1                  acquaintance     1    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 2                         alone    NA     1    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 3                         child     3     3     4     3     2    NA    NA    NA    NA    NA    NA    NA    NA
## 4                     notnotnot     1     1     1     1     1     1     1     1     1     1     1    NA    NA
## 5                       nuclear     3     3     3     4     5     7     7     7     7     7     7     7     7
## 6  nuclear and     acquaintance    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 7   nuclear and    acquaintance    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 8     nuclear and  acquaintance    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 9      nuclear and acquaintance     2     2     2     2     2     2     2     2     2     2     2     2     2
## 10                      partner    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA     1     1
## Variables not shown: 14:20 (int), 14:30 (int), 14:40 (int), 14:50 (int), 15:00 (int), 15:10 (int), 15:20 (int), 
## 15:30 (int)