计算数据框中的唯一出现次数

Count unique occurrences within data frame

假设有如下表格:

v1 v2 v3
A  B  A
B  B  A
   A  C
D  C  D

我希望用 R 生成一张表,统计每个唯一值在各列中出现的次数:

  v1 v2 v3
A  1  1  2
B  1  2  0
C  0  1  1
D  1  0  1

一个选项可以是:

# Tabulate every column against the same set of levels (all distinct values in
# df) so the per-column counts line up row by row, with 0 for absent values.
sapply(
  df,
  function(column) {
    table(factor(column, levels = unique(unlist(df))))
  }
)

  V1 v2 v3
A  1  1  2
B  1  2  0
D  1  0  1
C  0  1  1

可以试试像这样使用 table():

> # Cross-tabulate each cell value against the name of the column it sits in.
> table(unlist(df), rep(names(df), each = nrow(df)))

    V1 v2 v3
  A  1  1  2
  B  1  2  0
  C  0  1  1
  D  1  0  1

数据

> dput(df)
structure(list(V1 = c("A", "B", NA, "D"), v2 = c("B", "B", "A", 
"C"), v3 = c("A", "A", "C", "D")), class = "data.frame", row.names = c(NA,
-4L))

再补充一个 tidyverse 版本。

library(tidyverse)

# Reshape to one row per cell, drop missing cells, count each
# (Variable, Value) pair, then spread the counts back out with one column per
# original variable; pairs that never occur are filled with 0.
df %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Value") %>%
  filter(!is.na(Value)) %>%
  count(Variable, Value, name = "N") %>%
  pivot_wider(names_from = Variable, values_from = N, values_fill = 0) %>%
  arrange(Value)
# A tibble: 4 x 4
  Value    v1    v2    v3
  <chr> <int> <int> <int>
1 A         1     1     2
2 B         1     2     0
3 C         0     1     1
4 D         1     0     1

为了完整起见,这里再给出一种结合使用 melt() 和 dcast() 的方法:

library(data.table)

# Reshape wide -> long, drop blank cells, then reshape back to wide while
# counting how often each value occurs in each original column.
# fun.aggregate is spelled out: without it dcast() falls back to length and
# emits an "Aggregate function missing" message.
dcast(
  melt(setDT(df1), measure.vars = patterns("^v"))[value != ""],
  value ~ variable,
  fun.aggregate = length
)
   value v1 v2 v3
1:     A  1  1  2
2:     B  1  2  0
3:     C  0  1  1
4:     D  1  0  1

该方法同样是先将数据从宽格式转换为长格式,再转换回宽格式,但写法没那么冗长。

编辑

除了使用 dcast(),也可以在将数据由宽变长之后直接调用 table():

# Reshape to long form, drop blank cells, then cross-tabulate value against
# the originating column via chained `[` calls.
melt(setDT(df1), measure.vars = patterns("^v"))[
  value != ""
][
  , table(value, variable)
]
     variable
value v1 v2 v3
    A  1  1  2
    B  1  2  0
    C  0  1  1
    D  1  0  1

注意这里使用了 data.table 的链式调用(chaining)。

并且,为了节省几次击键:

# With every column melted, .SD holds just (variable, value); rev() swaps them
# so table() tabulates value (rows) against variable (columns).
melt(setDT(df1), measure.vars = names(df1))[value != ""][, table(rev(.SD))]

数据

# Inline pipe-delimited sample table. The leading and trailing "|" on each row
# produce an empty first and fifth field, which `drop` discards.
df1 <- fread(
  text = "
|v1|v2|v3|
|A |B | A|
|B |B | A|
|  |A | C|
|D |C | D|",
  header = TRUE,
  drop = c(1, 5)
)

我们也可以使用 qdapTools 包中的 mtabulate():

library(qdapTools)
# mtabulate() tabulates the values of each column of df; t() transposes the
# result so the values become rows and the original columns become columns.
 t(mtabulate(df))
  V1 v2 v3
A  1  1  2
B  1  2  0
C  0  1  1
D  1  0  1

数据

# Sample data: three character columns with one missing cell in V1.
df <- data.frame(
  V1 = c("A", "B", NA, "D"),
  v2 = c("B", "B", "A", "C"),
  v3 = c("A", "A", "C", "D"),
  stringsAsFactors = FALSE
)