计算数据框中的唯一出现次数
Count unique occurrences within data frame
设数据表如下:
v1 | v2 | v3
---|---|---
A | B | A
B | B | A
 | A | C
D | C | D
我希望 R 针对每一列统计各唯一值的出现次数,并生成如下表格:
 | v1 | v2 | v3
---|---|---|---
A | 1 | 1 | 2
B | 1 | 2 | 0
C | 0 | 1 | 1
D | 1 | 0 | 1
一个选项可以是:
sapply(df, function(x) table(factor(x, levels = unique(unlist(df)))))
V1 v2 v3
A 1 1 2
B 1 2 0
D 1 0 1
C 0 1 1
可以试试像这样使用 table():
> table(unlist(df),names(df)[col(df)])
V1 v2 v3
A 1 1 2
B 1 2 0
C 0 1 1
D 1 0 1
数据
> dput(df)
structure(list(V1 = c("A", "B", NA, "D"), v2 = c("B", "B", "A",
"C"), v3 = c("A", "A", "C", "D")), class = "data.frame", row.names = c(NA,
-4L))
再补充一个 tidyverse 版本。
library(tidyverse)
df %>%
pivot_longer(
everything(),
values_to="Value",
names_to="Variable"
) %>%
group_by(Variable, Value) %>%
summarise(N=n(), .groups="drop") %>%
filter(!is.na(Value)) %>%
pivot_wider(values_from=N, names_from=Variable, values_fill=0) %>%
arrange(Value)
# A tibble: 4 x 4
Value v1 v2 v3
<chr> <int> <int> <int>
1 A 1 1 2
2 B 1 2 0
3 C 0 1 1
4 D 1 0 1
为了完整起见,这里给出一种结合使用 data.table 的 melt() 和 dcast() 的方法:
library(data.table)
dcast(melt(setDT(df1), measure.vars = patterns("^v"))[value != ""], value ~ variable)
value v1 v2 v3
1: A 1 1 2
2: B 1 2 0
3: C 0 1 1
4: D 1 0 1
该方法与上面的 tidyverse 版本类似,都是先将数据从宽格式转换为长格式、再转换回宽格式,但写法没有那么冗长。
编辑:
除了使用 dcast(),也可以在将数据从宽格式转换为长格式之后直接调用 table():
melt(setDT(df1), measure.vars = patterns("^v"))[value != ""][
, table(value, variable)]
variable
value v1 v2 v3
A 1 1 2
B 1 2 0
C 0 1 1
D 1 0 1
注意这里使用了 data.table 的链式调用(chaining)。
并且,为了节省几次击键:
melt(setDT(df1), measure.vars = names(df1))[value != ""][, table(rev(.SD))]
数据
df1 <- fread("
|v1|v2|v3|
|A |B | A|
|B |B | A|
| |A | C|
|D |C | D|",
drop = c(1,5), header = TRUE)
我们可以使用mtabulate
library(qdapTools)
t(mtabulate(df))
V1 v2 v3
A 1 1 2
B 1 2 0
C 0 1 1
D 1 0 1
数据
df <- structure(list(V1 = c("A", "B", NA, "D"), v2 = c("B", "B", "A",
"C"), v3 = c("A", "A", "C", "D")), class = "data.frame", row.names = c(NA,
-4L))
设数据表如下:
v1 | v2 | v3 |
---|---|---|
A | B | A |
B | B | A |
 | A | C |
D | C | D |
我希望 R 针对每一列统计各唯一值的出现次数,并生成如下表格:
 | v1 | v2 | v3 |
---|---|---|---|
A | 1 | 1 | 2 |
B | 1 | 2 | 0 |
C | 0 | 1 | 1 |
D | 1 | 0 | 1 |
一个选项可以是:
sapply(df, function(x) table(factor(x, levels = unique(unlist(df)))))
V1 v2 v3
A 1 1 2
B 1 2 0
D 1 0 1
C 0 1 1
可以试试像这样使用 table():
> table(unlist(df),names(df)[col(df)])
V1 v2 v3
A 1 1 2
B 1 2 0
C 0 1 1
D 1 0 1
数据
> dput(df)
structure(list(V1 = c("A", "B", NA, "D"), v2 = c("B", "B", "A",
"C"), v3 = c("A", "A", "C", "D")), class = "data.frame", row.names = c(NA,
-4L))
再补充一个 tidyverse 版本。
library(tidyverse)
df %>%
pivot_longer(
everything(),
values_to="Value",
names_to="Variable"
) %>%
group_by(Variable, Value) %>%
summarise(N=n(), .groups="drop") %>%
filter(!is.na(Value)) %>%
pivot_wider(values_from=N, names_from=Variable, values_fill=0) %>%
arrange(Value)
# A tibble: 4 x 4
Value v1 v2 v3
<chr> <int> <int> <int>
1 A 1 1 2
2 B 1 2 0
3 C 0 1 1
4 D 1 0 1
为了完整起见,这里给出一种结合使用 data.table 的 melt() 和 dcast() 的方法:
library(data.table)
dcast(melt(setDT(df1), measure.vars = patterns("^v"))[value != ""], value ~ variable)
value v1 v2 v3
1: A 1 1 2
2: B 1 2 0
3: C 0 1 1
4: D 1 0 1
该方法与上面的 tidyverse 版本类似,都是先将数据从宽格式转换为长格式、再转换回宽格式,但写法没有那么冗长。
编辑:
除了使用 dcast(),也可以在将数据从宽格式转换为长格式之后直接调用 table():
melt(setDT(df1), measure.vars = patterns("^v"))[value != ""][
, table(value, variable)]
variable
value v1 v2 v3
A 1 1 2
B 1 2 0
C 0 1 1
D 1 0 1
注意这里使用了 data.table 的链式调用(chaining)。
并且,为了节省几次击键:
melt(setDT(df1), measure.vars = names(df1))[value != ""][, table(rev(.SD))]
数据
df1 <- fread("
|v1|v2|v3|
|A |B | A|
|B |B | A|
| |A | C|
|D |C | D|",
drop = c(1,5), header = TRUE)
我们可以使用mtabulate
library(qdapTools)
t(mtabulate(df))
V1 v2 v3
A 1 1 2
B 1 2 0
C 0 1 1
D 1 0 1
数据
df <- structure(list(V1 = c("A", "B", NA, "D"), v2 = c("B", "B", "A",
"C"), v3 = c("A", "A", "C", "D")), class = "data.frame", row.names = c(NA,
-4L))