R中因子值的比例
proportion of factor values in R
我有一个包含多个因子列的数据集，想找一种简洁的方式，输出每一列中各个因子水平所占的比例。非常感谢。
# Build the example data: the three categorical columns of mtcars.
# The original bare `mtcars;` statement only printed the whole dataset and is
# dropped; the row names are reset on a local copy of the built-in data.
rownames(mtcars) <- NULL
df <- mtcars[, c("cyl", "vs", "am")]
# Convert every selected column to a factor in one step.
df[] <- lapply(df, factor)
# Attempt: proportion of each level, per column. lapply() always returns a
# list, whereas sapply() may simplify to a matrix when all columns have the
# same number of levels. Non-factor columns yield NULL.
lapply(df, function(x) if (is.factor(x)) prop.table(table(x)))
预期答案
0 1 4 6 8
cyl NA NA 0.34 0.21 0.43
vs 0.56 0.43 NA NA NA
am 0.59 0.40 NA NA NA
您可以使用 dplyr::bind_rows：
library(dplyr)
# Per-column level proportions as a named list. lapply() is used instead of
# sapply(): sapply() simplifies the result to a matrix whenever all factors
# have the same number of levels, and that shape does not bind row-wise as
# intended. Non-factor columns yield NULL.
s <- lapply(df, function(x) if (is.factor(x)) prop.table(table(x)))
# One row per element of `s`; level names become columns, and levels a factor
# does not have are filled with NA. `.id` stores the list names in `col`,
# which is kept first while the level columns are sorted by name.
bind_rows(s, .id = "col") %>%
  relocate(col, order(colnames(.)))
## A tibble: 3 × 6
# col `0` `1` `4` `6` `8`
# <chr> <table> <table> <table> <table> <table>
#1 cyl NA NA 0.34375 0.21875 0.4375
#2 vs 0.56250 0.43750 NA NA NA
#3 am 0.59375 0.40625 NA NA NA
或者,使用行名:
# Same table, but the source column names go into the row names instead of an
# id column: bind the list row-wise, sort the level columns by name, convert
# to a plain data.frame and label the rows with the names of `s`.
s %>%
  bind_rows() %>%
  select(order(names(.))) %>%
  as.data.frame() %>%
  `rownames<-`(names(s))
# 0 1 4 6 8
#cyl NA NA 0.34375 0.21875 0.4375
#vs 0.56250 0.43750 NA NA NA
#am 0.59375 0.40625 NA NA NA
一种可能的紧凑解决方案，基于 purrr::map_dfr：
library(tidyverse)
# Map each column of `df` to its level proportions and row-bind the results.
# `.id` keeps the column name in "id", which is then moved into the row names;
# finally the level columns are sorted by name.
df %>%
  map_dfr(function(.x) prop.table(table(.x)), .id = "id") %>%
  column_to_rownames("id") %>%
  select(sort(names(.)))
#> 0 1 4 6 8
#> cyl NA NA 0.34375 0.21875 0.4375
#> vs 0.56250 0.43750 NA NA NA
#> am 0.59375 0.40625 NA NA NA
另一种可能的解决方案（不那么紧凑），基于 purrr::map 和 bind_rows：
library(tidyverse)
# Same idea without map_dfr: build the list of proportion tables, bind it
# row-wise, convert to a plain data.frame, label the rows with the column
# names of `df`, and sort the level columns by name.
df %>%
  map(function(.x) prop.table(table(.x))) %>%
  bind_rows() %>%
  as.data.frame() %>%
  `rownames<-`(names(df)) %>%
  select(sort(names(.)))
#> 0 1 4 6 8
#> cyl NA NA 0.34375 0.21875 0.4375
#> vs 0.56250 0.43750 NA NA NA
#> am 0.59375 0.40625 NA NA NA
我有一个包含多个因子列的数据集，想找一种简洁的方式，输出每一列中各个因子水平所占的比例。非常感谢。
# Build the example data: the three categorical columns of mtcars.
# The original bare `mtcars;` statement only printed the whole dataset and is
# dropped; the row names are reset on a local copy of the built-in data.
rownames(mtcars) <- NULL
df <- mtcars[, c("cyl", "vs", "am")]
# Convert every selected column to a factor in one step.
df[] <- lapply(df, factor)
# Attempt: proportion of each level, per column. lapply() always returns a
# list, whereas sapply() may simplify to a matrix when all columns have the
# same number of levels. Non-factor columns yield NULL.
lapply(df, function(x) if (is.factor(x)) prop.table(table(x)))
预期答案
0 1 4 6 8
cyl NA NA 0.34 0.21 0.43
vs 0.56 0.43 NA NA NA
am 0.59 0.40 NA NA NA
您可以使用 dplyr::bind_rows：
library(dplyr)
# Per-column level proportions as a named list. lapply() is used instead of
# sapply(): sapply() simplifies the result to a matrix whenever all factors
# have the same number of levels, and that shape does not bind row-wise as
# intended. Non-factor columns yield NULL.
s <- lapply(df, function(x) if (is.factor(x)) prop.table(table(x)))
# One row per element of `s`; level names become columns, and levels a factor
# does not have are filled with NA. `.id` stores the list names in `col`,
# which is kept first while the level columns are sorted by name.
bind_rows(s, .id = "col") %>%
  relocate(col, order(colnames(.)))
## A tibble: 3 × 6
# col `0` `1` `4` `6` `8`
# <chr> <table> <table> <table> <table> <table>
#1 cyl NA NA 0.34375 0.21875 0.4375
#2 vs 0.56250 0.43750 NA NA NA
#3 am 0.59375 0.40625 NA NA NA
或者,使用行名:
# Same table, but the source column names go into the row names instead of an
# id column: bind the list row-wise, sort the level columns by name, convert
# to a plain data.frame and label the rows with the names of `s`.
s %>%
  bind_rows() %>%
  select(order(names(.))) %>%
  as.data.frame() %>%
  `rownames<-`(names(s))
# 0 1 4 6 8
#cyl NA NA 0.34375 0.21875 0.4375
#vs 0.56250 0.43750 NA NA NA
#am 0.59375 0.40625 NA NA NA
一种可能的紧凑解决方案，基于 purrr::map_dfr：
library(tidyverse)
# Map each column of `df` to its level proportions and row-bind the results.
# `.id` keeps the column name in "id", which is then moved into the row names;
# finally the level columns are sorted by name.
df %>%
  map_dfr(function(.x) prop.table(table(.x)), .id = "id") %>%
  column_to_rownames("id") %>%
  select(sort(names(.)))
#> 0 1 4 6 8
#> cyl NA NA 0.34375 0.21875 0.4375
#> vs 0.56250 0.43750 NA NA NA
#> am 0.59375 0.40625 NA NA NA
另一种可能的解决方案（不那么紧凑），基于 purrr::map 和 bind_rows：
library(tidyverse)
# Same idea without map_dfr: build the list of proportion tables, bind it
# row-wise, convert to a plain data.frame, label the rows with the column
# names of `df`, and sort the level columns by name.
df %>%
  map(function(.x) prop.table(table(.x))) %>%
  bind_rows() %>%
  as.data.frame() %>%
  `rownames<-`(names(df)) %>%
  select(sort(names(.)))
#> 0 1 4 6 8
#> cyl NA NA 0.34375 0.21875 0.4375
#> vs 0.56250 0.43750 NA NA NA
#> am 0.59375 0.40625 NA NA NA