R中因子值的比例

proportion of factor values in R

我有一个包含多个因子变量的数据集。我想找一种简洁的方式,打印出每个变量中各个取值所占的比例。非常感谢。

# Build a small all-factor example from three columns of the built-in mtcars
# data. Columns are picked by name rather than by position, and only `df` is
# modified so the built-in `mtcars` is not shadowed by a global copy.
df <- mtcars[, c("cyl", "vs", "am")]
rownames(df) <- NULL        # drop the row labels inherited from mtcars
df[] <- lapply(df, factor)  # `df[]` keeps the data.frame structure

# Share of each level for every factor column (non-factor columns give NULL).
# lapply() always returns a list; sapply() would switch to a matrix whenever
# all factors happen to have the same number of levels.
lapply(df, function(x) if (is.factor(x)) prop.table(table(x)))

预期答案

     0    1     4    6    8 
cyl  NA  NA     0.34 0.21 0.43 
vs   0.56 0.43  NA   NA   NA
am   0.59 0.40  NA   NA   NA

您可以使用 dplyr::bind_rows:

library(dplyr)
# Share of each level, one table per factor column (non-factor columns give
# NULL). lapply() is used instead of sapply() because sapply() collapses the
# result into a matrix whenever all factors have the same number of levels,
# while bind_rows() below needs a list of named vectors.
s <- lapply(df, function(x) if (is.factor(x)) prop.table(table(x)))

# Stack the tables into one row per variable; the list names go into `col`.
# Then move `col` to the front and sort the remaining level columns by name
# (character order, so a level "10" would sort before "4").
bind_rows(s, .id = "col") %>%
  relocate(col, order(colnames(.)))

## A tibble: 3 × 6
#  col   `0`     `1`     `4`     `6`     `8`    
#  <chr> <table> <table> <table> <table> <table>
#1 cyl        NA      NA 0.34375 0.21875 0.4375 
#2 vs    0.56250 0.43750      NA      NA     NA 
#3 am    0.59375 0.40625      NA      NA     NA 

或者,使用行名:

# Same table as above, but with the variable names as row names instead of an
# id column. local() keeps the intermediate object out of the global
# environment while still printing the result at top level.
local({
  wide <- as.data.frame(bind_rows(s))
  # Sort the level columns by name; drop = FALSE keeps a data.frame even if
  # only one level column exists.
  wide <- wide[, sort(colnames(wide)), drop = FALSE]
  rownames(wide) <- names(s)
  wide
})

#          0       1       4       6      8
#cyl      NA      NA 0.34375 0.21875 0.4375
#vs  0.56250 0.43750      NA      NA     NA
#am  0.59375 0.40625      NA      NA     NA

一个可能的紧凑的解决方案,基于purrr::map_dfr

library(tidyverse)

# Compute the level shares per column and row-bind them in one step; the
# column names of `df` are stored in `id`, turned into row names, and the
# level columns are then sorted by name.
df %>%
  map_dfr(function(.x) prop.table(table(.x)), .id = "id") %>%
  column_to_rownames(var = "id") %>%
  select(sort(names(.)))

#>           0       1       4       6      8
#> cyl      NA      NA 0.34375 0.21875 0.4375
#> vs  0.56250 0.43750      NA      NA     NA
#> am  0.59375 0.40625      NA      NA     NA

另一个可能的解决方案(不那么紧凑),基于 purrr::map 和 dplyr::bind_rows:

library(tidyverse)

# Two-step variant: build the list of level-share tables with map(), stack
# them with bind_rows(), sort the level columns by name, and finally label
# the rows with the column names of `df`.
df %>%
  map(function(.x) prop.table(table(.x))) %>%
  bind_rows() %>%
  as.data.frame() %>%
  select(sort(names(.))) %>%
  `rownames<-`(names(df))

#>           0       1       4       6      8
#> cyl      NA      NA 0.34375 0.21875 0.4375
#> vs  0.56250 0.43750      NA      NA     NA
#> am  0.59375 0.40625      NA      NA     NA