在包含分类观测值的 df 中按某个变量进行透视、计数和分组

Question

我有这个具有可重现数据的 df:

# dput() output of the sample data: 10 rows, six ordered-factor columns sharing
# the levels "muy efectiva" < "algo efectiva" < "no efectiva" < "no se", plus
# the character column `Orientación dicotómica` ("Neurogastro" /
# "No neurogastro"). The expression is not assigned here; the code further
# down refers to this object as `df`.
structure(list(`Loperamida en diarrea` = structure(c(1L, 1L, 
1L, 2L, 4L, 3L, 4L, 1L, 2L, 1L), .Label = c("muy efectiva", "algo efectiva", 
"no efectiva", "no se"), class = c("ordered", "factor")), `Carbón en diarrea` = structure(c(2L, 
2L, 2L, 4L, 4L, 3L, 4L, 3L, 3L, 4L), .Label = c("muy efectiva", 
"algo efectiva", "no efectiva", "no se"), class = c("ordered", 
"factor")), `Bismuto en diarrea` = structure(c(2L, 1L, 2L, 4L, 
3L, 3L, 2L, 2L, 2L, 1L), .Label = c("muy efectiva", "algo efectiva", 
"no efectiva", "no se"), class = c("ordered", "factor")), `Rifaximina en diarrea` = structure(c(2L, 
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L), .Label = c("muy efectiva", 
"algo efectiva", "no efectiva", "no se"), class = c("ordered", 
"factor")), `Otros antibióticos en diarrea` = structure(c(2L, 
1L, 2L, 2L, 2L, 1L, 1L, 3L, 3L, 4L), .Label = c("muy efectiva", 
"algo efectiva", "no efectiva", "no se"), class = c("ordered", 
"factor")), `Probióticos en diarrea` = structure(c(2L, 2L, 2L, 
2L, 2L, 1L, 2L, 3L, 3L, 2L), .Label = c("muy efectiva", "algo efectiva", 
"no efectiva", "no se"), class = c("ordered", "factor")), `Orientación dicotómica` = c("Neurogastro", 
"Neurogastro", "Neurogastro", "No neurogastro", "Neurogastro", 
"Neurogastro", "Neurogastro", "No neurogastro", "No neurogastro", 
"No neurogastro")), row.names = c(NA, 10L), class = "data.frame")

我使用以下代码对 df 进行透视（pivot），创建了一个统计各分类观测值数量的数据框：

library(tidyverse)
library(janitor)

# Count every answer per treatment column, merge the two "efectiva" levels
# into a single column, and format each row as "percent (n)".
df_tto <- df %>%
  # Long format: one row per (original column name, answer) pair.
  pivot_longer(cols = everything()) %>%
  count(name, value) %>%
  # One column per answer level; combinations that never occur become 0.
  pivot_wider(names_from = value, values_from = n, values_fill = 0) %>%
  mutate(
    efectiva = `algo efectiva` + `muy efectiva`,
    # Strip the shared suffix on the pivoted result: the raw `df` has no
    # `name` column, so `df$name <- str_remove(df$name, ...)` cannot work.
    name = str_remove(name, " en diarrea")
  ) %>%
  arrange(desc(efectiva)) %>%
  select(name, efectiva, `no efectiva`) %>%
  # Row-wise percentages of efectiva vs. no efectiva, with counts appended.
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns()

df_tto

结果如下所示：

                name    efectiva no efectiva
             Rifaximina 100.0% (10)    0.0% (0)
            Probióticos  80.0%  (8)   20.0% (2)
                Bismuto  77.8%  (7)   22.2% (2)
             Loperamida  87.5%  (7)   12.5% (1)
     Otros antibióticos  77.8%  (7)   22.2% (2)
                 Carbón  50.0%  (3)   50.0% (3)
 Orientación dicotómica      -  (0)       - (0)

我一直在尝试通过变量 Orientación dicotómica（Neurogastro vs No neurogastro）来分隔列，但我一直无法解决。我期望的是这样的：


                             Neurogastro                 No neurogastro 
                 name   efectiva    no efectiva     efectiva    no efectiva
             Rifaximina 98.1% (52)   1.9%  (1)      96.4% (240)  3.6%   (9)
                  Dieta 98.1% (51)   1.9%  (1)      91.6% (229)  8.4%  (21)
            Trimebutina 96.0% (48)   4.0%  (2)      86.3% (214) 13.7%  (34)
          Amitriptilina 97.8% (45)   2.2%  (1)      88.8% (214) 11.2%  (27)
 Trimebutina/simeticona 88.2% (45)  11.8%  (6)      84.0% (205) 16.0%  (39)
       Antiespasmódicos 93.6% (44)   6.4%  (3)      81.4% (184) 18.6%  (42)

有什么建议吗？

Answer 1

编辑：虽然回复得有点晚。

library(janitor)
library(tidyverse)

# Tally the answers per orientation group and treatment, collapse the two
# "efectiva" levels into one column, and format each row as "percent (n)".
out <- df %>%
  pivot_longer(cols = 1:6) %>%
  count(`Orientación dicotómica`, name, value) %>%
  pivot_wider(
    id_cols = c(`Orientación dicotómica`, name),
    names_from = value,
    values_from = n,
    values_fill = 0,
    values_fn = sum
  ) %>%
  mutate(efectiva = `algo efectiva` + `muy efectiva`) %>%
  select(`Orientación dicotómica`, name, efectiva, `no efectiva`) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns()

# Take each orientation group separately, prefix its two result columns with
# the group label, and join the halves side by side on the treatment name.
merge(
  out %>%
    filter(`Orientación dicotómica` == "Neurogastro") %>%
    select(
      name,
      `Neurogastro efectiva` = efectiva,
      `Neurogastro no efectiva` = `no efectiva`
    ),
  out %>%
    filter(`Orientación dicotómica` == "No neurogastro") %>%
    select(
      name,
      `No Neurogastro efectiva` = efectiva,
      `No Neurogastro no efectiva` = `no efectiva`
    ),
  by = "name"
)

                           name Neurogastro efectiva Neurogastro no efectiva No Neurogastro efectiva No Neurogastro no efectiva
1            Bismuto en diarrea            66.7% (4)               33.3% (2)              100.0% (3)                   0.0% (0)
2             Carbón en diarrea            75.0% (3)               25.0% (1)                0.0% (0)                 100.0% (2)
3         Loperamida en diarrea            75.0% (3)               25.0% (1)              100.0% (4)                   0.0% (0)
4 Otros antibióticos en diarrea           100.0% (6)                0.0% (0)               33.3% (1)                  66.7% (2)
5        Probióticos en diarrea           100.0% (6)                0.0% (0)               50.0% (2)                  50.0% (2)
6         Rifaximina en diarrea           100.0% (6)                0.0% (0)              100.0% (4)                   0.0% (0)

Answer 2

这不是我要找的东西，但已经很接近了：

## first I create 2 df's with filter:
## df1 keeps the rows whose `Orientación dicotómica` is "Neurogastro",
## df2 keeps every other row.
library(dplyr)
df1 <- df %>% filter(`Orientación dicotómica` == "Neurogastro")
df2 <- df %>% filter(`Orientación dicotómica` != "Neurogastro")

## then I bind the 2 df's
## (column-wise, side by side; NOTE(review): cbind() on data frames needs both
## halves to have the same number of rows -- confirm for the data used here)
df3 <- cbind(df1,df2) 

## finally I drop the repeated column and create a new df with renamed columns
## `df3[-4]` removes the 4th column; columns 2-3 and 5-6 of df3 are passed on
## as `Neurogastro` and `No_neurogastro`.
## NOTE(review): `df_tto` is not defined in the code shown here; presumably it
## is the summary table that holds the treatment names -- confirm.
df4 <- as.data.frame(as.matrix(df3[-4]) %>%
  list(name = df_tto$name, Neurogastro = df3[, c(2,3)], No_neurogastro = df3[,c(5,6)])) 

## drop row 16 and columns 2 to 6 of the combined result
## NOTE(review): these positions are hard-coded for one specific table shape.
df4 <- df4[-(16),-(2:6)]

对此可能有更好的答案和简化的代码，但这就是我所能想到的，无论如何，它几乎完成了工作...

在包含分类观测值的 df 中按某个变量进行透视、计数和分组

Pivot, count and group by a certain variable in df with categorical observations

merge

join

r

dataframe

janitor