对包含分类观测值的 df 按某个变量进行透视、计数和分组
Pivot, count and group by a certain variable in df with categorical observations
我有这个具有可重现数据的 df:
# Reproducible sample data, bound to `df` so the code that uses `df` can run
# as pasted. Six ordered-factor rating columns (one per treatment) plus the
# character grouping column `Orientación dicotómica`; 10 rows.
df <- structure(list(
  `Loperamida en diarrea` = structure(
    c(1L, 1L, 1L, 2L, 4L, 3L, 4L, 1L, 2L, 1L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Carbón en diarrea` = structure(
    c(2L, 2L, 2L, 4L, 4L, 3L, 4L, 3L, 3L, 4L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Bismuto en diarrea` = structure(
    c(2L, 1L, 2L, 4L, 3L, 3L, 2L, 2L, 2L, 1L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Rifaximina en diarrea` = structure(
    c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Otros antibióticos en diarrea` = structure(
    c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 3L, 3L, 4L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Probióticos en diarrea` = structure(
    c(2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 2L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Orientación dicotómica` = c(
    "Neurogastro", "Neurogastro", "Neurogastro", "No neurogastro",
    "Neurogastro", "Neurogastro", "Neurogastro", "No neurogastro",
    "No neurogastro", "No neurogastro"
  )
), row.names = c(NA, 10L), class = "data.frame")
我通过以下代码对 df 进行透视(pivot),创建了一个统计各分类观测值数量的数据框:
library(tidyverse)
library(janitor)

# Count the answers per treatment and report "efectiva" vs "no efectiva" as
# row percentages with the raw counts in parentheses.
df %>%
  # everything() also pivots `Orientación dicotómica`, which is why that
  # column shows up as an extra row with zero counts in the result.
  pivot_longer(cols = everything()) %>%
  count(name, value) %>%
  pivot_wider(names_from = value, values_from = n, values_fill = 0) %>%
  # "efectiva" pools the two positive answers.
  mutate(efectiva = `algo efectiva` + `muy efectiva`) %>%
  arrange(desc(efectiva)) %>%
  select(name, efectiva, `no efectiva`) %>%
  # Shorten the labels inside the pipeline: the summary is never assigned and
  # the raw `df` has no `name` column, so a separate
  # `df$name <- str_remove(df$name, ...)` step cannot work.
  mutate(name = str_remove(name, " en diarrea")) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns()
结果如下所示:
name efectiva no efectiva
Rifaximina 100.0% (10) 0.0% (0)
Probióticos 80.0% (8) 20.0% (2)
Bismuto 77.8% (7) 22.2% (2)
Loperamida 87.5% (7) 12.5% (1)
Otros antibióticos 77.8% (7) 22.2% (2)
Carbón 50.0% (3) 50.0% (3)
Orientación dicotómica - (0) - (0)
我一直在尝试通过变量 Orientación dicotómica
(Neurogastro vs No neurogastro)来分隔列,但我一直无法解决。我期望的是这样的:
Neurogastro No neurogastro
name efectiva no efectiva efectiva no efectiva
Rifaximina 98.1% (52) 1.9% (1) 96.4% (240) 3.6% (9)
Dieta 98.1% (51) 1.9% (1) 91.6% (229) 8.4% (21)
Trimebutina 96.0% (48) 4.0% (2) 86.3% (214) 13.7% (34)
Amitriptilina 97.8% (45) 2.2% (1) 88.8% (214) 11.2% (27)
Trimebutina/simeticona 88.2% (45) 11.8% (6) 84.0% (205) 16.0% (39)
Antiespasmódicos 93.6% (44) 6.4% (3) 81.4% (184) 18.6% (42)
有什么建议吗?
虽然有点迟,但补充一个编辑后的答案:
library(janitor)
library(tidyverse)

# Per-group summary: for each orientation and treatment, the row percentage
# (and count) of "efectiva" vs "no efectiva" answers.
out <- df %>%
  pivot_longer(cols = 1:6) %>%
  count(`Orientación dicotómica`, name, value) %>%
  pivot_wider(
    id_cols = c(`Orientación dicotómica`, name),
    names_from = value,
    values_from = n,
    values_fill = 0,
    values_fn = sum
  ) %>%
  mutate(efectiva = `algo efectiva` + `muy efectiva`) %>%
  select(`Orientación dicotómica`, name, efectiva, `no efectiva`) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns()

# Put the two orientation groups side by side, one row per treatment.
merge(
  out %>%
    filter(`Orientación dicotómica` == "Neurogastro") %>%
    select(
      name,
      `Neurogastro efectiva` = efectiva,
      `Neurogastro no efectiva` = `no efectiva`
    ),
  out %>%
    filter(`Orientación dicotómica` == "No neurogastro") %>%
    select(
      name,
      `No Neurogastro efectiva` = efectiva,
      `No Neurogastro no efectiva` = `no efectiva`
    ),
  by = "name"
)
name Neurogastro efectiva Neurogastro no efectiva No Neurogastro efectiva No Neurogastro no efectiva
1 Bismuto en diarrea 66.7% (4) 33.3% (2) 100.0% (3) 0.0% (0)
2 Carbón en diarrea 75.0% (3) 25.0% (1) 0.0% (0) 100.0% (2)
3 Loperamida en diarrea 75.0% (3) 25.0% (1) 100.0% (4) 0.0% (0)
4 Otros antibióticos en diarrea 100.0% (6) 0.0% (0) 33.3% (1) 66.7% (2)
5 Probióticos en diarrea 100.0% (6) 0.0% (0) 50.0% (2) 50.0% (2)
6 Rifaximina en diarrea 100.0% (6) 0.0% (0) 100.0% (4) 0.0% (0)
这不是我要找的东西,但已经很接近了:
## First, split df into two data frames by `Orientación dicotómica`:
library(dplyr)
df1 <- df %>% filter(`Orientación dicotómica` == "Neurogastro")
df2 <- df %>% filter(`Orientación dicotómica` != "Neurogastro")
## Then bind the two data frames side by side.
## NOTE(review): cbind() on data frames needs matching row counts -- confirm
## both groups have the same number of rows in the `df` used here.
df3 <- cbind(df1,df2)
## Finally, drop the repeated column and build a new data frame with renamed
## columns.
## NOTE(review): `df_tto` is not defined anywhere in the visible code --
## confirm where it comes from. The column positions (4, 2:3, 5:6) are
## hard-coded and presumably refer to a summarised table, not the raw data.
df4 <- as.data.frame(as.matrix(df3[-4]) %>%
list(name = df_tto$name, Neurogastro = df3[, c(2,3)], No_neurogastro = df3[,c(5,6)]))
## Drop row 16 and columns 2 to 6 of the result.
df4 <- df4[-(16),-(2:6)]
对此可能有更好的答案和简化的代码,但这就是我所能想到的,无论如何,它几乎完成了工作...
我有这个具有可重现数据的 df:
# Reproducible sample data, bound to `df` so the code that uses `df` can run
# as pasted. Six ordered-factor rating columns (one per treatment) plus the
# character grouping column `Orientación dicotómica`; 10 rows.
df <- structure(list(
  `Loperamida en diarrea` = structure(
    c(1L, 1L, 1L, 2L, 4L, 3L, 4L, 1L, 2L, 1L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Carbón en diarrea` = structure(
    c(2L, 2L, 2L, 4L, 4L, 3L, 4L, 3L, 3L, 4L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Bismuto en diarrea` = structure(
    c(2L, 1L, 2L, 4L, 3L, 3L, 2L, 2L, 2L, 1L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Rifaximina en diarrea` = structure(
    c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Otros antibióticos en diarrea` = structure(
    c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 3L, 3L, 4L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Probióticos en diarrea` = structure(
    c(2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 2L),
    .Label = c("muy efectiva", "algo efectiva", "no efectiva", "no se"),
    class = c("ordered", "factor")
  ),
  `Orientación dicotómica` = c(
    "Neurogastro", "Neurogastro", "Neurogastro", "No neurogastro",
    "Neurogastro", "Neurogastro", "Neurogastro", "No neurogastro",
    "No neurogastro", "No neurogastro"
  )
), row.names = c(NA, 10L), class = "data.frame")
我通过以下代码对 df 进行透视(pivot),创建了一个统计各分类观测值数量的数据框:
library(tidyverse)
library(janitor)

# Count the answers per treatment and report "efectiva" vs "no efectiva" as
# row percentages with the raw counts in parentheses.
df %>%
  # everything() also pivots `Orientación dicotómica`, which is why that
  # column shows up as an extra row with zero counts in the result.
  pivot_longer(cols = everything()) %>%
  count(name, value) %>%
  pivot_wider(names_from = value, values_from = n, values_fill = 0) %>%
  # "efectiva" pools the two positive answers.
  mutate(efectiva = `algo efectiva` + `muy efectiva`) %>%
  arrange(desc(efectiva)) %>%
  select(name, efectiva, `no efectiva`) %>%
  # Shorten the labels inside the pipeline: the summary is never assigned and
  # the raw `df` has no `name` column, so a separate
  # `df$name <- str_remove(df$name, ...)` step cannot work.
  mutate(name = str_remove(name, " en diarrea")) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns()
结果如下所示:
name efectiva no efectiva
Rifaximina 100.0% (10) 0.0% (0)
Probióticos 80.0% (8) 20.0% (2)
Bismuto 77.8% (7) 22.2% (2)
Loperamida 87.5% (7) 12.5% (1)
Otros antibióticos 77.8% (7) 22.2% (2)
Carbón 50.0% (3) 50.0% (3)
Orientación dicotómica - (0) - (0)
我一直在尝试通过变量 Orientación dicotómica
(Neurogastro vs No neurogastro)来分隔列,但我一直无法解决。我期望的是这样的:
Neurogastro No neurogastro
name efectiva no efectiva efectiva no efectiva
Rifaximina 98.1% (52) 1.9% (1) 96.4% (240) 3.6% (9)
Dieta 98.1% (51) 1.9% (1) 91.6% (229) 8.4% (21)
Trimebutina 96.0% (48) 4.0% (2) 86.3% (214) 13.7% (34)
Amitriptilina 97.8% (45) 2.2% (1) 88.8% (214) 11.2% (27)
Trimebutina/simeticona 88.2% (45) 11.8% (6) 84.0% (205) 16.0% (39)
Antiespasmódicos 93.6% (44) 6.4% (3) 81.4% (184) 18.6% (42)
有什么建议吗?
虽然有点迟,但补充一个编辑后的答案:
library(janitor)
library(tidyverse)

# Per-group summary: for each orientation and treatment, the row percentage
# (and count) of "efectiva" vs "no efectiva" answers.
out <- df %>%
  pivot_longer(cols = 1:6) %>%
  count(`Orientación dicotómica`, name, value) %>%
  pivot_wider(
    id_cols = c(`Orientación dicotómica`, name),
    names_from = value,
    values_from = n,
    values_fill = 0,
    values_fn = sum
  ) %>%
  mutate(efectiva = `algo efectiva` + `muy efectiva`) %>%
  select(`Orientación dicotómica`, name, efectiva, `no efectiva`) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns()

# Put the two orientation groups side by side, one row per treatment.
merge(
  out %>%
    filter(`Orientación dicotómica` == "Neurogastro") %>%
    select(
      name,
      `Neurogastro efectiva` = efectiva,
      `Neurogastro no efectiva` = `no efectiva`
    ),
  out %>%
    filter(`Orientación dicotómica` == "No neurogastro") %>%
    select(
      name,
      `No Neurogastro efectiva` = efectiva,
      `No Neurogastro no efectiva` = `no efectiva`
    ),
  by = "name"
)
name Neurogastro efectiva Neurogastro no efectiva No Neurogastro efectiva No Neurogastro no efectiva
1 Bismuto en diarrea 66.7% (4) 33.3% (2) 100.0% (3) 0.0% (0)
2 Carbón en diarrea 75.0% (3) 25.0% (1) 0.0% (0) 100.0% (2)
3 Loperamida en diarrea 75.0% (3) 25.0% (1) 100.0% (4) 0.0% (0)
4 Otros antibióticos en diarrea 100.0% (6) 0.0% (0) 33.3% (1) 66.7% (2)
5 Probióticos en diarrea 100.0% (6) 0.0% (0) 50.0% (2) 50.0% (2)
6 Rifaximina en diarrea 100.0% (6) 0.0% (0) 100.0% (4) 0.0% (0)
这不是我要找的东西,但已经很接近了:
## First, split df into two data frames by `Orientación dicotómica`:
library(dplyr)
df1 <- df %>% filter(`Orientación dicotómica` == "Neurogastro")
df2 <- df %>% filter(`Orientación dicotómica` != "Neurogastro")
## Then bind the two data frames side by side.
## NOTE(review): cbind() on data frames needs matching row counts -- confirm
## both groups have the same number of rows in the `df` used here.
df3 <- cbind(df1,df2)
## Finally, drop the repeated column and build a new data frame with renamed
## columns.
## NOTE(review): `df_tto` is not defined anywhere in the visible code --
## confirm where it comes from. The column positions (4, 2:3, 5:6) are
## hard-coded and presumably refer to a summarised table, not the raw data.
df4 <- as.data.frame(as.matrix(df3[-4]) %>%
list(name = df_tto$name, Neurogastro = df3[, c(2,3)], No_neurogastro = df3[,c(5,6)]))
## Drop row 16 and columns 2 to 6 of the result.
df4 <- df4[-(16),-(2:6)]
对此可能有更好的答案和简化的代码,但这就是我所能想到的,无论如何,它几乎完成了工作...