如何根据 facet wrap 中的 2 组中的 1 组对条形图进行排序?
How to sort bars according to 1 of 2 groups in a facet wrap?
希望有人能帮我解决以下问题:
我想显示 2 个不同组 (gruppe) 的不同实验室参数 (parameter) 的值 (avg)。此外,我想用 3 个不同的分面 (facet) 来展示这些值随时间的变化 (performance)。
这是数据集的一小部分:
# A tibble: 402 x 4
# Groups: gruppe, parameter [134]
gruppe parameter performance avg
<chr> <chr> <chr> <dbl>
1 DGE ACPA(citrull. Prot.-Ak) EIA/Se change_t1t0 NaN
2 DGE ACPA(citrull. Prot.-Ak) EIA/Se change_t2t0 37.6
3 DGE ACPA(citrull. Prot.-Ak) EIA/Se change_t3t0 NaN
4 Fasten Apolipoprot. A1 HP change_t1t0 41.2
5 DGE Apolipoprot. A1 HP change_t2t0 NaN
6 DGE Apolipoprot. A1 HP change_t3t0 NaN
7 DGE Apolipoprotein B change_t1t0 NaN
8 DGE Apolipoprotein B change_t2t0 NaN
9 Fasten Apolipoprotein B change_t3t0 NaN
10 DGE aPTT Pathromtin SL change_t1t0 0.571
# … with 392 more rows
使用这段代码完全没问题:
# Facet strip labels: each name is a value of the `performance` column, each
# value is the human-readable title shown on that facet.
lab_labels <- c(
  change_t1t0 = "Change from Baseline to Day 7 [%]",
  change_t2t0 = "Change from Baseline to Week 6 [%]",
  change_t3t0 = "Change from Baseline to Week 12 [%]"
)
# Dodged bar chart of `avg` per lab parameter, filled by group (`gruppe`),
# with one vertically stacked facet per `performance` value.
# NOTE: reorder(parameter, avg) ranks each parameter by the mean of all of its
# `avg` values, pooled across groups and facets -- it does not sort by a single
# group or facet.
labor_summ_long %>%
  filter(
    parameter %in% c(
      "Hämatokrit (l/l)", "Hämoglobin", "Leukozyten", "MCV", "MCH",
      "MCHC", "RDW-CV", "Thromobzyten", "MPV"
    )
  ) %>%
  arrange(desc(avg)) %>%
  group_by(gruppe, performance) %>%
  ggplot(
    aes(x = reorder(parameter, avg), y = avg, group = gruppe, fill = gruppe)
  ) +
  geom_col(position = "dodge") +
  facet_wrap(
    ~ performance,
    scales = "free_y",
    dir = "v",
    labeller = labeller(performance = lab_labels)
  ) +
  # Blank out the axis titles (plus the colour / linetype legend titles).
  labs(x = "", y = "", color = "", linetype = "") +
  theme_pubclean() +
  theme(
    strip.background = element_rect(fill = "lightgrey"),
    strip.text = element_text(face = "bold"),
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  # Named entries map the German parameter names to English axis labels.
  scale_x_discrete(
    labels = c(
      "Hämoglobin" = "Hemoglobin", "Leukozyten" = "Leucocytes",
      "MCV", "MCH", "MCHC", "RDW-CV", "Thromobzyten" = "Thrombocytes",
      "MPV", "Hämatokrit (l/l)" = "Hematocrite"
    )
  ) +
  scale_fill_discrete(labels = c("DGE", "Fasten" = "Fasting"))
This is what the plot looks like
我还缺少下面这一点,而且一直没能找到解决办法:
我想对条形进行排序……
- 按照从高到低的avg-value
- 禁食组(蓝色条)
- 针对从基线到第 7 天的变化 (change_t1t0),也就是第一个分面。
我尝试过 arrange、sort 等方法,但无法同时满足上述所有条件。
你有什么想法吗?
非常感谢!
问题是 reorder
通过取每个 parameter
的所有值的平均值重新排序,而不考虑任何分组。
根据您的情况对相关答案进行调整,并使用一些随机示例数据来模拟您的真实数据,可以这样实现:
辅助函数 reorder_where 允许只根据满足附加条件的行来对类别进行排序,在你的情况下,即 gruppe == "Fasten" & performance == "change_t1t0" 为 TRUE 的行。
library(dplyr)
library(ggplot2)
# Reorder the categories of `x` by a summary of `by` computed only on the
# elements selected by `where`.
#
# x     : vector of categories to be reordered.
# by    : values, parallel to `x`, that drive the ordering.
# where : subset selector applied to both `x` and `by` (e.g. a logical mask).
# fun   : summary function applied to `by` within each category (default mean).
# ...   : further arguments passed on to `fun` via tapply().
#
# Returns the result of reorder(x, <per-category summary>).
reorder_where <- function(x, by, where, fun = mean, ...) {
  # One summary value per category, using the selected elements only.
  per_category <- tapply(by[where], x[where], FUN = fun, ...)
  # Expand the per-category summary back to one score per element of `x`.
  scores <- per_category[x]
  reorder(x, scores)
}
# Same dodged bar chart as in the question, but the x axis is ordered with
# reorder_where(): parameters are ranked by `avg` taken only from the rows
# where gruppe == "Fasten" and performance == "change_t1t0". Passing -avg
# makes the ordering run from high to low.
labor_summ_long %>%
  filter(
    parameter %in% c(
      "Hämatokrit (l/l)", "Hämoglobin", "Leukozyten", "MCV", "MCH",
      "MCHC", "RDW-CV", "Thromobzyten", "MPV"
    )
  ) %>%
  ggplot(
    aes(
      x = reorder_where(
        parameter, -avg,
        gruppe == "Fasten" & performance == "change_t1t0"
      ),
      y = avg,
      group = gruppe,
      fill = gruppe
    )
  ) +
  geom_col(position = "dodge") +
  facet_wrap(
    ~ performance,
    scales = "free_y",
    dir = "v",
    labeller = labeller(performance = lab_labels)
  ) +
  # Blank out the axis titles (plus the colour / linetype legend titles).
  labs(x = "", y = "", color = "", linetype = "") +
  # theme_pubclean() +  # left disabled, exactly as in the original snippet
  theme(
    strip.background = element_rect(fill = "lightgrey"),
    strip.text = element_text(face = "bold"),
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  # Named entries map the German parameter names to English axis labels.
  scale_x_discrete(
    labels = c(
      "Hämoglobin" = "Hemoglobin", "Leukozyten" = "Leucocytes",
      "MCV", "MCH", "MCHC", "RDW-CV", "Thromobzyten" = "Thrombocytes",
      "MPV", "Hämatokrit (l/l)" = "Hematocrite"
    )
  ) +
  scale_fill_discrete(labels = c("DGE", "Fasten" = "Fasting"))
数据
# Reproducible mock data standing in for the real lab table: 100 random
# (parameter, gruppe, performance) rows with a uniform `avg` in [0, 50],
# then reduced to the first row of each parameter/gruppe/performance
# combination so every bar has a single value.
set.seed(42)
labor_summ_long <- dplyr::distinct(
  data.frame(
    parameter = sample(
      c(
        "Hämatokrit (l/l)", "Hämoglobin", "Leukozyten", "MCV", "MCH",
        "MCHC", "RDW-CV", "Thromobzyten", "MPV"
      ),
      100,
      replace = TRUE
    ),
    gruppe = sample(c("DGE", "Fasten"), 100, replace = TRUE),
    performance = sample(
      c("change_t1t0", "change_t2t0", "change_t3t0"),
      100,
      replace = TRUE
    ),
    avg = runif(100, 0, 50)
  ),
  parameter, gruppe, performance,
  .keep_all = TRUE
)
希望有人能帮我解决以下问题: 我想显示 2 个不同组 (gruppe) 的不同实验室参数 (parameter) 的值 (avg)。此外,我想用 3 个不同的分面 (facet) 来展示这些值随时间的变化 (performance)。 这是数据集的一小部分:
# A tibble: 402 x 4
# Groups: gruppe, parameter [134]
gruppe parameter performance avg
<chr> <chr> <chr> <dbl>
1 DGE ACPA(citrull. Prot.-Ak) EIA/Se change_t1t0 NaN
2 DGE ACPA(citrull. Prot.-Ak) EIA/Se change_t2t0 37.6
3 DGE ACPA(citrull. Prot.-Ak) EIA/Se change_t3t0 NaN
4 Fasten Apolipoprot. A1 HP change_t1t0 41.2
5 DGE Apolipoprot. A1 HP change_t2t0 NaN
6 DGE Apolipoprot. A1 HP change_t3t0 NaN
7 DGE Apolipoprotein B change_t1t0 NaN
8 DGE Apolipoprotein B change_t2t0 NaN
9 Fasten Apolipoprotein B change_t3t0 NaN
10 DGE aPTT Pathromtin SL change_t1t0 0.571
# … with 392 more rows
使用这段代码完全没问题:
# Facet strip labels: each name is a value of the `performance` column, each
# value is the human-readable title shown on that facet.
lab_labels <- c(
  change_t1t0 = "Change from Baseline to Day 7 [%]",
  change_t2t0 = "Change from Baseline to Week 6 [%]",
  change_t3t0 = "Change from Baseline to Week 12 [%]"
)
# Dodged bar chart of `avg` per lab parameter, filled by group (`gruppe`),
# with one vertically stacked facet per `performance` value.
# NOTE: reorder(parameter, avg) ranks each parameter by the mean of all of its
# `avg` values, pooled across groups and facets -- it does not sort by a single
# group or facet.
labor_summ_long %>%
  filter(
    parameter %in% c(
      "Hämatokrit (l/l)", "Hämoglobin", "Leukozyten", "MCV", "MCH",
      "MCHC", "RDW-CV", "Thromobzyten", "MPV"
    )
  ) %>%
  arrange(desc(avg)) %>%
  group_by(gruppe, performance) %>%
  ggplot(
    aes(x = reorder(parameter, avg), y = avg, group = gruppe, fill = gruppe)
  ) +
  geom_col(position = "dodge") +
  facet_wrap(
    ~ performance,
    scales = "free_y",
    dir = "v",
    labeller = labeller(performance = lab_labels)
  ) +
  # Blank out the axis titles (plus the colour / linetype legend titles).
  labs(x = "", y = "", color = "", linetype = "") +
  theme_pubclean() +
  theme(
    strip.background = element_rect(fill = "lightgrey"),
    strip.text = element_text(face = "bold"),
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  # Named entries map the German parameter names to English axis labels.
  scale_x_discrete(
    labels = c(
      "Hämoglobin" = "Hemoglobin", "Leukozyten" = "Leucocytes",
      "MCV", "MCH", "MCHC", "RDW-CV", "Thromobzyten" = "Thrombocytes",
      "MPV", "Hämatokrit (l/l)" = "Hematocrite"
    )
  ) +
  scale_fill_discrete(labels = c("DGE", "Fasten" = "Fasting"))
This is what the plot looks like
我还缺少下面这一点,而且一直没能找到解决办法: 我想对条形进行排序……
- 按照从高到低的avg-value
- 禁食组(蓝色条)
- 针对从基线到第 7 天的变化 (change_t1t0),也就是第一个分面。
我尝试过 arrange、sort 等方法,但无法同时满足上述所有条件。
你有什么想法吗? 非常感谢!
问题是 reorder
通过取每个 parameter
的所有值的平均值重新排序,而不考虑任何分组。
根据您的情况对相关答案进行调整,可以这样实现:
辅助函数 reorder_where 允许只根据满足附加条件的行来对类别进行排序,在你的情况下,即 gruppe == "Fasten" & performance == "change_t1t0" 为 TRUE 的行。
library(dplyr)
library(ggplot2)
# Reorder the categories of `x` by a summary of `by` computed only on the
# elements selected by `where`.
#
# x     : vector of categories to be reordered.
# by    : values, parallel to `x`, that drive the ordering.
# where : subset selector applied to both `x` and `by` (e.g. a logical mask).
# fun   : summary function applied to `by` within each category (default mean).
# ...   : further arguments passed on to `fun` via tapply().
#
# Returns the result of reorder(x, <per-category summary>).
reorder_where <- function(x, by, where, fun = mean, ...) {
  # One summary value per category, using the selected elements only.
  per_category <- tapply(by[where], x[where], FUN = fun, ...)
  # Expand the per-category summary back to one score per element of `x`.
  scores <- per_category[x]
  reorder(x, scores)
}
# Same dodged bar chart as in the question, but the x axis is ordered with
# reorder_where(): parameters are ranked by `avg` taken only from the rows
# where gruppe == "Fasten" and performance == "change_t1t0". Passing -avg
# makes the ordering run from high to low.
labor_summ_long %>%
  filter(
    parameter %in% c(
      "Hämatokrit (l/l)", "Hämoglobin", "Leukozyten", "MCV", "MCH",
      "MCHC", "RDW-CV", "Thromobzyten", "MPV"
    )
  ) %>%
  ggplot(
    aes(
      x = reorder_where(
        parameter, -avg,
        gruppe == "Fasten" & performance == "change_t1t0"
      ),
      y = avg,
      group = gruppe,
      fill = gruppe
    )
  ) +
  geom_col(position = "dodge") +
  facet_wrap(
    ~ performance,
    scales = "free_y",
    dir = "v",
    labeller = labeller(performance = lab_labels)
  ) +
  # Blank out the axis titles (plus the colour / linetype legend titles).
  labs(x = "", y = "", color = "", linetype = "") +
  # theme_pubclean() +  # left disabled, exactly as in the original snippet
  theme(
    strip.background = element_rect(fill = "lightgrey"),
    strip.text = element_text(face = "bold"),
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  # Named entries map the German parameter names to English axis labels.
  scale_x_discrete(
    labels = c(
      "Hämoglobin" = "Hemoglobin", "Leukozyten" = "Leucocytes",
      "MCV", "MCH", "MCHC", "RDW-CV", "Thromobzyten" = "Thrombocytes",
      "MPV", "Hämatokrit (l/l)" = "Hematocrite"
    )
  ) +
  scale_fill_discrete(labels = c("DGE", "Fasten" = "Fasting"))
数据
# Reproducible mock data standing in for the real lab table: 100 random
# (parameter, gruppe, performance) rows with a uniform `avg` in [0, 50],
# then reduced to the first row of each parameter/gruppe/performance
# combination so every bar has a single value.
set.seed(42)
labor_summ_long <- dplyr::distinct(
  data.frame(
    parameter = sample(
      c(
        "Hämatokrit (l/l)", "Hämoglobin", "Leukozyten", "MCV", "MCH",
        "MCHC", "RDW-CV", "Thromobzyten", "MPV"
      ),
      100,
      replace = TRUE
    ),
    gruppe = sample(c("DGE", "Fasten"), 100, replace = TRUE),
    performance = sample(
      c("change_t1t0", "change_t2t0", "change_t3t0"),
      100,
      replace = TRUE
    ),
    avg = runif(100, 0, 50)
  ),
  parameter, gruppe, performance,
  .keep_all = TRUE
)