R:如何在 ggplots 中按另一列(因子或字符标签)对字符列进行排序
R: How can I order a character column by another column (factor or character label) in ggplots
我正在尝试使用 ggplot 绘制冲积地块。到目前为止一切顺利,直到我想尝试清理情节。
正如您在图中看到的那样,从左到右,第一个 stratum/column 是 ID 列,然后是一列标签:疾病风险。我想要实现的是在外图中,而不是让患者 ID 曲折,我希望它们按疾病风险列排序,以便所有高风险 ID 都在顶部,其次是低风险,然后是非充满的。这样就更容易看出有没有关系了。
我四处寻找 arrange() 和 order() 函数,它们似乎对我的实际输入数据有用,但是一旦我在 ggplot 中传递该数据框,输出图形仍然是混乱的。
我想到了将 ID 设置为 factor,然后使用 levels=...。但是如果患者 ID 不断增长,这不是很明智。
有没有更聪明的方法?请赐教。我在示例数据中附上了 link。
https://drive.google.com/file/d/16Pd8V3MCgEHmZEButVi2UjDiwZWklK-T/view?usp=sharing
我绘制图表的代码:
library(tidyr)
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)
# Define the number of colors you want
nb.cols <- 10
mycolor1 <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
mycolors <- c("Black")
#read the data
CLL3S.plusrec <- read.csv("xxxx.CSV", as.is = T)
CLL3S.plusrec$risk_by_DS <- factor(CLL3S.plusrec$risk_by_DS, levels = c("low_risk", "high_risk", "Not filled"))
CLL3S.plusrec$`Enriched response phenotype` <- factor(CLL3S.plusrec$`Enriched response phenotype`, levels = c("Live cells","Pre-dead", "TN & PDB", "PDB & Lenalidomide", "TN & STSVEN & Live cells","Mixed"))
#here I reorder the dataframe and it looks good
#but the output ggplot changes the order of ID in the output graph
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
d <-ggplot(OR, aes(y = count,
axis1= Patient.ID,
axis2= risk_by_DS,
axis3 = `Cluster assigned consensus`,
axis4 = `Cluster assigned single drug`,
axis5 = `Enriched response phenotype`
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=`Cluster assigned consensus`)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
print(d)
不确定这是否是您想要的,请尝试以这种方式格式化风险列:
library(tidyr)
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)
# Define the number of colors you want
nb.cols <- 10
mycolor1 <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
mycolors <- c("Black")
#read the data
CLL3S.plusrec <- read.csv("test data.CSV", as.is = T)
CLL3S.plusrec$risk_by_DS <- factor(CLL3S.plusrec$risk_by_DS,
levels = c("high_risk","low_risk","Not filled"),ordered = T)
CLL3S.plusrec$Enriched.response.phenotype <- factor(CLL3S.plusrec$Enriched.response.phenotype, levels = c("Live cells","Pre-dead", "TN & PDB", "PDB & Lenalidomide", "TN & STSVEN & Live cells","Mixed"))
#here I reorder the dataframe and it looks good
#but the output ggplot changes the order of ID in the output graph
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
ggplot(OR, aes(y = count,
axis1= reorder(Patient.ID,risk_by_DS),
axis2= risk_by_DS,
axis3 = reorder(Cluster.assigned.consensus,risk_by_DS),
axis4 = reorder(Cluster.assigned.single.drug,risk_by_DS),
axis5 = reorder(Enriched.response.phenotype,risk_by_DS)
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=Cluster.assigned.consensus)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
输出:
在我的 read.csv()
中,引号也消失了,点在变量中。这就是为什么您原来引用的变量现在有点。可能是阅读的问题。
更新:
#Update
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
OR <- OR[order(OR$risk_by_DS,OR$Patient.ID),]
OR$Patient.ID <- factor(OR$Patient.ID,levels = unique(OR$Patient.ID),ordered = T)
#Plot
ggplot(OR, aes(y = count,
axis1= reorder(Patient.ID,risk_by_DS),
axis2= risk_by_DS,
axis3 = reorder(Cluster.assigned.consensus,risk_by_DS),
axis4 = reorder(Cluster.assigned.single.drug,risk_by_DS),
axis5 = reorder(Enriched.response.phenotype,risk_by_DS)
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=Cluster.assigned.consensus)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
输出:
我正在尝试使用 ggplot 绘制冲积地块。到目前为止一切顺利,直到我想尝试清理情节。
正如您在图中看到的那样,从左到右,第一个 stratum/column 是 ID 列,然后是一列标签:疾病风险。我想要实现的是在外图中,而不是让患者 ID 曲折,我希望它们按疾病风险列排序,以便所有高风险 ID 都在顶部,其次是低风险,然后是非充满的。这样就更容易看出有没有关系了。
我四处寻找 arrange() 和 order() 函数,它们似乎对我的实际输入数据有用,但是一旦我在 ggplot 中传递该数据框,输出图形仍然是混乱的。
我想到了将 ID 设置为 factor,然后使用 levels=...。但是如果患者 ID 不断增长,这不是很明智。
有没有更聪明的方法?请赐教。我在示例数据中附上了 link。
https://drive.google.com/file/d/16Pd8V3MCgEHmZEButVi2UjDiwZWklK-T/view?usp=sharing
我绘制图表的代码:
library(tidyr)
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)
# Define the number of colors you want
nb.cols <- 10
mycolor1 <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
mycolors <- c("Black")
#read the data
CLL3S.plusrec <- read.csv("xxxx.CSV", as.is = T)
CLL3S.plusrec$risk_by_DS <- factor(CLL3S.plusrec$risk_by_DS, levels = c("low_risk", "high_risk", "Not filled"))
CLL3S.plusrec$`Enriched response phenotype` <- factor(CLL3S.plusrec$`Enriched response phenotype`, levels = c("Live cells","Pre-dead", "TN & PDB", "PDB & Lenalidomide", "TN & STSVEN & Live cells","Mixed"))
#here I reorder the dataframe and it looks good
#but the output ggplot changes the order of ID in the output graph
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
d <-ggplot(OR, aes(y = count,
axis1= Patient.ID,
axis2= risk_by_DS,
axis3 = `Cluster assigned consensus`,
axis4 = `Cluster assigned single drug`,
axis5 = `Enriched response phenotype`
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=`Cluster assigned consensus`)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
print(d)
不确定这是否是您想要的,请尝试以这种方式格式化风险列:
library(tidyr)
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)
# Define the number of colors you want
nb.cols <- 10
mycolor1 <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
mycolors <- c("Black")
#read the data
CLL3S.plusrec <- read.csv("test data.CSV", as.is = T)
CLL3S.plusrec$risk_by_DS <- factor(CLL3S.plusrec$risk_by_DS,
levels = c("high_risk","low_risk","Not filled"),ordered = T)
CLL3S.plusrec$Enriched.response.phenotype <- factor(CLL3S.plusrec$Enriched.response.phenotype, levels = c("Live cells","Pre-dead", "TN & PDB", "PDB & Lenalidomide", "TN & STSVEN & Live cells","Mixed"))
#here I reorder the dataframe and it looks good
#but the output ggplot changes the order of ID in the output graph
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
ggplot(OR, aes(y = count,
axis1= reorder(Patient.ID,risk_by_DS),
axis2= risk_by_DS,
axis3 = reorder(Cluster.assigned.consensus,risk_by_DS),
axis4 = reorder(Cluster.assigned.single.drug,risk_by_DS),
axis5 = reorder(Enriched.response.phenotype,risk_by_DS)
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=Cluster.assigned.consensus)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
输出:
在我的 read.csv()
中,引号也消失了,点在变量中。这就是为什么您原来引用的变量现在有点。可能是阅读的问题。
更新:
#Update
OR <- with(CLL3S.plusrec, CLL3S.plusrec[order(risk_by_DS),])
OR <- OR[order(OR$risk_by_DS,OR$Patient.ID),]
OR$Patient.ID <- factor(OR$Patient.ID,levels = unique(OR$Patient.ID),ordered = T)
#Plot
ggplot(OR, aes(y = count,
axis1= reorder(Patient.ID,risk_by_DS),
axis2= risk_by_DS,
axis3 = reorder(Cluster.assigned.consensus,risk_by_DS),
axis4 = reorder(Cluster.assigned.single.drug,risk_by_DS),
axis5 = reorder(Enriched.response.phenotype,risk_by_DS)
)) +
scale_x_discrete(limits = c("Patient ID","Disease Risk", "Consensus cluster", "Single-drug cluster", "Enriched drug response by Phenoptype")) +
geom_alluvium(aes(fill=Cluster.assigned.consensus)) +
geom_stratum(width = 1/3, fill = c(mycolor1[1:69],mycolor1[1:3],mycolor1[1:8],mycolor1[1:8],mycolor1[1:6]), color = "red") +
#geom_stratum() +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3) +
theme(axis.title.x = element_text(size = 15, face="bold"))+
theme(axis.title.y = element_text(size = 15, face="bold"))+
theme(axis.text.x = element_text(size = 10, face="bold")) +
theme(axis.text.y = element_text(size = 10, face="bold")) +
labs(fill = "Consensus clusters")+
guides(fill=guide_legend(override.aes = list(color=mycolors)))+
ggtitle("Patient flow between the Consensus clusters and Single-drug treated clusters",
"3S stimulated patients")
输出: