如何在使用多变量对象或两个单独对象的组合 ggplot2 图形之上添加额外的统计信息
How to add additional statistics on top of a combined ggplot2 graph that uses a multi-variable object or two separate objects
我有一个 ggplot2 图,它将两个独立的小提琴图绘制到一个图上,由这个例子给出(感谢@jared_mamrot 提供):
library(tidyverse)
data("Puromycin")
head(Puromycin)
dat1 <- Puromycin %>%
filter(state == "treated")
dat2 <- Puromycin %>%
filter(state == "untreated")
mycp <- ggplot() +
geom_violin(data = dat1, aes(x= state, y = conc, colour = "Puromycin (Treatment1)")) +
geom_violin(data = dat2, aes(x= state, y = conc, colour = "Puromycin (Treatment2)"))
mycp
我想添加箱线图或其他汇总统计数据,例如 http://www.sthda.com/english/wiki/ggplot2-violin-plot-quick-start-guide-r-software-and-data-visualization and https://www.maths.usyd.edu.au/u/UG/SM/STAT3022/r/current/Misc/data-visualization-2.1.pdf 中的那些,但尝试在这些地方建议的代码不会改变原始图。
mycp + geom_boxplot()
感谢阅读,希望这是有道理的!
更新 ================================== ========================================
所以上面的例子并不能完全反映我现在意识到的情况。本质上,我想将统计信息应用到一个组合的 ggplot2 图上,该图使用两个单独的对象作为其变量(此处为 TNBC_List1
和 ER_List1
)这是一个示例(抱歉,对于较长的示例,我承认我在创建一个更简单的可重现示例时遇到了麻烦,而且我对一般编码还很陌生):
# Libraries -------------------------------------------------------------
library(BiocManager)
library(GEOquery)
library(plyr)
library(dplyr)
library(Matrix)
library(devtools)
library(Seurat)
library(ggplot2)
library(cowplot)
library(SAVER)
library(metap)
library(multtest)
# Loading Raw Data into RStudio ----------------------------------
filePaths = getGEOSuppFiles("GSE75688")
tarF <- list.files(path = "./GSE75688/", pattern = "*.tar", full.names = TRUE)
tarF
untar(tarF, exdir = "./GSE75688/")
gzipF <- list.files(path = "./GSE75688/", pattern = "*.gz", full.names = TRUE)
ldply(.data = gzipF, .fun = gunzip)
list.files(path = "./GSE75688/", full.names = TRUE)
list.files(path = "./GSE75688/", pattern = "\.txt$",full.names = TRUE)
# full matrix ----------------------------------------------------------
fullmat <- read.table(file = './GSE75688//GSE75688_GEO_processed_Breast_Cancer_raw_TPM_matrix.txt',
sep = '\t', header = FALSE, stringsAsFactors = FALSE)
fullmat <- data.frame(fullmat[,-1], row.names=fullmat[,1])
colnames(fullmat) <- as.character(fullmat[1, ])
fullmat <- fullmat[-1,]
fullmat <- as.matrix(fullmat)
# BC01 ER+ matrix -----------------------------------------------------------
BC01mat <- grep(pattern =c("^BC01") , x = colnames(fullmat), value = TRUE)
BC01mat = fullmat[,grepl(c("^BC01"),colnames(fullmat))]
BC01mat = BC01mat[,!grepl("^BC01_Pooled",colnames(BC01mat))]
BC01mat = BC01mat[,!grepl("^BC01_Tumor",colnames(BC01mat))]
BC01pdat <- data.frame("samples" = colnames(BC01mat), "treatment" = "ER+")
# BC07 TNBC matrix -----------------------------------------------------------
BC07mat <- grep(pattern =c("^BC07") , x = colnames(fullmat), value = TRUE)
BC07mat <- fullmat[,grepl(c("^BC07"),colnames(fullmat))]
BC07mat <- BC07mat[,!grepl("^BC07_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07_Tumor",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN",colnames(BC07mat))]
BC07pdat <- data.frame("samples" = colnames(BC07mat), "treatment" = "TNBC")
#merge samples together =========================================================================
joined <- cbind(BC01mat, BC07mat)
pdat_joined <- rbind(BC01pdat, BC07pdat)
#fdat ___________________________________________________________________________________
fdat <- grep(pattern =c("gene_name|gene_type") , x = colnames(fullmat), value = TRUE)
fdat <- fullmat[,grepl(c("gene_name|gene_type"),colnames(fullmat))]
fdat <- as.data.frame(fdat, stringsAsFactors = FALSE)
fdat <- setNames(cbind(rownames(fdat), fdat, row.names = NULL),
c("ensembl_id", "gene_short_name", "gene_type"))
rownames(pdat_joined) <- pdat_joined$samples
rownames(fdat) = make.names(fdat$gene_short_name, unique=TRUE)
rownames(joined) <- rownames(fdat)
# Create Seurat Object __________________________________________________________________
joined <- as.data.frame(joined)
sobj_pre <- CreateSeuratObject(counts = joined)
sobj_pre <-AddMetaData(sobj_pre,metadata=pdat_joined)
head(sobj_pre@meta.data)
#gene name input
sobj_pre[["RNA"]]@meta.features<-fdat
head(sobj_pre[["RNA"]]@meta.features)
#Downstream analysis -------------------------------------------------------
sobj <- sobj_pre
sobj <- FindVariableFeatures(object = sobj, mean.function = ExpMean, dispersion.function = LogVMR, nfeatures = 2000)
sobj <- ScaleData(object = sobj, features = rownames(sobj), block.size = 2000)
sobj <- RunPCA(sobj, npcs = 100, ndims.print = 1:10, nfeatures.print = 5)
sobj <- FindNeighbors(sobj, reduction = "pca", dims = 1:4, nn.eps = 0.5)
sobj <- FindClusters(sobj, resolution = 1, n.start = 10)
umap.method = 'umap-learn'
metric = 'correlation'
sobj <- RunUMAP(object = sobj, reduction = "pca", dims = 1:4,min.dist = 0.5, seed.use = 123)
p0 <- DimPlot(sobj, reduction = "umap", pt.size = 0.1,label=TRUE) + ggtitle(label = "Title")
p0
# ER+ score computation -------------------
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
sobj <- AddModuleScore(object = sobj, features = ERlist, name = "ER_List")
#TNBC computation -------------------
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
sobj <- AddModuleScore(object = sobj, features = tnbclist, name = "TNBC_List")
#ggplot2 issue ----------------------------------------------------------------------------
sobj[["ClusterName"]] <- Idents(object = sobj)
sobjlists <- FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
library(reshape2)
melt(sobjlists, id.vars = c("ER_List1", "TNBC_List1", "ClusterName"))
p <- ggplot() + geom_violin(data = sobjlists, aes(x= ClusterName, y = ER_List1, fill = ER_List1, colour = "ER+ Signature"))+ geom_violin(data = sobjlists, aes(x= ClusterName, y = TNBC_List1, fill = TNBC_List1, colour="TNBC Signature"))
分机 ================================== ====================================
如果您想使用两个对象(例如 sobjlists1
和 sobjlists2
)而不是我的示例显示的内容(两个变量但一个对象),rbind
两者然后按照@StupidWolf 所说的去做
library(reshape2)
sobjlists1= melt(sobjlists1, id.vars = "treatment")
sobjlists2= melt(sobjlists2, id.vars = "treatment")
combosobjlists <- rbind(sobjlists1, sobjlists2)
然后使用 combosobjlists
:
继续他们的代码
ggplot(combosobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))
希望这篇文章对您有所帮助!
为了能够在不指定数据的情况下使用绘图中的数据(如 geom_boxplot()
),您需要将数据放入 ggplot()
函数调用中。那么下面的函数就可以继承了。
您也不需要为每种颜色绘制额外的小提琴图
library(tidyverse)
data("Puromycin")
head(Puromycin)
mycp <- ggplot(Puromycin,aes(x= state, y = conc, colour=state))+geom_violin()
mycp + geom_boxplot(width=0.1, color= "black") +
scale_color_discrete(
labels= c("Puromycin (Treatment1)","Puromycin (Treatment2)")
)
结果:
尽量只包含最少的代码来说明您的问题。就像在您的示例中一样,无需从整个 seurat
处理开始。您只需为 data.frame 提供 dput() ,我们就可以看到 ggplot2 的问题,请参阅 .
创建一些示例数据:
library(Seurat)
library(ggplot2)
genes = c(unlist(c(ERlist,tnbclist)))
mat = matrix(rnbinom(500*length(genes),mu=500,size=1),ncol=500)
rownames(mat) = genes
colnames(mat) = paste0("cell",1:500)
sobj = CreateSeuratObject(mat)
sobj = NormalizeData(sobj)
添加一些虚构的集群:
sobj$ClusterName = factor(sample(0:1,ncol(sobj),replace=TRUE))
添加您的模块分数:
sobj = AddModuleScore(object = sobj, features = tnbclist,
name = "TNBC_List",ctrl=5)
sobj = AddModuleScore(object = sobj, features = ERlist,
name = "ER_List",ctrl=5)
我们得到了数据,你需要做的是正确地旋转它。用ggplot2画两次会导致各种问题:
sobjlists = FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
head(sobjlists)
ER_List1 TNBC_List1 ClusterName
cell1 -0.05391108 -0.008736057 1
cell2 0.07074816 -0.039064126 1
cell3 0.08688374 -0.066967324 1
cell4 -0.12503649 0.120665057 0
cell5 0.05356685 -0.072293651 0
cell6 -0.20053804 0.178977042 1
应该是这样的:
library(reshape2)
sobjlists = melt(sobjlists, id.vars = "ClusterName")
ClusterName variable value
1 1 ER_List1 -0.05391108
2 1 ER_List1 0.07074816
3 1 ER_List1 0.08688374
4 0 ER_List1 -0.12503649
5 0 ER_List1 0.05356685
6 1 ER_List1 -0.20053804
现在我们绘制:
ggplot(sobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))
我有一个 ggplot2 图,它将两个独立的小提琴图绘制到一个图上,由这个例子给出(感谢@jared_mamrot 提供):
library(tidyverse)
data("Puromycin")
head(Puromycin)
dat1 <- Puromycin %>%
filter(state == "treated")
dat2 <- Puromycin %>%
filter(state == "untreated")
mycp <- ggplot() +
geom_violin(data = dat1, aes(x= state, y = conc, colour = "Puromycin (Treatment1)")) +
geom_violin(data = dat2, aes(x= state, y = conc, colour = "Puromycin (Treatment2)"))
mycp
我想添加箱线图或其他汇总统计数据,例如 http://www.sthda.com/english/wiki/ggplot2-violin-plot-quick-start-guide-r-software-and-data-visualization and https://www.maths.usyd.edu.au/u/UG/SM/STAT3022/r/current/Misc/data-visualization-2.1.pdf 中的那些,但尝试在这些地方建议的代码不会改变原始图。
mycp + geom_boxplot()
感谢阅读,希望这是有道理的!
更新 ================================== ========================================
所以上面的例子并不能完全反映我现在意识到的情况。本质上,我想将统计信息应用到一个组合的 ggplot2 图上,该图使用两个单独的对象作为其变量(此处为 TNBC_List1
和 ER_List1
)这是一个示例(抱歉,对于较长的示例,我承认我在创建一个更简单的可重现示例时遇到了麻烦,而且我对一般编码还很陌生):
# Libraries -------------------------------------------------------------
library(BiocManager)
library(GEOquery)
library(plyr)
library(dplyr)
library(Matrix)
library(devtools)
library(Seurat)
library(ggplot2)
library(cowplot)
library(SAVER)
library(metap)
library(multtest)
# Loading Raw Data into RStudio ----------------------------------
filePaths = getGEOSuppFiles("GSE75688")
tarF <- list.files(path = "./GSE75688/", pattern = "*.tar", full.names = TRUE)
tarF
untar(tarF, exdir = "./GSE75688/")
gzipF <- list.files(path = "./GSE75688/", pattern = "*.gz", full.names = TRUE)
ldply(.data = gzipF, .fun = gunzip)
list.files(path = "./GSE75688/", full.names = TRUE)
list.files(path = "./GSE75688/", pattern = "\.txt$",full.names = TRUE)
# full matrix ----------------------------------------------------------
fullmat <- read.table(file = './GSE75688//GSE75688_GEO_processed_Breast_Cancer_raw_TPM_matrix.txt',
sep = '\t', header = FALSE, stringsAsFactors = FALSE)
fullmat <- data.frame(fullmat[,-1], row.names=fullmat[,1])
colnames(fullmat) <- as.character(fullmat[1, ])
fullmat <- fullmat[-1,]
fullmat <- as.matrix(fullmat)
# BC01 ER+ matrix -----------------------------------------------------------
BC01mat <- grep(pattern =c("^BC01") , x = colnames(fullmat), value = TRUE)
BC01mat = fullmat[,grepl(c("^BC01"),colnames(fullmat))]
BC01mat = BC01mat[,!grepl("^BC01_Pooled",colnames(BC01mat))]
BC01mat = BC01mat[,!grepl("^BC01_Tumor",colnames(BC01mat))]
BC01pdat <- data.frame("samples" = colnames(BC01mat), "treatment" = "ER+")
# BC07 TNBC matrix -----------------------------------------------------------
BC07mat <- grep(pattern =c("^BC07") , x = colnames(fullmat), value = TRUE)
BC07mat <- fullmat[,grepl(c("^BC07"),colnames(fullmat))]
BC07mat <- BC07mat[,!grepl("^BC07_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07_Tumor",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN_Pooled",colnames(BC07mat))]
BC07mat <- BC07mat[,!grepl("^BC07LN",colnames(BC07mat))]
BC07pdat <- data.frame("samples" = colnames(BC07mat), "treatment" = "TNBC")
#merge samples together =========================================================================
joined <- cbind(BC01mat, BC07mat)
pdat_joined <- rbind(BC01pdat, BC07pdat)
#fdat ___________________________________________________________________________________
fdat <- grep(pattern =c("gene_name|gene_type") , x = colnames(fullmat), value = TRUE)
fdat <- fullmat[,grepl(c("gene_name|gene_type"),colnames(fullmat))]
fdat <- as.data.frame(fdat, stringsAsFactors = FALSE)
fdat <- setNames(cbind(rownames(fdat), fdat, row.names = NULL),
c("ensembl_id", "gene_short_name", "gene_type"))
rownames(pdat_joined) <- pdat_joined$samples
rownames(fdat) = make.names(fdat$gene_short_name, unique=TRUE)
rownames(joined) <- rownames(fdat)
# Create Seurat Object __________________________________________________________________
joined <- as.data.frame(joined)
sobj_pre <- CreateSeuratObject(counts = joined)
sobj_pre <-AddMetaData(sobj_pre,metadata=pdat_joined)
head(sobj_pre@meta.data)
#gene name input
sobj_pre[["RNA"]]@meta.features<-fdat
head(sobj_pre[["RNA"]]@meta.features)
#Downstream analysis -------------------------------------------------------
sobj <- sobj_pre
sobj <- FindVariableFeatures(object = sobj, mean.function = ExpMean, dispersion.function = LogVMR, nfeatures = 2000)
sobj <- ScaleData(object = sobj, features = rownames(sobj), block.size = 2000)
sobj <- RunPCA(sobj, npcs = 100, ndims.print = 1:10, nfeatures.print = 5)
sobj <- FindNeighbors(sobj, reduction = "pca", dims = 1:4, nn.eps = 0.5)
sobj <- FindClusters(sobj, resolution = 1, n.start = 10)
umap.method = 'umap-learn'
metric = 'correlation'
sobj <- RunUMAP(object = sobj, reduction = "pca", dims = 1:4,min.dist = 0.5, seed.use = 123)
p0 <- DimPlot(sobj, reduction = "umap", pt.size = 0.1,label=TRUE) + ggtitle(label = "Title")
p0
# ER+ score computation -------------------
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
sobj <- AddModuleScore(object = sobj, features = ERlist, name = "ER_List")
#TNBC computation -------------------
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
sobj <- AddModuleScore(object = sobj, features = tnbclist, name = "TNBC_List")
#ggplot2 issue ----------------------------------------------------------------------------
sobj[["ClusterName"]] <- Idents(object = sobj)
sobjlists <- FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
library(reshape2)
melt(sobjlists, id.vars = c("ER_List1", "TNBC_List1", "ClusterName"))
p <- ggplot() + geom_violin(data = sobjlists, aes(x= ClusterName, y = ER_List1, fill = ER_List1, colour = "ER+ Signature"))+ geom_violin(data = sobjlists, aes(x= ClusterName, y = TNBC_List1, fill = TNBC_List1, colour="TNBC Signature"))
分机 ================================== ====================================
如果您想使用两个对象(例如 sobjlists1
和 sobjlists2
)而不是我的示例显示的内容(两个变量但一个对象),rbind
两者然后按照@StupidWolf 所说的去做
library(reshape2)
sobjlists1= melt(sobjlists1, id.vars = "treatment")
sobjlists2= melt(sobjlists2, id.vars = "treatment")
combosobjlists <- rbind(sobjlists1, sobjlists2)
然后使用 combosobjlists
:
ggplot(combosobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))
希望这篇文章对您有所帮助!
为了能够在不指定数据的情况下使用绘图中的数据(如 geom_boxplot()
),您需要将数据放入 ggplot()
函数调用中。那么下面的函数就可以继承了。
您也不需要为每种颜色绘制额外的小提琴图
library(tidyverse)
data("Puromycin")
head(Puromycin)
mycp <- ggplot(Puromycin,aes(x= state, y = conc, colour=state))+geom_violin()
mycp + geom_boxplot(width=0.1, color= "black") +
scale_color_discrete(
labels= c("Puromycin (Treatment1)","Puromycin (Treatment2)")
)
结果:
尽量只包含最少的代码来说明您的问题。就像在您的示例中一样,无需从整个 seurat
处理开始。您只需为 data.frame 提供 dput() ,我们就可以看到 ggplot2 的问题,请参阅
创建一些示例数据:
library(Seurat)
library(ggplot2)
genes = c(unlist(c(ERlist,tnbclist)))
mat = matrix(rnbinom(500*length(genes),mu=500,size=1),ncol=500)
rownames(mat) = genes
colnames(mat) = paste0("cell",1:500)
sobj = CreateSeuratObject(mat)
sobj = NormalizeData(sobj)
添加一些虚构的集群:
sobj$ClusterName = factor(sample(0:1,ncol(sobj),replace=TRUE))
添加您的模块分数:
sobj = AddModuleScore(object = sobj, features = tnbclist,
name = "TNBC_List",ctrl=5)
sobj = AddModuleScore(object = sobj, features = ERlist,
name = "ER_List",ctrl=5)
我们得到了数据,你需要做的是正确地旋转它。用ggplot2画两次会导致各种问题:
sobjlists = FetchData(object = sobj, vars = c("ER_List1", "TNBC_List1", "ClusterName"))
head(sobjlists)
ER_List1 TNBC_List1 ClusterName
cell1 -0.05391108 -0.008736057 1
cell2 0.07074816 -0.039064126 1
cell3 0.08688374 -0.066967324 1
cell4 -0.12503649 0.120665057 0
cell5 0.05356685 -0.072293651 0
cell6 -0.20053804 0.178977042 1
应该是这样的:
library(reshape2)
sobjlists = melt(sobjlists, id.vars = "ClusterName")
ClusterName variable value
1 1 ER_List1 -0.05391108
2 1 ER_List1 0.07074816
3 1 ER_List1 0.08688374
4 0 ER_List1 -0.12503649
5 0 ER_List1 0.05356685
6 1 ER_List1 -0.20053804
现在我们绘制:
ggplot(sobjlists,aes(x= ClusterName, y = value)) +
geom_violin(aes(fill=variable)) +
geom_boxplot(aes(col=variable),
width = 0.2,position=position_dodge(0.9))