facet_wrap 和 geom_bar 分组错误
Wrong grouping with facet_wrap and geom_bar
我的数据集如下所示:
Pair,Total readNUM,Uniquely mapped readNUM,Batch
CP3027_merged_trimmed,83750278,75237898,P160411
CP3028_merged_trimmed,94621036,86736510,P160411
CP3029_merged_trimmed,89051500,80999978,P160411
CP3030_merged_trimmed,100399436,89787060,P160411
CP3032_merged_trimmed,91591620,83432242,P160411
CP3036_merged_trimmed,81272998,73541686,P160411
CP3037_merged_trimmed,85289630,77513350,P160411
CP3058_merged_trimmed,85092730,78269348,P160411
CP3059_merged_trimmed,81696100,74981834,P160411
CP3060_merged_trimmed,88098518,79513000,P160411
CP3065_merged_trimmed,75924870,68052566,P160411
CP3066_merged_trimmed,89746438,79933004,P160411
CP3068_merged_trimmed,82041060,73183314,P160411
CP3074_merged_trimmed,82162078,74321554,P160411
CP3078_merged_trimmed,77500516,70835090,P160411
CP3185_merged_trimmed,99023950,90729150,P160411
CP3081_trimmed,88044290,76494036,P160475
CP3084_trimmed,88741718,79056712,P160475
CP3085_trimmed,81212190,71851198,P160475
CP3091_trimmed,82675822,72460250,P160475
CP3092_trimmed,96965168,86268756,P160475
CP3093_trimmed,68717952,60125000,P160475
CTL001_trimmed,74160410,63648530,P160475
CTL004_trimmed,100474172,85822840,P160475
CP1950_trimmed,162963640,136601638,SO41314
CP2160_trimmed,77991138,65584038,SO41314
CP2171_trimmed,89296686,75887918,SO41314
CP2204_trimmed,71691448,60311650,SO41314
CP2325_trimmed,95803886,80002310,SO41314
CP3133_trimmed,76307744,64964436,SO41314
CP3249_trimmed,78904062,67382812,SO41314
CP3541_trimmed,67020194,56703314,SO41314
CP0678_trimmed,19986550,18575050,SBSQ8092_1
CP2032_trimmed,21722580,20138926,SBSQ8092_1
CP2164_trimmed,23275750,21359668,SBSQ8092_1
CP2544_trimmed,22376982,20652410,SBSQ8092_1
CP2695_trimmed,22264402,20631472,SBSQ8092_1
CP3127_trimmed,33050232,29990758,SBSQ8092_2
CP3141_trimmed,24164170,21655048,SBSQ8092_2
CP2997_trimmed,96381034,91772686,NG-10002
L0218_001_trimmed,257181636,81639268,x
L0218_002_trimmed,263258410,31357342,x
L0218_003_trimmed,183642720,30657224,x
对于每个样本(Pair,第 1 列),我将总读段数(第 2 列)与已比对读段数(第 3 列)叠加绘制成条形图,并根据实验批次(Batch,第 4 列)为条形着色。
结果如图1所示:
样本未排序,难以比较属于同一实验(批次)的样本。
为了使图表更易读,我使用 facet_wrap 按实验(批次)对样本进行分组。
生成的图(图 2)颜色正确,但使用 facet_wrap(或 facet_grid)后,样本没有被放入正确的分组中。
在另一个帖子中,有人建议避免在 aes() 中使用 $ 来引用变量。
于是我修改了代码(BamSummaryRaw$Pair -> Pair,BamSummaryRaw$Batch -> Batch),但问题仍然存在。
这是我使用的代码:
library(ggplot2);library(cowplot);library(grid)
library(gridExtra);library(reshape2)
# Read the per-sample read counts. check.names = FALSE keeps header names
# that contain spaces (e.g. "Total readNUM") exactly as written in the CSV.
BamSummaryRaw <- read.table('BamSummary_B38.csv',header=TRUE,sep=',',quote='',check.names=FALSE,stringsAsFactors=FALSE)
# convert # in millions
totReadsMill <- BamSummaryRaw$`Total readNUM`/1000000
# The name must match the CSV header ("Uniquely mapped readNUM"); with a
# non-existent column name `$` returns NULL and this vector would be empty.
totMappedMill <- BamSummaryRaw$`Uniquely mapped readNUM`/1000000
experiments <- BamSummaryRaw$Batch
# plots
# NOTE(review): totReadsMill, totMappedMill and experiments are free-standing
# vectors rather than columns of BamSummaryRaw. They are kept as in the
# question because this is the behaviour being asked about; mapping aesthetics
# to data-frame columns is the usual recommendation when faceting -- confirm.
gg.MAIN <- ggplot(BamSummaryRaw,aes(x=Pair,fill=experiments))
# White bars show total reads; the filled bars drawn on top show mapped reads.
gg.reads <- gg.MAIN + geom_bar(aes(y=totReadsMill),fill='white',colour='black',stat='identity',width = 0.5,show.legend = TRUE) +
  geom_bar(aes(y=totMappedMill),colour='black',stat='identity',width = 0.5,show.legend = TRUE) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1,size=5)) +
  labs(x='samples',y='# of reads [10^6]') +
  # NOTE(review): samples with more than 200 million total reads (the L0218_*
  # rows in the data shown) fall outside this limit -- confirm this is intended.
  ylim(0,200)
#prova <- gg.reads + facet_grid(~BamSummaryRaw$Batch,scales='free_x', labeller = label_wrap_gen(multi_line=FALSE))
# One panel per batch in a single row, each panel with its own x axis.
prova <- gg.reads + facet_wrap(~Batch,scales='free_x',nrow=1)
如果我没理解错的话,您只需要对 x 轴进行排序。您可以使用 scale_x_discrete() 来实现:只需将样本向量传给该函数的 limits 参数,x 轴就会按照该向量的顺序排列。
# Using OPs data
# Rename for easier manipulation
colnames(BamSummaryRaw) <- c("pair", "total", "mapped", "batch")
# Plot
library(ggplot2)
ggplot(BamSummaryRaw, aes(pair, fill = batch)) +
  # Number of total reads (M): white bars drawn first, as the background
  geom_bar(aes(y = total / 1e6), fill = "white", color = "black",
           stat = "identity", position = "dodge", width = 0.5) +
  # Number of mapped reads (M): bars filled by batch, drawn on top
  geom_bar(aes(y = mapped / 1e6), color = "black",
           stat = "identity", position = "dodge", width = 0.5) +
  # Order x-axis: keep samples in the row order of the data frame
  scale_x_discrete(limits = BamSummaryRaw$pair) +
  # Add labels (the argument is `subtitle`; the misspelt `subtile` does not
  # set the plot subtitle)
  labs(title = "Number of reads",
       subtitle = "Total and Mapped / Grouped per batch",
       x = NULL,
       y = "Number of reads, M",
       fill = "Batch") +
  # Nicer theme
  theme_classic() +
  # Hide the per-sample x labels and ticks; put the legend below the plot
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom")
展示 NGS 统计数据的另一种方式是绘制已比对读段(mapped reads)所占的百分比:
# Percentage of mapped reads per sample, coloured by batch.
# Expects the renamed columns: pair, total, mapped, batch.
ggplot(BamSummaryRaw, aes(pair, fill = batch)) +
  # Mapped reads as a percentage of total reads
  geom_bar(aes(y = mapped * 100 / total), color = "black",
           stat = "identity", position = "dodge", width = 0.5) +
  # Keep samples in the row order of the data frame
  scale_x_discrete(limits = BamSummaryRaw$pair) +
  # The argument is `subtitle`; the misspelt `subtile` does not set the
  # plot subtitle
  labs(title = "Percentage of mapped reads",
       subtitle = "Total and Mapped / Grouped per batch",
       x = NULL,
       y = "Mapped, %",
       fill = "Batch") +
  theme_minimal() +
  # Fix the y axis to the full 0-100 % range
  scale_y_continuous(limits = c(0, 100)) +
  # Hide the per-sample x labels and ticks; put the legend below the plot
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom")
我的数据集如下所示:
Pair,Total readNUM,Uniquely mapped readNUM,Batch
CP3027_merged_trimmed,83750278,75237898,P160411
CP3028_merged_trimmed,94621036,86736510,P160411
CP3029_merged_trimmed,89051500,80999978,P160411
CP3030_merged_trimmed,100399436,89787060,P160411
CP3032_merged_trimmed,91591620,83432242,P160411
CP3036_merged_trimmed,81272998,73541686,P160411
CP3037_merged_trimmed,85289630,77513350,P160411
CP3058_merged_trimmed,85092730,78269348,P160411
CP3059_merged_trimmed,81696100,74981834,P160411
CP3060_merged_trimmed,88098518,79513000,P160411
CP3065_merged_trimmed,75924870,68052566,P160411
CP3066_merged_trimmed,89746438,79933004,P160411
CP3068_merged_trimmed,82041060,73183314,P160411
CP3074_merged_trimmed,82162078,74321554,P160411
CP3078_merged_trimmed,77500516,70835090,P160411
CP3185_merged_trimmed,99023950,90729150,P160411
CP3081_trimmed,88044290,76494036,P160475
CP3084_trimmed,88741718,79056712,P160475
CP3085_trimmed,81212190,71851198,P160475
CP3091_trimmed,82675822,72460250,P160475
CP3092_trimmed,96965168,86268756,P160475
CP3093_trimmed,68717952,60125000,P160475
CTL001_trimmed,74160410,63648530,P160475
CTL004_trimmed,100474172,85822840,P160475
CP1950_trimmed,162963640,136601638,SO41314
CP2160_trimmed,77991138,65584038,SO41314
CP2171_trimmed,89296686,75887918,SO41314
CP2204_trimmed,71691448,60311650,SO41314
CP2325_trimmed,95803886,80002310,SO41314
CP3133_trimmed,76307744,64964436,SO41314
CP3249_trimmed,78904062,67382812,SO41314
CP3541_trimmed,67020194,56703314,SO41314
CP0678_trimmed,19986550,18575050,SBSQ8092_1
CP2032_trimmed,21722580,20138926,SBSQ8092_1
CP2164_trimmed,23275750,21359668,SBSQ8092_1
CP2544_trimmed,22376982,20652410,SBSQ8092_1
CP2695_trimmed,22264402,20631472,SBSQ8092_1
CP3127_trimmed,33050232,29990758,SBSQ8092_2
CP3141_trimmed,24164170,21655048,SBSQ8092_2
CP2997_trimmed,96381034,91772686,NG-10002
L0218_001_trimmed,257181636,81639268,x
L0218_002_trimmed,263258410,31357342,x
L0218_003_trimmed,183642720,30657224,x
对于每个样本(Pair,第 1 列),我将总读段数(第 2 列)与已比对读段数(第 3 列)叠加绘制成条形图,并根据实验批次(Batch,第 4 列)为条形着色。
结果如图1所示:
样本未排序,难以比较属于同一实验(批次)的样本。
为了使图表更易读,我使用 facet_wrap 按实验(批次)对样本进行分组。
生成的图(图 2)颜色正确,但使用 facet_wrap(或 facet_grid)后,样本没有被放入正确的分组中。
在另一个帖子中,有人建议避免在 aes() 中使用 $ 来引用变量。
于是我修改了代码(BamSummaryRaw$Pair -> Pair,BamSummaryRaw$Batch -> Batch),但问题仍然存在。
这是我使用的代码:
library(ggplot2);library(cowplot);library(grid)
library(gridExtra);library(reshape2)
# Read the per-sample read counts. check.names = FALSE keeps header names
# that contain spaces (e.g. "Total readNUM") exactly as written in the CSV.
BamSummaryRaw <- read.table('BamSummary_B38.csv',header=TRUE,sep=',',quote='',check.names=FALSE,stringsAsFactors=FALSE)
# convert # in millions
totReadsMill <- BamSummaryRaw$`Total readNUM`/1000000
# The name must match the CSV header ("Uniquely mapped readNUM"); with a
# non-existent column name `$` returns NULL and this vector would be empty.
totMappedMill <- BamSummaryRaw$`Uniquely mapped readNUM`/1000000
experiments <- BamSummaryRaw$Batch
# plots
# NOTE(review): totReadsMill, totMappedMill and experiments are free-standing
# vectors rather than columns of BamSummaryRaw. They are kept as in the
# question because this is the behaviour being asked about; mapping aesthetics
# to data-frame columns is the usual recommendation when faceting -- confirm.
gg.MAIN <- ggplot(BamSummaryRaw,aes(x=Pair,fill=experiments))
# White bars show total reads; the filled bars drawn on top show mapped reads.
gg.reads <- gg.MAIN + geom_bar(aes(y=totReadsMill),fill='white',colour='black',stat='identity',width = 0.5,show.legend = TRUE) +
  geom_bar(aes(y=totMappedMill),colour='black',stat='identity',width = 0.5,show.legend = TRUE) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1,size=5)) +
  labs(x='samples',y='# of reads [10^6]') +
  # NOTE(review): samples with more than 200 million total reads (the L0218_*
  # rows in the data shown) fall outside this limit -- confirm this is intended.
  ylim(0,200)
#prova <- gg.reads + facet_grid(~BamSummaryRaw$Batch,scales='free_x', labeller = label_wrap_gen(multi_line=FALSE))
# One panel per batch in a single row, each panel with its own x axis.
prova <- gg.reads + facet_wrap(~Batch,scales='free_x',nrow=1)
如果我没理解错的话,您只需要对 x 轴进行排序。您可以使用 scale_x_discrete() 来实现:只需将样本向量传给该函数的 limits 参数,x 轴就会按照该向量的顺序排列。
# Using OPs data
# Rename for easier manipulation
colnames(BamSummaryRaw) <- c("pair", "total", "mapped", "batch")
# Plot
library(ggplot2)
ggplot(BamSummaryRaw, aes(pair, fill = batch)) +
  # Number of total reads (M): white bars drawn first, as the background
  geom_bar(aes(y = total / 1e6), fill = "white", color = "black",
           stat = "identity", position = "dodge", width = 0.5) +
  # Number of mapped reads (M): bars filled by batch, drawn on top
  geom_bar(aes(y = mapped / 1e6), color = "black",
           stat = "identity", position = "dodge", width = 0.5) +
  # Order x-axis: keep samples in the row order of the data frame
  scale_x_discrete(limits = BamSummaryRaw$pair) +
  # Add labels (the argument is `subtitle`; the misspelt `subtile` does not
  # set the plot subtitle)
  labs(title = "Number of reads",
       subtitle = "Total and Mapped / Grouped per batch",
       x = NULL,
       y = "Number of reads, M",
       fill = "Batch") +
  # Nicer theme
  theme_classic() +
  # Hide the per-sample x labels and ticks; put the legend below the plot
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom")
展示 NGS 统计数据的另一种方式是绘制已比对读段(mapped reads)所占的百分比:
# Percentage of mapped reads per sample, coloured by batch.
# Expects the renamed columns: pair, total, mapped, batch.
ggplot(BamSummaryRaw, aes(pair, fill = batch)) +
  # Mapped reads as a percentage of total reads
  geom_bar(aes(y = mapped * 100 / total), color = "black",
           stat = "identity", position = "dodge", width = 0.5) +
  # Keep samples in the row order of the data frame
  scale_x_discrete(limits = BamSummaryRaw$pair) +
  # The argument is `subtitle`; the misspelt `subtile` does not set the
  # plot subtitle
  labs(title = "Percentage of mapped reads",
       subtitle = "Total and Mapped / Grouped per batch",
       x = NULL,
       y = "Mapped, %",
       fill = "Batch") +
  theme_minimal() +
  # Fix the y axis to the full 0-100 % range
  scale_y_continuous(limits = c(0, 100)) +
  # Hide the per-sample x labels and ticks; put the legend below the plot
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom")