在 ggplot2 构面图中订购热图行

Order heatmap rows in ggplot2 facet plot

我在 ggplot2 中遇到多面热图渲染问题。这个想法是我有几个元素(这些是现实生活中的基因)和几个实验(下面示例中的 F1 和 F2)。使用 F1 实验,我能够根据他们的平均表达(高、...、中等、...、低)创建 class of elements/genes。在通过下面的示例生成的热图中,我想根据 F1 中的平均表达式值对每个 class(01、02、03、04)中的每个元素进行排序。不幸的是,元素按字母顺序出现。我会很高兴得到一些提示...... 最佳

library(ggplot2)
library(reshape2)

set.seed(123)

# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col) 
colnames(d) <- sprintf("%02d", 1:5)

# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))


# let's create a data.frame d
d <- data.frame(d, 
                experiment = sort(rep(c("F1","F2"), n.row/2)),
                elements= elements)

# For elements related to experiment F1 
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5], 
                                          1, 
                                          seq(from=1, 10, length.out = 100), 
                                          "+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5], 
                                          1, 
                                          seq(from=10, 1, length.out = 100), 
                                          "+"), 2)

#print(d[d$experiment =="F1",1:5])

# Now we split the dataset by experiments
d.split <- split(d, d$experiment)

# For all experiments, we order elements based on the mean expression signal in 
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)

for(s in names(d.split)){
  d.split[[s]] <- d.split[[s]][pos,]
}


# We create several classes of elements based on their 
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)

for(s in names(d.split)){
  d.split[[s]] <- split(d.split[[s]], cuts)
}



# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")


# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()                                        
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))

ggsave("RPlot_test.jpeg", p)

根据您的建议,我找到了解决方案(这意味着要明确指定 'elements' 因素的级别顺序)。谢谢@hrbrmstr(以及所有其他人)。

注意:与下面用 'Added: begin' 和 'Added: end' 标志表示的原始代码相比,我只添加了几行。

library(ggplot2)
library(reshape2)

set.seed(123)

# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col) 
colnames(d) <- sprintf("%02d", 1:5)

# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))


# let's create a data.frame d
d <- data.frame(d, 
                experiment = sort(rep(c("F1","F2"), n.row/2)),
                elements= elements)

# For elements related to experiment F1 
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5], 
                                          1, 
                                          seq(from=1, 10, length.out = 100), 
                                          "+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5], 
                                          1, 
                                          seq(from=10, 1, length.out = 100), 
                                          "+"), 2)

#print(d[d$experiment =="F1",1:5])

# Now we split the dataset by experiments
d.split <- split(d, d$experiment)

# For all experiments, we order elements based on the mean expression signal in 
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)

for(s in names(d.split)){
  d.split[[s]] <- d.split[[s]][pos,]
}

## Added: begin ###
#Get the list of elements in proper order (based on row mean)
mean.order <- as.character(d.split$F1$elements)
## Added: end###

# We create several classes of elements based on their 
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)

for(s in names(d.split)){
  d.split[[s]] <- split(d.split[[s]], cuts)
}



# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")

## Added: begin###
#Ensure that dm$elements is an ordered factor with levels
# ordered as expected
dm$elements <- factor(dm$elements, levels = mean.order, ordered = TRUE)
## Added: end###

# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()                                        
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))

ggsave("RPlot_test.jpeg", p)