Ticktext 值不能修复 ggplot2 facet_grid() 在与 ggplotly() 结合时崩溃

Ticktext value does not fix ggplot2 facet_grid() breaking down when combined with ggplotly()

我有一个数据框:

gene_symbol<-c("DADA","SDAASD","SADDSD","SDADD","ASDAD","XCVXCVX","EQWESDA","DASDADS","SDASDASD","DADADASD","sdaadfd","DFSD","SADADDAD","SADDADADA","DADSADSASDWQ","SDADASDAD","ASD","DSADD")
panel<-c("growth","growth","growth","growth","big","big","big","small","small","dfgh","DF","DF","DF","DF","DF","gh","DF","DF")
ASDDA<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf2<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf3<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf4<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf5<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDA1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf11<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf21<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf31<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf41<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf51<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
Gene_states22 <- data.frame(gene_symbol, panel, ASDDA, ASDDb, ASDDAf, ASDDAf1, ASDDAf2, 
                            ASDDAf3, ASDDAf4, ASDDAf5, ASDDA1, ASDDb1, ASDDAf1, ASDDAf11,
                            ASDDAf21, ASDDAf31, ASDDAf41, ASDDAf51)

然后我创建了一个热图:

library(ggplot2); library(reshape2)
HG3 <- split(Gene_states22[,1:15], Gene_states22$panel)
HG4 <- melt(HG3, id.vars= c("gene_symbol","panel"))
HG4 <- HG4[,-5]
pp <- ggplot(HG4, aes(gene_symbol,variable)) + 
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  facet_grid(~panel, scales = "free" ,space = "free") +
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown"))

如您所见,我使用 facet_grid 根据面板值将我的热图分成几组。问题是,当我使用 ggplotly(pp) 时,列宽因组而异,我的图看起来很难看。

为了解决这个问题,我在 R 中使用了 Plotly 和 ggplot 的改编答案和 facet_grid:How to get yaxis labels to use ticktext value instead of range value? :

library(plotly)
library(ggplot2)
library(data.table)
library(datasets) 


#add fake model for use in facet
dt<-data.table(HG4[1:50,])
dt[,variable:=rownames(HG4)]
dt[,panel:=substr(variable,1,regexpr(" ",variable)-1)][panel=="",panel:=variable]

ggplot.test<-ggplot(dt,aes(gene_symbol,variable))+facet_grid(panel~.,scales="free_y",space="free",drop=TRUE)+
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown")) +
  labs(title = "Heatmap", x = "gene_symbol", y = "sample", fill = "value") +
  guides(fill = FALSE)+
  theme(panel.background = element_rect(fill = NA),
        panel.spacing = unit(0.5, "lines"), ## It was here where you had a 0 for distance between facets. I replaced it by 0.5 .
        strip.placement = "outside")



p <- ggplotly(ggplot.test)
len <- length(unique(HG4$panel))


total <- 1
for (i in 2:len) {
  total <- total + length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']])
}

spacer <- 0.01 #space between the horizontal plots
total_length = total + len * spacer
end <- 1
start <- 1

for (i in c('', seq(2, len))) {
  tick_l <- length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']]) + 1

  #fix the y-axis
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['tickvals']] <- seq(1, tick_l)
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']][[tick_l]] <- ''

  end <- start - spacer
  start <- start - (tick_l - 1) / total_length
  v <- c(start, end)
  #fix the size
  p[['x']][['layout']][[paste('yaxis', i, sep='')]]$domain <- v
}

p[['x']][['layout']][['annotations']][[3]][['y']] <- (p[['x']][['layout']][['yaxis']]$domain[2] + p[['x']][['layout']][['yaxis']]$domain[1]) /2
p[['x']][['layout']][['shapes']][[2]][['y0']] <- p[['x']][['layout']][['yaxis']]$domain[1]
p[['x']][['layout']][['shapes']][[2]][['y1']] <- p[['x']][['layout']][['yaxis']]$domain[2]

#fix the annotations
for (i in 3:len + 1) {
  #fix the y position
  p[['x']][['layout']][['annotations']][[i]][['y']] <- (p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[1] + p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[2]) /2
  #trim the text
  p[['x']][['layout']][['annotations']][[i]][['text']] <- substr(p[['x']][['layout']][['annotations']][[i]][['text']], 1, length(p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]][['ticktext']]) * 3 - 3)
}

#fix the rectangle shapes in the background
for (i in seq(0,(len - 2) * 2, 2)) {
  p[['x']][['layout']][['shapes']][[i+4]][['y0']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[1]
  p[['x']][['layout']][['shapes']][[i+4]][['y1']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[2]
}
p

但热图仍然不正确:

首先要做的事情是:

对于您的情况,我什至不确定 plotly 热图是否是您所需要的。此外,你应该 永远不要 将复杂的 ggplot 转换为 plotly。它会失败!在 90% 的情况下。尝试在 plotly 或任何你希望它结束​​的地方重新创建你的情节。其他任何事情最终都会陷入编码地狱。

我开始做一些研究:

  1. Here 很好地描述了如何在 plotly
  2. 中创建不同颜色的热图
  3. This 解释了如何在子图中创建标题。

来自 post 1 我知道我必须为数据中的每个级别创建一个矩阵。所以我为此写了一个函数:

mymat<-as.matrix(Gene_states22[,-1:-2])

### Creates a 1-NA dummy matrix for each level. The output is stored in a list
dummy_mat<-function(mat,levels,names_col){
  mat_list<-lapply(levels,function(x){
                            mat[mat!=x]=NA
                            mat[mat==x]=1
                            mymat=t(apply(mat,2,as.numeric))
                            colnames(mymat)=names_col
                            return(mymat)
                            })
  names(mat_list)=levels
  return(mat_list)
}
my_mat_list<-dummy_mat(mymat,c('DF','low','normal','over'),Gene_states22$gene_symbol)

### Optional: The heatmap type is peculiar - I created a text-NA matrix for each category as well
text_mat<-function(mat,levels,names_col){
  mat_list<-lapply(levels,function(x){
                            mat[mat!=x]=NA
                            mat=t(mat)
                            colnames(mat)=names_col
                            return(mat)
                            })
  names(mat_list)=levels
  return(mat_list)
}
   my_mat_list_t<-text_mat(mymat,c('DF','low','normal','over'),as.character(Gene_states22$gene_symbol))

此外,我需要为每个级别设置颜色。这些颜色是使用一些数据框创建的。你也可以在这里写一个类似的(lapply-)循环:

DF_Color <- data.frame(x = c(0,1), y = c("#DEDEDE", "#DEDEDE"))
colnames(DF_Color) <- NULL

lowColor <- data.frame(x = c(0,1), y = c("#00CCFF", "#00CCFF"))
colnames(lowColor) <- NULL

normColor <- data.frame(x = c(0,1), y = c("#DEDE00", "#DEDE00"))
colnames(normColor) <- NULL

overColor <- data.frame(x = c(0,1), y = c("#DE3333", "#DE3333"))
colnames(overColor) <- NULL

此外,我们需要矩阵中每个 panel-category 的列:

mycols<-lapply(levels(Gene_states22$panel),function(x) grep(x,Gene_states22$panel))

我也将其存储在列表中。 接下来我使用 lapply-loop 来绘图。我将值存储在列表中并使用 subplot 将所有内容放在一起:

library(plotly)

p_list<-lapply(1:length(mycols),function(j){
  columns<-mycols[[j]]

p<-plot_ly(
    type = "heatmap"
) %>% add_trace(
    y=rownames(my_mat_list$DF),x=colnames(my_mat_list$DF)[columns],
    z = my_mat_list$DF[,columns],
    xgap=3,ygap=3, text=my_mat_list_t$DF[,columns],hoverinfo="x+y+text",
    colorscale = DF_Color,
    colorbar = list(
        len = 0.3,
        y = 0.3,
        yanchor = 'top',
        title = 'DF series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$low),x=colnames(my_mat_list$low)[columns],
    z = my_mat_list$low[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$low[,columns],hoverinfo="x+y+text",
    colorscale = lowColor,
    colorbar = list(
        len = 0.3,
        y = 0.3,
        yanchor = 'top',
        title = 'low series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$normal),x=colnames(my_mat_list$normal)[columns],
    z = my_mat_list$normal[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$normal[,columns],hoverinfo="x+y+text",
    colorscale = normColor,
    colorbar = list(
        len = 0.3,
        y = 1,
        yanchor = 'top',
        title = 'normal series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$over),x=colnames(my_mat_list$over)[columns],
    z = my_mat_list$over[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$over[,columns],hoverinfo="x+y+text",
    colorscale = overColor,
    colorbar = list(
        len = 0.3,
        y = 1,
        yanchor = 'top',
        title = 'over series',
        tickvals = ''
    )
 )
return(p)
})

subplot(p_list[[1]],p_list[[2]],shareY = TRUE) %>%
  layout(annotations = list(
 list(x = 0.2 , y = 1.05, text = levels(Gene_states22$panel)[1], showarrow = F, xref='paper', yref='paper'),
  list(x = 0.8 , y = 1.05, text = levels(Gene_states22$panel)[2], showarrow = F, xref='paper', yref='paper'))
)

可能的问题

  1. 您必须围绕像 dfgh 这样只出现一次的类别进行创建。如果在 R 中只选择了一列,输出将自动转换为(数字或字符)vector 类型。因此可能会添加一个 as.matrix() 到所有 ztext arguments
  2. hover-text 并没有真正起作用。但是 plotly 有很好的文档 there。你应该能够弄清楚。
  3. 您还必须在 subplot 函数中指定宽度。如果您有超过 10 个类别,那将很麻烦。
  4. 交互性并没有真正发挥作用。你不能删除痕迹。为什么?不知道。如果需要,请进行一些研究。我想这与情节类型有关。
  5. 我建议以 px 为单位指定图的扩展。这可能会使图块更相似。
  6. 最后你需要一些 reference for the (subplot) titles 并且你需要调整绘图的边距。这样标题就可见了。