将元数据作为注释添加到相关热图

Question

我正在尝试生成样本与样本之间（sample-to-sample）的相关性热图，这一步我已经能够做到。

是否可以像我们对普通热图所做的那样将元数据添加到相关热图中？

我的代码

library(Hmisc)
library(pheatmap)

    # Load the tab-separated expression table.
    df1 <- read.csv(
      "figure1/TCGA_data/BEAT_AML/TCGA_AML/analysis/TCGA_expr_EF.txt",
      sep = "\t",
      header = TRUE
    )

    dim(df1)
    names(df1)
    # Drop the first column so only the remaining columns enter the correlation
    # (presumably the first column holds gene identifiers -- confirm).
    df1 <- df1[-1]
    names(df1)
    data1 <- df1
    gen <- data1

    head(gen)
    dim(gen)

    # rcorr() correlates the COLUMNS of its input, so the table is used
    # untransposed. The earlier `gendat <- t(gen)` was immediately overwritten
    # and has been removed as dead code.
    gendat <- gen
    dim(gendat)
    macolor <- colorRampPalette(c("navyblue", "white", "red"))(100)

    # NOTE: a previous call to rcorr() on `PCA_map_rlog` was removed here; that
    # object is never defined in this script and the result was overwritten below.
    cor_5 <- rcorr(as.matrix(gendat))
    M <- cor_5$r      # correlation coefficients
    str(M)
    p_mat <- cor_5$P  # matching p-values
    row_names <- rownames(M)

    head(M)

    plot_data_pheatmap <- pheatmap(
      M,
      color = rev(macolor),
      clustering_method = "complete",
      fontsize_row = 8,
      fontsize_col = 8,
      show_colnames = FALSE,
      show_rownames = FALSE
    )
    # Closes the active graphics device; this matters mainly when a file device
    # such as png()/pdf() was opened before plotting.
    dev.off()

元数据小子集

aa <- dput(head(b))
structure(list(name = structure(1:6, .Label = c("TCGA-AB-2803", 
"TCGA-AB-2805", "TCGA-AB-2806", "TCGA-AB-2807", "TCGA-AB-2808", 
"TCGA-AB-2810", "TCGA-AB-2811", "TCGA-AB-2812", "TCGA-AB-2813", 
"TCGA-AB-2814", "TCGA-AB-2815", "TCGA-AB-2816", "TCGA-AB-2817", 
"TCGA-AB-2818", "TCGA-AB-2819", "TCGA-AB-2820", "TCGA-AB-2821", 
"TCGA-AB-2822", "TCGA-AB-2823", "TCGA-AB-2824", "TCGA-AB-2825", 
"TCGA-AB-2826", "TCGA-AB-2828", "TCGA-AB-2830", "TCGA-AB-2832", 
"TCGA-AB-2833", "TCGA-AB-2834", "TCGA-AB-2835", "TCGA-AB-2836", 
"TCGA-AB-2837", "TCGA-AB-2838", "TCGA-AB-2839", "TCGA-AB-2840", 
"TCGA-AB-2841", "TCGA-AB-2842", "TCGA-AB-2843", "TCGA-AB-2844", 
"TCGA-AB-2845", "TCGA-AB-2846", "TCGA-AB-2847", "TCGA-AB-2848", 
"TCGA-AB-2849", "TCGA-AB-2851", "TCGA-AB-2853", "TCGA-AB-2854", 
"TCGA-AB-2855", "TCGA-AB-2856", "TCGA-AB-2857", "TCGA-AB-2858", 
"TCGA-AB-2859", "TCGA-AB-2860", "TCGA-AB-2861", "TCGA-AB-2862", 
"TCGA-AB-2863", "TCGA-AB-2865", "TCGA-AB-2866", "TCGA-AB-2867", 
"TCGA-AB-2868", "TCGA-AB-2869", "TCGA-AB-2870", "TCGA-AB-2871", 
"TCGA-AB-2872", "TCGA-AB-2873", "TCGA-AB-2874", "TCGA-AB-2875", 
"TCGA-AB-2877", "TCGA-AB-2879", "TCGA-AB-2880", "TCGA-AB-2881", 
"TCGA-AB-2882", "TCGA-AB-2884", "TCGA-AB-2885", "TCGA-AB-2886", 
"TCGA-AB-2887", "TCGA-AB-2888", "TCGA-AB-2889", "TCGA-AB-2890", 
"TCGA-AB-2891", "TCGA-AB-2895", "TCGA-AB-2896", "TCGA-AB-2897", 
"TCGA-AB-2898", "TCGA-AB-2899", "TCGA-AB-2900", "TCGA-AB-2901", 
"TCGA-AB-2903", "TCGA-AB-2904", "TCGA-AB-2908", "TCGA-AB-2909", 
"TCGA-AB-2910", "TCGA-AB-2911", "TCGA-AB-2912", "TCGA-AB-2913", 
"TCGA-AB-2914", "TCGA-AB-2915", "TCGA-AB-2916", "TCGA-AB-2917", 
"TCGA-AB-2918", "TCGA-AB-2919", "TCGA-AB-2920", "TCGA-AB-2921", 
"TCGA-AB-2924", "TCGA-AB-2925", "TCGA-AB-2927", "TCGA-AB-2928", 
"TCGA-AB-2929", "TCGA-AB-2930", "TCGA-AB-2931", "TCGA-AB-2932", 
"TCGA-AB-2933", "TCGA-AB-2934", "TCGA-AB-2935", "TCGA-AB-2936", 
"TCGA-AB-2937", "TCGA-AB-2938", "TCGA-AB-2939", "TCGA-AB-2940", 
"TCGA-AB-2941", "TCGA-AB-2942", "TCGA-AB-2943", "TCGA-AB-2944", 
"TCGA-AB-2946", "TCGA-AB-2948", "TCGA-AB-2949", "TCGA-AB-2950", 
"TCGA-AB-2952", "TCGA-AB-2954", "TCGA-AB-2955", "TCGA-AB-2956", 
"TCGA-AB-2959", "TCGA-AB-2963", "TCGA-AB-2964", "TCGA-AB-2965", 
"TCGA-AB-2966", "TCGA-AB-2967", "TCGA-AB-2969", "TCGA-AB-2970", 
"TCGA-AB-2971", "TCGA-AB-2972", "TCGA-AB-2973", "TCGA-AB-2975", 
"TCGA-AB-2976", "TCGA-AB-2977", "TCGA-AB-2978", "TCGA-AB-2979", 
"TCGA-AB-2980", "TCGA-AB-2981", "TCGA-AB-2982", "TCGA-AB-2983", 
"TCGA-AB-2984", "TCGA-AB-2985", "TCGA-AB-2986", "TCGA-AB-2987", 
"TCGA-AB-2988", "TCGA-AB-2990", "TCGA-AB-2991", "TCGA-AB-2992", 
"TCGA-AB-2993", "TCGA-AB-2994", "TCGA-AB-2995", "TCGA-AB-2996", 
"TCGA-AB-2998", "TCGA-AB-2999", "TCGA-AB-3000", "TCGA-AB-3001", 
"TCGA-AB-3002", "TCGA-AB-3005", "TCGA-AB-3006", "TCGA-AB-3007", 
"TCGA-AB-3008", "TCGA-AB-3009", "TCGA-AB-3011", "TCGA-AB-3012"
), class = "factor"), FAB = structure(c(4L, 1L, 2L, 2L, 3L, 3L
), .Label = c("M0", "M1", "M2", "M3", "M4", "M5", "M6", "M7", 
"nc"), class = "factor")), row.names = c(NA, 6L), class = "data.frame")

我想添加 FAB 列作为我对最终相关热图的注释。

如有任何建议或帮助，我们将不胜感激

我使用的表达数据 TCGA_expr_EF.txt 以及我的元数据

Answer 1

您可以使用 annotation_row/annotation_col 和 annotation_colors 参数将颜色条注释图层添加到 pheatmap() 中的行和列。

你的示例远非最小可复现示例，所以我在这里使用了一个小型的内置数据集（mtcars）。

另外如评论中所述，您的相关值范围从 -1 到 1，因此使用不以 0 为中心的发散色图会产生误导。我提供两种可能的解决方案。首先，您可以使用 pheatmap() 中的 breaks 参数将其置于 0 中心，这将强调您的大多数值实际上非常相似并且相互高度相关。其次，您可以使用顺序调色板，例如在我的第二个示例中，它会强调样本之间的差异。

library(Hmisc)
library(pheatmap)
library(tidyverse)
library(viridisLite)

# Input matrix: mtcars with abbreviated car names as row names
d <- mtcars
rownames(d) <- abbreviate(rownames(d))
d <- as.matrix(d)

# Car-to-car Pearson correlation: transpose so each car becomes a column,
# then keep only the coefficient matrix from the rcorr() result
sample_cor <- rcorr(t(d), type = "pearson")[["r"]]

# Diverging palette (navy -> white -> red), 100 steps
macolor <- colorRampPalette(c("navyblue", "white", "red"))(100)

# One annotation row per car, carrying its cylinder count as a factor
samp_annot <- data.frame(row.names = rownames(d), cyl = factor(d[, "cyl"]))

# One colour per cylinder class, named in order of first appearance
cyl_palette <- c("#E41A1C", "#377EB8", "#4DAF4A")
names(cyl_palette) <- unique(samp_annot$cyl)
annot_col <- list(cyl = cyl_palette)

# Heatmap 1: diverging palette pinned to [-1, 1] so that white sits at 0
pheatmap(
  mat = sample_cor,
  color = rev(macolor),
  breaks = seq(-1, 1, length.out = 101),
  clustering_method = "complete",
  annotation_row = samp_annot,
  annotation_col = samp_annot,
  annotation_colors = annot_col["cyl"],
  show_colnames = FALSE,
  show_rownames = FALSE
)

# Heatmap 2: sequential palette, which stresses differences between samples
pheatmap(
  mat = sample_cor,
  color = viridis(100, option = "A"),
  clustering_method = "complete",
  annotation_row = samp_annot,
  annotation_col = samp_annot,
  annotation_colors = annot_col["cyl"],
  show_colnames = FALSE,
  show_rownames = FALSE
)

<sup>创建于 2022-02-07，由 reprex package (v2.0.1) 生成</sup>

将元数据作为注释添加到相关热图

Adding metadata as annotation to correlation heat-map

r

heatmap

correlation

pheatmap