如何使用 ggplot 函数添加到 cnetplot?

How to add to a cnetplot using ggplot functions?

我在 R 中有一个数据集,它是 'Formal class enrichResult' 的 class。我使用包 DOSE 中的 cnetplot() 绘制此数据集中的基因 - 这意味着基于 ggplot 图形。这绘制了相互作用途径中的基因网络:

我为此编写代码:

kegg_organism = "hsa"
kegg_enrich <-  enrichKEGG(gene   = df$geneID,
                   organism     = 'hsa',
                   pvalueCutoff = 0.05,
                   pAdjustMethod = 'fdr')

kegg <- setReadable(kegg_enrich, 'org.Hs.eg.db', 'ENTREZID')
kegg_genes <- kegg[,]

gene_of_interest <- dplyr::filter(kegg_genes, grepl('CALML6', geneID))
gene_of_interest <- enrichDF2enrichResult(gene_of_interest)

gene_list_scores <- df$Score
names(gene_list_scores) <- df$geneID
gene_list_scores <- na.omit(gene_list_scores)
gene_list_scores <- sort(gene_list_scores, decreasing = TRUE)

plot <- cnetplot(gene_of_interest, foldChange = gene_list_scores)

plot <- plot + scale_color_gradient2(name='Score', low='steelblue', high='firebrick')

我希望用指示基因药物类型类别的形状覆盖此图,但我无法使其正常工作。

我将药物数据与 enrichResult 数据分开,我的药物数据如下所示:

drugs <- structure(list(Gene = c("ACE", "AQP1", "IRS2", "SMAD3", 
"HLA-B"), Druggability = c("KINASE", "DRUGGABLE GENOME", "CLINICALLY ACTIONABLE", 
"KINASE", "CLINICALLY ACTIONABLE")), row.names = c(NA, -5L), class = c("data.table", 
"data.frame"))

#drugs data is 2 columns like:
Gene        Druggability
TLN2        KINASE
PDGFC       DRUGGABLE GENOME 

我正在编码以像这样在图上叠加药物形状:

drugs <- fread('genes_dgidb_export.tsv')
drugs <- dplyr::select(drugs, Gene, Druggability)
drugs <- drugs[1:80,] #making data same size otherwise I get an Aesthetics number mismatch error
Druggability <- drugs$Druggability
names(Druggability) <- drugs$Gene

options(ggrepel.max.overlaps = Inf)
pother <- cnetplot(gene_of_interest,
                   categorySize ='pvalue', 
                   foldChange = gene_list_scores, 
                  )

pother <- pother + scale_color_gradient2(name='Score', low='steelblue', high='red') +
  scale_size_continuous(range = c(2, 8)) 

#Overlaying shapes by drug:
library(ggraph)

pother + geom_node_point(aes(shape=Druggability)) +
  scale_shape_manual(values=c(2, 5, 3, 4))

然而,这里叠加的形状 mismatched/in 错误的地方,并且有分配给米色通路节点的药物,而这些药物应该只分配给红色基因节点。

是否有任何其他函数可以用来叠加此图上的形状点以正确对应基因?

用于获取 enrichResult 的示例输入数据和包:

library(data.table)
library(clusterProfiler)
library(enrichplot)
library(tidyverse)
library(DOSE)
library(clusterProfiler)
library(multienrichjam)
library(RColorBrewer)

df <- structure(list(geneID = c(107986084L, 100874369L, 100506380L, 100288797L, 
100137049L, 100130742L, 723788L, 643136L, 641649L, 497258L), 
    Score = c(0.809859097, 0.913054347, 0.423823357, 0.369738668, 
    0.798110485, 0.78013134, 0.764999211, 0.231925398, 0.317150593, 
    0.754656732)), row.names = c(NA, -10L), class = c("data.table", 
"data.frame"))

kegg_genes <- structure(list(ID = c("hsa04924", "hsa04925", "hsa04022", "hsa04934", 
"hsa05166", "hsa04218", "hsa05410", "hsa04024", "hsa05414", "hsa04933"
), Description = c("Renin secretion", "Aldosterone synthesis and secretion", 
"cGMP-PKG signaling pathway", "Cushing syndrome", "Human T-cell leukemia virus 1 infection", 
"Cellular senescence", "Hypertrophic cardiomyopathy", "cAMP signaling pathway", 
"Dilated cardiomyopathy", "AGE-RAGE signaling pathway in diabetic complications"
), GeneRatio = c("22/381", "25/381", "31/381", "28/381", "33/381", 
"27/381", "20/381", "32/381", "20/381", "20/381"), BgRatio = c("69/8093", 
"98/8093", "167/8093", "155/8093", "222/8093", "156/8093", "90/8093", 
"219/8093", "96/8093", "100/8093"), pvalue = c(2.67752556162864e-13, 
1.75172505637096e-12, 3.06146322777988e-11, 5.54589140412457e-10, 
2.99051127478473e-09, 3.0449722225371e-09, 4.26754375382907e-09, 
8.08219152477885e-09, 1.39375994107875e-08, 2.90351315318027e-08
), p.adjust = c(8.19322821858365e-11, 2.68013933624758e-10, 3.12269249233547e-09, 
4.2426069241553e-08, 1.55293583349392e-07, 1.55293583349392e-07, 
1.86552626953099e-07, 3.09143825822791e-07, 4.73878379966775e-07, 
8.88475024873162e-07), qvalue = c(4.70680809254719e-11, 1.53967412849448e-10, 
1.79391003171663e-09, 2.43727332760211e-08, 8.92123440638062e-08, 
8.92123440638062e-08, 1.0716989577285e-07, 1.77595524294483e-07, 
2.72231473871522e-07, 5.10407049032742e-07), geneID = c("CALML6/CALML4/PLCB1/CLCA2/PPP3CA/PLCB3/PDE3A/PDE1A/NPR1/NPPA/GNAS/EDNRA/EDN1/ACE/CACNA1D/CACNA1C/AQP1/AGT/ADRB2/ADRB1/ADORA1/ADCY5", 
"CALML6/CALML4/KCNK9/PRKD3/PLCB1/CREB5/PRKD1/PRKCE/PLCB3/NPR1/NPPA/LDLR/KCNK3/GNAS/CYP21A2/CYP11B2/CAMK2B/CACNA1D/CACNA1C/ATP2B1/ATF1/AGT/ADCY9/ADCY5/ADCY3", 
"CALML6/CALML4/PLCB1/CREB5/IRS2/PDE5A/SLC8A1/MAP2K2/PRKCE/PPP3CA/PLCB3/PDE3A/NPR1/NPPB/NPPA/NFATC2/MEF2A/INSR/GTF2I/EDNRA/CACNA1D/CACNA1C/ATP2B1/AKT2/ADRB2/ADRB1/ADRA2B/ADORA1/ADCY9/ADCY5/ADCY3", 
"WNT3A/LEF1/PDE11A/PLCB1/CREB5/TCF7L2/MAP2K2/PLCB3/PBX1/LDLR/KCNK3/GNAS/DVL1/CYP21A2/CYP17A1/CYP11B1/CDKN2C/CDKN1A/CDK6/CCNE1/CAMK2B/CACNA1D/CACNA1C/CCND1/AGT/ADCY9/ADCY5/ADCY3", 
"TLN2/VAC14/CREB5/CDC16/KAT2B/PIK3R3/MAD1L1/XPO1/TLN1/TGFB2/TGFB1/TERT/STAT5B/RELA/PTEN/MAP2K2/PPP3CA/NFKBIA/NFATC2/SMAD3/IL6/HLA-DQB1/HLA-DQA1/HLA-B/CDKN2C/CDKN1A/CDC20/CCNE1/CCND1/AKT2/ADCY9/ADCY5/ADCY3", 
"CALML6/CALML4/TRPM7/HIPK2/BTRC/PIK3R3/TGFB2/TGFB1/RRAS/RELA/PTEN/MAP2K2/PPP3CA/NFATC2/SMAD3/IL6/IGFBP3/HLA-B/FOXO3/CDKN1A/CDK6/CDC25A/CCNE1/CACNA1D/ZFP36L1/CCND1/AKT2", 
"PRKAG2/CACNA2D2/TGFB2/TGFB1/SLC8A1/SGCD/PRKAG1/ITGB5/ITGA9/ITGA1/IL6/IGF1/EDN1/ACE/DAG1/CACNB4/CACNB2/CACNA1D/CACNA1C/AGT", 
"CALML6/CALML4/PDE10A/CREB5/PIK3R3/RRAS/RELA/MAP2K2/PDE3A/NPR1/NPPA/NFKBIA/HTR1D/GRIN2B/GNAS/GIPR/GIP/EDNRA/EDN1/CHRM2/CAMK2B/CACNA1D/CACNA1C/ATP2B1/AKT2/ADRB2/ADRB1/ADORA1/ADCY9/ADCY5/ADCY3/ACOX1", 
"CACNA2D2/TGFB2/TGFB1/SLC8A1/SGCD/ITGB5/ITGA9/ITGA1/IGF1/GNAS/DAG1/CACNB4/CACNB2/CACNA1D/CACNA1C/AGT/ADRB1/ADCY9/ADCY5/ADCY3", 
"PLCD3/NOX4/PLCB1/PIK3R3/VEGFA/TGFB2/TGFB1/STAT5B/RELA/PRKCE/PLCB3/SMAD3/IL6/FN1/EDN1/COL4A4/BCL2/CCND1/AKT2/AGT"
), Count = c(22L, 25L, 31L, 28L, 33L, 27L, 20L, 32L, 20L, 20L
)), row.names = c("hsa04924", "hsa04925", "hsa04022", "hsa04934", 
"hsa05166", "hsa04218", "hsa05410", "hsa04024", "hsa05414", "hsa04933"
), class = "data.frame")

上面运行示例数据的输出图(以前的图是我的全部数据):

我使用红色下划线绘制了分配给它们的成药性的基因,但由于某种原因,这些形状将进入通路节点。

  1. 我使用 clusterProfiler 示例使代码可重现 (https://yulab-smu.top/biomedical-knowledge-mining-book/universal-api.html)

  2. 我使用了来自 (https://www.dgidb.org/downloads)

    的 categories.tsv 文件
library(clusterProfiler)
library(dplyr)
library(ggraph)
library(msigdbr)

data(geneList, package="DOSE")
cat_table = read.table("categories.tsv",sep="\t",header = T,quote = "" )
geneList=geneList[c(1:50,12476:12495)] # a 70 gene sub list to simplify the plot
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene) 
em2 <- GSEA(geneList, TERM2GENE = m_t2g)
em2 = setReadable(em2, 'org.Hs.eg.db', 'ENTREZID')
p = cnetplot(em2,foldChange = geneList)
m = match(p$data$name ,cat_table$entrez_gene_symbol)
category = cat_table$category[m]
p + geom_node_point(aes(shape= category))

这里的重点是将基因名称与包含基因名称和通路名称的 cnetplot 对象 (p$data$name) 中的名称进行匹配,因此匹配必不可少

做个验证

cat_table[m[!is.na(m)],c(1,4)]
       entrez_gene_symbol              category
6211               KIF23                ENZYME
13765              CENPE                KINASE