提取簇中至少 10% 的细胞表达的基因名称
Extract names of genes expressed by at least 10% of cells in a cluster
我有一个已经定义好簇的 Seurat 对象。我需要提取在某个簇中至少 10% 的细胞里表达的所有基因的列表,并且需要对我的每一个簇分别执行这一操作。
我知道有一段代码可以从整个 Seurat 对象中提取至少在 10% 的细胞中表达的基因:
# Flag genes detected (count > 0) in at least 10% of all cells in the object.
# The cutoff is compared unrounded: wrapping it in floor() would lower the
# threshold and admit genes seen in fewer than 10% of cells (e.g. 1 of 15).
genes.to.keep <- Matrix::rowSums(Monocyte.integrated@assays$RNA@counts > 0) >=
  0.1 * ncol(Monocyte.integrated@assays$RNA@counts)
# Keep only the rows of the count matrix that pass the filter.
counts.sub <- Monocyte.integrated@assays$RNA@counts[genes.to.keep, ]
但这不是我想要的。而且我不确定该如何修改它,才能把簇名称考虑进去(假设这段代码本身是正确的)。
我把簇名称存储在名为“cluster_names”的元数据变量中。
如有任何帮助,我将不胜感激
祝好
您可以使用 `lapply` 遍历聚类因子的各个水平,分别对每个簇取子集并进行过滤,然后使用 `setNames` 为结果列表命名。下面是一个可重现的例子:
library(Seurat)

# Build a small reproducible example from the pbmc_small example object.
data("pbmc_small")
pbmc_small <- FindClusters(pbmc_small, resolution = 1)

# Mirror the setup from the question: cluster labels are stored in a metadata
# column called "cluster_names", with levels cluster_1, cluster_2, ...
names(pbmc_small@meta.data)[names(pbmc_small@meta.data) == "seurat_clusters"] <- "cluster_names"
levels(pbmc_small$cluster_names) <- paste0("cluster_", seq_along(levels(pbmc_small$cluster_names)))

# For each cluster level, subset the object to that cluster and return the
# names of the genes with a non-zero count in at least 10% of its cells.
# The result is a list with one character vector per cluster, named by level.
setNames(lapply(levels(pbmc_small$cluster_names), function(x) {
  p <- subset(pbmc_small, cluster_names == x)
  counts <- p@assays$RNA@counts
  # Index the row names of the RNA count matrix itself rather than rownames(p):
  # rownames(p) follows the object's default assay, which need not be RNA
  # (e.g. an integrated object), and would then misalign with the mask.
  rownames(counts)[Matrix::rowSums(counts > 0) >= 0.1 * ncol(counts)]
}), levels(pbmc_small$cluster_names))
#> $cluster_1
#> [1] "CD79B" "HLA-DRA" "LTB" "SP100" "PPP3CC" "CXCR4"
#> [7] "STX10" "SNHG7" "CD3D" "NOSIP" "SAFB2" "CD2"
#> [13] "IL7R" "PIK3IP1" "MPHOSPH6" "KHDRBS1" "MAL" "CCR7"
#> [19] "THYN1" "TAF7" "LDHB" "TMEM123" "EPC1" "EIF4A2"
#> [25] "CD3E" "TMUB1" "BLOC1S4" "SRSF7" "ACAP1" "TNFAIP8"
#> [31] "CD7" "TAGAP" "DNAJB1" "ASNSD1" "S1PR4" "CTSW"
#> [37] "GZMK" "NKG7" "IL32" "DNAJC2" "LYAR" "CST7"
#> [43] "LCK" "CCL5" "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [49] "CD8A" "GYPC" "HNRNPF" "RPL7L1" "KLRG1" "CRBN"
#> [55] "SATB1" "PMPCB" "NRBP1" "TCF7" "HNRNPA3" "S100A8"
#> [61] "S100A9" "LYZ" "FCN1" "TYROBP" "NFKBIA" "TYMP"
#> [67] "CTSS" "TSPO" "CTSB" "LGALS1" "BLVRA" "LGALS3"
#> [73] "IFI6" "HLA-DPA1" "CST3" "GSTP1" "EIF3G" "VPS28"
#> [79] "ZFP36L1" "ANXA2" "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [85] "YWHAB" "MYO1G" "SAT1" "RGS2" "FCGR3A" "S100A11"
#> [91] "FCER1G" "IFITM2" "COTL1" "LGALS9" "CD68" "RHOC"
#> [97] "CARD16" "COPS6" "PPBP" "GPX1" "TPM4" "PF4"
#> [103] "SDPR" "NRGN" "SPARC" "GNG11" "CLU" "HIST1H2AC"
#> [109] "NCOA4" "GP9" "FERMT3" "ODC1" "CD9" "RUFY1"
#> [115] "TUBB1" "TALDO1" "TREML1" "NGFRAP1" "PGRMC1" "CA2"
#> [121] "ITGA2B" "MYL9" "TMEM40" "PARVB" "PTCRA" "ACRBP"
#> [127] "TSC22D1" "VDAC3" "GZMB" "GZMA" "GNLY" "FGFBP2"
#> [133] "AKR1C3" "CCL4" "PRF1" "GZMH" "XBP1" "GZMM"
#> [139] "PTGDR" "IGFBP7" "TTC38" "KLRD1" "ARHGDIA" "IL2RB"
#> [145] "CLIC3" "PPP1R18" "CD247" "ALOX5AP" "XCL2" "C12orf75"
#> [151] "RARRES3" "PCMT1" "LAMP1" "SPON2"
#>
#> $cluster_2
#> [1] "CD79B" "CD79A" "HLA-DRA" "HLA-DQB1"
#> [5] "HVCN1" "HLA-DMB" "LTB" "SP100"
#> [9] "NCF1" "EAF2" "FAM96A" "CXCR4"
#> [13] "STX10" "SNHG7" "NT5C" "NOSIP"
#> [17] "IL7R" "KHDRBS1" "TAF7" "LDHB"
#> [21] "TMEM123" "EIF4A2" "TMUB1" "BLOC1S4"
#> [25] "SRSF7" "TNFAIP8" "TAGAP" "DNAJB1"
#> [29] "S1PR4" "NKG7" "IL32" "DNAJC2"
#> [33] "LYAR" "CCL5" "SSR2" "GIMAP1"
#> [37] "MMADHC" "HNRNPF" "RPL7L1" "HNRNPA3"
#> [41] "S100A8" "S100A9" "LYZ" "CD14"
#> [45] "FCN1" "TYROBP" "ASGR1" "NFKBIA"
#> [49] "TYMP" "CTSS" "TSPO" "RBP7"
#> [53] "CTSB" "LGALS1" "FPR1" "VSTM1"
#> [57] "BLVRA" "MPEG1" "BID" "SMCO4"
#> [61] "CFD" "LINC00936" "LGALS2" "MS4A6A"
#> [65] "FCGRT" "LGALS3" "NUP214" "SCO2"
#> [69] "IL17RA" "IFI6" "HLA-DPA1" "FCER1A"
#> [73] "CLEC10A" "HLA-DMA" "RGS1" "HLA-DPB1"
#> [77] "HLA-DQA1" "RNF130" "HLA-DRB5" "HLA-DRB1"
#> [81] "CST3" "IL1B" "POP7" "HLA-DQA2"
#> [85] "GSTP1" "EIF3G" "VPS28" "LY86"
#> [89] "ZFP36L1" "ANXA2" "GRN" "CFP"
#> [93] "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [97] "YWHAB" "MYO1G" "SAT1" "RGS2"
#> [101] "SERPINA1" "IFITM3" "FCGR3A" "LILRA3"
#> [105] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [109] "WARS" "IFI30" "MS4A7" "C5AR1"
#> [113] "HCK" "COTL1" "LGALS9" "CD68"
#> [117] "RP11-290F20.3" "RHOC" "CARD16" "LRRC25"
#> [121] "COPS6" "ADAR" "GPX1" "TPM4"
#> [125] "NRGN" "NCOA4" "FERMT3" "ODC1"
#> [129] "TALDO1" "PARVB" "VDAC3" "GZMB"
#> [133] "XBP1" "IGFBP7" "ARHGDIA" "PPP1R18"
#> [137] "ALOX5AP" "RARRES3" "PCMT1" "SPON2"
#>
#> $cluster_3
#> [1] "MS4A1" "CD79B" "CD79A" "HLA-DRA"
#> [5] "TCL1A" "HLA-DQB1" "HVCN1" "HLA-DMB"
#> [9] "LTB" "LINC00926" "FCER2" "SP100"
#> [13] "NCF1" "PPP3CC" "EAF2" "PPAPDC1B"
#> [17] "CD19" "KIAA0125" "CYB561A3" "CD180"
#> [21] "RP11-693J15.5" "FAM96A" "CXCR4" "STX10"
#> [25] "SNHG7" "NT5C" "BANK1" "IGLL5"
#> [29] "CD200" "FCRLA" "CD3D" "NOSIP"
#> [33] "CD2" "IL7R" "PIK3IP1" "KHDRBS1"
#> [37] "THYN1" "TAF7" "LDHB" "TMEM123"
#> [41] "CCDC104" "EPC1" "EIF4A2" "CD3E"
#> [45] "SRSF7" "ACAP1" "TNFAIP8" "CD7"
#> [49] "TAGAP" "DNAJB1" "S1PR4" "CTSW"
#> [53] "GZMK" "NKG7" "IL32" "DNAJC2"
#> [57] "LYAR" "CST7" "LCK" "CCL5"
#> [61] "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [65] "CD8A" "PTPN22" "GYPC" "HNRNPF"
#> [69] "RPL7L1" "CRBN" "SATB1" "SIT1"
#> [73] "PMPCB" "NRBP1" "TCF7" "HNRNPA3"
#> [77] "S100A9" "LYZ" "FCN1" "TYROBP"
#> [81] "NFKBIA" "TYMP" "CTSS" "TSPO"
#> [85] "CTSB" "LGALS1" "BLVRA" "MPEG1"
#> [89] "BID" "CFD" "LINC00936" "LGALS2"
#> [93] "MS4A6A" "FCGRT" "LGALS3" "SCO2"
#> [97] "HLA-DPA1" "FCER1A" "CLEC10A" "HLA-DMA"
#> [101] "RGS1" "HLA-DPB1" "HLA-DQA1" "RNF130"
#> [105] "HLA-DRB5" "HLA-DRB1" "CST3" "IL1B"
#> [109] "POP7" "HLA-DQA2" "CD1C" "GSTP1"
#> [113] "EIF3G" "VPS28" "LY86" "ZFP36L1"
#> [117] "ZNF330" "ANXA2" "GRN" "CFP"
#> [121] "HSP90AA1" "FUOM" "LST1" "AIF1"
#> [125] "PSAP" "YWHAB" "MYO1G" "SAT1"
#> [129] "RGS2" "SERPINA1" "IFITM3" "FCGR3A"
#> [133] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [137] "WARS" "IFI30" "MS4A7" "HCK"
#> [141] "COTL1" "LGALS9" "CD68" "RHOC"
#> [145] "CARD16" "LRRC25" "COPS6" "ADAR"
#> [149] "GPX1" "TPM4" "NCOA4" "FERMT3"
#> [153] "ODC1" "RUFY1" "TALDO1" "VDAC3"
#> [157] "GZMA" "GNLY" "FGFBP2" "PRF1"
#> [161] "XBP1" "GZMM" "PTGDR" "ARHGDIA"
#> [165] "PPP1R18" "CD247" "ALOX5AP" "XCL2"
#> [169] "C12orf75" "RARRES3" "PCMT1" "SPON2"
由 reprex 包 (v1.0.0) 于 2021-03-26 创建
我有一个已经定义好簇的 Seurat 对象。我需要提取在某个簇中至少 10% 的细胞里表达的所有基因的列表,并且需要对我的每一个簇分别执行这一操作。
我知道有一段代码可以从整个 Seurat 对象中提取至少在 10% 的细胞中表达的基因:
# Flag genes detected (count > 0) in at least 10% of all cells in the object.
# The cutoff is compared unrounded: wrapping it in floor() would lower the
# threshold and admit genes seen in fewer than 10% of cells (e.g. 1 of 15).
genes.to.keep <- Matrix::rowSums(Monocyte.integrated@assays$RNA@counts > 0) >=
  0.1 * ncol(Monocyte.integrated@assays$RNA@counts)
# Keep only the rows of the count matrix that pass the filter.
counts.sub <- Monocyte.integrated@assays$RNA@counts[genes.to.keep, ]
但这不是我想要的。而且我不确定该如何修改它,才能把簇名称考虑进去(假设这段代码本身是正确的)。我把簇名称存储在名为“cluster_names”的元数据变量中。
如有任何帮助,我将不胜感激
祝好
您可以使用 `lapply` 遍历聚类因子的各个水平,分别对每个簇取子集并进行过滤,然后使用 `setNames` 为结果列表命名。下面是一个可重现的例子:
library(Seurat)

# Build a small reproducible example from the pbmc_small example object.
data("pbmc_small")
pbmc_small <- FindClusters(pbmc_small, resolution = 1)

# Mirror the setup from the question: cluster labels are stored in a metadata
# column called "cluster_names", with levels cluster_1, cluster_2, ...
names(pbmc_small@meta.data)[names(pbmc_small@meta.data) == "seurat_clusters"] <- "cluster_names"
levels(pbmc_small$cluster_names) <- paste0("cluster_", seq_along(levels(pbmc_small$cluster_names)))

# For each cluster level, subset the object to that cluster and return the
# names of the genes with a non-zero count in at least 10% of its cells.
# The result is a list with one character vector per cluster, named by level.
setNames(lapply(levels(pbmc_small$cluster_names), function(x) {
  p <- subset(pbmc_small, cluster_names == x)
  counts <- p@assays$RNA@counts
  # Index the row names of the RNA count matrix itself rather than rownames(p):
  # rownames(p) follows the object's default assay, which need not be RNA
  # (e.g. an integrated object), and would then misalign with the mask.
  rownames(counts)[Matrix::rowSums(counts > 0) >= 0.1 * ncol(counts)]
}), levels(pbmc_small$cluster_names))
#> $cluster_1
#> [1] "CD79B" "HLA-DRA" "LTB" "SP100" "PPP3CC" "CXCR4"
#> [7] "STX10" "SNHG7" "CD3D" "NOSIP" "SAFB2" "CD2"
#> [13] "IL7R" "PIK3IP1" "MPHOSPH6" "KHDRBS1" "MAL" "CCR7"
#> [19] "THYN1" "TAF7" "LDHB" "TMEM123" "EPC1" "EIF4A2"
#> [25] "CD3E" "TMUB1" "BLOC1S4" "SRSF7" "ACAP1" "TNFAIP8"
#> [31] "CD7" "TAGAP" "DNAJB1" "ASNSD1" "S1PR4" "CTSW"
#> [37] "GZMK" "NKG7" "IL32" "DNAJC2" "LYAR" "CST7"
#> [43] "LCK" "CCL5" "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [49] "CD8A" "GYPC" "HNRNPF" "RPL7L1" "KLRG1" "CRBN"
#> [55] "SATB1" "PMPCB" "NRBP1" "TCF7" "HNRNPA3" "S100A8"
#> [61] "S100A9" "LYZ" "FCN1" "TYROBP" "NFKBIA" "TYMP"
#> [67] "CTSS" "TSPO" "CTSB" "LGALS1" "BLVRA" "LGALS3"
#> [73] "IFI6" "HLA-DPA1" "CST3" "GSTP1" "EIF3G" "VPS28"
#> [79] "ZFP36L1" "ANXA2" "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [85] "YWHAB" "MYO1G" "SAT1" "RGS2" "FCGR3A" "S100A11"
#> [91] "FCER1G" "IFITM2" "COTL1" "LGALS9" "CD68" "RHOC"
#> [97] "CARD16" "COPS6" "PPBP" "GPX1" "TPM4" "PF4"
#> [103] "SDPR" "NRGN" "SPARC" "GNG11" "CLU" "HIST1H2AC"
#> [109] "NCOA4" "GP9" "FERMT3" "ODC1" "CD9" "RUFY1"
#> [115] "TUBB1" "TALDO1" "TREML1" "NGFRAP1" "PGRMC1" "CA2"
#> [121] "ITGA2B" "MYL9" "TMEM40" "PARVB" "PTCRA" "ACRBP"
#> [127] "TSC22D1" "VDAC3" "GZMB" "GZMA" "GNLY" "FGFBP2"
#> [133] "AKR1C3" "CCL4" "PRF1" "GZMH" "XBP1" "GZMM"
#> [139] "PTGDR" "IGFBP7" "TTC38" "KLRD1" "ARHGDIA" "IL2RB"
#> [145] "CLIC3" "PPP1R18" "CD247" "ALOX5AP" "XCL2" "C12orf75"
#> [151] "RARRES3" "PCMT1" "LAMP1" "SPON2"
#>
#> $cluster_2
#> [1] "CD79B" "CD79A" "HLA-DRA" "HLA-DQB1"
#> [5] "HVCN1" "HLA-DMB" "LTB" "SP100"
#> [9] "NCF1" "EAF2" "FAM96A" "CXCR4"
#> [13] "STX10" "SNHG7" "NT5C" "NOSIP"
#> [17] "IL7R" "KHDRBS1" "TAF7" "LDHB"
#> [21] "TMEM123" "EIF4A2" "TMUB1" "BLOC1S4"
#> [25] "SRSF7" "TNFAIP8" "TAGAP" "DNAJB1"
#> [29] "S1PR4" "NKG7" "IL32" "DNAJC2"
#> [33] "LYAR" "CCL5" "SSR2" "GIMAP1"
#> [37] "MMADHC" "HNRNPF" "RPL7L1" "HNRNPA3"
#> [41] "S100A8" "S100A9" "LYZ" "CD14"
#> [45] "FCN1" "TYROBP" "ASGR1" "NFKBIA"
#> [49] "TYMP" "CTSS" "TSPO" "RBP7"
#> [53] "CTSB" "LGALS1" "FPR1" "VSTM1"
#> [57] "BLVRA" "MPEG1" "BID" "SMCO4"
#> [61] "CFD" "LINC00936" "LGALS2" "MS4A6A"
#> [65] "FCGRT" "LGALS3" "NUP214" "SCO2"
#> [69] "IL17RA" "IFI6" "HLA-DPA1" "FCER1A"
#> [73] "CLEC10A" "HLA-DMA" "RGS1" "HLA-DPB1"
#> [77] "HLA-DQA1" "RNF130" "HLA-DRB5" "HLA-DRB1"
#> [81] "CST3" "IL1B" "POP7" "HLA-DQA2"
#> [85] "GSTP1" "EIF3G" "VPS28" "LY86"
#> [89] "ZFP36L1" "ANXA2" "GRN" "CFP"
#> [93] "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [97] "YWHAB" "MYO1G" "SAT1" "RGS2"
#> [101] "SERPINA1" "IFITM3" "FCGR3A" "LILRA3"
#> [105] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [109] "WARS" "IFI30" "MS4A7" "C5AR1"
#> [113] "HCK" "COTL1" "LGALS9" "CD68"
#> [117] "RP11-290F20.3" "RHOC" "CARD16" "LRRC25"
#> [121] "COPS6" "ADAR" "GPX1" "TPM4"
#> [125] "NRGN" "NCOA4" "FERMT3" "ODC1"
#> [129] "TALDO1" "PARVB" "VDAC3" "GZMB"
#> [133] "XBP1" "IGFBP7" "ARHGDIA" "PPP1R18"
#> [137] "ALOX5AP" "RARRES3" "PCMT1" "SPON2"
#>
#> $cluster_3
#> [1] "MS4A1" "CD79B" "CD79A" "HLA-DRA"
#> [5] "TCL1A" "HLA-DQB1" "HVCN1" "HLA-DMB"
#> [9] "LTB" "LINC00926" "FCER2" "SP100"
#> [13] "NCF1" "PPP3CC" "EAF2" "PPAPDC1B"
#> [17] "CD19" "KIAA0125" "CYB561A3" "CD180"
#> [21] "RP11-693J15.5" "FAM96A" "CXCR4" "STX10"
#> [25] "SNHG7" "NT5C" "BANK1" "IGLL5"
#> [29] "CD200" "FCRLA" "CD3D" "NOSIP"
#> [33] "CD2" "IL7R" "PIK3IP1" "KHDRBS1"
#> [37] "THYN1" "TAF7" "LDHB" "TMEM123"
#> [41] "CCDC104" "EPC1" "EIF4A2" "CD3E"
#> [45] "SRSF7" "ACAP1" "TNFAIP8" "CD7"
#> [49] "TAGAP" "DNAJB1" "S1PR4" "CTSW"
#> [53] "GZMK" "NKG7" "IL32" "DNAJC2"
#> [57] "LYAR" "CST7" "LCK" "CCL5"
#> [61] "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [65] "CD8A" "PTPN22" "GYPC" "HNRNPF"
#> [69] "RPL7L1" "CRBN" "SATB1" "SIT1"
#> [73] "PMPCB" "NRBP1" "TCF7" "HNRNPA3"
#> [77] "S100A9" "LYZ" "FCN1" "TYROBP"
#> [81] "NFKBIA" "TYMP" "CTSS" "TSPO"
#> [85] "CTSB" "LGALS1" "BLVRA" "MPEG1"
#> [89] "BID" "CFD" "LINC00936" "LGALS2"
#> [93] "MS4A6A" "FCGRT" "LGALS3" "SCO2"
#> [97] "HLA-DPA1" "FCER1A" "CLEC10A" "HLA-DMA"
#> [101] "RGS1" "HLA-DPB1" "HLA-DQA1" "RNF130"
#> [105] "HLA-DRB5" "HLA-DRB1" "CST3" "IL1B"
#> [109] "POP7" "HLA-DQA2" "CD1C" "GSTP1"
#> [113] "EIF3G" "VPS28" "LY86" "ZFP36L1"
#> [117] "ZNF330" "ANXA2" "GRN" "CFP"
#> [121] "HSP90AA1" "FUOM" "LST1" "AIF1"
#> [125] "PSAP" "YWHAB" "MYO1G" "SAT1"
#> [129] "RGS2" "SERPINA1" "IFITM3" "FCGR3A"
#> [133] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [137] "WARS" "IFI30" "MS4A7" "HCK"
#> [141] "COTL1" "LGALS9" "CD68" "RHOC"
#> [145] "CARD16" "LRRC25" "COPS6" "ADAR"
#> [149] "GPX1" "TPM4" "NCOA4" "FERMT3"
#> [153] "ODC1" "RUFY1" "TALDO1" "VDAC3"
#> [157] "GZMA" "GNLY" "FGFBP2" "PRF1"
#> [161] "XBP1" "GZMM" "PTGDR" "ARHGDIA"
#> [165] "PPP1R18" "CD247" "ALOX5AP" "XCL2"
#> [169] "C12orf75" "RARRES3" "PCMT1" "SPON2"
由 reprex 包 (v1.0.0) 于 2021-03-26 创建