在 R 中循环调用 biomaRt
Looping with biomart in R
我有一个基于许多文件创建的数据集列表。
# Build a reproducible example: a named list of four identical count tables.
#
# Returns a list named "sample1" ... "sample4"; each element is a data.frame
# with a character column `ensembl.id` (versioned Ensembl gene ids) and a
# numeric column `counts`.
list.function <- function() {
  gene_ids <- c(
    "ENSG00000000005.6", "ENSG00000000003.15", "ENSG00000000419.13",
    "ENSG00000000457.14", "ENSG00000000460.17"
  )
  gene_counts <- c(4, 5, 6, 1, 1)
  sample_names <- paste0("sample", 1:4)

  # Every sample holds the same toy data, so create one table per name
  # instead of defining four separate local variables and looking them up.
  samples <- lapply(sample_names, function(nm) {
    data.frame(ensembl.id = gene_ids, counts = gene_counts)
  })
  names(samples) <- sample_names
  samples
}
# Build the example list of data frames.
my.list3 <- list.function()
# Print the list to inspect it.
my.list3
# Load biomaRt and select the Ensembl human gene dataset as the mart to query.
library("biomaRt")
grch38 <- useMart("ensembl",dataset="hsapiens_gene_ensembl")
我正在尝试自动执行此操作:
# Attempt to annotate every data frame in my.list3 with biomaRt attributes.
# NOTE: the last expression in the function body is an assignment, so each
# call returns the assigned value (the gene_biotype factor) rather than the
# `merged` data frame; my.list4 therefore ends up as a list of factors.
# NOTE(review): %>% and distinct() are not provided by biomaRt; no library()
# call that supplies them is visible in this snippet.
my.list4 = lapply(my.list3, function(x){
# Query the mart for the annotation attributes of the ids in this data frame.
atributos = getBM(attributes = c("ensembl_gene_id_version", "external_gene_name", "chromosome_name", "gene_biotype", "entrezgene_description"),
filters = "ensembl_gene_id_version",
values = x$ensembl.id,
mart = grch38)
# Keep a single row per ensembl_gene_id_version.
atributos_unique = atributos %>% distinct(ensembl_gene_id_version, .keep_all = TRUE)
# Join the input data frame with the annotations on the versioned gene id.
merged = merge(x, atributos_unique, by.x="ensembl.id", by.y="ensembl_gene_id_version" )
# The value of this assignment (the factor) becomes the function's result.
merged$gene_biotype = as.factor(merged$gene_biotype)
})
代码能够正确遍历所有数据集,但输出不正确!
我需要最终输出的 merged 数据框与 my.list3 列表中的每个数据集一一对应,并且与原始数据集同名。
有什么想法吗?
您没有在函数中返回数据框:函数体的最后一个表达式是对 merged$gene_biotype 的赋值,因此 lapply 收集到的是该赋值的值(一个因子),而不是 merged 数据框。
library(biomaRt)
library(tidyverse)
# Select the Ensembl human gene dataset as the mart to query.
grch38 <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Annotate every data frame in my.list3. lapply() keeps the element names, so
# my.list4 carries the same names as the input list.
my.list4 <- lapply(my.list3, function(x) {
  wanted_attributes <- c(
    "ensembl_gene_id_version",
    "external_gene_name",
    "chromosome_name",
    "gene_biotype",
    "entrezgene_description"
  )
  annotations <- getBM(
    attributes = wanted_attributes,
    filters = "ensembl_gene_id_version",
    values = x$ensembl.id,
    mart = grch38
  )
  # Keep a single row per versioned gene id.
  annotations_unique <- distinct(
    annotations,
    ensembl_gene_id_version,
    .keep_all = TRUE
  )
  # Join the input data frame with the annotations on the versioned gene id.
  merged <- merge(
    x,
    annotations_unique,
    by.x = "ensembl.id",
    by.y = "ensembl_gene_id_version"
  )
  merged$gene_biotype <- as.factor(merged$gene_biotype)
  # Return the whole data frame; without this line the function would return
  # the value of the assignment above (the factor). A bare `merged` as the
  # last expression works equally well.
  return(merged)
})
在函数体的末尾添加 return(merged)。
我有一个基于许多文件创建的数据集列表。
# Build a reproducible example: a named list of four identical count tables.
#
# Returns a list named "sample1" ... "sample4"; each element is a data.frame
# with a character column `ensembl.id` (versioned Ensembl gene ids) and a
# numeric column `counts`.
list.function <- function() {
  gene_ids <- c(
    "ENSG00000000005.6", "ENSG00000000003.15", "ENSG00000000419.13",
    "ENSG00000000457.14", "ENSG00000000460.17"
  )
  gene_counts <- c(4, 5, 6, 1, 1)
  sample_names <- paste0("sample", 1:4)

  # Every sample holds the same toy data, so create one table per name
  # instead of defining four separate local variables and looking them up.
  samples <- lapply(sample_names, function(nm) {
    data.frame(ensembl.id = gene_ids, counts = gene_counts)
  })
  names(samples) <- sample_names
  samples
}
# Build the example list of data frames.
my.list3 <- list.function()
# Print the list to inspect it.
my.list3
# Load biomaRt and select the Ensembl human gene dataset as the mart to query.
library("biomaRt")
grch38 <- useMart("ensembl",dataset="hsapiens_gene_ensembl")
我正在尝试自动执行此操作:
# Attempt to annotate every data frame in my.list3 with biomaRt attributes.
# NOTE: the last expression in the function body is an assignment, so each
# call returns the assigned value (the gene_biotype factor) rather than the
# `merged` data frame; my.list4 therefore ends up as a list of factors.
# NOTE(review): %>% and distinct() are not provided by biomaRt; no library()
# call that supplies them is visible in this snippet.
my.list4 = lapply(my.list3, function(x){
# Query the mart for the annotation attributes of the ids in this data frame.
atributos = getBM(attributes = c("ensembl_gene_id_version", "external_gene_name", "chromosome_name", "gene_biotype", "entrezgene_description"),
filters = "ensembl_gene_id_version",
values = x$ensembl.id,
mart = grch38)
# Keep a single row per ensembl_gene_id_version.
atributos_unique = atributos %>% distinct(ensembl_gene_id_version, .keep_all = TRUE)
# Join the input data frame with the annotations on the versioned gene id.
merged = merge(x, atributos_unique, by.x="ensembl.id", by.y="ensembl_gene_id_version" )
# The value of this assignment (the factor) becomes the function's result.
merged$gene_biotype = as.factor(merged$gene_biotype)
})
代码能够正确遍历所有数据集,但输出不正确!
我需要最终输出的 merged 数据框与 my.list3 列表中的每个数据集一一对应,并且与原始数据集同名。
有什么想法吗?
您没有在函数中返回数据框:函数体的最后一个表达式是对 merged$gene_biotype 的赋值,因此 lapply 收集到的是该赋值的值(一个因子),而不是 merged 数据框。
library(biomaRt)
library(tidyverse)
# Select the Ensembl human gene dataset as the mart to query.
grch38 <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Annotate every data frame in my.list3. lapply() keeps the element names, so
# my.list4 carries the same names as the input list.
my.list4 <- lapply(my.list3, function(x) {
  wanted_attributes <- c(
    "ensembl_gene_id_version",
    "external_gene_name",
    "chromosome_name",
    "gene_biotype",
    "entrezgene_description"
  )
  annotations <- getBM(
    attributes = wanted_attributes,
    filters = "ensembl_gene_id_version",
    values = x$ensembl.id,
    mart = grch38
  )
  # Keep a single row per versioned gene id.
  annotations_unique <- distinct(
    annotations,
    ensembl_gene_id_version,
    .keep_all = TRUE
  )
  # Join the input data frame with the annotations on the versioned gene id.
  merged <- merge(
    x,
    annotations_unique,
    by.x = "ensembl.id",
    by.y = "ensembl_gene_id_version"
  )
  merged$gene_biotype <- as.factor(merged$gene_biotype)
  # Return the whole data frame; without this line the function would return
  # the value of the assignment above (the factor). A bare `merged` as the
  # last expression works equally well.
  return(merged)
})
在函数体的末尾添加 return(merged)。