根据较早的目标参数将三个变量之一映射到目标?
Mapping one of three variables to target depending on earlier target parameter?
我正在研究这样定义的 Drake 工作流程:
# Branch settings, aligned by position: element i of each vector describes
# the same branch (project name, whether to normalize, whether to log2).
projectName <- c(
  "lake_2018_CER_lib_norm_log2",
  "lake_2018_CER_lib_norm",
  "lake_2018_CER_raw_counts"
)
normalize <- c(TRUE, TRUE, FALSE)
logTransform <- c(TRUE, FALSE, FALSE)
#' Scale each column of an expression table by its column total.
#'
#' @param datExpr A table with numeric columns (anything `colSums()` and
#'   `sweep()` accept).
#' @return `datExpr` with every entry divided by the sum of its column.
normalize_fxn <- function(datExpr) {
  col_totals <- colSums(datExpr)
  sweep(datExpr, MARGIN = 2, STATS = col_totals, FUN = "/")
}
# Build the drake plan: read the expression table, optionally normalize and
# log-transform it, derive a gene-filter threshold, and call realVsPermCor()
# for each resulting combination.
plan <- drake_plan(
# Read the tab-separated file at `filePath` and drop its first column.
datExpr = target(fread(file_in(filePath), sep = "\t") %>% select(-1), transform = map(filePath = !!filePath, .id = FALSE)),
# One target per element of `normalize`: when TRUE, the column-normalized data
# is scaled by 1e6 and offset by 1; otherwise datExpr passes through unchanged.
datExprNorm = target(if(normalize == TRUE) {normalize_fxn(datExpr)*1e6 + 1} else {datExpr}, transform = map(datExpr, normalize = !!normalize)),
# One target per element of `logTransform`, mapped over the datExprNorm targets.
# NOTE(review): `*1e6 + 1` is applied here as well as in datExprNorm, so a
# normalized input is scaled twice before log2 -- confirm this is intended.
datExprLog = target(if(logTransform == TRUE) {log2(datExprNorm*1e6 + 1)} else {datExprNorm}, transform = map(datExprNorm, logTransform = !!logTransform)),
# Crossed with every `percentCells` value: a numeric value yields
# round(ncol(datExprLog) * percentCells); anything else yields NULL.
filterGenesMinCells = target(if(is.numeric(percentCells)) {round(ncol(datExprLog)*percentCells)} else {NULL}, transform = cross(datExprLog, percentCells = !!percentCells)),
# Crossed with every `featureType`; target names are built from featureType
# and percentCells.
makePlots = target(realVsPermCor(datExpr = datExprLog,
# NOTE(review): `projectName` is not a grouping variable of any transform, so
# every makePlots target refers to the same `projectName` object rather than
# one name per normalize/logTransform branch.
projectName = projectName,
featureType = featureType,
nPerms = 100,
subsampleReal = NULL,
resampleReal = NULL,
# NOTE(review): subsamplePerm, fdrSubsample and outDir are passed positionally
# and are not defined in this snippet -- presumably defined elsewhere.
subsamplePerm,
filterGenesMinCells = filterGenesMinCells,
filterCellsMinGenes = NULL,
fdrSubsample,
futureThreads = NULL,
openBlasThreads = 10,
outDir),
transform = cross(filterGenesMinCells, featureType = !!featureType, .id = c(featureType, percentCells)))
)
目标输出如下所示:
> plan$target
[1] "datExpr" "datExprLog_TRUE_datExprNorm_TRUE_datExpr"
[3] "datExprLog_FALSE_datExprNorm_TRUE_datExpr_2" "datExprLog_FALSE_datExprNorm_FALSE_datExpr"
[5] "datExprNorm_TRUE_datExpr" "datExprNorm_TRUE_datExpr_2"
[7] "datExprNorm_FALSE_datExpr" "filterGenesMinCells_NULL_datExprLog_TRUE_datExprNorm_TRUE_datExpr"
[9] "filterGenesMinCells_0.01_datExprLog_TRUE_datExprNorm_TRUE_datExpr" "filterGenesMinCells_0.02_datExprLog_TRUE_datExprNorm_TRUE_datExpr"
[11] "filterGenesMinCells_NULL_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2" "filterGenesMinCells_0.01_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2"
[13] "filterGenesMinCells_0.02_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2" "filterGenesMinCells_NULL_datExprLog_FALSE_datExprNorm_FALSE_datExpr"
[15] "filterGenesMinCells_0.01_datExprLog_FALSE_datExprNorm_FALSE_datExpr" "filterGenesMinCells_0.02_datExprLog_FALSE_datExprNorm_FALSE_datExpr"
[17] "makePlots_gene_NULL" "makePlots_cell_NULL"
[19] "makePlots_gene_0.01" "makePlots_cell_0.01"
[21] "makePlots_gene_0.02" "makePlots_cell_0.02"
[23] "makePlots_gene_NULL_2" "makePlots_cell_NULL_2"
[25] "makePlots_gene_0.01_2" "makePlots_cell_0.01_2"
[27] "makePlots_gene_0.02_2" "makePlots_cell_0.02_2"
[29] "makePlots_gene_NULL_3" "makePlots_cell_NULL_3"
[31] "makePlots_gene_0.01_3" "makePlots_cell_0.01_3"
[33] "makePlots_gene_0.02_3" "makePlots_cell_0.02_3"
这已经非常接近我想要的结果,但我卡在了 projectName 上:我希望最终目标使用三个项目名称中的一个,具体取决于前面步骤产生的输入是否经过了标准化和/或 log 转换。
目前,我生成了 18 个 makePlots 目标,所以我希望每个项目名称各对应其中的 6 个目标。
有什么方法可以做到这一点吗?
看来您可以编写一个函数,接收标准化和 log 转换的设置并返回对应的项目名称。示意代码如下。
drake 中的静态分支比较难用。在 drake 的继任者 targets 中,我尝试让静态分支和动态分支这两种分支都更容易使用。(不过,在项目进行到一半时切换工具可能并不可行。)
- targets:https://github.com/ropensci/targets
- tarchetypes(targets 的静态分支包):https://github.com/ropensci/tarchetypes
- 用户手册:https://wlandau.github.io/targets-manual (即将迁移至 https://books.ropensci.org/targets)。
library(drake)
# Inputs for the reproducible example. The strings are the values used in
# the example plan.
filePath <- "file_path.txt"
percentCells <- "percent_cells"
featureType <- "feature_type"

# Branch settings, aligned by position: element i of each vector describes
# the same branch.
normalize <- c(TRUE, TRUE, FALSE)
logTransform <- c(TRUE, FALSE, FALSE)
#' Scale each column of an expression table by its column total.
#'
#' @param datExpr A table with numeric columns (anything `colSums()` and
#'   `sweep()` accept).
#' @return `datExpr` with every entry divided by the sum of its column.
normalize_fxn <- function(datExpr) {
  col_totals <- colSums(datExpr)
  sweep(datExpr, MARGIN = 2, STATS = col_totals, FUN = "/")
}
#' Map normalization / log-transform settings to a project name.
#'
#' @param normalize Scalar `TRUE`/`FALSE` (or the string "TRUE"/"FALSE").
#' @param log_transform Scalar `TRUE`/`FALSE` (or the string "TRUE"/"FALSE").
#' @return The project name (a string) for the given combination.
#' @details Only TRUE/TRUE, TRUE/FALSE and FALSE/FALSE are mapped. Any other
#'   combination raises an error instead of silently returning `NULL`, which
#'   would otherwise end up as a NULL `projectName` in the plan.
name_project <- function(normalize, log_transform) {
  key <- paste0(normalize, "_", log_transform)
  project <- switch(
    key,
    TRUE_TRUE = "lake_2018_CER_lib_norm_log2",
    TRUE_FALSE = "lake_2018_CER_lib_norm",
    FALSE_FALSE = "lake_2018_CER_raw_counts"
  )
  # switch() yields NULL when no branch matches; fail loudly in that case.
  if (is.null(project)) {
    stop(
      "No project name defined for normalize/log_transform combination: ",
      key,
      call. = FALSE
    )
  }
  project
}
# Build the drake plan: read the expression table, optionally normalize and
# log-transform it, derive a gene-filter threshold, and call realVsPermCor()
# for each resulting combination.
plan <- drake_plan(
# Read the tab-separated file at `filePath` and drop its first column.
datExpr = target(fread(file_in(filePath), sep = "\t") %>% select(-1), transform = map(filePath = !!filePath, .id = FALSE)),
# One target per element of `normalize`: when TRUE, the column-normalized data
# is scaled by 1e6 and offset by 1; otherwise datExpr passes through unchanged.
datExprNorm = target(if(normalize == TRUE) {normalize_fxn(datExpr)*1e6 + 1} else {datExpr}, transform = map(datExpr, normalize = !!normalize)),
# One target per element of `logTransform`, mapped over the datExprNorm targets.
# NOTE(review): `*1e6 + 1` is applied here as well as in datExprNorm, so a
# normalized input is scaled twice before log2 -- confirm this is intended.
datExprLog = target(if(logTransform == TRUE) {log2(datExprNorm*1e6 + 1)} else {datExprNorm}, transform = map(datExprNorm, logTransform = !!logTransform)),
# Crossed with every `percentCells` value: a numeric value yields
# round(ncol(datExprLog) * percentCells); anything else yields NULL.
filterGenesMinCells = target(if(is.numeric(percentCells)) {round(ncol(datExprLog)*percentCells)} else {NULL}, transform = cross(datExprLog, percentCells = !!percentCells)),
# Crossed with every `featureType`; target names are built from featureType
# and percentCells.
makePlots = target(
realVsPermCor(
datExpr = datExprLog,
# The project name is derived from this branch's normalize / logTransform
# values via name_project(). `!!` has the call evaluated while the plan is
# built, so the command stores the resulting string.
# NOTE(review): this relies on `normalize` and `logTransform` already being
# replaced by their branch values when `!!` is evaluated; the printed
# commands show literal project names, which is consistent with that.
projectName = !!name_project(deparse(substitute(normalize)), deparse(substitute(logTransform))),
featureType = featureType,
nPerms = 100,
subsampleReal = NULL,
resampleReal = NULL,
# NOTE(review): subsamplePerm, fdrSubsample and outDir are passed positionally
# and are not defined in this snippet -- presumably defined elsewhere.
subsamplePerm,
filterGenesMinCells = filterGenesMinCells,
filterCellsMinGenes = NULL,
fdrSubsample,
futureThreads = NULL,
openBlasThreads = 10,
outDir
),
transform = cross(filterGenesMinCells, featureType = !!featureType, .id = c(featureType, percentCells))
)
)
# Show the generated commands of the makePlots targets only.
dplyr::filter(plan, grepl("makePlots", target))$command
#> [[1]]
#> realVsPermCor(datExpr = datExprLog_TRUE_datExprNorm_TRUE_datExpr,
#> projectName = "lake_2018_CER_lib_norm_log2", featureType = "feature_type",
#> nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#> subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_TRUE_datExprNorm_TRUE_datExpr,
#> filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#> openBlasThreads = 10, outDir)
#>
#> [[2]]
#> realVsPermCor(datExpr = datExprLog_FALSE_datExprNorm_TRUE_datExpr_2,
#> projectName = "lake_2018_CER_lib_norm", featureType = "feature_type",
#> nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#> subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2,
#> filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#> openBlasThreads = 10, outDir)
#>
#> [[3]]
#> realVsPermCor(datExpr = datExprLog_FALSE_datExprNorm_FALSE_datExpr,
#> projectName = "lake_2018_CER_raw_counts", featureType = "feature_type",
#> nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#> subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_FALSE_datExprNorm_FALSE_datExpr,
#> filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#> openBlasThreads = 10, outDir)
由 reprex package (v0.3.0)
于 2021 年 1 月 12 日创建
我正在研究这样定义的 Drake 工作流程:
# Branch settings, aligned by position: element i of each vector describes
# the same branch (project name, whether to normalize, whether to log2).
projectName <- c(
  "lake_2018_CER_lib_norm_log2",
  "lake_2018_CER_lib_norm",
  "lake_2018_CER_raw_counts"
)
normalize <- c(TRUE, TRUE, FALSE)
logTransform <- c(TRUE, FALSE, FALSE)
#' Scale each column of an expression table by its column total.
#'
#' @param datExpr A table with numeric columns (anything `colSums()` and
#'   `sweep()` accept).
#' @return `datExpr` with every entry divided by the sum of its column.
normalize_fxn <- function(datExpr) {
  col_totals <- colSums(datExpr)
  sweep(datExpr, MARGIN = 2, STATS = col_totals, FUN = "/")
}
# Build the drake plan: read the expression table, optionally normalize and
# log-transform it, derive a gene-filter threshold, and call realVsPermCor()
# for each resulting combination.
plan <- drake_plan(
# Read the tab-separated file at `filePath` and drop its first column.
datExpr = target(fread(file_in(filePath), sep = "\t") %>% select(-1), transform = map(filePath = !!filePath, .id = FALSE)),
# One target per element of `normalize`: when TRUE, the column-normalized data
# is scaled by 1e6 and offset by 1; otherwise datExpr passes through unchanged.
datExprNorm = target(if(normalize == TRUE) {normalize_fxn(datExpr)*1e6 + 1} else {datExpr}, transform = map(datExpr, normalize = !!normalize)),
# One target per element of `logTransform`, mapped over the datExprNorm targets.
# NOTE(review): `*1e6 + 1` is applied here as well as in datExprNorm, so a
# normalized input is scaled twice before log2 -- confirm this is intended.
datExprLog = target(if(logTransform == TRUE) {log2(datExprNorm*1e6 + 1)} else {datExprNorm}, transform = map(datExprNorm, logTransform = !!logTransform)),
# Crossed with every `percentCells` value: a numeric value yields
# round(ncol(datExprLog) * percentCells); anything else yields NULL.
filterGenesMinCells = target(if(is.numeric(percentCells)) {round(ncol(datExprLog)*percentCells)} else {NULL}, transform = cross(datExprLog, percentCells = !!percentCells)),
# Crossed with every `featureType`; target names are built from featureType
# and percentCells.
makePlots = target(realVsPermCor(datExpr = datExprLog,
# NOTE(review): `projectName` is not a grouping variable of any transform, so
# every makePlots target refers to the same `projectName` object rather than
# one name per normalize/logTransform branch.
projectName = projectName,
featureType = featureType,
nPerms = 100,
subsampleReal = NULL,
resampleReal = NULL,
# NOTE(review): subsamplePerm, fdrSubsample and outDir are passed positionally
# and are not defined in this snippet -- presumably defined elsewhere.
subsamplePerm,
filterGenesMinCells = filterGenesMinCells,
filterCellsMinGenes = NULL,
fdrSubsample,
futureThreads = NULL,
openBlasThreads = 10,
outDir),
transform = cross(filterGenesMinCells, featureType = !!featureType, .id = c(featureType, percentCells)))
)
目标输出如下所示:
> plan$target
[1] "datExpr" "datExprLog_TRUE_datExprNorm_TRUE_datExpr"
[3] "datExprLog_FALSE_datExprNorm_TRUE_datExpr_2" "datExprLog_FALSE_datExprNorm_FALSE_datExpr"
[5] "datExprNorm_TRUE_datExpr" "datExprNorm_TRUE_datExpr_2"
[7] "datExprNorm_FALSE_datExpr" "filterGenesMinCells_NULL_datExprLog_TRUE_datExprNorm_TRUE_datExpr"
[9] "filterGenesMinCells_0.01_datExprLog_TRUE_datExprNorm_TRUE_datExpr" "filterGenesMinCells_0.02_datExprLog_TRUE_datExprNorm_TRUE_datExpr"
[11] "filterGenesMinCells_NULL_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2" "filterGenesMinCells_0.01_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2"
[13] "filterGenesMinCells_0.02_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2" "filterGenesMinCells_NULL_datExprLog_FALSE_datExprNorm_FALSE_datExpr"
[15] "filterGenesMinCells_0.01_datExprLog_FALSE_datExprNorm_FALSE_datExpr" "filterGenesMinCells_0.02_datExprLog_FALSE_datExprNorm_FALSE_datExpr"
[17] "makePlots_gene_NULL" "makePlots_cell_NULL"
[19] "makePlots_gene_0.01" "makePlots_cell_0.01"
[21] "makePlots_gene_0.02" "makePlots_cell_0.02"
[23] "makePlots_gene_NULL_2" "makePlots_cell_NULL_2"
[25] "makePlots_gene_0.01_2" "makePlots_cell_0.01_2"
[27] "makePlots_gene_0.02_2" "makePlots_cell_0.02_2"
[29] "makePlots_gene_NULL_3" "makePlots_cell_NULL_3"
[31] "makePlots_gene_0.01_3" "makePlots_cell_0.01_3"
[33] "makePlots_gene_0.02_3" "makePlots_cell_0.02_3"
这已经非常接近我想要的结果,但我卡在了 projectName 上:我希望最终目标使用三个项目名称中的一个,具体取决于前面步骤产生的输入是否经过了标准化和/或 log 转换。
目前,我生成了 18 个 makePlots 目标,所以我希望每个项目名称各对应其中的 6 个目标。
有什么方法可以做到这一点吗?
看来您可以编写一个函数,接收标准化和 log 转换的设置并返回对应的项目名称。示意代码如下。
drake 中的静态分支比较难用。在 drake 的继任者 targets 中,我尝试让静态分支和动态分支这两种分支都更容易使用。(不过,在项目进行到一半时切换工具可能并不可行。)
- targets:https://github.com/ropensci/targets
- tarchetypes(targets 的静态分支包):https://github.com/ropensci/tarchetypes
- 用户手册:https://wlandau.github.io/targets-manual (即将迁移至 https://books.ropensci.org/targets)。
library(drake)
# Inputs for the reproducible example. The strings are the values used in
# the example plan.
filePath <- "file_path.txt"
percentCells <- "percent_cells"
featureType <- "feature_type"

# Branch settings, aligned by position: element i of each vector describes
# the same branch.
normalize <- c(TRUE, TRUE, FALSE)
logTransform <- c(TRUE, FALSE, FALSE)
#' Scale each column of an expression table by its column total.
#'
#' @param datExpr A table with numeric columns (anything `colSums()` and
#'   `sweep()` accept).
#' @return `datExpr` with every entry divided by the sum of its column.
normalize_fxn <- function(datExpr) {
  col_totals <- colSums(datExpr)
  sweep(datExpr, MARGIN = 2, STATS = col_totals, FUN = "/")
}
#' Map normalization / log-transform settings to a project name.
#'
#' @param normalize Scalar `TRUE`/`FALSE` (or the string "TRUE"/"FALSE").
#' @param log_transform Scalar `TRUE`/`FALSE` (or the string "TRUE"/"FALSE").
#' @return The project name (a string) for the given combination.
#' @details Only TRUE/TRUE, TRUE/FALSE and FALSE/FALSE are mapped. Any other
#'   combination raises an error instead of silently returning `NULL`, which
#'   would otherwise end up as a NULL `projectName` in the plan.
name_project <- function(normalize, log_transform) {
  key <- paste0(normalize, "_", log_transform)
  project <- switch(
    key,
    TRUE_TRUE = "lake_2018_CER_lib_norm_log2",
    TRUE_FALSE = "lake_2018_CER_lib_norm",
    FALSE_FALSE = "lake_2018_CER_raw_counts"
  )
  # switch() yields NULL when no branch matches; fail loudly in that case.
  if (is.null(project)) {
    stop(
      "No project name defined for normalize/log_transform combination: ",
      key,
      call. = FALSE
    )
  }
  project
}
# Build the drake plan: read the expression table, optionally normalize and
# log-transform it, derive a gene-filter threshold, and call realVsPermCor()
# for each resulting combination.
plan <- drake_plan(
# Read the tab-separated file at `filePath` and drop its first column.
datExpr = target(fread(file_in(filePath), sep = "\t") %>% select(-1), transform = map(filePath = !!filePath, .id = FALSE)),
# One target per element of `normalize`: when TRUE, the column-normalized data
# is scaled by 1e6 and offset by 1; otherwise datExpr passes through unchanged.
datExprNorm = target(if(normalize == TRUE) {normalize_fxn(datExpr)*1e6 + 1} else {datExpr}, transform = map(datExpr, normalize = !!normalize)),
# One target per element of `logTransform`, mapped over the datExprNorm targets.
# NOTE(review): `*1e6 + 1` is applied here as well as in datExprNorm, so a
# normalized input is scaled twice before log2 -- confirm this is intended.
datExprLog = target(if(logTransform == TRUE) {log2(datExprNorm*1e6 + 1)} else {datExprNorm}, transform = map(datExprNorm, logTransform = !!logTransform)),
# Crossed with every `percentCells` value: a numeric value yields
# round(ncol(datExprLog) * percentCells); anything else yields NULL.
filterGenesMinCells = target(if(is.numeric(percentCells)) {round(ncol(datExprLog)*percentCells)} else {NULL}, transform = cross(datExprLog, percentCells = !!percentCells)),
# Crossed with every `featureType`; target names are built from featureType
# and percentCells.
makePlots = target(
realVsPermCor(
datExpr = datExprLog,
# The project name is derived from this branch's normalize / logTransform
# values via name_project(). `!!` has the call evaluated while the plan is
# built, so the command stores the resulting string.
# NOTE(review): this relies on `normalize` and `logTransform` already being
# replaced by their branch values when `!!` is evaluated; the printed
# commands show literal project names, which is consistent with that.
projectName = !!name_project(deparse(substitute(normalize)), deparse(substitute(logTransform))),
featureType = featureType,
nPerms = 100,
subsampleReal = NULL,
resampleReal = NULL,
# NOTE(review): subsamplePerm, fdrSubsample and outDir are passed positionally
# and are not defined in this snippet -- presumably defined elsewhere.
subsamplePerm,
filterGenesMinCells = filterGenesMinCells,
filterCellsMinGenes = NULL,
fdrSubsample,
futureThreads = NULL,
openBlasThreads = 10,
outDir
),
transform = cross(filterGenesMinCells, featureType = !!featureType, .id = c(featureType, percentCells))
)
)
# Show the generated commands of the makePlots targets only.
dplyr::filter(plan, grepl("makePlots", target))$command
#> [[1]]
#> realVsPermCor(datExpr = datExprLog_TRUE_datExprNorm_TRUE_datExpr,
#> projectName = "lake_2018_CER_lib_norm_log2", featureType = "feature_type",
#> nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#> subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_TRUE_datExprNorm_TRUE_datExpr,
#> filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#> openBlasThreads = 10, outDir)
#>
#> [[2]]
#> realVsPermCor(datExpr = datExprLog_FALSE_datExprNorm_TRUE_datExpr_2,
#> projectName = "lake_2018_CER_lib_norm", featureType = "feature_type",
#> nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#> subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2,
#> filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#> openBlasThreads = 10, outDir)
#>
#> [[3]]
#> realVsPermCor(datExpr = datExprLog_FALSE_datExprNorm_FALSE_datExpr,
#> projectName = "lake_2018_CER_raw_counts", featureType = "feature_type",
#> nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#> subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_FALSE_datExprNorm_FALSE_datExpr,
#> filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#> openBlasThreads = 10, outDir)
由 reprex package (v0.3.0)
于 2021 年 1 月 12 日创建