如何在不合并数据集的情况下将多个 drake 目标合并为一个交叉目标?
How to combine multiple drake targets into a single cross target without combining the datasets?
drake 太棒了!我有一个复杂的多级处理问题,可以用下面这个例子来说明。我在第 1 级有 2 个进程,我希望由所有第 1 级进程生成的所有数据集都由第 2 级的单个目标来处理。
下面的代码可以满足我的要求,但我必须在第 2 层重复代码,在我的复杂情况下,这似乎是错误的。
library(drake)
library(tidyverse)
#' Level-1 process "a" (placeholder for the example).
#'
#' @param x Input value.
#' @return `x`, unchanged.
f_process1a <- function(x) x
#' Level-1 process "b" (placeholder for the example).
#'
#' @param x Input value.
#' @return `x`, unchanged.
f_process1b <- function(x) x
#' Level-2 process: append a parameter to a level-1 result.
#'
#' @param data Result of a level-1 process.
#' @param x Value to append.
#' @return `c(data, x)`.
f_process2 <- function(data, x) {
  combined <- c(data, x)
  combined
}
# Level 1: two independent processes, each mapped over its own inputs.
# Level 2: each level-1 target is crossed with process2_var. The same command
# is written twice, once per level-1 process (process2a / process2b).
drakeplan <- drake::drake_plan(
  process1a = target(
    f_process1a(process1a_var),
    transform = map(process1a_var = c(1, 2))
  ),
  process1b = target(
    f_process1b(process1b_var),
    transform = map(process1b_var = c(2, 3))
  ),
  process2a = target(
    f_process2(process1a, process2_var),
    transform = cross(process1a, process2_var = c(4, 5))
  ),
  process2b = target(
    f_process2(process1b, process2_var),
    transform = cross(process1b, process2_var = c(4, 5))
  )
)

# Print the expanded plan as source code.
drake_plan_source(drakeplan)
#> drake_plan(
#> process1a_1 = f_process1a(1),
#> process1a_2 = f_process1a(2),
#> process1b_2 = f_process1b(2),
#> process1b_3 = f_process1b(3),
#> process2a_4_process1a_1 = f_process2(process1a_1, 4),
#> process2a_5_process1a_1 = f_process2(process1a_1, 5),
#> process2a_4_process1a_2 = f_process2(process1a_2, 4),
#> process2a_5_process1a_2 = f_process2(process1a_2, 5),
#> process2b_4_process1b_2 = f_process2(process1b_2, 4),
#> process2b_5_process1b_2 = f_process2(process1b_2, 5),
#> process2b_4_process1b_3 = f_process2(process1b_3, 4),
#> process2b_5_process1b_3 = f_process2(process1b_3, 5)
#> )
由 reprex package (v0.3.0)
于 2019-09-05 创建
我想将 process2[ab] 步骤合并到一个目标中。这可能吗?
看来我应该可以有一个单一的目标,例如:
# Attempted single level-2 target covering both level-1 processes
# (reported below as not working).
# NOTE(review): `process2a` inside cross() is presumably meant to be
# `process1b` -- confirm.
process2 = target(
f_process2( data, process2_var ),
transform=cross( data=c(process1a, process2a),
process2_var = c(4,5))
)
但这不起作用。
一个紧凑的解决方案是为 map() 提供自定义的 .data 网格。
library(drake)
library(rlang)
library(tidyverse)
# One row per level-1 target: the function to call (as a symbol) and its input.
grid <- tibble(
  fun1 = syms(rep(c("f1a", "f1b"), each = 2)),
  var1 = c(1, 2, 2, 3)
)
# Two-level plan driven by a custom grid.
# x: map() receives `grid` through `.data` (unquoted with `!!`), and the
#    command calls `fun1(var1)` using the grid's columns.
# y: cross() combines the x targets with each value of var2.
plan <- drake_plan(
x = target(
fun1(var1),
transform = map(.data = !!grid)
),
y = target(
f2(x, var2),
transform = cross(x, var2 = c(4, 5))
)
)
# Build the configuration for the plan and draw its dependency graph.
config <- drake_config(plan)
vis_drake_graph(config)
由 reprex package (v0.3.0)
于 2019-09-05 创建
但我在这里可能过度拟合了您的示例。另一种方法是使用标签。每个转换都能识别 .tag_in 和 .tag_out 参数。在这里,.tag_out 可以定义一个总体分组变量,以涵盖 process1a_* 和 process1b_* 目标。然后,您可以在处理 process2 时将该分组变量传递给 cross()。
library(drake)
# Plan using tags: both level-1 transforms set `.tag_out = process1`, and the
# single level-2 target `process2` passes `process1` to cross() together with
# process2_var, so the level-2 command is written only once.
# `trace = TRUE` is an argument to drake_plan() itself, not a target.
plan <- drake_plan(
process1a = target(
f_process1a(process1a_var),
transform = map(process1a_var = c(1, 2), .tag_out = process1)
),
process1b = target(
f_process1b(process1b_var),
transform = map(process1b_var = c(2, 3), .tag_out = process1)
),
process2 = target(
f_process2(process1, process2_var),
transform = cross(process1, process2_var = c(4, 5))
),
trace = TRUE
)
# Build the configuration for the plan and draw its dependency graph.
config <- drake_config(plan)
vis_drake_graph(config)
由 reprex package (v0.3.0)
于 2019-09-05 创建
drake 太棒了!我有一个复杂的多级处理问题,可以用下面这个例子来说明。我在第 1 级有 2 个进程,我希望由所有第 1 级进程生成的所有数据集都由第 2 级的单个目标来处理。
下面的代码可以满足我的要求,但我必须在第 2 层重复代码,在我的复杂情况下,这似乎是错误的。
library(drake)
library(tidyverse)
#' Level-1 process "a" (placeholder for the example).
#'
#' @param x Input value.
#' @return `x`, unchanged.
f_process1a <- function(x) x
#' Level-1 process "b" (placeholder for the example).
#'
#' @param x Input value.
#' @return `x`, unchanged.
f_process1b <- function(x) x
#' Level-2 process: append a parameter to a level-1 result.
#'
#' @param data Result of a level-1 process.
#' @param x Value to append.
#' @return `c(data, x)`.
f_process2 <- function(data, x) {
  combined <- c(data, x)
  combined
}
# Level 1: two independent processes, each mapped over its own inputs.
# Level 2: each level-1 target is crossed with process2_var. The same command
# is written twice, once per level-1 process (process2a / process2b).
drakeplan <- drake::drake_plan(
  process1a = target(
    f_process1a(process1a_var),
    transform = map(process1a_var = c(1, 2))
  ),
  process1b = target(
    f_process1b(process1b_var),
    transform = map(process1b_var = c(2, 3))
  ),
  process2a = target(
    f_process2(process1a, process2_var),
    transform = cross(process1a, process2_var = c(4, 5))
  ),
  process2b = target(
    f_process2(process1b, process2_var),
    transform = cross(process1b, process2_var = c(4, 5))
  )
)

# Print the expanded plan as source code.
drake_plan_source(drakeplan)
#> drake_plan(
#> process1a_1 = f_process1a(1),
#> process1a_2 = f_process1a(2),
#> process1b_2 = f_process1b(2),
#> process1b_3 = f_process1b(3),
#> process2a_4_process1a_1 = f_process2(process1a_1, 4),
#> process2a_5_process1a_1 = f_process2(process1a_1, 5),
#> process2a_4_process1a_2 = f_process2(process1a_2, 4),
#> process2a_5_process1a_2 = f_process2(process1a_2, 5),
#> process2b_4_process1b_2 = f_process2(process1b_2, 4),
#> process2b_5_process1b_2 = f_process2(process1b_2, 5),
#> process2b_4_process1b_3 = f_process2(process1b_3, 4),
#> process2b_5_process1b_3 = f_process2(process1b_3, 5)
#> )
由 reprex package (v0.3.0)
于 2019-09-05 创建
我想将 process2[ab] 步骤合并到一个目标中。这可能吗?
看来我应该可以有一个单一的目标,例如:
# Attempted single level-2 target covering both level-1 processes
# (reported below as not working).
# NOTE(review): `process2a` inside cross() is presumably meant to be
# `process1b` -- confirm.
process2 = target(
f_process2( data, process2_var ),
transform=cross( data=c(process1a, process2a),
process2_var = c(4,5))
)
但这不起作用。
一个紧凑的解决方案是为 map() 提供自定义的 .data 网格。
library(drake)
library(rlang)
library(tidyverse)
# One row per level-1 target: the function to call (as a symbol) and its input.
grid <- tibble(
  fun1 = syms(rep(c("f1a", "f1b"), each = 2)),
  var1 = c(1, 2, 2, 3)
)
# Two-level plan driven by a custom grid.
# x: map() receives `grid` through `.data` (unquoted with `!!`), and the
#    command calls `fun1(var1)` using the grid's columns.
# y: cross() combines the x targets with each value of var2.
plan <- drake_plan(
x = target(
fun1(var1),
transform = map(.data = !!grid)
),
y = target(
f2(x, var2),
transform = cross(x, var2 = c(4, 5))
)
)
# Build the configuration for the plan and draw its dependency graph.
config <- drake_config(plan)
vis_drake_graph(config)
由 reprex package (v0.3.0)
于 2019-09-05 创建
但我在这里可能过度拟合了您的示例。另一种方法是使用标签。每个转换都能识别 .tag_in 和 .tag_out 参数。在这里,.tag_out 可以定义一个总体分组变量,以涵盖 process1a_* 和 process1b_* 目标。然后,您可以在处理 process2 时将该分组变量传递给 cross()。
library(drake)
# Plan using tags: both level-1 transforms set `.tag_out = process1`, and the
# single level-2 target `process2` passes `process1` to cross() together with
# process2_var, so the level-2 command is written only once.
# `trace = TRUE` is an argument to drake_plan() itself, not a target.
plan <- drake_plan(
process1a = target(
f_process1a(process1a_var),
transform = map(process1a_var = c(1, 2), .tag_out = process1)
),
process1b = target(
f_process1b(process1b_var),
transform = map(process1b_var = c(2, 3), .tag_out = process1)
),
process2 = target(
f_process2(process1, process2_var),
transform = cross(process1, process2_var = c(4, 5))
),
trace = TRUE
)
# Build the configuration for the plan and draw its dependency graph.
config <- drake_config(plan)
vis_drake_graph(config)
由 reprex package (v0.3.0)
于 2019-09-05 创建