在 drake 的多个计划中处理多个文件
Working with multiple files across multiple plans in drake
我正在尝试使用 drake
R 包来处理跨多个计划的多个文件输入,这样我就可以迭代地构建目标,并测试每个阶段是否正常工作。下面是一个简单的可重现示例(reprex),展示了我想要完成的事情。官方文档展示了如何在单个计划中做这种事情,但我的困难在于我想跨多个计划来做。
我无法弄清楚如何将正确的输入名称(来自 read_in_plan
的目标)传递给 munge_plan
中的转换。
library(drake)
# Look up dplyr's package description once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

#' Placeholder processing step for the example.
#'
#' @param input Accepted but never evaluated; the body does not reference it.
#' @return `NULL`, invisibly -- the return value of `message()`.
munge_data <- function(input) {
  message("I did something!")
}
file_inputs <- c("file1.csv", "file2.csv")

# Stage 1: read the data in, one `data_*` target per input file.
read_in_plan <- drake_plan(
  # Make the plan depend on dplyr by tracking its package description.
  pkg = utils::packageDescription("dplyr"),
  data = target(
    read.csv(input),
    transform = map(input = !!file_inputs)
  )
)
read_in_plan
#> # A tibble: 3 x 2
#> target command
#> <chr> <expr>
#> 1 pkg utils::packageDescription("dplyr")
#> 2 data_file1.csv read.csv("file1.csv")
#> 3 data_file2.csv read.csv("file2.csv")

# Stage 2: process the data targets -- only the first one is wired up so far.
munge_plan <- drake_plan(
  munged = munge_data(data_file1.csv)
)
munge_plan
#> # A tibble: 1 x 2
#> target command
#> <chr> <expr>
#> 1 munged munge_data(data_file1.csv)

# Goal: run munge_data() on every data target, i.e. on both
#   data_file1.csv
#   data_file2.csv
# munge_data_proper = drake_plan(
#   munged = target(
#     # some kind of transform here
#   )
# )

# Combine both stages into a single plan.
full_plan <- bind_plans(read_in_plan, munge_plan)
# make(full_plan)
由 reprex package (v0.2.1)
于 2019-05-23 创建
转换(transform)的设计初衷是全部在一次 drake_plan()
调用中完成,因此很难将下面的 data_*
目标和 munged_*
目标拆分到不同的计划中。
library(drake)
# Look up dplyr's package description once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

#' Placeholder processing step for the example.
#'
#' @param input Accepted but never evaluated; the body does not reference it.
#' @return `NULL`, invisibly -- the return value of `message()`.
munge_data <- function(input) {
  message("I did something!")
}
file_inputs <- c("file1.csv", "file2.csv")

# Both stages live in a single drake_plan() call so that the second
# transform can refer to the targets generated by the first.
plan <- drake_plan(
  pkg = target(
    dplyr_version_dep,
    # Triggers are always checked even though commands do not always run:
    trigger = trigger(change = utils::packageDescription("dplyr"))
  ),
  data = target(
    read.csv(input),
    # `.id = FALSE` keeps the file names out of the target names, giving
    # `data` and `data_2` (see the printed plan source below).
    transform = map(input = !!file_inputs, .id = FALSE)
  ),
  # Reuse the `data` targets from the transform above: one `munged_*`
  # target is generated per `data` target.
  munged = target(
    munge_data(data),
    transform = map(data)
  )
)

# Print the expanded plan as source code.
drake_plan_source(plan)
#> drake_plan(
#> pkg = target(
#> command = dplyr_version_dep,
#> trigger = trigger(
#> change = utils::packageDescription("dplyr")
#> )
#> ),
#> data = read.csv("file1.csv"),
#> data_2 = read.csv("file2.csv"),
#> munged_data = munge_data(data),
#> munged_data_2 = munge_data(data_2)
#> )
由 reprex package (v0.3.0)
于 2019-05-23 创建
对您来说,一种变通(hack)办法是利用 drake_plan(trace = TRUE)
返回的信息。这种做法比较脆弱,但在这个小例子中是可行的。
library(drake)
# Look up dplyr's package description once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

#' Placeholder processing step for the example.
#'
#' @param input Accepted but never evaluated; the body does not reference it.
#' @return `NULL`, invisibly -- the return value of `message()`.
munge_data <- function(input) {
  message("I did something!")
}
file_inputs <- c("file1.csv", "file2.csv")

# First plan: read the files. `trace = TRUE` adds bookkeeping columns
# (`input`, `data`) that record how each target was generated.
plan1 <- drake_plan(
  pkg = target(
    dplyr_version_dep,
    # Triggers are always checked even though commands do not always run:
    trigger = trigger(change = utils::packageDescription("dplyr"))
  ),
  data = target(
    read.csv(input),
    transform = map(input = !!file_inputs, .id = FALSE)
  ),
  trace = TRUE
)
plan1
#> # A tibble: 3 x 5
#> target command trigger input data
#> <chr> <expr> <expr> <chr> <chr>
#> 1 pkg dplyr_version_… trigger(change = utils::packageD… <NA> <NA>
#> 2 data read.csv("file… NA … "\"file1.… data
#> 3 data_2 read.csv("file… NA … "\"file2.… data…
plan1$input
#> [1] NA "\"file1.csv\"" "\"file2.csv\""
plan1$data
#> [1] NA "data" "data_2"

# Collect the generated `data` target names by hand and turn them into
# symbols, ready to feed into the next transformation.
library(magrittr)
data <- plan1$data %>%
  na.omit() %>%
  unique() %>%
  rlang::syms()
str(data)
#> List of 2
#> $ : symbol data
#> $ : symbol data_2

# Second plan: one `munged_*` target per symbol collected above.
plan2 <- drake_plan(
  munged = target(
    munge_data(d),
    transform = map(d = !!data) # !! is key
  )
)
plan2
#> # A tibble: 2 x 2
#> target command
#> <chr> <expr>
#> 1 munged_data munge_data(data)
#> 2 munged_data_2 munge_data(data_2)

# Keep only `target` and `command` from plan1 (dropping the trace columns)
# before binding the two plans together.
# NOTE(review): this select() also drops plan1's `trigger` column, so the
# `change` trigger on `pkg` is absent from full_plan -- confirm intended.
full_plan <- bind_plans(dplyr::select(plan1, target, command), plan2)
full_plan
#> # A tibble: 5 x 2
#> target command
#> <chr> <expr>
#> 1 pkg dplyr_version_dep
#> 2 data read.csv("file1.csv")
#> 3 data_2 read.csv("file2.csv")
#> 4 munged_data munge_data(data)
#> 5 munged_data_2 munge_data(data_2)
由 reprex package (v0.3.0)
于 2019-05-23 创建
我正在尝试使用 drake
R 包来处理跨多个计划的多个文件输入,这样我就可以迭代地构建目标,并测试每个阶段是否正常工作。下面是一个简单的可重现示例(reprex),展示了我想要完成的事情。官方文档展示了如何在单个计划中做这种事情,但我的困难在于我想跨多个计划来做。
我无法弄清楚如何将正确的输入名称(来自 read_in_plan
的目标)传递给 munge_plan
中的转换。
library(drake)
# Look up dplyr's package description once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

#' Placeholder processing step for the example.
#'
#' @param input Accepted but never evaluated; the body does not reference it.
#' @return `NULL`, invisibly -- the return value of `message()`.
munge_data <- function(input) {
  message("I did something!")
}
file_inputs <- c("file1.csv", "file2.csv")

# Stage 1: read the data in, one `data_*` target per input file.
read_in_plan <- drake_plan(
  # Make the plan depend on dplyr by tracking its package description.
  pkg = utils::packageDescription("dplyr"),
  data = target(
    read.csv(input),
    transform = map(input = !!file_inputs)
  )
)
read_in_plan
#> # A tibble: 3 x 2
#> target command
#> <chr> <expr>
#> 1 pkg utils::packageDescription("dplyr")
#> 2 data_file1.csv read.csv("file1.csv")
#> 3 data_file2.csv read.csv("file2.csv")

# Stage 2: process the data targets -- only the first one is wired up so far.
munge_plan <- drake_plan(
  munged = munge_data(data_file1.csv)
)
munge_plan
#> # A tibble: 1 x 2
#> target command
#> <chr> <expr>
#> 1 munged munge_data(data_file1.csv)

# Goal: run munge_data() on every data target, i.e. on both
#   data_file1.csv
#   data_file2.csv
# munge_data_proper = drake_plan(
#   munged = target(
#     # some kind of transform here
#   )
# )

# Combine both stages into a single plan.
full_plan <- bind_plans(read_in_plan, munge_plan)
# make(full_plan)
由 reprex package (v0.2.1)
于 2019-05-23 创建
转换(transform)的设计初衷是全部在一次 drake_plan()
调用中完成,因此很难将下面的 data_*
目标和 munged_*
目标拆分到不同的计划中。
library(drake)
# Look up dplyr's package description once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

#' Placeholder processing step for the example.
#'
#' @param input Accepted but never evaluated; the body does not reference it.
#' @return `NULL`, invisibly -- the return value of `message()`.
munge_data <- function(input) {
  message("I did something!")
}
file_inputs <- c("file1.csv", "file2.csv")

# Both stages live in a single drake_plan() call so that the second
# transform can refer to the targets generated by the first.
plan <- drake_plan(
  pkg = target(
    dplyr_version_dep,
    # Triggers are always checked even though commands do not always run:
    trigger = trigger(change = utils::packageDescription("dplyr"))
  ),
  data = target(
    read.csv(input),
    # `.id = FALSE` keeps the file names out of the target names, giving
    # `data` and `data_2` (see the printed plan source below).
    transform = map(input = !!file_inputs, .id = FALSE)
  ),
  # Reuse the `data` targets from the transform above: one `munged_*`
  # target is generated per `data` target.
  munged = target(
    munge_data(data),
    transform = map(data)
  )
)

# Print the expanded plan as source code.
drake_plan_source(plan)
#> drake_plan(
#> pkg = target(
#> command = dplyr_version_dep,
#> trigger = trigger(
#> change = utils::packageDescription("dplyr")
#> )
#> ),
#> data = read.csv("file1.csv"),
#> data_2 = read.csv("file2.csv"),
#> munged_data = munge_data(data),
#> munged_data_2 = munge_data(data_2)
#> )
由 reprex package (v0.3.0)
于 2019-05-23 创建
对您来说,一种变通(hack)办法是利用 drake_plan(trace = TRUE)
返回的信息。这种做法比较脆弱,但在这个小例子中是可行的。
library(drake)
# Look up dplyr's package description once and bind it to both names.
pkg_description <- utils::packageDescription("dplyr")
dplyr_version_dep <- pkg_description

#' Placeholder processing step for the example.
#'
#' @param input Accepted but never evaluated; the body does not reference it.
#' @return `NULL`, invisibly -- the return value of `message()`.
munge_data <- function(input) {
  message("I did something!")
}
file_inputs <- c("file1.csv", "file2.csv")

# First plan: read the files. `trace = TRUE` adds bookkeeping columns
# (`input`, `data`) that record how each target was generated.
plan1 <- drake_plan(
  pkg = target(
    dplyr_version_dep,
    # Triggers are always checked even though commands do not always run:
    trigger = trigger(change = utils::packageDescription("dplyr"))
  ),
  data = target(
    read.csv(input),
    transform = map(input = !!file_inputs, .id = FALSE)
  ),
  trace = TRUE
)
plan1
#> # A tibble: 3 x 5
#> target command trigger input data
#> <chr> <expr> <expr> <chr> <chr>
#> 1 pkg dplyr_version_… trigger(change = utils::packageD… <NA> <NA>
#> 2 data read.csv("file… NA … "\"file1.… data
#> 3 data_2 read.csv("file… NA … "\"file2.… data…
plan1$input
#> [1] NA "\"file1.csv\"" "\"file2.csv\""
plan1$data
#> [1] NA "data" "data_2"

# Collect the generated `data` target names by hand and turn them into
# symbols, ready to feed into the next transformation.
library(magrittr)
data <- plan1$data %>%
  na.omit() %>%
  unique() %>%
  rlang::syms()
str(data)
#> List of 2
#> $ : symbol data
#> $ : symbol data_2

# Second plan: one `munged_*` target per symbol collected above.
plan2 <- drake_plan(
  munged = target(
    munge_data(d),
    transform = map(d = !!data) # !! is key
  )
)
plan2
#> # A tibble: 2 x 2
#> target command
#> <chr> <expr>
#> 1 munged_data munge_data(data)
#> 2 munged_data_2 munge_data(data_2)

# Keep only `target` and `command` from plan1 (dropping the trace columns)
# before binding the two plans together.
# NOTE(review): this select() also drops plan1's `trigger` column, so the
# `change` trigger on `pkg` is absent from full_plan -- confirm intended.
full_plan <- bind_plans(dplyr::select(plan1, target, command), plan2)
full_plan
#> # A tibble: 5 x 2
#> target command
#> <chr> <expr>
#> 1 pkg dplyr_version_dep
#> 2 data read.csv("file1.csv")
#> 3 data_2 read.csv("file2.csv")
#> 4 munged_data munge_data(data)
#> 5 munged_data_2 munge_data(data_2)
由 reprex package (v0.3.0)
于 2019-05-23 创建