My goal is to customize a {drake} plan programmatically in order to reduce code duplication in both simple and complex cases, for example, setting `drake::trigger` for multiple targets based on specific conditions of the plan (i.e. programmatically identifying the targets to be set by detecting names of targets that end with `(data)_raw` when querying a web API, or other more complex cases). For brevity, the programmatic identification of targets is not shown here; the code below simply uses names typed out manually. I prefer to use {data.table} to manipulate the plans, instead of {dplyr}.
Is there a way, for example, to set `drake::trigger` for multiple targets at once? Or is there a better way to get the job done?
I came up with a simple solution below, which is to manually manipulate the drake plan with the familiar data.frame-style methods. Here is a simplified reprex:
library(drake)
library(data.table)
library(purrr)
#>
#> Attaching package: 'purrr'
#> The following object is masked from 'package:data.table':
#>
#> transpose
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
library(rlang)
#>
#> Attaching package: 'rlang'
#> The following object is masked from 'package:magrittr':
#>
#> set_names
#> The following objects are masked from 'package:purrr':
#>
#> %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#> flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#> splice
#> The following object is masked from 'package:data.table':
#>
#> :=
# Trigger condition for one target: hands the target name to `do_something()`
# (a placeholder that is not defined in this reprex) and returns its result.
trigger_func <- function(target_name) {
  do_something(target_name)
}
# Helper func
# Turn a plan that was manipulated as a data.table back into a drake plan.
# Works by reference through data.table::setattr(), so `data` itself changes.
#
# Args:
#   data:          a drake plan previously converted with setDT().
#   command_names: optional vector to restore as the 'names' of
#                  `data$command`. NULL (the default) leaves the names
#                  untouched. The previous default, `command_names =
#                  command_names`, referred to itself and raised "promise
#                  already under evaluation" as soon as it was forced.
#   ...:           unused; kept for backward compatibility.
#
# Returns:
#   `data`, invisibly, so the call can be chained or piped.
reset_drake_attr <- function(data, command_names = NULL, ...) {
  # Reset original 'class' of drake plan
  setattr(data, 'class', c('drake_plan', 'tbl_df', 'tbl', 'data.frame'))
  # Remove non-drake attributes created by data.table
  extra_attrs <- setdiff(
    names(attributes(data)),
    c('names', 'row.names', 'class')
  )
  # Plain loop: this is done only for its side effect, no result is collected
  for (attr_name in extra_attrs) {
    setattr(data, attr_name, NULL)
  }
  # Restore the names of the 'command' column only when the caller supplies them
  if (!is.null(command_names)) {
    setattr(data$command, 'names', command_names)
  }
  invisible(data)
}
# Reference plan: the triggers for targets 'b' and 'e' are typed out by hand
# inside `target()`. The data.table-built `my_plan` is compared against it.
plan <- drake_plan(
a = 1,
b = target(2, trigger = trigger(condition = trigger_func('b'))),
c = 3, d = 4,
e = target(5, trigger = trigger(condition = trigger_func('e')))
)
# Manipulate plan with data.table ----------------
# Start from a plan that declares no triggers at all
my_plan <- drake_plan(a = 1, b = 2, c = 3, d = 4, e = 5)
command_names <- names(my_plan$command) # For testing later
# setDT() converts the plan to a data.table by reference; `.()` is data.table's
# alias for list(), so every row of the new `trigger` column holds `NA`
setDT(my_plan)[, trigger := .(.(expr(NA)))] # Pre-populate targets without trigger
# For each listed target, overwrite its trigger by reference. `!!.` injects the
# target name (the lambda's `.`) into the captured `trigger(...)` call.
# map() returns the updated table once per target, hence the two printouts.
map(c('b', 'e'),
~ my_plan [
][target == . , trigger := .(expr(
trigger(condition = trigger_func(!!.))
))])
#> [[1]]
#> target command trigger
#> 1: a 1 NA
#> 2: b 2 <call>
#> 3: c 3 NA
#> 4: d 4 NA
#> 5: e 5 <call>
#>
#> [[2]]
#> target command trigger
#> 1: a 1 NA
#> 2: b 2 <call>
#> 3: c 3 NA
#> 4: d 4 NA
#> 5: e 5 <call>
# Restore the drake plan class and drop the attributes added by data.table
reset_drake_attr(my_plan)
# Test equality ----------------------------------
# Hand-written reference plan
plan
#> # A tibble: 5 x 3
#> target command trigger
#> <chr> <expr> <expr>
#> 1 a 1 NA
#> 2 b 2 trigger(condition = trigger_func("b"))
#> 3 c 3 NA
#> 4 d 4 NA
#> 5 e 5 trigger(condition = trigger_func("e"))
# Plan built through data.table; it prints the same as `plan`
my_plan
#> # A tibble: 5 x 3
#> target command trigger
#> <chr> <expr> <expr>
#> 1 a 1 NA
#> 2 b 2 trigger(condition = trigger_func("b"))
#> 3 c 3 NA
#> 4 d 4 NA
#> 5 e 5 trigger(condition = trigger_func("e"))
# Despite the identical printout, the two objects are not identical
identical(plan, my_plan)
#> [1] FALSE
# all.equal() narrows the difference down to the 'names' of the `command` column
all.equal(plan$command, my_plan$command)
#> [1] "names for target but not for current"
# Reason:
names(plan$command)
#> [1] "a" "" "c" "d" ""
command_names # Saved earlier
#> [1] "a" "b" "c" "d" "e"
names(my_plan$command) # data.table removes 'names' of 'my_plan$command'
#> NULL
# `identical(plan, my_plan)` cannot be TRUE as-is: in a plan built by
# drake_plan(), only targets written without `target()` get a non-empty name on
# the 'command' column, as the output below shows
drake_plan(
# without `target`
a = 1, b = 2,
# with `target`
c = target(3),
d = target(4, trigger = trigger(condition = TRUE)),
e = target(func(a), map(func = !!c('x', 'y')))
) %>%
{names(.$command)}
#> [1] "a" "b" "" "" "" ""
# Test without 'names' of 'command' column -------
# With names stripped on the fly, the two `command` columns are identical
identical( unname(plan$command), unname(my_plan$command) )
#> [1] TRUE
# Copy objects, remove 'names' of 'command' column and test
plan_test <- plan ; my_plan_test <- my_plan
names(plan_test$command) <- NULL ; names(my_plan_test$command) <- NULL
# The whole plans now compare as identical, so the names were the only difference
identical(plan_test, my_plan_test)
#> [1] TRUE
Created on 2019-12-06 by the reprex package (v0.3.0)
Are the names of `my_plan$command` necessary? Are they used by {drake} internally? I see three options; which one should I go for?
- Ignore the names.
- Set them from the targets: `names(my_plan$command) <- my_plan$target`
- Save them first with `command_names <- names(my_plan$command)` and reset them afterwards (like this):
# Test with 'names' of 'command' column ----------
# --- NOT RUN ---
# Sketch of the "save and reset" option: put the saved names back on the
# `command` column once the data.table manipulation is finished
{
# Put this in helper func, re-run `reset_drake_attr`
# setattr() changes the column in place, so no reassignment is needed
setattr(my_plan$command, 'names', command_names)
# (run manipulation steps...)
reset_drake_attr(my_plan, command_names)
# Check that the names match the ones saved before the manipulation
identical( names(my_plan$command), command_names )
# TRUE
}