-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcommon.R
386 lines (327 loc) · 14.2 KB
/
common.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# Common utility functions.
#
# Markus Konrad <markus.konrad@wzb.eu>
# Clara Bicalho <clara.bicalho@wzb.eu>
# Sisi Huang <sisi.huang@wzb.eu>
#
# Oct. 2018
#
library(shiny)
library(stringr)
library(future)
library(rlang)
library(digest)
library(DeclareDesign)
# Append `v` to list `l` and return the resulting list. Appending is slow, don't use that often!
list_append <- function(l, v) {
l[[length(l)+1]] <- v
l
}
# Merge list `l1` with `l2` and return combined list. If the same key exists in both lists, the
# value in `l2` will overwrite the value in `l1`.
list_merge <- function(l1, l2) {
l_out <- list()
keys1 <- names(l1)
keys2 <- names(l2)
for (k in keys1) { l_out[[k]] <- l1[[k]] }
for (k in keys2) { l_out[[k]] <- l2[[k]] }
stopifnot(setequal(union(keys1, keys2), names(l_out)))
l_out
}
# Get "tips" from `definitions` attribute of designer
get_tips <- function(designer){
def <- attr(designer, "definitions")
split(def$tips, def$names)
}
# Check if lists `a` and `b` have equal elements in a "shallow" way, i.e. *not* traversing recursively
# through nested lists.
lists_equal_shallow <- function(a, b, na.rm = FALSE) {
if (na.rm) {
a <- a[!is.na(a)]
b <- b[!is.na(b)]
}
if (!setequal(names(a), names(b))) {
return(FALSE)
}
all(sapply(names(a), function (e) {
a_elem <- a[[e]]
b_elem <- b[[e]]
if (na.rm) { # doing this here already because we need to remove NAs before checking length()
a_elem <- a_elem[!is.na(a_elem)]
b_elem <- b_elem[!is.na(b_elem)]
}
is.numeric(a_elem) && is.numeric(b_elem) && length(a_elem) == length(b_elem) && all(a_elem == b_elem)
}))
}
# Round numeric values in a data frame to `digits`.
# Copied from "wizard_shiny" repository.
round_df <- function(df, digits){
i <- vapply(df, is.numeric, TRUE)
df[i] <- lapply(df[i], round, digits)
df
}
# Parse a sequence string `s` in the form of "n, n+s, ..., m", e.g. "1, 2, ..., 5" or "0, 0.25, ..., 1", etc.
# Convert the result to a vector of class `cls`.
# Finds the step `s` and generates the sequence using `seq()`.
# If a sequence without ellipsis that indicate range is passed, then the sequence is used as is. So
# a string of "1, 3, 8, 2" will just be split and converted depending on `cls`.
# Is fault tolerant to commas, so "1, 2 ... 5" is also accepted.
#
# If you want to react on input errors, this function should be wrapped inside a tryCatch expression.
parse_sequence_string <- function(s, cls = 'numeric') {
if (cls %in% c('numeric', 'integer') && grepl('...', s, fixed = TRUE)) { # int/num sequences with range like 1, 2, ..., 10
start_end <- str_split(s, "\\.\\.\\.")[[1]]
start <- str_trim(str_split(start_end[1], ',')[[1]])
startnums <- as.numeric(start[start != ''])
end <- str_trim(str_replace_all(start_end[[2]], ',', ''))
endnum <- as.numeric(end)
if (length(startnums) > 1) {
step <- startnums[2] - startnums[1]
} else {
step <- ifelse(startnums[1] <= endnum, 1, -1)
}
return(seq(from = startnums[1], to = endnum, by = step))
} else { # character list or int/num scalar or int/num sequence like 1, 3, 8, 2
elems <- str_trim(str_split(s, ',')[[1]])
elems <- elems[nchar(elems) > 0]
if (cls %in% c('numeric', 'integer')) {
if (length(elems) == 1 && elems == '') {
return(numeric())
} else {
return(as.numeric(elems))
}
} else {
if (length(elems) == 1 && elems == '') {
return(character())
} else {
return(elems)
}
}
}
}
# Parse a string `s` that denotes a sequence of sequences of class `cls` such as:
#
# '(1, 2, 3), (4, 5, 6), (7,8, 9)'
# or
# '(1, 2, 3),
# (4, 5, 6),
# (7,8, 9)'
# or
# (1, 2) (3) (4, 5 ,6)
#
# Note the inner sequences must be denoted as "(a, b, ... z)", i.e. there must be a comma to
# split the values. However, to split the sequences themselves no comma is needed.
#
# If `require_rectangular` is TRUE, all sequences in `s` must be of the same length, i.e. they
# must form a regular, rectangular matrix (like the first two examples). The output will then
# be a matrix of size NxM, where N is the number of sequences and M is the number of items in
# each sequence. If `s` does not form a regular matrix, NULL will be returned.
# If `require_rectangular` is TRUE, the sequences in `s` can be of any length > 0. The output will
# then be a list of length N, where N is the number of sequences. Each list item is then a numeric
# vector of variable length.
#
# If you want to react on input errors, this function should be wrapped inside a tryCatch expression.
#
# If the input cannot be parsed, NULL will be returned.
parse_sequence_of_sequences_string <- function(s, cls = 'numeric', require_rectangular = FALSE) {
if (str_trim(s) == '') return(list())
m <- gregexpr('\\(([^\\(\\)]*)\\)', s)
vecs <- regmatches(s, m)
if (length(vecs[[1]]) == 0) {
return(NULL)
}
parsed <- sapply(vecs[[1]], function(v) {
v <- gsub('[\\(\\)]', '', v)
if (cls == 'character') {
v <- gsub('["\']', '', v)
}
parse_sequence_string(v, cls = cls)
}, USE.NAMES = FALSE, simplify = require_rectangular)
if (require_rectangular) {
if (class(parsed) != 'list') {
return(t(parsed))
} else {
return(NULL)
}
} else {
return(parsed)
}
}
# Turn a string `s` into a valid R object name.
make_valid_r_object_name <- function(s) {
# Identifiers consist of a sequence of letters, digits, the period (‘.’) and the underscore.
# They must not start with a digit or an underscore, or with a period followed by a digit.
# — R Language Definition
s <- str_replace_all(s, '[^A-Za-z0-9\\._]', '_')
s <- str_replace(s, '^[\\d_]+', '')
str_replace(s, '^\\.\\d+', '')
}
# Return a list of valid designer parameters
get_designer_args <- function(designer) {
formals(designer)
}
# Take a argument value `arg_val` from a designer with class `arg_class` and `arg_is_vector` indicating if
# the designer expects this argument to be a vector. Convert the argument to fractions for numbers with many repeating digits
# (like 0.333... will become 1/3).
# If `to_char` is TRUE, additionally convert the result to a character vector as formatted for an input box (e.g.
# "(1/3, 1/3, 1/3)" for a vector input with fractions).
designer_arg_value_to_fraction <- function(arg_val, arg_class, arg_is_vector, to_char = FALSE) {
# try to convert to fractions if there is a number with many repeating digits after the decimal point like "0.3333333333"
if (arg_class == "numeric" && any(grepl(sprintf('\\.(%s)$', paste(sprintf('%s{10,}', 1:9), collapse = '|')), as.character(arg_val)))) {
arg_val <- MASS::fractions(arg_val)
}
if (to_char) {
arg_val <- as.character(arg_val)
if (arg_is_vector) { # vector of vectors input
return(sprintf('(%s)', paste(arg_val, collapse = ', ')))
} else {
return(arg_val)
}
} else {
arg_val
}
}
# evaluate argument defaults of designers in separate environment (because they might be "language" constructs)
# return evaluated argument defaults
# `args` is a list of arguments with defaults as returned from `formals(<designer>)`
evaluate_designer_args <- function(args, definition) {
eval_envir <- new.env()
args_eval <- lapply(1:length(args), function(a) {
argdef <- definition[a,]
# convert the value to fraction if necessary
evaluated_arg <- designer_arg_value_to_fraction(invisible(eval(args[[a]], envir = eval_envir)), argdef$class, argdef$vector)
invisible(assign(x = names(args)[a], value = evaluated_arg, envir = eval_envir))
hold <- invisible(get(names(args)[a], envir = eval_envir))
if(length(hold) > 1) hold <- paste0(hold, collapse = ", ")
return(hold)
})
names(args_eval) <- names(args)
args_eval
}
# get cache file name unique to cache type `cachetype` (designs, simulation or diagnosis results),
# parameter space `args`, number of (bootstrap) simulations `sims`, designer object `designer`
get_diagnosis_cache_filename <- function(cachetype, args, sims, bs_sims, designer) {
fingerprint_args <- args
fingerprint_args$sims <- sims
fingerprint_args$bs_sims <- bs_sims
fingerprint_args$designer_src <- deparse(designer)
fingerprint_args$cache_version <- 1 # increment whenever the simulated data in cache is not compatible anymore (i.e. DD upgrade)
fingerprint <- digest(fingerprint_args) # uses MD5
# print('CACHE FINGERPRINT ARGS/')
# print(fingerprint_args)
# print('/CACHE FINGERPRINT ARGS')
sprintf('.cache/%s_%s.RDS', cachetype, fingerprint)
}
# Run diagnoses on designer `designer` and parameter space `args`. Run `sims` simulations and `bootstrap_sims` bootstraps.
# `diagnosands_call` is a closure to calculate the diagnosands
# If `use_cache` is TRUE, check if simulated data already exists for this designer / parameter combinations and use cached
# data or create newly simulated data for running diagnoses.
# The simulations are run in parallel if packages `future` and `future.apply` are installed.
run_diagnoses <- function(designer, args, sims, bootstrap_sims, diagnosands_call, use_cache = TRUE,
advance_progressbar = 0, n_diagnosis_workers = 1) {
if (n_diagnosis_workers > 1) {
# set up to run in parallel
plan('multicore', workers = n_diagnosis_workers)
}
all_designs <- NULL
if (use_cache) {
# cache fingerprint generated from designer object, simulation config. and parameter space
designs_cache_file <- get_diagnosis_cache_filename('designs', args, NULL, NULL, designer)
if (file.exists(designs_cache_file)) { # read and return result object from cache
if (advance_progressbar) incProgress(advance_progressbar)
all_designs <- readRDS(designs_cache_file)
print('loaded generated designs from cache')
}
} else {
designs_cache_file <- NULL
}
if (is.null(all_designs)) { # generate designs
# generate designs from designer with arguments `args`
all_designs <- eval_bare(expr(expand_design(designer = designer, expand = TRUE, !!!args)))
if (advance_progressbar) incProgress(advance_progressbar)
# save designs to cache if requested
if (!is.null(all_designs) && !is.null(designs_cache_file)) {
saveRDS(all_designs, designs_cache_file)
}
}
from_cache <- FALSE # records if some data was loaded from cache
simdata <- NULL
if (use_cache) {
# cache fingerprint generated from designer object, simulation config. and parameter space
cache_file <- get_diagnosis_cache_filename('simdata', args, sims, NULL, designer)
if (file.exists(cache_file)) { # read and return result object from cache
if (advance_progressbar) incProgress(advance_progressbar)
simdata <- readRDS(cache_file)
print('loaded simulation data from cache')
from_cache <- TRUE
}
} else {
cache_file <- NULL
}
if (is.null(simdata)) { # generate simulations
# simulate data
simdata <- simulate_designs(all_designs, sims = sims)
if (advance_progressbar) incProgress(advance_progressbar)
# save simulations to cache if requested
if (!is.null(simdata) && !is.null(cache_file)) {
saveRDS(simdata, cache_file)
}
}
diag_res <- NULL
if (use_cache) {
stopifnot(!is.null(cache_file))
# make the cache fingerprint dependent on simulated data's fingerprint and on parameters for diagnosands
diag_call_src <- deparse(diagnosands_call)
diag_call_env <- environment(diagnosands_call)
if (is.null(diag_call_env)) {
diag_call_objnames <- NULL
diag_call_objvals <- NULL
} else {
diag_call_objnames <- ls(diag_call_env)
diag_call_objvals <- get(diag_call_objnames, diag_call_env)
}
args <- list(
'diag_call_src' = diag_call_src,
'diag_call_objnames' = diag_call_objnames,
'diag_call_objvals' = diag_call_objvals,
'from_simdata' = cache_file
)
diag_cache_file <- get_diagnosis_cache_filename('diagresult', args, sims, bootstrap_sims, designer)
if (file.exists(diag_cache_file)) { # read and return result object from cache
if (advance_progressbar) incProgress(advance_progressbar)
diag_res <- readRDS(diag_cache_file)
print('loaded diagnosis results from cache')
from_cache <- TRUE
}
} else {
diag_cache_file <- NULL
}
if (is.null(diag_res)) { # run diagnoses using the simulated data
diag_res <- diagnose_designs(simdata, diagnosands = diagnosands_call, bootstrap_sims = bootstrap_sims)
if (advance_progressbar) incProgress(advance_progressbar)
# save diagnosis results to cache if requested
if (!is.null(diag_res) && !is.null(diag_cache_file)) {
saveRDS(diag_res, diag_cache_file)
}
}
list(results = diag_res, from_cache = from_cache)
}
# Show a shinyalert message box with title `title` and content loaded from `html_file`.
# Set label of the confirmation button to `confirm_btn_label`.
alert_with_content_from_html_file <- function(title, html_file, confirm_btn_label = 'OK', className = '') {
shinyalert(
title = title,
text = readChar(html_file, file.info(html_file)$size),
closeOnEsc = TRUE,
closeOnClickOutside = TRUE,
html = TRUE,
showConfirmButton = TRUE,
showCancelButton = FALSE,
confirmButtonText = confirm_btn_label,
timer = 0,
imageUrl = "",
confirmButtonCol = "light-blue darken-3",
animation = FALSE,
className = className
)
}