-
Notifications
You must be signed in to change notification settings - Fork 1
/
train-fit-model.R
78 lines (60 loc) · 1.99 KB
/
train-fit-model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Extreme gradient boosting
# Parallel processing -----------------------------------------------------
library(doParallel)

# Count all hardware threads (logical = TRUE includes hyperthreads).
all_cores <- parallel::detectCores(logical = TRUE)

# For an 8-core / 16-thread machine, good performance comes from running
# more workers than physical cores but fewer than all logical threads.
# Guard with max(1L, ...) so machines with six or fewer logical cores
# still register at least one worker instead of a non-positive count.
registerDoParallel(cores = max(1L, all_cores - 6L))
# Model specification -----------------------------------------------------
# XGBoost regression with a fixed number of trees; every remaining
# hyperparameter is marked with tune() so it can be filled in from the
# tuning grid defined below.
boost_mod <-
  boost_tree(
    mode           = "regression",
    trees          = 1500,
    # tree complexity
    tree_depth     = tune(),
    min_n          = tune(),
    loss_reduction = tune(),
    # randomness
    sample_size    = tune(),
    mtry           = tune(),
    # step size
    learn_rate     = tune()
  ) %>%
  set_engine("xgboost", objective = "reg:squarederror")
# Model parameters --------------------------------------------------------
# Seed the RNG so the Latin hypercube draw below is reproducible.
set.seed(42)
# filled 6 dimensional tuning space
# Latin hypercube sampling spreads the 500 candidate points evenly across
# the six-dimensional hyperparameter space. NOTE(review): with a fixed
# seed the drawn values depend on the order of the dials parameter
# objects, so their order is deliberately left as-is.
xgboost_grid <- grid_latin_hypercube(
min_n(), tree_depth(), loss_reduction(),
# sample_size for the xgboost engine is a proportion, hence sample_prop()
sample_size = sample_prop(),
# has unknown, finalize with data to find max
# (mtry's upper bound is unknown until the predictor count is known;
# finalize() fills it in from df_train_prep, defined elsewhere)
finalize(mtry(), df_train_prep),
learn_rate(),
size = 500
)
# Model tuning -----------------------------------------------------------
# Time the grid search: 500 candidates across all CV folds is the slow step.
tictoc::tic()
# Re-seed immediately before tuning so resample fitting is reproducible.
set.seed(42)
# Evaluate every grid candidate on the CV resamples (train_cv, created
# elsewhere in the project). Formula interface: scan_age is the outcome,
# all remaining columns are predictors.
xgb_tuned_results <- tune_grid(
boost_mod,
scan_age ~ .,
resamples = train_cv,
grid = xgboost_grid,
# track MAE, RMSE, and R^2 for each candidate
metrics = metric_set(mae, rmse, rsq),
# keep out-of-fold predictions for later inspection
control = control_grid(verbose = FALSE,
save_pred = TRUE)
)
tictoc::toc()
# Audible alert when the long-running tuning finishes.
beepr::beep(2)
# Pick best model ---------------------------------------------------------
# Inspect the top candidates; we want to minimize MAE. The metric is
# passed by name because positional metric arguments to show_best() are
# deprecated in current versions of {tune}.
xgb_tuned_results %>%
  show_best(metric = "mae")

# Select parsimonious params within one SE of the numerically best model,
# preferring simpler (shallower) trees via the tree_depth sort column.
# The old `maximize` argument was deprecated and later removed from
# {tune}; the optimization direction (MAE is minimized) is now inferred
# from the metric itself.
best_xgb_params <- xgb_tuned_results %>%
  select_by_one_std_err(metric = "mae", tree_depth)
# Fit and save best model -------------------------------------------------
# Plug the selected hyperparameters into the model specification, then
# train on the full preprocessed training set.
xgb_final_mod <-
  finalize_model(boost_mod, best_xgb_params) %>%
  fit(scan_age ~ ., data = df_train_prep)

# Persist the fitted model for downstream prediction scripts.
mod_path <- here::here("model", "xgboost_9to19_brain_age_mod.rds")
saveRDS(xgb_final_mod, file = mod_path)