-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrain_predict_eval.sh
executable file
·164 lines (142 loc) · 4.53 KB
/
train_predict_eval.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/bin/bash
#=====================================
# SuperLearner launch script
#=====================================
# Trains a SuperLearner model, prints model info and makes predictions,
# runs PCA on the predictions, and computes feature permutation
# importance (FPI). All step logs go to <work_dir>/<step>.std.{out,err}.
#
# Usage: train_predict_eval.sh takes exactly 15 positional arguments:
#  1) input_data     path to training/testing data (.csv)   #---Core params---
#  2) num_inputs     number of input features
#  3) sl_conf        path to SuperLearner configuration (.py)
#  4) work_dir       working directory (run location and output)
#  5) miniconda_loc  Conda install location
#  6) my_env         Conda environment name
#  7) hpo            True|False: hyperparameter optimization #---Bool opts---
#  8) cv             True|False: cross-validation scoring
#  9) smogn          True|False: SMOGN resampling
# 10) onnx           True|False: ONNX export (currently ignored)
# 11) num_jobs       number of parallel jobs                 #---HPC opts----
# 12) backend        parallel backend name
# 13) predict_var    name of the variable to predict         #---Predict-----
# 14) predict_data   path to prediction input data
# 15) fpi_cutoff     FPI correlation cutoff                  #---FPI opts----
#====================================

# Fail fast: abort on command errors, unset variables, and pipeline
# failures so a broken training step does not cascade into the
# predict/pca/fpi steps running against a missing model.
set -euo pipefail

echo "Starting $0"
echo "The option for ONNX is ignored!"

#====================================
# Command line requirements
#====================================
# All 15 arguments are required; catch short invocations up front
# instead of passing empty strings through to the Python steps.
if [[ $# -ne 15 ]]; then
    echo "ERROR: expected 15 arguments, got $#. See header of $0 for usage." >&2
    exit 1
fi

input_data=$1      # training data (input)
num_inputs=$2      # number of inputs
sl_conf=$3         # SuperLearner configuration
work_dir=$4        # where to run and put output
miniconda_loc=$5   # Conda install location
my_env=$6          # Conda environment name
# Workflow boolean options (all either True or False)
hpo=$7
cv=$8
smogn=$9
onnx=${10}
# HPC options
num_jobs=${11}
backend=${12}
# Predict options
predict_var=${13}
predict_data=${14}
# FPI options
fpi_cutoff=${15}

echo "Checking command line inputs:"
echo "input_data $input_data"
echo "num_inputs $num_inputs"
echo "sl_conf $sl_conf"
echo "work_dir $work_dir"
echo "miniconda_loc $miniconda_loc"
echo "my_env $my_env"
echo "hpo $hpo"
echo "cv $cv"
echo "smogn $smogn"
echo "onnx $onnx"
echo "num_jobs $num_jobs"
echo "backend $backend"
echo "predict_var $predict_var"
echo "predict_data $predict_data"
echo "fpi_cutoff $fpi_cutoff"

#===================================
# Conda activate and log env
#===================================
# conda's shell hooks are known to reference unset variables, which
# would trip 'set -u'; relax it just for activation.
set +u
source "${miniconda_loc}/etc/profile.d/conda.sh"
conda activate "$my_env"
set -u

# Save Conda env setup and zip the file
# because we don't want GH Dependabot to
# interpret it as the repo's actual
# requirements since this environment is
# ephemeral (otherwise, GH may detect
# security risks in packages and send lots
# of warnings). The HPC code in this Conda
# environment is being executed entirely
# within the environment of the cluster and
# is not exposed to the outside world.
# Provide both conda list and
# conda env export.
conda list -e | gzip -1c > "${work_dir}/requirements.txt.gz"
conda env export | gzip -1c > "${work_dir}/requirements.yaml.gz"

#===================================
# Run the SuperLearner
#===================================
python -m train \
    --conda_sh "${miniconda_loc}/etc/profile.d/conda.sh" \
    --superlearner_conf "$sl_conf" \
    --n_jobs "$num_jobs" \
    --num_inputs "$num_inputs" \
    --predict_var "$predict_var" \
    --cross_val_score "$cv" \
    --model_dir "$work_dir" \
    --hpo "$hpo" \
    --smogn "$smogn" \
    --data "$input_data" \
    --backend "$backend" 1> "${work_dir}/train.std.out" 2> "${work_dir}/train.std.err"

#===================================
# Print out information about the
# model and make predictions
#===================================
python -m predict \
    --model_dir "$work_dir" \
    --predict_var "$predict_var" \
    --num_inputs "$num_inputs" \
    --predict_data "$predict_data" 1> "${work_dir}/predict.std.out" 2> "${work_dir}/predict.std.err"

#===================================
# Run PCA on predictions
#===================================
python -m pca \
    --model_dir "$work_dir" \
    --num_inputs "$num_inputs" \
    --data "$input_data" \
    --predict_var "$predict_var" \
    --predict_data "$predict_data" 1> "${work_dir}/pca.std.out" 2> "${work_dir}/pca.std.err"

#===================================
# Run FPI
#===================================
python -m fpi \
    --model_dir "$work_dir" \
    --predict_var "$predict_var" \
    --num_inputs "$num_inputs" \
    --corr_cutoff "$fpi_cutoff" \
    --predict_data "$predict_data" 1> "${work_dir}/fpi.std.out" 2> "${work_dir}/fpi.std.err"

#===================================
# Compress outputs
#===================================
# 'set -e' aborts here if work_dir is not reachable.
cd "$work_dir"
ls
# TODO: output compression not yet implemented (WORKING HERE).
#===================================
echo "$0 finished!"