extract-dataset.sh
#!/bin/bash
# Meteorological Data Processing Workflow
# Copyright (C) 2022-2023, University of Saskatchewan
# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# =========================
# Credits and contributions
# =========================
# 1) Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
# 2) Parts of the code are taken from https://stackoverflow.com/a/17557904/5188208
# ================
# General comments
# ================
# 1) All variables are camelCased;
# ==============
# Help functions
# ==============
function short_usage () {
echo "Usage: $(basename $0) [-jh] [-i DIR] [-d DATASET] [-co DIR] [-se DATE] [-ln REAL,REAL] [-p STR]
Try \`$(basename $0) --help\` for more options." >&1;
}
function version () {
echo "$(basename $0): version $(cat $(dirname $0)/VERSION)";
exit 0;
}
function usage () {
echo "Meteorological Data Processing Script - version $(cat $(dirname $0)/VERSION)
Usage:
$(basename $0) [options...]
Script options:
-d, --dataset Meteorological forcing dataset of interest
-i, --dataset-dir=DIR The source path of the dataset file(s)
-v, --variable=var1[,var2[...]] Variables to process
-o, --output-dir=DIR Writes processed files to DIR
-s, --start-date=DATE The start date of the data
-e, --end-date=DATE The end date of the data
-l, --lat-lims=REAL,REAL Latitude's upper and lower bounds
optional; within the [-90, +90] limits
-n, --lon-lims=REAL,REAL Longitude's upper and lower bounds
optional; within the [-180, +180] limits
-a, --shape-file=PATH Path to the ESRI shapefile; optional
-m, --ensemble=ens1[,ens2[,...]] Ensemble members to process; optional
Leave empty to extract all ensemble members
-M, --model=model1[,model2[,...]] Models that are part of a dataset,
only applicable to climate datasets; optional
-S, --scenario=scn1[,scn2[,...]] Climate scenarios to process, only applicable
to climate datasets; optional
-j, --submit-job Submit the data extraction process as a job
on the SLURM system; optional
-k, --no-chunk No parallelization, recommended for small domains
-p, --prefix=STR Prefix prepended to the output files
-b, --parsable Parsable SLURM message mainly used
for chained job submissions
-c, --cache=DIR Path of the cache directory; optional
-E, --email=user@example.com E-mail user when job starts, ends, or
fails; optional
-C, --cluster=JSON JSON file detailing cluster-specific details
-L, --list-datasets List all the available datasets and the
corresponding keywords for '--dataset' option
-V, --version Show version
-h, --help Show this screen and exit
For bug reports, questions, or discussions, open an issue
at https://github.com/kasra-keshavarz/datatool/issues" >&1;
exit 0;
}
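# Illustrative invocation of this script (the paths, dataset keyword, variable
# names, and limits below are placeholders only; see --list-datasets and each
# dataset's documentation for valid values):
#   ./extract-dataset.sh \
#     --dataset="rdrs" \
#     --dataset-dir="/path/to/rdrs-source" \
#     --variable="var1,var2" \
#     --output-dir="$HOME/extracted-rdrs" \
#     --start-date="1990-01-01 00:00:00" \
#     --end-date="1990-12-31 23:00:00" \
#     --lat-lims=49,54 --lon-lims=-120,-98 \
#     --prefix="rdrs_" \
#     --cluster="/path/to/cluster.json" \
#     --submit-job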
function list_datasets () {
echo "Meteorological Data Processing Script - version $(cat $(dirname $0)/VERSION)
Currently, the following meteorological datasets are
available for processing:
$(cat $(dirname $0)/etc/usages/DATASETS | sed 's/^\(.*\)$/\o033[34m\1\o033[0m/')" >&1;
exit 0;
}
# useful log date format function
logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
logDirDate () { echo "$(date +"%Y%m%d_%H%M%S")"; }
# useful maximum function
max () { printf "%s\n" "${@:2}" | sort "$1" | tail -n1; }
count_values () { max -g $(echo $1 | tr ',' ' ' | wc -w) 1 ; }
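# Illustrative behaviour of the helpers above (GNU coreutils assumed):
#   max -g 3 7 2                   -> 7 (numeric maximum)
#   count_values "r1i1p1,r2i1p1"   -> 2 (number of comma-separated values)
#   count_values ""                -> 1 (an empty list still counts as one task)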
# =====================
# Necessary Assumptions
# =====================
# TZ is set to UTC for all `date' calls to avoid invalid dates due to Daylight Saving Time
alias date='TZ=UTC date'
# expand aliases for the one stated above
shopt -s expand_aliases
# local paths
schedulersPath="$(dirname $0)/etc/schedulers/"
scriptPath="$(dirname $0)/etc/scripts" # core script path
recipePath="$(realpath $(dirname $0)/var/repos/builtin/recipes/)"
extract_submodel="$(dirname $0)/etc/scripts/extract_subdir_level.sh" # script path
# =======================
# Parsing input arguments
# =======================
# argument parsing using GNU getopt
parsedArguments=$( \
getopt --alternative \
--name "extract-dataset" \
-o jhVbLE:d:i:v:o:s:e:t:l:n:p:c:m:M:S:ka:C:u: \
--long submit-job,help,version, \
--long parsable,list-datasets,email:, \
--long dataset:,dataset-dir:,variable:, \
--long output-dir:,start-date:,end-date:, \
--long time-scale:,lat-lims:,lon-lims:, \
--long prefix:,cache:,ensemble:,model:, \
--long scenario:,no-chunk,shape-file:, \
--long cluster:,account: -- "$@" \
)
validArguments=$?
# check whether getopt returned an error (invalid options)
if [ "$validArguments" != "0" ]; then
short_usage;
exit 1;
fi
# check if no options were passed
if [ $# -eq 0 ]; then
short_usage;
exit 1;
fi
# check long and short options passed
eval set -- "$parsedArguments"
while :
do
case "$1" in
-h | --help) usage ; shift ;; # optional
-V | --version) version ; shift ;; # optional
-L | --list-datasets) list_datasets ; shift ;; # optional
-j | --submit-job) jobSubmission=true ; shift ;; # optional
-E | --email) email="$2" ; shift 2 ;; # optional
-i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
-d | --dataset) dataset="$2" ; shift 2 ;; # required
-v | --variable) variables="$2" ; shift 2 ;; # required
-o | --output-dir) outputDir="$2" ; shift 2 ;; # required
-s | --start-date) startDate="$2" ; shift 2 ;; # required
-e | --end-date) endDate="$2" ; shift 2 ;; # required
-t | --time-scale) timeScale="$2" ; shift 2 ;; # required
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # optional
-M | --model) model="$2" ; shift 2 ;; # optional
-S | --scenario) scenario="$2" ; shift 2 ;; # optional
-k | --no-chunk) parallel=false ; shift ;; # optional
-p | --prefix) prefixStr="$2" ; shift 2 ;; # required
-b | --parsable) parsable=true ; shift ;; # optional
-c | --cache) cache="$2" ; shift 2 ;; # optional
-C | --cluster) cluster="$2" ; shift 2 ;; # required
-a | --shape-file) shapefile="$2" ; shift 2 ;; # optional
-u | --account) account="$2" ; shift 2 ;; # optional
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
# in case of invalid option
*) echo "$(basename $0): invalid option '$1'" >&2;
short_usage;
exit 1;;
esac
done
# =================
# Base dependencies
# =================
# if cluster is not provided, exit with an error
if [[ -z $cluster ]]; then
echo "$(basename $0): ERROR! --cluster missing"
exit 1
fi
# `gdal' and `jq' are the basics we need to run this file
# initialize the cluster-dependent settings
inits="$(jq -r '.modules.init | join("; ")' $cluster)"
if [[ -n "$inits" ]]; then
eval $inits
fi
# ensure `jq' and `gdal' are loaded
gdal_init="$(jq -r '.modules.gdal' $cluster)"
if [[ -n "$gdal_init" ]]; then
eval $gdal_init
else
echo "$(basename $0): ERROR! GDAL missing"
exit 1;
fi
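# Illustrative layout of the --cluster JSON, inferred from the fields read in
# this script; the scheduler name, module commands, and spec keys below are
# examples only:
#   {
#     "scheduler": "slurm",
#     "specs": { "account": "def-example", "time": "04:00:00", "mem": "8000M" },
#     "modules": {
#       "init": ["module -q purge"],
#       "gdal": "module load gdal"
#     }
#   }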
# ==============
# Routine checks
# ==============
# default value for timeScale if not provided as an argument
if [[ -z $timeScale ]]; then
timeScale="M"
fi
# default value for cache path if not provided as an argument
if [[ -z $cache ]] && [[ -n $jobSubmission ]]; then
cache="$HOME/scratch/.temp_data_jobid"
elif [[ -z $cache ]]; then
cache="$HOME/scratch/.temp_data_$(date +"%N")"
fi
# default value for parallelization
if [[ -z $parallel ]]; then
parallel=true
fi
# email without job submission is not allowed
if [[ -n $email ]] && [[ -z $jobSubmission ]]; then
echo "$(basename $0): ERROR! Email is not supported without job submission;"
exit 1;
fi
# parsable without job submission not allowed
if [[ -n $parsable ]] && [[ -z $jobSubmission ]]; then
echo "$(basename $0): ERROR! --parsable argument cannot be used without job submission"
exit 1;
fi
# if parsable argument is provided
if [[ -n $parsable ]]; then
parsable="--parsable"
else
parsable=""
fi
# deprecation message for --account
if [[ -n $account ]]; then
echo "$(basename $0): WARNING! --account is no longer a valid option."
echo "$(basename $0): configure your scheduler account via --cluster"
fi
# if shapefile is provided extract the extents from it
if [[ -n $shapefile ]]; then
# extract the shapefile extent
IFS=' ' read -ra shapefileExtents <<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)"
# transform the extents in case they are not in EPSG:4326
IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" 1>&2
# Assuming EPSG:4326 if no definition of the CRS is provided
if [[ ${#sourceProj4[@]} -eq 0 ]]; then
echo "$(basename $0): WARNING! Assuming EPSG:4326 for --shape-file as none provided"
sourceProj4=('PROJ4.J' '+proj=longlat +datum=WGS84 +no_defs')
fi
# transform limits and assign to variables
IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
# define $latLims and $lonLims from $shapefileExtents
lonLims="${leftBottomLims[0]},${rightTopLims[0]}"
latLims="${leftBottomLims[1]},${rightTopLims[1]}"
fi
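# Illustrative run of the extent extraction above (file name and values are
# made up):
#   ogrinfo -so -al basin.shp | sed 's/[),(]//g' | grep Extent
#     Extent: -116.5 49.1 - -101.3 54.9
#   which, after the reads and transform above, yields:
#     lonLims="-116.5,-101.3" and latLims="49.1,54.9"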
# check mandatory arguments whether provided
if [[ -z "${datasetDir}" ]] || \
[[ -z "${dataset}" ]] || \
[[ -z "${variables}" ]] || \
[[ -z "${outputDir}" ]] || \
[[ -z "${startDate}" ]] || \
[[ -z "${endDate}" ]] || \
[[ -z "${latLims}" ]] || \
[[ -z "${lonLims}" ]] || \
[[ -z "${prefixStr}" ]]; then
echo "$(basename $0): mandatory option(s) missing.";
short_usage;
exit 1;
fi
# ===========================
# Quasi-parallel requirements
# ===========================
# necessary arrays
startDateArr=() # start dates array
endDateArr=() # end dates array
# necessary one-liner functions
unix_epoch () { date --date="$@" +"%s"; } # unix EPOCH date value
format_date () { date --date="$1" +"$2"; } # format date
# default date format
dateFormat="%Y-%m-%dT%H:%M:%S"
#######################################
# Chunking dates based on given time-
# steps
#
# Globals:
# startDate: start date of the
# subsetting process
# endDate: end date of the process
# parallel: true by default, false if
# --no-chunk is activated
# startDateArr: array of chunked
# start dates
# endDateArr: array of chunked end
# dates
# dateFormat: default date format
# for manipulations
#
# Arguments:
# 1: -> tStep: string of time-step
# intervals for chunks
#
# Outputs:
# startDateArr and endDateArr are
# filled with one start/end date
# pair per chunk for further
# processing
#######################################
function chunk_dates () {
# local variables
local toDate="$startDate"
local tStep="$1"
local midDate
local toDateEnd
# if no chunking
if [[ "$parallel" == "false" ]]; then
startDateArr+=("$(format_date "$startDate" "$dateFormat")")
endDateArr+=("$(format_date "$endDate" "$dateFormat")")
return # exit the function
# if chunking
elif [[ "$parallel" == "true" ]]; then
while [[ "$(unix_epoch "$toDate")" -le "$(unix_epoch "$endDate")" ]]; do
midDate="$(format_date "$toDate" "%Y-%m-01")"
toDateEnd="$(format_date "$midDate $tStep -1hour" "$dateFormat")"
# handle the final chunk when it does not span a full $tStep
if [[ "$(unix_epoch "$toDateEnd")" -ge "$(unix_epoch "$endDate")" ]]; then
startDateArr+=("$(format_date "$toDate" "$dateFormat")")
endDateArr+=("$(format_date "$endDate" "$dateFormat")")
break # break the while loop
fi
startDateArr+=("$(format_date "$toDate" "$dateFormat")")
endDateArr+=("$(format_date "$toDateEnd" "$dateFormat")")
toDate=$(date --date="$midDate $tStep")
done
fi
}
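# Illustrative chunking (dates are examples only): with startDate="2000-01-15",
# endDate="2000-05-20", and tStep="3months", the arrays are filled as:
#   startDateArr: 2000-01-15T00:00:00 2000-04-01T00:00:00
#   endDateArr:   2000-03-31T23:00:00 2000-05-20T00:00:00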
#######################################
# Create m4 variable definitions to be
# used in m4 macro calls in CLI from
# JSON objects
#
# Globals:
# None
#
# Arguments:
# 1: JSON object
#
# Outputs:
# A string in "-D__KEY__=$value"
# format of all the fields of a JSON
# object
#######################################
function json_to_m4_vars () {
# local variables
local json="$1"
# echo the string using jq (version >= 1.6 needed for $ARGS.named)
echo "$json" | jq -r \
'to_entries |
map(select(.value != null and .value != "")) |
map(
"-D" +
"__" +
(.key | tostring | ascii_upcase) +
"__" + "=" +
(.value | tostring)
) |
join(" ")'
}
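# Illustrative call (keys and values are examples only); null/empty fields are
# dropped and keys are upper-cased:
#   json_to_m4_vars '{"logDir": "/tmp/logs", "email": ""}'
#   -> -D__LOGDIR__=/tmp/logs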
# ======================
# Necessary preparations
# ======================
# put necessary arguments in an array - just for legibility
declare -A funcArgs=([jobSubmission]="$jobSubmission" \
[datasetDir]="$datasetDir" \
[variables]="$variables" \
[outputDir]="$outputDir" \
[timeScale]="$timeScale" \
[startDate]="$startDate" \
[endDate]="$endDate" \
[latLims]="$latLims" \
[lonLims]="$lonLims" \
[prefixStr]="$prefixStr" \
[cache]="$cache" \
[ensemble]="$ensemble" \
[model]="$model" \
[scenario]="$scenario"
);
# ========================
# Data processing function
# ========================
function call_processing_func () {
# input arguments as local variables
local scriptFile="$1" # script local path
local chunkTStep="$2" # chunking time-frame periods
local submodelFlag="$3" # flag for submodels' existence
# local script related variables
# script requirements
local scriptName=$(basename $scriptFile | cut -d '.' -f 1)
local script
# local directory for logs
local logDir="$HOME/.datatool/${scriptName}_$(logDirDate)/"
# selected scheduler
local scheduler
# local length variables for chunking jobs
local startDateStr
local endDateStr
local jobArrLen
local modelLen
local scenarioLen
local ensembleLen
local dateLen
local taskLen
# local iterator variables
local dateIter
local modelIter
local ensembleIter
# local JSON variables
local jobDirectiveJSON
local schedulerJSON
local jobChunkArrayJSON
local jobChunkJSON
local JSON
# local variables defining module information
local jobModules
local jobModulesInit
# local m4 variables
local jobDirectiveM4
local jobScriptM4
# make $logDir if it has not been created yet
mkdir -p $logDir
# if dataset contains sub-models, extract them
if [[ $submodelFlag == 1 ]]; then
model=$($extract_submodel "$datasetDir" "$model")
funcArgs[model]=$model
fi
# typical script to run for all sub-modules
script=$(cat <<- EOF
bash ${scriptFile} \
--dataset-dir="${funcArgs[datasetDir]}" \
--variable="${funcArgs[variables]}" \
--output-dir="${funcArgs[outputDir]}" \
--start-date="${funcArgs[startDate]}" \
--end-date="${funcArgs[endDate]}" \
--time-scale="${funcArgs[timeScale]}" \
--lat-lims="${funcArgs[latLims]}" \
--lon-lims="${funcArgs[lonLims]}" \
--prefix="${funcArgs[prefixStr]}" \
--cache="${funcArgs[cache]}" \
--ensemble="${funcArgs[ensemble]}" \
--scenario="${funcArgs[scenario]}" \
--model="${funcArgs[model]}"
EOF
)
# evaluate the script file using the arguments provided
if [[ "${funcArgs[jobSubmission]}" == true ]]; then
# ==========================================
# Chunk time-frame and other relevant arrays
# ==========================================
# chunk dates
chunk_dates "$chunkTStep"
# converting chunk date Bash arrays to comma-delimited strings
startDateStr="$(IFS=,; echo "${startDateArr[*]}")"
endDateStr="$(IFS=,; echo "${endDateArr[*]}")"
# ========================
# Building job array specs
# ========================
# relevant array lengths
let "ensembleLen = $(count_values $ensemble)"
let "modelLen = $(count_values $model)"
let "scenarioLen = $(count_values $scenario)"
let "dateLen = $(max -g ${#startDateArr[@]} 1)"
# relevant iterator variables
let "dateIter = $ensembleLen * $modelLen * $scenarioLen"
let "ensembleIter = $modelLen * $scenarioLen"
let "modelIter = $scenarioLen"
# length of total number of tasks and indices
let "taskLen = $dateLen * $ensembleLen * $modelLen * $scenarioLen"
let "jobArrLen = $taskLen - 1"
# ==============================
# Building relevant JSON objects
# ==============================
# job chunk array information
jobChunkArrayJSON="$(
jq -n \
--arg "startDateArr" "${startDateStr}" \
--arg "endDateArr" "${endDateStr}" \
--arg "ensembleArr" "${ensemble}" \
--arg "modelArr" "${model}" \
--arg "scenarioArr" "${scenario}" \
'$ARGS.named | with_entries(.value |= split(","))' \
)"
# job chunk variable information
jobChunkJSON="$(
jq -n \
--arg "ensembleLen" "$ensembleLen" \
--arg "modelLen" "$modelLen" \
--arg "scenarioLen" "$scenarioLen" \
--arg "dateLen" "$dateLen" \
--arg "dateIter" "$dateIter" \
--arg "ensembleIter" "$ensembleIter" \
--arg "modelIter" "$modelIter" \
'$ARGS.named'
)"
# scheduler information
scheduler="$(
jq -r \
'.scheduler' $cluster \
)"
schedulerJSON="$(
jq -r \
'.environment_variables' ${schedulersPath}/${scheduler}.json \
)"
# job directives information
jobDirectiveJSON="$(
jq -n \
--arg "jobArrLen" "$jobArrLen" \
--arg "scriptName" "$scriptName" \
--arg "logDir" "$logDir" \
--arg "email" "$email" \
--arg "parsable" "$parsable" \
--argjson "specs" "$(jq -r '.specs' $cluster)" \
'$ARGS.named + $specs | del(.specs)' \
)"
# job script information
# arguments used for parallelization, i.e.,
# startDate, endDate, ensemble, model, scenario,
# are removed, as they are processed elsewhere and fed
# in as part of other JSON objects, namely
# `$jobChunkJSON` and `$jobChunkArrayJSON`
jobScriptJSON="$(
jq -n \
--arg "scriptFile" "$scriptFile" \
--arg "datasetDir" "${funcArgs[datasetDir]}" \
--arg "variable" "${funcArgs[variables]}" \
--arg "outputDir" "${funcArgs[outputDir]}" \
--arg "timeScale" "${funcArgs[timeScale]}" \
--arg "latLims" "${funcArgs[latLims]}" \
--arg "lonLims" "${funcArgs[lonLims]}" \
--arg "prefix" "${funcArgs[prefixStr]}" \
--arg "cache" "${funcArgs[cache]}" \
'$ARGS.named' \
)"
# job module init information - not JSON since it is echoed as-is
jobModulesInit="$(
jq -r \
'.modules.init[] | select(length > 0)' $cluster \
)"
# job module information - not JSON since it is echoed as-is
jobModules="$(
jq -r \
'.modules[] |
select(length > 0) |
select(type != "array")' $cluster \
)"
# ============
# Parallel run
# ============
# determining job script path
local jobScriptPath="$logDir/job.${scheduler}"
local jobConfPath="$logDir/job.json"
# create JSON config file for final submission
JSON="$(
jq -n \
--argjson "jobscript" "$jobScriptJSON" \
--argjson "jobdirective" "$jobDirectiveJSON" \
--argjson "scheduler" "$schedulerJSON" \
--argjson "jobchunks" "$jobChunkJSON" \
--argjson "jobchunksarrays" "$jobChunkArrayJSON" \
'$jobscript +
$jobdirective +
$scheduler +
$jobchunks +
$jobchunksarrays' \
)"
# exporting job configurations as a JSON to the job $logDir
echo "$JSON" > "$jobConfPath"
# generating the submission script using m4 macros;
# the m4 variables defined below take the form: __VAR__
jobDirectiveM4="$(json_to_m4_vars "$jobDirectiveJSON")"
# append the main processing script using m4 macros
jobScriptM4="-D__CONF__=$jobConfPath "
jobScriptM4+="$(json_to_m4_vars "$schedulerJSON") "
# create scheduler-specific job submission script
# 1. job scheduler directives
m4 ${jobDirectiveM4} ${schedulersPath}/${scheduler}.m4 > \
${jobScriptPath}
# 2. module initiation, if applicable
echo -e "\n${jobModulesInit}" >> "${jobScriptPath}"
# 3. loading core modules, if applicable
echo -e "\n${jobModules}" >> "${jobScriptPath}"
# 4. main body of script
m4 ${jobScriptM4} ${scriptPath}/main.m4 >> \
${jobScriptPath}
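# Illustrative m4 substitution (the template line is an example only):
#   echo '#SBATCH --array=0-__JOBARRLEN__' | m4 -D__JOBARRLEN__=23
#   -> #SBATCH --array=0-23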
# choose applicable scheduler and submit the job
case "${scheduler,,}" in
"slurm")
sbatch --export=NONE ${jobScriptPath} ;;
"pbs")
qsub ${jobScriptPath} ;;
"lfs")
bsub --env=none ${jobScriptPath} ;;
esac
else
# serial mode
# load all the necessary modules
mods="$( \
jq -r \
'.modules |
to_entries |
map(
select(
.value |
type != "array" and . != ""
)
) |
map(.value) |
join(" && ")' \
$cluster)"
eval "$mods"
eval "${script}"
fi
}
# ======================
# Checking input dataset
# ======================
case "${dataset,,}" in
# ============
# WRF products
# ============
# NCAR-GWF CONUSI
"conus1" | "conusi" | "conus_1" | "conus_i" | "conus 1" | "conus i" | "conus-1" | "conus-i")
call_processing_func "$recipePath/gwf-ncar-conus_i/conus_i.sh" "3months"
;;
# NCAR-GWF CONUSII
"conus2" | "conusii" | "conus_2" | "conus_ii" | "conus 2" | "conus ii" | "conus-2" | "conus-ii")
call_processing_func "$recipePath/gwf-ncar-conus_ii/conus_ii.sh" "1month"
;;
# ==========
# Reanalysis
# ==========
# ECMWF ERA5
"era_5" | "era5" | "era-5" | "era 5")
call_processing_func "$recipePath/ecmwf-era5/era5_simplified.sh" "2years"
;;
# ECCC RDRS
"rdrs" | "rdrsv2.1")
call_processing_func "$recipePath/eccc-rdrs/rdrs.sh" "6months"
;;
# ====================
# Observation datasets
# ====================
# Daymet dataset
"daymet" | "Daymet" )
call_processing_func "$recipePath/ornl-daymet/daymet.sh" "5years"
;;
# ================
# Climate datasets
# ================
# ESPO-G6-R2 dataset
"espo" | "espo-g6-r2" | "espo_g6_r2" | "espo_g6-r2" | "espo-g6_r2" )
call_processing_func "$recipePath/ouranos-espo-g6-r2/espo-g6-r2.sh" "151years" "1"
;;
# Ouranos-MRCC5-CMIP6 dataset
"crcm5-cmip6" | "mrcc5-cmip6" | "crcm5" | "mrcc5" )
call_processing_func "$recipePath/ouranos-mrcc5-cmip6/mrcc5-cmip6.sh" "5years"
;;
# Alberta Government Downscaled Climate Dataset - CMIP6
"alberta" | "ab-gov" | "ab" | "ab_gov" | "abgov" )
call_processing_func "$recipePath/ab-gov/ab-gov.sh" "151years" "0"
;;
# NASA GDDP-NEX-CMIP6
"gddp" | "nex" | "gddp-nex" | "nex-gddp" | "gddp-nex-cmip6" | "nex-gddp-cmip6")
call_processing_func "$recipePath/nasa-nex-gddp-cmip6/nex-gddp-cmip6.sh" "100years" "0"
;;
# CanRCM4-WFDEI-GEM-CaPA
"canrcm4_g" | "canrcm4-wfdei-gem-capa" | "canrcm4_wfdei_gem_capa")
call_processing_func "$recipePath/ccrn-canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh" "5years"
;;
# WFDEI-GEM-CaPA
"wfdei_g" | "wfdei-gem-capa" | "wfdei_gem_capa" | "wfdei-gem_capa" | "wfdei_gem-capa")
call_processing_func "$recipePath/ccrn-wfdei_gem_capa/wfdei_gem_capa.sh" "5years"
;;
# dataset not included above
*)
echo "$(basename $0): missing/unknown dataset";
exit 1;;
esac
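# To support a new dataset, a new case entry pointing at its recipe could follow
# the same pattern (the dataset keyword, recipe path, and chunking period below
# are hypothetical):
#   "newdata" | "new-dataset")
#     call_processing_func "$recipePath/org-newdata/newdata.sh" "1year"
#     ;;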