-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLaunchTAMiTSummit.py
233 lines (176 loc) · 8.21 KB
/
LaunchTAMiTSummit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/usr/bin/env python
import os
import sys
import pdb
import matlab.engine
import argparse
import re
import pdb
import subprocess
import numpy as np
'''
Name : LaunchTAMiTSummit.py
Description : Runs multiple fits on Summit by distributing them across multiple cores and nodes as needed.
Usage : LaunchTAMiTSummit.py -F
'''
def parse_args(argv=None):
    """Parse command line arguments for the launcher.

    Parameters
    ----------
    argv : list of str, optional
        Argument list to parse; defaults to sys.argv[1:] (argparse default),
        so existing callers of parse_args() are unaffected.

    Returns
    -------
    argparse.Namespace
        Options with boolean attributes ``fit``, ``analyze``, ``dryrun`` and
        string attribute ``dir`` (regex used to match cell folders).

    Raises
    ------
    Exception
        If analysis is requested without fitting and no directory pattern is
        given to locate the cells to analyze.
    """
    parser = argparse.ArgumentParser(
        prog='LaunchTAMiTSummit.py',
        description='Launcher for multiple matlab tamit fits on summit')
    # Fit option: Run fits
    parser.add_argument('-F', '--fit', action='store_true',
                        help='launch fit jobs via sbatch')
    # Analyze option: run analysis for fitted cells. This flag was read
    # downstream (opts.analyze in Launch/InitializeAnalysis and below) but was
    # never registered, causing an AttributeError; it is now defined.
    parser.add_argument('-A', '--analyze', action='store_true',
                        help='launch analysis jobs for fitted cells')
    # Directory/regex used to locate cells when analyzing without fitting
    # (read downstream as opts.dir by InitializeAnalysis); previously missing.
    parser.add_argument('-d', '--dir', type=str, default=None,
                        help='directory (regex) to match cell folders for analysis')
    # DryRun option: Run normal and create jobs but don't launch the jobs
    parser.add_argument('-n', '--dryrun', action='store_true',
                        help='run normal except dont launch the jobs')
    opts = parser.parse_args(argv)
    # Analysis-only runs need a directory pattern to find the cells.
    if not opts.fit and opts.analyze and not opts.dir:
        raise Exception('dir must be specified with analysis flag if fit flag is disabled')
    return opts
class SimulSingleCell( object):
    """Prepare and submit multiple single-cell TAMiT jobs on a slurm cluster.

    Workflow: run the MATLAB ``initParams`` script to write one parameter file
    per segmented cell, build one sbatch job script per cell, and submit each
    with ``sbatch`` (skipped under --dryrun).
    """

    def __init__(self, opts):
        # opts: argparse.Namespace from parse_args() (fit/analyze/dryrun/dir).
        self.opts = opts
        # Populated later by InitializeFit / InitializeAnalysis.
        self.path_params = []
        self.path_analysis = []
        # filenames
        self.fname = {
            'initparams' : "initParams",
        }
        # sbatch options
        self.slurm= {
            'routine' : 'multicore', # singlecore or multicore
            'account' : 'ucb-summit-smr',
            'time' : '4:00:00',
            'qos' : 'condo',
            'partition' : 'shas',
            'jobname' : 'TAMiT',
            'output' : 'sim.log',
            'error' : 'sim.err',
            # Define architecture of clusters
            'coresPerNode' : 24,
            'socksPerNode' : 2,
            # 'username' : 'saan8193@colorado.edu',
            # 'mailtype' : 'FAIL',
        }
        # paths for different servers
        paths_workdir = {
            'Summit' : '/projects/saan8193/ImageAnalysis/TAMiT',
            'Local' : '/Users/saadjansari/Documents/Projects/ImageAnalysis/TAMiT'
        }
        paths_launch = {
            'Summit' : '/projects/saan8193',
            'Local' : '/Users/saadjansari/Documents/Projects/ImageAnalysis/TAMiT'
        }
        # set the correct working directory
        # NOTE(review): loc is hard-coded to 'Summit' here, so the 'Local'
        # branch below is unreachable; also, the 'Local' branch still uses
        # paths_launch['Summit'] — looks like a copy-paste slip, confirm.
        self.opts.loc = 'Summit'
        if self.opts.loc == 'Summit':
            self.workdir = paths_workdir['Summit']
            self.launchdir = paths_launch['Summit']
        elif self.opts.loc == 'Local':
            self.workdir = paths_workdir['Local']
            self.launchdir = paths_launch['Summit']
        else:
            raise ValueError('LaunchTAMiTSummit: acceptable configuration input argument options are Summit and Local.')
        # All subsequent relative paths resolve from the working directory.
        os.chdir( self.workdir)

    def Launch(self):
        # Launcher for multiple single cell fits
        # Prepare fits
        if self.opts.fit:
            # Initialize params for the different cells
            self.InitializeFit()
        # Prepare fits
        if self.opts.analyze:
            # Initialize params for the different cells
            self.InitializeAnalysis()
        # create a bash script for executing the jobs
        jobStrings = self.WriteLaunchScript()
        # Launch individually
        # NOTE(review): job scripts are zipped against path_params only, so an
        # analysis-only run (empty path_params) writes/submits nothing —
        # confirm whether path_analysis was meant to be used here.
        if (self.opts.fit or self.opts.analyze):
            for spath, jobString in zip( self.path_params, jobStrings):
                # Submit from the directory that holds the parameter file, so
                # slurm log/output files land next to the cell's data.
                os.chdir( os.path.split(spath)[0])
                with open('jobscript.sh', 'w') as f:
                    f.write( jobString)
                if not self.opts.dryrun:
                    subprocess.call(["sbatch", "jobscript.sh"])
        # if not self.opts.dryrun and (self.opts.fit or self.opts.analyze):
            # status = call(['sbatch', self.fname['launch']])

    def InitializeFit(self):
        # initialize fit : initialize params.mat for the different cells and get their path locations
        # check if initParams exists in the current directory
        if os.path.exists( self.fname['initparams']+'.m' ) == False:
            raise ImportError('RunSimulSingleCell: {0}.m does not exist in the current working directory'.format( self.fname['initparams'] ) )
        print( 'Initializing Fit: running initParams.m to save parameters for the segmented cells :')
        # run parameter initialization and get paths to saved parameter files
        opts_params = { 'LOC': 'Summit', 'Display': 0}
        # Start a MATLAB engine and call initParams(opts_params); its return
        # value is treated as a sequence of saved parameter-file paths.
        eng = matlab.engine.start_matlab()
        self.path_params = getattr( eng, self.fname['initparams'])( opts_params)
        for path in self.path_params:
            print('{0}'.format( path) )
        # number of jobs based on length of pathParams
        self.n_jobs = len( self.path_params)

    def InitializeAnalysis(self):
        # initialize analysis : get the paths of the cells to analyze
        print( 'Initializing Analysis : getting paths for analysis :')
        # If also launching fits, then just use the path to saved params to get the parent directory where analysis will be run
        if self.opts.fit:
            [self.path_analysis.append( os.path.dirname( ppath) ) for ppath in self.path_params]
        # Otherwise, if directory not specified, prompt an error.
        elif not self.opts.dir:
            raise ValueError('For analysis only without launching fits, user must specify directory string to match to')
        # Otherwise, use input argument to match folder names
        else:
            # Parent path where cell of interest is located
            apath = os.path.dirname( self.opts.dir)
            apath = os.path.abspath( apath)
            # Get all folders in the parent directory of interested file
            alldirs = [os.path.join(apath, o) for o in os.listdir(apath) if os.path.isdir(os.path.join(apath,o))]
            # match the regex( possibly) analyzeDir name to folders that exist in apath
            self.path_analysis = [ cell for cell in alldirs if re.findall( self.opts.dir, cell) ];
            if not self.path_analysis:
                raise ValueError('There were no matching directories for analysis in directory specified in argument')
        # number of jobs based on length of analysis paths
        self.n_jobs = len( self.path_analysis)

    def WriteLaunchScript(self):
        # create sbatch bash script for execution on summit
        jobStrings = []
        # One slurm task per job script.
        nTasks = 1
        # Define jobString (sbatch header template; {0}-{6} filled per job).
        jobStringDef = """#!/bin/bash
#SBATCH --job-name={0}
#SBATCH --qos={1}
#SBATCH --partition={2}
#SBATCH --account={3}
#SBATCH --output=fit.log
#SBATCH --error=fit.err
#SBATCH --time={4}
#SBATCH --nodes={5}
#SBATCH --ntasks={6}
export SCRATCH=/scratch/summit/saan8193
mkdir -p $SCRATCH/$SLURM_JOB_ID
module purge
module load matlab/R2019b
"""
        # Loop over seeds and make job strings to launch
        for spath in self.path_params:
            # Find number of nodes and number of processors/task
            if self.slurm['routine'] == 'singlecore':
                nCpuPerTask = 1
                nNodes = int( np.ceil(nTasks/(float(self.slurm['coresPerNode'])/nCpuPerTask)) )
            elif self.slurm['routine'] == 'multicore':
                # multicore: claim a full node's worth of cores (24).
                nCpuPerTask = 24
                nNodes = int( np.ceil(nTasks/(float(self.slurm['coresPerNode'])/nCpuPerTask)) )
            # Jobname : SimName_SeedNumber
            # NOTE(review): jobName is computed but never used; the template
            # uses self.slurm['jobname'] instead — confirm which is intended.
            jobName = '__'.join( spath.split('/')[-2:] )
            # Write jobString
            # NOTE(review): placeholder {6} (--ntasks) is filled with
            # nCpuPerTask — likely meant for --cpus-per-task; confirm.
            jobString = jobStringDef.format( self.slurm['jobname'], self.slurm['qos'], self.slurm['partition'], self.slurm['account'], self.slurm['time'], nNodes, nCpuPerTask)
            jobString = jobString + 'matlab -nodesktop -r "clear all; setenv({3},{4}); cd {0}; addpath({1}); TAMiT_cell({2})"\n'.format( repr('/projects/saan8193/ImageAnalysis/TAMiT'), repr('classes'), repr(spath), repr('TZ'), repr('America/Denver'))
            jobString = jobString + '\nrm -rf $SCRATCH/$SLURM_JOB_ID\n'
            jobStrings += [jobString]
        return jobStrings
if __name__ == '__main__':
    # Entry point: parse CLI flags, build the launcher, and submit the jobs.
    cli_opts = parse_args()
    cell_launcher = SimulSingleCell(cli_opts)
    cell_launcher.Launch()