-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsaveLocalisations.py
137 lines (115 loc) · 5.34 KB
/
saveLocalisations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import argparse
import datetime
import os
import tator
import pickle
import yaml
from pprint import pprint
def _three_way_bounds(count, test_size):
    """Return the (train_end, val_end) slice boundaries for ``count`` items."""
    train_end = int(count - (count * test_size))
    # The held-out remainder is halved: first half -> val, rest -> test.
    val_end = train_end + int((count - train_end) / 2)
    return train_end, val_end


def get_train_test_split(pos_locs, neg_locs, test_size=0.4):
    """Split localizations into train, val and test sets.

    Positive and negative samples are split separately with the same
    proportions and then concatenated (positives first) in each set.
    No shuffling is performed; the input order is preserved.

    Args:
        pos_locs: Sequence of positive localizations.
        neg_locs: Sequence of negative localizations (may be empty).
        test_size: Fraction of the data held out from training. The held-out
            part is divided evenly between val and test, so each receives
            roughly ``test_size / 2`` of the data. Must lie in ``[0, 1]``.

    Returns:
        A ``(train, val, test)`` tuple.

    Raises:
        ValueError: If ``test_size`` is outside ``[0, 1]``. Without this
            check a value above 1 yields a negative slice boundary, which
            wraps around and puts the same samples in both train and test.
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            f'test_size must be between 0 and 1 inclusive, got {test_size!r}'
        )

    pos_train_end, pos_val_end = _three_way_bounds(len(pos_locs), test_size)
    train = pos_locs[0:pos_train_end]
    val = pos_locs[pos_train_end:pos_val_end]
    test = pos_locs[pos_val_end:]

    # With no negatives, return the positive slices untouched.
    if len(neg_locs) == 0:
        return train, val, test

    neg_train_end, neg_val_end = _three_way_bounds(len(neg_locs), test_size)
    train = train + neg_locs[0:neg_train_end]
    val = val + neg_locs[neg_train_end:neg_val_end]
    test = test + neg_locs[neg_val_end:]
    return train, val, test
def _create_iteration_dirs(path, root='Data_iter'):
    """Create the next unused ``<root><n>`` folder under ``path`` and return it.

    The folder gets ``images`` and ``labels`` sub-folders, each containing
    ``train``, ``val`` and ``test``.
    """
    n = 1
    while True:
        sub_dir = os.path.join(path, root + str(n))
        if not os.path.exists(sub_dir):
            break
        n += 1
    for kind in ('images', 'labels'):
        for split in ('train', 'val', 'test'):
            os.makedirs(os.path.join(sub_dir, kind, split))
    return sub_dir


def main():
    """Fetch localizations from Tator, split them and save them to disk.

    Command-line driven. The function:

    1. Parses the API token, project/section ids and sampling options.
    2. Creates the output folder and a fresh ``Data_iter<n>`` folder tree
       (``images``/``labels`` x ``train``/``val``/``test``) inside it.
    3. Downloads localizations whose ``Corrected`` attribute equals the
       positive value (and, optionally, the negative value).
    4. Writes ``negative_ids.csv`` (when negatives are requested),
       ``train_locs.pkl``, ``val_locs.pkl``, ``test_locs.pkl`` and
       ``locsImgCoco.yaml`` into the output folder.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('apiToken')
    parser.add_argument('projectId')
    parser.add_argument('sectionId')
    # NOTE: argparse ignores ``default`` on required positionals, so the
    # defaults on testSize and positive_size below never take effect; both
    # values must always be supplied on the command line.
    parser.add_argument('testSize',type = float,default = 0.4,help='The propotion of train and test (val and test) set split.')
    parser.add_argument('positives',type = int,help = 'The value of CORRECTED set in tator for positive samples.')
    parser.add_argument('positive_size',type = int,default = 1000,help = 'The number of positive localizations to be saved' )
    parser.add_argument('--negatives',type = int,help = 'The value of CORRECTED set in tator for negative samples.')
    parser.add_argument('--negative_set_size',type = float,default = 0.1,help='The proportion negative to positive samples.')
    parser.add_argument('--folder_path',type = str,default='',help='An folder path to save the CSV and PKL files and to create the folder structure needed for yolov5')
    args = parser.parse_args()

    token = args.apiToken
    projectId = args.projectId
    sectionId = args.sectionId
    positive_size = args.positive_size
    positive_corrections = str(args.positives)
    negative_set_size = args.negative_set_size
    testSize = args.testSize

    # Compare against None so that ``--negatives 0`` is honoured; a plain
    # truthiness test would silently drop the negative set for the value 0.
    if args.negatives is not None:
        negative_corrections = str(args.negatives)
    else:
        negative_corrections = None

    # Fall back to the current working directory when no folder is given.
    path = args.folder_path if args.folder_path else os.getcwd()
    os.makedirs(path, exist_ok=True)

    sub_dir = _create_iteration_dirs(path)

    api = tator.get_api(token = token)
    media_list = api.get_media_list(projectId,section=sectionId)
    media_ids = [x.id for x in media_list]

    pos_localisations = api.get_localization_list(projectId,
                                                  section = sectionId,
                                                  media_id = media_ids,
                                                  attribute = [f'Corrected::{positive_corrections}'])
    # Keep at most ``positive_size`` positives.
    pos_localisations = pos_localisations[0:positive_size]

    if negative_corrections is not None:
        neg_localisations = api.get_localization_list(projectId,
                                                      section = sectionId,
                                                      media_id = media_ids,
                                                      attribute = [f'Corrected::{negative_corrections}'])
        # Cap negatives at the requested proportion of the positives, or at
        # however many negatives are available, whichever is smaller.
        neg_set = min(int(len(pos_localisations)*negative_set_size),len(neg_localisations))
        neg_localisations = neg_localisations[0:neg_set]
        # Each entry is "<localization id>_<media id>".
        neg_ids = [str(x.id)+'_'+str(x.media) for x in neg_localisations]
        with open(f'{path}/negative_ids.csv','w') as f:
            f.write(','.join(neg_ids))
    else:
        neg_localisations = []

    train,val,test = get_train_test_split(pos_localisations,neg_localisations,test_size=testSize)

    for split_name, split_locs in (('train', train), ('val', val), ('test', test)):
        with open(f'{path}/{split_name}_locs.pkl','wb') as f:
            pickle.dump(split_locs,f)

    # NOTE(security): the API token is written to disk in plaintext here.
    # Left unchanged because whatever consumes this YAML presumably reads it
    # from this key; treat the file as a secret.
    args_yaml = {'apiToken':token,
                 'train':f'{path}/train_locs.pkl',
                 'val':f'{path}/val_locs.pkl',
                 'test':f'{path}/test_locs.pkl',
                 'cocoPath':path,
                 'imagesPath':f'{sub_dir}/images'
                 }
    with open(f'{path}/locsImgCoco.yaml', 'w') as fp:
        yaml.dump(args_yaml, fp)
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == '__main__':
    main()