-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathutility.py
164 lines (125 loc) · 5.14 KB
/
utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import keras.backend as K
from generator import AugmentedImageSequence
import json
import numpy as np
import os
import pandas as pd
from imgaug import augmenters as iaa
import shutil
def get_sample_counts(output_directory: str, datasets: str, class_names: list):
    """Count total rows and class-wise positive labels of a dataset split.

    Arguments:
        output_directory {str} -- folder containing the <datasets>.csv file
        datasets {str} -- train|validation|test set(s)
        class_names {list of str} -- target classes

    Returns:
        tuple -- (total row count, {class_name: positive label count})
    """
    csv_path = os.path.join(output_directory, f"{datasets}.csv")
    frame = pd.read_csv(csv_path)
    # Sum each label column to get per-class positive counts.
    positive_counts = frame[class_names].values.sum(axis=0)
    return frame.shape[0], dict(zip(class_names, positive_counts))
def get_class_weights(total_counts: int, class_positive_counts: dict, multiply: int):
    """Calculate the class_weight used in training.

    Arguments:
        total_counts {int} -- total number of samples in the dataset
        class_positive_counts {dict} -- positive count per class,
            e.g. {"Effusion": 300, "Infiltration": 300}
        multiply {int} -- multiplier applied to the negative-count term,
            which boosts the weight given to positive labels

    Returns:
        list -- one {0: negative_weight, 1: positive_weight} dict per class,
            in the iteration order of class_positive_counts
    """
    def get_single_class_weight(pos_counts):
        # Weight each label inversely to its frequency so the (typically
        # rarer) positive class contributes more to the loss.
        denominator = (total_counts - pos_counts) * multiply + pos_counts
        return {
            0: pos_counts / denominator,
            1: (denominator - pos_counts) / denominator,
        }

    # Comprehension over the dict values replaces the original manual
    # append loop, which carried an unused loop variable and an
    # unnecessary np.array intermediate.
    return [get_single_class_weight(pos) for pos in class_positive_counts.values()]
def augmenter():
    """Build the image augmentation pipeline.

    Following the CheXNet paper, images are randomly flipped
    horizontally with 50% probability.
    """
    # Return the pipeline directly instead of binding it to a local
    # variable that shadowed the function's own name.
    return iaa.Sequential([iaa.Fliplr(0.5)], random_order=True)
def check_create_output_dir(output_directory: str):
    """Ensure the experiment output directory exists and is not already in use.

    Creates the directory when missing, then claims it by writing the
    .training.lock marker file.

    Arguments:
        output_directory {str} -- where on the filesystem to save the experiment

    Returns:
        bool -- True once the directory exists and the lock was acquired

    Raises:
        RuntimeError -- if another process already holds the lock
    """
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
    # The lock file marks a directory as claimed by a running experiment.
    running_flag_file = os.path.join(output_directory, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("There is a process currently utilizing this directory!")
    create_training_lock(output_directory)
    # The original trailing `return False` was unreachable: the lock branch
    # either raises or returns, so it has been removed.
    return True
def create_training_lock(output_directory: str):
    """Create the training lock marker for a directory where an experiment runs.

    Arguments:
        output_directory {str} -- directory where the experiment is currently executing
    """
    lock_path = os.path.join(output_directory, ".training.lock")
    # Append mode creates the file if missing without truncating an
    # existing one; the context manager closes it immediately.
    with open(lock_path, "a"):
        pass
def delete_training_lock(output_directory: str):
    """Remove the .training.lock file from a directory where an experiment is/has been run.

    Arguments:
        output_directory {str} -- directory where an experiment has run or is running

    Raises:
        FileNotFoundError -- if no lock file is present
    """
    lock_path = os.path.join(output_directory, ".training.lock")
    # os.remove returns None; the return is kept for interface parity.
    return os.remove(lock_path)
def backup_config_file(output_directory: str, config_file: str):
    """Back up a copy of the current configuration file to the experiment directory.

    Arguments:
        output_directory {str} -- where on the filesystem to save the backup file
        config_file {str} -- filename and location of the current experiment configuration file

    Raises:
        RuntimeError -- if the configuration file could not be copied
    """
    print(f"Backing up configuration file to {output_directory}")
    try:
        destination = os.path.join(output_directory, os.path.basename(config_file))
        shutil.copy(config_file, destination)
    except OSError as err:
        # Narrowed from a bare `except:` so programming errors and
        # KeyboardInterrupt are no longer silently converted; the cause
        # is chained for debuggability.
        raise RuntimeError(
            "Unable to save experiment configuration file! "
            "Please remedy this problem before proceeding."
        ) from err
def build_datasets(dataset_csv_dir: str, output_directory: str):
    """Copy the partition dataset CSV files into the experiment directory.

    Arguments:
        dataset_csv_dir {str} -- directory where the dataset CSVs are stored
        output_directory {str} -- current experiment directory
    """
    for split in ("train", "validation", "test"):
        source_csv = os.path.join(dataset_csv_dir, f"{split}.csv")
        shutil.copy(source_csv, output_directory)
def create_tensorboard_log_dir(tensorboard_log_dir: str) -> bool:
    """Create the TensorBoard log directory on the local filesystem.

    Arguments:
        tensorboard_log_dir {str} -- path of the log directory to create

    Returns:
        bool -- True when the directory was created, False if it already existed
    """
    # Guard clause: nothing to do when the directory is already present.
    if os.path.isdir(tensorboard_log_dir):
        return False
    os.makedirs(tensorboard_log_dir)
    return True
def get_output_layer(model, layer_name):
    """Retrieve a layer of a model by its name.

    Arguments:
        model -- model object exposing a .layers iterable
        layer_name -- name of the layer to look up

    Raises:
        KeyError -- if no layer carries the requested name
    """
    # Dict comprehension replaces the dict([(k, v) ...]) construction;
    # a later layer with a duplicate name wins, as in the original.
    return {layer.name: layer for layer in model.layers}[layer_name]