This repository has been archived by the owner on Oct 18, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 152
/
preprocess.py
127 lines (100 loc) · 3.54 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy as np
import time
from glob import glob
import SimpleITK as itk
from skimage import morphology, measure, segmentation
import h5py
import _pickle as pickle
from config import *
from visual_utils import plot_slices
# Short-circuit when config flags preprocessing as already finished.
# NOTE: this executes at import time, so importing this module while
# PROCESS_DONE is truthy terminates the interpreter.
if PROCESS_DONE:
    print('done')
    # Raise SystemExit directly instead of calling exit(): exit() is injected
    # by the `site` module for interactive sessions and is not defined under
    # `python -S` or in embedded/frozen interpreters. Exit status stays 0.
    raise SystemExit
def preprocess():
    """Extract the lung region from every not-yet-processed ``.mhd`` scan.

    Scans are discovered with ``DATASET_PATH/*/*.mhd``. A scan is identified
    by the 10 characters preceding its file extension; if an ``.h5`` file with
    the same id already exists in ``PREPROCESS_PATH`` the scan is skipped.
    Every remaining scan is read with SimpleITK, masked by ``get_lung_img``,
    written out by ``save_to_numpy`` and its metadata is appended to the log
    through ``log_msg``.
    """
    print('start preprocess')
    now = time.localtime(int(time.time()))
    log_msg("start at {}".format(time.strftime('%Y/%m/%d %H:%M:%S', now)))

    scan_paths = glob('{}/*/*.mhd'.format(DATASET_PATH))
    # Id = the 10 characters in front of the 3-character '.h5' suffix.
    handled_ids = {
        done_path[-13:-3]
        for done_path in glob('{}/*.h5'.format(PREPROCESS_PATH))
    }
    print('{} total, {} processed'.format(len(scan_paths), len(handled_ids)))

    counter = 0
    for scan_path in scan_paths:
        # Same 10-character id, here in front of the 4-character '.mhd' suffix.
        seriesuid = scan_path[-14:-4]
        if seriesuid in handled_ids:
            print('{} handled'.format(seriesuid))
            continue

        counter += 1
        print('{} process {}'.format(counter, scan_path))

        itk_img = itk.ReadImage(scan_path)
        # SimpleITK returns (depth, height, width); reorder the axes to
        # (width, height, depth).
        volume = np.transpose(itk.GetArrayFromImage(itk_img), (2, 1, 0))
        origin = np.array(itk_img.GetOrigin())
        spacing = np.array(itk_img.GetSpacing())

        # Only the lung extraction itself is timed.
        started = time.time()
        lung_img, pixels = get_lung_img(volume)
        duration = time.time() - started

        meta = {
            'seriesuid': seriesuid,
            'shape': lung_img.shape,
            'origin': origin,
            'spacing': spacing,
            'pixels': pixels,
            # Fraction of the volume covered by the lung mask.
            'cover_ratio': pixels / np.prod(lung_img.shape),
            'process_duration': duration,
        }
        save_to_numpy(seriesuid, lung_img, meta)
        log_msg(meta)

    print('all preprocess done')
def log_msg(msg):
    """Append ``str(msg)`` as one line to ``MSG_LOG_FILE`` and echo it to stdout.

    Args:
        msg: Any object; it is converted with ``str`` before being written.
    """
    with open(MSG_LOG_FILE, 'a') as log_file:
        # print() stringifies msg and appends the trailing newline.
        print(msg, file=log_file)
    print(msg)
def save_to_numpy(seriesuid, img, meta):
    """Write one processed scan to ``PREPROCESS_PATH``.

    Two files are produced, both named after ``seriesuid``:
    ``<seriesuid>.h5`` holding the array in a dataset called ``'img'`` and
    ``<seriesuid>.meta`` holding the pickled ``meta`` object.

    Args:
        seriesuid: Identifier used as the base file name.
        img: Array stored as the ``'img'`` HDF5 dataset.
        meta: Object pickled next to the HDF5 file.
    """
    base_path = '{}/{}'.format(PREPROCESS_PATH, seriesuid)

    with h5py.File('{}.h5'.format(base_path), 'w') as h5_file:
        h5_file.create_dataset('img', data=img)

    with open('{}.meta'.format(base_path), 'wb') as meta_file:
        pickle.dump(meta, meta_file)
def _debug_plot(volume, title):
    """Plot *volume* with ``plot_slices`` when debug plotting is enabled."""
    if DEBUG_PLOT_WHEN_PREPROCESSING:
        plot_slices(volume, title)


def _keep_largest_regions(mask, keep=2):
    """Clear, in place, every connected region smaller than the keep-th largest.

    Regions whose area ties with the keep-th largest are retained. When the
    mask holds ``keep`` regions or fewer it is left untouched.

    Args:
        mask: Boolean array; modified in place.
        keep: Number of largest regions to retain.

    Returns:
        The same ``mask`` object, for convenience.
    """
    label_img = measure.label(mask)
    regions = measure.regionprops(label_img)
    if len(regions) <= keep:
        return mask

    areas = sorted((region.area for region in regions), reverse=True)
    min_area = areas[keep - 1]
    small_labels = [r.label for r in regions if r.area < min_area]
    # One vectorised assignment instead of a Python loop over every voxel
    # coordinate of every discarded region.
    mask[np.isin(label_img, small_labels)] = False
    return mask


def get_lung_img(img):
    """Mask a 3-D scan down to its (dilated) lung region.

    Pipeline:
      1. threshold: voxels below ``BINARY_THRESHOLD`` become foreground;
      2. per slice along the last axis, drop foreground touching the slice
         border;
      3. keep only the two largest 3-D connected regions;
      4. morphological closing with a 4x4x4 footprint;
      5. morphological dilation with a 16x16x16 footprint;
      6. multiply the resulting mask with the input volume.

    When ``DEBUG_PLOT_WHEN_PREPROCESSING`` is set, the intermediate result of
    each step is shown with ``plot_slices``.

    Args:
        img: 3-D array. The caller in this file passes it with axes ordered
            (width, height, depth). The array is not modified.

    Returns:
        tuple: ``(masked_img, pixels)`` where ``masked_img`` is ``img`` with
        every voxel outside the final mask set to 0, and ``pixels`` is the
        number of voxels inside the final mask.
    """
    # No defensive copy is needed: `img` is only read, and every later step
    # works on a freshly created mask array.
    origin_img = img
    _debug_plot(origin_img, 'origin')

    # binary
    mask = origin_img < BINARY_THRESHOLD
    _debug_plot(mask, 'binary')

    # clear_border, applied slice by slice along the last axis
    for idx in range(mask.shape[2]):
        mask[:, :, idx] = segmentation.clear_border(mask[:, :, idx])
    _debug_plot(mask, 'clear_border')

    # Keep the 2 largest connected regions. The previous code compared
    # against the third-largest area with `<`, which let three regions
    # survive despite the step being labelled "keep 2".
    mask = _keep_largest_regions(mask, keep=2)
    _debug_plot(mask, 'keep 2 lagest connected graph')

    # closing
    # The footprint is passed positionally: the keyword was renamed from
    # `selem` to `footprint` in scikit-image, and the positional form works
    # with both old and new releases.
    mask = morphology.closing(mask, np.ones((4, 4, 4)))
    _debug_plot(mask, 'closing')

    # dilation
    mask = morphology.dilation(mask, np.ones((16, 16, 16)))
    _debug_plot(mask, 'dilation')

    # Compute the masked volume once and reuse it for the plot and the result.
    lung_img = mask * origin_img
    _debug_plot(lung_img, 'final')
    return lung_img, np.sum(mask != 0)
# Run the full preprocessing pass only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    preprocess()