-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgenerate-TFFormat.py
142 lines (111 loc) · 5.89 KB
/
generate-TFFormat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import numpy as np
import os
import cv2
import tensorflow as tf
from object_detection.utils import dataset_util
import contextlib2
from object_detection.dataset_tools import tf_record_creation_util
def create_tf_example(filename, image, labels, imgsize, class_name="car"):
    """Build a tf.train.Example for one square, already-encoded image patch.

    Args:
        filename: Name stored as both image/filename and image/source_id.
        image: PNG-encoded image bytes for the patch.
        labels: Sequence of boxes, each [ymin, xmin, ymax, xmax, class_id],
            with coordinates already normalized to [0, 1].
        imgsize: Side length in pixels of the square patch (height == width).
        class_name: Text label recorded for every box. Defaults to "car",
            matching the previously hard-coded value.

    Returns:
        A tf.train.Example in the TF Object Detection API feature schema,
        ready to be serialized into a TFRecord shard.
    """
    height = imgsize  # Image height
    width = imgsize   # Image width
    encoded_image_data = image  # Encoded image bytes
    image_format = b'png'       # b'jpeg' or b'png'
    # Materialize so the sequence can be traversed once per field.
    boxes = list(labels)
    # Box rows are (ymin, xmin, ymax, xmax, class_id); one entry per box.
    ymins = [lb[0] for lb in boxes]
    xmins = [lb[1] for lb in boxes]
    ymaxs = [lb[2] for lb in boxes]
    xmaxs = [lb[3] for lb in boxes]
    classes_text = [class_name.encode('utf-8') for _ in boxes]
    # NOTE(review): upstream passes class id 0 for every box, but TF OD API
    # label maps conventionally start at 1 (0 = background) — confirm against
    # the label map used at training time.
    classes = [int(lb[4]) for lb in boxes]
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf-8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf-8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
# ---- Patch-extraction / TFRecord-sharding configuration ----
patchsize = 500   # side length (px) of each square patch written to the records
stride = 400      # slide step; 500 - 400 = 100 px of overlap between patches
box_size = 30     # side (px) of the box centered on each annotated target pixel
img_count = 0     # NOTE(review): never read below — kept for compatibility
dataset_folder = "dataset"
shard_train = "data/train-dataset.record"
shard_test = "data/test-dataset.record"
test_count = 0
train_count = 0
examples_shard = 128    # examples written per shard file
test_num_shards = 10
train_num_shards = test_num_shards*4 #keep 80% train data, 20% test data
with contextlib2.ExitStack() as tf_record_close_stack:
    # ExitStack closes every shard writer when the block exits.
    train_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, shard_train, train_num_shards)
    test_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, shard_test, test_num_shards)
    # Every sample is composed of 2 adjacent files after sorting:
    # the image itself, then its positive-annotation mask.
    # NOTE(review): file names are joined to dataset_folder directly, so this
    # only works for a flat directory — os.walk subdirectory entries would
    # produce wrong paths. Confirm the dataset layout.
    for _, _, fileList in os.walk(dataset_folder):
        sortedFiles = sorted(fileList)
        for i in range(0, len(sortedFiles), 2):
            image = cv2.imread(dataset_folder + "/" + sortedFiles[i])
            # cv2 loads BGR; reverse the channel axis to get RGB.
            rgbImg = image[:, :, ::-1]
            posAnn = cv2.imread(dataset_folder + "/" + sortedFiles[i + 1])
            # Slide vertically
            for sH in range(0, image.shape[0], stride):
                eH = sH + patchsize
                # Slide horizontally
                for sW in range(0, image.shape[1], stride):
                    eW = sW + patchsize
                    # Collect the patch from the image
                    patch = rgbImg[sH:eH, sW:eW]
                    # Border patches come out smaller than patchsize: stretch them.
                    if patch.shape[0] < patchsize or patch.shape[1] < patchsize:
                        patch = cv2.resize(patch, (patchsize, patchsize))
                    # Encode the patch as PNG bytes (tobytes replaces the
                    # deprecated tostring; identical output).
                    encodedImg = cv2.imencode('.png', patch)[1].tobytes()
                    # Collect the same patch from the annotation image
                    patchPosAnnotated = posAnn[sH:eH, sW:eW]
                    # BUG FIX: the original tested patch.shape here, but patch
                    # was already resized above, so the condition was always
                    # false and border annotation patches were never stretched,
                    # leaving the boxes out of register with the image patch.
                    if (patchPosAnnotated.shape[0] < patchsize
                            or patchPosAnnotated.shape[1] < patchsize):
                        patchPosAnnotated = cv2.resize(
                            patchPosAnnotated, (patchsize, patchsize))
                    # Any non-zero pixel in the mask marks a target center.
                    labels = np.max(patchPosAnnotated, axis=-1)
                    # shape (num_targets, 2), rows of (y, x) pixel coordinates
                    targets = np.argwhere(labels != 0).astype(float)
                    # Build normalized (ymin, xmin, ymax, xmax) boxes of side
                    # box_size centered on each target pixel.
                    targets = np.concatenate(
                        (targets - box_size / 2, targets + box_size / 2),
                        axis=-1) / patchsize
                    # Append class id 0 for every box.
                    # NOTE(review): TF OD API label maps conventionally start
                    # at 1 (0 = background) — verify the consuming label map.
                    isObj = np.zeros((targets.shape[0], 1))
                    targets = np.concatenate((targets, isObj), axis=-1)
                    # ~20% of patches go to the test split until it fills up.
                    if (np.random.rand() < 0.2
                            and test_count < test_num_shards * examples_shard):
                        imageName = "img_" + str(test_count) + ".png"
                        tf_example = create_tf_example(
                            imageName, encodedImg, targets, patchsize)
                        output_shard_index = test_count // examples_shard
                        test_tfrecords[output_shard_index].write(
                            tf_example.SerializeToString())
                        test_count += 1
                    elif train_count < train_num_shards * examples_shard:
                        imageName = "img_" + str(train_count) + ".png"
                        tf_example = create_tf_example(
                            imageName, encodedImg, targets, patchsize)
                        output_shard_index = train_count // examples_shard
                        train_tfrecords[output_shard_index].write(
                            tf_example.SerializeToString())
                        train_count += 1
                    else:
                        # Both splits are full; remaining patches are dropped
                        # (prints once per leftover patch).
                        print("Done")