-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_voc_labels.py
96 lines (80 loc) · 3.18 KB
/
convert_voc_labels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Copied from https://pjreddie.com/media/files/voc_label.py with some changes.
Converts annotations from VOC to YOLO format. By YOLO format I mean a txt file for each
image with a line for each ground truth object in the image that looks like:
<object-class> <x> <y> <w> <h>
Where <object-class> is the index of the class (see voc_classes.txt), <x> and <y> are
coordinates of the center of bounding box, <w> and <h> are width and height of the
bounding box. <x>, <y>, <w>, <h> are normalized by the image width and height so that
they fall between 0 and 1.
Also creates a txt file for each image set (e.g. voc2007_train.txt) in which each line
looks like:
<path-to-image> <path-to-annotations>
Run following commands to download VOC dataset and convert its annotations:
wget https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar
wget https://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
wget https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
tar xf VOCtrainval_11-May-2012.tar
tar xf VOCtrainval_06-Nov-2007.tar
tar xf VOCtest_06-Nov-2007.tar
python convert_voc_labels.py
"""
import os
import xml.etree.ElementTree as ET
from tqdm import tqdm
def convert(size, box):
dw = 1.0 / size[0]
dh = 1.0 / size[1]
x = (box[0] + box[1]) / 2.0
y = (box[2] + box[3]) / 2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x * dw
w = w * dw
y = y * dh
h = h * dh
return (x, y, w, h)
def convert_annotation(year, image_id):
in_file = open(f"VOCdevkit/VOC{year}/Annotations/{image_id}.xml")
out_file = open(f"VOCdevkit/VOC{year}/labels/{image_id}.txt", "w")
tree = ET.parse(in_file)
root = tree.getroot()
size = root.find("size")
w = int(size.find("width").text)
h = int(size.find("height").text)
for obj in root.iter("object"):
difficult = obj.find("difficult").text
cls = obj.find("name").text
if cls not in classes or int(difficult) == 1:
continue
cls_id = classes.index(cls)
xmlbox = obj.find("bndbox")
b = (
float(xmlbox.find("xmin").text),
float(xmlbox.find("xmax").text),
float(xmlbox.find("ymin").text),
float(xmlbox.find("ymax").text),
)
bb = convert((w, h), b)
out_file.write(f"{cls_id} {' '.join([str(a) for a in bb])}\n")
image_sets = [
("2012", "train"),
("2012", "val"),
("2007", "train"),
("2007", "val"),
("2007", "test"),
]
if __name__ == "__main__":
with open("voc_classes.txt") as f:
classes = f.read().strip().split()
for year, image_set in image_sets:
print("Preparing a set of images:", year, image_set)
os.makedirs(f"VOCdevkit/VOC{year}/labels/", exist_ok=True)
with open(f"VOCdevkit/VOC{year}/ImageSets/Main/{image_set}.txt") as f:
image_ids = f.read().strip().split()
with open(f"voc{year}_{image_set}.txt", "w") as list_file:
for image_id in tqdm(image_ids, leave=False):
list_file.write(
f"VOCdevkit/VOC{year}/JPEGImages/{image_id}.jpg VOCdevkit/VOC{year}/labels/{image_id}.txt\n"
)
convert_annotation(year, image_id)
print("Done!")