-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraphObjectClasses.py
204 lines (181 loc) · 7.62 KB
/
graphObjectClasses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/python
import os
import sys
import re
from BeautifulSoup import BeautifulSoup as bsoup
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
"""
Main Program Function:
Visually graphs the breakdown of objects by class in database and saves the graph to outputPath.
Parses all xml annotation files in annotationsPath using BeautifulSoup. Creates and displays a simple graph (bar, pie, etc) showing the breakdown by class of all annotated objects in the database (e.g. 25% person, 25% bicycle, 50% car). Note that you are counting objects, not images. The graph should have a descriptive title and labeled axes.
Args:
annotationsPath: a string that specifies the directory containing the xml annotation files.
outputPath: a string that specifies the directory in which to write the output graph file.
Returns:
Raises:
"""
"""
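Helper functions: writeCountedFiles saves the object counts to a text file in outputPath, and parseFiles parses through all the xml files to return a list of the objects within them.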
"""
def writeCountedFiles(outputPath,countedList):
    """
    Saves the per-class object counts to 'graphObjectClassesData.txt' in outputPath.

    The file holds three lines, 'Cars=<n>', 'Persons=<n>' and 'Bicycles=<n>'.

    Args:
    outputPath: a string that specifies the directory in which to write the data file.
    countedList: a sequence organized as
    [Number of Cars, Number of Persons, Number of Bicycles].
    Raises:
    IndexError: if countedList holds fewer than three entries.
    """
    outputFile = os.path.join(outputPath, 'graphObjectClassesData.txt')
    labels = ('Cars', 'Persons', 'Bicycles')
    # Build the full text before opening the file so a short countedList
    # fails before anything is written, rather than leaving a truncated file.
    lines = [label + '=' + str(countedList[index]) + '\n'
             for index, label in enumerate(labels)]
    # The with-block closes the file even if the write itself fails.
    with open(outputFile, 'w') as f:
        f.write(''.join(lines))
def parseFiles(annotationsPath):
    """
    Parses every xml annotation file in annotationsPath and collects the object names.

    Args:
    annotationsPath: a string that specifies the directory containing the xml annotation files.
    Returns:
    A list of strings, one per <name> element whose content is a single
    word (e.g. ['car', 'person', 'car']).
    Raises:
    SystemExit: with status 1 when the directory contains no xml files.
    """
    annotationsFullPath = os.path.abspath(annotationsPath)
    # Test the real file extension. The previous unanchored '.xml' regex treated
    # the dot as a wildcard and also accepted names such as 'axml.txt'.
    xmlFiles = sorted(name for name in os.listdir(annotationsFullPath)
                      if name.lower().endswith('.xml'))
    if not xmlFiles:
        sys.stderr.write("Error - No xml files found.")
        sys.exit(1)

    namePattern = re.compile(r'(<name>)(\w+)(</name>)')
    objectList = []
    for fileName in xmlFiles:
        print("Processing file: " + fileName)
        try:
            # The with-block closes the file even if parsing raises.
            with open(os.path.join(annotationsFullPath, fileName)) as f:
                soup = bsoup(f)
        except IOError:
            sys.stderr.write("There was a problem with file: " + fileName + '\n')
            continue
        # Finds the <name> elements of the file and places their contents into the list.
        for nameTag in soup.findAll('name'):
            match = namePattern.search(str(nameTag))
            # Names that are not a single word (spaces, hyphens, empty) cannot be
            # 'car', 'person' or 'bicycle', the only classes organizeObjectList
            # counts, so they are skipped instead of crashing on a None match.
            if match:
                objectList.append(match.group(2))
    return objectList
"""
Counts the number of objects and returns a list.
The list is organized as these lines:
[Number of Cars, Number of Persons, Number of Bicycles]
"""
def organizeObjectList(dataCheck,objectList=None,dataFile='graphObjectClassesData.txt'):
    """
    Counts the objects by class, either from a parsed list or from saved data.

    Args:
    dataCheck: 'n' to count the entries of objectList; any other value
    loads previously saved counts from dataFile.
    objectList: a list of object class names; only 'car', 'person' and
    'bicycle' are counted. Treated as empty when omitted.
    dataFile: path of the saved data file, holding the lines 'Cars=<n>',
    'Persons=<n>' and 'Bicycles=<n>' in that order. Defaults to
    'graphObjectClassesData.txt' in the current working directory.
    Returns:
    A list organized as [Number of Cars, Number of Persons, Number of Bicycles].
    Raises:
    IOError: if the saved data file cannot be opened.
    ValueError: if a line of the saved data file is not in the expected format.
    """
    if dataCheck == 'n':
        positions = {'car': 0, 'person': 1, 'bicycle': 2}
        countedObjects = [0,0,0]
        # None default avoids sharing one mutable list between calls.
        for objectType in (objectList or []):
            if objectType in positions:
                countedObjects[positions[objectType]] += 1
        return countedObjects

    # Read the three lines up front so the file is closed before any parsing error.
    with open(dataFile,'r') as f:
        savedLines = [f.readline() for _ in range(3)]
    patterns = (r'(Cars=)(\d+)', r'(Persons=)(\d+)', r'(Bicycles=)(\d+)')
    countedObjects = []
    for pattern, line in zip(patterns, savedLines):
        match = re.search(pattern, line)
        if match is None:
            raise ValueError("Malformed saved data in " + dataFile + ": " + repr(line))
        countedObjects.append(int(match.group(2)))
    return countedObjects
"""
Calculates the percentage of each object type. Returns a list of percentage strings (e.g. '25.0%'), with each fraction rounded to the nearest thousandth.
"""
def calculatePercentage(countedList):
    """
    Calculates the share of the total that each count represents.

    Args:
    countedList: a sequence of numeric counts, one per object class.
    Returns:
    A list of strings, one per entry of countedList, such as '33.3%'.
    Each fraction is rounded to three decimal places before being
    scaled to a percentage. All entries are '0.0%' when the counts
    sum to zero.
    """
    # float() keeps the division below a true division under Python 2 as well.
    total = float(sum(countedList))
    percentageList = []
    for count in countedList:
        # An empty database has no meaningful breakdown; report 0% rather
        # than dividing by zero.
        fraction = round(count / total, 3) if total else 0.0
        # Fixed one-decimal formatting hides float noise such as 33.300000000000004.
        percentageList.append('%.1f%%' % (fraction * 100))
    return percentageList
"""
Creates the legend which will display how much each object type makes up of
all the object types in the database as well as the total number of object types.
"""
def createLegend(ax,colors,countedList):
    """
    Adds a legend to ax listing the percentage share of each object class.

    Args:
    ax: the matplotlib axes the legend is attached to.
    colors: a sequence of three bar colors, in the order cars, persons, bicycles.
    countedList: the counts passed on to calculatePercentage.
    """
    shares = calculatePercentage(countedList)
    classSuffixes = (' Cars', ' Persons', ' Bicycles')
    # Zero-height bars at the origin exist only to register one legend
    # entry per class with the matching color.
    for slot, suffix in enumerate(classSuffixes):
        entryLabel = shares[slot] + suffix
        ax.bar([0], [0], width=0.4, label=entryLabel, align='center', color=colors[slot])
    legendOptions = dict(
        loc='upper right',
        title='Object Types by Percentage',
        fancybox=True,
        shadow=True,
        frameon=True,
    )
    ax.legend(**legendOptions)
"""
Creates a graph of object types and shows percentage of object type as well as
the number of object type.
"""
def createGraph(countedList,outputPath):
    """
    Draws a bar graph of the object counts, optionally saves it, then displays it.

    Args:
    countedList: a list organized as
    [Number of Cars, Number of Persons, Number of Bicycles].
    outputPath: a string that specifies the directory in which
    'objectClassFigure_1.png' is written when the user chooses to save.
    """
    # CONSTANTS
    CATEGORYNUM = len(countedList)
    MAXNUM = max(countedList)
    IND = np.arange(CATEGORYNUM)
    WIDTH = 0.60
    total = sum(int(categoryNum) for categoryNum in countedList)

    fig, ax = plt.subplots()
    # Sets the colors for the 3 bars.
    colors = ['r','b','g']
    # Centering is requested explicitly instead of relying on matplotlib's
    # default bar alignment, so ticks placed at IND sit under the middle of each bar.
    ax.bar(IND, countedList, WIDTH, color=colors, align='center')

    # Graph Labels
    ax.set_ylabel(r'Number of Objects',fontsize=13, fontweight='bold')
    fig.suptitle(r'The Composition of ' + str(total) + ' Annotated Objects in the Database by Type', fontsize=16, fontweight='bold')
    ax.set_xticks(IND)
    ax.set_xticklabels( (r'Cars - ' + str(countedList[0]), r'Persons - ' + str(countedList[1]), r'Bicycles - ' + str(countedList[2])),fontweight='bold' )
    # Doubling the tallest bar leaves headroom for the legend; fall back to 1
    # when every count is zero so the y-range is never empty.
    ax.set_ylim(0, MAXNUM*2 if MAXNUM > 0 else 1)

    # Creates the legend.
    createLegend(ax,colors,countedList)

    # Asks the user if he or she wants to save the file; repeats until the
    # answer is y or n.
    while True:
        userInput = raw_input("Would you like to save the figure? (y/n) ").strip().lower()
        if userInput == 'y':
            fullpath = os.path.join(os.path.abspath(outputPath), 'objectClassFigure_1.png')
            plt.savefig(fullpath)
            break
        if userInput == 'n':
            break
        print("Please answer y or n. \n")
    plt.show()
"""
Creates a graph of object classes by reading files from the
directory,annotationsPath and outputs it to outputPath
"""
def graphObjectClasses(annotationsPath, outputPath,dataCheck):
    """
    Builds the per-class object counts and hands them to createGraph.

    Args:
    annotationsPath: a string that specifies the directory containing the xml annotation files.
    outputPath: a string that specifies the directory in which to write the output files.
    dataCheck: 'n' to parse the annotations afresh and save the counts;
    any other value reuses previously saved counts.

    NOTE(review): writeCountedFiles stores the data file under outputPath,
    while organizeObjectList is called here without a path and opens its
    default file name relative to the current working directory. Confirm
    the script is meant to be run from outputPath.
    """
    reuseSavedData = dataCheck != 'n'
    if reuseSavedData:
        print("Using previously saved data.")
        countedList = organizeObjectList(dataCheck)
    else:
        parsedObjects = parseFiles(annotationsPath)
        countedList = organizeObjectList(dataCheck, objectList=parsedObjects)
        writeCountedFiles(outputPath, countedList)
    createGraph(countedList, outputPath)
"""
Given the paths, it opens all the xml files.
Uses matplotlib to create a graph and save it to the given location.
Create the directories yourself if they are not present.
Accepts its arguments through interactive prompts.
"""
def main():
    """
    Prompts for the paths and the data choice, validates them, and runs the graphing.

    Exits with status 1 and a message on stderr when either path does not
    exist or the data choice is not y or n (case-insensitive).
    """
    annotationsPath = raw_input("Path to the annotations?: ")
    outputPath = raw_input("Output Path?: ")
    dataCheck = raw_input("Use previous data for computation?: (y/n) ")

    # Error messages for broken paths.
    if not os.path.exists(annotationsPath):
        sys.stderr.write("Error - path does not exist" + '\n')
        sys.stderr.write("annotationsPath = " + annotationsPath + '\n')
        sys.exit(1)
    if not os.path.exists(outputPath):
        sys.stderr.write("Error - path does not exist:" + '\n')
        sys.stderr.write("outputPath = " + outputPath + '\n')
        sys.exit(1)
    # Tuple membership, not a substring test against the string "y,n", which
    # also accepted "", "," and "y,n" as valid answers.
    if dataCheck.lower() not in ('y', 'n'):
        sys.stderr.write("Error - Please enter in only y or n" + '\n')
        sys.stderr.write("You entered: " + dataCheck + '\n')
        sys.exit(1)

    graphObjectClasses(annotationsPath,outputPath,dataCheck.lower())
# Run the interactive prompts only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()