pre_train_BOV.py
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.metrics import confusion_matrix
import cv2
import read_files
import vocabulary_helpers
import get_data
import plot_data
import os
import pickle
# Categories with more than 100 images (80 train / 20 test):
# 1. airplanes
# 2. chandelier
# 3. motorbikes
# Categories with more than 90 images (70 train / 20 test):
# 1. butterfly
#categories = ['airplanes', 'chandelier', 'motorbikes', 'butterfly', 'revolver','spoon']
# categories = ['cr', 'wr']
path = 'G:\\Machine Learning\\BagOfVisualWords\\BagOfVisualWords\\train_data_ver2'
start_row, start_col = 270, 834  # coordinates for cropping the image (top-left corner)
end_row, end_col = 366, 1021  # (bottom-right corner)
training_data = []  # list of [image, label] pairs fed to the model
def create_training_data():  # build the training data set
    for img in os.listdir(path):
        try:
            img_array = cv2.imread(os.path.join(path, img))
            new_img = img_array[start_row:end_row, start_col:end_col]  # crop the image
            training_data.append([new_img, img])  # store the cropped image with its file name as label
            # plt.imshow(new_img, cmap='gray')
            # plt.show()
        except Exception as err:
            pass  # skip files that cannot be read or cropped
create_training_data()
print("Parsing image files into categories: ")
train_imgs = []  # images
train_labels = []  # labels
for img, label in training_data:
train_imgs.append(img)
train_labels.append(label)
print("Generating vocabulary: ")
(f_vocabulary, i_vocab) = vocabulary_helpers.generate_vocabulary(train_imgs)
# (t_f_vocabulary, t_i_vocab) = vocabulary_helpers.generate_vocabulary(test_imgs)
n_clusters = 1000
# kmeans = vocabulary_helpers.generate_clusters(f_vocabulary, n_clusters)
kmeans = pickle.load(open("G:\\Machine Learning\\BagOfVisualWords\\BagOfVisualWords\\bov_pickle_1000.sav", 'rb'))
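# A minimal sketch of how the cached codebook loaded above could be produced once
# (hedged: it assumes vocabulary_helpers.generate_clusters returns a fitted,
# picklable k-means model, which is not shown in this file):
# kmeans = vocabulary_helpers.generate_clusters(f_vocabulary, n_clusters)
# with open("bov_pickle_1000.sav", 'wb') as f:
#     pickle.dump(kmeans, f)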
print("generating features: ")
print("Creating feature vectors for test and train images from vocabulary set.")
train_data = vocabulary_helpers.generate_features(i_vocab, kmeans, n_clusters)
# test_data = vocabulary_helpers.generate_features(t_i_vocab, kmeans, n_clusters)
# import random
# random.shuffle(train_data)  # shuffle the data
# print(training_data)
# Build the feature matrix X and label vector y.
X = []  # feature vectors (one per image)
y = []  # labels
for img in train_data:
X.append(img)
for i in train_labels:
y.append(i)
# print(np.shape(X[-1]))
# print(type(X))
X = np.array(X).reshape(-1, n_clusters)  # one visual-word histogram per image
y = np.array(y)
# print(type(X))
# print(type(y))
# Save X and y to pickle files.
pickle_out = open('X.pickel', 'wb')
pickle.dump(X, pickle_out)
pickle_out.close()
pickle_out = open('y.pickel', 'wb')
pickle.dump(y, pickle_out)
pickle_out.close()
# Load X and y back from the pickle files.
pickle_in = open('X.pickel', 'rb')
X_new = pickle.load(pickle_in)
pickle_in = open('y.pickel', 'rb')
y_new = pickle.load(pickle_in)
# print(X_new.shape)
# print(y_new.shape)
# print("Applying SVM classifier.")
# # SVM Classifier.
# clf = svm.SVC()
# fitted = clf.fit(train_data, train_labels)
# # Save the fitted classifier.
# with open('clf.pkl', 'wb') as f:  # write the fitted model to clf.pkl
# pickle.dump(clf, f)
# predict = clf.predict(test_data)
# print("Actual:")
# print(test_labels)
# print("predicted:")
# print(predict)
# # Confusion matrix.
# test_labels = np.asarray(test_labels)
# cnf_matrix = confusion_matrix(predict, test_labels)
# np.set_printoptions(precision=2)
# plot_data.plot_confusion_matrix(cnf_matrix, classes=categories,
# title='Confusion matrix')
# plt.show()
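# A hedged sketch of the classifier step above, rewritten so it could run from
# this script alone using the pickled features (the 80/20 split and the
# train_test_split helper are assumptions, not taken from this repo):
# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
# clf = svm.SVC()
# clf.fit(X_train, y_train)
# predict = clf.predict(X_test)
# cnf_matrix = confusion_matrix(y_test, predict)
# print(cnf_matrix)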