-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcv.py
executable file
·44 lines (30 loc) · 1.16 KB
/
cv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python
import sys
import data
import models
import numpy as np
from sklearn import cross_validation
class CountPrinter:
    """Print a running ``current/total`` progress counter.

    ``total`` is stored as given and only used for display; ``current``
    starts at 0 and grows by one on every call to :meth:`count`.
    """

    def __init__(self, total):
        self.total = total
        self.current = 0

    def count(self):
        """Advance the counter by one and print ``current/total``."""
        self.current += 1
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s/%s' % (self.current, self.total))
def get_error_rate(classifier_class, annotations, n_folds=10, verbose=False, **kwargs):
    """Estimate a classifier's error rate by k-fold cross-validation.

    Args:
        classifier_class: Callable invoked as ``classifier_class(**kwargs)``;
            the resulting object must provide ``train(items)`` and
            ``predict(items)``.
        annotations: Sequence of items, each providing
            ``get_group_number()``, which is compared against the
            corresponding prediction.
        n_folds: Number of folds handed to ``cross_validation.KFold``.
        verbose: When true, print a ``fold/total`` progress line at the
            start of every fold.
        **kwargs: Forwarded unchanged to ``classifier_class``.

    Returns:
        The mean of the per-fold error rates (an average of fold averages,
        not a single rate pooled over all test items).
    """
    splits = cross_validation.KFold(len(annotations), n_folds=n_folds)
    # NOTE(review): one classifier instance is re-trained for every fold;
    # this presumably relies on train() discarding earlier state - confirm.
    model = classifier_class(**kwargs)
    # Converted to an array so the index arrays from KFold can select items.
    annotations = np.array(annotations)
    progress = CountPrinter(n_folds)
    per_fold_rates = []
    for train_idx, test_idx in splits:
        if verbose:
            progress.count()
        model.train(annotations[train_idx])
        guesses = model.predict(annotations[test_idx])
        # 1 for every misclassified test item, 0 for every correct one.
        mistakes = []
        for idx, guess in zip(test_idx, guesses):
            actual = annotations[idx].get_group_number()
            mistakes.append(int(actual != guess))
        per_fold_rates.append(np.mean(mistakes))
    return np.mean(per_fold_rates)
if __name__ == "__main__":
    # Script entry point: load the annotations named by the first
    # command-line argument and print the 10-fold cross-validated error
    # rate of models.NaiveBayesContextRestricted with window_size=3.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit('usage: %s <annotations>' % sys.argv[0])
    annotations = data.load_unambiguous_annotations(sys.argv[1])
    error_rate = get_error_rate(
        models.NaiveBayesContextRestricted, annotations, 10, True,
        window_size=3)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(error_rate)