-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_wer.py
74 lines (60 loc) · 2.85 KB
/
test_wer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import unittest
from simplepythonwer.simplepythonwer import wer, ler
class TestWERMethod(unittest.TestCase):
# -1. empty ground-truth -
def test_empty_gt(self):
# self.assertRaises(ZeroDivisionError, lambda :wer(' ', ' cat sat '))
# if gt has at least some char then
self.assertEqual(wer(' ', ' cat sat '), 2.0)
self.assertEqual(wer('', ' cat sat '), 2.0)
# 0. empty str
def test_string_empty_asr(self):
# if gt has at least some char then
self.assertEqual(wer(" _ ", " "), 1.0)
self.assertEqual(wer(" _ ", " "*5), 1.0)
# 1. string equal
# if both arguments are the same the WER should be 0
def test_string_equality(self):
self.assertEqual(wer("the cat", "the cat"), 0)
# 2. As with 1 but with whitespace
# whitespace shouldn't make a difference to the WER (if the word count is equal)
def test_string_equality_whitespace(self):
self.assertEqual(wer("the cat", " the cat "), 0)
# 3. two wrong words in 4 should be 0.5 wer
def test_2_wrong_words(self):
self.assertEqual(wer("the cat sat on", "my dog sat on"), 0.5)
# 4. having a longer res > ground truth should result in > 1.0 WER
def test_longer_res(self):
self.assertGreater(wer("cat", "the cat sat on"), 1.0)
# 5. unicode and emoji should work - and not error
def test_unicode(self):
test_wer = wer("my cat sat on the mat", "Meine Katze saß auf der Matte")
self.assertIsInstance(test_wer, float)
test_wer = wer("Meine Katze saß auf der Matte", "Meine Katze saß auf der Matte")
self.assertEqual(test_wer, 0.0)
test_wer = wer("🎉 🎉 🎉", "🎉 🎉 🎉")
self.assertEqual(test_wer, 0.0)
test_wer = wer("🎉 🎉 🎉 🎉", "🎉 🎉 ")
self.assertEqual(test_wer, 0.5)
# 6. doing WER on non-str should fail
def test_nonstr_fails(self):
self.assertRaises(AttributeError, lambda :wer(['123',123], ['']))
self.assertRaises(AttributeError, lambda :wer(123, 123))
self.assertRaises(AttributeError, lambda :wer('123', 123))
self.assertRaises(AttributeError, lambda :wer(123, "123"))
self.assertRaises(AttributeError, lambda :wer(10.5, "123"))
# 7. similar to #4. if result is empty or completely wrong, the WER will always be 1.0
def test_wer_values(self):
test_cases = ['the', 'cat sat', 'on the matt']
for test in test_cases:
self.assertEqual(wer(test, ''), 1.0)
# 8. ler is identical to wer without the split
def test_ler_values(self):
z = 'the cat sat on the matt'
x=wer(z, z)
y=ler(z.split(), z.split())
self.assertEqual(x,y)
x=wer(z, z[::-1])
y=ler(z.split(), z[::-1].split())
self.assertEqual(x,y)
# running the tests - use: PYTHONPATH=$(pwd) python3 -m unittest discover .