This repository has been archived by the owner on Apr 4, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathtests.py
180 lines (156 loc) · 8.01 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# encoding=utf-8
import twitter_text, sys, os, json, argparse, re
from twitter_text.unicode import force_unicode
# Detect whether this interpreter is a "narrow" Unicode build. Narrow builds
# cannot represent code points above U+FFFF as a single character, which
# breaks a handful of conformance tests using supplementary-plane text.
narrow_build = True
try:
    unichr(0x20000)
    narrow_build = False
except (ValueError, NameError):
    # ValueError: unichr() rejects code points > 0xFFFF on narrow Python 2
    # builds. NameError: unichr does not exist on Python 3 (which has no
    # narrow builds, but the probe must not crash there either).
    # The original bare `except:` also hid unrelated errors; keep only the
    # two outcomes this probe can legitimately produce.
    pass
# Command-line interface: one optional flag that skips tests known to fail
# on narrow Unicode builds.
parser = argparse.ArgumentParser(description = u'Run the integration tests for twitter_text')
parser.add_argument(
    '--ignore-narrow-errors',
    '-i',
    action = 'store_true',
    default = False,
    help = u'Ignore errors caused by narrow builds',
)
args = parser.parse_args()
# PyYAML is required to parse the conformance fixture files; fail loudly with
# an install hint instead of a raw ImportError.
try:
    import yaml
except ImportError:
    raise Exception('You need to install pyaml to run the tests')
# PyYAML decodes ASCII-only scalars to `str` by default; the conformance
# fixtures must always come back as unicode objects, so override the string
# constructor on both loader classes.
# (technique from http://stackoverflow.com/questions/2890146/how-to-force-pyyaml-to-load-strings-as-unicode-objects)
from yaml import Loader, SafeLoader

def construct_yaml_str(self, node):
    # Return the scalar as-is instead of letting PyYAML coerce it to str.
    return self.construct_scalar(node)

for _loader_cls in (Loader, SafeLoader):
    _loader_cls.add_constructor(u'tag:yaml.org,2002:str', construct_yaml_str)
# BeautifulSoup (bs4, or the legacy 3.x package as a fallback) is used to
# compare generated HTML while ignoring attribute ordering.
try:
    from bs4 import BeautifulSoup
except ImportError:
    try:
        from BeautifulSoup import BeautifulSoup
    except ImportError:
        raise Exception('You need to install BeautifulSoup to run the tests')
def success(text):
    """Return *text* wrapped in green ANSI escape codes, newline-terminated,
    encoded as UTF-8 bytes for writing straight to stdout."""
    colored = u'\033[92m%s\033[0m\n' % text
    return colored.encode('utf-8')
def error(text):
    """Return *text* wrapped in red ANSI escape codes, newline-terminated,
    encoded as UTF-8 bytes for writing straight to stdout."""
    colored = u'\033[91m%s\033[0m\n' % text
    return colored.encode('utf-8')
attempted = 0
def assert_equal_without_attribute_order(result, test, failure_message = None):
    """Assert two HTML fragments are equivalent, ignoring attribute order.

    BeautifulSoup normalizes attributes while parsing, so comparing parsed
    trees sidesteps the reordering hoops the ruby suite jumps through.
    """
    global attempted
    attempted += 1
    description = test.get('description')
    parsed_result = BeautifulSoup(result)
    parsed_expected = BeautifulSoup(test.get('expected'))
    assert parsed_result == parsed_expected, error(u'Test %d Failed: %s' % (attempted, description))
    sys.stdout.write(success(u'Test %d Passed: %s' % (attempted, description)))
    sys.stdout.flush()
def assert_equal(result, test):
    """Assert *result* equals the fixture's expected value, printing a
    numbered pass/fail line (hit data is appended to failure output)."""
    global attempted
    attempted += 1
    description = test.get('description')
    hits = test.get('hits')
    detail = u'\n%s' % hits if hits else ''
    assert result == test.get('expected'), error(u'\nTest %d Failed: %s%s' % (attempted, description, detail))
    sys.stdout.write(success(u'Test %d Passed: %s' % (attempted, description)))
    sys.stdout.flush()
# --- extractor conformance section -------------------------------------
# Map each fixture section name to the Extractor method it exercises;
# unrecognized sections are silently skipped, as in the if/elif original.
EXTRACTOR_METHODS = {
    'mentions': 'extract_mentioned_screen_names',
    'mentions_with_indices': 'extract_mentioned_screen_names_with_indices',
    'mentions_or_lists_with_indices': 'extract_mentions_or_lists_with_indices',
    'replies': 'extract_reply_screen_name',
    'urls': 'extract_urls',
    'urls_with_indices': 'extract_urls_with_indices',
    'hashtags': 'extract_hashtags',
    'cashtags': 'extract_cashtags',
    'hashtags_with_indices': 'extract_hashtags_with_indices',
    'cashtags_with_indices': 'extract_cashtags_with_indices',
}
with open(os.path.join('twitter-text-conformance', 'extract.yml'), 'r') as extractor_file:
    extractor_tests = yaml.load(force_unicode(extractor_file.read()))
sys.stdout.write('Testing Extractor\n')
sys.stdout.flush()
for section in extractor_tests.get('tests'):
    sys.stdout.write('\nTesting Extractor: %s\n' % section)
    sys.stdout.flush()
    for test in extractor_tests.get('tests').get(section):
        # Supplementary-plane hashtag text cannot be handled on narrow builds.
        if (args.ignore_narrow_errors or narrow_build) and section in ['hashtags'] and test.get('description') in ['Hashtag with ideographic iteration mark']:
            sys.stdout.write('Skipping: %s\n' % test.get('description'))
            sys.stdout.flush()
            continue
        extractor = twitter_text.extractor.Extractor(test.get('text'))
        method_name = EXTRACTOR_METHODS.get(section)
        if method_name:
            assert_equal(getattr(extractor, method_name)(), test)
# --- autolink conformance section --------------------------------------
with open(os.path.join('twitter-text-conformance', 'autolink.yml'), 'r') as autolink_file:
    autolink_tests = yaml.load(force_unicode(autolink_file.read()))
sys.stdout.write('\nTesting Autolink\n')
sys.stdout.flush()
autolink_options = {'suppress_no_follow': True}
# Each handler receives the Autolink instance plus the fixture and returns
# the generated HTML; unrecognized sections are skipped, as in the original.
AUTOLINK_HANDLERS = {
    'usernames': lambda link, test: link.auto_link_usernames_or_lists(autolink_options),
    'cashtags': lambda link, test: link.auto_link_cashtags(autolink_options),
    'urls': lambda link, test: link.auto_link_urls(autolink_options),
    'hashtags': lambda link, test: link.auto_link_hashtags(autolink_options),
    'all': lambda link, test: link.auto_link(autolink_options),
    'lists': lambda link, test: link.auto_link_usernames_or_lists(autolink_options),
    'json': lambda link, test: link.auto_link_with_json(json.loads(test.get('json')), autolink_options),
}
for section in autolink_tests.get('tests'):
    sys.stdout.write('\nTesting Autolink: %s\n' % section)
    for test in autolink_tests.get('tests').get(section):
        # Supplementary-plane hashtag text cannot be handled on narrow builds.
        if (args.ignore_narrow_errors or narrow_build) and section in ['hashtags'] and test.get('description') in ['Autolink a hashtag containing ideographic iteration mark']:
            sys.stdout.write('Skipping: %s\n' % test.get('description'))
            sys.stdout.flush()
            continue
        autolink = twitter_text.autolink.Autolink(test.get('text'))
        handler = AUTOLINK_HANDLERS.get(section)
        if handler:
            assert_equal_without_attribute_order(handler(autolink, test), test)
# --- hit highlighting conformance section ------------------------------
with open(os.path.join('twitter-text-conformance', 'hit_highlighting.yml'), 'r') as hit_highlighting_file:
    hit_highlighting_tests = yaml.load(force_unicode(hit_highlighting_file.read()))
sys.stdout.write('\nTesting Hit Highlighting\n')
sys.stdout.flush()
for section in hit_highlighting_tests.get('tests'):
    sys.stdout.write('\nTesting Hit Highlighting: %s\n' % section)
    for test in hit_highlighting_tests.get('tests').get(section):
        hit_highlighter = twitter_text.highlighter.HitHighlighter(test.get('text'))
        # plain_text results compare as raw strings; with_links results are
        # HTML and compare via BeautifulSoup so attribute order is ignored.
        checker = {
            'plain_text': assert_equal,
            'with_links': assert_equal_without_attribute_order,
        }.get(section)
        if checker:
            checker(hit_highlighter.hit_highlight(hits = test.get('hits')), test)
# --- validation conformance section ------------------------------------
validation_tested = False
validate_tests = None
try:
    # The validation fixtures contain astral-plane characters that can break
    # narrow Python builds, hence the escape round-trip and ValueError guard.
    # `with` ensures the file is closed even when yaml.load raises (the
    # original leaked the handle on that path).
    with open(os.path.join('twitter-text-conformance', 'validate.yml'), 'r') as validate_file:
        validate_file_contents = validate_file.read()
    # NOTE: upstream used a ur'' literal here, which is a syntax error on
    # Python 3; a plain raw string spells the identical two-character regex
    # (a literal backslash followed by "n").
    validate_tests = yaml.load(re.sub(r'\\n', '\n', validate_file_contents.encode('unicode-escape')))
except ValueError:
    sys.stdout.write('\nValidation tests were skipped because of wide character issues\n')
    sys.stdout.flush()

if validate_tests:
    sys.stdout.write('\nTesting Validation\n')
    sys.stdout.flush()
    for section in validate_tests.get('tests'):
        sys.stdout.write('\nTesting Validation: %s\n' % section)
        for test in validate_tests.get('tests').get(section):
            # Each section exercises one predicate of the Validation object.
            validator = twitter_text.validation.Validation(test.get('text'))
            if section == 'tweets':
                # Fixture expresses validity positively; invert tweet_invalid().
                assert_equal(not validator.tweet_invalid(), test)
            elif section == 'usernames':
                assert_equal(validator.valid_username(), test)
            elif section == 'lists':
                assert_equal(validator.valid_list(), test)
            elif section == 'hashtags':
                assert_equal(validator.valid_hashtag(), test)
            elif section == 'urls':
                assert_equal(validator.valid_url(), test)
# Print the green summary footer and exit successfully (os.EX_OK == 0).
summary = u'\033[0m-------\n\033[92m%d tests passed.\033[0m\n' % attempted
sys.stdout.write(summary)
sys.stdout.flush()
sys.exit(os.EX_OK)