-
Notifications
You must be signed in to change notification settings - Fork 0
/
variations.py
97 lines (73 loc) · 2.63 KB
/
variations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import csv
import json
import os
# Directory containing this script, with symlinks resolved by realpath.
# The default input/output paths of the functions below are built from it.
script_dir = os.path.dirname(os.path.realpath(__file__))
def get_wordlist(dictionary):
    """Return the unique, lower-cased words of a dictionary in sorted order.

    Args:
        dictionary: Iterable of entries, each a mapping whose ``"word"``
            key holds a string.

    Returns:
        A list of the distinct lower-cased words, sorted ascending.
    """
    # A set comprehension de-duplicates directly; sorted() accepts any
    # iterable, so no intermediate list is needed.
    return sorted({entry["word"].lower() for entry in dictionary})
def get_freqlist(
    wordlist,
    freqlist_org_path=None,
):
    """Build a frequency list for the words in ``wordlist``.

    Args:
        wordlist: Iterable of words to score. Words are looked up as given
            against lower-cased keys, so they are expected to be lowercase.
        freqlist_org_path: Optional path to a tab-separated source frequency
            list, read as UTF-8. The word is taken from the second column
            and its integer count from the third.

    Returns:
        A dict mapping each word to its frequency, ordered by descending
        frequency (ties keep the order of ``wordlist``).

    Notes:
        Without a source frequency list every word gets frequency 1.
        With one, counts of all case variants of a word are summed and 1 is
        added, so words missing from the source list still get frequency 1.
    """
    # If frequency list is available
    if freqlist_org_path:
        # newline="" is what the csv module expects for files it reads.
        with open(
            freqlist_org_path, encoding="utf-8", newline=""
        ) as freqlist_org_file:
            freqlist_org = {
                str(row[1]): int(row[2])
                for row in csv.reader(freqlist_org_file, delimiter="\t")
                if row  # skip blank lines instead of raising IndexError
            }

        # Merge all case variants under the lower-cased word. Keying on
        # word.lower() also keeps caseless entries (digits, caseless
        # scripts), which are neither "lower" nor contain an uppercase char.
        freqlist_merged = {}
        for word, count in freqlist_org.items():
            key = word.lower()
            freqlist_merged[key] = freqlist_merged.get(key, 0) + count

        # Filter against word list; +1 so unseen words get frequency 1.
        freqlist = {word: freqlist_merged.get(word, 0) + 1 for word in wordlist}
    # If no frequency list is available
    else:
        freqlist = {word: 1 for word in wordlist}

    # Sort by frequency (stable, so ties keep word list order)
    return dict(sorted(freqlist.items(), key=lambda x: x[1], reverse=True))
def export_wordlist(
    wordlist, out_path=os.path.join(script_dir, "output/akl_wordlist.txt")
):
    """Export the word list to a text file, one word per line.

    Args:
        wordlist: Iterable of word strings to write.
        out_path: Destination file; opened in write mode, so existing
            content is replaced.

    The words are joined with newlines (no newline after the last word) and
    written as UTF-8 so the result does not depend on the platform's
    default encoding.
    """
    with open(out_path, "w", encoding="utf-8") as out_file:
        out_file.write("\n".join(wordlist))
def export_freqlist(
    freqlist, out_path=os.path.join(script_dir, "output/akl_freqlist.csv")
):
    """Export the frequency list to a CSV file.

    Args:
        freqlist: Mapping of word to frequency; rows are written in the
            mapping's iteration order as ``word,frequency``.
        out_path: Destination file; opened in write mode, so existing
            content is replaced.
    """
    # newline="" lets the csv writer control line endings itself; without it
    # rows end in "\r\r\n" on platforms that translate "\n" on write.
    with open(out_path, "w", encoding="utf-8", newline="") as out_file:
        csv.writer(out_file).writerows(freqlist.items())
def get_dictionary(in_path=os.path.join(script_dir, "output/akl_dictionary.json")):
    """Load the dictionary from a JSON file.

    Args:
        in_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, as returned by ``json.load``.
    """
    # Read as UTF-8 explicitly rather than relying on the locale default.
    with open(in_path, encoding="utf-8") as in_file:
        return json.load(in_file)
if __name__ == "__main__":
    # Derive the word list from the stored dictionary and write it out.
    words = get_wordlist(get_dictionary())
    export_wordlist(words)

    # No source frequency list is passed, so every word gets frequency 1.
    export_freqlist(get_freqlist(words))