Skip to content

Commit

Permalink
fix: schemagen
Browse files Browse the repository at this point in the history
  • Loading branch information
ksqsf committed Dec 11, 2023
1 parent 675d7e5 commit b1439b6
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions tools/schemagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


double_pinyin_choices = ['zrm', 'flypy']
auxiliary_code_choices = ['zrm', 'hanxin']
auxiliary_code_choices = ['zrm', 'hanxin', 'tiger']

args = None
auxiliary_table = defaultdict(list)
Expand Down Expand Up @@ -68,6 +68,8 @@ def to_auxiliary_codes(char):
auxiliary_table = read_txt_table('data/zrmdb.txt')
case 'hanxin':
auxiliary_table = read_txt_table('data/hanxindb.txt')
case 'tiger':
auxiliary_table = read_txt_table('data/tigerdb.txt')
case _:
raise ValueError('Unknown auxiliary code ' + args.auxiliary_code)
return auxiliary_table[char]
Expand Down Expand Up @@ -104,7 +106,10 @@ def iter_char_codes(char, pinyin):

def char_codes(char, pinyin):
    """Return the codes that ``iter_char_codes`` produces for one character.

    When the global ``args`` carries a truthy ``compact`` option, only the
    first code is kept; otherwise every code is returned.

    Args:
        char: The character to encode; passed through to ``iter_char_codes``.
        pinyin: The reading of ``char``; passed through to ``iter_char_codes``.

    Returns:
        A list of codes. In compact mode the list has at most one element and
        is empty when ``iter_char_codes`` yields nothing.
    """
    if 'compact' in args and args.compact:
        # The single next() call must be guarded: an exhausted iterator should
        # give an empty result instead of letting StopIteration escape.
        try:
            return [next(iter_char_codes(char, pinyin))]
        except StopIteration:
            return []
    else:
        return list(iter_char_codes(char, pinyin))

Expand Down Expand Up @@ -193,8 +198,9 @@ def handle_gen_dict():
print(f'{output_word}\t{code}')
else:
# 輔助碼與 output_word 一致, 詞頻由 word 決定
weight = pinyin_weight(word, pinyin)
weight = int(weight * float(args.freq_scale))
if not weight:
weight = pinyin_weight(word, pinyin)
weight = int(weight * float(args.freq_scale))
print(f'{output_word}\t{code}\t{weight}')


Expand Down Expand Up @@ -281,9 +287,9 @@ def put_into_dict(word, code, max_len=4):
for (word, pinyin, weight) in read_input_dict():
if len(word) > 1:
try:
# words.append((pinyin_weight(word, pinyin), word, encode_fixed_word(word, pinyin)))
for code in encode_fixed_word_sunshine_strategy(word, pinyin):
words.append((pinyin_weight(word, pinyin), word, code))
words.append((pinyin_weight(word, pinyin), word, encode_fixed_word(word, pinyin)))
# for code in encode_fixed_word_sunshine_strategy(word, pinyin):
# words.append((pinyin_weight(word, pinyin), word, code))
except:
traceback.print_exc()

Expand Down

0 comments on commit b1439b6

Please sign in to comment.