Skip to content

Commit

Permalink
Use detokenized output in translation_moe scoring script (fixes #2277, …)
Browse files Browse the repository at this point in the history
  • Loading branch information
Myle Ott committed Jul 17, 2020
1 parent 17069f9 commit 98b79a4
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions examples/translation_moe/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,16 @@ def load_sys(paths):
with open(path) as f:
for line in f:
line = line.rstrip()
if line.startswith(('S-', 'T-', 'H-')):
# S: source
# T: target
# D: detokenized system output
if line.startswith(('S-', 'T-', 'D-')):
i = int(line[line.find('-')+1:line.find('\t')])
if line.startswith('S-'):
src[i] = line.split('\t')[1]
if line.startswith('T-'):
tgt[i] = line.split('\t')[1]
if line.startswith('H-'):
if line.startswith('D-'):
if i not in hypos:
hypos[i] = []
log_probs[i] = []
Expand Down Expand Up @@ -115,7 +118,7 @@ def sentence_bleu(hypothesis, reference):
bleu = compute_bleu(
bleu.counts, bleu.totals,
bleu.sys_len, bleu.ref_len,
smooth='exp', smooth_floor=0.0,
smooth_method='exp',
)
return bleu.score

Expand Down

0 comments on commit 98b79a4

Please sign in to comment.