Update test_tokenizer.py
yinfan98 authored Nov 13, 2024
1 parent bbe555e commit d6c371f
Showing 1 changed file with 0 additions and 7 deletions.
7 changes: 0 additions & 7 deletions tests/transformers/bloom/test_tokenizer.py
@@ -87,22 +87,15 @@ def get_input_output_texts(self, tokenizer):

     def test_full_tokenizer(self):
         tokenizer = BloomTokenizer(self.vocab_file, self.merges_file, **self.special_tokens_map)
-        tokenizer2 = BloomTokenizerFast(self.vocab_file, self.merges_file, **self.special_tokens_map)
         text = "lower newer"
         bpe_tokens = ["\u0120low", "er", "\u0120", "n", "e", "w", "er"]
         tokens = tokenizer.tokenize(text, add_prefix_space=True)
         self.assertListEqual(tokens, bpe_tokens)

-        tokens2 = tokenizer2.tokenize(text, add_prefix_space=True)
-        self.assertListEqual(tokens2, bpe_tokens)
-
         input_tokens = tokens + [tokenizer.unk_token]
-
-        input_tokens2 = tokens2 + [tokenizer.unk_token]
         input_bpe_tokens = [14, 15, 10, 9, 3, 2, 15, 19]

         self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)
-        self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens2), input_bpe_tokens)

     def test_pretokenized_inputs(self, *args, **kwargs):
         pass
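For context, the assertions that survive this commit exercise a tokenize-then-lookup round trip on the slow BloomTokenizer. Below is a minimal standalone sketch of that flow, assuming PaddleNLP's BloomTokenizer API; the checkpoint name "bigscience/bloom-560m" is an assumption for illustration, since the test above builds its tokenizer from local vocab/merges fixtures.

    # Sketch of the round trip the remaining test asserts, assuming PaddleNLP's
    # BloomTokenizer. "bigscience/bloom-560m" is an assumed checkpoint name,
    # not taken from this commit.
    from paddlenlp.transformers import BloomTokenizer

    tokenizer = BloomTokenizer.from_pretrained("bigscience/bloom-560m")

    text = "lower newer"
    # add_prefix_space=True lets the first word match the "\u0120low"-style
    # BPE merges, mirroring the call in test_full_tokenizer above.
    tokens = tokenizer.tokenize(text, add_prefix_space=True)

    # Append unk_token and map every sub-word token to its vocabulary id.
    ids = tokenizer.convert_tokens_to_ids(tokens + [tokenizer.unk_token])
    print(tokens)
    print(ids)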
