Commit 6d95920

fix lint

yinfan98 committed Nov 13, 2024
1 parent d6c371f commit 6d95920
Showing 5 changed files with 6 additions and 23 deletions.
5 changes: 1 addition & 4 deletions paddlenlp/transformers/auto/tokenizer.py
@@ -56,10 +56,7 @@
("blenderbot", "BlenderbotTokenizer"),
(
"bloom",
(
"BloomTokenizer",
"BloomTokenizerFast" if is_tokenizers_available() else None
),
("BloomTokenizer", "BloomTokenizerFast" if is_tokenizers_available() else None),
),
("clip", "CLIPTokenizer"),
("codegen", "CodeGenTokenizer"),
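For context, the reformatted "bloom" entry keeps a (slow, fast) pair in which the fast class is only registered when the tokenizers package is installed. A minimal sketch of how such an entry could be resolved; the resolve_tokenizer_class helper below is hypothetical and not the actual AutoTokenizer code:

def resolve_tokenizer_class(entry, use_fast=True):
    # entry is either a single class name or a (slow, fast) pair, where the
    # fast entry may be None when the `tokenizers` package is unavailable.
    if isinstance(entry, tuple):
        slow_cls, fast_cls = entry
    else:
        slow_cls, fast_cls = entry, None
    if use_fast and fast_cls is not None:
        return fast_cls
    return slow_cls

# With the entry above: resolve_tokenizer_class(("BloomTokenizer", "BloomTokenizerFast"))
# returns "BloomTokenizerFast", and falls back to "BloomTokenizer" when the fast class is None.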
6 changes: 1 addition & 5 deletions paddlenlp/transformers/bloom/tokenizer_fast.py
@@ -12,15 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
-import pickle
-from typing import List, Optional, Tuple
 
-from tokenizers import normalizers
+from typing import Optional, Tuple
 
-from ..tokenizer_utils_base import BatchEncoding
 from ..tokenizer_utils_fast import PretrainedTokenizerFast
 from .tokenizer import BloomTokenizer
 
 VOCAB_FILES_NAMES = {"tokenizer_file": "tokenizer.json"}
 
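The imports that remain are the ones the fast Bloom tokenizer actually uses. As a rough usage sketch, assuming BloomTokenizerFast follows the usual from_pretrained loading pattern (the checkpoint name below is only an example):

from paddlenlp.transformers import BloomTokenizerFast

# Loads the serialized tokenizer.json referenced by VOCAB_FILES_NAMES.
tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
print(tokenizer("Hello world")["input_ids"])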
5 changes: 1 addition & 4 deletions paddlenlp/transformers/convert_slow_tokenizer.py
@@ -442,10 +442,7 @@ def pre_tokenizer(self, replacement, add_prefix_space):
         return None
 
 
-SLOW_TO_FAST_CONVERTERS = {
-    "LlamaTokenizer": LlamaConverter,
-    "BertTokenizer": BertConverter
-}
+SLOW_TO_FAST_CONVERTERS = {"LlamaTokenizer": LlamaConverter, "BertTokenizer": BertConverter}
 
 
 def convert_slow_tokenizer(transformer_tokenizer, from_tiktoken=False) -> Tokenizer:
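SLOW_TO_FAST_CONVERTERS maps a slow tokenizer class name to the converter that builds its fast counterpart. A simplified sketch of how convert_slow_tokenizer might dispatch on it; this is not the function's actual body, and the converted() call is assumed to mirror the usual converter interface:

def convert_slow_tokenizer_sketch(transformer_tokenizer):
    # Look up the converter by the slow tokenizer's class name.
    name = transformer_tokenizer.__class__.__name__
    if name not in SLOW_TO_FAST_CONVERTERS:
        raise ValueError(f"No converter registered for {name}")
    converter_cls = SLOW_TO_FAST_CONVERTERS[name]
    # Each converter wraps the slow tokenizer and emits a fast `tokenizers.Tokenizer`.
    return converter_cls(transformer_tokenizer).converted()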
1 change: 0 additions & 1 deletion tests/transformers/auto/test_tokenizer.py
@@ -24,7 +24,6 @@
 from paddlenlp.transformers.bert.configuration import BertConfig
 from paddlenlp.transformers.bert.tokenizer import BertTokenizer
 from paddlenlp.transformers.bert.tokenizer_fast import BertTokenizerFast
-
 from paddlenlp.utils.env import TOKENIZER_CONFIG_NAME
 
 from ...utils.test_module.custom_configuration import CustomConfig
12 changes: 3 additions & 9 deletions tests/transformers/bloom/test_tokenizer.py
@@ -17,12 +17,8 @@
 import os
 import unittest
 
-from paddlenlp.transformers import BloomTokenizer
-from paddlenlp.transformers import BloomTokenizerFast
-
-from tests.transformers.test_tokenizer_common import (
-    TokenizerTesterMixin
-)
+from paddlenlp.transformers import BloomTokenizer, BloomTokenizerFast
+from tests.transformers.test_tokenizer_common import TokenizerTesterMixin
 
 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",
@@ -94,7 +90,7 @@ def test_full_tokenizer(self):

         input_tokens = tokens + [tokenizer.unk_token]
         input_bpe_tokens = [14, 15, 10, 9, 3, 2, 15, 19]
-
+
         self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)
 
     def test_pretokenized_inputs(self, *args, **kwargs):
@@ -150,7 +146,6 @@ def test_padding_if_pad_token_set_slow(self):
         # short slice pair does have padding
         self.assertTrue(pad_token_id in out_p2["input_ids"][1])
         self.assertTrue(0 in out_p2["attention_mask"][1])
-
 
     def test_add_bos_token_slow(self):
         bos_token = "$$$"
@@ -173,7 +168,6 @@ def test_add_bos_token_slow(self):
         self.assertEqual(decode_s.split()[0], bos_token)
         self.assertTrue(all(d.split()[0] == bos_token for d in decode_s2))
-
 
     # tokenizer has no padding token
     def test_padding_different_model_input_name(self):
         pass
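The consolidated imports feed a test class that mixes TokenizerTesterMixin into unittest.TestCase. A minimal sketch of that pattern; the class body and attribute names below are illustrative assumptions, not copied from the test file:

import unittest

from paddlenlp.transformers import BloomTokenizer, BloomTokenizerFast
from tests.transformers.test_tokenizer_common import TokenizerTesterMixin


class BloomTokenizationTestSketch(TokenizerTesterMixin, unittest.TestCase):
    # The mixin runs the shared tokenizer test suite against these classes;
    # the exact attribute names it expects should be checked in the mixin itself.
    tokenizer_class = BloomTokenizer
    fast_tokenizer_class = BloomTokenizerFast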
