Skip to content

Commit

Permalink
Added to_simplified, added exports for to_traditional and is_cjk
Browse files Browse the repository at this point in the history
  • Loading branch information
andreihar committed Apr 27, 2024
1 parent 4e3e8d7 commit 9f20f33
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
2 changes: 1 addition & 1 deletion taibun/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from taibun.taibun import Converter, Tokeniser
from taibun.taibun import Converter, Tokeniser, to_traditional, to_simplified, is_cjk
11 changes: 8 additions & 3 deletions taibun/taibun.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@


def _load_json(relative_path):
    """Load a UTF-8 JSON file located relative to this module's directory.

    The file is opened in a ``with`` block so the handle is closed as soon
    as parsing finishes instead of being left for the garbage collector.
    """
    with open(os.path.join(os.path.dirname(__file__), relative_path), 'r', encoding="utf-8") as file:
        return json.load(file)


# Lookup tables are parsed once at import time and shared by every call.
word_dict = _load_json("data/words.json")
# Table used by to_traditional: source character -> Traditional character.
trad_dict = _load_json("data/simplified.json")
# Reverse table used by to_simplified. If several keys of trad_dict share
# the same value, the key iterated last wins in the inverted mapping.
simplified_dict = {v: k for k, v in trad_dict.items()}
# Explicit override applied after the inversion: '臺' always maps to '台'.
simplified_dict['臺'] = '台'

# Helper to check if the character is a Chinese character
def is_cjk(input):
Expand All @@ -32,9 +35,11 @@ def is_cjk(input):

# Convert Simplified to Traditional characters
def to_traditional(input):
    """Return ``input`` with every character mapped through ``trad_dict``.

    Each character that has an entry in the module-level ``trad_dict`` table
    is replaced by that entry; characters without an entry are kept as-is.
    The table is loaded once at import time, so no file is read or parsed
    per call.

    Args:
        input: Iterable of characters (normally a ``str``) to convert.

    Returns:
        A new string built from the converted characters.
    """
    return ''.join(trad_dict.get(c, c) for c in input)

# Convert Traditional to Simplified characters
def to_simplified(input):
    """Return ``input`` with every character mapped through ``simplified_dict``.

    Characters that have no entry in the module-level ``simplified_dict``
    table are passed through unchanged.
    """
    lookup = simplified_dict.get
    converted = [lookup(char, char) for char in input]
    return ''.join(converted)

class Converter(object):

Expand Down

0 comments on commit 9f20f33

Please sign in to comment.