-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* convert chinese words * index method * apply to dataframe * update inputing * BOT: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add zhconv * rename variable * add quotes * Create test_to_zh.py * convert chinese words * index method * apply to dataframe * update inputing * BOT: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add zhconv * rename variable * add quotes * Create test_to_zh.py * Update test_to_zh.py * Update test_to_zh.py Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
3eb6824
commit bb38b34
Showing
8 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,7 @@ dependencies: | |
- codecov | ||
# doctest testing | ||
- pytest-doctestplus | ||
|
||
- pip | ||
- pip: | ||
- zhconv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ Conversion | |
|
||
change_axis_type | ||
to_series | ||
to_zh | ||
values_to_dict | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ Conversion | |
change_axis_type | ||
swap_index_values | ||
to_datetime | ||
to_zh | ||
to_set | ||
values_to_dict | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from typing import Hashable | ||
|
||
import pandas as pd | ||
|
||
from dtoolkit.accessor.register import register_dataframe_method | ||
from dtoolkit.accessor.series.to_zh import LOCALIZATION | ||
from dtoolkit.accessor.series.to_zh import to_zh as s_to_zh | ||
|
||
|
||
@register_dataframe_method
def to_zh(
    df: pd.DataFrame,
    /,
    column: Hashable,
    *,
    locale: LOCALIZATION = "zh-cn",
    dictionary: dict = None,
) -> pd.DataFrame:
    """
    Simple conversion and localization between simplified and traditional Chinese.

    Parameters
    ----------
    column : Hashable
        The column to convert.

    locale : {"zh-hans", "zh-hant", "zh-cn", "zh-sg", "zh-tw", "zh-hk", "zh-my", \
"zh-mo"}, default "zh-cn"
        Locale to convert to.

    dictionary : dict, default None
        A dictionary which updates the conversion table, eg.
        ``{'from1': 'to1', 'from2': 'to2'}``

    Returns
    -------
    DataFrame
        A copy of ``df`` in which ``column`` holds the converted text. The
        original DataFrame is left unchanged.

    Raises
    ------
    ModuleNotFoundError
        If the module 'zhconv' is not installed.

    TypeError
        If ``df[column]`` is not string dtype.

    See Also
    --------
    dtoolkit.accessor.series.to_zh

    Examples
    --------
    >>> import dtoolkit
    >>> import pandas as pd
    >>> df = pd.DataFrame({'zh': ['漢', '字']})
    >>> df
      zh
    0  漢
    1  字
    >>> df.to_zh('zh')
      zh
    0  汉
    1  字
    """

    # ``df.assign(**{column: ...})`` only works for string labels because
    # Python requires keyword names to be ``str``; an integer or tuple label
    # would raise ``TypeError``. Assigning into a copy accepts any hashable
    # label and still leaves the caller's DataFrame untouched.
    result = df.copy()
    result[column] = s_to_zh(
        df[column],
        locale=locale,
        dictionary=dictionary,
    )
    return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from typing import Literal | ||
|
||
import pandas as pd | ||
from pandas.api.types import is_string_dtype | ||
|
||
from dtoolkit.accessor.register import register_series_method | ||
|
||
|
||
# Locale codes accepted by the ``locale`` parameter of ``to_zh``.
# fmt: off
LOCALIZATION = Literal[
    "zh-hans", "zh-hant",
    "zh-cn", "zh-sg",
    "zh-tw", "zh-hk",
    "zh-my", "zh-mo",
]
# fmt: on
|
||
|
||
@register_series_method
def to_zh(
    s: pd.Series,
    /,
    *,
    locale: LOCALIZATION = "zh-cn",
    dictionary: dict = None,
) -> pd.Series:
    """
    Convert Chinese text between simplified and traditional forms.

    Every element of the Series is passed through ``zhconv.convert`` with the
    requested ``locale``.

    Parameters
    ----------
    locale : {"zh-hans", "zh-hant", "zh-cn", "zh-sg", "zh-tw", "zh-hk", "zh-my", \
"zh-mo"}, default "zh-cn"
        Target locale of the conversion.

    dictionary : dict, default None
        Extra entries that update the conversion table, eg.
        ``{'from1': 'to1', 'from2': 'to2'}``

    Returns
    -------
    Series

    Raises
    ------
    ModuleNotFoundError
        If the module 'zhconv' is not installed.

    TypeError
        If ``s`` is not string dtype.

    See Also
    --------
    dtoolkit.accessor.dataframe.to_zh

    Examples
    --------
    >>> import dtoolkit
    >>> import pandas as pd
    >>> s = pd.Series(['漢', '字'])
    >>> s
    0    漢
    1    字
    dtype: object
    >>> s.to_zh(locale="zh-cn")
    0    汉
    1    字
    dtype: object
    """

    # Imported lazily so that ``zhconv`` is only required when this method is
    # actually called.
    from zhconv import convert

    if is_string_dtype(s):
        # Keyword options forwarded by ``Series.apply`` to every ``convert`` call.
        options = {"locale": locale, "update": dictionary}
        return s.apply(convert, **options)

    raise TypeError(f"Expected string dtype, but got {s.dtype!r}.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import pandas as pd | ||
import pytest | ||
|
||
from dtoolkit.accessor.series.to_zh import to_zh | ||
|
||
|
||
pytest.importorskip("zhconv") | ||
|
||
|
||
@pytest.mark.parametrize(
    ("s", "error"),
    [
        # An integer Series is not string dtype.
        (pd.Series([1, 2]), TypeError),
    ],
)
def test_error(s, error):
    """Check that ``to_zh`` raises ``error`` for the given Series ``s``."""
    with pytest.raises(error):
        to_zh(s)