diff --git a/data/dictionary/pinyin/pinyin.txt b/data/dictionary/pinyin/pinyin.txt
index 08dfdf7fe..076e740b3 100644
--- a/data/dictionary/pinyin/pinyin.txt
+++ b/data/dictionary/pinyin/pinyin.txt
@@ -30724,4 +30724,5 @@
 龥=yue4
 重启=chong2,qi3
 还款=huan2,kuan3
-侠传=xia2,zhuan4
\ No newline at end of file
+侠传=xia2,zhuan4
+𩽾𩾌=an1,kang1
\ No newline at end of file
diff --git a/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java b/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java
index 793241695..90746352c 100644
--- a/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java
+++ b/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java
@@ -180,15 +180,17 @@ protected static List<Pinyin> segLongest(char[] charArray, AhoCorasickDoubleArra
     protected static List<Pinyin> segLongest(char[] charArray, AhoCorasickDoubleArrayTrie<Pinyin[]> trie, boolean remainNone)
     {
         final Pinyin[][] wordNet = new Pinyin[charArray.length][];
+        final int[] lengths = new int[charArray.length];
         trie.parseText(charArray, new AhoCorasickDoubleArrayTrie.IHit<Pinyin[]>()
         {
             @Override
             public void hit(int begin, int end, Pinyin[] value)
             {
                 int length = end - begin;
-                if (wordNet[begin] == null || length > wordNet[begin].length)
+                if (length > lengths[begin])
                 {
-                    wordNet[begin] = length == 1 ? new Pinyin[]{value[0]} : value;
+                    wordNet[begin] = value;
+                    lengths[begin] = length;
                 }
             }
         });
@@ -208,7 +210,7 @@ public void hit(int begin, int end, Pinyin[] value)
             {
                 pinyinList.add(pinyin);
             }
-            offset += wordNet[offset].length;
+            offset += lengths[offset];
         }
         return pinyinList;
     }
diff --git a/src/test/java/com/hankcs/hanlp/dictionary/py/PinyinDictionaryTest.java b/src/test/java/com/hankcs/hanlp/dictionary/py/PinyinDictionaryTest.java
new file mode 100644
index 000000000..b9d7870d7
--- /dev/null
+++ b/src/test/java/com/hankcs/hanlp/dictionary/py/PinyinDictionaryTest.java
@@ -0,0 +1,17 @@
+package com.hankcs.hanlp.dictionary.py;
+
+import com.hankcs.hanlp.HanLP;
+import junit.framework.TestCase;
+
+import java.util.Arrays;
+
+public class PinyinDictionaryTest extends TestCase
+{
+
+    public void testGet()
+    {
+        System.out.println(Arrays.toString(PinyinDictionary.get("鼖")));
+        System.out.println(PinyinDictionary.convertToPinyin("\uD867\uDF7E\uD867\uDF8C"));
+        System.out.println(HanLP.convertToPinyinList("\uD867\uDF7E\uD867\uDF8C"));
+    }
+}
\ No newline at end of file