From 18e5c7a1ae1337a92af6529f64168993990b3286 Mon Sep 17 00:00:00 2001 From: hankcs Date: Fri, 19 Mar 2021 16:57:29 -0400 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20convertToPinyinList=20fix?= =?UTF-8?q?=20https://github.com/hankcs/HanLP/issues/1634?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/hankcs/hanlp/dictionary/py/PinyinDictionary.java | 4 ++++ src/test/java/com/hankcs/hanlp/HanLPTest.java | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java b/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java index 90746352c..4dcd6f489 100644 --- a/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java +++ b/src/main/java/com/hankcs/hanlp/dictionary/py/PinyinDictionary.java @@ -187,6 +187,10 @@ protected static List segLongest(char[] charArray, AhoCorasickDoubleArra public void hit(int begin, int end, Pinyin[] value) { int length = end - begin; + if (length == 1 && value.length > 1) + { + value = new Pinyin[]{value[0]}; + } if (length > lengths[begin]) { wordNet[begin] = value; diff --git a/src/test/java/com/hankcs/hanlp/HanLPTest.java b/src/test/java/com/hankcs/hanlp/HanLPTest.java index 589d0618f..f8c137d33 100644 --- a/src/test/java/com/hankcs/hanlp/HanLPTest.java +++ b/src/test/java/com/hankcs/hanlp/HanLPTest.java @@ -16,4 +16,9 @@ public void testDicUpdate() { System.out.println(HanLP.segment("大数据是一个新词汇!")); } + + public void testConvertToPinyinList() + { + System.out.println(HanLP.convertToPinyinString("你好", " ", false)); + } } \ No newline at end of file