Skip to content

Commit

Permalink
支持𩽾𩾌(ān kāng)之类的补充字符集 fix #1564
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Jan 31, 2021
1 parent 2577426 commit b9a899b
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
3 changes: 2 additions & 1 deletion data/dictionary/pinyin/pinyin.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30724,4 +30724,5 @@
龥=yue4
重启=chong2,qi3
还款=huan2,kuan3
侠传=xia2,zhuan4
侠传=xia2,zhuan4
𩽾𩾌=an1,kang1
Original file line number Diff line number Diff line change
Expand Up @@ -180,15 +180,17 @@ protected static List<Pinyin> segLongest(char[] charArray, AhoCorasickDoubleArra
protected static List<Pinyin> segLongest(char[] charArray, AhoCorasickDoubleArrayTrie<Pinyin[]> trie, boolean remainNone)
{
final Pinyin[][] wordNet = new Pinyin[charArray.length][];
final int[] lengths = new int[charArray.length];
trie.parseText(charArray, new AhoCorasickDoubleArrayTrie.IHit<Pinyin[]>()
{
@Override
public void hit(int begin, int end, Pinyin[] value)
{
int length = end - begin;
if (wordNet[begin] == null || length > wordNet[begin].length)
if (length > lengths[begin])
{
wordNet[begin] = length == 1 ? new Pinyin[]{value[0]} : value;
wordNet[begin] = value;
lengths[begin] = length;
}
}
});
Expand All @@ -208,7 +210,7 @@ public void hit(int begin, int end, Pinyin[] value)
{
pinyinList.add(pinyin);
}
offset += wordNet[offset].length;
offset += lengths[offset];
}
return pinyinList;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.hankcs.hanlp.dictionary.py;

import com.hankcs.hanlp.HanLP;
import junit.framework.TestCase;

import java.util.Arrays;

/**
 * Tests for {@link PinyinDictionary} lookups and text-to-pinyin conversion, including a word
 * made of supplementary (non-BMP) characters, each of which occupies two Java {@code char}s.
 */
public class PinyinDictionaryTest extends TestCase
{

    /**
     * Prints the dictionary lookup and conversion results for manual inspection, and checks
     * that a word made of two supplementary characters is converted to exactly two pinyin
     * (one per character, not one per UTF-16 code unit).
     */
    public void testGet()
    {
        // U+29F7E followed by U+29F8C, written as surrogate pairs:
        // four chars in the String, but only two characters.
        final String anKang = "\uD867\uDF7E\uD867\uDF8C";

        // Kept as output only; the expected readings for this character are not pinned here.
        System.out.println(Arrays.toString(PinyinDictionary.get("鼖")));
        System.out.println(PinyinDictionary.convertToPinyin(anKang));
        System.out.println(HanLP.convertToPinyinList(anKang));

        // Without assertions this test could never fail; pin the one-pinyin-per-character
        // contract on both public entry points.
        assertEquals(2, PinyinDictionary.convertToPinyin(anKang).size());
        assertEquals(2, HanLP.convertToPinyinList(anKang).size());
    }
}

1 comment on commit b9a899b

@hanlpbot
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Butterfly Effect. There might be relevant details there:

https://bbs.hankcs.com/t/topic/2777/3

Please sign in to comment.