diff --git a/src/main/java/com/hankcs/hanlp/seg/Segment.java b/src/main/java/com/hankcs/hanlp/seg/Segment.java index a7d943dd9..2de317032 100644 --- a/src/main/java/com/hankcs/hanlp/seg/Segment.java +++ b/src/main/java/com/hankcs/hanlp/seg/Segment.java @@ -290,7 +290,7 @@ protected void mergeNumberQuantifier(List termList, WordNet wordNetAll, public List seg(String text) { char[] charArray = text.toCharArray(); - if (config.threadNumber > 1) + if (config.threadNumber > 1 && charArray.length > 10000) // multithreading is pointless for small texts and only makes them slower, so this branch is entered only above 10000 chars { List sentenceList = SentencesUtil.toSentenceList(charArray); String[] sentenceArray = new String[sentenceList.size()]; @@ -575,7 +575,7 @@ public void run() } /** - * 开启多线程 + * Enable multithreading (4 threads by default) * @param enable * @return */ diff --git a/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java b/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java index 883041053..e155c7c09 100644 --- a/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java +++ b/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java @@ -36,17 +36,24 @@ public static void main(String[] args) sbBigText.append(text); } text = sbBigText.toString(); + System.gc(); + long start; + double costTime; // 测个速度 - long start = System.currentTimeMillis(); + + segment.enableMultithreading(false); + start = System.currentTimeMillis(); segment.seg(text); - double costTime = (System.currentTimeMillis() - start) / (double) 1000; + costTime = (System.currentTimeMillis() - start) / (double) 1000; System.out.printf("单线程分词速度:%.2f字每秒\n", text.length() / costTime); + System.gc(); segment.enableMultithreading(true); // 或者 segment.enableMultithreading(4); start = System.currentTimeMillis(); segment.seg(text); costTime = (System.currentTimeMillis() - start) / (double) 1000; System.out.printf("多线程分词速度:%.2f字每秒\n", text.length() / costTime); + System.gc(); } } diff --git a/src/test/java/com/hankcs/test/seg/TestSegment.java b/src/test/java/com/hankcs/test/seg/TestSegment.java index 
1acfeaf0e..dc2b9e88d 100644 --- a/src/test/java/com/hankcs/test/seg/TestSegment.java +++ b/src/test/java/com/hankcs/test/seg/TestSegment.java @@ -237,4 +237,11 @@ public void testMultiThreading() throws Exception assertEquals(term1.offset, term2.offset); } } + + public void testTryToCrashSegment() throws Exception /* smoke test: segment a very short text after calling enableMultithreading(100) */ + { + String text = "尝试玩坏分词器"; /* literal means "try to break the segmenter"; 7 characters long */ + Segment segment = new ViterbiSegment().enableMultithreading(100); /* presumably requests 100 threads, far more than this input could use - confirm against enableMultithreading(int) */ + System.out.println(segment.seg(text)); /* no assertion: passes as long as nothing throws. NOTE(review): Segment.seg in this patch enters its multi-threaded branch only when the text exceeds 10000 chars, so this input presumably stays off that branch - confirm the intended coverage */ + } }