From ed7ff38902e95e5ea4a97fee111f5f2617ace49b Mon Sep 17 00:00:00 2001 From: hankcs Date: Mon, 11 May 2015 14:06:03 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=AA=E9=92=88=E5=AF=B9=E5=A4=A7=E6=96=87?= =?UTF-8?q?=E6=9C=AC=E5=BC=80=E5=90=AF=E5=A4=9A=E7=BA=BF=E7=A8=8B=E5=88=86?= =?UTF-8?q?=E8=AF=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/java/com/hankcs/hanlp/seg/Segment.java | 4 ++-- .../com/hankcs/demo/DemoMultithreadingSegment.java | 11 +++++++++-- src/test/java/com/hankcs/test/seg/TestSegment.java | 7 +++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/hankcs/hanlp/seg/Segment.java b/src/main/java/com/hankcs/hanlp/seg/Segment.java index a7d943dd9..2de317032 100644 --- a/src/main/java/com/hankcs/hanlp/seg/Segment.java +++ b/src/main/java/com/hankcs/hanlp/seg/Segment.java @@ -290,7 +290,7 @@ protected void mergeNumberQuantifier(List termList, WordNet wordNetAll, public List seg(String text) { char[] charArray = text.toCharArray(); - if (config.threadNumber > 1) + if (config.threadNumber > 1 && charArray.length > 10000) // 小文本多线程没意义,反而变慢了 { List sentenceList = SentencesUtil.toSentenceList(charArray); String[] sentenceArray = new String[sentenceList.size()]; @@ -575,7 +575,7 @@ public void run() } /** - * 开启多线程 + * 开启多线程(默认4线程) * @param enable * @return */ diff --git a/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java b/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java index 883041053..e155c7c09 100644 --- a/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java +++ b/src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java @@ -36,17 +36,24 @@ public static void main(String[] args) sbBigText.append(text); } text = sbBigText.toString(); + System.gc(); + long start; + double costTime; // 测个速度 - long start = System.currentTimeMillis(); + + segment.enableMultithreading(false); + start = System.currentTimeMillis(); segment.seg(text); - double costTime = (System.currentTimeMillis() - start) / (double) 1000; + costTime = (System.currentTimeMillis() - start) / (double) 1000; System.out.printf("单线程分词速度:%.2f字每秒\n", text.length() / costTime); + System.gc(); segment.enableMultithreading(true); // 或者 segment.enableMultithreading(4); start = System.currentTimeMillis(); segment.seg(text); costTime = (System.currentTimeMillis() - start) / (double) 1000; System.out.printf("多线程分词速度:%.2f字每秒\n", text.length() / costTime); + System.gc(); } } diff --git a/src/test/java/com/hankcs/test/seg/TestSegment.java b/src/test/java/com/hankcs/test/seg/TestSegment.java index 1acfeaf0e..dc2b9e88d 100644 --- a/src/test/java/com/hankcs/test/seg/TestSegment.java +++ b/src/test/java/com/hankcs/test/seg/TestSegment.java @@ -237,4 +237,11 @@ public void testMultiThreading() throws Exception assertEquals(term1.offset, term2.offset); } } + + public void testTryToCrashSegment() throws Exception + { + String text = "尝试玩坏分词器"; + Segment segment = new ViterbiSegment().enableMultithreading(100); + System.out.println(segment.seg(text)); + } }