Skip to content

Commit

Permalink
只针对大文本开启多线程分词
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed May 11, 2015
1 parent ee50d55 commit ed7ff38
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/main/java/com/hankcs/hanlp/seg/Segment.java
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ protected void mergeNumberQuantifier(List<Vertex> termList, WordNet wordNetAll,
public List<Term> seg(String text)
{
char[] charArray = text.toCharArray();
if (config.threadNumber > 1)
if (config.threadNumber > 1 && charArray.length > 10000) // 小文本多线程没意义,反而变慢了
{
List<String> sentenceList = SentencesUtil.toSentenceList(charArray);
String[] sentenceArray = new String[sentenceList.size()];
Expand Down Expand Up @@ -575,7 +575,7 @@ public void run()
}

/**
* 开启多线程
* 开启多线程(默认4线程)
* @param enable
* @return
*/
Expand Down
11 changes: 9 additions & 2 deletions src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,24 @@ public static void main(String[] args)
sbBigText.append(text);
}
text = sbBigText.toString();
System.gc();

long start;
double costTime;
// 测个速度
long start = System.currentTimeMillis();

segment.enableMultithreading(false);
start = System.currentTimeMillis();
segment.seg(text);
double costTime = (System.currentTimeMillis() - start) / (double) 1000;
costTime = (System.currentTimeMillis() - start) / (double) 1000;
System.out.printf("单线程分词速度:%.2f字每秒\n", text.length() / costTime);
System.gc();

segment.enableMultithreading(true); // 或者 segment.enableMultithreading(4);
start = System.currentTimeMillis();
segment.seg(text);
costTime = (System.currentTimeMillis() - start) / (double) 1000;
System.out.printf("多线程分词速度:%.2f字每秒\n", text.length() / costTime);
System.gc();
}
}
7 changes: 7 additions & 0 deletions src/test/java/com/hankcs/test/seg/TestSegment.java
Original file line number Diff line number Diff line change
Expand Up @@ -237,4 +237,11 @@ public void testMultiThreading() throws Exception
assertEquals(term1.offset, term2.offset);
}
}

/**
 * Smoke test: segments a very short sentence with multithreading enabled
 * and an argument of 100 (presumably the requested thread count), then
 * prints the result. There are no assertions; the test passes as long as
 * {@code seg} does not throw.
 *
 * @throws Exception if segmentation fails
 */
public void testTryToCrashSegment() throws Exception
{
    // Far shorter than anything that could usefully be split across 100 threads.
    final String tinyInput = "尝试玩坏分词器";
    ViterbiSegment viterbi = new ViterbiSegment();
    Segment multithreaded = viterbi.enableMultithreading(100);
    System.out.println(multithreaded.seg(tinyInput));
}
}

0 comments on commit ed7ff38

Please sign in to comment.