Skip to content

Commit

Permalink
Fixed StackOverflowError for long keywords
Browse files Browse the repository at this point in the history
  • Loading branch information
tkuun101 committed Feb 3, 2020
1 parent 89c7355 commit 6d14c1b
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 6 deletions.
34 changes: 28 additions & 6 deletions src/main/java/com/hankcs/algorithm/AhoCorasickDoubleArrayTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -883,11 +883,28 @@ private int resize(int newSize)
/**
* insert the siblings to double array trie
*
* @param siblings the siblings being inserted
* @return the position to insert them
* @param firstSiblings the initial siblings being inserted
*/
private int insert(List<Map.Entry<Integer, State>> siblings)
private void insert(List<Map.Entry<Integer, State>> firstSiblings)
{
    // Work list of pending sibling groups. Each entry pairs a group with the
    // index in base[] that receives the group's begin position once it is placed.
    Queue<Map.Entry<Integer, List<Map.Entry<Integer, State>>>> pending =
        new ArrayDeque<Map.Entry<Integer, List<Map.Entry<Integer, State>>>>();

    // The initial group has no parent slot to patch, which is signalled by a null key.
    Map.Entry<Integer, List<Map.Entry<Integer, State>>> rootGroup =
        new AbstractMap.SimpleEntry<Integer, List<Map.Entry<Integer, State>>>(null, firstSiblings);
    pending.add(rootGroup);

    // Every call removes one group from the queue and may append child groups,
    // so the trie is processed iteratively instead of by recursive descent.
    while (!pending.isEmpty())
    {
        insert(pending);
    }
}

/**
* insert the siblings to double array trie
*
* @param siblingQueue a queue holding all siblings being inserted and the position to insert them
*/
private void insert(Queue<Map.Entry<Integer, List<Map.Entry<Integer, State>>>> siblingQueue) {
Map.Entry<Integer, List<Map.Entry<Integer, State>>> tCurrent = siblingQueue.remove();
List<Map.Entry<Integer, State>> siblings = tCurrent.getValue();

int begin = 0;
int pos = Math.max(siblings.get(0).getKey() + 1, nextCheckPos) - 1;
int nonzero_num = 0;
Expand Down Expand Up @@ -962,12 +979,17 @@ else if (first == 0)
}
else
{
int h = insert(new_siblings); // dfs
base[begin + sibling.getKey()] = h;
siblingQueue.add(new AbstractMap.SimpleEntry<Integer, List<Map.Entry<Integer, State>>>(begin + sibling.getKey(), new_siblings));
}
sibling.getValue().setIndex(begin + sibling.getKey());
}
return begin;

// Insert siblings
Integer parentBaseIndex = tCurrent.getKey();
if (parentBaseIndex != null)
{
base[parentBaseIndex] = begin;
}
}

/**
Expand Down
29 changes: 29 additions & 0 deletions src/test/java/TestAhoCorasickDoubleArrayTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
*/

import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie;
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie.Hit;

import junit.framework.TestCase;
import org.ahocorasick.trie.Trie;

Expand Down Expand Up @@ -75,6 +77,33 @@ public void testBuildAndParseSimply() throws Exception
validateASimpleAhoCorasickDoubleArrayTrie(acdat);
}

public void testBuildVeryLongWord() throws Exception
{
    // End offset of the long keyword; the keyword itself covers [10, longWordEnd).
    final int longWordEnd = 20000;

    TreeMap<String, String> dictionary = new TreeMap<String, String>();

    // Chinese sample: one very long keyword mapped to itself, one short keyword with a null value.
    String text = loadText("cn/text.txt");
    String longKeyword = text.substring(10, longWordEnd);
    dictionary.put(longKeyword, longKeyword);
    dictionary.put(text.substring(30, 40), null);

    // English sample: the same two slices. This text is also the one parsed below.
    text = loadText("en/text.txt");
    longKeyword = text.substring(10, longWordEnd);
    dictionary.put(longKeyword, longKeyword);
    dictionary.put(text.substring(30, 40), null);

    // Building with keywords this long is the scenario under test.
    AhoCorasickDoubleArrayTrie<String> acdat = new AhoCorasickDoubleArrayTrie<String>();
    acdat.build(dictionary);

    List<Hit<String>> hits = acdat.parseText(text);

    // Expect exactly two hits: the short keyword first, then the long one.
    assertEquals(2, hits.size());

    Hit<String> shortHit = hits.get(0);
    assertEquals(30, shortHit.begin);
    assertEquals(40, shortHit.end);

    Hit<String> longHit = hits.get(1);
    assertEquals(10, longHit.begin);
    assertEquals(longWordEnd, longHit.end);
}

public void testBuildAndParseWithBigFile() throws Exception
{
// Load test data from disk
Expand Down

0 comments on commit 6d14c1b

Please sign in to comment.