Skip to content

Commit 7680af5

Browse files
committed
Make the NonDict2 threadsafe
1 parent 32e3b9f commit 7680af5

File tree

2 files changed

+31
-27
lines changed

2 files changed

+31
-27
lines changed

src/edu/stanford/nlp/wordseg/Gale2007ChineseSegmenterFeatureFactory.java

+10 -2
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ public class Gale2007ChineseSegmenterFeatureFactory<IN extends CoreLabel> extend
5252

5353
private transient TagAffixDetector taDetector; // = null;
5454
private transient CorpusDictionary outDict; // = null;
55+
private transient NonDict2 nonDict; // = null;
5556

5657
@Override
5758
public void init(SeqClassifierFlags flags) {
@@ -71,6 +72,11 @@ private synchronized void createOutDict() {
7172
}
7273
}
7374

75+
private synchronized void createNonDict() {
76+
if (nonDict == null) {
77+
nonDict = new NonDict2(flags);
78+
}
79+
}
7480

7581
/**
7682
* Extracts all the features from the input data at a certain index.
@@ -479,8 +485,10 @@ protected Collection<String> featuresCpC(PaddedList<? extends CoreLabel> cInfo,
479485
* This is frickin' useful. I hadn't realized. CDM Oct 2007.
480486
*/
481487
if (flags.useDict2) {
482-
NonDict2 nd = new NonDict2(flags);
483-
features.add(nd.checkDic(charp+charc, flags)+"nondict");
488+
if (nonDict == null) {
489+
createNonDict();
490+
}
491+
features.add(nonDict.checkDic(charp+charc, flags)+"nondict");
484492
}
485493

486494
if (flags.useOutDict2) {

src/edu/stanford/nlp/wordseg/NonDict2.java

+21 -25
Original file line number | Diff line number | Diff line change
@@ -8,36 +8,32 @@
88
public class NonDict2 {
99

1010
//public String sighanCorporaDict = "/u/nlp/data/chinese-segmenter/";
11-
public String corporaDict = "/u/nlp/data/gale/segtool/stanford-seg/data/";
12-
private static CorpusDictionary cd = null;
11+
public static final String DEFAULT_HOME = "/u/nlp/data/gale/segtool/stanford-seg/data/";
12+
public final String corporaDict;
13+
private final CorpusDictionary cd;
1314

1415
private static Redwood.RedwoodChannels logger = Redwood.channels(NonDict2.class);
1516

1617
public NonDict2(SeqClassifierFlags flags) {
17-
if (cd == null) {
18-
19-
if (flags.sighanCorporaDict != null) {
20-
corporaDict = flags.sighanCorporaDict; // use the same flag for Sighan 2005,
21-
// but our list is extracted from ctb
22-
}
23-
String path;
24-
if (flags.useAs || flags.useHk || flags.useMsr) {
25-
throw new RuntimeException("only support settings for CTB and PKU now.");
26-
} else if ( flags.usePk ) {
27-
path = corporaDict+"/dict/pku.non";
28-
} else { // CTB
29-
path = corporaDict+"/dict/ctb.non";
30-
}
31-
32-
cd = new CorpusDictionary(path);
33-
// just output the msg...
34-
if (flags.useAs || flags.useHk || flags.useMsr) {
35-
} else if ( flags.usePk ) {
36-
logger.info("INFO: flags.usePk=true | building NonDict2 from "+path);
37-
} else { // CTB
38-
logger.info("INFO: flags.usePk=false | building NonDict2 from "+path);
39-
}
18+
if (flags.sighanCorporaDict != null) {
19+
corporaDict = flags.sighanCorporaDict; // use the same flag for Sighan 2005,
20+
// but our list is extracted from ctb
21+
} else {
22+
corporaDict = DEFAULT_HOME;
4023
}
24+
25+
String path;
26+
if (flags.useAs || flags.useHk || flags.useMsr) {
27+
throw new RuntimeException("only support settings for CTB and PKU now.");
28+
} else if ( flags.usePk ) {
29+
path = corporaDict+"/dict/pku.non";
30+
logger.info("INFO: flags.usePk=true | building NonDict2 from "+path);
31+
} else { // CTB
32+
path = corporaDict+"/dict/ctb.non";
33+
logger.info("INFO: flags.usePk=false | building NonDict2 from "+path);
34+
}
35+
36+
cd = new CorpusDictionary(path);
4137
}
4238

4339
public String checkDic(String c2, SeqClassifierFlags flags) {

0 commit comments

Comments
 (0)