|
8 | 8 | public class NonDict2 {
|
9 | 9 |
|
10 | 10 | //public String sighanCorporaDict = "/u/nlp/data/chinese-segmenter/";
|
11 |
| - public String corporaDict = "/u/nlp/data/gale/segtool/stanford-seg/data/"; |
12 |
| - private static CorpusDictionary cd = null; |
| 11 | + public static final String DEFAULT_HOME = "/u/nlp/data/gale/segtool/stanford-seg/data/"; |
| 12 | + public final String corporaDict; |
| 13 | + private final CorpusDictionary cd; |
13 | 14 |
|
14 | 15 | private static Redwood.RedwoodChannels logger = Redwood.channels(NonDict2.class);
|
15 | 16 |
|
16 | 17 | public NonDict2(SeqClassifierFlags flags) {
|
17 |
| - if (cd == null) { |
18 |
| - |
19 |
| - if (flags.sighanCorporaDict != null) { |
20 |
| - corporaDict = flags.sighanCorporaDict; // use the same flag for Sighan 2005, |
21 |
| - // but our list is extracted from ctb |
22 |
| - } |
23 |
| - String path; |
24 |
| - if (flags.useAs || flags.useHk || flags.useMsr) { |
25 |
| - throw new RuntimeException("only support settings for CTB and PKU now."); |
26 |
| - } else if ( flags.usePk ) { |
27 |
| - path = corporaDict+"/dict/pku.non"; |
28 |
| - } else { // CTB |
29 |
| - path = corporaDict+"/dict/ctb.non"; |
30 |
| - } |
31 |
| - |
32 |
| - cd = new CorpusDictionary(path); |
33 |
| - // just output the msg... |
34 |
| - if (flags.useAs || flags.useHk || flags.useMsr) { |
35 |
| - } else if ( flags.usePk ) { |
36 |
| - logger.info("INFO: flags.usePk=true | building NonDict2 from "+path); |
37 |
| - } else { // CTB |
38 |
| - logger.info("INFO: flags.usePk=false | building NonDict2 from "+path); |
39 |
| - } |
| 18 | + if (flags.sighanCorporaDict != null) { |
| 19 | + corporaDict = flags.sighanCorporaDict; // use the same flag for Sighan 2005, |
| 20 | + // but our list is extracted from ctb |
| 21 | + } else { |
| 22 | + corporaDict = DEFAULT_HOME; |
40 | 23 | }
|
| 24 | + |
| 25 | + String path; |
| 26 | + if (flags.useAs || flags.useHk || flags.useMsr) { |
| 27 | + throw new RuntimeException("only support settings for CTB and PKU now."); |
| 28 | + } else if ( flags.usePk ) { |
| 29 | + path = corporaDict+"/dict/pku.non"; |
| 30 | + logger.info("INFO: flags.usePk=true | building NonDict2 from "+path); |
| 31 | + } else { // CTB |
| 32 | + path = corporaDict+"/dict/ctb.non"; |
| 33 | + logger.info("INFO: flags.usePk=false | building NonDict2 from "+path); |
| 34 | + } |
| 35 | + |
| 36 | + cd = new CorpusDictionary(path); |
41 | 37 | }
|
42 | 38 |
|
43 | 39 | public String checkDic(String c2, SeqClassifierFlags flags) {
|
|
0 commit comments