; config.ini — configuration for dependency-parsing training, evaluation,
; and attack/augmentation experiments.
; Read with Python configparser using ExtendedInterpolation:
; ${Section:key} references (e.g. ${Shared:workspace}) are expanded at load time.
[Shared]
workspace = /disks/sdb/zjiehang/zhou_data_new
[Data]
ftrain = ${Shared:workspace}/ptb/ptb_train_3.3.0.sd
fdev = ${Shared:workspace}/ptb/ptb_valid_3.3.0.sd
; fdev = /home/zhouyi/corr.conllu
ftest = ${Shared:workspace}/ptb/ptb_test_3.3.0.sd
; ftest = /home/zhouyi/corr.conllu
fdata = ${Shared:workspace}/ptb/ptb_test_3.3.0.sd
; fdata = /home/zhouyi/err.conllu
# fpred = 'ptb_test_3.3.0.sd'
# path for pretrained embedding
fembed = ${Shared:workspace}/pretrained_embedding/glove/glove.6B.100d.txt
# path for the vocab (word dictionary, tag dictionary, and so on); the vocab is
# saved to this file and can be loaded directly later to save time
vocab = ${Shared:workspace}/ptb/vocab
[Model]
train_task = 'parser'
# the input type for models
# type: word, word_tag, word_char, char
# remember not add ' ' here when modified, for example
# input = word_tag is right
# input = 'word_tag' is not right
input = word_tag
parser_model = ${Shared:workspace}/saved_models/${Model:input}/lzynb
; parser_model = /disks/sdb/zjiehang/DependencyParsing/saved_models/word/aug
tagger_model = ${Shared:workspace}/saved_models/tagger/tagger
[Run]
batch_size = 5000
epochs = 300
patience = 40
[Device & Seed & thread & punct & buckets]
device = '0'
threads = 4
seed = 1
# when evaluating: ignore the punctuation (as mentioned in Manning et al.)
punct = False
# for k-means: dividing the training data by sentence length
buckets = 64
[Network]
n_embed = 100
# for tag embedding
n_tag_embed = 100
embed_dropout = 0.33
n_lstm_hidden = 400
n_lstm_layers = 3
lstm_dropout = 0.33
n_mlp_arc = 500
n_mlp_rel = 100
mlp_dropout = 0.33
# for char embedding
n_char_embed = 50
n_char_out = 100
[Network_Tagger]
tag_n_lstm_hidden = 200
tag_n_lstm_layers = 2
[Optimizer]
lr = 2e-3
beta_1 = 0.9
beta_2 = 0.9
epsilon = 1e-12
decay = .75
steps = 5000
[Evaluate]
# evaluate dpattack result with pred tags
pred_tag = False
[Attack]
# revised rate for a sentence
# revised_number = sentence_length * revised rate
revised_rate = 0.01
# whether to save attack result
save_result_to_file = True
# file path for the attack result; only used when save_result_to_file is True
result_path = ${Shared:workspace}/result
[BlackBox]
# black box attack method
# substitute : substitute a word
# insert: insert a word (insert a JJ before a NN, insert a RB after a VB, if both NN and VB are not modified)
# delete: delete a word (delete a NN's modification or delete a VB's modification)
blackbox_method = 'insert'
# method for deciding which index to substitute when substituting a word; used only when blackbox_method is 'substitute'
# unk: change each word to <unk> in turn and pick the index that gives the worst case.
# pos: change the word according to its pos-of-tagging
blackbox_index = 'unk'
# only used when blackbox_index is 'pos'
# NN: noun, VB: verb, JJ: Adjective, RB: Adverb,
blackbox_pos_tag = 'RB'
# method for substituting, insert a word (black box)
# only used when blackbox_method is 'substitute' or 'insert'
# when blackbox_method is 'substitute', the candidate values are 'glove', 'bert', 'tag', 'wordnet'
# when blackbox_method is 'insert', the candidate values are 'bert'
# bert: means using pretrained language models bert
# glove: means according to word vectors
# tag: means using the same tag to replace
# char: means random swap two characters in a word
# wordnet: means substituting word by WordNet's synonym
blackbox_model = 'bert'
# path: for pretrained models, if blackbox_method is 'glove', path is the pretrained embedding path
# if blackbox_method is 'bert', path is the pretrained bert dir
path = ${Shared:workspace}/pretrained_model/bert-base-uncased
# path = '/disks/sdb/zjiehang/DependencyParsing/pretrained_embedding/glove/glove.6B.300d.txt'
language_model_path = ${Shared:workspace}/pretrained_model/gpt2
[WhiteBox]
# method for white box attack
whitebox_method = ''
# view_as_unk_under_char = False
[Augmentation]
# augmentation rate for training data; a rate of 1.00 (100%) means all training data are augmented
augmentation_rate = 1.00
# augmentation file dir, for saving augmentation data
augmentation_dir = ${Shared:workspace}/data/augmentation
# whether open augmentation training
# when under augmentation training, augmentation_test_file should be evaluated
augmentation_training = False
# augmentation test file path; used when augmentation_training is True
augmentation_test_file = ${Shared:workspace}/data/augmentation/ptb_test_glove.sd
[ZHOU]
crf_tagger_path=${Shared:workspace}/saved_models/crftagger