; config.ini — configuration for dependency-parsing training, evaluation,
; and attack/augmentation experiments.
; Read with Python configparser using ExtendedInterpolation:
; ${Section:key} references (e.g. ${Shared:workspace}) are expanded at load time.
[Shared]
workspace = /disks/sdb/zjiehang/zhou_data_new
[Data]
ftrain = ${Shared:workspace}/ptb/ptb_train_3.3.0.sd
fdev = ${Shared:workspace}/ptb/ptb_valid_3.3.0.sd
; fdev = /home/zhouyi/corr.conllu
ftest = ${Shared:workspace}/ptb/ptb_test_3.3.0.sd
; ftest = /home/zhouyi/corr.conllu
fdata = ${Shared:workspace}/ptb/ptb_test_3.3.0.sd
; fdata = /home/zhouyi/err.conllu
# fpred = 'ptb_test_3.3.0.sd'
# path for pretrained embedding
fembed = ${Shared:workspace}/pretrained_embedding/glove/glove.6B.100d.txt
# path for the vocab (word dictionary, tag dictionary, and so on); the vocab is
# saved to this file and can be loaded directly later to save time
vocab = ${Shared:workspace}/ptb/vocab
[Model]
train_task = 'parser'
# the input type for models
# type: word, word_tag, word_char, char
# remember not add ' ' here when modified, for example
# input = word_tag is right
# input = 'word_tag' is not right
input = word_tag
parser_model = ${Shared:workspace}/saved_models/${Model:input}/lzynb
; parser_model = /disks/sdb/zjiehang/DependencyParsing/saved_models/word/aug
tagger_model = ${Shared:workspace}/saved_models/tagger/tagger
[Run]
batch_size = 5000
epochs = 300
patience = 40
[Device & Seed & thread & punct & buckets]
device = '0'
threads = 4
seed = 1
# when evaluating: ignore the punctuation (as mentioned in Manning et al.)
punct = False
# for k-means: dividing the training data by sentence length
buckets = 64
[Network]
n_embed = 100
# for tag embedding
n_tag_embed = 100
embed_dropout = 0.33
n_lstm_hidden = 400
n_lstm_layers = 3
lstm_dropout = 0.33
n_mlp_arc = 500
n_mlp_rel = 100
mlp_dropout = 0.33
# for char embedding
n_char_embed = 50
n_char_out = 100
[Network_Tagger]
tag_n_lstm_hidden = 200
tag_n_lstm_layers = 2
[Optimizer]
lr = 2e-3
beta_1 = 0.9
beta_2 = 0.9
epsilon = 1e-12
decay = .75
steps = 5000
[Evaluate]
# evaluate dpattack result with pred tags
pred_tag = False
[Attack]
# revised rate for a sentence
# revised_number = sentence_length * revised rate
revised_rate = 0.01
# whether to save attack result
save_result_to_file = True
# file path for the attack result; only used when save_result_to_file is True
result_path = ${Shared:workspace}/result
[BlackBox]
# black box attack method
# substitute : substitute a word
# insert: insert a word (insert a JJ before a NN, insert a RB after a VB, if both NN and VB are not modified)
# delete: delete a word (delete a NN's modification or delete a VB's modification)
blackbox_method = 'insert'
# method for deciding which index to substitute when substituting a word; used only when blackbox_method is 'substitute'
# unk: change each word to <unk> in turn and pick the index that gives the worst case.
# pos: change the word according to its pos-of-tagging
blackbox_index = 'unk'
# only used when blackbox_index is 'pos'
# NN: noun, VB: verb, JJ: Adjective, RB: Adverb,
blackbox_pos_tag = 'RB'
# method for substituting, insert a word (black box)
# only used when blackbox_method is 'substitute' or 'insert'
# when blackbox_method is 'substitute', the candidate values are 'glove', 'bert', 'tag', 'wordnet'
# when blackbox_method is 'insert', the candidate values are 'bert'
# bert: means using pretrained language models bert
# glove: means according to word vectors
# tag: means using the same tag to replace
# char: means random swap two characters in a word
# wordnet: means substituting word by WordNet's synonym
blackbox_model = 'bert'
# path: for pretrained models, if blackbox_method is 'glove', path is the pretrained embedding path
# if blackbox_method is 'bert', path is the pretrained bert dir
path = ${Shared:workspace}/pretrained_model/bert-base-uncased
# path = '/disks/sdb/zjiehang/DependencyParsing/pretrained_embedding/glove/glove.6B.300d.txt'
language_model_path = ${Shared:workspace}/pretrained_model/gpt2
[WhiteBox]
# method for white box attack
whitebox_method = ''
# view_as_unk_under_char = False
[Augmentation]
# augmentation rate for training data; a rate of 1.00 (100%) means all training data are augmented
augmentation_rate = 1.00
# augmentation file dir, for saving augmentation data
augmentation_dir = ${Shared:workspace}/data/augmentation
# whether open augmentation training
# when under augmentation training, augmentation_test_file should be evaluated
augmentation_training = False
# augmentation test file path; used when augmentation_training is True
augmentation_test_file = ${Shared:workspace}/data/augmentation/ptb_test_glove.sd
[ZHOU]
crf_tagger_path=${Shared:workspace}/saved_models/crftagger