{ "act_epsilon": 0.01, "act_halting_bias_init": 1.0, "act_loss_weight": 0.01, "act_max_steps": 4, "act_type": "basic", "activation_dtype": "float32", "add_ffn_unit_to_the_transition_function": false, "add_or_concat_timing_signal": "add", "add_position_timing_signal": true, "add_relative_to_values": false, "add_sru": false, "add_step_timing_signal": true, "attention_dropout": 0.1, "attention_dropout_broadcast_dims": "", "attention_key_channels": 0, "attention_value_channels": 0, "attention_variables_3d": false, "batch_size": 512, "causal_decoder_self_attention": true, "clip_grad_norm": 0.0, "compress_steps": 0, "conv_first_kernel": 3, "couple_carry_transform_gates": true, "daisy_chain_variables": false, "data_dir": "babi_data/data", "depth_embedding": true, "dropout": 0.2, "dwa_elements": true, "eval_drop_long_sequences": false, "eval_freq_in_steps": 1000, "eval_run_autoregressive": false, "eval_steps": 100, "factored_logits": false, "ffn_layer": "dense_relu_dense", "filter_size": 512, "force_full_predict": false, "gate_ffn_layer": "dense", "grad_noise_scale": 0.0, "heads_share_relative_embedding": false, "hidden_size": 128, "initializer": "uniform_unit_scaling", "initializer_gain": 1.0, "input_modalities": "default", "kernel_height": 3, "kernel_width": 1, "label_smoothing": 0.1, "layer_postprocess_sequence": "da", "layer_prepostprocess_dropout": 0.1, "layer_prepostprocess_dropout_broadcast_dims": "", "layer_preprocess_sequence": "n", "learning_rate": 0.2, "learning_rate_constant": 2.0, "learning_rate_cosine_cycle_steps": 250000, "learning_rate_decay_rate": 1.0, "learning_rate_decay_scheme": "noam", "learning_rate_decay_staircase": false, "learning_rate_decay_steps": 5000, "learning_rate_minimum": null, "learning_rate_schedule": "constant*linear_warmup*rsqrt_decay*rsqrt_hidden_size", "learning_rate_warmup_steps": 8000, "length_bucket_step": 1.1, "lstm_forget_bias": 1.0, "max_input_seq_length": 0, "max_length": 256, "max_relative_position": 0, "max_target_seq_length": 0, "min_length": 0, "min_length_bucket": 8, "mix_with_transformer": null, "model_dir": "babi_data/output_sepconv", "moe_hidden_sizes": "2048", "moe_k": 2, "moe_loss_coef": 0.001, "moe_num_experts": 16, "moe_overhead_eval": 2.0, "moe_overhead_train": 1.0, "multiply_embedding_mode": "sqrt_depth", "multiproblem_class_loss_multiplier": 0.0, "multiproblem_label_weight": 0.5, "multiproblem_mixing_schedule": "constant", "multiproblem_reweight_label_loss": false, "multiproblem_schedule_max_examples": 10000000.0, "multiproblem_schedule_threshold": 0.5, "nbr_decoder_problems": 1, "no_data_parallelism": false, "norm_epsilon": 1e-06, "norm_type": "layer", "num_decoder_layers": 0, "num_encoder_layers": 0, "num_heads": 4, "num_hidden_layers": 2, "num_mixedin_layers": 2, "num_rec_steps": 8, "optimizer": "Adam", "optimizer_adafactor_beta1": 0.0, "optimizer_adafactor_beta2": 0.999, "optimizer_adafactor_clipping_threshold": 1.0, "optimizer_adafactor_decay_type": "pow", "optimizer_adafactor_factored": true, "optimizer_adafactor_memory_exponent": 0.8, "optimizer_adafactor_multiply_by_parameter_scale": true, "optimizer_adam_beta1": 0.9, "optimizer_adam_beta2": 0.997, "optimizer_adam_epsilon": 1e-09, "optimizer_momentum_momentum": 0.9, "optimizer_momentum_nesterov": false, "optimizer_multistep_accumulate_steps": null, "pad_batch": false, "parameter_attention_key_channels": 0, "parameter_attention_value_channels": 0, "pos": null, "position_start_index": null, "prepend_mode": "none", "pretrained_model_dir": "", "proximity_bias": false, "recurrence_type": "basic", "relu_dropout": 0.1, "relu_dropout_broadcast_dims": "", "sampling_method": "argmax", "sampling_temp": 1.0, "schedule": "continuous_train_and_eval", "scheduled_sampling_gold_mixin_prob": 0.5, "scheduled_sampling_prob": 0.0, "scheduled_sampling_warmup_steps": 50000, "self_attention_type": "dot_product", "shared_embedding": false, "shared_embedding_and_softmax_weights": true, "split_to_length": 0, "std_server_protocol": "grpc", "step_timing_signal_type": "learned", "summarize_grads": false, "summarize_vars": false, "symbol_dropout": 0.0, "symbol_modality_num_shards": 16, "symbol_modality_skip_top": false, "target_modality": "default", "tpu_enable_host_call": false, "train_steps": 100000, "transform_bias_init": -1.0, "transformer_ffn_type": "sepconv", "use_fixed_batch_size": false, "use_memory_as_final_state": true, "use_pad_remover": true, "use_target_space_embedding": true, "video_num_input_frames": 1, "video_num_target_frames": 1, "vocab_divisor": 1, "warm_start_from": null, "weight_decay": 0.0, "weight_dtype": "float32", "weight_noise": 0.0 }