# control.yaml
# Environment config
env:
  type: 'classic_control'
  # Classic control env name
  name: 'CartPole-v0'
  # Random seed
  seed: 543
  # Average episode reward at which the env is considered solved
  env_solution: 195
# Agent config
agent:
  # Type of agent
  agent_type: 'vpg'
  # Model type
  model_type: 'mlp'
  # Learning rate for the policy network
  policy_lr: 0.01
  # Learning rate for the value network
  value_lr: 0.001
  # Optimizer name
  opt_name: 'adam'
  # Max gradient norm for clipping; leave empty for no clipping
  grad_clip:
  # Reward discount factor (gamma in the Bellman equation)
  gamma: 0.99
  # GAE lambda for TD(lambda) advantage estimation
  lambda: 0.97
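  # For reference, the standard GAE advantage estimate (a general formula,
  # not a claim about this repo's exact implementation) is:
  #   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
  #   A_t     = sum_{l >= 0} (gamma * lambda)^l * delta_{t+l}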
  # Policy (VPG) loss scaling
  vpg_scaling: 1.0
  # Value loss scaling
  value_scaling: 1.0
  # Entropy loss scaling
  entropy_scaling: 0.005
  # Normalize rewards
  reward_norm: true
  # Observation shape
  input_shape: [4]
  # Number of stacked observations; state shape := [state_len] + input_shape
  state_len: 1
  # Number of discrete actions
  action_size: 2
  # Input transforms; leave empty for none
  input_transforms:
train:
  # Number of training episodes
  n_train_episodes: 100
  # Max steps per episode
  max_steps: 1000
  # Directory where trained models are saved
  model_dest: /data/experiments/agent-of-control/02-12-2020-CartPole-v0-vpg-gae
  # Save the model every save_model steps
  save_model: 20
test:
  # Number of test episodes
  n_test_episodes: 5
  # Max steps per episode
  max_steps: 1000
  # Directory where videos of played episodes are saved
  state_dest: /data/experiments/agent-of-control/02-12-2020-CartPole-v0-vpg-gae/states
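# A minimal sketch of loading this config from Python (assumes PyYAML is
# available; the repo's actual training entry point is not specified here):
#
#   import yaml
#   with open('control.yaml') as f:
#       cfg = yaml.safe_load(f)
#   print(cfg['env']['name'])          # 'CartPole-v0'
#   print(cfg['agent']['policy_lr'])   # 0.01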