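"""Evaluate a trained agent.

Loads a saved model from an alfred DirectoryTree storage, rolls it out for a
number of episodes, and prints the average return. Optionally renders the
episodes, records them as a gif, and/or saves the collected trajectories to a
pickle file so they can be reused as expert demonstrations.

Supports MuJoCo and Pommerman tasks (see alg_task_lists).
"""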
import argparse
import pickle
import time
from pathlib import Path

import imageio
import torch
from mujoco_py.generated import const

from alfred.utils.directory_tree import DirectoryTree
from alfred.utils.recorder import TrainingIterator
from alfred.utils.config import parse_bool, load_config_from_json

from algo_manager import init_from_save
from alg_task_lists import POMMERMAN_TASKS, MUJOCO_TASKS
from env_manager import make_env
from pommerman_plugin.misc import (save_gif_from_png_folder, load_game_states_from_demos,
                                   wait_for_ENTER_keypress)
from utils.data_structures import load_expert_demos
from utils.misc import uniquify
import utils.ml as ml


def get_evaluation_args(overwritten_args=None):
    parser = argparse.ArgumentParser()

    ## Shared hyper-params
    parser.add_argument('--storage_name', default='', type=str,
                        help='Name of the model storage')
    parser.add_argument('--root_dir', default=None, type=str)
    parser.add_argument('--experiment_num', type=int, default=1)
    parser.add_argument('--seed_num', default=1, type=int,
                        help='Seed directory in experiments folder')
    parser.add_argument('--eval_seed', default=1234567, type=int,
                        help='Random seed for evaluation rollouts')
    parser.add_argument('--model_name', type=str, default=None)
    parser.add_argument('--max_ep_len', default=1000000, type=int)
    parser.add_argument('--act_deterministic', type=parse_bool, default=True)
    parser.add_argument('--number_of_eps', type=int, default=3)
    parser.add_argument('--make_expert', type=parse_bool, default=False)
    parser.add_argument('--expert_save_path', type=str, default=None)
    parser.add_argument('--render', type=parse_bool, default=False)
    parser.add_argument('--make_gif', type=parse_bool, default=False)
    parser.add_argument('--n_skipped_frames', type=int, default=0)
    parser.add_argument('--fps', default=3000, type=int)
    parser.add_argument('--waiting', type=parse_bool, default=False)

    # If the model was trained only on initial states taken from demonstrations,
    # these demonstrations can be loaded so that evaluation starts from the same
    # initial states (see the example invocation below)
    parser.add_argument('--demos_name', type=str, default=None,
                        help='Demonstration filename used to train the agent')
    parser.add_argument('--demos_folder', type=str, default=None,
                        help='Demonstration folder in data/ used to train the agent')

    return parser.parse_args(overwritten_args)
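

# Example invocations (storage and demo names below are hypothetical; actual
# names depend on how training runs were saved by alfred's DirectoryTree):
#
#   python evaluate.py --storage_name my_storage --experiment_num 1 --seed_num 1 \
#       --number_of_eps 5 --make_gif True
#
# For a Pommerman agent trained from demonstration initial states:
#
#   python evaluate.py --storage_name my_storage \
#       --demos_folder my_demos_folder --demos_name my_demos.pkl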


def record_frame(env, step_idx, n_skipped_frames, task_name, frames, temp_png_folder):
    # Keep one frame out of every (n_skipped_frames + 1) environment steps
    if step_idx % (n_skipped_frames + 1) == 0:
        if task_name in POMMERMAN_TASKS:
            # pommerman renders to .png files on disk rather than to an array
            env.render(record_pngs_dir=temp_png_folder)
        else:
            frames.append(env.render('rgb_array'))
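

# With n_skipped_frames == 2, for example, frames are recorded at step indices
# 0, 3, 6, ... Since index 0 always records, the episode loop below can force
# the final frame to be kept by passing t * (1 - done), which is 0 once done
# is True.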


def evaluate(args):
    # Loads config and model
    dir_tree = DirectoryTree.init_from_branching_info(root_dir=args.root_dir, storage_name=args.storage_name,
                                                      experiment_num=args.experiment_num, seed_num=args.seed_num)
    config = load_config_from_json(dir_tree.seed_dir / "config.json")

    if args.model_name is not None:
        model_path = dir_tree.seed_dir / args.model_name
    else:
        if 'rl_alg_name' in config.__dict__:
            if config.rl_alg_name == "":
                model_name = config.irl_alg_name
            else:
                model_name = config.rl_alg_name
        else:
            model_name = config.alg_name
        model_path = dir_tree.seed_dir / (model_name + '_model_best.pt')
    learner = init_from_save(model_path, device=torch.device('cpu'))

    if args.make_gif:
        gif_path = dir_tree.storage_dir / 'gifs'
        gif_path.mkdir(exist_ok=True)
        gif_full_name = uniquify(gif_path /
                                 f"{config.task_name}"
                                 f"_experiment{args.experiment_num}"
                                 f"_seed{args.seed_num}"
                                 f"_evalseed{args.eval_seed}.gif")
        if config.task_name in POMMERMAN_TASKS:
            temp_png_folder_base = uniquify(gif_path / 'temp_png')
        else:
            temp_png_folder = False

    # Makes env and recorders
    env = make_env(config.task_name)
    ml.set_seeds(args.eval_seed, env)
    Ti = TrainingIterator(args.number_of_eps)
    frames = []
    dt = 1. / args.fps
    trajectories = []

    # Camera placement for MuJoCo rendering
    if config.task_name in MUJOCO_TASKS:
        env.render(mode='human' if args.render else 'rgb_array')
        env.unwrapped.viewer.cam.type = const.CAMERA_TRACKING

        # # Option 1 (from the side)
        # env.unwrapped.viewer.cam.trackbodyid = 0
        # env.unwrapped.viewer.cam.elevation = -25
        # env.unwrapped.viewer.cam.distance = 6

        # Option 2 (from perspective)
        env.unwrapped.viewer.cam.trackbodyid = 0
        env.unwrapped.viewer.cam.elevation = -15
        env.unwrapped.viewer.cam.distance = 4
        env.unwrapped.viewer.cam.azimuth = 35

    # Get initial states from the expert demonstrations
    if config.task_name in POMMERMAN_TASKS:
        if args.demos_folder is None:
            args.demos_folder = config.task_name.replace('learnable', 'agent47')
        if args.demos_name is None:
            args.demos_name = config.demos_name
        demos = load_expert_demos(args.demos_folder, args.demos_name)
        env.init_game_states = load_game_states_from_demos(demos, idx=0)

    # Episodes loop
    for it in Ti:
        t = 0
        trajectory = []
        ret = 0
        done = False

        # Initial reset
        obs = env.reset()

        # Rendering options
        if args.make_gif:
            if config.task_name in POMMERMAN_TASKS:  # pommerman saves .png files per episode
                temp_png_folder = temp_png_folder_base / f"ep_{it.itr}"
                temp_png_folder.mkdir(parents=True, exist_ok=True)
            record_frame(env, t, args.n_skipped_frames, config.task_name, frames, temp_png_folder)

        if args.render:
            env.render()
            if args.waiting:
                wait_for_ENTER_keypress()

        # Transitions loop
        while not done:
            calc_start = time.time()
            action = learner.act(obs=obs, sample=not args.act_deterministic)
            next_obs, reward, done, _ = env.step(action)

            if args.make_expert:
                trajectory.append((obs, action, next_obs, reward, ml.mask(done)))

            obs = next_obs
            ret += reward
            t += 1

            if args.render:
                # Enforces the fps config
                calc_end = time.time()
                elapsed = calc_end - calc_start
                if elapsed < dt:
                    time.sleep(dt - elapsed)
                env.render('human')
                if args.waiting:
                    wait_for_ENTER_keypress()

            if args.make_gif:
                # Passes 0 on the final transition (done == True) so the last
                # frame is always recorded, even when skipping frames
                record_frame(env, t * (1 - done), args.n_skipped_frames, config.task_name, frames, temp_png_folder)

            if t > args.max_ep_len:
                break

        it.record('eval_return', ret)
        if args.make_expert:
            trajectories.append(trajectory)

    # Saves gif of all the episodes
    if args.make_gif:
        if config.task_name in POMMERMAN_TASKS:
            save_gif_from_png_folder(temp_png_folder_base, gif_full_name, 1 / dt, delete_folder=True)
        else:
            imageio.mimsave(str(gif_full_name), frames, duration=dt)

    env.close()

    # Saves expert trajectories
    if args.make_expert:
        if args.expert_save_path is not None:
            expert_path = Path(args.expert_save_path)
        else:
            expert_path = Path('./data/' + config.task_name + f'/expert_demo_{args.number_of_eps}.pkl')
        expert_path.parent.mkdir(exist_ok=True, parents=True)
        expert_path = uniquify(expert_path)
        with open(str(expert_path), 'wb') as fp:
            pickle.dump(trajectories, fp)

    return Ti.pop_all_means()['eval_return']
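

# A minimal sketch of consuming the saved demonstrations (the path below is
# hypothetical; the actual filename depends on --expert_save_path / task_name):
#
#   import pickle
#   with open('data/my_task/expert_demo_3.pkl', 'rb') as fp:
#       trajectories = pickle.load(fp)
#   # trajectories is a list of episodes; each episode is a list of
#   # (obs, action, next_obs, reward, mask) tuples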


if __name__ == "__main__":
    args = get_evaluation_args()
    print(f'avg: {evaluate(args)}')
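
# evaluate() can also be driven programmatically through overwritten_args,
# e.g. from a notebook (the storage name is hypothetical):
#
#   mean_return = evaluate(get_evaluation_args(['--storage_name', 'my_storage']))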