Implement SerializedSequenceSimulatedEnvProblem

PiperOrigin-RevId: 263020747
tensorflow · Aug 12, 2019 · f7f8549
1 parent 41726d4
commit f7f8549
Show file tree

Hide file tree

Showing 5 changed files with 579 additions and 64 deletions.
diff --git a/tensor2tensor/trax/rlax/ppo_training_loop_test.py b/tensor2tensor/trax/rlax/ppo_training_loop_test.py
@@ -171,10 +171,10 @@ def loss(*args, **kwargs):
       )
       trainer.train_epoch(epoch_steps=1, eval_steps=1)
 
-      # Repeat the initial observations over and over again.
+      # Repeat the history over and over again.
       stream = itertools.repeat(np.zeros(history_shape))
       env_fn = functools.partial(
-          simulated_env_problem.SimulatedEnvProblem,
+          simulated_env_problem.RawSimulatedEnvProblem,
           model=model,
           history_length=history_shape[1],
           trajectory_length=3,
@@ -184,7 +184,7 @@ def loss(*args, **kwargs):
           action_space=gym.spaces.Discrete(n=n_actions),
           reward_range=(-1, 1),
           discrete_rewards=False,
-          initial_observation_stream=stream,
+          history_stream=stream,
           output_dir=output_dir,
       )