diff --git a/bolero/controller/__init__.py b/bolero/controller/__init__.py index 568ac73b..19f33581 100644 --- a/bolero/controller/__init__.py +++ b/bolero/controller/__init__.py @@ -1,4 +1,5 @@ -from .controller import Controller, ContextualController +from .controller import Controller, AveragingEpisodesController, \ + ContextualController -__all__ = ["Controller", "ContextualController"] +__all__ = ["Controller", "AveragingEpisodesController", "ContextualController"] diff --git a/bolero/controller/controller.py b/bolero/controller/controller.py index 418233aa..e3e8fcf0 100644 --- a/bolero/controller/controller.py +++ b/bolero/controller/controller.py @@ -1,5 +1,6 @@ # Author: Alexander Fabisch # Jan Hendrik Metzen +# Marc Otto import numpy as np import warnings @@ -308,6 +309,120 @@ def _perform_test(self, meta_parameter_keys, meta_parameters): return performance - optimum +class AveragingEpisodesController(Controller): + """Controller for non-deterministic problems. + + See base class "Controller" for details on usage. + + Additional Parameters + ---------- + num_repetitions_to_average : int, optional (default: 10) + Number of repetitions per episode. For one episode, several rollouts + are executed with the same behavior. Note that this only makes sense + if the environment is stochastic or specifically prepared via the + argument environment_preparation_function + + feedback_averaging_function : function, optional (default: median_of_sums) + Function with signature: list of feedbacks -> single element list, + where the input list is as long as the number of repetitions. + Note that the number of feedbacks per rollout may vary. + See AveragingEpisodesController.median_of_sums (default) for an example + + environment_preparation_function : function, optional (default: None) + function with signature: (environment, int repetition_index) -> None + Prepares the environment for a specific repetition. + """ + def __init__(self, config={}, environment=None, behavior_search=None, + num_repetitions_to_average=10, + feedback_averaging_function=None, + environment_preparation_function=None, + **kwargs): + super(AveragingEpisodesController, self).__init__( + config, environment, behavior_search, **kwargs) + + self.record_inputs = False + self.record_outputs = False + self.record_feedbacks = False + self.accumulate_feedbacks = False # see feedback_averaging_function + + if feedback_averaging_function is None: + feedback_averaging_function = self.median_of_sums + + self._set_attribute(config, "num_repetitions_to_average", + num_repetitions_to_average) + self._set_attribute(config, "feedback_averaging_function", + feedback_averaging_function) + self._set_attribute(config, "environment_preparation_function", + environment_preparation_function) + + @staticmethod + def median_of_sums(list_of_feedbacks): + """Accumulates feedbacks of each episode and returns median + + Parameters + ---------- + list_of_feedbacks : list + feedbacks per episode. Note that episode length may vary, while + the length of this list is equal to num_repetitions_to_average + + Returns + ------- + averaged_accumulated_feedback : list with one element + median of the accumulated feedbacks of all repetitions + """ + return np.median([np.sum(feedbacks_) + for feedbacks_ in list_of_feedbacks]) + + def episode_with(self, behavior, meta_parameter_keys=[], + meta_parameters=[], record=False): + """Execute a behavior in the environment. + + Parameters + ---------- + behavior : Behavior + Fix behavior + + meta_parameter_keys : list, optional (default: []) + Meta parameter keys + + meta_parameters : list, optional (default: []) + Meta parameter values + + record : bool, optional (default: True) + Record feedbacks or trajectories if activated + + Returns + ------- + feedbacks : array, shape (n_steps,) + Feedback for each step in the environment + """ + + if self.num_repetitions_to_average == 1: + return super(AveragingEpisodesController, self).episode_with( + behavior, meta_parameter_keys=meta_parameter_keys, + meta_parameters=meta_parameters, record=record + ) + if record: + raise ValueError("Recording not supported when" + " averaging episodes' returns") + feedbacks = [] + for i in range(self.num_repetitions_to_average): + if i > 0: + # for i==0 it is done already by behavior search + behavior.reset() + + if self.environment_preparation_function: + self.environment_preparation_function(self.environment, i) + + feedbacks.append( + super(AveragingEpisodesController, self).episode_with( + behavior, meta_parameter_keys=meta_parameter_keys, + meta_parameters=meta_parameters, record=record + )) + + return self.feedback_averaging_function(feedbacks) + + class ContextualController(Controller): """Controller for contextual problems.