-
Notifications
You must be signed in to change notification settings - Fork 29
New issue
Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? # to your account
[WIP] AveragingEpisodesController #89
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from .controller import Controller, ContextualController | ||
from .controller import Controller, AveragingEpisodesController, \ | ||
ContextualController | ||
|
||
|
||
__all__ = ["Controller", "ContextualController"] | ||
__all__ = ["Controller", "AveragingEpisodesController", "ContextualController"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# Author: Alexander Fabisch <afabisch@informatik.uni-bremen.de> | ||
# Jan Hendrik Metzen <jhm@informatik.uni-bremen.de> | ||
# Marc Otto <maotto@uni-bremen.de> | ||
|
||
import numpy as np | ||
import warnings | ||
|
@@ -308,6 +309,120 @@ def _perform_test(self, meta_parameter_keys, meta_parameters): | |
return performance - optimum | ||
|
||
|
||
class AveragingEpisodesController(Controller): | ||
"""Controller for non-deterministic problems. | ||
|
||
See base class "Controller" for details on usage. | ||
|
||
Additional Parameters | ||
---------- | ||
num_repetitions_to_average : int, optional (default: 10) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we usually try to use |
||
Number of repetitions per episode. For one episode, several rollouts | ||
are executed with the same behavior. Note that this only makes sense | ||
if the environment is stochastic or specifically prepared via the | ||
argument environment_preparation_function | ||
|
||
feedback_averaging_function : function, optional (default: median_of_sums) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a callback, not just a function. It also does not have to be a function, it can be any callable. |
||
Function with signature: list of feedbacks -> single element list, | ||
where the input list is as long as the number of repetitions. | ||
Note that the number of feedbacks per rollout may vary. | ||
See AveragingEpisodesController.median_of_sums (default) for an example | ||
|
||
environment_preparation_function : function, optional (default: None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same applies here |
||
function with signature: (environment, int repetition_index) -> None | ||
Prepares the environment for a specific repetition. | ||
""" | ||
def __init__(self, config={}, environment=None, behavior_search=None, | ||
num_repetitions_to_average=10, | ||
feedback_averaging_function=None, | ||
environment_preparation_function=None, | ||
**kwargs): | ||
super(AveragingEpisodesController, self).__init__( | ||
config, environment, behavior_search, **kwargs) | ||
|
||
self.record_inputs = False | ||
self.record_outputs = False | ||
self.record_feedbacks = False | ||
self.accumulate_feedbacks = False # see feedback_averaging_function | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this comment does not really help |
||
|
||
if feedback_averaging_function is None: | ||
feedback_averaging_function = self.median_of_sums | ||
|
||
self._set_attribute(config, "num_repetitions_to_average", | ||
num_repetitions_to_average) | ||
self._set_attribute(config, "feedback_averaging_function", | ||
feedback_averaging_function) | ||
self._set_attribute(config, "environment_preparation_function", | ||
environment_preparation_function) | ||
|
||
@staticmethod | ||
def median_of_sums(list_of_feedbacks): | ||
"""Accumulates feedbacks of each episode and returns median | ||
|
||
Parameters | ||
---------- | ||
list_of_feedbacks : list | ||
feedbacks per episode. Note that episode length may vary, while | ||
the length of this list is equal to num_repetitions_to_average | ||
|
||
Returns | ||
------- | ||
averaged_accumulated_feedback : list with one element | ||
median of the accumulated feedbacks of all repetitions | ||
""" | ||
return np.median([np.sum(feedbacks_) | ||
for feedbacks_ in list_of_feedbacks]) | ||
|
||
def episode_with(self, behavior, meta_parameter_keys=[], | ||
meta_parameters=[], record=False): | ||
"""Execute a behavior in the environment. | ||
|
||
Parameters | ||
---------- | ||
behavior : Behavior | ||
Fix behavior | ||
|
||
meta_parameter_keys : list, optional (default: []) | ||
Meta parameter keys | ||
|
||
meta_parameters : list, optional (default: []) | ||
Meta parameter values | ||
|
||
record : bool, optional (default: True) | ||
Record feedbacks or trajectories if activated | ||
|
||
Returns | ||
------- | ||
feedbacks : array, shape (n_steps,) | ||
Feedback for each step in the environment | ||
""" | ||
|
||
if self.num_repetitions_to_average == 1: | ||
return super(AveragingEpisodesController, self).episode_with( | ||
behavior, meta_parameter_keys=meta_parameter_keys, | ||
meta_parameters=meta_parameters, record=record | ||
) | ||
if record: | ||
raise ValueError("Recording not supported when" | ||
" averaging episodes' returns") | ||
feedbacks = [] | ||
for i in range(self.num_repetitions_to_average): | ||
if i > 0: | ||
# for i==0 it is done already by behavior search | ||
behavior.reset() | ||
|
||
if self.environment_preparation_function: | ||
self.environment_preparation_function(self.environment, i) | ||
|
||
feedbacks.append( | ||
super(AveragingEpisodesController, self).episode_with( | ||
behavior, meta_parameter_keys=meta_parameter_keys, | ||
meta_parameters=meta_parameters, record=record | ||
)) | ||
|
||
return self.feedback_averaging_function(feedbacks) | ||
|
||
|
||
class ContextualController(Controller): | ||
"""Controller for contextual problems. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
more
-