From 58cdc1baf3a09171d238c92164534d26a8523765 Mon Sep 17 00:00:00 2001
From: simonsays1980
Date: Wed, 25 Sep 2024 10:49:37 +0200
Subject: [PATCH 1/4] Removed parts of docstrings for LSTMEncoder(s) b/c no linear output layer is used.

Signed-off-by: simonsays1980
---
 rllib/core/models/tf/encoder.py    | 1 -
 rllib/core/models/torch/encoder.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/rllib/core/models/tf/encoder.py b/rllib/core/models/tf/encoder.py
index 8efb6c5a136a..3a6fd858ef1e 100644
--- a/rllib/core/models/tf/encoder.py
+++ b/rllib/core/models/tf/encoder.py
@@ -313,7 +313,6 @@ class TfLSTMEncoder(TfModel, Encoder):
     This encoder has...
     - Zero or one tokenizers.
     - One or more LSTM layers.
-    - One linear output layer.
     """
 
     def __init__(self, config: RecurrentEncoderConfig) -> None:
diff --git a/rllib/core/models/torch/encoder.py b/rllib/core/models/torch/encoder.py
index d2400447a10a..765f7acc7113 100644
--- a/rllib/core/models/torch/encoder.py
+++ b/rllib/core/models/torch/encoder.py
@@ -297,7 +297,6 @@ class TorchLSTMEncoder(TorchModel, Encoder):
     This encoder has...
     - Zero or one tokenizers.
     - One or more LSTM layers.
-    - One linear output layer.
     """
 
     def __init__(self, config: RecurrentEncoderConfig) -> None:

From eb7fc19112ba1c58e633195c7f7f8a0644c4f09f Mon Sep 17 00:00:00 2001
From: simonsays1980
Date: Wed, 25 Sep 2024 10:56:26 +0200
Subject: [PATCH 2/4] Removed part of docstring from 'RecurrentEncoderConfig' b/c no linear layer in recurrent encoders.

Signed-off-by: simonsays1980
---
 rllib/core/models/configs.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/rllib/core/models/configs.py b/rllib/core/models/configs.py
index a345860213c7..0a8b9491148b 100644
--- a/rllib/core/models/configs.py
+++ b/rllib/core/models/configs.py
@@ -887,7 +887,6 @@ class RecurrentEncoderConfig(ModelConfig):
     - Zero or one tokenizers
     - N LSTM/GRU layers stacked on top of each other and feeding their outputs
       as inputs to the respective next layer.
-    - One linear output layer
 
     This makes for the following flow of tensors:
 
@@ -901,8 +900,6 @@ class RecurrentEncoderConfig(ModelConfig):
     |
     LSTM layer n
     |
-    Linear output layer
-    |
     Outputs
 
     The internal state is structued as (num_layers, B, hidden-size) for all hidden

From 1c7ede52e13f012b61522f401b43dd0d71fc6e01 Mon Sep 17 00:00:00 2001
From: simonsays1980
Date: Wed, 25 Sep 2024 10:59:15 +0200
Subject: [PATCH 3/4] Removed parts of 'GRUEncoder(s)' docstrings b/c linear output layer is not used.

Signed-off-by: simonsays1980
---
 rllib/core/models/tf/encoder.py    | 1 -
 rllib/core/models/torch/encoder.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/rllib/core/models/tf/encoder.py b/rllib/core/models/tf/encoder.py
index 3a6fd858ef1e..ff4956df4a8b 100644
--- a/rllib/core/models/tf/encoder.py
+++ b/rllib/core/models/tf/encoder.py
@@ -181,7 +181,6 @@ class TfGRUEncoder(TfModel, Encoder):
     This encoder has...
     - Zero or one tokenizers.
    - One or more GRU layers.
-    - One linear output layer.
     """
 
     def __init__(self, config: RecurrentEncoderConfig) -> None:
diff --git a/rllib/core/models/torch/encoder.py b/rllib/core/models/torch/encoder.py
index 765f7acc7113..f9e59bdc6f2f 100644
--- a/rllib/core/models/torch/encoder.py
+++ b/rllib/core/models/torch/encoder.py
@@ -174,7 +174,6 @@ class TorchGRUEncoder(TorchModel, Encoder):
     This encoder has...
     - Zero or one tokenizers.
     - One or more GRU layers.
-    - One linear output layer.
""" def __init__(self, config: RecurrentEncoderConfig) -> None: From e6db184dfd152e0b19bdb803bf36b89a84b676b3 Mon Sep 17 00:00:00 2001 From: simonsays1980 Date: Wed, 25 Sep 2024 16:24:55 +0200 Subject: [PATCH 4/4] Fixed a bug in 'ActionMaskingTorchRLModule' due to changes in value computation. Signed-off-by: simonsays1980 --- .../examples/rl_modules/classes/action_masking_rlm.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rllib/examples/rl_modules/classes/action_masking_rlm.py b/rllib/examples/rl_modules/classes/action_masking_rlm.py index 853ef1f979de..e948b8c1a1ef 100644 --- a/rllib/examples/rl_modules/classes/action_masking_rlm.py +++ b/rllib/examples/rl_modules/classes/action_masking_rlm.py @@ -93,19 +93,20 @@ def _forward_exploration( def _forward_train( self, batch: Dict[str, TensorType], **kwargs ) -> Dict[str, TensorType]: - # Preprocess the original batch to extract the action mask. - action_mask, batch = self._preprocess_batch(batch) # Run the forward pass. outs = super()._forward_train(batch, **kwargs) # Mask the action logits and return. - return self._mask_action_logits(outs, action_mask) + return self._mask_action_logits(outs, batch["action_mask"]) @override(ValueFunctionAPI) def compute_values(self, batch: Dict[str, TensorType]): # Preprocess the batch to extract the `observations` to `Columns.OBS`. - _, batch = self._preprocess_batch(batch) + action_mask, batch = self._preprocess_batch(batch) + # NOTE: Because we manipulate the batch we need to add the `action_mask` + # to the batch to access them in `_forward_train`. + batch["action_mask"] = action_mask # Call the super's method to compute values for GAE. - return super()._compute_values(batch) + return super().compute_values(batch) def _preprocess_batch( self, batch: Dict[str, TensorType], **kwargs