diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml index cba96336..c944eeee 100644 --- a/.github/workflows/build-publish.yml +++ b/.github/workflows/build-publish.yml @@ -4,7 +4,7 @@ # - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ # # derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml -name: build-publish +name: Build artifact for PyPI on: push: @@ -16,35 +16,18 @@ on: jobs: build-wheels: - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: ubuntu-latest - python: 38 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 39 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 310 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 311 - platform: manylinux_x86_64 + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Install dependencies - run: python -m pip install --upgrade pip setuptools build + run: pipx install build - name: Build sdist and wheels - run: python -m build + run: pyproject-build - name: Store wheels - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: path: dist @@ -55,10 +38,11 @@ jobs: if: github.event_name == 'release' && github.event.action == 'published' steps: - name: Download dists - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: artifact path: dist + - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 with: diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 80ce02af..9f2cc2ab 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -13,9 +13,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - - run: python -m pip install pre-commit - - run: python -m pre_commit --version - - run: python -m pre_commit install - - run: python -m pre_commit run --all-files + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - run: pipx install pre-commit + - run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 69680e4e..af334549 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: check-symlinks - id: destroyed-symlinks @@ -18,13 +18,13 @@ repos: - id: detect-private-key - id: debug-statements - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.3.0 hooks: - id: codespell args: - --ignore-words-list=reacher,ure,referenc,wile,mor,ser,esr,nowe - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.1.1 hooks: - id: flake8 args: @@ -35,16 +35,16 @@ repos: - --show-source - --statistics - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.18.0 hooks: - id: pyupgrade args: ["--py37-plus"] - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort - repo: https://github.com/python/black - rev: 23.1.0 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/pycqa/pydocstyle diff --git a/examples/envelope_minecart.py b/examples/envelope_minecart.py index 
bf41f0f6..c3f43bf4 100644 --- a/examples/envelope_minecart.py +++ b/examples/envelope_minecart.py @@ -1,6 +1,6 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.multi_policy.envelope.envelope import Envelope diff --git a/examples/eupg_fishwood.py b/examples/eupg_fishwood.py index 7b253522..55a77583 100644 --- a/examples/eupg_fishwood.py +++ b/examples/eupg_fishwood.py @@ -1,7 +1,7 @@ import mo_gymnasium as mo_gym import numpy as np import torch as th -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import eval_mo_reward_conditioned from morl_baselines.single_policy.esr.eupg import EUPG diff --git a/examples/mo_q_learning_DST.py b/examples/mo_q_learning_DST.py index ab234086..8910519e 100644 --- a/examples/mo_q_learning_DST.py +++ b/examples/mo_q_learning_DST.py @@ -2,7 +2,7 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import eval_mo from morl_baselines.common.scalarization import tchebicheff diff --git a/examples/mp_mo_q_learning_DST.py b/examples/mp_mo_q_learning_DST.py index 89977c3f..a6d418b6 100644 --- a/examples/mp_mo_q_learning_DST.py +++ b/examples/mp_mo_q_learning_DST.py @@ -1,6 +1,6 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.scalarization import tchebicheff from morl_baselines.multi_policy.multi_policy_moqlearning.mp_mo_q_learning import ( diff --git a/examples/pcn_minecart.py b/examples/pcn_minecart.py index aabc577f..363fcd5f 100644 --- a/examples/pcn_minecart.py +++ b/examples/pcn_minecart.py @@ -1,6 +1,6 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.multi_policy.pcn.pcn import PCN diff --git a/examples/pgmorl_halfcheetah.py b/examples/pgmorl_halfcheetah.py index 5b54ed5b..2b5bd5f0 100644 --- a/examples/pgmorl_halfcheetah.py +++ b/examples/pgmorl_halfcheetah.py @@ -19,7 +19,7 @@ algo.train( total_timesteps=int(5e6), eval_env=make_env(env_id, 42, 0, "PGMORL_eval_env", gamma=0.995)(), - ref_point=np.array([0.0, -5.0]), + ref_point=np.array([-100.0, -100.0]), known_pareto_front=None, ) env = make_env(env_id, 422, 1, "PGMORL_test", gamma=0.995)() # idx != 0 to avoid taking videos diff --git a/experiments/benchmark/launch_experiment.py b/experiments/benchmark/launch_experiment.py index 4b093b20..cf1a1b6d 100644 --- a/experiments/benchmark/launch_experiment.py +++ b/experiments/benchmark/launch_experiment.py @@ -15,9 +15,8 @@ import numpy as np import requests from gym_super_mario_bros.actions import SIMPLE_MOVEMENT -from gymnasium.wrappers import FlattenObservation -from gymnasium.wrappers.record_video import RecordVideo -from mo_gymnasium.utils import MORecordEpisodeStatistics +from gymnasium.wrappers import FlattenObservation, RecordVideo +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import seed_everything from morl_baselines.common.experiments import ( @@ -90,13 +89,15 @@ def autotag() -> str: git_commit = 
subprocess.check_output(["git", "rev-parse", "--verify", "HEAD"]).decode("ascii").strip() try: # try finding the pull request number on github - prs = requests.get(f"https://api.github.com/search/issues?q=repo:LucasAlegre/morl-baselines+is:pr+{git_commit}") + prs = requests.get( + f"https://api.github.com/search/issues?q=repo:LucasAlegre/morl-baselines+is:pr+{git_commit}" # noqa + ) if prs.status_code == 200: prs = prs.json() if len(prs["items"]) > 0: pr = prs["items"][0] pr_number = pr["number"] - wandb_tag += f",pr-{pr_number}" + wandb_tag += f",pr-{pr_number}" # noqa print(f"identified github pull request: {pr_number}") except Exception as e: print(e) @@ -165,7 +166,7 @@ def wrap_mario(env): TimeLimit, ) from mo_gymnasium.envs.mario.joypad_space import JoypadSpace - from mo_gymnasium.utils import MOMaxAndSkipObservation + from mo_gymnasium.wrappers import MOMaxAndSkipObservation env = JoypadSpace(env, SIMPLE_MOVEMENT) env = MOMaxAndSkipObservation(env, skip=4) diff --git a/experiments/hyperparameter_search/launch_sweep.py b/experiments/hyperparameter_search/launch_sweep.py index de48d782..e6fedb44 100644 --- a/experiments/hyperparameter_search/launch_sweep.py +++ b/experiments/hyperparameter_search/launch_sweep.py @@ -7,7 +7,7 @@ import numpy as np import wandb import yaml -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import seed_everything from morl_baselines.common.experiments import ( diff --git a/morl_baselines/__init__.py b/morl_baselines/__init__.py index 498632ab..d2af2963 100644 --- a/morl_baselines/__init__.py +++ b/morl_baselines/__init__.py @@ -1,4 +1,3 @@ """MORL-Baselines contains various MORL algorithms and utility functions.""" - -__version__ = "1.0.0" +__version__ = "1.1.0" diff --git a/morl_baselines/common/buffer.py b/morl_baselines/common/buffer.py index 58b53397..ee3722b7 100644 --- a/morl_baselines/common/buffer.py +++ b/morl_baselines/common/buffer.py @@ -1,4 +1,5 @@ """Replay buffer for multi-objective reinforcement learning.""" + import numpy as np import torch as th diff --git a/morl_baselines/common/diverse_buffer.py b/morl_baselines/common/diverse_buffer.py index a0d19c85..e49330fb 100644 --- a/morl_baselines/common/diverse_buffer.py +++ b/morl_baselines/common/diverse_buffer.py @@ -1,4 +1,5 @@ """Diverse Experience Replay Buffer. 
Code extracted from https://github.com/axelabels/DynMORL.""" + from dataclasses import dataclass import numpy as np @@ -154,7 +155,7 @@ def update(self, idx: int, p, tree_id=None): Keyword Arguments: tree_id {object} -- Tree to be updated (default: {None}) """ - if type(p) == dict: + if isinstance(p, dict): for k in p: self.update(idx, p[k], k) return @@ -476,7 +477,10 @@ def get_data(self, include_indices: bool = False): Returns: The data """ - all_data = list(np.arange(self.capacity) + self.capacity - 1), list(self.tree.data) + all_data = ( + list(np.arange(self.capacity) + self.capacity - 1), + list(self.tree.data), + ) indices = [] data = [] for i, d in zip(all_data[0], all_data[1]): diff --git a/morl_baselines/common/evaluation.py b/morl_baselines/common/evaluation.py index 79b6eff3..af106c08 100644 --- a/morl_baselines/common/evaluation.py +++ b/morl_baselines/common/evaluation.py @@ -1,4 +1,5 @@ """Utilities related to evaluation.""" + import os import random from typing import List, Optional, Tuple diff --git a/morl_baselines/common/experiments.py b/morl_baselines/common/experiments.py index dff6237c..71dda225 100644 --- a/morl_baselines/common/experiments.py +++ b/morl_baselines/common/experiments.py @@ -1,4 +1,5 @@ """Common experiment utilities.""" + import argparse from morl_baselines.multi_policy.capql.capql import CAPQL diff --git a/morl_baselines/common/model_based/probabilistic_ensemble.py b/morl_baselines/common/model_based/probabilistic_ensemble.py index db40b5f7..f5333468 100644 --- a/morl_baselines/common/model_based/probabilistic_ensemble.py +++ b/morl_baselines/common/model_based/probabilistic_ensemble.py @@ -1,4 +1,5 @@ """Probabilistic ensemble of neural networks.""" + import os import numpy as np diff --git a/morl_baselines/common/model_based/tabular_model.py b/morl_baselines/common/model_based/tabular_model.py index 96664800..903429e7 100644 --- a/morl_baselines/common/model_based/tabular_model.py +++ b/morl_baselines/common/model_based/tabular_model.py @@ -1,4 +1,5 @@ """Tabular dynamics model S_{t+1}, R_t ~ m(.,.|s,a) .""" + import random import numpy as np diff --git a/morl_baselines/common/model_based/utils.py b/morl_baselines/common/model_based/utils.py index 10d58467..1d13fa0d 100644 --- a/morl_baselines/common/model_based/utils.py +++ b/morl_baselines/common/model_based/utils.py @@ -1,4 +1,5 @@ """Utility functions for the model.""" + from typing import Tuple import matplotlib.pyplot as plt @@ -34,7 +35,7 @@ def termination_fn_dst(obs, act, next_obs): def termination_fn_mountaincar(obs, act, next_obs): - """Termination function of mountin car.""" + """Termination function of mountain car.""" assert len(obs.shape) == len(next_obs.shape) == len(act.shape) == 2 position = next_obs[:, 0] velocity = next_obs[:, 1] @@ -147,16 +148,29 @@ def step( var_obs = var_obs[0] var_rewards = var_rewards[0] - info = {"uncertainty": uncertainties, "var_obs": var_obs, "var_rewards": var_rewards} + info = { + "uncertainty": uncertainties, + "var_obs": var_obs, + "var_rewards": var_rewards, + } # info = {'mean': return_means, 'std': return_stds, 'log_prob': log_prob, 'dev': dev} return next_obs, rewards, terminals, info def visualize_eval( - agent, env, model=None, w=None, horizon=10, init_obs=None, compound=True, deterministic=False, show=False, filename=None + agent, + env, + model=None, + w=None, + horizon=10, + init_obs=None, + compound=True, + deterministic=False, + show=False, + filename=None, ): - """Generates a plot of the evolution of the state, reward and model 
predicitions ove time. + """Generates a plot of the evolution of the state, reward and model predictions over time. Args: agent: agent to be evaluated @@ -213,10 +227,16 @@ def visualize_eval( acts = F.one_hot(acts, num_classes=env.action_space.n).squeeze(1) for step in range(len(real_obs)): if compound or step == 0: - obs, r, done, info = model_env.step(th.tensor(obs).to(agent.device), acts[step], deterministic=deterministic) + obs, r, done, info = model_env.step( + th.tensor(obs).to(agent.device), + acts[step], + deterministic=deterministic, + ) else: obs, r, done, info = model_env.step( - th.tensor(real_obs[step - 1]).to(agent.device), acts[step], deterministic=deterministic + th.tensor(real_obs[step - 1]).to(agent.device), + acts[step], + deterministic=deterministic, ) model_obs.append(obs.copy()) model_obs_stds.append(np.sqrt(info["var_obs"].copy())) @@ -240,11 +260,26 @@ def visualize_eval( axs[i].set_ylabel(f"Reward {i - obs_dim}") axs[i].grid(alpha=0.25) if w is not None: - axs[i].plot(x, [real_vec_rewards[step][i - obs_dim] for step in x], label="Environment", color="black") + axs[i].plot( + x, + [real_vec_rewards[step][i - obs_dim] for step in x], + label="Environment", + color="black", + ) else: - axs[i].plot(x, [real_rewards[step] for step in x], label="Environment", color="black") + axs[i].plot( + x, + [real_rewards[step] for step in x], + label="Environment", + color="black", + ) if model is not None: - axs[i].plot(x, [model_rewards[step][i - obs_dim] for step in x], label="Model", color="blue") + axs[i].plot( + x, + [model_rewards[step][i - obs_dim] for step in x], + label="Model", + color="blue", + ) axs[i].fill_between( x, [model_rewards[step][i - obs_dim] + model_rewards_stds[step][i - obs_dim] for step in x], diff --git a/morl_baselines/common/morl_algorithm.py b/morl_baselines/common/morl_algorithm.py index b7af6cd5..920a5342 100644 --- a/morl_baselines/common/morl_algorithm.py +++ b/morl_baselines/common/morl_algorithm.py @@ -1,4 +1,5 @@ """MORL algorithm base classes.""" + import os import time from abc import ABC, abstractmethod @@ -11,7 +12,7 @@ import torch.nn import wandb from gymnasium import spaces -from mo_gymnasium.utils import MOSyncVectorEnv +from mo_gymnasium.wrappers.vector import MOSyncVectorEnv from morl_baselines.common.evaluation import ( eval_mo_reward_conditioned, diff --git a/morl_baselines/common/pareto.py b/morl_baselines/common/pareto.py index 76a64254..63828bd1 100644 --- a/morl_baselines/common/pareto.py +++ b/morl_baselines/common/pareto.py @@ -1,4 +1,5 @@ """Pareto utilities.""" + from copy import deepcopy from typing import List, Union diff --git a/morl_baselines/common/performance_indicators.py b/morl_baselines/common/performance_indicators.py index 3d957f1b..8462dbb3 100644 --- a/morl_baselines/common/performance_indicators.py +++ b/morl_baselines/common/performance_indicators.py @@ -2,6 +2,7 @@ We mostly rely on pymoo for the computation of axiomatic indicators (HV and IGD), but some are customly made. 
""" + from copy import deepcopy from typing import Callable, List diff --git a/morl_baselines/common/prioritized_buffer.py b/morl_baselines/common/prioritized_buffer.py index 99ba8b84..24662b76 100644 --- a/morl_baselines/common/prioritized_buffer.py +++ b/morl_baselines/common/prioritized_buffer.py @@ -2,6 +2,7 @@ Code adapted from https://github.com/sfujim/LAP-PAL """ + import numpy as np import torch as th diff --git a/morl_baselines/common/scalarization.py b/morl_baselines/common/scalarization.py index 3fd2ffc2..a8a7f7f5 100644 --- a/morl_baselines/common/scalarization.py +++ b/morl_baselines/common/scalarization.py @@ -1,4 +1,5 @@ """Scalarization functions relying on numpy.""" + import numpy as np from pymoo.decomposition.tchebicheff import Tchebicheff diff --git a/morl_baselines/common/utils.py b/morl_baselines/common/utils.py index 88157a2e..00c01d8e 100644 --- a/morl_baselines/common/utils.py +++ b/morl_baselines/common/utils.py @@ -1,4 +1,5 @@ """General utils for the MORL baselines.""" + import math import os from typing import Callable, List diff --git a/morl_baselines/multi_policy/capql/capql.py b/morl_baselines/multi_policy/capql/capql.py index 54ecd625..1ae46bdc 100644 --- a/morl_baselines/multi_policy/capql/capql.py +++ b/morl_baselines/multi_policy/capql/capql.py @@ -1,4 +1,5 @@ """CAPQL algorithm.""" + import os import random from itertools import chain diff --git a/morl_baselines/multi_policy/envelope/envelope.py b/morl_baselines/multi_policy/envelope/envelope.py index 6899c585..c4ad7903 100644 --- a/morl_baselines/multi_policy/envelope/envelope.py +++ b/morl_baselines/multi_policy/envelope/envelope.py @@ -1,4 +1,5 @@ """Envelope Q-Learning implementation.""" + import os from typing import List, Optional, Union from typing_extensions import override diff --git a/morl_baselines/multi_policy/gpi_pd/gpi_pd.py b/morl_baselines/multi_policy/gpi_pd/gpi_pd.py index 9d315a85..8903288b 100644 --- a/morl_baselines/multi_policy/gpi_pd/gpi_pd.py +++ b/morl_baselines/multi_policy/gpi_pd/gpi_pd.py @@ -1,4 +1,5 @@ """GPI-PD algorithm.""" + import os import random from itertools import chain diff --git a/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py b/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py index a9e31fbd..37e9b165 100644 --- a/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py +++ b/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py @@ -1,4 +1,5 @@ """GPI-PD algorithm with continuous actions.""" + import os import random from itertools import chain diff --git a/morl_baselines/multi_policy/linear_support/linear_support.py b/morl_baselines/multi_policy/linear_support/linear_support.py index a9d6dda1..12df3f92 100644 --- a/morl_baselines/multi_policy/linear_support/linear_support.py +++ b/morl_baselines/multi_policy/linear_support/linear_support.py @@ -1,4 +1,5 @@ """Linear Support implementation.""" + import random from copy import deepcopy from typing import List, Optional diff --git a/morl_baselines/multi_policy/morld/morld.py b/morl_baselines/multi_policy/morld/morld.py index c931efc7..69c0ee3c 100644 --- a/morl_baselines/multi_policy/morld/morld.py +++ b/morl_baselines/multi_policy/morld/morld.py @@ -2,6 +2,7 @@ See Felten, Talbi & Danoy (2024): https://arxiv.org/abs/2311.12495. 
""" + import math import time from typing import Callable, List, Optional, Tuple, Union @@ -10,7 +11,7 @@ import gymnasium as gym import numpy as np import torch as th -from mo_gymnasium import MONormalizeReward +from mo_gymnasium.wrappers import MONormalizeReward from torch import optim from morl_baselines.common.evaluation import log_all_multi_policy_metrics diff --git a/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py b/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py index 232e9b51..0341e08e 100644 --- a/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py +++ b/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py @@ -1,4 +1,5 @@ """Outer-loop MOQ-learning algorithm (uses multiple weights).""" + import time from copy import deepcopy from typing import List, Optional diff --git a/morl_baselines/multi_policy/pareto_q_learning/pql.py b/morl_baselines/multi_policy/pareto_q_learning/pql.py index 45077aa5..2b315dd7 100644 --- a/morl_baselines/multi_policy/pareto_q_learning/pql.py +++ b/morl_baselines/multi_policy/pareto_q_learning/pql.py @@ -1,4 +1,5 @@ """Pareto Q-Learning.""" + import numbers from typing import Callable, List, Optional @@ -60,19 +61,19 @@ def __init__( # Algorithm setup self.ref_point = ref_point - if type(self.env.action_space) == gym.spaces.Discrete: + if isinstance(self.env.action_space, gym.spaces.Discrete): self.num_actions = self.env.action_space.n - elif type(self.env.action_space) == gym.spaces.MultiDiscrete: + elif isinstance(self.env.action_space, gym.spaces.MultiDiscrete): self.num_actions = np.prod(self.env.action_space.nvec) else: raise Exception("PQL only supports (multi)discrete action spaces.") - if type(self.env.observation_space) == gym.spaces.Discrete: + if isinstance(self.env.observation_space, gym.spaces.Discrete): self.env_shape = (self.env.observation_space.n,) - elif type(self.env.observation_space) == gym.spaces.MultiDiscrete: + elif isinstance(self.env.observation_space, gym.spaces.MultiDiscrete): self.env_shape = self.env.observation_space.nvec elif ( - type(self.env.observation_space) == gym.spaces.Box + isinstance(self.env.observation_space, gym.spaces.Box) and self.env.observation_space.is_bounded(manner="both") and issubclass(self.env.observation_space.dtype.type, numbers.Integral) ): @@ -83,7 +84,7 @@ def __init__( raise Exception("PQL only supports discretizable observation spaces.") self.num_states = np.prod(self.env_shape) - self.num_objectives = self.env.reward_space.shape[0] + self.num_objectives = self.env.unwrapped.reward_space.shape[0] self.counts = np.zeros((self.num_states, self.num_actions)) self.non_dominated = [ [{tuple(np.zeros(self.num_objectives))} for _ in range(self.num_actions)] for _ in range(self.num_states) @@ -96,7 +97,11 @@ def __init__( self.log = log if self.log: - self.setup_wandb(project_name=self.project_name, experiment_name=self.experiment_name, entity=wandb_entity) + self.setup_wandb( + project_name=self.project_name, + experiment_name=self.experiment_name, + entity=wandb_entity, + ) def get_config(self) -> dict: """Get the configuration dictionary. diff --git a/morl_baselines/multi_policy/pcn/pcn.py b/morl_baselines/multi_policy/pcn/pcn.py index 2e380024..48c162b9 100644 --- a/morl_baselines/multi_policy/pcn/pcn.py +++ b/morl_baselines/multi_policy/pcn/pcn.py @@ -1,4 +1,5 @@ """Pareto Conditioned Network. 
Code adapted from https://github.com/mathieu-reymond/pareto-conditioned-networks .""" + import heapq import os from abc import ABC diff --git a/morl_baselines/multi_policy/pgmorl/pgmorl.py b/morl_baselines/multi_policy/pgmorl/pgmorl.py index 02c77fcf..4ca3aef8 100644 --- a/morl_baselines/multi_policy/pgmorl/pgmorl.py +++ b/morl_baselines/multi_policy/pgmorl/pgmorl.py @@ -4,6 +4,7 @@ (!) Limited to 2 objectives for now. (!) The post-processing phase has not been implemented yet. """ + import time from copy import deepcopy from typing import List, Optional, Tuple, Union @@ -420,7 +421,7 @@ def __init__( envs = [make_env(env_id, self.seed + i, i, experiment_name, self.gamma) for i in range(self.num_envs)] else: envs = [make_env(env_id, i, i, experiment_name, self.gamma) for i in range(self.num_envs)] - self.env = mo_gym.MOSyncVectorEnv(envs) + self.env = mo_gym.wrappers.vector.MOSyncVectorEnv(envs) else: raise ValueError("Environments should be vectorized for PPO. You should provide an environment id instead.") @@ -506,7 +507,9 @@ def get_config(self) -> dict: def __train_all_agents(self, iteration: int, max_iterations: int): for i, agent in enumerate(self.agents): + agent.global_step = self.global_step agent.train(self.start_time, iteration, max_iterations) + self.global_step += self.steps_per_iteration * self.num_envs def __eval_all_agents( self, @@ -631,7 +634,9 @@ def train( } ) self.num_eval_weights_for_eval = num_eval_weights_for_eval - max_iterations = total_timesteps // self.steps_per_iteration // self.num_envs + # 1 iteration is a full batch for each agents + # -> (steps_per_iteration * num_envs * pop_size) timesteps per iteration + max_iterations = total_timesteps // self.steps_per_iteration // self.num_envs // self.pop_size iteration = 0 # Init current_evaluations = [np.zeros(self.reward_dim) for _ in range(len(self.agents))] @@ -646,7 +651,7 @@ def train( # Warmup for i in range(1, self.warmup_iterations + 1): - print(f"Warmup iteration #{iteration}") + print(f"Warmup iteration #{iteration}, global step: {self.global_step}") if self.log: wandb.log({"charts/warmup_iterations": i, "global_step": self.global_step}) self.__train_all_agents(iteration=iteration, max_iterations=max_iterations) diff --git a/morl_baselines/single_policy/esr/eupg.py b/morl_baselines/single_policy/esr/eupg.py index 9eaa9d89..efd417e1 100644 --- a/morl_baselines/single_policy/esr/eupg.py +++ b/morl_baselines/single_policy/esr/eupg.py @@ -1,4 +1,5 @@ """EUPG is an ESR algorithm based on Policy Gradient (REINFORCE like).""" + import time from copy import deepcopy from typing import Callable, List, Optional, Union diff --git a/morl_baselines/single_policy/ser/mo_ppo.py b/morl_baselines/single_policy/ser/mo_ppo.py index 09385534..6c0cf84a 100644 --- a/morl_baselines/single_policy/ser/mo_ppo.py +++ b/morl_baselines/single_policy/ser/mo_ppo.py @@ -1,4 +1,5 @@ """Multi-Objective PPO Algorithm.""" + import time from copy import deepcopy from typing import List, Optional, Union @@ -9,7 +10,7 @@ import numpy as np import torch as th import wandb -from mo_gymnasium import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from torch import nn, optim from torch.distributions import Normal @@ -122,7 +123,7 @@ def thunk(): env = mo_gym.make(env_id, render_mode="rgb_array") else: env = mo_gym.make(env_id) - reward_dim = env.reward_space.shape[0] + reward_dim = env.unwrapped.reward_space.shape[0] """ if idx == 0: env = gym.wrappers.RecordVideo( env, @@ -131,10 +132,10 @@ def thunk(): ) """ 
env = gym.wrappers.ClipAction(env) env = gym.wrappers.NormalizeObservation(env) - env = gym.wrappers.TransformObservation(env, lambda obs: np.clip(obs, -10, 10)) + env = gym.wrappers.TransformObservation(env, lambda obs: np.clip(obs, -10, 10), env.observation_space) for o in range(reward_dim): - env = mo_gym.utils.MONormalizeReward(env, idx=o, gamma=gamma) - env = mo_gym.utils.MOClipReward(env, idx=o, min_r=-10, max_r=10) + env = mo_gym.wrappers.MONormalizeReward(env, idx=o, gamma=gamma) + env = mo_gym.wrappers.MOClipReward(env, idx=o, min_r=-10, max_r=10) env = MORecordEpisodeStatistics(env, gamma=gamma) env.reset(seed=seed) env.action_space.seed(seed) @@ -404,7 +405,7 @@ def __collect_samples(self, obs: th.Tensor, done: th.Tensor): value = value.view(self.num_envs, self.networks.reward_dim) # Perform action on the environment - next_obs, reward, next_terminated, _, info = self.envs.step(action.cpu().numpy()) + next_obs, reward, next_terminated, next_truncated, info = self.envs.step(action.cpu().numpy()) reward = th.tensor(reward).to(self.device).view(self.num_envs, self.networks.reward_dim) # storing to batch self.batch.add(obs, action, logprob, reward, done, value) @@ -413,16 +414,19 @@ def __collect_samples(self, obs: th.Tensor, done: th.Tensor): obs, done = th.Tensor(next_obs).to(self.device), th.Tensor(next_terminated).to(self.device) # Episode info logging - if "episode" in info.keys(): - for item in info["episode"]: + if self.log and "episode" in info.keys(): + indices = np.where(next_terminated | next_truncated)[0] + for idx in indices: + # Reconstructs the dict by extracting the relevant information for each vectorized env + info_log = {k: v[idx] for k, v in info["episode"].items()} + log_episode_info( - item, + info_log, scalarization=np.dot, weights=self.weights, global_timestep=self.global_step, id=self.id, ) - break return obs, done @@ -603,6 +607,7 @@ def train(self, start_time, current_iteration: int, max_iterations: int): # Logging print("SPS:", int(self.global_step / (time.time() - start_time))) if self.log: + print(f"Worker {self.id} - Global step: {self.global_step}") wandb.log( {"charts/SPS": int(self.global_step / (time.time() - start_time)), "global_step": self.global_step}, ) diff --git a/morl_baselines/single_policy/ser/mo_q_learning.py b/morl_baselines/single_policy/ser/mo_q_learning.py index 5abe72d3..1061fcc2 100644 --- a/morl_baselines/single_policy/ser/mo_q_learning.py +++ b/morl_baselines/single_policy/ser/mo_q_learning.py @@ -1,4 +1,5 @@ """Scalarized Q-learning for single policy multi-objective reinforcement learning.""" + import time from typing import Optional from typing_extensions import override diff --git a/pyproject.toml b/pyproject.toml index 362d1037..09a544d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,8 +21,8 @@ classifiers = [ 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] dependencies = [ - "mo-gymnasium >=1.0.1", - "gymnasium>=0.28.1,<0.30", + "mo-gymnasium >=1.1.0", + "gymnasium>=1.0.0", "numpy >=1.21.0,<2.0.0", "torch >=1.12.0", "pygame >=2.1.0", @@ -40,12 +40,12 @@ dynamic = ["version"] [project.optional-dependencies] # Update dependencies in `all` if any are added or removed # OLS requires pycddlib and libgmp to be installed, which does not work on MacOS for now. 
-ols = ["pycddlib"] -gpi = ["pycddlib"] +ols = ["pycddlib==2.1.6"] +gpi = ["pycddlib==2.1.6"] all = [ # OLS & GPI - "pycddlib", + "pycddlib==2.1.6", ] testing = ["pytest ==7.1.3"] @@ -66,7 +66,6 @@ morl_baselines = ["*.json", "assets/*"] # Linting, testing, ... ######################################################## [tool.black] -safe = true line-length = 127 target-version = ['py38', 'py39', 'py310'] include = '\.pyi?$' diff --git a/tests/test_algos.py b/tests/test_algos.py index d64174e7..584b704c 100644 --- a/tests/test_algos.py +++ b/tests/test_algos.py @@ -1,4 +1,5 @@ """Mostly tests to make sure the algorithms are able to run.""" + import time import mo_gymnasium as mo_gym