duyminh1998 · duyminh1998 · Dec 12, 2023 · Dec 2, 2023 · Dec 2, 2023 · Dec 2, 2023
diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@ Read about the project in detail [here](https://minhhua.com/pycmo/).
 
 # Quick Start Guide
 ## Get PyCMO
-1. Make sure the following settings are enabled in your Command Modern Operations' configurations (in `CPE.ini`):  
+1. (Non-Steam, Premium edition only) Make sure the following settings are enabled in your Command Modern Operations' configurations (in `CPE.ini`):  
 ```
 [Lua]
 EnableSocket = 1
@@ -22,13 +22,15 @@ EncodingMode = 8
 ```
 2. Click on "Clone or download", and then "Download Zip". 
 3. Unzip the repo anywhere.
-4. Edit the project's `pycmo/configs/config_template.py` file to fit your system's paths, then rename it as `pycmo/configs/config.py` (IMPORTANT). You only need to edit the lines 8 - 10:
+4. Edit the project's `pycmo/configs/config_template.py` file to fit your system's paths, then rename it as `pycmo/configs/config.py` (IMPORTANT). You only need to edit the lines 8 - 11:
 ```python
 pycmo_path = os.path.join("path/to", "pycmo")
 cmo_path = os.path.join("path/to/steam/installation/of", "Command - Modern Operations")
-command_mo_version = "Command v1.06 - Build 1328.11"
+command_mo_version = "Command v1.06 - Build 1328.12"
+use_gymnasium = False
 ```
-5. Navigate to the folder than contains `setup.py` and install the repository using `pip install .` Anytime you make changes to the files in the project folder, you need to reinstall the package using `pip install .`. Alternatively, use `pip install -e .` to install the package in editable mode. After doing this you can change the code without needing to continue to install it.
+5. Navigate to the folder that contains `setup.py` and install the repository using `pip install .`. Anytime you make changes to the files in the project folder, you need to reinstall the package using `pip install .`. Alternatively, use `pip install -e .` to install the package in editable mode. After doing this you can change the code without needing to continue to install it. 
+6. From PyCMO v1.4.0, [gymnasium](https://gymnasium.farama.org/) became an optional dependency for users who want to use PyCMO as a Gym environment. In this case, use `pip install .[gym]` or `pip install -e .[gym]` for setup. Remember to set `use_gymnasium = True` in the `pycmo/configs/config.py` file.
 
 ## Run an agent (Steam edition only)
 1. Load the provided demo scenario `scen/steam_demo.scen` in the game.

diff --git a/pycmo/__init__.py b/pycmo/__init__.py
@@ -0,0 +1,12 @@
+from pycmo.configs.config import get_config
+
+# Load the user's configuration once at import time; only the "gymnasium"
+# flag is consulted in this module.
+config = get_config()
+
+# Registering the Gym environment needs the optional `gymnasium` package, so
+# it only happens when the user opted in through the config. `.get()` with a
+# False default keeps `import pycmo` working for config.py files that do not
+# define the "gymnasium" key yet.
+if config.get("gymnasium", False):
+    from gymnasium.envs.registration import register
+
+    register(
+        id="FloridistanPycmoGymEnv-v0",
+        entry_point="pycmo.env.cmo_gym_env:FloridistanPycmoGymEnv",
+    )
diff --git a/pycmo/configs/config_template.py b/pycmo/configs/config_template.py
@@ -7,7 +7,8 @@
 def get_config():
     pycmo_path = os.path.join("path/to", "pycmo")
     cmo_path = os.path.join("path/to/steam/installation/of", "Command - Modern Operations")
-    command_mo_version = "Command v1.06 - Build 1328.11"
+    command_mo_version = "Command v1.06 - Build 1328.12"
+    use_gymnasium = False
 
     return {
     "command_path": cmo_path,
@@ -17,6 +18,7 @@ def get_config():
     "scen_ended": os.path.join(pycmo_path, "pycmo", "configs", "scen_has_ended.txt"),
     "pickle_path": os.path.join(pycmo_path, "pickle"),
     "scripts_path": os.path.join(pycmo_path, "scripts"),
-    "command_mo_version": command_mo_version
+    "command_mo_version": command_mo_version,
+    "gymnasium": use_gymnasium,
     # "command_cli_output_path": "C:\\ProgramData\\Command Professional Edition 2\\Analysis_Int", # only applicable to Premium version so we update this later
     }
diff --git a/pycmo/env/cmo_env.py b/pycmo/env/cmo_env.py
@@ -263,8 +263,11 @@ def __init__(self,
         with open(self.scen_ended, 'w') as file:
             file.writelines(data)
 
-    def reset(self) -> TimeStep:
+    def reset(self, close_scenario_end_and_player_eval_messages:bool=False) -> TimeStep:
         try:
+            if close_scenario_end_and_player_eval_messages:
+                self.client.close_scenario_end_and_player_eval_messages()
+
             restart_result = self.client.restart_scenario()
 
             # check that the scenario loaded event has fired correctly in CMO, and if not, restart the scenario
@@ -346,7 +349,7 @@ def get_obs(self) -> FeaturesFromSteam:
                 if get_obs_retries > max_get_obs_retries:
                     raise TimeoutError("CMOEnv unable to get observation.")
 
-    def action_spec(self, observation:Features) -> AvailableFunctions:    
+    def action_spec(self, observation:Features | FeaturesFromSteam) -> AvailableFunctions:    
         return AvailableFunctions(observation)
 
     def check_game_ended(self) -> bool:
@@ -365,4 +368,3 @@ def end_game(self) -> TimeStep:
         export_observation_event_name = 'Export observation'
         action = f"ScenEdit_RunScript('{pycmo_lua_lib_path}', true)\nteardown_and_end_scenario('{export_observation_event_name}', true)"
         return self.step(action)
-
diff --git a/pycmo/env/cmo_gym_env.py b/pycmo/env/cmo_gym_env.py
@@ -0,0 +1,123 @@
+from typing import Tuple
+import gymnasium as gym
+from gymnasium import spaces
+import numpy as np
+
+from pycmo.lib.features import FeaturesFromSteam
+from pycmo.env.cmo_env import CMOEnv, StepType
+from pycmo.lib.protocol import SteamClientProps
+
+class BasePycmoGymEnv(gym.Env):
+    """Gymnasium-style wrapper that delegates to an internal `CMOEnv`.
+
+    Subclasses are expected to override `_get_obs` and `_get_info`; the
+    versions defined here are placeholders that return None.
+    """
+    metadata = {"render_modes": [None]}
+
+    def __init__(
+            self,
+            player_side: str,
+            steam_client_props:SteamClientProps,
+            observation_path: str, 
+            action_path: str,
+            scen_ended_path: str,
+            pycmo_lua_lib_path: str | None = None,
+            max_resets: int = 20,
+            render_mode=None,
+    ):
+        """Create the wrapped `CMOEnv` and record the render mode.
+
+        All arguments except `render_mode` are forwarded unchanged to `CMOEnv`.
+        `render_mode` must be None or one of `metadata["render_modes"]`.
+        """
+        self.cmo_env = CMOEnv(
+            player_side=player_side,
+            steam_client_props=steam_client_props,
+            observation_path=observation_path,
+            action_path=action_path,
+            scen_ended_path=scen_ended_path,
+            pycmo_lua_lib_path=pycmo_lua_lib_path,
+            max_resets=max_resets
+        )
+
+        assert render_mode is None or render_mode in self.metadata["render_modes"]
+        self.render_mode = render_mode
+
+    def _get_obs(self, observation:FeaturesFromSteam) -> dict:
+        """Convert a raw observation into the dict handed to the agent (placeholder)."""
+        ...
+
+    def _get_info(self) -> dict:
+        """Return the auxiliary info dict for the current step (placeholder)."""
+        ...
+
+    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[dict, dict]:
+        """Restart the scenario and return the first `(observation, info)` pair.
+
+        Args:
+            seed: Forwarded to `gym.Env.reset` so that `self.np_random` is seeded.
+            options: Optional dict. The only key read is
+                'close_scenario_end_and_player_eval_messages'; when it is
+                missing (or `options` is None) False is passed to `CMOEnv.reset`.
+        """
+        # Let the Gymnasium base class handle seeding.
+        super().reset(seed=seed)
+
+        # `options` defaults to None, so it must not be subscripted blindly.
+        options = options or {}
+        close_messages = options.get('close_scenario_end_and_player_eval_messages', False)
+
+        state = self.cmo_env.reset(close_scenario_end_and_player_eval_messages=close_messages)
+        observation = self._get_obs(observation=state.observation)
+        info = self._get_info()
+
+        return observation, info
+
+    def step(self, action) -> Tuple[dict, int, bool, bool, dict]:
+        """Apply `action` and return `(observation, reward, terminated, truncated, info)`.
+
+        `terminated` is True when `CMOEnv.check_game_ended()` reports the end of
+        the game or the returned step type equals `StepType(2)` (presumably the
+        terminal step type -- confirm against `StepType`). `truncated` is
+        always False.
+        """
+        state = self.cmo_env.step(action)
+        terminated = self.cmo_env.check_game_ended() or state.step_type == StepType(2)
+        truncated = False
+        reward = state.reward
+        observation = self._get_obs(observation=state.observation)
+        info = self._get_info()
+
+        return observation, reward, terminated, truncated, info
+
+    def close(self) -> None:
+        """Shut the environment down by calling `CMOEnv.end_game()`."""
+        self.cmo_env.end_game()
+
+class FloridistanPycmoGymEnv(BasePycmoGymEnv):
+    """Gym environment that reports the unit "Thunder #1" and the contact "BTR-82V".
+
+    The observation and action spaces are supplied by the caller. The keys of
+    `observation_space["Thunder #1"]` and `observation_space["BTR-82V"]` decide
+    which attributes are read from the unit and the contact respectively.
+    """
+    def __init__(
+            self,
+            observation_space:spaces.Space,
+            action_space:spaces.Space,
+            player_side: str,
+            steam_client_props:SteamClientProps,
+            observation_path: str, 
+            action_path: str,
+            scen_ended_path: str,
+            pycmo_lua_lib_path: str | None = None,
+            max_resets: int = 20,
+            render_mode=None,
+    ):
+        """Initialise the base environment and store the two spaces.
+
+        Everything except `observation_space` and `action_space` is forwarded
+        unchanged to `BasePycmoGymEnv.__init__`.
+        """
+        super().__init__(
+            player_side=player_side,
+            steam_client_props=steam_client_props,
+            observation_path=observation_path,
+            action_path=action_path,
+            scen_ended_path=scen_ended_path,
+            pycmo_lua_lib_path=pycmo_lua_lib_path,
+            max_resets=max_resets,
+            render_mode=render_mode
+        )
+
+        self.observation_space = observation_space
+        self.action_space = action_space
+
+    @staticmethod
+    def _find_by_name(items, name: str, kind: str):
+        """Return the first element of `items` whose `Name` equals `name`.
+
+        Raises:
+            LookupError: if no element matches. Failing loudly avoids reporting
+                an unrelated entity under the expected name.
+        """
+        for item in items:
+            if item.Name == name:
+                return item
+        raise LookupError(f"{kind} '{name}' was not found in the observation.")
+
+    def _extract_features(self, entity, space_key: str) -> dict:
+        """Read from `entity` every attribute named in `observation_space[space_key]`.
+
+        Float values are wrapped in a one-element float64 array; every other
+        value is passed through unchanged.
+        """
+        features = {}
+        for key in self.observation_space[space_key].keys():
+            value = getattr(entity, key)
+            if isinstance(value, float):
+                features[key] = np.array((value,), dtype=np.float64)
+            else:
+                features[key] = value
+        return features
+
+    def _get_obs(self, observation:FeaturesFromSteam) -> dict:
+        """Build the observation dict for "Thunder #1" and "BTR-82V".
+
+        Raises:
+            LookupError: if the unit or the contact is missing from `observation`.
+        """
+        unit_name = "Thunder #1"
+        contact_name = "BTR-82V"
+
+        unit = self._find_by_name(observation.units, unit_name, "Unit")
+        unit_features = self._extract_features(unit, unit_name)
+
+        contact = self._find_by_name(observation.contacts, contact_name, "Contact")
+        contact_features = self._extract_features(contact, contact_name)
+
+        return {unit_name: unit_features, contact_name: contact_features}
+
+    def _get_info(self) -> dict:
+        """Return an empty info dict; this environment reports no extra info."""
+        return {}
diff --git a/pycmo/lib/run_loop.py b/pycmo/lib/run_loop.py
@@ -85,7 +85,7 @@ def run_loop_steam(env: CPEEnv | CMOEnv,
                    agent:BaseAgent=None, 
                    max_steps=None) -> None:       
     # start the game
-    state = env.reset()
+    state = env.reset(close_scenario_end_and_player_eval_messages=False)
     action = ""
 
     # Configure a limit for the maximum number of steps
@@ -108,8 +108,7 @@ def run_loop_steam(env: CPEEnv | CMOEnv,
 
         if state.step_type == StepType(2) or env.check_game_ended():
             print_env_information(state.step_id, parse_utc(int(state.observation.meta.Time)), action, state.reward, state.reward)
-            env.client.close_scenario_end_and_player_eval_messages()
-            state = env.reset()
+            state = env.reset(close_scenario_end_and_player_eval_messages=True)
             action = ''
             agent.reset()
 

diff --git a/pycmo/lib/spaces.py b/pycmo/lib/spaces.py
@@ -0,0 +1,87 @@
+from gymnasium import spaces
+import numpy as np
+
+from pycmo.lib.features import Mount, Loadout
+
+# CONSTANTS
+# Maximum length used for every Text space built in this module.
+pycmo_text_max_length = 2000
+# Upper bounds used for the integer and float Box spaces in this module.
+pycmo_max_int = 2 ** 62
+pycmo_max_float = float(2 ** 62)
+# Characters allowed in Text spaces. Each character appears exactly once; a
+# previous "{{}}" sequence listed the braces twice.
+text_charset = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ~`!@#$%^&*()-_=+[]{}\\|;:'\",./<>?"
+
+def get_weapon_space() -> spaces.Dict:
+    """Return a Dict space with the keys "ID", "WeaponID", "QuantRemaining" and "MaxQuant".
+
+    "ID" is a Text space; the three numeric keys are int64 Box spaces bounded
+    by 0 and `pycmo_max_int`.
+    """
+    def _count_box() -> spaces.Box:
+        # A fresh Box per key so the sub-spaces are independent objects.
+        return spaces.Box(0, pycmo_max_int, dtype=np.int64)
+
+    fields = {
+        "ID": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+        "WeaponID": _count_box(),
+        "QuantRemaining": _count_box(),
+        "MaxQuant": _count_box(),
+    }
+    return spaces.Dict(fields)
+
+def get_contact_space() -> spaces.Dict:
+    """Return a Dict space with the keys "ID", "Name", "CS", "CA", "Lon" and "Lat".
+
+    "ID" and "Name" are Text spaces; the remaining keys are float64 Box spaces.
+    """
+    def _text() -> spaces.Text:
+        return spaces.Text(max_length=pycmo_text_max_length, charset=text_charset)
+
+    def _float_box(low: float, high: float) -> spaces.Box:
+        return spaces.Box(low, high, dtype=np.float64)
+
+    fields = {
+        "ID": _text(),
+        "Name": _text(),
+        "CS": _float_box(0.0, pycmo_max_float),
+        "CA": _float_box(-pycmo_max_float, pycmo_max_float),
+        "Lon": _float_box(-180.0, 180.0),
+        "Lat": _float_box(-90.0, 90.0),
+    }
+    return spaces.Dict(fields)
+
+def get_mount_space(num_weapons:int=0) -> spaces.Dict:
+    """Return a Dict space for a mount with the keys "ID", "Name" and "DBID".
+
+    When `num_weapons` is greater than zero, a "Weapons" key is added holding a
+    Tuple of `num_weapons` weapon spaces.
+    """
+    fields = {
+        "ID": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+        "Name": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+        "DBID": spaces.Box(0, pycmo_max_int, dtype=np.int64),
+    }
+    if num_weapons > 0:
+        weapon_spaces = []
+        for _ in range(num_weapons):
+            weapon_spaces.append(get_weapon_space())
+        fields["Weapons"] = spaces.Tuple(weapon_spaces)
+    return spaces.Dict(fields)
+
+def get_loadout_space(num_weapons:int=0) -> spaces.Dict:
+    """Return a Dict space for a loadout with the keys "ID", "Name" and "DBID".
+
+    Here "ID" is an int64 Box rather than a Text space. When `num_weapons` is
+    greater than zero, a "Weapons" key is added holding a Tuple of
+    `num_weapons` weapon spaces.
+    """
+    fields = {
+        "ID": spaces.Box(0, pycmo_max_int, dtype=np.int64),
+        "Name": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+        "DBID": spaces.Box(0, pycmo_max_int, dtype=np.int64),
+    }
+    if num_weapons > 0:
+        weapon_spaces = []
+        for _ in range(num_weapons):
+            weapon_spaces.append(get_weapon_space())
+        fields["Weapons"] = spaces.Tuple(weapon_spaces)
+    return spaces.Dict(fields)
+
+def get_unit_space() -> spaces.Dict:
+    """Return a Dict space describing a unit.
+
+    Text keys: "ID", "Name", "Side", "Type" (all using `text_charset`).
+    float64 Box keys: "CH", "CS", "CA", "Lon", "Lat", "CurrentFuel", "MaxFuel".
+    """
+    # The dict is wrapped in spaces.Dict exactly once, and "Type" uses the same
+    # charset as the other Text fields so values with spaces or punctuation
+    # are accepted.
+    return spaces.Dict(
+        {
+            "ID": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+            "Name": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+            "Side": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+            "Type": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
+            "CH": spaces.Box(0.0, 360.0, dtype=np.float64),
+            "CS": spaces.Box(0.0, pycmo_max_float, dtype=np.float64),
+            "CA": spaces.Box(-pycmo_max_float, pycmo_max_float, dtype=np.float64),
+            "Lon": spaces.Box(-180.0, 180.0, dtype=np.float64),
+            "Lat": spaces.Box(-90.0, 90.0, dtype=np.float64),
+            "CurrentFuel": spaces.Box(0.0, pycmo_max_float, dtype=np.float64),
+            "MaxFuel": spaces.Box(0.0, pycmo_max_float, dtype=np.float64),
+        }
+    )
+
+def add_mount_space_to_unit_space(unit_space:spaces.Dict, mounts:list[Mount]) -> spaces.Dict:
+    """Add a "Mounts" sub-space to `unit_space` and return `unit_space`.
+
+    The sub-space has one entry per mount, keyed by `str(mount.ID)` and sized
+    by the number of weapons on that mount. `unit_space` is returned unchanged
+    when `mounts` is empty.
+    """
+    if len(mounts) == 0:
+        return unit_space
+
+    per_mount_spaces = {
+        str(mount.ID): get_mount_space(num_weapons=len(mount.Weapons))
+        for mount in mounts
+    }
+    unit_space["Mounts"] = spaces.Dict(per_mount_spaces)
+    return unit_space
+
+def add_loadout_space_to_unit_space(unit_space:spaces.Dict, loadout:Loadout) -> spaces.Dict:
+    """Set `unit_space["Loadout"]` to a loadout space and return `unit_space`.
+
+    The loadout space is sized by the number of weapons in `loadout.Weapons`.
+    """
+    weapon_count = len(loadout.Weapons)
+    loadout_space = get_loadout_space(num_weapons=weapon_count)
+    unit_space["Loadout"] = loadout_space
+    return unit_space
diff --git a/scripts/floridistan/gym_demo.py b/scripts/floridistan/gym_demo.py
@@ -0,0 +1,60 @@
+import os
+import gymnasium
+from gymnasium import spaces
+import logging
+logging.basicConfig(level=logging.INFO)
+
+from sample_agent import ScriptedGymAgent
+
+from pycmo.configs.config import get_config
+from pycmo.lib.protocol import SteamClientProps
+from pycmo.lib.spaces import get_unit_space, get_contact_space, pycmo_text_max_length, text_charset
+
+# open config and set important files and folder paths
+config = get_config()
+
+scenario_name = "floridistan"
+player_side = "BLUE"
+scenario_script_folder_name = "floridistan" # name of folder containing the lua script that the agent will use
+
+command_version = config["command_mo_version"]
+observation_path = os.path.join(config['steam_observation_folder_path'], f'{scenario_name}.inst')
+action_path = os.path.join(config["scripts_path"], scenario_script_folder_name, "agent_action.lua")
+scen_ended_path = os.path.join(config['steam_observation_folder_path'], f'{scenario_name}_scen_has_ended.inst')
+steam_client_props = SteamClientProps(scenario_name=scenario_name, agent_action_filename=action_path, command_version=command_version)
+
+# names of the attacking unit, its target and the weapon used by the scripted agent
+attacker_name = "Thunder #1"
+target_name = "BTR-82V"
+strike_weapon_name = "GBU-53/B StormBreaker"
+
+# number of agent steps this demo runs before closing the environment
+max_steps = 282
+
+# the observation exposes one sub-space per tracked entity, keyed by its name
+observation_space = spaces.Dict(
+    {
+        "Thunder #1" : get_unit_space(),
+        "BTR-82V" : get_contact_space()
+    }
+)
+action_space = spaces.Text(max_length = pycmo_text_max_length, charset = text_charset)
+
+env = gymnasium.make('FloridistanPycmoGymEnv-v0',
+    observation_space=observation_space,
+    action_space=action_space,
+    player_side=player_side,
+    steam_client_props=steam_client_props,
+    observation_path=observation_path,
+    action_path=action_path,
+    scen_ended_path=scen_ended_path,                     
+)
+
+# try/finally guarantees env.close() runs even if the agent or a step raises
+try:
+    agent = ScriptedGymAgent(player_side=player_side, attacker_name=attacker_name, target_name=target_name, strike_weapon_name=strike_weapon_name)
+
+    observation, info = env.reset(seed=42, options={'close_scenario_end_and_player_eval_messages': False})
+    for _ in range(max_steps):
+        action = agent.action(observation)
+        observation, reward, terminated, truncated, info = env.step(action=action)
+
+        if terminated or truncated:
+            # ask the environment to close the scenario-end and player-evaluation messages as part of this reset
+            observation, info = env.reset(options={'close_scenario_end_and_player_eval_messages': True})
+            agent.reset()
+finally:
+    env.close()
diff --git a/scripts/floridistan/restart_demo.py b/scripts/floridistan/restart_demo.py
@@ -42,7 +42,7 @@
 agent = ScriptedAgent(player_side=player_side, attacker_name=attacker_name, target_name=target_name, strike_weapon_name=strike_weapon_name)
 
 # start the game
-state = env.reset()
+state = env.reset(close_scenario_end_and_player_eval_messages=False)
 action = ''
 
 stop_at_step = 200
@@ -70,7 +70,6 @@
 
     if state.step_type == StepType(2) or env.check_game_ended():
         print_env_information(state.step_id, parse_utc(int(state.observation.meta.Time)), action, state.reward, state.reward)
-        env.client.close_scenario_end_and_player_eval_messages()
-        state = env.reset()
+        state = env.reset(close_scenario_end_and_player_eval_messages=True)
         action = ''
         agent.reset()