Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

21 turn pycmo into an openai gym environment #55

Merged
merged 20 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Read about the project in detail [here](https://minhhua.com/pycmo/).

# Quick Start Guide
## Get PyCMO
1. Make sure the following settings are enabled in your Command Modern Operations' configurations (in `CPE.ini`):
1. (Non-Steam, Premium edition only) Make sure the following settings are enabled in your Command Modern Operations' configurations (in `CPE.ini`):
```
[Lua]
EnableSocket = 1
Expand All @@ -22,13 +22,15 @@ EncodingMode = 8
```
2. Click on "Clone or download", and then "Download Zip".
3. Unzip the repo anywhere.
4. Edit the project's `pycmo/configs/config_template.py` file to fit your system's paths, then rename it as `pycmo/configs/config.py` (IMPORTANT). You only need to edit the lines 8 - 10:
4. Edit the project's `pycmo/configs/config_template.py` file to fit your system's paths, then rename it as `pycmo/configs/config.py` (IMPORTANT). You only need to edit the lines 8 - 11:
```python
pycmo_path = os.path.join("path/to", "pycmo")
cmo_path = os.path.join("path/to/steam/installation/of", "Command - Modern Operations")
command_mo_version = "Command v1.06 - Build 1328.11"
command_mo_version = "Command v1.06 - Build 1328.12"
use_gymnasium = False
```
5. Navigate to the folder that contains `setup.py` and install the repository using `pip install .`. Anytime you make changes to the files in the project folder, you need to reinstall the package using `pip install .`. Alternatively, use `pip install -e .` to install the package in editable mode; after that, you can change the code without having to reinstall it.
5. Navigate to the folder that contains `setup.py` and install the repository using `pip install .`. Anytime you make changes to the files in the project folder, you need to reinstall the package using `pip install .`. Alternatively, use `pip install -e .` to install the package in editable mode; after that, you can change the code without having to reinstall it.
6. From PyCMO v1.4.0, [gymnasium](https://gymnasium.farama.org/) became an optional dependency for users who want to use PyCMO as a Gym environment. In this case, use `pip install .[gym]` or `pip install -e .[gym]` for setup. Remember to set `use_gymnasium = True` in the `pycmo/configs/config.py` file.

## Run an agent (Steam edition only)
1. Load the provided demo scenario `scen/steam_demo.scen` in the game.
Expand Down
12 changes: 12 additions & 0 deletions pycmo/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pycmo.configs.config import get_config

# Load the user configuration once at import time so the optional gymnasium
# integration can be switched on from `pycmo/configs/config.py`.
config = get_config()

# Use `.get` instead of indexing: a `config.py` created from an older template
# has no "gymnasium" key, and `import pycmo` must keep working for those users.
if config.get("gymnasium", False):
    # Imported lazily because gymnasium is an optional dependency.
    from gymnasium.envs.registration import register

    register(
        id="FloridistanPycmoGymEnv-v0",
        entry_point="pycmo.env.cmo_gym_env:FloridistanPycmoGymEnv",
    )
6 changes: 4 additions & 2 deletions pycmo/configs/config_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
def get_config():
pycmo_path = os.path.join("path/to", "pycmo")
cmo_path = os.path.join("path/to/steam/installation/of", "Command - Modern Operations")
command_mo_version = "Command v1.06 - Build 1328.11"
command_mo_version = "Command v1.06 - Build 1328.12"
use_gymnasium = False

return {
"command_path": cmo_path,
Expand All @@ -17,6 +18,7 @@ def get_config():
"scen_ended": os.path.join(pycmo_path, "pycmo", "configs", "scen_has_ended.txt"),
"pickle_path": os.path.join(pycmo_path, "pickle"),
"scripts_path": os.path.join(pycmo_path, "scripts"),
"command_mo_version": command_mo_version
"command_mo_version": command_mo_version,
"gymnasium": use_gymnasium,
# "command_cli_output_path": "C:\\ProgramData\\Command Professional Edition 2\\Analysis_Int", # only applicable to Premium version so we update this later
}
8 changes: 5 additions & 3 deletions pycmo/env/cmo_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,11 @@ def __init__(self,
with open(self.scen_ended, 'w') as file:
file.writelines(data)

def reset(self) -> TimeStep:
def reset(self, close_scenario_end_and_player_eval_messages:bool=False) -> TimeStep:
try:
if close_scenario_end_and_player_eval_messages:
self.client.close_scenario_end_and_player_eval_messages()

restart_result = self.client.restart_scenario()

# check that the scenario loaded event has fired correctly in CMO, and if not, restart the scenario
Expand Down Expand Up @@ -346,7 +349,7 @@ def get_obs(self) -> FeaturesFromSteam:
if get_obs_retries > max_get_obs_retries:
raise TimeoutError("CMOEnv unable to get observation.")

def action_spec(self, observation:Features) -> AvailableFunctions:
def action_spec(self, observation:Features | FeaturesFromSteam) -> AvailableFunctions:
return AvailableFunctions(observation)

def check_game_ended(self) -> bool:
Expand All @@ -365,4 +368,3 @@ def end_game(self) -> TimeStep:
export_observation_event_name = 'Export observation'
action = f"ScenEdit_RunScript('{pycmo_lua_lib_path}', true)\nteardown_and_end_scenario('{export_observation_event_name}', true)"
return self.step(action)

123 changes: 123 additions & 0 deletions pycmo/env/cmo_gym_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from typing import Tuple
import gymnasium as gym
from gymnasium import spaces
import numpy as np

from pycmo.lib.features import FeaturesFromSteam
from pycmo.env.cmo_env import CMOEnv, StepType
from pycmo.lib.protocol import SteamClientProps

class BasePycmoGymEnv(gym.Env):
    """Gymnasium-style wrapper around a :class:`CMOEnv` instance.

    The wrapper delegates `reset`, `step` and `close` to the wrapped
    environment and converts its observations through the `_get_obs` and
    `_get_info` hooks, which concrete subclasses are expected to override.
    """

    metadata = {"render_modes": [None]}

    def __init__(
        self,
        player_side: str,
        steam_client_props: SteamClientProps,
        observation_path: str,
        action_path: str,
        scen_ended_path: str,
        pycmo_lua_lib_path: str | None = None,
        max_resets: int = 20,
        render_mode=None,
    ):
        """Create the wrapped `CMOEnv` and record the render mode.

        All arguments except `render_mode` are forwarded unchanged to
        `CMOEnv`. `render_mode` must be `None` or one of
        `metadata["render_modes"]`.
        """
        self.cmo_env = CMOEnv(
            player_side=player_side,
            steam_client_props=steam_client_props,
            observation_path=observation_path,
            action_path=action_path,
            scen_ended_path=scen_ended_path,
            pycmo_lua_lib_path=pycmo_lua_lib_path,
            max_resets=max_resets,
        )

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

    def _get_obs(self, observation: FeaturesFromSteam) -> dict:
        """Hook: convert a raw observation into the gym observation.

        The base implementation does nothing and returns `None`; subclasses
        override it.
        """
        ...

    def _get_info(self) -> dict:
        """Hook: build the auxiliary `info` dict.

        The base implementation does nothing and returns `None`; subclasses
        override it.
        """
        ...

    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[dict, dict]:
        """Restart the scenario and return the initial `(observation, info)`.

        Args:
            seed: Forwarded to `gym.Env.reset` to seed `self.np_random`.
            options: Optional dict. The key
                `'close_scenario_end_and_player_eval_messages'` (default
                `False`) is forwarded to `CMOEnv.reset`.
        """
        # Let gymnasium handle seeding, as its Env API expects.
        super().reset(seed=seed)

        # `options` defaults to None and the key may be absent; neither case
        # should make a plain `env.reset()` fail.
        close_messages = (options or {}).get(
            'close_scenario_end_and_player_eval_messages', False
        )

        state = self.cmo_env.reset(
            close_scenario_end_and_player_eval_messages=close_messages
        )
        observation = self._get_obs(observation=state.observation)
        info = self._get_info()

        return observation, info

    def step(self, action) -> Tuple[dict, int, bool, bool, dict]:
        """Apply `action` and return `(obs, reward, terminated, truncated, info)`.

        `terminated` is true when the wrapped environment reports that the
        game ended or the returned step has type `StepType(2)`. `truncated`
        is always `False`.
        """
        state = self.cmo_env.step(action)
        # NOTE(review): StepType(2) is presumably the "last step" member;
        # confirm against the StepType enum.
        terminated = self.cmo_env.check_game_ended() or state.step_type == StepType(2)
        truncated = False
        reward = state.reward
        observation = self._get_obs(observation=state.observation)
        info = self._get_info()

        return observation, reward, terminated, truncated, info

    def close(self) -> None:
        """Ask the wrapped environment to end the game."""
        self.cmo_env.end_game()

class FloridistanPycmoGymEnv(BasePycmoGymEnv):
    """Gym environment for the Floridistan demo scenario.

    Observations contain one entry for the unit named "Thunder #1" and one
    for the contact named "BTR-82V". The attributes reported for each are
    the keys of the matching sub-space in `observation_space`.
    """

    def __init__(
        self,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        player_side: str,
        steam_client_props: SteamClientProps,
        observation_path: str,
        action_path: str,
        scen_ended_path: str,
        pycmo_lua_lib_path: str | None = None,
        max_resets: int = 20,
        render_mode=None,
    ):
        """Initialise the base environment and store the given spaces.

        `observation_space` must be indexable by "Thunder #1" and "BTR-82V";
        each sub-space's keys name the attributes read from the matching
        unit/contact. The remaining arguments go to `BasePycmoGymEnv`.
        """
        super().__init__(
            player_side=player_side,
            steam_client_props=steam_client_props,
            observation_path=observation_path,
            action_path=action_path,
            scen_ended_path=scen_ended_path,
            pycmo_lua_lib_path=pycmo_lua_lib_path,
            max_resets=max_resets,
            render_mode=render_mode,
        )

        self.observation_space = observation_space
        self.action_space = action_space

    @staticmethod
    def _find_by_name(entities, name: str, kind: str):
        """Return the first entity whose `Name` equals `name`.

        Raises:
            LookupError: if no entity matches. Failing loudly avoids
                reporting another entity's data under `name`.
        """
        match = next((entity for entity in entities if entity.Name == name), None)
        if match is None:
            raise LookupError(f"{kind} '{name}' was not found in the current observation.")
        return match

    def _extract_features(self, entity, space_key: str) -> dict:
        """Read from `entity` every attribute listed in the `space_key` sub-space."""
        features = {}
        for key in self.observation_space[space_key].keys():
            value = getattr(entity, key)
            if isinstance(value, float):
                # Floats are wrapped in a length-1 float64 array.
                features[key] = np.array((value,), dtype=np.float64)
            else:
                features[key] = value
        return features

    def _get_obs(self, observation: FeaturesFromSteam) -> dict:
        """Build the gym observation from a raw `FeaturesFromSteam` object.

        Returns:
            A dict with the keys "Thunder #1" (looked up in
            `observation.units`) and "BTR-82V" (looked up in
            `observation.contacts`), each mapping attribute names to values.

        Raises:
            LookupError: if the unit or the contact is absent.
        """
        unit_name = "Thunder #1"
        contact_name = "BTR-82V"

        _observation = {}

        unit = self._find_by_name(observation.units, unit_name, "Unit")
        _observation[unit_name] = self._extract_features(unit, unit_name)

        contact = self._find_by_name(observation.contacts, contact_name, "Contact")
        _observation[contact_name] = self._extract_features(contact, contact_name)

        return _observation

    def _get_info(self) -> dict:
        """Return an empty info dict; this environment reports no extra info."""
        return {}
5 changes: 2 additions & 3 deletions pycmo/lib/run_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def run_loop_steam(env: CPEEnv | CMOEnv,
agent:BaseAgent=None,
max_steps=None) -> None:
# start the game
state = env.reset()
state = env.reset(close_scenario_end_and_player_eval_messages=False)
action = ""

# Configure a limit for the maximum number of steps
Expand All @@ -108,8 +108,7 @@ def run_loop_steam(env: CPEEnv | CMOEnv,

if state.step_type == StepType(2) or env.check_game_ended():
print_env_information(state.step_id, parse_utc(int(state.observation.meta.Time)), action, state.reward, state.reward)
env.client.close_scenario_end_and_player_eval_messages()
state = env.reset()
state = env.reset(close_scenario_end_and_player_eval_messages=True)
action = ''
agent.reset()

Expand Down
87 changes: 87 additions & 0 deletions pycmo/lib/spaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from gymnasium import spaces
import numpy as np

from pycmo.lib.features import Mount, Loadout

# CONSTANTS
# Upper bound on the length of any Text space used for PyCMO observations/actions.
pycmo_text_max_length = 2000
# Upper bounds used for integer- and float-valued Box spaces.
pycmo_max_int = 2 ** 62
pycmo_max_float = float(2 ** 62)
# Every printable ASCII character (codes 32-126), each listed exactly once.
# The braces were previously written as "{{}}", a format-string escape that is
# not needed in a plain literal and that duplicated "{" and "}" in the charset.
text_charset = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ~`!@#$%^&*()-_=+[]{}\\|;:'\",./<>?"

def get_weapon_space() -> spaces.Dict:
    """Return the Dict space describing one weapon record.

    Fields: a text "ID" plus three non-negative int64 boxes ("WeaponID",
    "QuantRemaining", "MaxQuant").
    """
    def count_box() -> spaces.Box:
        # A fresh Box per field, so no two fields share a space instance.
        return spaces.Box(0, pycmo_max_int, dtype=np.int64)

    fields = {
        "ID": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
        "WeaponID": count_box(),
        "QuantRemaining": count_box(),
        "MaxQuant": count_box(),
    }
    return spaces.Dict(fields)

def get_contact_space() -> spaces.Dict:
    """Return the Dict space describing one contact.

    Fields: text "ID" and "Name", and float64 boxes for "CS", "CA", "Lon"
    (-180..180) and "Lat" (-90..90).
    """
    def text_field() -> spaces.Text:
        return spaces.Text(max_length=pycmo_text_max_length, charset=text_charset)

    fields = {
        "ID": text_field(),
        "Name": text_field(),
        "CS": spaces.Box(0.0, pycmo_max_float, dtype=np.float64),
        "CA": spaces.Box(-pycmo_max_float, pycmo_max_float, dtype=np.float64),
        "Lon": spaces.Box(-180.0, 180.0, dtype=np.float64),
        "Lat": spaces.Box(-90.0, 90.0, dtype=np.float64),
    }
    return spaces.Dict(fields)

def get_mount_space(num_weapons:int=0) -> spaces.Dict:
    """Return the Dict space describing one mount.

    Always contains "ID", "Name" and "DBID". When `num_weapons` is positive,
    a "Weapons" Tuple with that many weapon spaces is added as well.
    """
    fields = {
        "ID": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
        "Name": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
        "DBID": spaces.Box(0, pycmo_max_int, dtype=np.int64),
    }
    has_weapons = num_weapons > 0
    if has_weapons:
        weapon_spaces = [get_weapon_space() for _ in range(num_weapons)]
        fields["Weapons"] = spaces.Tuple(weapon_spaces)
    return spaces.Dict(fields)

def get_loadout_space(num_weapons:int=0) -> spaces.Dict:
    """Return the Dict space describing one loadout.

    Always contains an int64 "ID", a text "Name" and an int64 "DBID". When
    `num_weapons` is positive, a "Weapons" Tuple with that many weapon spaces
    is added as well.
    """
    fields = {
        "ID": spaces.Box(0, pycmo_max_int, dtype=np.int64),
        "Name": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
        "DBID": spaces.Box(0, pycmo_max_int, dtype=np.int64),
    }
    has_weapons = num_weapons > 0
    if has_weapons:
        weapon_spaces = [get_weapon_space() for _ in range(num_weapons)]
        fields["Weapons"] = spaces.Tuple(weapon_spaces)
    return spaces.Dict(fields)

def get_unit_space() -> spaces.Dict:
    """Return the Dict space describing one unit.

    Fields: text "ID", "Name", "Side" and "Type"; float64 boxes for "CH"
    (0..360), "CS", "CA", "Lon" (-180..180), "Lat" (-90..90), "CurrentFuel"
    and "MaxFuel".
    """
    unit_space = spaces.Dict(
        {
            "ID": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
            "Name": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
            "Side": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
            # Use the shared charset like every other text field; without it
            # the space falls back to gymnasium's default character set.
            "Type": spaces.Text(max_length=pycmo_text_max_length, charset=text_charset),
            "CH": spaces.Box(0.0, 360.0, dtype=np.float64),
            "CS": spaces.Box(0.0, pycmo_max_float, dtype=np.float64),
            "CA": spaces.Box(-pycmo_max_float, pycmo_max_float, dtype=np.float64),
            "Lon": spaces.Box(-180.0, 180.0, dtype=np.float64),
            "Lat": spaces.Box(-90.0, 90.0, dtype=np.float64),
            "CurrentFuel": spaces.Box(0.0, pycmo_max_float, dtype=np.float64),
            "MaxFuel": spaces.Box(0.0, pycmo_max_float, dtype=np.float64),
        }
    )
    # The Dict is built once; wrapping it in a second spaces.Dict was redundant.
    return unit_space

def add_mount_space_to_unit_space(unit_space:spaces.Dict, mounts:list[Mount]) -> spaces.Dict:
    """Attach a "Mounts" sub-space to `unit_space` and return it.

    `unit_space` is modified in place. Each mount contributes one entry keyed
    by `str(mount.ID)`, sized by the number of weapons on that mount. When
    `mounts` is empty, `unit_space` is returned untouched.
    """
    if len(mounts) == 0:
        return unit_space

    unit_space["Mounts"] = spaces.Dict(
        {
            str(mount.ID): get_mount_space(num_weapons=len(mount.Weapons))
            for mount in mounts
        }
    )
    return unit_space

def add_loadout_space_to_unit_space(unit_space:spaces.Dict, loadout:Loadout) -> spaces.Dict:
    """Attach a "Loadout" sub-space to `unit_space` and return it.

    `unit_space` is modified in place; the loadout space is sized by the
    number of weapons in `loadout.Weapons`.
    """
    weapon_count = len(loadout.Weapons)
    loadout_space = get_loadout_space(num_weapons=weapon_count)
    unit_space["Loadout"] = loadout_space
    return unit_space
60 changes: 60 additions & 0 deletions scripts/floridistan/gym_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
import gymnasium
from gymnasium import spaces
import logging
logging.basicConfig(level=logging.INFO)

from sample_agent import ScriptedGymAgent

from pycmo.configs.config import get_config
from pycmo.lib.protocol import SteamClientProps
from pycmo.lib.spaces import get_unit_space, get_contact_space, pycmo_text_max_length, text_charset

# Load the user configuration (installation paths, game version).
config = get_config()

scenario_name = "floridistan"
player_side = "BLUE"
scenario_script_folder_name = "floridistan" # name of folder containing the lua script that the agent will use

command_version = config["command_mo_version"]
observation_path = os.path.join(config['steam_observation_folder_path'], f'{scenario_name}.inst')
action_path = os.path.join(config["scripts_path"], scenario_script_folder_name, "agent_action.lua")
scen_ended_path = os.path.join(config['steam_observation_folder_path'], f'{scenario_name}_scen_has_ended.inst')
steam_client_props = SteamClientProps(scenario_name=scenario_name, agent_action_filename=action_path, command_version=command_version)

# The observation exposes the attacking unit and the target contact, keyed by name.
observation_space = spaces.Dict(
    {
        "Thunder #1" : get_unit_space(),
        "BTR-82V" : get_contact_space()
    }
)
# Actions are free-form text strings.
action_space = spaces.Text(max_length = pycmo_text_max_length, charset = text_charset)

env = gymnasium.make('FloridistanPycmoGymEnv-v0',
    observation_space=observation_space,
    action_space=action_space,
    player_side=player_side,
    steam_client_props=steam_client_props,
    observation_path=observation_path,
    action_path=action_path,
    scen_ended_path=scen_ended_path,
)

attacker_name = "Thunder #1"
target_name = "BTR-82V"
strike_weapon_name = "GBU-53/B StormBreaker"

agent = ScriptedGymAgent(player_side=player_side, attacker_name=attacker_name, target_name=target_name, strike_weapon_name=strike_weapon_name)

# Number of environment steps the demo runs before closing the environment.
max_steps = 282

observation, info = env.reset(seed=42, options={'close_scenario_end_and_player_eval_messages': False})
for _ in range(max_steps):
    action = agent.action(observation)
    observation, reward, terminated, truncated, info = env.step(action=action)

    if terminated or truncated:
        # Ask reset to close the scenario-end / player-evaluation messages
        # first, then start a fresh episode.
        observation, info = env.reset(options={'close_scenario_end_and_player_eval_messages': True})
        agent.reset()

env.close()
5 changes: 2 additions & 3 deletions scripts/floridistan/restart_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
agent = ScriptedAgent(player_side=player_side, attacker_name=attacker_name, target_name=target_name, strike_weapon_name=strike_weapon_name)

# start the game
state = env.reset()
state = env.reset(close_scenario_end_and_player_eval_messages=False)
action = ''

stop_at_step = 200
Expand Down Expand Up @@ -70,7 +70,6 @@

if state.step_type == StepType(2) or env.check_game_ended():
print_env_information(state.step_id, parse_utc(int(state.observation.meta.Time)), action, state.reward, state.reward)
env.client.close_scenario_end_and_player_eval_messages()
state = env.reset()
state = env.reset(close_scenario_end_and_player_eval_messages=True)
action = ''
agent.reset()
Loading