From 9738638e0b49c892ad19e9186db7ad278b99c3a0 Mon Sep 17 00:00:00 2001
From: Felipe Montealegre-Mora
Date: Thu, 13 Jun 2024 20:40:50 +0000
Subject: [PATCH 1/2] pruned off old envs

---
 src/rl4fisheries/__init__.py    |  11 +-
 src/rl4fisheries/envs/asm.py    | 318 -----------------------------
 src/rl4fisheries/envs/asm_2o.py | 340 --------------------------------
 3 files changed, 2 insertions(+), 667 deletions(-)
 delete mode 100644 src/rl4fisheries/envs/asm.py
 delete mode 100644 src/rl4fisheries/envs/asm_2o.py

diff --git a/src/rl4fisheries/__init__.py b/src/rl4fisheries/__init__.py
index aaab717..a40d18e 100644
--- a/src/rl4fisheries/__init__.py
+++ b/src/rl4fisheries/__init__.py
@@ -1,8 +1,5 @@
 # Importing from sub-directories here makes these available as 'top-level' imports
-from rl4fisheries.envs.asm import Asm
-from rl4fisheries.envs.asm_2o import Asm2o
 from rl4fisheries.envs.asm_env import AsmEnv
-
 from rl4fisheries.envs.asm_esc import AsmEnvEsc
 from rl4fisheries.envs.asm_cr_like import AsmCRLike
@@ -13,14 +10,10 @@
 from gymnasium.envs.registration import register
 
-# action is 'harvest'
-register(id="Asm-v0", entry_point="rl4fisheries.envs.asm:Asm")
+# action is fishing intensity
+register(id="AsmEnv", entry_point="rl4fisheries.envs.asm_env:AsmEnv")
 # action is 'escapement'
 register(id="AsmEnvEsc", entry_point="rl4fisheries.envs.asm_esc:AsmEnvEsc")
-# action is harvest, but observes both total count and mean biomass
-register(id="Asm2o-v0", entry_point="rl4fisheries.envs.asm_2o:Asm2o")
-# action is harvest, but observes both total count and mean biomass
-register(id="AsmEnv", entry_point="rl4fisheries.envs.asm_env:AsmEnv")
 # CR-like actions
 register(id="AsmCRLike", entry_point="rl4fisheries.envs.asm_cr_like:AsmCRLike")
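These registrations are the package's public entry points after the change; a minimal usage sketch (assuming gymnasium's standard reset/step API, which the deleted envs below also follow):

```python
import gymnasium as gym
import rl4fisheries  # noqa: F401 -- importing the package runs the register() calls above

# "AsmEnv" now resolves to rl4fisheries.envs.asm_env:AsmEnv
env = gym.make("AsmEnv")
obs, info = env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
```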
config.get("p_big" , np.float32(0.05)), # probability of big year class - "sdr": config.get("sdr" , np.float32(0.3)), # recruit sd given stock-recruit relationship - "rho": config.get("rho" , np.float32(0.0)), # autocorrelation in recruitment sequence - "sdv": config.get("sdv" , np.float32(1e-9)), # sd in vulnerable biomass (survey) - "sigma": config.get("sigma" , np.float32(1.5)), - } - # these parameters can be specified in config - self.n_year = config.get("n_year", 1000) - self.Tmax = self.n_year - self.threshold = config.get("threshold", np.float32(1e-4)) - self.training = config.get("training", True) - self.timestep = 0 - self.bound = 50 # a rescaling parameter - self.parameters["ages"] = range( - 1, self.parameters["n_age"] + 1 - ) # vector of ages for calculations - default_init = self.initialize_population() - self.init_state = config.get("init_state", equib_init) - - self.render_mode = render_mode - self.screen_width = 600 - self.screen_height = 400 - self.screen = None - self.clock = None - self.isopen = True - - self.action_space = gym.spaces.Box( - np.array([-1], dtype=np.float32), - np.array([1], dtype=np.float32), - dtype=np.float32, - ) - self.observation_space = gym.spaces.Box( - np.array([-1], dtype=np.float32), - np.array([1], dtype=np.float32), - dtype=np.float32, - ) - - #self.reset() - - - def step(self, action): - mortality = self.mortality_units(action) - self.state, reward = self.harvest(self.state, mortality) - self.state = self.population_growth(self.state) - self.timestep += 1 - terminated = bool(self.timestep >= self.n_year) - - # in training mode only: punish for population collapse - # if sum(n) <= self.threshold: # note CB's code had this as well: `and self.training:` - # terminated = True - # reward -= 50/self.timestep - - observation = self.observe() - return observation, np.float64(reward), terminated, False, {} - - def render(self): - if self.render_mode is None: - assert self.spec is not None - gym.logger.warn( - "You are calling render method without specifying any render mode. " - "You can specify the render_mode at initialization, " - f'e.g. 
gym.make("{self.spec.id}", render_mode="rgb_array")' - ) - return - - try: - import pygame - from pygame import gfxdraw - except ImportError as e: - raise DependencyNotInstalled( - "pygame is not installed, run `pip install gymnasium[classic-control]`" - ) from e - - if self.screen is None: - pygame.init() - if self.render_mode == "human": - pygame.display.init() - self.screen = pygame.display.set_mode( - (self.screen_width, self.screen_height) - ) - else: # mode == "rgb_array": - self.screen = pygame.Surface((self.screen_width, self.screen_height)) - if self.clock is None: - self.clock = pygame.time.Clock() - - world_width = 2 - scale = self.screen_width / world_width - self.surf = pygame.Surface((self.screen_width, self.screen_height)) - self.surf.fill((255, 255, 255)) - - total = self.population_units() - y = 2 * total / self.screen_height - 1 - y = int(np.clip(y, [0], [self.screen_height])) - x = int((self.n_year / self.Tmax ) * self.screen_width) - y = x - gfxdraw.filled_circle( # x, y, rad, color - self.surf, x, y, int(4), (128, 128, 128) - ) - self.surf = pygame.transform.flip(self.surf, False, True) - self.screen.blit(self.surf, (0, 0)) - if self.render_mode == "human": - pygame.event.pump() - self.clock.tick(self.metadata["render_fps"]) - pygame.display.flip() - - elif self.render_mode == "rgb_array": - return np.transpose( - np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) - ) - - def initialize_population(self): - p = self.parameters # snag those pars - ninit = np.float32([0] * p["n_age"]) # initial numbers - survey_vul = ninit.copy() # vulnerability - wt = ninit.copy() # weight - mat = ninit.copy() # maturity - Lo = ninit.copy() # survivorship unfished - Lf = ninit.copy() # survivorship fished - mwt = ninit.copy() # mature weight - - # leading array calculations to get vul-at-age, wt-at-age, etc. 
-        for a in range(0, p["n_age"], 1):
-            survey_vul[a] = 1 / (1 + np.exp(-p["asl"] * (p["ages"][a] - p["ahv"])))
-            wt[a] = pow(
-                (1 - np.exp(-p["vbk"] * p["ages"][a])), 3
-            )  # 3 --> isometric growth
-            mat[a] = 1 / (1 + np.exp(-p["asl"] * (p["ages"][a] - p["ahm"])))
-            if a == 0:
-                Lo[a] = 1
-                Lf[a] = 1
-            elif a > 0 and a < (p["n_age"] - 1):
-                Lo[a] = Lo[a - 1] * p["s"]
-                Lf[a] = Lf[a - 1] * p["s"] * (1 - survey_vul[a - 1] * p["uo"])
-            elif a == (p["n_age"] - 1):
-                Lo[a] = Lo[a - 1] * p["s"] / (1 - p["s"])
-                Lf[a] = (
-                    Lf[a - 1]
-                    * p["s"]
-                    * (1 - survey_vul[a - 1] * p["uo"])
-                    / (1 - p["s"] * (1 - survey_vul[a - 1] * p["uo"]))
-                )
-        ninit = np.array(p["rinit"]) * Lf
-        mwt = mat * np.array(wt)
-        sbro = sum(Lo * mwt)  # spawner biomass per recruit in the unfished condition
-        bha = p["cr"] / sbro  # beverton-holt alpha
-        bhb = (p["cr"] - 1) / (p["ro"] * sbro)  # beverton-holt beta
-
-        # put it all in self so we can reference later
-        self.parameters["Lo"] = Lo
-        self.parameters["Lf"] = Lf
-        self.parameters["survey_vul"] = survey_vul
-        self.parameters["harvest_vul"] = survey_vul  # TBD: compute it separately
-        self.parameters["wt"] = wt
-        self.parameters["mwt"] = mwt
-        self.parameters["bha"] = bha
-        self.parameters["bhb"] = bhb
-        self.parameters["p_big"] = 0.05
-        self.parameters["sdr"] = 0.3
-        self.parameters["rho"] = 0
-        n = np.array(ninit, dtype=np.float32)
-        self.state = np.clip(n, 0, np.Inf)
-        return self.state
-
-    def harvest(self, n, mortality):
-        p = self.parameters
-        self.vulb = sum(p["harvest_vul"] * n * p["wt"])
-        self.vbobs = self.vulb  # could multiply this by random deviate
-        self.ssb = sum(p["mwt"] * n)
-        if sum(n) > 0:
-            self.abar = sum(p["harvest_vul"] * np.array(p["ages"]) * n) / sum(n)
-            self.wbar = sum(p["harvest_vul"] * n * p["wt"]) / sum(n * p["wt"])
-        else:
-            self.abar = 0
-            self.wbar = 0
-        self.yieldf = mortality[0] * self.vulb  # fishery yield
-        reward = self.yieldf ** p["upow"]  # this is utility
-        n = p["s"] * n * (1 - p["harvest_vul"] * mortality)  # eat fish
-        return n, reward
-
-    def population_growth(self, n):
-        p = self.parameters
-        # mu = np.log(1) - p["sigma"] ** 2 / 2
-        bh_alpha = p["bha"]  # * np.random.lognormal(mu, p["sigma"])
-
-        n[p["n_age"] - 1] = (
-            n[p["n_age"] - 1] + n[p["n_age"] - 2]
-        )  # plus group accounting
-        for a in range(p["n_age"] - 2, 0, -1):
-            n[a] = n[a - 1]  # advance fish one a
-
-        n[0] = (
-            (bh_alpha)
-            * self.ssb
-            / (1 + p["bhb"] * self.ssb)
-            * self.r_devs[self.timestep]
-        )  # NOTE eventually needs to be r_devs[t]
-        return n
-
-    def observe(self):
-        self.vulb = sum(self.parameters["survey_vul"] * self.state * self.parameters["wt"])  # update vulnerable biomass
-        observation = 2 * np.array([self.vulb]) / self.bound - 1
-        observation = np.clip(observation, -1.0, 1.0)
-        return np.float32(observation)
-
-    def population_units(self):
-        total = np.array([sum(self.state)])
-        return total
-
-    def mortality_units(self, action):
-        action = np.clip(action, [-1], [1])
-        mortality = (action + 1.0) / 2.0
-        return mortality
-
-    def reset(self, *, seed=None, options=None):
-        self.timestep = 0
-        self.state = self.initialize_population()
-        self.state = self.init_state * np.array(
-            np.random.uniform(0.1, 1), dtype=np.float32
-        )
-        self.r_devs = get_r_devs(
-            n_year=self.n_year,
-            p_big=self.parameters["p_big"],
-            sdr=self.parameters["sdr"],
-            rho=self.parameters["rho"],
-        )
-        obs = self.observe()
-        return obs, {}
-
-
-def get_r_devs(n_year, p_big=0.05, sdr=0.3, rho=0):
-    """
-    f(x) to create recruitment deviates, which are multiplied
-    by the stock-recruitment prediction in the age-structured model
-
-    args:
-        n_year: number of deviates required for simulation
-        p_big: Pr(big year class)
-        r_big: magnitude of big year class
-        sdr: sd of recruitment
-        rho: autocorrelation in recruitment sequence
-    returns:
-        vector of recruitment deviates of length n_year
-
-    """
-    r_mult = np.float32([1] * n_year)
-    u_rand = np.random.uniform(0, 1, n_year)
-    n_rand = np.random.normal(0, 1, n_year)
-    r_big = np.random.uniform(10, 30, n_year)
-
-    r_low = (1 - p_big * r_big) / (1 - p_big)  # small rec event
-    r_low = np.clip(r_low, 0, None)
-    dev_last = 0
-    for t in range(0, n_year, 1):
-        r_mult[t] = r_low[t]
-        if u_rand[t] < p_big:
-            r_mult[t] = r_big[t]
-        r_mult[t] = r_mult[t] * np.exp(sdr * n_rand[t] + rho * dev_last)
-        dev_last = sdr * n_rand[t] + rho * dev_last
-    return r_mult
-
-
-# smoke-test
-# Confirm environment is correctly defined:
-# from stable_baselines3.common.env_checker import check_env
-# check_env(asm(), warn=True)
diff --git a/src/rl4fisheries/envs/asm_2o.py b/src/rl4fisheries/envs/asm_2o.py
deleted file mode 100644
index c3ba9de..0000000
--- a/src/rl4fisheries/envs/asm_2o.py
+++ /dev/null
@@ -1,340 +0,0 @@
-import numpy as np
-import gymnasium as gym
-import matplotlib.pyplot as plt
-from typing import Optional
-
-# equilibrium dist will in general depend on parameters, need a more robust way
-# to reset to random but not unrealistic starting distribution
-equib_init = [
-    0.99999999,
-    0.86000001,
-    0.73960002,
-    0.63605603,
-    0.54700819,
-    0.47042705,
-    0.40456727,
-    0.34792786,
-    0.29921796,
-    0.25732745,
-    0.22130161,
-    0.19031939,
-    0.16367468,
-    0.14076023,
-    0.1210538,
-    0.10410627,
-    0.08953139,
-    0.076997,
-    0.06621742,
-    0.40676419,
-]
-
-
-class Asm2o(gym.Env):
-    """an age-structured model following the gym API standard"""
-
-    metadata = {
-        "render_modes": ["human", "rgb_array"],
-        "render_fps": 30,
-    }
-
-    def __init__(self, render_mode: Optional[str] = 'rgb_array', config={}):
-        config = config or {}
-        self.parameters = {
-            "n_age": 20,  # number of age classes
-            "vbk": config.get("vbk", np.float32(0.23)),  # von Bertalanffy kappa
-            "s": config.get("s", np.float32(0.86)),  # average survival
-            "cr": config.get("cr", np.float32(6.0)),  # Goodyear compensation ratio
-            "rinit": config.get("rinit", np.float32(0.01)),  # initial number age-1 recruits
-            "ro": config.get("ro", np.float32(1.0)),  # average unfished recruitment
-            "uo": config.get("uo", np.float32(0.12)),  # average historical exploitation rate
-            "asl": config.get("asl", np.float32(0.5)),  # vul par 1
-            "ahv": config.get("ahv", np.float32(5.0)),  # vul par 2
-            "ahm": config.get("ahm", np.float32(6.0)),  # age 50% maturity
-            "upow": config.get("upow", np.float32(1.0)),  # 1 = max yield objective, < 1 = HARA
-            "p_big": config.get("p_big", np.float32(0.05)),  # probability of big year class
-            "sdr": config.get("sdr", np.float32(0.3)),  # recruit sd given stock-recruit relationship
-            "rho": config.get("rho", np.float32(0.0)),  # autocorrelation in recruitment sequence
-            "sdv": config.get("sdv", np.float32(1e-9)),  # sd in vulnerable biomass (survey)
-            "sigma": config.get("sigma", np.float32(1.5)),
-        }
-        # these parameters can be specified in config
-        self.n_year = config.get("n_year", 1000)
-        self.Tmax = self.n_year
-        self.threshold = config.get("threshold", np.float32(1e-4))
-        self.training = config.get("training", True)
-        self.timestep = 0
-        self.bound = 50  # a rescaling parameter
-        self.parameters["ages"] = range(
-            1, self.parameters["n_age"] + 1
-        )  # vector of ages for calculations
-
-        default_init = self.initialize_population()
-        self.init_state = config.get("init_state", equib_init)
-
-        self.render_mode = render_mode
-        self.screen_width = 600
-        self.screen_height = 400
-        self.screen = None
-        self.clock = None
-        self.isopen = True
-
-        self.action_space = gym.spaces.Box(
-            np.array([-1], dtype=np.float32),
-            np.array([1], dtype=np.float32),
-            dtype=np.float32,
-        )
-        self.observation_space = gym.spaces.Box(
-            np.array([-1, -1], dtype=np.float32),
-            np.array([1, 1], dtype=np.float32),
-            dtype=np.float32,
-        )
-
-        #self.reset()
-
-    def step(self, action):
-        mortality = self.mortality_units(action)
-        self.state, reward = self.harvest(self.state, mortality)
-        self.state = self.population_growth(self.state)
-        self.timestep += 1
-        terminated = bool(self.timestep >= self.n_year)
-
-        # in training mode only: punish for population collapse
-        # if sum(n) <= self.threshold:  # note CB's code had this as well: `and self.training:`
-        #     terminated = True
-        #     reward -= 50/self.timestep
-
-        observation = self.observe()
-        return observation, np.float64(reward), terminated, False, {}
-
-    def render(self):
-        if self.render_mode is None:
-            assert self.spec is not None
-            gym.logger.warn(
-                "You are calling render method without specifying any render mode. "
-                "You can specify the render_mode at initialization, "
-                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
-            )
-            return
-
-        try:
-            import pygame
-            from pygame import gfxdraw
-        except ImportError as e:
-            raise DependencyNotInstalled(
-                "pygame is not installed, run `pip install gymnasium[classic-control]`"
-            ) from e
-
-        if self.screen is None:
-            pygame.init()
-            if self.render_mode == "human":
-                pygame.display.init()
-                self.screen = pygame.display.set_mode(
-                    (self.screen_width, self.screen_height)
-                )
-            else:  # mode == "rgb_array":
-                self.screen = pygame.Surface((self.screen_width, self.screen_height))
-        if self.clock is None:
-            self.clock = pygame.time.Clock()
-
-        world_width = 2
-        scale = self.screen_width / world_width
-        self.surf = pygame.Surface((self.screen_width, self.screen_height))
-        self.surf.fill((255, 255, 255))
-
-        total = self.population_units()
-        y = 2 * total / self.screen_height - 1
-        y = int(np.clip(y, [0], [self.screen_height]))
-        x = int((self.n_year / self.Tmax) * self.screen_width)
-        y = x
-        gfxdraw.filled_circle(  # x, y, rad, color
-            self.surf, x, y, int(4), (128, 128, 128)
-        )
-        self.surf = pygame.transform.flip(self.surf, False, True)
-        self.screen.blit(self.surf, (0, 0))
-        if self.render_mode == "human":
-            pygame.event.pump()
-            self.clock.tick(self.metadata["render_fps"])
-            pygame.display.flip()
-
-        elif self.render_mode == "rgb_array":
-            return np.transpose(
-                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
-            )
-
-    def initialize_population(self):
-        p = self.parameters  # snag those pars
-        ninit = np.float32([0] * p["n_age"])  # initial numbers
-        survey_vul = ninit.copy()  # vulnerability
-        wt = ninit.copy()  # weight
-        mat = ninit.copy()  # maturity
-        Lo = ninit.copy()  # survivorship unfished
-        Lf = ninit.copy()  # survivorship fished
-        mwt = ninit.copy()  # mature weight
-
-        # leading array calculations to get vul-at-age, wt-at-age, etc.
-        for a in range(0, p["n_age"], 1):
-            survey_vul[a] = 1 / (1 + np.exp(-p["asl"] * (p["ages"][a] - p["ahv"])))
-            wt[a] = pow(
-                (1 - np.exp(-p["vbk"] * p["ages"][a])), 3
-            )  # 3 --> isometric growth
-            mat[a] = 1 / (1 + np.exp(-p["asl"] * (p["ages"][a] - p["ahm"])))
-            if a == 0:
-                Lo[a] = 1
-                Lf[a] = 1
-            elif a > 0 and a < (p["n_age"] - 1):
-                Lo[a] = Lo[a - 1] * p["s"]
-                Lf[a] = Lf[a - 1] * p["s"] * (1 - survey_vul[a - 1] * p["uo"])
-            elif a == (p["n_age"] - 1):
-                Lo[a] = Lo[a - 1] * p["s"] / (1 - p["s"])
-                Lf[a] = (
-                    Lf[a - 1]
-                    * p["s"]
-                    * (1 - survey_vul[a - 1] * p["uo"])
-                    / (1 - p["s"] * (1 - survey_vul[a - 1] * p["uo"]))
-                )
-
-        ninit = np.array(p["rinit"]) * Lf
-        mwt = mat * np.array(wt)
-        sbro = sum(Lo * mwt)  # spawner biomass per recruit in the unfished condition
-        bha = p["cr"] / sbro  # beverton-holt alpha
-        bhb = (p["cr"] - 1) / (p["ro"] * sbro)  # beverton-holt beta
-
-        # put it all in self so we can reference later
-        self.parameters["Lo"] = Lo
-        self.parameters["Lf"] = Lf
-        self.parameters["survey_vul"] = survey_vul
-        self.parameters["harvest_vul"] = survey_vul
-        self.parameters["wt"] = wt
-        self.parameters["min_wt"] = np.min(wt)
-        self.parameters["max_wt"] = np.max(wt)
-        self.parameters["mwt"] = mwt
-        self.parameters["bha"] = bha
-        self.parameters["bhb"] = bhb
-        self.parameters["p_big"] = 0.05
-        self.parameters["sdr"] = 0.3
-        self.parameters["rho"] = 0
-        n = np.array(ninit, dtype=np.float32)
-        self.state = np.clip(n, 0, np.Inf)
-        return self.state
-
-    def harvest(self, n, mortality):
-        p = self.parameters
-        self.vulb = sum(p["harvest_vul"] * n * p["wt"])
-        self.vbobs = self.vulb  # could multiply this by random deviate
-        self.ssb = sum(p["mwt"] * n)
-        if sum(n) > 0:
-            self.abar = sum(p["harvest_vul"] * np.array(p["ages"]) * n) / sum(n)
-            self.wbar = sum(p["harvest_vul"] * n * p["wt"]) / sum(n * p["wt"])
-        else:
-            self.abar = 0
-            self.wbar = 0
-        self.yieldf = mortality[0] * self.vulb  # fishery yield
-        reward = self.yieldf ** p["upow"]  # this is utility
-        n = p["s"] * n * (1 - p["harvest_vul"] * mortality)  # eat fish
-        return n, reward
-
-    def population_growth(self, n):
-        p = self.parameters
-        # mu = np.log(1) - p["sigma"] ** 2 / 2
-        bh_alpha = p["bha"]  # * np.random.lognormal(mu, p["sigma"])
-
-        n[p["n_age"] - 1] = (
-            n[p["n_age"] - 1] + n[p["n_age"] - 2]
-        )  # plus group accounting
-        for a in range(p["n_age"] - 2, 0, -1):
-            n[a] = n[a - 1]  # advance fish one a
-
-        n[0] = (
-            (bh_alpha)
-            * self.ssb
-            / (1 + p["bhb"] * self.ssb)
-            * self.r_devs[self.timestep]
-        )  # NOTE eventually needs to be r_devs[t]
-        return n
-
-    def mean_wt_obs(self):
-        self.state
-
-    def observe(self):
-        p = self.parameters
-        self.vul_pop = p["survey_vul"] * self.state
-        self.vul_pop_total = sum(self.vul_pop)
-        self.vulb = sum(self.vul_pop * self.parameters["wt"])  # update vulnerable biomass
-
-        biomass_obs = 2 * self.vulb / self.bound - 1
-
-        if self.vul_pop_total == 0:
-            vulnuerable_mean_wt = 0
-        else:
-            vulnuerable_mean_wt = self.vulb / self.vul_pop_total
-
-        max_wt, min_wt = self.parameters["max_wt"], self.parameters["min_wt"]  # for readability
-        mean_wt_obs = (
-            2 * (vulnuerable_mean_wt - min_wt) / (max_wt - min_wt) - 1
-        )
-
-        observation = np.clip(np.array([biomass_obs, mean_wt_obs]), -1, 1)
-        return np.float32(observation)
-
-    def population_units(self):
-        total = np.array([sum(self.state)])
-        return total
-
-    def mortality_units(self, action):
-        action = np.clip(action, [-1], [1])
-        mortality = (action + 1.0) / 2.0
-        return mortality
-
-    def reset(self, *, seed=None, options=None):
-        self.timestep = 0
-        self.state = self.initialize_population()
-        self.state = self.init_state * np.array(
-            np.random.uniform(0.1, 1), dtype=np.float32
-        )
-        self.r_devs = get_r_devs(
-            n_year=self.n_year,
-            p_big=self.parameters["p_big"],
-            sdr=self.parameters["sdr"],
-            rho=self.parameters["rho"],
-        )
-        obs = self.observe()
-        return obs, {}
-
-
-def get_r_devs(n_year, p_big=0.05, sdr=0.3, rho=0):
-    """
-    f(x) to create recruitment deviates, which are multiplied
-    by the stock-recruitment prediction in the age-structured model
-
-    args:
-        n_year: number of deviates required for simulation
-        p_big: Pr(big year class)
-        r_big: magnitude of big year class
-        sdr: sd of recruitment
-        rho: autocorrelation in recruitment sequence
-    returns:
-        vector of recruitment deviates of length n_year
-
-    """
-    r_mult = np.float32([1] * n_year)
-    u_rand = np.random.uniform(0, 1, n_year)
-    n_rand = np.random.normal(0, 1, n_year)
-    r_big = np.random.uniform(10, 30, n_year)
-
-    r_low = (1 - p_big * r_big) / (1 - p_big)  # small rec event
-    r_low = np.clip(r_low, 0, None)
-    dev_last = 0
-    for t in range(0, n_year, 1):
-        r_mult[t] = r_low[t]
-        if u_rand[t] < p_big:
-            r_mult[t] = r_big[t]
-        r_mult[t] = r_mult[t] * np.exp(sdr * n_rand[t] + rho * dev_last)
-        dev_last = sdr * n_rand[t] + rho * dev_last
-    return r_mult
-
-
-# smoke-test
-# Confirm environment is correctly defined:
-# from stable_baselines3.common.env_checker import check_env
-# check_env(asm(), warn=True)

From ec579b7ecde0d12a2f88ebabf00e19f705146e8c Mon Sep 17 00:00:00 2001
From: Felipe Montealegre-Mora <34276401+felimomo@users.noreply.github.com>
Date: Thu, 13 Jun 2024 14:51:44 -0700
Subject: [PATCH 2/2] Update README.md

---
 README.md | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 04c54c3..6d8c157 100644
--- a/README.md
+++ b/README.md
@@ -2,22 +2,31 @@
 Models:
 
-- asm.py: provides `Asm()`. Observes 1 dimension (total N). Action is harvest
-- asm_2o.py: provides `Asm2o()`. Observes 2 dimensions: total N and mean biomass (wt).
-- ams_esc.py: escapement `AsmEsc()` escapement-encoded.
+- `asm_env.py`: provides `AsmEnv()`. This encodes our population dynamics model, coupled with an observation process and a harvest process with a corresponding utility model. These processes can all be modified through the `config` argument; their defaults are defined in `asm_fns.py`. By default, observations are stock biomass and mean weight (see the usage sketch below).
+- `asm_esc.py`: provides `AsmEnvEsc()`, which inherits from `AsmEnv` and differs from it in one respect: its actions represent escapement levels rather than fishing intensities.
+- `asm_cr_like.py`: provides `AsmCRLike()`. In this environment, mean weight is observed and the action is to set the parameters `(x1, x2, y2)` of a biomass-based harvest control rule of the type `CautionaryRule` (specified below).
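+A minimal usage sketch (assuming `AsmEnv` keeps the `config={}` constructor pattern of the older envs; the overridable keys are listed in `asm_fns.py`):
+
+```python
+from rl4fisheries import AsmEnv
+
+env = AsmEnv(config={})  # empty config -> default dynamics, observation, and harvest processes
+obs, info = env.reset()
+obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+```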
 
-Methods:
+Strategies evaluated with MSE:
 
-(For both 1d and 2d observations)
-- MSE piecewise linear rule (in mortality space)
-- Constant Mortality
-- Constant escapement
+- `agents.cautionary_rule.CautionaryRule`: piecewise-linear harvest control rule specified by three parameters `(x1, x2, y2)`; its shape is sketched after this list. Example plot (TBD).
+- `agents.msy.Msy`: constant-mortality harvest control rule, specified by one parameter `mortality`.
+- `agents.const_esc.ConstEsc`: constant-escapement harvest control rule, specified by one parameter `escapement`.
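+An illustrative sketch of the rule's shape (assuming `0 <= x1 <= x2`; the actual interface lives in `agents/cautionary_rule.py`):
+
+```python
+def cautionary_rule_mortality(biomass: float, x1: float, x2: float, y2: float) -> float:
+    # zero mortality below x1, a linear ramp from (x1, 0) to (x2, y2),
+    # and constant mortality y2 above x2 -- a sketch, not the package implementation
+    if biomass <= x1:
+        return 0.0
+    if biomass >= x2:
+        return y2
+    return y2 * (biomass - x1) / (x2 - x1)
+```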
 
 ## Installation
 
-## RL training:
+Clone this repo, then:
+
+```bash
+cd rl4fisheries
+pip install .
+```
 
-requires `rl4eco` package. Simply run `scripts/train.py` pointing at the chosen configuration. The trained model is automatically pushed to Huggingface (requires a HF token).
+## RL training:
+Simply run
 ```bash
-python scripts/train.py -f hyperpars/ppo-asm2o-v0-1.yml
+python scripts/train.py -f path/to/config/file.yml
 ```
+The trained model is automatically pushed to Huggingface (requires a HF token; see the note below).
+The config files used for our results are found in `hyperpars/for_results/`.
+
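+One way to supply the token before training (a sketch, assuming the standard `huggingface_hub` library; the training stack may read the token differently):
+
+```python
+from huggingface_hub import login
+
+login()  # prompts for the HF token once; scripts can then push to the Hub
+```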