From 424e94f2bcf56ab2e02a818c887f75a9efaadf66 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier <23350991+stevebachmeier@users.noreply.github.com> Date: Fri, 23 Aug 2024 08:31:08 -0600 Subject: [PATCH] Feature/sbachmei/mic 5222 examples (#473) * update cli args table * update example simulate run command * Update examples and tutorials --- docs/source/tutorials/disease_model.rst | 145 ++++++++++++++++-- docs/source/tutorials/exploration.rst | 5 + .../tutorials/running_a_simulation/cli.rst | 38 +++-- .../running_a_simulation/interactive.rst | 12 +- pyproject.toml | 4 +- .../examples/disease_model/__init__.py | 2 +- .../examples/disease_model/disease_model.yaml | 3 +- .../examples/disease_model/mortality.py | 29 +++- .../examples/disease_model/observer.py | 73 +++++---- .../randomness/test_reproducibility.py | 4 +- 10 files changed, 244 insertions(+), 71 deletions(-) diff --git a/docs/source/tutorials/disease_model.rst b/docs/source/tutorials/disease_model.rst index 390631ef2..fb3119681 100644 --- a/docs/source/tutorials/disease_model.rst +++ b/docs/source/tutorials/disease_model.rst @@ -28,7 +28,7 @@ Setup ----- I'm assuming you've read through the material in :doc:`getting started ` and are working in your -:file:`vivarium_examples` package. If not, you should go there first. +:file:`vivarium/examples` package. If not, you should go there first. .. todo:: package setup with __init__ and stuff @@ -45,7 +45,7 @@ We need a population though. So we'll start with one here and defer explanation of some of the more complex pieces/systems until later. .. literalinclude:: ../../../src/vivarium/examples/disease_model/population.py - :caption: **File**: :file:`~/code/vivarium_examples/disease_model/population.py` + :caption: **File**: :file:`~/code/vivarium/examples/disease_model/population.py` There are a lot of things here. Let's take them piece by piece. (*Note*: I'll be leaving out the docstrings in the code snippets below). 
@@ -483,7 +483,7 @@ Now that we've done all this hard work, let's see what it gives us. .. code-block:: python from vivarium import InteractiveContext - from vivarium_examples.disease_model.population import BasePopulation + from vivarium.examples.disease_model.population import BasePopulation config = {'randomness': {'key_columns': ['entrance_time', 'age']}} @@ -540,7 +540,7 @@ Now that we have population generation and aging working, the next step is introducing mortality into our simulation. .. literalinclude:: ../../../src/vivarium/examples/disease_model/mortality.py - :caption: **File**: :file:`~/code/vivarium_examples/disease_model/mortality.py` + :caption: **File**: :file:`~/code/vivarium/examples/disease_model/mortality.py` The purpose of this component is to determine who dies every time step based on a mortality rate. You'll see many of the same framework features we used @@ -664,8 +664,8 @@ can see the impact of our mortality component without taking too many steps. .. code-block:: python from vivarium InteractiveContext - from vivarium_examples.disease_model.population import BasePopulation - from vivarium_examples.disease_model.mortality import Mortality + from vivarium.examples.disease_model.population import BasePopulation + from vivarium.examples.disease_model.mortality import Mortality config = { 'population': { @@ -736,25 +736,142 @@ to 0.0097 deaths per person-year, very close to the 0.01 rate we provided. sim = InteractiveContext(components=[BasePopulation(), Mortality()], configuration=config) sim.take_steps(2) - -Observer --------- - -In a real simulation, we typically want to record sophisticated output. We -also frequently work in non-interactive (or even distributed) environments -where we don't have easy access to the simulation object. - Disease ------- +.. todo:: + disease + Risk ---- +.. todo:: + risk + Intervention ------------ +.. 
todo:: + interventions + +Observer +-------- + +We've spent some time showing how we can look at the population state table to see +how it changes during an interactive simulation. However, we also typically want +the simulation itself to record more sophisticated output. Further, we frequently +work in non-interactive (or even distributed) environments where we simply don't +have access to the simulation object and so would like to write our output to disk. +These recorded outputs (i.e. results) are referred to in vivarium as **observations** +and it is the job of so-called **observers** to register them to the simulation. +:class:`Observers <vivarium.framework.results.observer.Observer>` are vivarium +:class:`components <vivarium.component.Component>` that are created by the user +and added to the simulation via the model specification. + +This example's observers are shown below. + +.. literalinclude:: ../../../src/vivarium/examples/disease_model/observer.py + :caption: **File**: :file:`~/code/vivarium/examples/disease_model/observer.py` + +There are two observers that have each registered a single observation to the +simulation: deaths and years of life lost (YLLs). It is important to note that +neither of those observations is a population state table column; they are +more complex results that require some computation to determine. + +In an interactive setting, we can access these observations via the +``sim.get_results()`` command. This will return a dictionary of all +observations up to this point in the simulation. + +.. 
code-block:: python + + from vivarium import InteractiveContext + from vivarium.examples.disease_model.population import BasePopulation + from vivarium.examples.disease_model.mortality import Mortality + from vivarium.examples.disease_model.observer import DeathsObserver, YllsObserver + + config = { + 'population': { + 'population_size': 100_000 + }, + 'randomness': { + 'key_columns': ['entrance_time', 'age'] + } + } + + sim = InteractiveContext( + components=[ + BasePopulation(), + Mortality(), + DeathsObserver(), + YllsObserver(), + ], + configuration=config + ) + sim.take_steps(365) # Run for one year with one day time steps + print(sim.get_results()["dead"]) + print(sim.get_results()["ylls"]) + +:: + + stratification value + 0 all 985.0 + + stratification value + 0 all 27966.647762 + +We see that after 365 days of simulation, 985 simulants have died and there has +been a total of 27,967 years of life lost. + +.. testcode:: + :hide: + + from vivarium import InteractiveContext + from vivarium.examples.disease_model.population import BasePopulation + from vivarium.examples.disease_model.mortality import Mortality + from vivarium.examples.disease_model.observer import DeathsObserver, YllsObserver + + config = { + 'population': { + 'population_size': 100_000 + }, + 'randomness': { + 'key_columns': ['entrance_time', 'age'] + } + } + + sim = InteractiveContext( + components=[ + BasePopulation(), + Mortality(), + DeathsObserver(), + YllsObserver(), + ], + configuration=config + ) + sim.take_steps(2) + dead = sim.get_results()["dead"] + assert len(dead) == 1 + assert dead["value"][0] == 6 + ylls = sim.get_results()["ylls"] + assert len(ylls) == 1 + assert ylls["value"][0] == 102.50076885303923 + +.. note:: + + The observer is responsible for recording observations in memory, but it is + the responsibility of the user to write them to disk when in an interactive + environment. When running a full simulation from the command line (i.e. 
in a + non-interactive environment), the vivarium engine itself will automatically + write the results to disk at the end of the simulation. + Running from the command line ----------------------------- +.. todo:: + running from the command line + Exploring some results ---------------------- + +.. todo:: + exploring some results diff --git a/docs/source/tutorials/exploration.rst b/docs/source/tutorials/exploration.rst index ae72caa63..0aafefc32 100644 --- a/docs/source/tutorials/exploration.rst +++ b/docs/source/tutorials/exploration.rst @@ -169,6 +169,11 @@ configuration by simply printing it. stratification: default: component_configs: [] + deaths: + exclude: + component_configs: [] + include: + component_configs: [] What do we see here? The configuration is *hierarchical*. There are a set of diff --git a/docs/source/tutorials/running_a_simulation/cli.rst b/docs/source/tutorials/running_a_simulation/cli.rst index b70a8b863..68f2088df 100644 --- a/docs/source/tutorials/running_a_simulation/cli.rst +++ b/docs/source/tutorials/running_a_simulation/cli.rst @@ -66,34 +66,44 @@ options for the run. These are: * - Option - Description - * - | **--results-directory** or **-o** + * - | **-\-artifact_path** or **-i** + - | The path to the artifact data file that the + | model requires. This is only required if the model specification + | file does not contain the artifact path or you want to override it. + * - | **-\-results_directory** or **-o** - | The top-level directory in which to write results. | Within this directory, a subdirectory named to match the - | model-specification file will be created. Within this, a further + | model specification file will be created. Within this, a further | subdirectory named for the time at which the run was started will | be created. - * - | **--verbose** or **-v** + * - | **-\-verbose** or **-v** - | Report each time step as it occurs during the run. 
- * - | **--log** - - | A path at which a log file should be created. - * - | **--pdb** + * - | **-\-quiet** or **-q** + - | Suppress all logging except for warnings and errors. + * - | **-\-pdb** - | If an error occurs, drop into the python debugger. + * - | **-\-help** + - | Print a help message and exit. + +.. note:: + You can see a description of any of the available commands by using the + **-\-help** flag, e.g. ``simulate --help`` or ``simulate run --help``. Let's illustrate how to use them. Say we run the following: .. code-block:: console - simulate run /path/to/your/model/specification -o /path/to/output/directory --log /path/to/log/file --pdb -v + simulate run /path/to/your/model/specification.yaml -i /path/to/artifact.hdf -o /path/to/output/directory --pdb -v Let's walk through how each of these flags will change the behavior from our -initial plain ``simulate run``. First, we have specified an output directory -via the **-o** flag. In our first example, outputs went to -``~/vivarium_results``. Now they will go to our specified directory. Second, we -have also provided a path to a log file via **--log** at which we -can find the log outputs of our simulation run. Next, we have provided the -**--pdb** flag so that if something goes wrong in our run, we will drop into -the python debugger where we can investigate. Finally, we have turned on the +initial plain ``simulate run``. First, we have provided an artifact path via the +**-i** flag which will run the simulation using that artifact (regardless of what +is specified in the model specification). Second, we have specified an output +directory via the **-o** flag. In our first example, outputs went to +``~/vivarium_results``. Now they will go to our specified directory. Next, we have +provided the **-\-pdb** flag so that if something goes wrong in our run, we will drop +into the python debugger where we can investigate. Finally, we have turned on the verbose option via the **-v** flag. 
Whereas before, we saw nothing printed to the console while our simulation was running, we will now see something like the following: diff --git a/docs/source/tutorials/running_a_simulation/interactive.rst b/docs/source/tutorials/running_a_simulation/interactive.rst index 7a99d381b..93633b201 100644 --- a/docs/source/tutorials/running_a_simulation/interactive.rst +++ b/docs/source/tutorials/running_a_simulation/interactive.rst @@ -124,9 +124,9 @@ example and we will place them in a normal Python list. .. code-block:: python - from vivarium.examples.disease_model import (BasePopulation, Mortality, Observer, - SISDiseaseModel, Risk, RiskEffect, - TreatmentIntervention) + from vivarium.examples.disease_model import (BasePopulation, Mortality, DeathsObserver, + YllsObserver, SISDiseaseModel, Risk, + RiskEffect, TreatmentIntervention) components = [BasePopulation(), Mortality(), @@ -197,9 +197,9 @@ one last way to set up the simulation in an interactive setting. .. testcode:: :hide: - from vivarium.examples.disease_model import (BasePopulation, Mortality, Observer, - SISDiseaseModel, Risk, RiskEffect, - TreatmentIntervention) + from vivarium.examples.disease_model import (BasePopulation, Mortality, DeathsObserver, + YllsObserver, SISDiseaseModel, Risk, + RiskEffect, TreatmentIntervention) from vivarium import InteractiveContext config = { diff --git a/pyproject.toml b/pyproject.toml index 66e0eeaa3..464a65bd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,8 +13,10 @@ show_missing = true [tool.black] line_length = 94 +exclude = '''.*examples.*''' [tool.isort] line_length = 94 profile = "black" -multi_line_output = 3 # Vertical Hanging Indent, see https://pypi.org/project/isort/ \ No newline at end of file +multi_line_output = 3 # Vertical Hanging Indent, see https://pypi.org/project/isort/ +skip_glob = ["*/examples/*", "**/examples/*"] \ No newline at end of file diff --git a/src/vivarium/examples/disease_model/__init__.py 
b/src/vivarium/examples/disease_model/__init__.py index 93ec96de6..20ec97e78 100644 --- a/src/vivarium/examples/disease_model/__init__.py +++ b/src/vivarium/examples/disease_model/__init__.py @@ -6,7 +6,7 @@ ) from vivarium.examples.disease_model.intervention import TreatmentIntervention from vivarium.examples.disease_model.mortality import Mortality -from vivarium.examples.disease_model.observer import Observer +from vivarium.examples.disease_model.observer import DeathsObserver, YllsObserver from vivarium.examples.disease_model.population import BasePopulation from vivarium.examples.disease_model.risk import Risk, RiskEffect diff --git a/src/vivarium/examples/disease_model/disease_model.yaml b/src/vivarium/examples/disease_model/disease_model.yaml index 9af587836..c2eb4e0bf 100644 --- a/src/vivarium/examples/disease_model/disease_model.yaml +++ b/src/vivarium/examples/disease_model/disease_model.yaml @@ -12,7 +12,8 @@ components: intervention: - TreatmentIntervention('sqlns', 'child_wasting.proportion_exposed') observer: - - Observer() + - DeathsObserver() + - YllsObserver() configuration: randomness: diff --git a/src/vivarium/examples/disease_model/mortality.py b/src/vivarium/examples/disease_model/mortality.py index 30a12ad8f..5c58ea7e8 100644 --- a/src/vivarium/examples/disease_model/mortality.py +++ b/src/vivarium/examples/disease_model/mortality.py @@ -28,11 +28,7 @@ def configuration_defaults(self) -> Dict[str, Any]: @property def columns_required(self) -> Optional[List[str]]: - return ["alive"] - - @property - def population_view_query(self) -> Optional[str]: - return "alive == 'alive'" + return ["tracked", "alive"] ##################### # Lifecycle methods # @@ -76,7 +72,28 @@ def on_time_step(self, event: Event) -> None: effective_probability = 1 - np.exp(-effective_rate) draw = self.randomness.get_draw(event.index) affected_simulants = draw < effective_probability - self.population_view.update(pd.Series("dead", index=event.index[affected_simulants])) + 
self.population_view.subview(["alive"]).update( + pd.Series("dead", index=event.index[affected_simulants]) + ) + + def on_time_step_prepare(self, event: Event) -> None: + """Untrack any simulants who died during the previous time step. + + We do this after the previous time step because the mortality + observer needs to collect observations before updating. + + Parameters + ---------- + event : + An event object emitted by the simulation containing an index + representing the simulants affected by the event and timing + information. + """ + population = self.population_view.get(event.index) + population.loc[ + (population["alive"] == "dead") & population["tracked"] == True, "tracked" + ] = False + self.population_view.update(population) ################################## # Pipeline sources and modifiers # diff --git a/src/vivarium/examples/disease_model/observer.py b/src/vivarium/examples/disease_model/observer.py index fd59b6154..35b143fcc 100644 --- a/src/vivarium/examples/disease_model/observer.py +++ b/src/vivarium/examples/disease_model/observer.py @@ -1,47 +1,54 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Optional import pandas as pd from vivarium.framework.engine import Builder -from vivarium.framework.results import Observer as Observer_ +from vivarium.framework.results import Observer -class Observer(Observer_): +class DeathsObserver(Observer): + """Observes the number of deaths.""" + ############## # Properties # ############## @property - def configuration_defaults(self) -> Dict[str, Any]: - return { - "mortality": { - "life_expectancy": 80, - } - } + def columns_required(self) -> Optional[list[str]]: + return ["alive"] - @property - def columns_required(self) -> Optional[List[str]]: - return ["age", "alive"] + ################# + # Setup methods # + ################# def register_observations(self, builder: Builder) -> None: + """We define a newly-dead simulant as one who is 'dead' but who has not + yet become untracked.""" 
builder.results.register_adding_observation( - name="total_population_alive", - requires_columns=["alive"], - pop_filter='alive == "alive"', - ) - builder.results.register_adding_observation( - name="total_population_dead", + name="dead", requires_columns=["alive"], - pop_filter='alive == "dead"', - ) - builder.results.register_adding_observation( - name="years_of_life_lost", - requires_columns=["age", "alive"], - aggregator=self.calculate_ylls, + pop_filter='tracked == True and alive == "dead"', ) - def calculate_ylls(self, df: pd.DataFrame) -> float: - return (self.life_expectancy - df.loc[df["alive"] == "dead", "age"]).sum() + +class YllsObserver(Observer): + """Observes the years of lives lost.""" + + ############## + # Properties # + ############## + + @property + def columns_required(self) -> Optional[list[str]]: + return ["age", "alive"] + + @property + def configuration_defaults(self) -> dict[str, Any]: + return { + "mortality": { + "life_expectancy": 80, + } + } ##################### # Lifecycle methods # @@ -50,3 +57,17 @@ def calculate_ylls(self, df: pd.DataFrame) -> float: # noinspection PyAttributeOutsideInit def setup(self, builder: Builder) -> None: self.life_expectancy = builder.configuration.mortality.life_expectancy + + ################# + # Setup methods # + ################# + + def register_observations(self, builder: Builder) -> None: + builder.results.register_adding_observation( + name="ylls", + requires_columns=["age", "alive"], + aggregator=self.calculate_ylls, + ) + + def calculate_ylls(self, df: pd.DataFrame) -> float: + return (self.life_expectancy - df.loc[df["alive"] == "dead", "age"]).sum() diff --git a/tests/framework/randomness/test_reproducibility.py b/tests/framework/randomness/test_reproducibility.py index 8db9c908c..480b77fa5 100644 --- a/tests/framework/randomness/test_reproducibility.py +++ b/tests/framework/randomness/test_reproducibility.py @@ -23,8 +23,8 @@ def test_reproducibility(tmp_path, disease_model_spec): ) files = 
[file for file in results_dir.rglob("**/*.parquet")] - assert len(files) == 6 - for filename in ["total_population_alive", "total_population_dead", "years_of_life_lost"]: + assert len(files) == 4 + for filename in ["dead", "ylls"]: df_paths = [file for file in files if file.stem == filename] df1 = pd.read_parquet(df_paths[0]) df2 = pd.read_parquet(df_paths[1])